{ "cells": [
 { "cell_type": "code", "id": "initial_id", "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:43:32.795850Z", "start_time": "2025-11-04T21:43:32.794457Z" } }, "source": "import numpy as np", "outputs": [], "execution_count": 45 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:43:32.811210Z", "start_time": "2025-11-04T21:43:32.809638Z" } }, "cell_type": "code", "source": [
  "# Network layout: one dict per dense layer giving its input width,\n",
  "# output width and activation name (consumed by the functions below).\n",
  "nn_architecture = [\n",
  "    {\"input_dim\": 2, \"output_dim\": 4, \"activation\": \"relu\"},\n",
  "    {\"input_dim\": 4, \"output_dim\": 6, \"activation\": \"relu\"},\n",
  "    {\"input_dim\": 6, \"output_dim\": 6, \"activation\": \"relu\"},\n",
  "    {\"input_dim\": 6, \"output_dim\": 4, \"activation\": \"relu\"},\n",
  "    {\"input_dim\": 4, \"output_dim\": 1, \"activation\": \"sigmoid\"},\n",
  "]"
 ], "id": "48cafaf4b64967bb", "outputs": [], "execution_count": 46 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:43:32.862226Z", "start_time": "2025-11-04T21:43:32.860368Z" } }, "cell_type": "code", "source": [
  "def init_layers(nn_architecture, seed=99, scale=0.1):\n",
  "    \"\"\"Create the initial weights and biases for every layer.\n",
  "\n",
  "    Parameters\n",
  "    ----------\n",
  "    nn_architecture : list of dict\n",
  "        One dict per layer; the keys \"input_dim\" and \"output_dim\" are read.\n",
  "    seed : int, default 99\n",
  "        Passed to ``np.random.seed`` so the draw is repeatable. This reseeds\n",
  "        NumPy's global random state.\n",
  "    scale : float, default 0.1\n",
  "        Factor applied to the standard-normal samples.\n",
  "\n",
  "    Returns\n",
  "    -------\n",
  "    dict\n",
  "        \"W<k>\" -> array of shape (output_dim, input_dim) and\n",
  "        \"b<k>\" -> array of shape (output_dim, 1), with k counted from 1.\n",
  "    \"\"\"\n",
  "    np.random.seed(seed)\n",
  "    params_values = {}\n",
  "\n",
  "    for layer_idx, layer in enumerate(nn_architecture, start=1):\n",
  "        layer_input_size = layer[\"input_dim\"]\n",
  "        layer_output_size = layer[\"output_dim\"]\n",
  "\n",
  "        params_values['W' + str(layer_idx)] = np.random.randn(\n",
  "            layer_output_size, layer_input_size) * scale\n",
  "        params_values['b' + str(layer_idx)] = np.random.randn(\n",
  "            layer_output_size, 1) * scale\n",
  "\n",
  "    return params_values\n"
 ], "id": "d13137630b41b756", "outputs": [], "execution_count": 47 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:46:21.643740Z", "start_time": "2025-11-04T21:46:21.639693Z" } }, "cell_type": "code", "source": [
  "# Untrained parameter set (default seed); used by the single-sample\n",
  "# forward-pass demo cell further down.\n",
  "params = init_layers(nn_architecture)"
 ], "id": "31f205147667dea6", "outputs": [], "execution_count": 64 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:43:32.957461Z", "start_time": "2025-11-04T21:43:32.955675Z" } }, "cell_type": "code", "source": [
  "def sigmoid(Z):\n",
  "    \"\"\"Element-wise logistic function 1 / (1 + exp(-Z)).\"\"\"\n",
  "    return 1 / (1 + np.exp(-Z))\n",
  "\n",
  "\n",
  "def relu(Z):\n",
  "    \"\"\"Element-wise rectifier max(0, Z).\"\"\"\n",
  "    return np.maximum(0, Z)\n",
  "\n",
  "\n",
  "def sigmoid_backward(dA, Z):\n",
  "    \"\"\"Gradient w.r.t. Z of a sigmoid layer, given the gradient dA of its output.\"\"\"\n",
  "    sig = sigmoid(Z)\n",
  "    return dA * sig * (1 - sig)\n",
  "\n",
  "\n",
  "def relu_backward(dA, Z):\n",
  "    \"\"\"Gradient w.r.t. Z of a ReLU layer: dA where Z > 0, zero elsewhere.\"\"\"\n",
  "    dZ = np.array(dA, copy=True)  # copy so the caller's dA is left untouched\n",
  "    dZ[Z <= 0] = 0\n",
  "    return dZ"
 ], "id": "c1b960e7dcf09d91", "outputs": [], "execution_count": 49 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:44:22.909895Z", "start_time": "2025-11-04T21:44:22.906363Z" } }, "cell_type": "code", "source": [
  "def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation=\"relu\"):\n",
  "    \"\"\"Run one dense layer forward.\n",
  "\n",
  "    Computes Z_curr = W_curr . A_prev + b_curr and applies the named activation.\n",
  "\n",
  "    Returns\n",
  "    -------\n",
  "    tuple\n",
  "        (A_curr, Z_curr): the activated output and the pre-activation values.\n",
  "\n",
  "    Raises\n",
  "    ------\n",
  "    ValueError\n",
  "        If ``activation`` is neither \"relu\" nor \"sigmoid\".\n",
  "    \"\"\"\n",
  "    Z_curr = np.dot(W_curr, A_prev) + b_curr\n",
  "\n",
  "    if activation == \"relu\":\n",
  "        activation_func = relu\n",
  "    elif activation == \"sigmoid\":\n",
  "        activation_func = sigmoid\n",
  "    else:\n",
  "        # ValueError subclasses Exception, so existing `except Exception` still catches it\n",
  "        raise ValueError('Non-supported activation function')\n",
  "\n",
  "    return activation_func(Z_curr), Z_curr"
 ], "id": "efae2e184daf2fce", "outputs": [], "execution_count": 61 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:43:33.055558Z", "start_time": "2025-11-04T21:43:33.053594Z" } }, "cell_type": "code", "source": [
  "def full_forward_propagation(X, params_values, nn_architecture):\n",
  "    \"\"\"Propagate X through every layer listed in nn_architecture.\n",
  "\n",
  "    Returns\n",
  "    -------\n",
  "    tuple\n",
  "        (A_curr, memory): the output of the last layer, and a dict holding\n",
  "        \"A<k-1>\" (input fed to layer k) and \"Z<k>\" (pre-activation of layer k)\n",
  "        for k = 1..number of layers, as needed by the backward pass.\n",
  "    \"\"\"\n",
  "    memory = {}\n",
  "    A_curr = X\n",
  "\n",
  "    for idx, layer in enumerate(nn_architecture):\n",
  "        layer_idx = idx + 1\n",
  "        A_prev = A_curr\n",
  "\n",
  "        activ_function_curr = layer[\"activation\"]\n",
  "        W_curr = params_values[\"W\" + str(layer_idx)]\n",
  "        b_curr = params_values[\"b\" + str(layer_idx)]\n",
  "        A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr, activ_function_curr)\n",
  "\n",
  "        memory[\"A\" + str(idx)] = A_prev\n",
  "        memory[\"Z\" + str(layer_idx)] = Z_curr\n",
  "\n",
  "    return A_curr, memory"
 ], "id": "c3cd9e8f51dbe967", "outputs": [], "execution_count": 51 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:43:33.103372Z", "start_time": "2025-11-04T21:43:33.101510Z" } }, "cell_type": "code", "source": [
  "def get_cost_value(Y_hat, Y, eps=1e-15):\n",
  "    \"\"\"Binary cross-entropy between predictions Y_hat and labels Y.\n",
  "\n",
  "    The number of examples is taken from ``Y_hat.shape[1]``. Predictions are\n",
  "    clipped to [eps, 1 - eps] so that a saturated output of exactly 0 or 1\n",
  "    does not produce log(0) (which would turn the cost into nan/inf).\n",
  "    \"\"\"\n",
  "    m = Y_hat.shape[1]\n",
  "    Y_hat = np.clip(Y_hat, eps, 1 - eps)\n",
  "    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))\n",
  "    return np.squeeze(cost)\n",
  "\n",
  "\n",
  "def convert_prob_into_class(probs):\n",
  "    \"\"\"Return a copy of probs with values > 0.5 set to 1 and the rest set to 0.\"\"\"\n",
  "    probs_ = np.copy(probs)\n",
  "    probs_[probs_ > 0.5] = 1\n",
  "    probs_[probs_ <= 0.5] = 0\n",
  "    return probs_\n",
  "\n",
  "\n",
  "def get_accuracy_value(Y_hat, Y):\n",
  "    \"\"\"Fraction of columns whose thresholded prediction matches Y in every row.\"\"\"\n",
  "    Y_hat_ = convert_prob_into_class(Y_hat)\n",
  "    return (Y_hat_ == Y).all(axis=0).mean()"
 ], "id": "121416e7bbab57bb", "outputs": [], "execution_count": 52 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:43:33.176375Z", "start_time": "2025-11-04T21:43:33.169411Z" } }, "cell_type": "code", "source": [
  "def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation=\"relu\"):\n",
  "    \"\"\"Back-propagate through one dense layer.\n",
  "\n",
  "    Parameters\n",
  "    ----------\n",
  "    dA_curr : gradient of the cost w.r.t. this layer's activated output.\n",
  "    W_curr, b_curr : this layer's weights and bias (b_curr is not used in the maths).\n",
  "    Z_curr : pre-activation values stored during the forward pass.\n",
  "    A_prev : input that was fed to this layer; its second axis gives the\n",
  "        number of examples used to average the gradients.\n",
  "    activation : \"relu\" or \"sigmoid\".\n",
  "\n",
  "    Returns\n",
  "    -------\n",
  "    tuple\n",
  "        (dA_prev, dW_curr, db_curr).\n",
  "\n",
  "    Raises\n",
  "    ------\n",
  "    ValueError\n",
  "        If ``activation`` is neither \"relu\" nor \"sigmoid\".\n",
  "    \"\"\"\n",
  "    m = A_prev.shape[1]\n",
  "\n",
  "    # Compare by value: `is` only worked when both strings happened to be interned\n",
  "    if activation == \"relu\":\n",
  "        backward_activation_func = relu_backward\n",
  "    elif activation == \"sigmoid\":\n",
  "        backward_activation_func = sigmoid_backward\n",
  "    else:\n",
  "        raise ValueError('Non-supported activation function')\n",
  "\n",
  "    dZ_curr = backward_activation_func(dA_curr, Z_curr)\n",
  "    dW_curr = np.dot(dZ_curr, A_prev.T) / m\n",
  "    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m\n",
  "    dA_prev = np.dot(W_curr.T, dZ_curr)\n",
  "\n",
  "    return dA_prev, dW_curr, db_curr"
 ], "id": "92e4b87664f18a63", "outputs": [], "execution_count": 53 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:43:33.243823Z", "start_time": "2025-11-04T21:43:33.234283Z" } }, "cell_type": "code", "source": [
  "def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture, eps=1e-15):\n",
  "    \"\"\"Compute the gradients of the cross-entropy cost for every layer.\n",
  "\n",
  "    Parameters\n",
  "    ----------\n",
  "    Y_hat : network output from the forward pass.\n",
  "    Y : labels; reshaped to ``Y_hat.shape``.\n",
  "    memory : dict with the \"A<k-1>\" / \"Z<k>\" entries saved by the forward pass.\n",
  "    params_values : dict with the current \"W<k>\" / \"b<k>\" arrays.\n",
  "    nn_architecture : list of layer dicts (the \"activation\" key is read).\n",
  "    eps : predictions are clipped to [eps, 1 - eps] so the divisions by\n",
  "        Y_hat and 1 - Y_hat stay finite.\n",
  "\n",
  "    Returns\n",
  "    -------\n",
  "    dict\n",
  "        \"dW<k>\" and \"db<k>\" for every layer k.\n",
  "    \"\"\"\n",
  "    grads_values = {}\n",
  "    Y = Y.reshape(Y_hat.shape)\n",
  "    Y_hat = np.clip(Y_hat, eps, 1 - eps)\n",
  "\n",
  "    # derivative of the binary cross-entropy with respect to Y_hat\n",
  "    dA_prev = -(np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))\n",
  "\n",
  "    # walk the layers from last to first\n",
  "    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):\n",
  "        layer_idx_curr = layer_idx_prev + 1\n",
  "        activ_function_curr = layer[\"activation\"]\n",
  "\n",
  "        dA_curr = dA_prev\n",
  "\n",
  "        A_prev = memory[\"A\" + str(layer_idx_prev)]\n",
  "        Z_curr = memory[\"Z\" + str(layer_idx_curr)]\n",
  "        W_curr = params_values[\"W\" + str(layer_idx_curr)]\n",
  "        b_curr = params_values[\"b\" + str(layer_idx_curr)]\n",
  "\n",
  "        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(\n",
  "            dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)\n",
  "\n",
  "        grads_values[\"dW\" + str(layer_idx_curr)] = dW_curr\n",
  "        grads_values[\"db\" + str(layer_idx_curr)] = db_curr\n",
  "\n",
  "    return grads_values"
 ], "id": "2c8e4eed1846f003", "outputs": [], "execution_count": 54 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:47:33.615104Z", "start_time": "2025-11-04T21:47:33.610483Z" } }, "cell_type": "code", "source": [
  "def update(params_values, grads_values, nn_architecture, learning_rate):\n",
  "    \"\"\"Apply one gradient-descent step to every layer's parameters.\n",
  "\n",
  "    The arrays stored in ``params_values`` are modified in place (``-=``);\n",
  "    the same dict is returned for convenience.\n",
  "    \"\"\"\n",
  "    for layer_idx in range(1, len(nn_architecture) + 1):\n",
  "        params_values[\"W\" + str(layer_idx)] -= learning_rate * grads_values[\"dW\" + str(layer_idx)]\n",
  "        params_values[\"b\" + str(layer_idx)] -= learning_rate * grads_values[\"db\" + str(layer_idx)]\n",
  "\n",
  "    return params_values"
 ], "id": "16320b953a183511", "outputs": [], "execution_count": 66 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:47:37.239308Z", "start_time": "2025-11-04T21:47:37.236527Z" } }, "cell_type": "code", "source": [
  "def train(X, Y, nn_architecture, epochs, learning_rate, verbose=False, callback=None,\n",
  "          return_history=False):\n",
  "    \"\"\"Train the network with full-batch gradient descent.\n",
  "\n",
  "    Parameters\n",
  "    ----------\n",
  "    X, Y : inputs and labels, passed unchanged to the forward/backward passes.\n",
  "    nn_architecture : list of layer dicts.\n",
  "    epochs : number of forward/backward/update iterations.\n",
  "    learning_rate : step size handed to ``update``.\n",
  "    verbose : if True, print cost and accuracy every 50th iteration.\n",
  "    callback : optional callable invoked as ``callback(i, params_values)``\n",
  "        every 50th iteration.\n",
  "    return_history : if True, also return the per-iteration cost and\n",
  "        accuracy lists.\n",
  "\n",
  "    Returns\n",
  "    -------\n",
  "    dict, or (dict, list, list) when ``return_history`` is True\n",
  "        The trained parameters, optionally followed by cost_history and\n",
  "        accuracy_history.\n",
  "    \"\"\"\n",
  "    # parameters are always initialised with seed 2\n",
  "    params_values = init_layers(nn_architecture, 2)\n",
  "    # metrics recorded at every iteration\n",
  "    cost_history = []\n",
  "    accuracy_history = []\n",
  "\n",
  "    for i in range(epochs):\n",
  "        # step forward\n",
  "        Y_hat, cache = full_forward_propagation(X, params_values, nn_architecture)\n",
  "\n",
  "        # metrics are computed before this iteration's update is applied\n",
  "        cost = get_cost_value(Y_hat, Y)\n",
  "        cost_history.append(cost)\n",
  "        accuracy = get_accuracy_value(Y_hat, Y)\n",
  "        accuracy_history.append(accuracy)\n",
  "\n",
  "        # step backward - calculating gradient\n",
  "        grads_values = full_backward_propagation(Y_hat, Y, cache, params_values, nn_architecture)\n",
  "        # updating model state\n",
  "        params_values = update(params_values, grads_values, nn_architecture, learning_rate)\n",
  "\n",
  "        if i % 50 == 0:\n",
  "            if verbose:\n",
  "                print(\"Iteration: {:05} - cost: {:.5f} - accuracy: {:.5f}\".format(i, cost, accuracy))\n",
  "            if callback is not None:\n",
  "                callback(i, params_values)\n",
  "\n",
  "    if return_history:\n",
  "        return params_values, cost_history, accuracy_history\n",
  "    return params_values"
 ], "id": "fce33f70bba3898", "outputs": [], "execution_count": 67 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:43:33.422252Z", "start_time": "2025-11-04T21:43:33.417262Z" } }, "cell_type": "code", "source": [
  "import os\n",
  "import tensorflow as tf\n",
  "\n",
  "from sklearn.datasets import make_moons\n",
  "from sklearn.model_selection import train_test_split\n",
  "\n",
  "import seaborn as sns\n",
  "import matplotlib.pyplot as plt\n",
  "from matplotlib import cm\n",
  "from mpl_toolkits.mplot3d import Axes3D\n",
  "sns.set_style(\"whitegrid\")\n",
  "\n",
  "import keras\n",
  "from keras.models import Sequential\n",
  "from keras.layers import Dense\n",
  "# from keras.utils import np_utils\n",
  "from keras import regularizers\n",
  "\n",
  "from sklearn.metrics import accuracy_score"
 ], "id": "cccd73b5018799d4", "outputs": [], "execution_count": 57 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:43:33.472509Z", "start_time": "2025-11-04T21:43:33.470657Z" } }, "cell_type": "code", "source": [
  "# number of samples in the data set\n",
  "N_SAMPLES = 1000\n",
  "# ratio between training and test sets\n",
  "TEST_SIZE = 0.1"
 ], "id": "4f66ffa878f01c02", "outputs": [], "execution_count": 58 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:43:33.520603Z", "start_time": "2025-11-04T21:43:33.518562Z" } }, "cell_type": "code", "source": [
  "X, y = make_moons(n_samples = N_SAMPLES, noise=0.2, random_state=100)\n",
  "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)"
 ], "id": "bebe0ed00a2d514", "outputs": [], "execution_count": 59 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:51:26.021417Z", "start_time": "2025-11-04T21:51:23.520284Z" } }, "cell_type": "code", "source": [
  "# NOTE(review): the saved output of the evaluation cell below reports 0.46 test\n",
  "# accuracy, which suggests this run did not converge - worth re-checking the\n",
  "# initialisation scale and layer widths before trusting the comparison.\n",
  "params_values = train(np.transpose(X_train), np.transpose(y_train.reshape((y_train.shape[0], 1))), nn_architecture, epochs=20000, learning_rate=0.01)\n"
 ], "id": "ce04892d496c5147", "outputs": [], "execution_count": 77 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:51:27.733451Z", "start_time": "2025-11-04T21:51:27.727264Z" } }, "cell_type": "code", "source": [
  "Y_test_hat, _ = full_forward_propagation(np.transpose(X_test), params_values, nn_architecture)\n",
  "\n",
  "acc_test = get_accuracy_value(Y_test_hat, np.transpose(y_test.reshape((y_test.shape[0], 1))))\n",
  "print(\"Test set accuracy: {:.2f} - David\".format(acc_test))\n"
 ], "id": "26e7a2a8848714d9", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Test set accuracy: 0.46 - David\n" ] } ], "execution_count": 78 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:43:33.666607121Z", "start_time": "2025-11-04T20:21:26.059140Z" } }, "cell_type": "code", "source": [ "startA = np.random.randn(nn_architecture[0][\"input_dim\"],1) * 0.1\n", "full_forward_propagation(startA, params, nn_architecture)" ], "id": "8b672c5fd5832cc", "outputs": [ { "data": { "text/plain": [ "(array([[0.51608074]]),\n", " {'A0': array([[-0.10166672],\n", " [ 0.14706683]]),\n", " 'Z1': array([[ 0.0203953 ],\n", " [-0.22010647],\n", " [-0.01614817],\n", " [ 0.07300465]]),\n", " 'A1': 
array([[0.0203953 ],\n", " [0. ],\n", " [0. ],\n", " [0.07300465]]),\n", " 'Z2': array([[-0.18085747],\n", " [-0.01827604],\n", " [-0.21683156],\n", " [ 0.08504111],\n", " [ 0.17066065],\n", " [-0.04521306]]),\n", " 'A2': array([[0. ],\n", " [0. ],\n", " [0. ],\n", " [0.08504111],\n", " [0.17066065],\n", " [0. ]]),\n", " 'Z3': array([[-0.17707529],\n", " [ 0.0237745 ],\n", " [-0.07487052],\n", " [-0.02497606],\n", " [ 0.12622027],\n", " [ 0.02613133]]),\n", " 'A3': array([[0. ],\n", " [0.0237745 ],\n", " [0. ],\n", " [0. ],\n", " [0.12622027],\n", " [0.02613133]]),\n", " 'Z4': array([[-0.09066425],\n", " [ 0.05792425],\n", " [ 0.07822296],\n", " [ 0.07317913]]),\n", " 'A4': array([[0. ],\n", " [0.05792425],\n", " [0.07822296],\n", " [0.07317913]]),\n", " 'Z5': array([[0.06434517]])})" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 24 },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:52:07.296371Z", "start_time": "2025-11-04T21:52:01.384867Z" } }, "cell_type": "code", "source": [
  "# Keras reference model trained on the same data for comparison.\n",
  "# The input shape is declared with an explicit Input object; passing\n",
  "# `input_dim` to the first Dense layer triggers a Keras UserWarning.\n",
  "model = Sequential()\n",
  "model.add(keras.Input(shape=(2,)))\n",
  "model.add(Dense(25, activation='relu'))\n",
  "model.add(Dense(50, activation='relu'))\n",
  "model.add(Dense(50, activation='relu'))\n",
  "model.add(Dense(25, activation='relu'))\n",
  "model.add(Dense(1, activation='sigmoid'))\n",
  "\n",
  "model.compile(loss='binary_crossentropy', optimizer=\"sgd\", metrics=['accuracy'])\n",
  "\n",
  "# Training\n",
  "history = model.fit(X_train, y_train, epochs=200, verbose=0)"
 ], "id": "f05ff40ed26e45c2", "outputs": [], "execution_count": null },
 { "metadata": { "ExecuteTime": { "end_time": "2025-11-04T21:53:11.479872Z", "start_time": "2025-11-04T21:53:11.455625Z" } }, "cell_type": "code", "source": [
  "# `Sequential.predict_classes` is not available in the installed Keras (the\n",
  "# call raised AttributeError), so threshold the sigmoid output at 0.5 instead.\n",
  "Y_test_prob = model.predict(X_test, verbose=0)\n",
  "Y_test_hat = (Y_test_prob > 0.5).astype(\"int32\").ravel()\n",
  "acc_test = accuracy_score(y_test, Y_test_hat)\n",
  "print(\"Test set accuracy: {:.2f} - Goliath\".format(acc_test))"
 ], "id": "ef52bee9c93081d3", "outputs": [], "execution_count": null }
 ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.7" } }, "nbformat": 4, "nbformat_minor": 5 }