diff --git a/work-sc.ipynb b/work-sc.ipynb index 98aa8a9..95b123a 100644 --- a/work-sc.ipynb +++ b/work-sc.ipynb @@ -5,19 +5,19 @@ "id": "initial_id", "metadata": { "ExecuteTime": { - "end_time": "2025-11-04T14:28:00.043927Z", - "start_time": "2025-11-04T14:27:59.939813Z" + "end_time": "2025-11-04T21:43:32.795850Z", + "start_time": "2025-11-04T21:43:32.794457Z" } }, "source": "import numpy as np", "outputs": [], - "execution_count": 3 + "execution_count": 45 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-11-04T14:28:09.512985Z", - "start_time": "2025-11-04T14:28:09.508856Z" + "end_time": "2025-11-04T21:43:32.811210Z", + "start_time": "2025-11-04T21:43:32.809638Z" } }, "cell_type": "code", @@ -32,13 +32,13 @@ ], "id": "48cafaf4b64967bb", "outputs": [], - "execution_count": 4 + "execution_count": 46 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-11-04T14:28:39.907457Z", - "start_time": "2025-11-04T14:28:39.903244Z" + "end_time": "2025-11-04T21:43:32.862226Z", + "start_time": "2025-11-04T21:43:32.860368Z" } }, "cell_type": "code", @@ -62,82 +62,520 @@ ], "id": "d13137630b41b756", "outputs": [], - "execution_count": 6 + "execution_count": 47 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-11-04T14:29:00.821197Z", - "start_time": "2025-11-04T14:29:00.795742Z" + "end_time": "2025-11-04T21:46:21.643740Z", + "start_time": "2025-11-04T21:46:21.639693Z" } }, "cell_type": "code", - "source": "init_layers(nn_architecture)", + "source": [ + "params = init_layers(nn_architecture)\n", + "# params" + ], "id": "31f205147667dea6", + "outputs": [], + "execution_count": 64 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:43:32.957461Z", + "start_time": "2025-11-04T21:43:32.955675Z" + } + }, + "cell_type": "code", + "source": [ + "def sigmoid(Z):\n", + " return 1/(1+np.exp(-Z))\n", + "\n", + "def relu(Z):\n", + " return np.maximum(0,Z)\n", + "\n", + "def sigmoid_backward(dA, Z):\n", + " sig = sigmoid(Z)\n", + " return dA * sig * (1 - sig)\n", + "\n", + "def relu_backward(dA, Z):\n", + " dZ = np.array(dA, copy = True)\n", + " dZ[Z <= 0] = 0;\n", + " return dZ;" + ], + "id": "c1b960e7dcf09d91", + "outputs": [], + "execution_count": 49 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:44:22.909895Z", + "start_time": "2025-11-04T21:44:22.906363Z" + } + }, + "cell_type": "code", + "source": [ + "def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation=\"relu\"):\n", + " Z_curr = np.dot(W_curr, A_prev) + b_curr\n", + "\n", + " if activation == \"relu\":\n", + " activation_func = relu\n", + " elif activation == \"sigmoid\":\n", + " activation_func = sigmoid\n", + " else:\n", + " raise Exception('Non-supported activation function')\n", + "\n", + " return activation_func(Z_curr), Z_curr" + ], + "id": "efae2e184daf2fce", + "outputs": [], + "execution_count": 61 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:43:33.055558Z", + "start_time": "2025-11-04T21:43:33.053594Z" + } + }, + "cell_type": "code", + "source": [ + "def full_forward_propagation(X, params_values, nn_architecture):\n", + " memory = {}\n", + " A_curr = X\n", + "\n", + " for idx, layer in enumerate(nn_architecture):\n", + " layer_idx = idx + 1\n", + " A_prev = A_curr\n", + "\n", + " activ_function_curr = layer[\"activation\"]\n", + " W_curr = params_values[\"W\" + str(layer_idx)]\n", + " b_curr = params_values[\"b\" + str(layer_idx)]\n", + " A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr, activ_function_curr)\n", + "\n", + " memory[\"A\" + str(idx)] = A_prev\n", + " memory[\"Z\" + str(layer_idx)] = Z_curr\n", + "\n", + " return A_curr, memory" + ], + "id": "c3cd9e8f51dbe967", + "outputs": [], + "execution_count": 51 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:43:33.103372Z", + "start_time": "2025-11-04T21:43:33.101510Z" + } + }, + "cell_type": "code", + "source": [ + "def get_cost_value(Y_hat, Y):\n", + " m = Y_hat.shape[1]\n", + " cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))\n", + " return np.squeeze(cost)\n", + "\n", + "# an auxiliary function that converts probability into class\n", + "def convert_prob_into_class(probs):\n", + " probs_ = np.copy(probs)\n", + " probs_[probs_ > 0.5] = 1\n", + " probs_[probs_ <= 0.5] = 0\n", + " return probs_\n", + "\n", + "def get_accuracy_value(Y_hat, Y):\n", + " Y_hat_ = convert_prob_into_class(Y_hat)\n", + " return (Y_hat_ == Y).all(axis=0).mean()" + ], + "id": "121416e7bbab57bb", + "outputs": [], + "execution_count": 52 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:43:33.176375Z", + "start_time": "2025-11-04T21:43:33.169411Z" + } + }, + "cell_type": "code", + "source": [ + "def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation=\"relu\"):\n", + " m = A_prev.shape[1]\n", + "\n", + " if activation is \"relu\":\n", + " backward_activation_func = relu_backward\n", + " elif activation is \"sigmoid\":\n", + " backward_activation_func = sigmoid_backward\n", + " else:\n", + " raise Exception('Non-supported activation function')\n", + "\n", + " dZ_curr = backward_activation_func(dA_curr, Z_curr)\n", + " dW_curr = np.dot(dZ_curr, A_prev.T) / m\n", + " db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m\n", + " dA_prev = np.dot(W_curr.T, dZ_curr)\n", + "\n", + " return dA_prev, dW_curr, db_curr" + ], + "id": "92e4b87664f18a63", + "outputs": [], + "execution_count": 53 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:43:33.243823Z", + "start_time": "2025-11-04T21:43:33.234283Z" + } + }, + "cell_type": "code", + "source": [ + "def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):\n", + " grads_values = {}\n", + " m = Y.shape[1]\n", + " Y = Y.reshape(Y_hat.shape)\n", + "\n", + " dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat));\n", + "\n", + " for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):\n", + " layer_idx_curr = layer_idx_prev + 1\n", + " activ_function_curr = layer[\"activation\"]\n", + "\n", + " dA_curr = dA_prev\n", + "\n", + " A_prev = memory[\"A\" + str(layer_idx_prev)]\n", + " Z_curr = memory[\"Z\" + str(layer_idx_curr)]\n", + " W_curr = params_values[\"W\" + str(layer_idx_curr)]\n", + " b_curr = params_values[\"b\" + str(layer_idx_curr)]\n", + "\n", + " dA_prev, dW_curr, db_curr = single_layer_backward_propagation(\n", + " dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)\n", + "\n", + " grads_values[\"dW\" + str(layer_idx_curr)] = dW_curr\n", + " grads_values[\"db\" + str(layer_idx_curr)] = db_curr\n", + "\n", + " return grads_values" + ], + "id": "2c8e4eed1846f003", + "outputs": [], + "execution_count": 54 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:47:33.615104Z", + "start_time": "2025-11-04T21:47:33.610483Z" + } + }, + "cell_type": "code", + "source": [ + "def update(params_values, grads_values, nn_architecture, learning_rate):\n", + " for layer_idx, layer in enumerate(nn_architecture):\n", + " layer_idx=layer_idx+1\n", + " params_values[\"W\" + str(layer_idx)] -= learning_rate * grads_values[\"dW\" + str(layer_idx)]\n", + " params_values[\"b\" + str(layer_idx)] -= learning_rate * grads_values[\"db\" + str(layer_idx)]\n", + "\n", + " return params_values;" + ], + "id": "16320b953a183511", + "outputs": [], + "execution_count": 66 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:47:37.239308Z", + "start_time": "2025-11-04T21:47:37.236527Z" + } + }, + "cell_type": "code", + "source": [ + "def train(X, Y, nn_architecture, epochs, learning_rate, verbose=False, callback=None):\n", + " # initiation of neural net parameters\n", + " params_values = init_layers(nn_architecture, 2)\n", + " # initiation of lists storing the history\n", + " # of metrics calculated during the learning process\n", + " cost_history = []\n", + " accuracy_history = []\n", + "\n", + " # performing calculations for subsequent iterations\n", + " for i in range(epochs):\n", + " # step forward\n", + " Y_hat, cashe = full_forward_propagation(X, params_values, nn_architecture)\n", + "\n", + " # calculating metrics and saving them in history\n", + " cost = get_cost_value(Y_hat, Y)\n", + " cost_history.append(cost)\n", + " accuracy = get_accuracy_value(Y_hat, Y)\n", + " accuracy_history.append(accuracy)\n", + "\n", + " # step backward - calculating gradient\n", + " grads_values = full_backward_propagation(Y_hat, Y, cashe, params_values, nn_architecture)\n", + " # updating model state\n", + " params_values = update(params_values, grads_values, nn_architecture, learning_rate)\n", + "\n", + " if(i % 50 == 0):\n", + " if(verbose):\n", + " print(\"Iteration: {:05} - cost: {:.5f} - accuracy: {:.5f}\".format(i, cost, accuracy))\n", + " if(callback is not None):\n", + " callback(i, params_values)\n", + "\n", + " return params_values" + ], + "id": "fce33f70bba3898", + "outputs": [], + "execution_count": 67 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:43:33.422252Z", + "start_time": "2025-11-04T21:43:33.417262Z" + } + }, + "cell_type": "code", + "source": [ + "import os\n", + "import tensorflow as tf\n", + "\n", + "from sklearn.datasets import make_moons\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib import cm\n", + "from mpl_toolkits.mplot3d import Axes3D\n", + "sns.set_style(\"whitegrid\")\n", + "\n", + "import keras\n", + "from keras.models import Sequential\n", + "from keras.layers import Dense\n", + "# from keras.utils import np_utils\n", + "from keras import regularizers\n", + "\n", + "from sklearn.metrics import accuracy_score" + ], + "id": "cccd73b5018799d4", + "outputs": [], + "execution_count": 57 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:43:33.472509Z", + "start_time": "2025-11-04T21:43:33.470657Z" + } + }, + "cell_type": "code", + "source": [ + "# number of samples in the data set\n", + "N_SAMPLES = 1000\n", + "# ratio between training and test sets\n", + "TEST_SIZE = 0.1" + ], + "id": "4f66ffa878f01c02", + "outputs": [], + "execution_count": 58 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:43:33.520603Z", + "start_time": "2025-11-04T21:43:33.518562Z" + } + }, + "cell_type": "code", + "source": [ + "X, y = make_moons(n_samples = N_SAMPLES, noise=0.2, random_state=100)\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)" + ], + "id": "bebe0ed00a2d514", + "outputs": [], + "execution_count": 59 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:51:26.021417Z", + "start_time": "2025-11-04T21:51:23.520284Z" + } + }, + "cell_type": "code", + "source": [ + "params_values = train(np.transpose(X_train), np.transpose(y_train.reshape((y_train.shape[0], 1))), nn_architecture, 20000, 0.01)\n", + "# params_values\n" + ], + "id": "ce04892d496c5147", + "outputs": [], + "execution_count": 77 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:51:27.733451Z", + "start_time": "2025-11-04T21:51:27.727264Z" + } + }, + "cell_type": "code", + "source": [ + "Y_test_hat, _ = full_forward_propagation(np.transpose(X_test), params_values, nn_architecture)\n", + "\n", + "acc_test = get_accuracy_value(Y_test_hat, np.transpose(y_test.reshape((y_test.shape[0], 1))))\n", + "print(\"Test set accuracy: {:.2f} - David\".format(acc_test))\n" + ], + "id": "26e7a2a8848714d9", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test set accuracy: 0.46 - David\n" + ] + } + ], + "execution_count": 78 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:43:33.666607121Z", + "start_time": "2025-11-04T20:21:26.059140Z" + } + }, + "cell_type": "code", + "source": [ + "startA = np.random.randn(nn_architecture[0][\"input_dim\"],1) * 0.1\n", + "full_forward_propagation(startA, params, nn_architecture)" + ], + "id": "8b672c5fd5832cc", "outputs": [ { "data": { "text/plain": [ - "{'W1': array([[-0.01423588, 0.20572217],\n", - " [ 0.02832619, 0.1329812 ],\n", - " [-0.01546219, -0.00690309],\n", - " [ 0.07551805, 0.08256466]]),\n", - " 'b1': array([[-0.01130692],\n", - " [-0.23678376],\n", - " [-0.01670494],\n", - " [ 0.0685398 ]]),\n", - " 'W2': array([[ 0.00235001, 0.04562013, 0.02704928, -0.14350081],\n", - " [ 0.08828171, -0.05800817, -0.05015653, 0.05909533],\n", - " [-0.07316163, 0.02617555, -0.08557956, -0.01875259],\n", - " [-0.03734863, -0.0461971 , -0.08164661, -0.00451233],\n", - " [ 0.01213278, 0.09259528, -0.05738197, 0.00527031],\n", - " [ 0.22073106, 0.03918219, 0.04827134, 0.0433334 ]]),\n", - " 'b2': array([[-0.17042917],\n", - " [-0.02439081],\n", - " [-0.21397038],\n", - " [ 0.08613227],\n", - " [ 0.17002844],\n", - " [-0.05287848]]),\n", - " 'W3': array([[ 0.17634779, -0.11216078, -0.11919342, 0.05527319, -0.08159809,\n", - " -0.04966468],\n", - " [ 0.10862256, -0.09746753, -0.02821358, -0.01172141, 0.03785473,\n", - " 0.07321946],\n", - " [-0.0103571 , -0.11987063, 0.10100356, 0.28753603, 0.08203126,\n", - " 0.05606115],\n", - " [-0.03756422, -0.02521043, -0.13896134, 0.06173323, -0.0135787 ,\n", - " 0.1287905 ],\n", - " [-0.10369944, 0.13643321, -0.03099566, -0.06111171, -0.04831058,\n", - " -0.06089837],\n", - " [-0.20883353, 0.0639322 , 0.0774304 , 0.12785694, 0.0705276 ,\n", - " 0.06559774]]),\n", - " 'b3': array([[-0.1678502 ],\n", - " [ 0.01831099],\n", - " [-0.11332241],\n", - " [-0.02790857],\n", - " [ 0.13966199],\n", - " [ 0.00322194]]),\n", - " 'W4': array([[-0.26136608, -0.10015776, -0.0567511 , -0.0225658 , 0.09380238,\n", - " 0.08367841],\n", - " [ 0.08121485, 0.0232307 , -0.02951077, -0.0361676 , 0.04321151,\n", - " 0.09339585],\n", - " [ 0.15526339, 0.00936234, 0.02948258, 0.14854308, -0.10868852,\n", - " 0.08211628],\n", - " [-0.07879492, 0.15938117, 0.14059044, 0.16447566, 0.15415987,\n", - " 0.08406076]]),\n", - " 'b4': array([[-0.10230944],\n", - " [ 0.04947723],\n", - " [ 0.08957326],\n", - " [ 0.0477352 ]]),\n", - " 'W5': array([[-0.01145305, 0.01568974, 0.03875967, -0.10262266]]),\n", - " 'b5': array([[0.06791429]])}" + "(array([[0.51608074]]),\n", + " {'A0': array([[-0.10166672],\n", + " [ 0.14706683]]),\n", + " 'Z1': array([[ 0.0203953 ],\n", + " [-0.22010647],\n", + " [-0.01614817],\n", + " [ 0.07300465]]),\n", + " 'A1': array([[0.0203953 ],\n", + " [0. ],\n", + " [0. ],\n", + " [0.07300465]]),\n", + " 'Z2': array([[-0.18085747],\n", + " [-0.01827604],\n", + " [-0.21683156],\n", + " [ 0.08504111],\n", + " [ 0.17066065],\n", + " [-0.04521306]]),\n", + " 'A2': array([[0. ],\n", + " [0. ],\n", + " [0. ],\n", + " [0.08504111],\n", + " [0.17066065],\n", + " [0. ]]),\n", + " 'Z3': array([[-0.17707529],\n", + " [ 0.0237745 ],\n", + " [-0.07487052],\n", + " [-0.02497606],\n", + " [ 0.12622027],\n", + " [ 0.02613133]]),\n", + " 'A3': array([[0. ],\n", + " [0.0237745 ],\n", + " [0. ],\n", + " [0. ],\n", + " [0.12622027],\n", + " [0.02613133]]),\n", + " 'Z4': array([[-0.09066425],\n", + " [ 0.05792425],\n", + " [ 0.07822296],\n", + " [ 0.07317913]]),\n", + " 'A4': array([[0. ],\n", + " [0.05792425],\n", + " [0.07822296],\n", + " [0.07317913]]),\n", + " 'Z5': array([[0.06434517]])})" ] }, - "execution_count": 7, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], - "execution_count": 7 + "execution_count": 24 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:52:07.296371Z", + "start_time": "2025-11-04T21:52:01.384867Z" + } + }, + "cell_type": "code", + "source": [ + "model = Sequential()\n", + "model.add(Dense(25, input_dim=2,activation='relu'))\n", + "model.add(Dense(50, activation='relu'))\n", + "model.add(Dense(50, activation='relu'))\n", + "model.add(Dense(25, activation='relu'))\n", + "model.add(Dense(1, activation='sigmoid'))\n", + "\n", + "model.compile(loss='binary_crossentropy', optimizer=\"sgd\", metrics=['accuracy'])\n", + "\n", + "# Training\n", + "history = model.fit(X_train, y_train, epochs=200, verbose=0)" + ], + "id": "f05ff40ed26e45c2", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/oskar/projects/nn-from-scratch/.venv/lib/python3.13/site-packages/keras/src/layers/core/dense.py:95: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", + " super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n", + "2025-11-04 22:52:01.409083: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: forward compatibility was attempted on non supported HW\n", + "2025-11-04 22:52:01.409097: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:171] verbose logging is disabled. Rerun with verbose logging (usually --v=1 or --vmodule=cuda_diagnostics=1) to get more diagnostic output from this module\n", + "2025-11-04 22:52:01.409099: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:176] retrieving CUDA diagnostic information for host: solaria\n", + "2025-11-04 22:52:01.409101: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:183] hostname: solaria\n", + "2025-11-04 22:52:01.409176: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:190] libcuda reported version is: 580.95.5\n", + "2025-11-04 22:52:01.409184: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:194] kernel reported version is: 570.195.3\n", + "2025-11-04 22:52:01.409185: E external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:287] kernel version 570.195.3 does not match DSO version 580.95.5 -- cannot find working devices in this configuration\n" + ] + } + ], + "execution_count": 79 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-04T21:53:11.479872Z", + "start_time": "2025-11-04T21:53:11.455625Z" + } + }, + "cell_type": "code", + "source": [ + "Y_test_hat = model.predict_classes(X_test)\n", + "acc_test = accuracy_score(y_test, Y_test_hat)\n", + "print(\"Test set accuracy: {:.2f} - Goliath\".format(acc_test))" + ], + "id": "ef52bee9c93081d3", + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'Sequential' object has no attribute 'predict_classes'", + "output_type": "error", + "traceback": [ + "\u001B[31m---------------------------------------------------------------------------\u001B[39m", + "\u001B[31mAttributeError\u001B[39m Traceback (most recent call last)", + "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[83]\u001B[39m\u001B[32m, line 1\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m1\u001B[39m Y_test_hat = \u001B[43mmodel\u001B[49m\u001B[43m.\u001B[49m\u001B[43mpredict_classes\u001B[49m(X_test)\n\u001B[32m 2\u001B[39m acc_test = accuracy_score(y_test, Y_test_hat)\n\u001B[32m 3\u001B[39m \u001B[38;5;28mprint\u001B[39m(\u001B[33m\"\u001B[39m\u001B[33mTest set accuracy: \u001B[39m\u001B[38;5;132;01m{:.2f}\u001B[39;00m\u001B[33m - Goliath\u001B[39m\u001B[33m\"\u001B[39m.format(acc_test))\n", + "\u001B[31mAttributeError\u001B[39m: 'Sequential' object has no attribute 'predict_classes'" + ] + } + ], + "execution_count": 83 } ], "metadata": {