my version

This commit is contained in:
oskar 2025-11-04 22:56:05 +01:00
parent 698daaf98a
commit 86a62efd36

View file

@ -5,19 +5,19 @@
"id": "initial_id", "id": "initial_id",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-11-04T14:28:00.043927Z", "end_time": "2025-11-04T21:43:32.795850Z",
"start_time": "2025-11-04T14:27:59.939813Z" "start_time": "2025-11-04T21:43:32.794457Z"
} }
}, },
"source": "import numpy as np", "source": "import numpy as np",
"outputs": [], "outputs": [],
"execution_count": 3 "execution_count": 45
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-11-04T14:28:09.512985Z", "end_time": "2025-11-04T21:43:32.811210Z",
"start_time": "2025-11-04T14:28:09.508856Z" "start_time": "2025-11-04T21:43:32.809638Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@ -32,13 +32,13 @@
], ],
"id": "48cafaf4b64967bb", "id": "48cafaf4b64967bb",
"outputs": [], "outputs": [],
"execution_count": 4 "execution_count": 46
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-11-04T14:28:39.907457Z", "end_time": "2025-11-04T21:43:32.862226Z",
"start_time": "2025-11-04T14:28:39.903244Z" "start_time": "2025-11-04T21:43:32.860368Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@ -62,82 +62,520 @@
], ],
"id": "d13137630b41b756", "id": "d13137630b41b756",
"outputs": [], "outputs": [],
"execution_count": 6 "execution_count": 47
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-11-04T14:29:00.821197Z", "end_time": "2025-11-04T21:46:21.643740Z",
"start_time": "2025-11-04T14:29:00.795742Z" "start_time": "2025-11-04T21:46:21.639693Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
"source": "init_layers(nn_architecture)", "source": [
"params = init_layers(nn_architecture)\n",
"# params"
],
"id": "31f205147667dea6", "id": "31f205147667dea6",
"outputs": [],
"execution_count": 64
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:43:32.957461Z",
"start_time": "2025-11-04T21:43:32.955675Z"
}
},
"cell_type": "code",
"source": [
"def sigmoid(Z):\n",
" return 1/(1+np.exp(-Z))\n",
"\n",
"def relu(Z):\n",
" return np.maximum(0,Z)\n",
"\n",
"def sigmoid_backward(dA, Z):\n",
" sig = sigmoid(Z)\n",
" return dA * sig * (1 - sig)\n",
"\n",
"def relu_backward(dA, Z):\n",
" dZ = np.array(dA, copy = True)\n",
" dZ[Z <= 0] = 0;\n",
" return dZ;"
],
"id": "c1b960e7dcf09d91",
"outputs": [],
"execution_count": 49
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:44:22.909895Z",
"start_time": "2025-11-04T21:44:22.906363Z"
}
},
"cell_type": "code",
"source": [
"def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation=\"relu\"):\n",
" Z_curr = np.dot(W_curr, A_prev) + b_curr\n",
"\n",
" if activation == \"relu\":\n",
" activation_func = relu\n",
" elif activation == \"sigmoid\":\n",
" activation_func = sigmoid\n",
" else:\n",
" raise Exception('Non-supported activation function')\n",
"\n",
" return activation_func(Z_curr), Z_curr"
],
"id": "efae2e184daf2fce",
"outputs": [],
"execution_count": 61
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:43:33.055558Z",
"start_time": "2025-11-04T21:43:33.053594Z"
}
},
"cell_type": "code",
"source": [
"def full_forward_propagation(X, params_values, nn_architecture):\n",
" memory = {}\n",
" A_curr = X\n",
"\n",
" for idx, layer in enumerate(nn_architecture):\n",
" layer_idx = idx + 1\n",
" A_prev = A_curr\n",
"\n",
" activ_function_curr = layer[\"activation\"]\n",
" W_curr = params_values[\"W\" + str(layer_idx)]\n",
" b_curr = params_values[\"b\" + str(layer_idx)]\n",
" A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr, activ_function_curr)\n",
"\n",
" memory[\"A\" + str(idx)] = A_prev\n",
" memory[\"Z\" + str(layer_idx)] = Z_curr\n",
"\n",
" return A_curr, memory"
],
"id": "c3cd9e8f51dbe967",
"outputs": [],
"execution_count": 51
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:43:33.103372Z",
"start_time": "2025-11-04T21:43:33.101510Z"
}
},
"cell_type": "code",
"source": [
"def get_cost_value(Y_hat, Y):\n",
" m = Y_hat.shape[1]\n",
" cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))\n",
" return np.squeeze(cost)\n",
"\n",
"# an auxiliary function that converts probability into class\n",
"def convert_prob_into_class(probs):\n",
" probs_ = np.copy(probs)\n",
" probs_[probs_ > 0.5] = 1\n",
" probs_[probs_ <= 0.5] = 0\n",
" return probs_\n",
"\n",
"def get_accuracy_value(Y_hat, Y):\n",
" Y_hat_ = convert_prob_into_class(Y_hat)\n",
" return (Y_hat_ == Y).all(axis=0).mean()"
],
"id": "121416e7bbab57bb",
"outputs": [],
"execution_count": 52
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:43:33.176375Z",
"start_time": "2025-11-04T21:43:33.169411Z"
}
},
"cell_type": "code",
"source": [
"def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation=\"relu\"):\n",
" m = A_prev.shape[1]\n",
"\n",
" if activation is \"relu\":\n",
" backward_activation_func = relu_backward\n",
" elif activation is \"sigmoid\":\n",
" backward_activation_func = sigmoid_backward\n",
" else:\n",
" raise Exception('Non-supported activation function')\n",
"\n",
" dZ_curr = backward_activation_func(dA_curr, Z_curr)\n",
" dW_curr = np.dot(dZ_curr, A_prev.T) / m\n",
" db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m\n",
" dA_prev = np.dot(W_curr.T, dZ_curr)\n",
"\n",
" return dA_prev, dW_curr, db_curr"
],
"id": "92e4b87664f18a63",
"outputs": [],
"execution_count": 53
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:43:33.243823Z",
"start_time": "2025-11-04T21:43:33.234283Z"
}
},
"cell_type": "code",
"source": [
"def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):\n",
" grads_values = {}\n",
" m = Y.shape[1]\n",
" Y = Y.reshape(Y_hat.shape)\n",
"\n",
" dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat));\n",
"\n",
" for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):\n",
" layer_idx_curr = layer_idx_prev + 1\n",
" activ_function_curr = layer[\"activation\"]\n",
"\n",
" dA_curr = dA_prev\n",
"\n",
" A_prev = memory[\"A\" + str(layer_idx_prev)]\n",
" Z_curr = memory[\"Z\" + str(layer_idx_curr)]\n",
" W_curr = params_values[\"W\" + str(layer_idx_curr)]\n",
" b_curr = params_values[\"b\" + str(layer_idx_curr)]\n",
"\n",
" dA_prev, dW_curr, db_curr = single_layer_backward_propagation(\n",
" dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)\n",
"\n",
" grads_values[\"dW\" + str(layer_idx_curr)] = dW_curr\n",
" grads_values[\"db\" + str(layer_idx_curr)] = db_curr\n",
"\n",
" return grads_values"
],
"id": "2c8e4eed1846f003",
"outputs": [],
"execution_count": 54
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:47:33.615104Z",
"start_time": "2025-11-04T21:47:33.610483Z"
}
},
"cell_type": "code",
"source": [
"def update(params_values, grads_values, nn_architecture, learning_rate):\n",
" for layer_idx, layer in enumerate(nn_architecture):\n",
" layer_idx=layer_idx+1\n",
" params_values[\"W\" + str(layer_idx)] -= learning_rate * grads_values[\"dW\" + str(layer_idx)]\n",
" params_values[\"b\" + str(layer_idx)] -= learning_rate * grads_values[\"db\" + str(layer_idx)]\n",
"\n",
" return params_values;"
],
"id": "16320b953a183511",
"outputs": [],
"execution_count": 66
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:47:37.239308Z",
"start_time": "2025-11-04T21:47:37.236527Z"
}
},
"cell_type": "code",
"source": [
"def train(X, Y, nn_architecture, epochs, learning_rate, verbose=False, callback=None):\n",
" # initiation of neural net parameters\n",
" params_values = init_layers(nn_architecture, 2)\n",
" # initiation of lists storing the history\n",
" # of metrics calculated during the learning process\n",
" cost_history = []\n",
" accuracy_history = []\n",
"\n",
" # performing calculations for subsequent iterations\n",
" for i in range(epochs):\n",
" # step forward\n",
" Y_hat, cashe = full_forward_propagation(X, params_values, nn_architecture)\n",
"\n",
" # calculating metrics and saving them in history\n",
" cost = get_cost_value(Y_hat, Y)\n",
" cost_history.append(cost)\n",
" accuracy = get_accuracy_value(Y_hat, Y)\n",
" accuracy_history.append(accuracy)\n",
"\n",
" # step backward - calculating gradient\n",
" grads_values = full_backward_propagation(Y_hat, Y, cashe, params_values, nn_architecture)\n",
" # updating model state\n",
" params_values = update(params_values, grads_values, nn_architecture, learning_rate)\n",
"\n",
" if(i % 50 == 0):\n",
" if(verbose):\n",
" print(\"Iteration: {:05} - cost: {:.5f} - accuracy: {:.5f}\".format(i, cost, accuracy))\n",
" if(callback is not None):\n",
" callback(i, params_values)\n",
"\n",
" return params_values"
],
"id": "fce33f70bba3898",
"outputs": [],
"execution_count": 67
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:43:33.422252Z",
"start_time": "2025-11-04T21:43:33.417262Z"
}
},
"cell_type": "code",
"source": [
"import os\n",
"import tensorflow as tf\n",
"\n",
"from sklearn.datasets import make_moons\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib import cm\n",
"from mpl_toolkits.mplot3d import Axes3D\n",
"sns.set_style(\"whitegrid\")\n",
"\n",
"import keras\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense\n",
"# from keras.utils import np_utils\n",
"from keras import regularizers\n",
"\n",
"from sklearn.metrics import accuracy_score"
],
"id": "cccd73b5018799d4",
"outputs": [],
"execution_count": 57
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:43:33.472509Z",
"start_time": "2025-11-04T21:43:33.470657Z"
}
},
"cell_type": "code",
"source": [
"# number of samples in the data set\n",
"N_SAMPLES = 1000\n",
"# ratio between training and test sets\n",
"TEST_SIZE = 0.1"
],
"id": "4f66ffa878f01c02",
"outputs": [],
"execution_count": 58
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:43:33.520603Z",
"start_time": "2025-11-04T21:43:33.518562Z"
}
},
"cell_type": "code",
"source": [
"X, y = make_moons(n_samples = N_SAMPLES, noise=0.2, random_state=100)\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)"
],
"id": "bebe0ed00a2d514",
"outputs": [],
"execution_count": 59
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:51:26.021417Z",
"start_time": "2025-11-04T21:51:23.520284Z"
}
},
"cell_type": "code",
"source": [
"params_values = train(np.transpose(X_train), np.transpose(y_train.reshape((y_train.shape[0], 1))), nn_architecture, 20000, 0.01)\n",
"# params_values\n"
],
"id": "ce04892d496c5147",
"outputs": [],
"execution_count": 77
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:51:27.733451Z",
"start_time": "2025-11-04T21:51:27.727264Z"
}
},
"cell_type": "code",
"source": [
"Y_test_hat, _ = full_forward_propagation(np.transpose(X_test), params_values, nn_architecture)\n",
"\n",
"acc_test = get_accuracy_value(Y_test_hat, np.transpose(y_test.reshape((y_test.shape[0], 1))))\n",
"print(\"Test set accuracy: {:.2f} - David\".format(acc_test))\n"
],
"id": "26e7a2a8848714d9",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test set accuracy: 0.46 - David\n"
]
}
],
"execution_count": 78
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:43:33.666607121Z",
"start_time": "2025-11-04T20:21:26.059140Z"
}
},
"cell_type": "code",
"source": [
"startA = np.random.randn(nn_architecture[0][\"input_dim\"],1) * 0.1\n",
"full_forward_propagation(startA, params, nn_architecture)"
],
"id": "8b672c5fd5832cc",
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"{'W1': array([[-0.01423588, 0.20572217],\n", "(array([[0.51608074]]),\n",
" [ 0.02832619, 0.1329812 ],\n", " {'A0': array([[-0.10166672],\n",
" [-0.01546219, -0.00690309],\n", " [ 0.14706683]]),\n",
" [ 0.07551805, 0.08256466]]),\n", " 'Z1': array([[ 0.0203953 ],\n",
" 'b1': array([[-0.01130692],\n", " [-0.22010647],\n",
" [-0.23678376],\n", " [-0.01614817],\n",
" [-0.01670494],\n", " [ 0.07300465]]),\n",
" [ 0.0685398 ]]),\n", " 'A1': array([[0.0203953 ],\n",
" 'W2': array([[ 0.00235001, 0.04562013, 0.02704928, -0.14350081],\n", " [0. ],\n",
" [ 0.08828171, -0.05800817, -0.05015653, 0.05909533],\n", " [0. ],\n",
" [-0.07316163, 0.02617555, -0.08557956, -0.01875259],\n", " [0.07300465]]),\n",
" [-0.03734863, -0.0461971 , -0.08164661, -0.00451233],\n", " 'Z2': array([[-0.18085747],\n",
" [ 0.01213278, 0.09259528, -0.05738197, 0.00527031],\n", " [-0.01827604],\n",
" [ 0.22073106, 0.03918219, 0.04827134, 0.0433334 ]]),\n", " [-0.21683156],\n",
" 'b2': array([[-0.17042917],\n", " [ 0.08504111],\n",
" [-0.02439081],\n", " [ 0.17066065],\n",
" [-0.21397038],\n", " [-0.04521306]]),\n",
" [ 0.08613227],\n", " 'A2': array([[0. ],\n",
" [ 0.17002844],\n", " [0. ],\n",
" [-0.05287848]]),\n", " [0. ],\n",
" 'W3': array([[ 0.17634779, -0.11216078, -0.11919342, 0.05527319, -0.08159809,\n", " [0.08504111],\n",
" -0.04966468],\n", " [0.17066065],\n",
" [ 0.10862256, -0.09746753, -0.02821358, -0.01172141, 0.03785473,\n", " [0. ]]),\n",
" 0.07321946],\n", " 'Z3': array([[-0.17707529],\n",
" [-0.0103571 , -0.11987063, 0.10100356, 0.28753603, 0.08203126,\n", " [ 0.0237745 ],\n",
" 0.05606115],\n", " [-0.07487052],\n",
" [-0.03756422, -0.02521043, -0.13896134, 0.06173323, -0.0135787 ,\n", " [-0.02497606],\n",
" 0.1287905 ],\n", " [ 0.12622027],\n",
" [-0.10369944, 0.13643321, -0.03099566, -0.06111171, -0.04831058,\n", " [ 0.02613133]]),\n",
" -0.06089837],\n", " 'A3': array([[0. ],\n",
" [-0.20883353, 0.0639322 , 0.0774304 , 0.12785694, 0.0705276 ,\n", " [0.0237745 ],\n",
" 0.06559774]]),\n", " [0. ],\n",
" 'b3': array([[-0.1678502 ],\n", " [0. ],\n",
" [ 0.01831099],\n", " [0.12622027],\n",
" [-0.11332241],\n", " [0.02613133]]),\n",
" [-0.02790857],\n", " 'Z4': array([[-0.09066425],\n",
" [ 0.13966199],\n", " [ 0.05792425],\n",
" [ 0.00322194]]),\n", " [ 0.07822296],\n",
" 'W4': array([[-0.26136608, -0.10015776, -0.0567511 , -0.0225658 , 0.09380238,\n", " [ 0.07317913]]),\n",
" 0.08367841],\n", " 'A4': array([[0. ],\n",
" [ 0.08121485, 0.0232307 , -0.02951077, -0.0361676 , 0.04321151,\n", " [0.05792425],\n",
" 0.09339585],\n", " [0.07822296],\n",
" [ 0.15526339, 0.00936234, 0.02948258, 0.14854308, -0.10868852,\n", " [0.07317913]]),\n",
" 0.08211628],\n", " 'Z5': array([[0.06434517]])})"
" [-0.07879492, 0.15938117, 0.14059044, 0.16447566, 0.15415987,\n",
" 0.08406076]]),\n",
" 'b4': array([[-0.10230944],\n",
" [ 0.04947723],\n",
" [ 0.08957326],\n",
" [ 0.0477352 ]]),\n",
" 'W5': array([[-0.01145305, 0.01568974, 0.03875967, -0.10262266]]),\n",
" 'b5': array([[0.06791429]])}"
] ]
}, },
"execution_count": 7, "execution_count": 24,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"execution_count": 7 "execution_count": 24
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:52:07.296371Z",
"start_time": "2025-11-04T21:52:01.384867Z"
}
},
"cell_type": "code",
"source": [
"model = Sequential()\n",
"model.add(Dense(25, input_dim=2,activation='relu'))\n",
"model.add(Dense(50, activation='relu'))\n",
"model.add(Dense(50, activation='relu'))\n",
"model.add(Dense(25, activation='relu'))\n",
"model.add(Dense(1, activation='sigmoid'))\n",
"\n",
"model.compile(loss='binary_crossentropy', optimizer=\"sgd\", metrics=['accuracy'])\n",
"\n",
"# Training\n",
"history = model.fit(X_train, y_train, epochs=200, verbose=0)"
],
"id": "f05ff40ed26e45c2",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/oskar/projects/nn-from-scratch/.venv/lib/python3.13/site-packages/keras/src/layers/core/dense.py:95: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n",
" super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n",
"2025-11-04 22:52:01.409083: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: forward compatibility was attempted on non supported HW\n",
"2025-11-04 22:52:01.409097: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:171] verbose logging is disabled. Rerun with verbose logging (usually --v=1 or --vmodule=cuda_diagnostics=1) to get more diagnostic output from this module\n",
"2025-11-04 22:52:01.409099: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:176] retrieving CUDA diagnostic information for host: solaria\n",
"2025-11-04 22:52:01.409101: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:183] hostname: solaria\n",
"2025-11-04 22:52:01.409176: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:190] libcuda reported version is: 580.95.5\n",
"2025-11-04 22:52:01.409184: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:194] kernel reported version is: 570.195.3\n",
"2025-11-04 22:52:01.409185: E external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:287] kernel version 570.195.3 does not match DSO version 580.95.5 -- cannot find working devices in this configuration\n"
]
}
],
"execution_count": 79
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-04T21:53:11.479872Z",
"start_time": "2025-11-04T21:53:11.455625Z"
}
},
"cell_type": "code",
"source": [
"Y_test_hat = model.predict_classes(X_test)\n",
"acc_test = accuracy_score(y_test, Y_test_hat)\n",
"print(\"Test set accuracy: {:.2f} - Goliath\".format(acc_test))"
],
"id": "ef52bee9c93081d3",
"outputs": [
{
"ename": "AttributeError",
"evalue": "'Sequential' object has no attribute 'predict_classes'",
"output_type": "error",
"traceback": [
"\u001B[31m---------------------------------------------------------------------------\u001B[39m",
"\u001B[31mAttributeError\u001B[39m Traceback (most recent call last)",
"\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[83]\u001B[39m\u001B[32m, line 1\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m1\u001B[39m Y_test_hat = \u001B[43mmodel\u001B[49m\u001B[43m.\u001B[49m\u001B[43mpredict_classes\u001B[49m(X_test)\n\u001B[32m 2\u001B[39m acc_test = accuracy_score(y_test, Y_test_hat)\n\u001B[32m 3\u001B[39m \u001B[38;5;28mprint\u001B[39m(\u001B[33m\"\u001B[39m\u001B[33mTest set accuracy: \u001B[39m\u001B[38;5;132;01m{:.2f}\u001B[39;00m\u001B[33m - Goliath\u001B[39m\u001B[33m\"\u001B[39m.format(acc_test))\n",
"\u001B[31mAttributeError\u001B[39m: 'Sequential' object has no attribute 'predict_classes'"
]
}
],
"execution_count": 83
} }
], ],
"metadata": { "metadata": {