{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:43:32.795850Z",
     "start_time": "2025-11-04T21:43:32.794457Z"
    }
   },
   "source": "import numpy as np",
   "outputs": [],
   "execution_count": 45
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:43:32.811210Z",
     "start_time": "2025-11-04T21:43:32.809638Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Layer-by-layer description of the network. Each dict gives the width of the\n",
    "# layer's input, the width of its output and the name of its activation; the\n",
    "# output_dim of one layer matches the input_dim of the next.\n",
    "nn_architecture = [\n",
    "    {\"input_dim\": 2, \"output_dim\": 4, \"activation\": \"relu\"},\n",
    "    {\"input_dim\": 4, \"output_dim\": 6, \"activation\": \"relu\"},\n",
    "    {\"input_dim\": 6, \"output_dim\": 6, \"activation\": \"relu\"},\n",
    "    {\"input_dim\": 6, \"output_dim\": 4, \"activation\": \"relu\"},\n",
    "    {\"input_dim\": 4, \"output_dim\": 1, \"activation\": \"sigmoid\"},\n",
    "]"
   ],
   "id": "48cafaf4b64967bb",
   "outputs": [],
   "execution_count": 46
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:43:32.862226Z",
     "start_time": "2025-11-04T21:43:32.860368Z"
    }
   },
   "cell_type": "code",
   "source": [
    "def init_layers(nn_architecture, seed = 99):\n",
    "    np.random.seed(seed)\n",
    "    number_of_layers = len(nn_architecture)\n",
    "    params_values = {}\n",
    "\n",
    "    for idx, layer in enumerate(nn_architecture):\n",
    "        layer_idx = idx + 1\n",
    "        layer_input_size = layer[\"input_dim\"]\n",
    "        layer_output_size = layer[\"output_dim\"]\n",
    "\n",
    "        params_values['W' + str(layer_idx)] = np.random.randn(\n",
    "            layer_output_size, layer_input_size) * 0.1\n",
    "        params_values['b' + str(layer_idx)] = np.random.randn(\n",
    "            layer_output_size, 1) * 0.1\n",
    "\n",
    "    return params_values\n"
   ],
   "id": "d13137630b41b756",
   "outputs": [],
   "execution_count": 47
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:46:21.643740Z",
     "start_time": "2025-11-04T21:46:21.639693Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Parameter set created with the default seed; it is only initialised here,\n",
    "# not trained.\n",
    "params = init_layers(nn_architecture)\n",
    "# Uncomment to inspect the generated arrays:\n",
    "# params"
   ],
   "id": "31f205147667dea6",
   "outputs": [],
   "execution_count": 64
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:43:32.957461Z",
     "start_time": "2025-11-04T21:43:32.955675Z"
    }
   },
   "cell_type": "code",
   "source": [
    "def sigmoid(Z):\n",
    "    return 1/(1+np.exp(-Z))\n",
    "\n",
    "def relu(Z):\n",
    "    return np.maximum(0,Z)\n",
    "\n",
    "def sigmoid_backward(dA, Z):\n",
    "    \"\"\"Back-propagate ``dA`` through a sigmoid evaluated at ``Z``.\n",
    "\n",
    "    Multiplies the incoming gradient by the sigmoid derivative s * (1 - s).\n",
    "    \"\"\"\n",
    "    activated = sigmoid(Z)\n",
    "    scaled = dA * activated\n",
    "    return scaled * (1 - activated)\n",
    "\n",
    "def relu_backward(dA, Z):\n",
    "    dZ = np.array(dA, copy = True)\n",
    "    dZ[Z <= 0] = 0;\n",
    "    return dZ;"
   ],
   "id": "c1b960e7dcf09d91",
   "outputs": [],
   "execution_count": 49
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:44:22.909895Z",
     "start_time": "2025-11-04T21:44:22.906363Z"
    }
   },
   "cell_type": "code",
   "source": [
    "def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation=\"relu\"):\n",
    "    Z_curr = np.dot(W_curr, A_prev) + b_curr\n",
    "\n",
    "    if activation == \"relu\":\n",
    "        activation_func = relu\n",
    "    elif activation == \"sigmoid\":\n",
    "        activation_func = sigmoid\n",
    "    else:\n",
    "        raise Exception('Non-supported activation function')\n",
    "\n",
    "    return activation_func(Z_curr), Z_curr"
   ],
   "id": "efae2e184daf2fce",
   "outputs": [],
   "execution_count": 61
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:43:33.055558Z",
     "start_time": "2025-11-04T21:43:33.053594Z"
    }
   },
   "cell_type": "code",
   "source": [
    "def full_forward_propagation(X, params_values, nn_architecture):\n",
    "    memory = {}\n",
    "    A_curr = X\n",
    "\n",
    "    for idx, layer in enumerate(nn_architecture):\n",
    "        layer_idx = idx + 1\n",
    "        A_prev = A_curr\n",
    "\n",
    "        activ_function_curr = layer[\"activation\"]\n",
    "        W_curr = params_values[\"W\" + str(layer_idx)]\n",
    "        b_curr = params_values[\"b\" + str(layer_idx)]\n",
    "        A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr, activ_function_curr)\n",
    "\n",
    "        memory[\"A\" + str(idx)] = A_prev\n",
    "        memory[\"Z\" + str(layer_idx)] = Z_curr\n",
    "\n",
    "    return A_curr, memory"
   ],
   "id": "c3cd9e8f51dbe967",
   "outputs": [],
   "execution_count": 51
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:43:33.103372Z",
     "start_time": "2025-11-04T21:43:33.101510Z"
    }
   },
   "cell_type": "code",
   "source": [
    "def get_cost_value(Y_hat, Y):\n",
    "    m = Y_hat.shape[1]\n",
    "    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))\n",
    "    return np.squeeze(cost)\n",
    "\n",
    "# an auxiliary function that converts probability into class\n",
    "def convert_prob_into_class(probs):\n",
    "    probs_ = np.copy(probs)\n",
    "    probs_[probs_ > 0.5] = 1\n",
    "    probs_[probs_ <= 0.5] = 0\n",
    "    return probs_\n",
    "\n",
    "def get_accuracy_value(Y_hat, Y):\n",
    "    \"\"\"Fraction of columns whose thresholded prediction equals the label.\"\"\"\n",
    "    predicted = convert_prob_into_class(Y_hat)\n",
    "    column_correct = (predicted == Y).all(axis=0)\n",
    "    return column_correct.mean()"
   ],
   "id": "121416e7bbab57bb",
   "outputs": [],
   "execution_count": 52
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:43:33.176375Z",
     "start_time": "2025-11-04T21:43:33.169411Z"
    }
   },
   "cell_type": "code",
   "source": [
    "def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation=\"relu\"):\n",
    "    m = A_prev.shape[1]\n",
    "\n",
    "    if activation is \"relu\":\n",
    "        backward_activation_func = relu_backward\n",
    "    elif activation is \"sigmoid\":\n",
    "        backward_activation_func = sigmoid_backward\n",
    "    else:\n",
    "        raise Exception('Non-supported activation function')\n",
    "\n",
    "    dZ_curr = backward_activation_func(dA_curr, Z_curr)\n",
    "    dW_curr = np.dot(dZ_curr, A_prev.T) / m\n",
    "    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m\n",
    "    dA_prev = np.dot(W_curr.T, dZ_curr)\n",
    "\n",
    "    return dA_prev, dW_curr, db_curr"
   ],
   "id": "92e4b87664f18a63",
   "outputs": [],
   "execution_count": 53
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:43:33.243823Z",
     "start_time": "2025-11-04T21:43:33.234283Z"
    }
   },
   "cell_type": "code",
   "source": [
    "def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):\n",
    "    grads_values = {}\n",
    "    m = Y.shape[1]\n",
    "    Y = Y.reshape(Y_hat.shape)\n",
    "\n",
    "    dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat));\n",
    "\n",
    "    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):\n",
    "        layer_idx_curr = layer_idx_prev + 1\n",
    "        activ_function_curr = layer[\"activation\"]\n",
    "\n",
    "        dA_curr = dA_prev\n",
    "\n",
    "        A_prev = memory[\"A\" + str(layer_idx_prev)]\n",
    "        Z_curr = memory[\"Z\" + str(layer_idx_curr)]\n",
    "        W_curr = params_values[\"W\" + str(layer_idx_curr)]\n",
    "        b_curr = params_values[\"b\" + str(layer_idx_curr)]\n",
    "\n",
    "        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(\n",
    "            dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)\n",
    "\n",
    "        grads_values[\"dW\" + str(layer_idx_curr)] = dW_curr\n",
    "        grads_values[\"db\" + str(layer_idx_curr)] = db_curr\n",
    "\n",
    "    return grads_values"
   ],
   "id": "2c8e4eed1846f003",
   "outputs": [],
   "execution_count": 54
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:47:33.615104Z",
     "start_time": "2025-11-04T21:47:33.610483Z"
    }
   },
   "cell_type": "code",
   "source": [
    "def update(params_values, grads_values, nn_architecture, learning_rate):\n",
    "    for layer_idx, layer in enumerate(nn_architecture):\n",
    "        layer_idx=layer_idx+1\n",
    "        params_values[\"W\" + str(layer_idx)] -= learning_rate * grads_values[\"dW\" + str(layer_idx)]\n",
    "        params_values[\"b\" + str(layer_idx)] -= learning_rate * grads_values[\"db\" + str(layer_idx)]\n",
    "\n",
    "    return params_values;"
   ],
   "id": "16320b953a183511",
   "outputs": [],
   "execution_count": 66
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:47:37.239308Z",
     "start_time": "2025-11-04T21:47:37.236527Z"
    }
   },
   "cell_type": "code",
   "source": [
    "def train(X, Y, nn_architecture, epochs, learning_rate, verbose=False, callback=None, seed=2):\n",
    "    \"\"\"Train the network with full-batch gradient descent.\n",
    "\n",
    "    Args:\n",
    "        X: input data passed to the forward pass.\n",
    "        Y: labels matching the network output.\n",
    "        nn_architecture: sequence of layer dicts describing the network.\n",
    "        epochs: number of forward/backward/update iterations.\n",
    "        learning_rate: step size used by the parameter update.\n",
    "        verbose: when true, print cost and accuracy every 50th iteration.\n",
    "        callback: optional callable invoked as ``callback(i, params_values)``\n",
    "            every 50th iteration.\n",
    "        seed: seed for the parameter initialisation.\n",
    "\n",
    "    Returns:\n",
    "        dict with the trained ``\"W<i>\"`` and ``\"b<i>\"`` arrays.\n",
    "    \"\"\"\n",
    "    params_values = init_layers(nn_architecture, seed)\n",
    "\n",
    "    for i in range(epochs):\n",
    "        # step forward\n",
    "        Y_hat, cache = full_forward_propagation(X, params_values, nn_architecture)\n",
    "\n",
    "        # step backward - calculating gradient\n",
    "        grads_values = full_backward_propagation(Y_hat, Y, cache, params_values, nn_architecture)\n",
    "        # updating model state\n",
    "        params_values = update(params_values, grads_values, nn_architecture, learning_rate)\n",
    "\n",
    "        if i % 50 == 0:\n",
    "            if verbose:\n",
    "                # Metrics are only needed for the report. Y_hat was computed\n",
    "                # before the update, so they describe the pre-update model.\n",
    "                cost = get_cost_value(Y_hat, Y)\n",
    "                accuracy = get_accuracy_value(Y_hat, Y)\n",
    "                print(\"Iteration: {:05} - cost: {:.5f} - accuracy: {:.5f}\".format(i, cost, accuracy))\n",
    "            if callback is not None:\n",
    "                callback(i, params_values)\n",
    "\n",
    "    return params_values"
   ],
   "id": "fce33f70bba3898",
   "outputs": [],
   "execution_count": 67
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:43:33.422252Z",
     "start_time": "2025-11-04T21:43:33.417262Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import os\n",
    "import tensorflow as tf\n",
    "\n",
    "from sklearn.datasets import make_moons\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib import cm\n",
    "from mpl_toolkits.mplot3d import Axes3D\n",
    "sns.set_style(\"whitegrid\")\n",
    "\n",
    "import keras\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense\n",
    "# from keras.utils import np_utils\n",
    "from keras import regularizers\n",
    "\n",
    "from sklearn.metrics import accuracy_score"
   ],
   "id": "cccd73b5018799d4",
   "outputs": [],
   "execution_count": 57
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:43:33.472509Z",
     "start_time": "2025-11-04T21:43:33.470657Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# total number of points generated for the data set\n",
    "N_SAMPLES = 1000\n",
    "# fraction of the samples held out as the test split\n",
    "TEST_SIZE = 0.1"
   ],
   "id": "4f66ffa878f01c02",
   "outputs": [],
   "execution_count": 58
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:43:33.520603Z",
     "start_time": "2025-11-04T21:43:33.518562Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Generate the noisy two-moons data set, then split it into train and test parts.\n",
    "# Both steps use a fixed random_state, so the data is the same on every run.\n",
    "X, y = make_moons(n_samples = N_SAMPLES, noise=0.2, random_state=100)\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)"
   ],
   "id": "bebe0ed00a2d514",
   "outputs": [],
   "execution_count": 59
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:51:26.021417Z",
     "start_time": "2025-11-04T21:51:23.520284Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# The network multiplies its weights onto the data from the left, so the\n",
    "# features are transposed to put samples in columns and the labels are turned\n",
    "# into a single row.\n",
    "params_values = train(np.transpose(X_train), np.transpose(y_train.reshape((y_train.shape[0], 1))), nn_architecture, 20000, 0.01)\n",
    "# Uncomment to inspect the trained parameters:\n",
    "# params_values\n"
   ],
   "id": "ce04892d496c5147",
   "outputs": [],
   "execution_count": 77
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:51:27.733451Z",
     "start_time": "2025-11-04T21:51:27.727264Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Predict on the held-out split (same transposed layout as in training);\n",
    "# the cached intermediate values are not needed here.\n",
    "Y_test_hat, _ = full_forward_propagation(np.transpose(X_test), params_values, nn_architecture)\n",
    "\n",
    "# Compare the thresholded predictions with the test labels.\n",
    "acc_test = get_accuracy_value(Y_test_hat, np.transpose(y_test.reshape((y_test.shape[0], 1))))\n",
    "print(\"Test set accuracy: {:.2f} - David\".format(acc_test))\n"
   ],
   "id": "26e7a2a8848714d9",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test set accuracy: 0.46 - David\n"
     ]
    }
   ],
   "execution_count": 78
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:43:33.666607121Z",
     "start_time": "2025-11-04T20:21:26.059140Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Sanity check: push one small random input column through the untrained\n",
    "# `params` and display the output together with the cached A/Z values.\n",
    "startA = np.random.randn(nn_architecture[0][\"input_dim\"],1) * 0.1\n",
    "full_forward_propagation(startA, params, nn_architecture)"
   ],
   "id": "8b672c5fd5832cc",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[0.51608074]]),\n",
       " {'A0': array([[-0.10166672],\n",
       "         [ 0.14706683]]),\n",
       "  'Z1': array([[ 0.0203953 ],\n",
       "         [-0.22010647],\n",
       "         [-0.01614817],\n",
       "         [ 0.07300465]]),\n",
       "  'A1': array([[0.0203953 ],\n",
       "         [0.        ],\n",
       "         [0.        ],\n",
       "         [0.07300465]]),\n",
       "  'Z2': array([[-0.18085747],\n",
       "         [-0.01827604],\n",
       "         [-0.21683156],\n",
       "         [ 0.08504111],\n",
       "         [ 0.17066065],\n",
       "         [-0.04521306]]),\n",
       "  'A2': array([[0.        ],\n",
       "         [0.        ],\n",
       "         [0.        ],\n",
       "         [0.08504111],\n",
       "         [0.17066065],\n",
       "         [0.        ]]),\n",
       "  'Z3': array([[-0.17707529],\n",
       "         [ 0.0237745 ],\n",
       "         [-0.07487052],\n",
       "         [-0.02497606],\n",
       "         [ 0.12622027],\n",
       "         [ 0.02613133]]),\n",
       "  'A3': array([[0.        ],\n",
       "         [0.0237745 ],\n",
       "         [0.        ],\n",
       "         [0.        ],\n",
       "         [0.12622027],\n",
       "         [0.02613133]]),\n",
       "  'Z4': array([[-0.09066425],\n",
       "         [ 0.05792425],\n",
       "         [ 0.07822296],\n",
       "         [ 0.07317913]]),\n",
       "  'A4': array([[0.        ],\n",
       "         [0.05792425],\n",
       "         [0.07822296],\n",
       "         [0.07317913]]),\n",
       "  'Z5': array([[0.06434517]])})"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 24
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:52:07.296371Z",
     "start_time": "2025-11-04T21:52:01.384867Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Fully connected binary classifier built with Keras for comparison.\n",
    "model = Sequential()\n",
    "# Declare the input shape with an explicit Input object: passing `input_dim`\n",
    "# to the first Dense layer makes Keras emit a UserWarning.\n",
    "model.add(keras.Input(shape=(2,)))\n",
    "model.add(Dense(25, activation='relu'))\n",
    "model.add(Dense(50, activation='relu'))\n",
    "model.add(Dense(50, activation='relu'))\n",
    "model.add(Dense(25, activation='relu'))\n",
    "model.add(Dense(1, activation='sigmoid'))\n",
    "\n",
    "model.compile(loss='binary_crossentropy', optimizer=\"sgd\", metrics=['accuracy'])\n",
    "\n",
    "# Training\n",
    "history = model.fit(X_train, y_train, epochs=200, verbose=0)"
   ],
   "id": "f05ff40ed26e45c2",
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/oskar/projects/nn-from-scratch/.venv/lib/python3.13/site-packages/keras/src/layers/core/dense.py:95: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n",
      "  super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n",
      "2025-11-04 22:52:01.409083: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: forward compatibility was attempted on non supported HW\n",
      "2025-11-04 22:52:01.409097: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:171] verbose logging is disabled. Rerun with verbose logging (usually --v=1 or --vmodule=cuda_diagnostics=1) to get more diagnostic output from this module\n",
      "2025-11-04 22:52:01.409099: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:176] retrieving CUDA diagnostic information for host: solaria\n",
      "2025-11-04 22:52:01.409101: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:183] hostname: solaria\n",
      "2025-11-04 22:52:01.409176: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:190] libcuda reported version is: 580.95.5\n",
      "2025-11-04 22:52:01.409184: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:194] kernel reported version is: 570.195.3\n",
      "2025-11-04 22:52:01.409185: E external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:287] kernel version 570.195.3 does not match DSO version 580.95.5 -- cannot find working devices in this configuration\n"
     ]
    }
   ],
   "execution_count": 79
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-11-04T21:53:11.479872Z",
     "start_time": "2025-11-04T21:53:11.455625Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# `Sequential.predict_classes` no longer exists in current Keras. For a single\n",
    "# sigmoid output the class is the predicted probability thresholded at 0.5.\n",
    "Y_test_prob = model.predict(X_test, verbose=0)\n",
    "# Flatten the (n_samples, 1) column so it lines up with the 1-D `y_test`.\n",
    "Y_test_hat = (Y_test_prob > 0.5).astype(\"int32\").ravel()\n",
    "acc_test = accuracy_score(y_test, Y_test_hat)\n",
    "print(\"Test set accuracy: {:.2f} - Goliath\".format(acc_test))"
   ],
   "id": "ef52bee9c93081d3",
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'Sequential' object has no attribute 'predict_classes'",
     "output_type": "error",
     "traceback": [
      "\u001B[31m---------------------------------------------------------------------------\u001B[39m",
      "\u001B[31mAttributeError\u001B[39m                            Traceback (most recent call last)",
      "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[83]\u001B[39m\u001B[32m, line 1\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m1\u001B[39m Y_test_hat = \u001B[43mmodel\u001B[49m\u001B[43m.\u001B[49m\u001B[43mpredict_classes\u001B[49m(X_test)\n\u001B[32m      2\u001B[39m acc_test = accuracy_score(y_test, Y_test_hat)\n\u001B[32m      3\u001B[39m \u001B[38;5;28mprint\u001B[39m(\u001B[33m\"\u001B[39m\u001B[33mTest set accuracy: \u001B[39m\u001B[38;5;132;01m{:.2f}\u001B[39;00m\u001B[33m - Goliath\u001B[39m\u001B[33m\"\u001B[39m.format(acc_test))\n",
      "\u001B[31mAttributeError\u001B[39m: 'Sequential' object has no attribute 'predict_classes'"
     ]
    }
   ],
   "execution_count": 83
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}