643 lines
24 KiB
Plaintext
643 lines
24 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"id": "initial_id",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:00.816816Z",
|
|
"start_time": "2025-11-04T22:00:00.813630Z"
|
|
}
|
|
},
|
|
"source": "import numpy as np",
|
|
"outputs": [],
|
|
"execution_count": 84
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:00.830847Z",
|
|
"start_time": "2025-11-04T22:00:00.829329Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
# Layer-by-layer description of the network: every entry gives a layer's
# input width, output width and the name of its activation function.
nn_architecture = [
    dict(input_dim=n_in, output_dim=n_out, activation=act)
    for n_in, n_out, act in (
        (2, 4, "relu"),
        (4, 6, "relu"),
        (6, 6, "relu"),
        (6, 4, "relu"),
        (4, 1, "sigmoid"),
    )
]
|
|
],
|
|
"id": "48cafaf4b64967bb",
|
|
"outputs": [],
|
|
"execution_count": 85
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:00.885348Z",
|
|
"start_time": "2025-11-04T22:00:00.880961Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
def init_layers(nn_architecture, seed = 99):
    """Create the initial weights and biases for every layer.

    Args:
        nn_architecture: list of layer dicts with "input_dim" and
            "output_dim" entries.
        seed: seed for the random draws.

    Returns:
        dict with, for layer i (1-based), "W<i>" of shape
        (output_dim, input_dim) and "b<i>" of shape (output_dim, 1), filled
        with standard-normal draws scaled by 0.1.
    """
    # A private generator yields the same numbers as np.random.seed(seed)
    # followed by np.random.randn(...), but leaves the global NumPy random
    # state untouched for the rest of the notebook.
    rng = np.random.RandomState(seed)
    params_values = {}

    for layer_idx, layer in enumerate(nn_architecture, start=1):
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]

        # Draw order (W, then b, layer by layer) determines the values.
        params_values['W' + str(layer_idx)] = rng.randn(
            layer_output_size, layer_input_size) * 0.1
        params_values['b' + str(layer_idx)] = rng.randn(
            layer_output_size, 1) * 0.1

    return params_values
|
|
],
|
|
"id": "d13137630b41b756",
|
|
"outputs": [],
|
|
"execution_count": 86
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:00.944688Z",
|
|
"start_time": "2025-11-04T22:00:00.939752Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
# Initialise one parameter dictionary using init_layers' default seed.
params = init_layers(nn_architecture=nn_architecture)
# params
|
|
],
|
|
"id": "31f205147667dea6",
|
|
"outputs": [],
|
|
"execution_count": 87
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:00.994063Z",
|
|
"start_time": "2025-11-04T22:00:00.990969Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
def sigmoid(Z):
    """Element-wise logistic function 1 / (1 + exp(-Z)).

    Evaluated as exp(-log(1 + exp(-Z))) through np.logaddexp, so strongly
    negative inputs no longer overflow inside np.exp; the result for such
    inputs is still 0.

    Args:
        Z: scalar or array of pre-activations.

    Returns:
        Values in [0, 1] with the same shape as Z.
    """
    return np.exp(-np.logaddexp(0, -Z))
|
|
"\n",
|
def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and Z."""
    rectified = np.maximum(0, Z)
    return rectified
|
|
"\n",
|
def sigmoid_backward(dA, Z):
    """Back-propagate through a sigmoid activation.

    Multiplies the incoming gradient dA by the sigmoid derivative
    s * (1 - s), where s = sigmoid(Z).
    """
    activation = sigmoid(Z)
    scaled = dA * activation
    return scaled * (1 - activation)
|
|
"\n",
|
def relu_backward(dA, Z):
    """Back-propagate through a ReLU activation.

    Returns a copy of dA in which every position where Z <= 0 is set to 0;
    dA itself is not modified.
    """
    gradient = np.array(dA, copy = True)
    inactive = Z <= 0
    gradient[inactive] = 0
    return gradient
|
|
],
|
|
"id": "c1b960e7dcf09d91",
|
|
"outputs": [],
|
|
"execution_count": 88
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:01.051837Z",
|
|
"start_time": "2025-11-04T22:00:01.046197Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu"):
    """Run one layer forward: Z = W . A_prev + b, then apply the activation.

    Args:
        A_prev: activations of the previous layer (columns are samples,
            as implied by np.dot(W_curr, A_prev)).
        W_curr: weight matrix of this layer.
        b_curr: bias column of this layer.
        activation: "relu" or "sigmoid".

    Returns:
        Tuple (A_curr, Z_curr): the activated output and the pre-activation.

    Raises:
        ValueError: if `activation` is not a supported name.
    """
    # Resolve the activation first so an unsupported name fails before any
    # matrix work is done. ValueError is still an Exception, so existing
    # `except Exception` handlers keep working.
    if activation == "relu":
        activation_func = relu
    elif activation == "sigmoid":
        activation_func = sigmoid
    else:
        raise ValueError('Non-supported activation function: {!r}'.format(activation))

    Z_curr = np.dot(W_curr, A_prev) + b_curr

    return activation_func(Z_curr), Z_curr
|
|
],
|
|
"id": "efae2e184daf2fce",
|
|
"outputs": [],
|
|
"execution_count": 89
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:01.101365Z",
|
|
"start_time": "2025-11-04T22:00:01.097608Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
def full_forward_propagation(X, params_values, nn_architecture):
    """Push X through every layer of the network.

    Args:
        X: network input, fed to the first layer as-is.
        params_values: dict holding "W<i>" / "b<i>" for each 1-based layer i.
        nn_architecture: list of layer dicts; only "activation" is read here.

    Returns:
        Tuple (A_last, memory) where memory stores, for each layer i, the
        layer's input under "A<i-1>" and its pre-activation under "Z<i>".
    """
    cache = {}
    activation = X

    for layer_number, layer_spec in enumerate(nn_architecture, start=1):
        layer_input = activation
        activation_name = layer_spec["activation"]

        activation, pre_activation = single_layer_forward_propagation(
            layer_input,
            params_values["W" + str(layer_number)],
            params_values["b" + str(layer_number)],
            activation_name,
        )

        # Keep what back-propagation needs for this layer.
        cache["A" + str(layer_number - 1)] = layer_input
        cache["Z" + str(layer_number)] = pre_activation

    return activation, cache
|
|
],
|
|
"id": "c3cd9e8f51dbe967",
|
|
"outputs": [],
|
|
"execution_count": 90
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:01.147862Z",
|
|
"start_time": "2025-11-04T22:00:01.146127Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
def get_cost_value(Y_hat, Y):
    """Binary cross-entropy averaged over the samples (columns).

    Args:
        Y_hat: predicted probabilities; the number of samples is read from
            Y_hat.shape[1].
        Y: target labels, laid out like Y_hat.

    Returns:
        The cost with singleton dimensions squeezed away.
    """
    m = Y_hat.shape[1]
    # Saturated predictions (exactly 0 or 1) would give log(0) = -inf and a
    # NaN cost through 0 * -inf; keep them a hair inside the open interval.
    eps = 1e-12
    Y_hat = np.clip(np.asarray(Y_hat, dtype=float), eps, 1 - eps)
    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))
    return np.squeeze(cost)
|
|
"\n",
|
# Helper that turns predicted probabilities into hard 0/1 class labels.
def convert_prob_into_class(probs):
    """Threshold probabilities at 0.5.

    Works on a copy of `probs`: entries above 0.5 become 1, entries less
    than or equal to 0.5 become 0.
    """
    labels = np.copy(probs)
    is_positive = labels > 0.5
    is_negative = labels <= 0.5
    labels[is_positive] = 1
    labels[is_negative] = 0
    return labels
|
|
"\n",
|
def get_accuracy_value(Y_hat, Y):
    """Fraction of samples (columns) whose thresholded prediction equals Y.

    A column counts as correct only if every row in it matches.
    """
    predicted_classes = convert_prob_into_class(Y_hat)
    sample_is_correct = (predicted_classes == Y).all(axis=0)
    return sample_is_correct.mean()
|
|
],
|
|
"id": "121416e7bbab57bb",
|
|
"outputs": [],
|
|
"execution_count": 91
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:01.200653Z",
|
|
"start_time": "2025-11-04T22:00:01.198951Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation="relu"):
    """Back-propagate the gradient through a single layer.

    Args:
        dA_curr: gradient of the cost w.r.t. this layer's activation.
        W_curr: weight matrix of this layer.
        b_curr: bias of this layer (accepted for a uniform signature; not
            used in the computation).
        Z_curr: pre-activation stored during the forward pass.
        A_prev: input this layer received during the forward pass; the
            sample count is read from A_prev.shape[1].
        activation: "relu" or "sigmoid".

    Returns:
        Tuple (dA_prev, dW_curr, db_curr).

    Raises:
        ValueError: if `activation` is not a supported name.
    """
    # ValueError is still an Exception, so existing broad handlers keep
    # working, and the message now names the offending value.
    if activation == "relu":
        backward_activation_func = relu_backward
    elif activation == "sigmoid":
        backward_activation_func = sigmoid_backward
    else:
        raise ValueError('Non-supported activation function: {!r}'.format(activation))

    m = A_prev.shape[1]

    dZ_curr = backward_activation_func(dA_curr, Z_curr)
    # Weight and bias gradients are averaged over the m samples.
    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    dA_prev = np.dot(W_curr.T, dZ_curr)

    return dA_prev, dW_curr, db_curr
|
|
],
|
|
"id": "92e4b87664f18a63",
|
|
"outputs": [],
|
|
"execution_count": 92
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:01.259385Z",
|
|
"start_time": "2025-11-04T22:00:01.253050Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):
    """Compute the gradients of the cross-entropy cost for every layer.

    Args:
        Y_hat: network output from the forward pass.
        Y: target labels; reshaped to Y_hat's shape before use.
        memory: forward-pass cache with "A<i-1>" and "Z<i>" per layer i.
        params_values: dict holding "W<i>" / "b<i>" per layer i.
        nn_architecture: list of layer dicts; only "activation" is read here.

    Returns:
        dict with "dW<i>" and "db<i>" for every 1-based layer i.
    """
    grads_values = {}
    Y = Y.reshape(Y_hat.shape)

    # Derivative of the cross-entropy w.r.t. Y_hat. Predictions saturated at
    # exactly 0 or 1 would divide by zero and flood every gradient with
    # inf/NaN, so they are nudged into the open interval first.
    eps = 1e-12
    Y_hat_safe = np.clip(Y_hat, eps, 1 - eps)
    dA_prev = - (np.divide(Y, Y_hat_safe) - np.divide(1 - Y, 1 - Y_hat_safe))

    # Walk the layers from last to first.
    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        activ_function_curr = layer["activation"]

        dA_curr = dA_prev

        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_idx_curr)]
        W_curr = params_values["W" + str(layer_idx_curr)]
        b_curr = params_values["b" + str(layer_idx_curr)]

        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)

        grads_values["dW" + str(layer_idx_curr)] = dW_curr
        grads_values["db" + str(layer_idx_curr)] = db_curr

    return grads_values
|
|
],
|
|
"id": "2c8e4eed1846f003",
|
|
"outputs": [],
|
|
"execution_count": 93
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:01.319868Z",
|
|
"start_time": "2025-11-04T22:00:01.312729Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
def update(params_values, grads_values, nn_architecture, learning_rate):
    """Apply one gradient-descent step to every layer's parameters.

    Args:
        params_values: dict holding "W<i>" / "b<i>" per 1-based layer i;
            updated in place.
        grads_values: dict holding the matching "dW<i>" / "db<i>".
        nn_architecture: list of layers; only its length is used.
        learning_rate: step size multiplied into each gradient.

    Returns:
        The same `params_values` dict that was passed in.
    """
    for layer_idx in range(1, len(nn_architecture) + 1):
        suffix = str(layer_idx)
        params_values["W" + suffix] -= learning_rate * grads_values["dW" + suffix]
        params_values["b" + suffix] -= learning_rate * grads_values["db" + suffix]

    return params_values
|
|
],
|
|
"id": "16320b953a183511",
|
|
"outputs": [],
|
|
"execution_count": 94
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:01.380430Z",
|
|
"start_time": "2025-11-04T22:00:01.373966Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
def train(X, Y, nn_architecture, epochs, learning_rate, verbose=False, callback=None,
          seed=2, history=None):
    """Train the network with full-batch gradient descent.

    Args:
        X: training inputs, passed unchanged to full_forward_propagation.
        Y: training targets, passed unchanged to the cost/accuracy/backward
            helpers.
        nn_architecture: list of layer dicts describing the network.
        epochs: number of gradient-descent iterations.
        learning_rate: step size handed to update().
        verbose: when true, print cost and accuracy every 50th iteration.
        callback: optional callable invoked as callback(i, params_values)
            every 50th iteration, after that iteration's update.
        seed: seed forwarded to init_layers (default 2, the value that was
            previously hard-coded).
        history: optional dict; when given, the per-iteration cost and
            accuracy are appended to history["cost"] and
            history["accuracy"].

    Returns:
        The trained parameter dictionary.
    """
    log_every = 50
    params_values = init_layers(nn_architecture, seed)

    for i in range(epochs):
        # step forward
        Y_hat, cache = full_forward_propagation(X, params_values, nn_architecture)

        # Metrics describe the parameters *before* this iteration's update.
        # They are only evaluated when somebody consumes them.
        is_log_step = i % log_every == 0
        if history is not None or (verbose and is_log_step):
            cost = get_cost_value(Y_hat, Y)
            accuracy = get_accuracy_value(Y_hat, Y)
            if history is not None:
                history.setdefault("cost", []).append(cost)
                history.setdefault("accuracy", []).append(accuracy)

        # step backward - calculating gradient
        grads_values = full_backward_propagation(Y_hat, Y, cache, params_values, nn_architecture)
        # updating model state
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)

        if is_log_step:
            if verbose:
                print("Iteration: {:05} - cost: {:.5f} - accuracy: {:.5f}".format(i, cost, accuracy))
            if callback is not None:
                callback(i, params_values)

    return params_values
|
|
],
|
|
"id": "fce33f70bba3898",
|
|
"outputs": [],
|
|
"execution_count": 95
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:01.444163Z",
|
|
"start_time": "2025-11-04T22:00:01.436199Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
|
"import os\n",
|
|
"import tensorflow as tf\n",
|
|
"\n",
|
|
"from sklearn.datasets import make_moons\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"\n",
|
|
"import seaborn as sns\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from matplotlib import cm\n",
|
|
"from mpl_toolkits.mplot3d import Axes3D\n",
|
|
"sns.set_style(\"whitegrid\")\n",
|
|
"\n",
|
|
"import keras\n",
|
|
"from keras.models import Sequential\n",
|
|
"from keras.layers import Dense\n",
|
|
"# from keras.utils import np_utils\n",
|
|
"from keras import regularizers\n",
|
|
"\n",
|
|
"from sklearn.metrics import accuracy_score"
|
|
],
|
|
"id": "cccd73b5018799d4",
|
|
"outputs": [],
|
|
"execution_count": 96
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:01.500700Z",
|
|
"start_time": "2025-11-04T22:00:01.497537Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
# Total number of samples in the generated data set.
N_SAMPLES = 1_000
# Fraction of the samples held out as the test set.
TEST_SIZE = 0.1
|
|
],
|
|
"id": "4f66ffa878f01c02",
|
|
"outputs": [],
|
|
"execution_count": 97
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:01.560294Z",
|
|
"start_time": "2025-11-04T22:00:01.553505Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
# Generate the two-moons data set, then split it into train and test parts.
X, y = make_moons(n_samples=N_SAMPLES, noise=0.2, random_state=100)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=42,
)
|
|
],
|
|
"id": "bebe0ed00a2d514",
|
|
"outputs": [],
|
|
"execution_count": 98
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:04.165839Z",
|
|
"start_time": "2025-11-04T22:00:01.614181Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
# The NumPy network works with samples in columns, so the inputs are
# transposed and the labels are laid out as a single row.
params_values = train(
    X_train.T,
    y_train.reshape(1, -1),
    nn_architecture,
    20000,
    0.01,
)
# params_values
|
|
],
|
|
"id": "ce04892d496c5147",
|
|
"outputs": [],
|
|
"execution_count": 99
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:11.428146Z",
|
|
"start_time": "2025-11-04T22:00:11.422370Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
# Evaluate the NumPy network ("David") on the held-out test set.
Y_test_hat, _ = full_forward_propagation(X_test.T, params_values, nn_architecture)

acc_test = get_accuracy_value(Y_test_hat, y_test.reshape(1, -1))
print("Test set accuracy: {:.2f} - David".format(acc_test))
|
|
],
|
|
"id": "26e7a2a8848714d9",
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Test set accuracy: 0.46 - David\n"
|
|
]
|
|
}
|
|
],
|
|
"execution_count": 105
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:29.176357Z",
|
|
"start_time": "2025-11-04T22:00:23.282276Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
# Keras reference model. The input shape is declared with an explicit
# keras.Input layer; passing input_dim= to the first Dense layer is what
# triggered the "Do not pass an `input_shape`/`input_dim` argument"
# UserWarning.
model = Sequential([
    keras.Input(shape=(2,)),
    Dense(25, activation='relu'),
    Dense(50, activation='relu'),
    Dense(50, activation='relu'),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer="sgd", metrics=['accuracy'])

# Training
history = model.fit(X_train, y_train, epochs=200, verbose=0)
|
|
],
|
|
"id": "f05ff40ed26e45c2",
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/oskar/projects/nn-from-scratch/.venv/lib/python3.13/site-packages/keras/src/layers/core/dense.py:95: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n",
|
|
" super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n"
|
|
]
|
|
}
|
|
],
|
|
"execution_count": 106
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:00:33.380478Z",
|
|
"start_time": "2025-11-04T22:00:33.309269Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
# Evaluate the Keras model ("Goliath") on the held-out test set.
Y_test_prob = model.predict(X_test)
is_positive = Y_test_prob > 0.5
Y_test_hat = is_positive.astype(int).ravel()
acc_test = accuracy_score(y_test, Y_test_hat)
print("Test set accuracy: {:.2f} - Goliath".format(acc_test))
|
|
],
|
|
"id": "ef52bee9c93081d3",
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"WARNING:tensorflow:6 out of the last 10 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x7e21f476c900> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n",
|
|
"\u001B[1m4/4\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 8ms/step \n",
|
|
"Test set accuracy: 0.99 - Goliath\n"
|
|
]
|
|
}
|
|
],
|
|
"execution_count": 107
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:03:33.972219Z",
|
|
"start_time": "2025-11-04T22:03:33.966407Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
def make_plot(X, y, plot_name, file_name=None, XX=None, YY=None, preds=None, dark=False):
    """Scatter the samples and, optionally, draw the model's decision surface.

    Args:
        X: samples; columns 0 and 1 are used as the plot coordinates.
        y: values used to colour the points (flattened with ravel()).
        plot_name: figure title.
        file_name: if truthy, the figure is saved to this path and closed;
            otherwise it is left open.
        XX, YY: coordinate grids for the contour plots.
        preds: model outputs on the grid; reshaped to XX.shape. The surface
            is drawn only when XX, YY and preds are all given.
        dark: use matplotlib's 'dark_background' style instead of seaborn's
            "whitegrid".
    """
    if dark:
        plt.style.use('dark_background')
    else:
        sns.set_style("whitegrid")

    # Work on an explicit Figure/Axes pair instead of pyplot's implicit
    # "current figure", so exactly this figure is saved and closed.
    fig, ax = plt.subplots(figsize=(16, 12))
    ax.set(xlabel="$X_1$", ylabel="$X_2$")
    ax.set_title(plot_name, fontsize=30)
    fig.subplots_adjust(left=0.20, right=0.80)

    if XX is not None and YY is not None and preds is not None:
        surface = preds.reshape(XX.shape)
        ax.contourf(XX, YY, surface, 25, alpha=1, cmap=cm.Spectral)
        # Single contour line at the 0.5 level.
        ax.contour(XX, YY, surface, levels=[.5], cmap="Greys", vmin=0, vmax=.6)

    ax.scatter(X[:, 0], X[:, 1], c=y.ravel(), s=40, cmap=cm.Spectral, edgecolors='black')

    if file_name:
        fig.savefig(file_name)
        plt.close(fig)
|
|
],
|
|
"id": "9535365d1da72395",
|
|
"outputs": [],
|
|
"execution_count": 109
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:02:51.938430Z",
|
|
"start_time": "2025-11-04T22:02:51.934316Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
# Boundaries of the plotted region
GRID_X_START = -1.5
GRID_X_END = 2.5
GRID_Y_START = -1.0
GRID_Y_END = 2
# Output directory for the saved plots
OUTPUT_DIR = "./binary_classification_vizualizations/"
# 100 x 100 evaluation grid. The second axis must run from GRID_Y_START
# (it previously started at GRID_X_START, stretching the Y range to -1.5).
grid = np.mgrid[GRID_X_START:GRID_X_END:100j, GRID_Y_START:GRID_Y_END:100j]
# One row per grid point: (x, y)
grid_2d = grid.reshape(2, -1).T
XX, YY = grid
|
|
],
|
|
"id": "b070f03d55981894",
|
|
"outputs": [],
|
|
"execution_count": 108
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-11-04T22:05:02.290039Z",
|
|
"start_time": "2025-11-04T22:05:02.042691Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
def callback_keras_plot(epoch, logs):
    """Save a decision-surface plot of the global `model` for one epoch.

    Intended as an `on_epoch_end` hook: it receives the epoch index and the
    logs dict (unused here), evaluates `model` on `grid_2d` and writes
    "keras_model_<epoch>.png" into OUTPUT_DIR via make_plot.
    """
    plot_title = "Keras Model - It: {:05}".format(epoch)
    file_name = "keras_model_{:05}.png".format(epoch)
    # Make sure the target folder exists before a file is written into it.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    file_path = os.path.join(OUTPUT_DIR, file_name)
    # Sequential has no predict_proba (the call raised AttributeError);
    # predict() returns the model's outputs for the grid points.
    prediction_probs = model.predict(grid_2d, batch_size=32, verbose=0)
    make_plot(X_test, y_test, plot_title, file_name=file_path, XX=XX, YY=YY, preds=prediction_probs)
|
|
"\n",
|
|
"\n",
|
# Register the plotting function so that it runs at the end of every epoch
testmodelcb = keras.callbacks.LambdaCallback(on_epoch_end=callback_keras_plot)

# Building a model. The input shape is declared with keras.Input rather
# than the deprecated input_dim= argument on the first Dense layer.
model = Sequential([
    keras.Input(shape=(2,)),
    Dense(25, activation='relu'),
    Dense(50, activation='relu'),
    Dense(50, activation='relu'),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer="sgd", metrics=['accuracy'])

# Training
history = model.fit(X_train, y_train, epochs=200, verbose=0, callbacks=[testmodelcb])

# Final decision surface. The result was previously bound to the misspelt
# name `rediction_probs`, leaving `prediction_probs` undefined, and it used
# predict_proba, which Sequential does not provide.
prediction_probs = model.predict(grid_2d, batch_size=32, verbose=0)
make_plot(X_test, y_test, "Keras Model", file_name=None, XX=XX, YY=YY, preds=prediction_probs)
|
|
],
|
|
"id": "6feab7da06e7a828",
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/oskar/projects/nn-from-scratch/.venv/lib/python3.13/site-packages/keras/src/layers/core/dense.py:95: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n",
|
|
" super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n"
|
|
]
|
|
},
|
|
{
|
|
"ename": "AttributeError",
|
|
"evalue": "'Sequential' object has no attribute 'predict_proba'",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001B[31m---------------------------------------------------------------------------\u001B[39m",
|
|
"\u001B[31mAttributeError\u001B[39m Traceback (most recent call last)",
|
|
"\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[110]\u001B[39m\u001B[32m, line 23\u001B[39m\n\u001B[32m 20\u001B[39m model.compile(loss=\u001B[33m'\u001B[39m\u001B[33mbinary_crossentropy\u001B[39m\u001B[33m'\u001B[39m, optimizer=\u001B[33m\"\u001B[39m\u001B[33msgd\u001B[39m\u001B[33m\"\u001B[39m, metrics=[\u001B[33m'\u001B[39m\u001B[33maccuracy\u001B[39m\u001B[33m'\u001B[39m])\n\u001B[32m 22\u001B[39m \u001B[38;5;66;03m# Training\u001B[39;00m\n\u001B[32m---> \u001B[39m\u001B[32m23\u001B[39m history = \u001B[43mmodel\u001B[49m\u001B[43m.\u001B[49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\u001B[43mX_train\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43my_train\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mepochs\u001B[49m\u001B[43m=\u001B[49m\u001B[32;43m200\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mverbose\u001B[49m\u001B[43m=\u001B[49m\u001B[32;43m0\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcallbacks\u001B[49m\u001B[43m=\u001B[49m\u001B[43m[\u001B[49m\u001B[43mtestmodelcb\u001B[49m\u001B[43m]\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 24\u001B[39m rediction_probs = model.predict_proba(grid_2d, batch_size=\u001B[32m32\u001B[39m, verbose=\u001B[32m0\u001B[39m)\n\u001B[32m 25\u001B[39m make_plot(X_test, y_test, \u001B[33m\"\u001B[39m\u001B[33mKeras Model\u001B[39m\u001B[33m\"\u001B[39m, file_name=\u001B[38;5;28;01mNone\u001B[39;00m, XX=XX, YY=YY, preds=prediction_probs)\n",
|
|
"\u001B[36mFile \u001B[39m\u001B[32m~/projects/nn-from-scratch/.venv/lib/python3.13/site-packages/keras/src/utils/traceback_utils.py:122\u001B[39m, in \u001B[36mfilter_traceback.<locals>.error_handler\u001B[39m\u001B[34m(*args, **kwargs)\u001B[39m\n\u001B[32m 119\u001B[39m filtered_tb = _process_traceback_frames(e.__traceback__)\n\u001B[32m 120\u001B[39m \u001B[38;5;66;03m# To get the full stack trace, call:\u001B[39;00m\n\u001B[32m 121\u001B[39m \u001B[38;5;66;03m# `keras.config.disable_traceback_filtering()`\u001B[39;00m\n\u001B[32m--> \u001B[39m\u001B[32m122\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m e.with_traceback(filtered_tb) \u001B[38;5;28;01mfrom\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[32m 123\u001B[39m \u001B[38;5;28;01mfinally\u001B[39;00m:\n\u001B[32m 124\u001B[39m \u001B[38;5;28;01mdel\u001B[39;00m filtered_tb\n",
|
|
"\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[110]\u001B[39m\u001B[32m, line 5\u001B[39m, in \u001B[36mcallback_keras_plot\u001B[39m\u001B[34m(epoch, logs)\u001B[39m\n\u001B[32m 3\u001B[39m file_name = \u001B[33m\"\u001B[39m\u001B[33mkeras_model_\u001B[39m\u001B[38;5;132;01m{:05}\u001B[39;00m\u001B[33m.png\u001B[39m\u001B[33m\"\u001B[39m.format(epoch)\n\u001B[32m 4\u001B[39m file_path = os.path.join(OUTPUT_DIR, file_name)\n\u001B[32m----> \u001B[39m\u001B[32m5\u001B[39m prediction_probs = \u001B[43mmodel\u001B[49m\u001B[43m.\u001B[49m\u001B[43mpredict_proba\u001B[49m(grid_2d, batch_size=\u001B[32m32\u001B[39m, verbose=\u001B[32m0\u001B[39m)\n\u001B[32m 6\u001B[39m make_plot(X_test, y_test, plot_title, file_name=file_path, XX=XX, YY=YY, preds=prediction_probs)\n",
|
|
"\u001B[31mAttributeError\u001B[39m: 'Sequential' object has no attribute 'predict_proba'"
|
|
]
|
|
}
|
|
],
|
|
"execution_count": 110
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|