I've created a binary classification model from scratch, just to understand the intuition behind it.
However, when I compare my implementation to a TensorFlow/PyTorch model with the same parameters and configuration, my model needs about 3,000 epochs to reach results that the TensorFlow/PyTorch model reaches in about 300 epochs.
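For reference, the TensorFlow model I compare against looks roughly like this (a minimal sketch: same architecture, sigmoid output, binary cross-entropy, full-batch SGD with the same learning rate; `x` and `y` are the `make_moons` data from the full code below):

```
import tensorflow as tf

# Reference model -- mirrors the from-scratch layers (2 -> 8 -> 8 -> 1),
# sigmoid output, binary cross-entropy, full-batch SGD with lr = 0.01
model = tf.keras.Sequential([
    tf.keras.Input(shape=(2,)),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.fit(x, y, epochs=300, batch_size=len(x))
```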
I also noticed that my model computes very small gradients, while TensorFlow/PyTorch computes much larger gradients in every epoch.
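This is roughly how I look at the gradient magnitudes (illustrative only; `dense_layers` is defined in the full code below, and `torch_model` is just a placeholder name for the PyTorch counterpart):

```
# from-scratch model, after a backward() call:
for i, layer in enumerate(dense_layers):
    print('layer', i, 'mean |dw|:', np.abs(layer['dw']).mean())

# PyTorch counterpart, after loss.backward():
# for name, p in torch_model.named_parameters():
#     print(name, p.grad.abs().mean().item())
```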
- Is there any way to optimize the gradient calculation in the `backward` function so that the model learns faster?
- Is there anything else that could be optimized or simplified, and how could it be implemented?
Below is my `backward` function, which is responsible for computing the gradients:
```
def backward(y: np.ndarray, y_pred: np.ndarray, layers: List[Dict[str, np.ndarray]]) -> None:
    loss: np.ndarray = binary_cross_entropy_loss_prime(y, y_pred)
    for layer in reversed(layers):
        dZ: np.ndarray = layer['prime'](layer['z']) * loss
        layer['db'] = (dZ * np.ones_like(layer['b'])).sum(axis=0, keepdims=True) / loss.shape[0]
        dU: np.ndarray = dZ * np.ones_like(layer['u'])
        layer['dw'] = np.dot(layer['x'].T, dU) / loss.shape[0]
        loss = np.dot(dU, layer['w'].T)
```
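For each layer, with activation $g$, pre-activation $z = xW + b$ and batch size $m$, the loop above computes:

$$
\begin{aligned}
\delta &= \frac{\partial L}{\partial a} \odot g'(z),\\
\frac{\partial L}{\partial b} &= \frac{1}{m}\sum_{i=1}^{m}\delta_i,\qquad
\frac{\partial L}{\partial W} = \frac{1}{m}\,x^\top \delta,\qquad
\frac{\partial L}{\partial a_{\text{prev}}} = \delta\,W^\top
\end{aligned}
$$

where $\partial L / \partial a$ is the incoming `loss` array and $\partial L / \partial a_{\text{prev}}$ is what gets passed back to the previous layer.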
and here is the full code, with type annotations, for easier understanding:
"""# Dataset and libraries""" import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from typing import List, Dict from sklearn.datasets import make_moons x, y = make_moons(n_samples = 1000, noise = 0.2, random_state = 100) # expand y second dim # before expand_dims -> y.shape = (1000, ) # after expand_dims -> y.shape = (1000, 1) y = np.expand_dims(y, 1) # final shapes: X -> (1000, 2), Y -> (1000, 1) """# Activations functions""" def sigma(x: np.ndarray) -> np.ndarray: return 1 / (1 + np.exp(-x)) def sigma_prime(x: np.ndarray) -> np.ndarray: e = np.exp(x) return e / (e + 1) ** 2 def relu(x: np.ndarray) -> np.ndarray: return np.maximum(0, x) def relu_prime(x: np.ndarray) -> np.ndarray: return np.where(x <= 0, 0, 1) """# Dense layers""" dense_layers = [ { 'w': np.random.rand(2, 8) * 0.1, 'b': np.random.rand(1, 8) * 0.1, 'activ': relu, 'prime': relu_prime }, { 'w': np.random.rand(8, 8) * 0.1, 'b': np.random.rand(1, 8) * 0.1, 'activ': relu, 'prime': relu_prime }, { 'w': np.random.rand(8, 1) * 0.1, 'b': np.random.rand(1, 1) * 0.1, 'activ': sigma, 'prime': sigma_prime } ] """# Losses and metrics """ def binary_cross_entropy_loss(y_true: np.ndarray, y_pred: np.ndarray) -> float: number_of_rows = y_true.shape[0] # 1000 rows number_of_cols = y_true.shape[1] # 1 cols return np.sum(-(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))) / number_of_rows * number_of_cols def binary_cross_entropy_loss_prime(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray: return (1 - y_true) / (1 - y_pred) - y_true / y_pred def accuracy(y_true: np.ndarray, y_pred: np.ndarray, threshhold: float = 0.5) -> float: return (np.where(y_pred <= threshhold, 0, 1) == y_true).mean() """# Forward propagation""" def forward(x: np.ndarray, layers: List[ Dict[ str, np.ndarray ] ]) -> np.ndarray: for layer in layers: layer['x'] = x layer['u'] = np.dot(x, layer['w']) layer['z'] = layer['u'] + layer['b'] layer['a'] = layer['activ'](layer['z']) x = layer['a'] return x """# Backward propagation""" def backward(y: np.ndarray, y_pred: np.ndarray, layers: List[ Dict[ str, np.ndarray ] ]) -> None: loss: np.ndarray = binary_cross_entropy_loss_prime(y, y_pred) for layer in reversed(layers): dZ: np.ndarray = layer['prime'](layer['z']) * loss layer['db'] = (dZ * np.ones_like(layer['b'])).sum(axis = 0, keepdims=True) / loss.shape[0] dU: np.ndarray = dZ * np.ones_like(layer['u']) layer['dw'] = np.dot(layer['x'].T, dU) / loss.shape[0] loss = np.dot(dU, layer['w'].T) """# Update weights and biases (SGD optimizer)""" def update(layers: List[ Dict[ str, np.ndarray ] ], learning_rate: float) -> None: for layer in layers: layer['w'] -= learning_rate * layer['dw'] layer['b'] -= learning_rate * layer['db'] """# Train model""" def train(x: np.ndarray, y: np.ndarray, layers: List[ Dict[ str, np.ndarray ] ], epochs: int, learning_rate: float) -> None: for epoch in range(epochs): # Forward propagation y_hat = forward(x, layers) # Backward propagation backward(y, y_hat, layers) # Update layers update(layers, learning_rate) # show progress if epoch % 100 == 0: print('Iteration nr: ', epoch, ', loss: ', binary_cross_entropy_loss(y, y_hat), ', accuracy: ', accuracy(y, y_hat)) train(x, y, dense_layers, 3001, 0.01) ```