I am implementing the stochastic gradient descent algorithm for a multiclass classifier in NumPy. I think there is plenty of room for improvement.
```python
import numpy as np


def array2onehot(shape, array, start=1):
    """
    Transfer a column of class labels to a matrix with one one-hot row
    per label. Note that the labels default to starting at 1 rather
    than 0; `start` shifts the indexing accordingly.
    """
    onehot = np.zeros(shape)
    # shift the labels so the smallest label maps to column 0
    onehot[np.arange(shape[0]), np.asarray(array - start, dtype=int)] = 1
    return onehot


def stochastic_gradient_descent(self, batch, Y):
    """
    The batch contains both the X's and the targets (last column).
    Y is the output produced by the forward pass using the X's in the batch.
    """
    # prepend a column of ones so the bias is handled like any other weight
    ones = np.ones(batch.shape[0])
    X = np.column_stack((ones, batch[:, :-1]))
    # one-hot targets: one column per class (the original passed X.shape
    # here, which only works when the class count equals X's column count)
    T = array2onehot((batch.shape[0], self._class_size), batch[:, -1])
    m = self._batch_size                # size of the mini-batch
    count = np.zeros(self._class_size)  # occurrences of each class in the batch
    delta = np.zeros(self._coefficients.shape)
    for i in range(m):
        k = int(batch[i, -1]) - 1       # column index of this sample's class
        count[k] += 1
        delta[:, k] += X[i] * np.sum(np.multiply(T[i], Y[i]))
    # copy so zeroing columns does not clobber the live coefficients
    tmp = self._coefficients.copy()
    for k in range(self._class_size):
        if count[k] == 0:               # class absent from the batch:
            tmp[:, k] = 0               # skip its regularization term
    delta = np.nan_to_num(delta / count) - 2 * self._penalty_rate * tmp
    self._coefficients -= delta
```
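As a quick sanity check of `array2onehot`, here is a toy example; the label values are made up for illustration:

```python
import numpy as np

labels = np.array([1, 3, 2, 1])   # class labels starting at 1 (start=1)
print(array2onehot((4, 3), labels))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [1. 0. 0.]]
```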
`self` variables aren't defined. It's also hard to know what your `start` variable is used for.