Gradient Descent Training
Glossary
Batch: a small subset of the training set used for a single gradient update
Class initializer: called when creating an instance of a class.
Neural network: a model that consists of many simple models
Regularization: a technique that adds constraints to the model in order to reduce overfitting
Practice
# Stochastic gradient descent

class SGDLinearRegression:
    """Linear regression trained with mini-batch (stochastic) gradient descent.

    Minimizes the MSE loss by repeatedly stepping against the gradient
    computed on successive, fixed-order mini-batches of the training data.
    """

    def __init__(self, step_size, epochs, batch_size):
        # Learning rate, number of full passes over the data, mini-batch size.
        self.step_size = step_size
        self.epochs = epochs
        self.batch_size = batch_size

    def fit(self, train_features, train_target):
        """Learn the weight vector; stores feature weights in ``self.w``
        and the bias in ``self.w0``."""
        n_samples = train_features.shape[0]
        # Prepend a column of ones so the bias is learned as weight index 0.
        design = np.concatenate(
            (np.ones((n_samples, 1)), train_features), axis=1
        )
        target = train_target
        weights = np.zeros(design.shape[1])

        # Number of whole batches; any remainder samples are skipped,
        # exactly as in a plain floor-division batching scheme.
        n_batches = n_samples // self.batch_size
        for _ in range(self.epochs):
            for start in range(0, n_batches * self.batch_size, self.batch_size):
                stop = start + self.batch_size
                batch_X = design[start:stop, :]
                batch_y = target[start:stop]

                # MSE gradient on the current mini-batch:
                # 2 * X^T (Xw - y) / batch_size
                residual = batch_X.dot(weights) - batch_y
                grad = 2 * batch_X.T.dot(residual) / batch_X.shape[0]

                weights -= self.step_size * grad

        # Split the bias off from the feature weights.
        self.w = weights[1:]
        self.w0 = weights[0]

    def predict(self, test_features):
        """Return predictions for ``test_features`` (no bias column needed)."""
        return test_features.dot(self.w) + self.w0
# Ridge regression

class RidgeRegression:
    """Linear regression with L2 regularization, trained by mini-batch
    gradient descent.

    Identical to plain SGD linear regression except that the gradient
    includes an L2 penalty term weighted by ``reg_weight``; the bias
    (weight index 0) is left unregularized.
    """

    def __init__(self, step_size, epochs, batch_size, reg_weight):
        # Learning rate, pass count, mini-batch size, and L2 penalty strength.
        self.step_size = step_size
        self.epochs = epochs
        self.batch_size = batch_size
        self.reg_weight = reg_weight

    def fit(self, train_features, train_target):
        """Learn the weight vector; stores feature weights in ``self.w``
        and the bias in ``self.w0``."""
        n_samples = train_features.shape[0]
        # Prepend a column of ones so the bias is learned as weight index 0.
        design = np.concatenate(
            (np.ones((n_samples, 1)), train_features), axis=1
        )
        target = train_target
        weights = np.zeros(design.shape[1])

        # Number of whole batches; remainder samples are skipped.
        n_batches = n_samples // self.batch_size
        for _ in range(self.epochs):
            for start in range(0, n_batches * self.batch_size, self.batch_size):
                stop = start + self.batch_size
                batch_X = design[start:stop, :]
                batch_y = target[start:stop]

                # MSE gradient on the current mini-batch.
                residual = batch_X.dot(weights) - batch_y
                grad = 2 * batch_X.T.dot(residual) / batch_X.shape[0]

                # L2 penalty gradient (2w); zero it out for the bias so
                # the intercept is never shrunk toward zero.
                penalty = 2 * weights.copy()
                penalty[0] = 0
                grad += self.reg_weight * penalty

                weights -= self.step_size * grad

        # Split the bias off from the feature weights.
        self.w = weights[1:]
        self.w0 = weights[0]

    def predict(self, test_features):
        """Return predictions for ``test_features`` (no bias column needed)."""
        return test_features.dot(self.w) + self.w0