Skip to content

Commit 95ba0fb

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 59c0761 commit 95ba0fb

2 files changed

Lines changed: 102 additions & 90 deletions

File tree

machine_learning/multilayer_perceptron_classifier_from_scratch.py

Lines changed: 101 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
from tqdm import tqdm
33
from typing import Tuple, List
4+
5+
46
class Dataloader:
57
"""
68
DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting.
@@ -39,7 +41,9 @@ def __init__(self, X: List[List[float]], y: List[int]) -> None:
3941
self.y = np.array(y)
4042
self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed
4143

42-
def get_Train_test_data(self) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
44+
def get_Train_test_data(
45+
self,
46+
) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
4347
"""
4448
Splits the data into training and testing sets. Here, we manually split the data.
4549
@@ -51,13 +55,21 @@ def get_Train_test_data(self) -> Tuple[List[np.ndarray], List[np.ndarray], List[
5155
- Test labels
5256
"""
5357
# Manually splitting data into training and testing sets
54-
train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training
55-
train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray
58+
train_data = np.array(
59+
[self.X[0], self.X[1], self.X[2]]
60+
) # First 3 samples for training
61+
train_labels = [
62+
np.array([self.y[0]]),
63+
np.array([self.y[1]]),
64+
np.array([self.y[2]]),
65+
] # Labels as np.ndarray
5666
test_data = np.array([self.X[3]]) # Last sample for testing
5767
test_labels = [np.array([self.y[3]])] # Labels as np.ndarray
5868
return train_data, train_labels, test_data, test_labels
5969

60-
def shuffle_data(self, paired_data: List[Tuple[np.ndarray, int]]) -> List[Tuple[np.ndarray, int]]:
70+
def shuffle_data(
71+
self, paired_data: List[Tuple[np.ndarray, int]]
72+
) -> List[Tuple[np.ndarray, int]]:
6173
"""
6274
Shuffles the data randomly.
6375
@@ -79,58 +91,61 @@ def get_inout_dim(self) -> Tuple[int, int]:
7991
@staticmethod
8092
def one_hot_encode(labels, num_classes):
8193
"""
82-
Perform one-hot encoding for the given labels.
94+
Perform one-hot encoding for the given labels.
8395
84-
Args:
85-
labels: List of integer labels.
86-
num_classes: Total number of classes for encoding.
96+
Args:
97+
labels: List of integer labels.
98+
num_classes: Total number of classes for encoding.
8799
88-
Returns:
89-
A numpy array representing one-hot encoded labels.
100+
Returns:
101+
A numpy array representing one-hot encoded labels.
90102
"""
91103
one_hot = np.zeros((len(labels), num_classes))
92104
for idx, label in enumerate(labels):
93105
one_hot[idx, label] = 0.99
94106
return one_hot
95107

96108

97-
class MLP():
109+
class MLP:
98110
"""
99-
A custom MLP class for implementing a simple multi-layer perceptron with
100-
forward propagation, backpropagation.
101-
102-
Attributes:
103-
learning_rate (float): Learning rate for gradient descent.
104-
gamma (float): Parameter to control learning rate adjustment.
105-
epoch (int): Number of epochs for training.
106-
hidden_dim (int): Dimension of the hidden layer.
107-
batch_size (int): Number of samples per mini-batch.
108-
train_loss (List[float]): List to store training loss for each fold.
109-
train_accuracy (List[float]): List to store training accuracy for each fold.
110-
test_loss (List[float]): List to store test loss for each fold.
111-
test_accuracy (List[float]): List to store test accuracy for each fold.
112-
dataloader (Dataloader): DataLoader object for handling training data.
113-
inter_variable (dict): Dictionary to store intermediate variables for backpropagation.
114-
weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold.
115-
best_accuracy (float): Best test accuracy achieved.
116-
patience (int): Patience for early stopping.
117-
epochs_no_improve (int): Counter for epochs without improvement.
118-
119-
Methods:
120-
get_inout_dim:obtain input dimension and output dimension.
121-
relu: Apply the ReLU activation function.
122-
relu_derivative: Compute the derivative of the ReLU function.
123-
forward: Perform a forward pass through the network.
124-
back_prop: Perform backpropagation to compute gradients.
125-
update_weights: Update the weights using gradients.
126-
update_learning_rate: Adjust the learning rate based on test accuracy.
127-
accuracy: Compute accuracy of the model.
128-
loss: Compute weighted MSE loss.
129-
train: Train the MLP over multiple folds with early stopping.
130-
131-
132-
"""
133-
def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2):
111+
A custom MLP class for implementing a simple multi-layer perceptron with
112+
forward propagation, backpropagation.
113+
114+
Attributes:
115+
learning_rate (float): Learning rate for gradient descent.
116+
gamma (float): Parameter to control learning rate adjustment.
117+
epoch (int): Number of epochs for training.
118+
hidden_dim (int): Dimension of the hidden layer.
119+
batch_size (int): Number of samples per mini-batch.
120+
train_loss (List[float]): List to store training loss for each fold.
121+
train_accuracy (List[float]): List to store training accuracy for each fold.
122+
test_loss (List[float]): List to store test loss for each fold.
123+
test_accuracy (List[float]): List to store test accuracy for each fold.
124+
dataloader (Dataloader): DataLoader object for handling training data.
125+
inter_variable (dict): Dictionary to store intermediate variables for backpropagation.
126+
weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold.
127+
best_accuracy (float): Best test accuracy achieved.
128+
patience (int): Patience for early stopping.
129+
epochs_no_improve (int): Counter for epochs without improvement.
130+
131+
Methods:
132+
get_inout_dim:obtain input dimension and output dimension.
133+
relu: Apply the ReLU activation function.
134+
relu_derivative: Compute the derivative of the ReLU function.
135+
forward: Perform a forward pass through the network.
136+
back_prop: Perform backpropagation to compute gradients.
137+
update_weights: Update the weights using gradients.
138+
update_learning_rate: Adjust the learning rate based on test accuracy.
139+
accuracy: Compute accuracy of the model.
140+
loss: Compute weighted MSE loss.
141+
train: Train the MLP over multiple folds with early stopping.
142+
143+
144+
"""
145+
146+
def __init__(
147+
self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2
148+
):
134149
self.learning_rate = learning_rate #
135150
self.gamma = gamma # learning_rate decay hyperparameter gamma
136151
self.epoch = epoch
@@ -213,9 +228,7 @@ def relu_derivative(self, z):
213228
"""
214229
return (z > 0).astype(float)
215230

216-
217231
def forward(self, x, W1, W2, no_gradient=False):
218-
219232
"""
220233
Performs a forward pass through the neural network with one hidden layer.
221234
@@ -246,45 +259,40 @@ def forward(self, x, W1, W2, no_gradient=False):
246259
z2 = np.dot(a1, W2)
247260
a2 = z2
248261

249-
250262
if no_gradient:
251263
# when predict
252264
return a2
253265
else:
254266
# when training
255-
self.inter_variable = {
256-
"z1": z1, "a1": a1,
257-
"z2": z2, "a2": a2
258-
}
267+
self.inter_variable = {"z1": z1, "a1": a1, "z2": z2, "a2": a2}
259268
return a2
260269

261270
def back_prop(self, x, y, W1, W2):
262271
"""
263-
Performs backpropagation to compute gradients for the weights.
264-
265-
Args:
266-
x: Input data, shape (batch_size, input_dim).
267-
y: True labels, shape (batch_size, output_dim).
268-
W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim).
269-
W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim).
272+
Performs backpropagation to compute gradients for the weights.
270273
271-
Returns:
272-
Tuple of gradients (grad_W1, grad_W2) for the weight matrices.
274+
Args:
275+
x: Input data, shape (batch_size, input_dim).
276+
y: True labels, shape (batch_size, output_dim).
277+
W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim).
278+
W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim).
273279
274-
Examples:
275-
>>> mlp = MLP(None, 1, 0.1, hidden_dim=2)
276-
>>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias
277-
>>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2
278-
>>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2)
279-
>>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2)
280-
>>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable
281-
>>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2)
282-
>>> grad_W1.shape
283-
(3, 2)
284-
>>> grad_W2.shape
285-
(2, 2)
286-
"""
280+
Returns:
281+
Tuple of gradients (grad_W1, grad_W2) for the weight matrices.
287282
283+
Examples:
284+
>>> mlp = MLP(None, 1, 0.1, hidden_dim=2)
285+
>>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias
286+
>>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2
287+
>>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2)
288+
>>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2)
289+
>>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable
290+
>>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2)
291+
>>> grad_W1.shape
292+
(3, 2)
293+
>>> grad_W2.shape
294+
(2, 2)
295+
"""
288296

289297
a1 = self.inter_variable["a1"] # (batch_size, hidden_dim)
290298
z1 = self.inter_variable["z1"]
@@ -295,13 +303,17 @@ def back_prop(self, x, y, W1, W2):
295303

296304
# 1. output layer error
297305
delta_k = a2 - y
298-
delta_j = np.dot(delta_k, W2.T) * self.relu_derivative(z1) # (batch, hidden_dim) 使用relu时
299-
306+
delta_j = np.dot(delta_k, W2.T) * self.relu_derivative(
307+
z1
308+
) # (batch, hidden_dim) 使用relu时
300309

301-
grad_w2 = np.dot(a1.T, delta_k) / batch_size # (hidden, batch).dot(batch, output) = (hidden, output)
310+
grad_w2 = (
311+
np.dot(a1.T, delta_k) / batch_size
312+
) # (hidden, batch).dot(batch, output) = (hidden, output)
302313
x_flat = x.reshape(x.shape[0], -1) # (batch_size, input_dim)
303-
grad_w1 = np.dot(x_flat.T, delta_j) / batch_size # (input_dim, batch_size).dot(batch, hidden) = (input, hidden)
304-
314+
grad_w1 = (
315+
np.dot(x_flat.T, delta_j) / batch_size
316+
) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden)
305317

306318
return grad_w1, grad_w2
307319

@@ -434,12 +446,13 @@ def train(self):
434446
"""
435447

436448
learning_rate = self.learning_rate
437-
train_data, train_labels, test_data, test_labels = self.dataloader.get_Train_test_data()
449+
train_data, train_labels, test_data, test_labels = (
450+
self.dataloader.get_Train_test_data()
451+
)
438452

439453
train_data = np.c_[train_data, np.ones(train_data.shape[0])]
440454
test_data = np.c_[test_data, np.ones(test_data.shape[0])]
441455

442-
443456
_, total_label_num = self.dataloader.get_inout_dim()
444457

445458
train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num)
@@ -454,13 +467,14 @@ def train(self):
454467

455468
for j in tqdm(range(self.epoch)):
456469
for k in range(0, train_data.shape[0], batch_size): # retrieve every image
457-
458-
batch_imgs = train_data[k: k + batch_size]
459-
batch_labels = train_labels[k: k + batch_size]
470+
batch_imgs = train_data[k : k + batch_size]
471+
batch_labels = train_labels[k : k + batch_size]
460472

461473
output = self.forward(x=batch_imgs, W1=W1, W2=W2, no_gradient=False)
462474

463-
grad_W1, grad_W2 = self.back_prop(x=batch_imgs, y=batch_labels, W1=W1, W2=W2)
475+
grad_W1, grad_W2 = self.back_prop(
476+
x=batch_imgs, y=batch_labels, W1=W1, W2=W2
477+
)
464478

465479
W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate)
466480

@@ -475,6 +489,4 @@ def train(self):
475489

476490
self.test_accuracy = test_accuracy_list
477491
self.test_loss = test_loss_list
478-
print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list))
479-
480-
492+
print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list))

machine_learning/support_vector_machines.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,4 +244,4 @@ def predict(self, observation: ndarray) -> int:
244244
if __name__ == "__main__":
245245
import doctest
246246

247-
doctest.testmod()
247+
doctest.testmod()

0 commit comments

Comments
 (0)