11import numpy as np
22from tqdm import tqdm
33from typing import Tuple , List
4+
5+
46class Dataloader :
57 """
68 DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting.
@@ -39,7 +41,9 @@ def __init__(self, X: List[List[float]], y: List[int]) -> None:
3941 self .y = np .array (y )
4042 self .class_weights = {0 : 1.0 , 1 : 1.0 } # Example class weights, adjust as needed
4143
def get_Train_test_data(
    self,
    n_train: int = 3,
    n_test: int = 1,
) -> Tuple[np.ndarray, List[np.ndarray], np.ndarray, List[np.ndarray]]:
    """
    Splits the data into training and testing sets by position.

    The first ``n_train`` samples become the training set and the next
    ``n_test`` samples become the test set. With the defaults this is the
    first three samples for training and the fourth sample for testing.

    Args:
        n_train: Number of leading samples used for training.
        n_test: Number of samples, taken right after the training samples,
            used for testing.

    Returns:
        Tuple containing:
        - Training data, stacked into a single array
        - Training labels, a list with one single-element array per sample
        - Test data, stacked into a single array
        - Test labels, a list with one single-element array per sample

    Raises:
        ValueError: If ``n_train`` or ``n_test`` is negative.
        IndexError: If fewer than ``n_train + n_test`` samples (or labels)
            are available.
    """
    if n_train < 0 or n_test < 0:
        raise ValueError("n_train and n_test must be non-negative")

    needed = n_train + n_test
    available = min(len(self.X), len(self.y))
    if available < needed:
        # Same exception type that plain indexing would raise, but with a
        # message that says what is actually missing.
        raise IndexError(
            f"need at least {needed} samples to split, got {available}"
        )

    train_idx = range(n_train)
    test_idx = range(n_train, needed)

    train_data = np.array([self.X[i] for i in train_idx])
    # Each label is wrapped in its own one-element array.
    train_labels = [np.array([self.y[i]]) for i in train_idx]
    test_data = np.array([self.X[i] for i in test_idx])
    test_labels = [np.array([self.y[i]]) for i in test_idx]
    return train_data, train_labels, test_data, test_labels
5969
60- def shuffle_data (self , paired_data : List [Tuple [np .ndarray , int ]]) -> List [Tuple [np .ndarray , int ]]:
70+ def shuffle_data (
71+ self , paired_data : List [Tuple [np .ndarray , int ]]
72+ ) -> List [Tuple [np .ndarray , int ]]:
6173 """
6274 Shuffles the data randomly.
6375
@@ -79,58 +91,61 @@ def get_inout_dim(self) -> Tuple[int, int]:
@staticmethod
def one_hot_encode(labels, num_classes, on_value=0.99):
    """
    Perform one-hot encoding for the given labels.

    Args:
        labels: Sequence of integer labels. Each entry may be a plain
            integer or a one-element integer array.
        num_classes: Total number of classes for encoding.
        on_value: Value written at each label's position. Defaults to 0.99,
            so the active entry is 0.99 rather than 1.0.

    Returns:
        A numpy array of shape (len(labels), num_classes) that is zero
        everywhere except ``on_value`` at each row's label index.
    """
    one_hot = np.zeros((len(labels), num_classes))
    # Row-by-row assignment keeps one-element array labels on their own row.
    for idx, label in enumerate(labels):
        one_hot[idx, label] = on_value
    return one_hot
95107
96108
97- class MLP () :
109+ class MLP :
98110 """
99- A custom MLP class for implementing a simple multi-layer perceptron with
100- forward propagation, backpropagation.
101-
102- Attributes:
103- learning_rate (float): Learning rate for gradient descent.
104- gamma (float): Parameter to control learning rate adjustment.
105- epoch (int): Number of epochs for training.
106- hidden_dim (int): Dimension of the hidden layer.
107- batch_size (int): Number of samples per mini-batch.
108- train_loss (List[float]): List to store training loss for each fold.
109- train_accuracy (List[float]): List to store training accuracy for each fold.
110- test_loss (List[float]): List to store test loss for each fold.
111- test_accuracy (List[float]): List to store test accuracy for each fold.
112- dataloader (Dataloader): DataLoader object for handling training data.
113- inter_variable (dict): Dictionary to store intermediate variables for backpropagation.
114- weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold.
115- best_accuracy (float): Best test accuracy achieved.
116- patience (int): Patience for early stopping.
117- epochs_no_improve (int): Counter for epochs without improvement.
118-
119- Methods:
120- get_inout_dim:obtain input dimension and output dimension.
121- relu: Apply the ReLU activation function.
122- relu_derivative: Compute the derivative of the ReLU function.
123- forward: Perform a forward pass through the network.
124- back_prop: Perform backpropagation to compute gradients.
125- update_weights: Update the weights using gradients.
126- update_learning_rate: Adjust the learning rate based on test accuracy.
127- accuracy: Compute accuracy of the model.
128- loss: Compute weighted MSE loss.
129- train: Train the MLP over multiple folds with early stopping.
130-
131-
132- """
133- def __init__ (self , dataloader , epoch : int , learning_rate : float , gamma = 1 , hidden_dim = 2 ):
111+ A custom MLP class for implementing a simple multi-layer perceptron with
112+ forward propagation, backpropagation.
113+
114+ Attributes:
115+ learning_rate (float): Learning rate for gradient descent.
116+ gamma (float): Parameter to control learning rate adjustment.
117+ epoch (int): Number of epochs for training.
118+ hidden_dim (int): Dimension of the hidden layer.
119+ batch_size (int): Number of samples per mini-batch.
120+ train_loss (List[float]): List to store training loss for each fold.
121+ train_accuracy (List[float]): List to store training accuracy for each fold.
122+ test_loss (List[float]): List to store test loss for each fold.
123+ test_accuracy (List[float]): List to store test accuracy for each fold.
124+ dataloader (Dataloader): DataLoader object for handling training data.
125+ inter_variable (dict): Dictionary to store intermediate variables for backpropagation.
126+ weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold.
127+ best_accuracy (float): Best test accuracy achieved.
128+ patience (int): Patience for early stopping.
129+ epochs_no_improve (int): Counter for epochs without improvement.
130+
131+ Methods:
132+ get_inout_dim: Obtain input dimension and output dimension.
133+ relu: Apply the ReLU activation function.
134+ relu_derivative: Compute the derivative of the ReLU function.
135+ forward: Perform a forward pass through the network.
136+ back_prop: Perform backpropagation to compute gradients.
137+ update_weights: Update the weights using gradients.
138+ update_learning_rate: Adjust the learning rate based on test accuracy.
139+ accuracy: Compute accuracy of the model.
140+ loss: Compute weighted MSE loss.
141+ train: Train the MLP over multiple folds with early stopping.
142+
143+
144+ """
145+
146+ def __init__ (
147+ self , dataloader , epoch : int , learning_rate : float , gamma = 1 , hidden_dim = 2
148+ ):
134149 self .learning_rate = learning_rate #
135150 self .gamma = gamma # learning_rate decay hyperparameter gamma
136151 self .epoch = epoch
@@ -213,9 +228,7 @@ def relu_derivative(self, z):
213228 """
214229 return (z > 0 ).astype (float )
215230
216-
217231 def forward (self , x , W1 , W2 , no_gradient = False ):
218-
219232 """
220233 Performs a forward pass through the neural network with one hidden layer.
221234
@@ -246,45 +259,40 @@ def forward(self, x, W1, W2, no_gradient=False):
246259 z2 = np .dot (a1 , W2 )
247260 a2 = z2
248261
249-
250262 if no_gradient :
251263 # when predict
252264 return a2
253265 else :
254266 # when training
255- self .inter_variable = {
256- "z1" : z1 , "a1" : a1 ,
257- "z2" : z2 , "a2" : a2
258- }
267+ self .inter_variable = {"z1" : z1 , "a1" : a1 , "z2" : z2 , "a2" : a2 }
259268 return a2
260269
261270 def back_prop (self , x , y , W1 , W2 ):
262271 """
263- Performs backpropagation to compute gradients for the weights.
264-
265- Args:
266- x: Input data, shape (batch_size, input_dim).
267- y: True labels, shape (batch_size, output_dim).
268- W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim).
269- W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim).
272+ Performs backpropagation to compute gradients for the weights.
270273
271- Returns:
272- Tuple of gradients (grad_W1, grad_W2) for the weight matrices.
274+ Args:
275+ x: Input data, shape (batch_size, input_dim).
276+ y: True labels, shape (batch_size, output_dim).
277+ W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim).
278+ W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim).
273279
274- Examples:
275- >>> mlp = MLP(None, 1, 0.1, hidden_dim=2)
276- >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias
277- >>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2
278- >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2)
279- >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2)
280- >>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable
281- >>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2)
282- >>> grad_W1.shape
283- (3, 2)
284- >>> grad_W2.shape
285- (2, 2)
286- """
280+ Returns:
281+ Tuple of gradients (grad_W1, grad_W2) for the weight matrices.
287282
283+ Examples:
284+ >>> mlp = MLP(None, 1, 0.1, hidden_dim=2)
285+ >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias
286+ >>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2
287+ >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2)
288+ >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2)
289+ >>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable
290+ >>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2)
291+ >>> grad_W1.shape
292+ (3, 2)
293+ >>> grad_W2.shape
294+ (2, 2)
295+ """
288296
289297 a1 = self .inter_variable ["a1" ] # (batch_size, hidden_dim)
290298 z1 = self .inter_variable ["z1" ]
@@ -295,13 +303,17 @@ def back_prop(self, x, y, W1, W2):
295303
296304 # 1. output layer error
297305 delta_k = a2 - y
298- delta_j = np .dot (delta_k , W2 .T ) * self .relu_derivative (z1 ) # (batch, hidden_dim) 使用relu时
299-
306+ delta_j = np .dot (delta_k , W2 .T ) * self .relu_derivative (
307+ z1
308+ ) # (batch, hidden_dim) 使用relu时
300309
301- grad_w2 = np .dot (a1 .T , delta_k ) / batch_size # (hidden, batch).dot(batch, output) = (hidden, output)
310+ grad_w2 = (
311+ np .dot (a1 .T , delta_k ) / batch_size
312+ ) # (hidden, batch).dot(batch, output) = (hidden, output)
302313 x_flat = x .reshape (x .shape [0 ], - 1 ) # (batch_size, input_dim)
303- grad_w1 = np .dot (x_flat .T , delta_j ) / batch_size # (input_dim, batch_size).dot(batch, hidden) = (input, hidden)
304-
314+ grad_w1 = (
315+ np .dot (x_flat .T , delta_j ) / batch_size
316+ ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden)
305317
306318 return grad_w1 , grad_w2
307319
@@ -434,12 +446,13 @@ def train(self):
434446 """
435447
436448 learning_rate = self .learning_rate
437- train_data , train_labels , test_data , test_labels = self .dataloader .get_Train_test_data ()
449+ train_data , train_labels , test_data , test_labels = (
450+ self .dataloader .get_Train_test_data ()
451+ )
438452
439453 train_data = np .c_ [train_data , np .ones (train_data .shape [0 ])]
440454 test_data = np .c_ [test_data , np .ones (test_data .shape [0 ])]
441455
442-
443456 _ , total_label_num = self .dataloader .get_inout_dim ()
444457
445458 train_labels = self .dataloader .one_hot_encode (train_labels , total_label_num )
@@ -454,13 +467,14 @@ def train(self):
454467
455468 for j in tqdm (range (self .epoch )):
456469 for k in range (0 , train_data .shape [0 ], batch_size ): # retrieve every image
457-
458- batch_imgs = train_data [k : k + batch_size ]
459- batch_labels = train_labels [k : k + batch_size ]
470+ batch_imgs = train_data [k : k + batch_size ]
471+ batch_labels = train_labels [k : k + batch_size ]
460472
461473 output = self .forward (x = batch_imgs , W1 = W1 , W2 = W2 , no_gradient = False )
462474
463- grad_W1 , grad_W2 = self .back_prop (x = batch_imgs , y = batch_labels , W1 = W1 , W2 = W2 )
475+ grad_W1 , grad_W2 = self .back_prop (
476+ x = batch_imgs , y = batch_labels , W1 = W1 , W2 = W2
477+ )
464478
465479 W1 , W2 = self .update_weights (W1 , W2 , grad_W1 , grad_W2 , learning_rate )
466480
@@ -475,6 +489,4 @@ def train(self):
475489
476490 self .test_accuracy = test_accuracy_list
477491 self .test_loss = test_loss_list
478- print (f"Test accuracy:" , sum (test_accuracy_list )/ len (test_accuracy_list ))
479-
480-
492+ print (f"Test accuracy:" , sum (test_accuracy_list ) / len (test_accuracy_list ))
0 commit comments