From 755b27fc76a8eec0cb198b0947526d8887bf60d9 Mon Sep 17 00:00:00 2001 From: WeiYFan <1521716717@qq.com> Date: Tue, 13 May 2025 17:57:48 +0800 Subject: [PATCH 01/10] Add the polynomial kernel to the SVM code --- machine_learning/support_vector_machines.py | 42 +++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index d17c9044a3e9..551a970865af 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -49,6 +49,19 @@ class SVC: Traceback (most recent call last): ... ValueError: gamma must be > 0 + + >>> SVC(kernel="polynomial") + Traceback (most recent call last): + ... + ValueError: polynomial kernel requires degree + >>> SVC(kernel="polynomial",degree=None) + Traceback (most recent call last): + ... + ValueError: degree must be int + >>> SVC(kernel="polynomial",degree=-1) + Traceback (most recent call last): + ... + ValueError: degree must be > 0 """ def __init__( @@ -57,9 +70,13 @@ def __init__( regularization: float = np.inf, kernel: str = "linear", gamma: float = 0.0, + degree: int =0.0, + coef0: float = 0.0, ) -> None: self.regularization = regularization self.gamma = gamma + self.degree = degree + self.coef0 = coef0 if kernel == "linear": self.kernel = self.__linear elif kernel == "rbf": @@ -73,6 +90,14 @@ def __init__( # in the future, there could be a default value like in sklearn # sklear: def_gamma = 1/(n_features * X.var()) (wiki) # previously it was 1/(n_features) + elif kernel == "polynomial": + if self.degree == 0: + raise ValueError("polynomial kernel requires degree") + if not isinstance(self.degree, int) : + raise ValueError("degree must be int") + if not self.degree > 0: + raise ValueError("degree must be > 0") + self.kernel = self.__polynomial else: msg = f"Unknown kernel: {kernel}" raise ValueError(msg) @@ -98,6 +123,23 @@ def __rbf(self, vector1: ndarray, vector2: ndarray) -> float: """ return np.exp(-(self.gamma * norm_squared(vector1 - vector2))) + + def __polynomial(self, vector1: ndarray, vector2: ndarray) -> float: + """ + Polynomial kernel: (x . y + coef0)^degree + + Note: for more information see: + https://en.wikipedia.org/wiki/Polynomial_kernel + + Args: + vector1 (ndarray): first vector + vector2 (ndarray): second vector + + Returns: + float: (vector1 . vector2 + coef0)^degree + """ + return (np.dot(vector1, vector2) + self.coef0) ** self.degree + def fit(self, observations: list[ndarray], classes: ndarray) -> None: """ Fits the SVC with a set of observations. 
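The kernel added in PATCH 01 is the textbook polynomial kernel K(x, y) = (x . y + coef0)^degree (see the Wikipedia link in its docstring). A minimal standalone sketch of the same computation, using illustrative vectors and hyperparameters that are not part of the patch:

import numpy as np

# K(x, y) = (x . y + coef0) ** degree, the formula SVC.__polynomial implements
x = np.array([1.0, 2.0])
y = np.array([3.0, 4.0])
coef0, degree = 1.0, 2
print((np.dot(x, y) + coef0) ** degree)  # (11.0 + 1.0) ** 2 = 144.0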
From f56f0285a7e96496e47793e3f7c7440d03e61f69 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 10:07:51 +0000 Subject: [PATCH 02/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/support_vector_machines.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 551a970865af..7ce99a241813 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -70,7 +70,7 @@ def __init__( regularization: float = np.inf, kernel: str = "linear", gamma: float = 0.0, - degree: int =0.0, + degree: int = 0.0, coef0: float = 0.0, ) -> None: self.regularization = regularization @@ -93,7 +93,7 @@ def __init__( elif kernel == "polynomial": if self.degree == 0: raise ValueError("polynomial kernel requires degree") - if not isinstance(self.degree, int) : + if not isinstance(self.degree, int): raise ValueError("degree must be int") if not self.degree > 0: raise ValueError("degree must be > 0") @@ -123,7 +123,6 @@ def __rbf(self, vector1: ndarray, vector2: ndarray) -> float: """ return np.exp(-(self.gamma * norm_squared(vector1 - vector2))) - def __polynomial(self, vector1: ndarray, vector2: ndarray) -> float: """ Polynomial kernel: (x . y + coef0)^degree From cc278280befa80cf29f1ff59f779171cb7e75699 Mon Sep 17 00:00:00 2001 From: WeiYFan <1521716717@qq.com> Date: Tue, 13 May 2025 18:18:40 +0800 Subject: [PATCH 03/10] Change the type of the degree --- machine_learning/support_vector_machines.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 551a970865af..2248e96e87b5 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -57,7 +57,7 @@ class SVC: >>> SVC(kernel="polynomial",degree=None) Traceback (most recent call last): ... - ValueError: degree must be int + ValueError: degree must be float or int >>> SVC(kernel="polynomial",degree=-1) Traceback (most recent call last): ... 
@@ -70,7 +70,7 @@ def __init__( regularization: float = np.inf, kernel: str = "linear", gamma: float = 0.0, - degree: int =0.0, + degree: float =0.0, coef0: float = 0.0, ) -> None: self.regularization = regularization @@ -93,8 +93,8 @@ def __init__( elif kernel == "polynomial": if self.degree == 0: raise ValueError("polynomial kernel requires degree") - if not isinstance(self.degree, int) : - raise ValueError("degree must be int") + if not isinstance(self.degree, (float, int)) : + raise ValueError("degree must be float or int") if not self.degree > 0: raise ValueError("degree must be > 0") self.kernel = self.__polynomial From 2ee9d30a1419da7188e37376678ea7957c3b11a4 Mon Sep 17 00:00:00 2001 From: WeiYFan <150578207+WeiYFan@users.noreply.github.com> Date: Tue, 13 May 2025 18:38:40 +0800 Subject: [PATCH 04/10] Update support_vector_machines.py --- machine_learning/support_vector_machines.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 7ce99a241813..5667ccf7e3da 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -57,8 +57,8 @@ class SVC: >>> SVC(kernel="polynomial",degree=None) Traceback (most recent call last): ... - ValueError: degree must be int - >>> SVC(kernel="polynomial",degree=-1) + ValueError: degree must be float or int + >>> SVC(kernel="polynomial",degree=-1.0) Traceback (most recent call last): ... ValueError: degree must be > 0 @@ -70,7 +70,7 @@ def __init__( regularization: float = np.inf, kernel: str = "linear", gamma: float = 0.0, - degree: int = 0.0, + degree: float=0.0, coef0: float = 0.0, ) -> None: self.regularization = regularization @@ -93,8 +93,8 @@ def __init__( elif kernel == "polynomial": if self.degree == 0: raise ValueError("polynomial kernel requires degree") - if not isinstance(self.degree, int): - raise ValueError("degree must be int") + if not isinstance(self.degree, (float, int)) : + raise ValueError("degree must be float or int") if not self.degree > 0: raise ValueError("degree must be > 0") self.kernel = self.__polynomial @@ -123,6 +123,7 @@ def __rbf(self, vector1: ndarray, vector2: ndarray) -> float: """ return np.exp(-(self.gamma * norm_squared(vector1 - vector2))) + def __polynomial(self, vector1: ndarray, vector2: ndarray) -> float: """ Polynomial kernel: (x . 
y + coef0)^degree From 35c9fad30674f48fb1d48632f3f7d418b0ee7715 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 10:47:05 +0000 Subject: [PATCH 05/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/support_vector_machines.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 5667ccf7e3da..b9a5d99a1849 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -70,7 +70,7 @@ def __init__( regularization: float = np.inf, kernel: str = "linear", gamma: float = 0.0, - degree: float=0.0, + degree: float = 0.0, coef0: float = 0.0, ) -> None: self.regularization = regularization @@ -93,7 +93,7 @@ def __init__( elif kernel == "polynomial": if self.degree == 0: raise ValueError("polynomial kernel requires degree") - if not isinstance(self.degree, (float, int)) : + if not isinstance(self.degree, (float, int)): raise ValueError("degree must be float or int") if not self.degree > 0: raise ValueError("degree must be > 0") @@ -123,7 +123,6 @@ def __rbf(self, vector1: ndarray, vector2: ndarray) -> float: """ return np.exp(-(self.gamma * norm_squared(vector1 - vector2))) - def __polynomial(self, vector1: ndarray, vector2: ndarray) -> float: """ Polynomial kernel: (x . y + coef0)^degree From 64da7b923a30493999e2d91e636927dba8cb23cd Mon Sep 17 00:00:00 2001 From: WeiYFan <1521716717@qq.com> Date: Tue, 13 May 2025 23:03:24 +0800 Subject: [PATCH 06/10] add a code file of the multi-layer perceptron classifier from scratch --- ...ayer_perceptron_classifier_from_scratch.py | 480 ++++++++++++++++++ machine_learning/support_vector_machines.py | 6 +- 2 files changed, 483 insertions(+), 3 deletions(-) create mode 100644 machine_learning/multilayer_perceptron_classifier_from_scratch.py diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py new file mode 100644 index 000000000000..0ab5db9321b9 --- /dev/null +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -0,0 +1,480 @@ +import numpy as np +from tqdm import tqdm +from typing import Tuple, List +class Dataloader: + """ + DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting. + + Example usage: + >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] + >>> y = [0, 1, 0, 0] + >>> loader = Dataloader(X, y) + >>> loader.get_Train_test_data() # Returns train and test data + (array([[0., 0.], + [1., 1.], + [1., 0.]]), [array([0]), array([1]), array([0])], array([[0., 1.]]), [array([0])]) + >>> loader.one_hot_encode([0, 1, 0], 2) # Returns one-hot encoded labels + array([[0.99, 0. ], + [0. , 0.99], + [0.99, 0. ]]) + >>> loader.get_inout_dim() + (2, 3) + >>> loader.one_hot_encode([0, 2], 3) + array([[0.99, 0. , 0. ], + [0. , 0. , 0.99]]) + """ + + def __init__(self, X: List[List[float]], y: List[int]) -> None: + """ + Initializes the Dataloader instance with feature matrix X and labels y. + + Args: + X: Feature matrix of shape (n_samples, n_features). + y: List of labels of shape (n_samples,).
+ """ + # random seed + np.random.seed(42) + + self.X = np.array(X) + self.y = np.array(y) + self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed + + def get_Train_test_data(self) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]: + """ + Splits the data into training and testing sets. Here, we manually split the data. + + Returns: + A tuple containing: + - Train data + - Train labels + - Test data + - Test labels + """ + # Manually splitting data into training and testing sets + train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training + train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray + test_data = np.array([self.X[3]]) # Last sample for testing + test_labels = [np.array([self.y[3]])] # Labels as np.ndarray + return train_data, train_labels, test_data, test_labels + + def shuffle_data(self, paired_data: List[Tuple[np.ndarray, int]]) -> List[Tuple[np.ndarray, int]]: + """ + Shuffles the data randomly. + + Args: + paired_data: List of tuples containing data and corresponding labels. + + Returns: + A shuffled list of data-label pairs. + """ + np.random.shuffle(paired_data) + return paired_data + + def get_inout_dim(self) -> Tuple[int, int]: + train_data, train_labels, test_data, test_labels = self.get_Train_test_data() + in_dim = train_data[0].shape[0] + out_dim = len(train_labels) + return in_dim, out_dim + + @staticmethod + def one_hot_encode(labels, num_classes): + """ + Perform one-hot encoding for the given labels. + + Args: + labels: List of integer labels. + num_classes: Total number of classes for encoding. + + Returns: + A numpy array representing one-hot encoded labels. + """ + one_hot = np.zeros((len(labels), num_classes)) + for idx, label in enumerate(labels): + one_hot[idx, label] = 0.99 + return one_hot + + +class MLP(): + """ + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. + gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): List to store training loss for each fold. + train_accuracy (List[float]): List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): List to store test accuracy for each fold. + dataloader (Dataloader): DataLoader object for handling training data. + inter_variable (dict): Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. + best_accuracy (float): Best test accuracy achieved. + patience (int): Patience for early stopping. + epochs_no_improve (int): Counter for epochs without improvement. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. 
+ train: Train the MLP over multiple folds with early stopping. + + + """ + def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2): + self.learning_rate = learning_rate # + self.gamma = gamma # learning_rate decay hyperparameter gamma + self.epoch = epoch + self.hidden_dim = hidden_dim + + self.train_loss = [] + self.train_accuracy = [] + self.test_loss = [] + self.test_accuracy = [] + + self.dataloader = dataloader + self.inter_variable = {} + self.weights1_list = [] + + def get_inout_dim(self): + """ + obtain input dimension and output dimension. + + :return: Tuple of weights (input_dim, output_dim) for the network. + + >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] + >>> y = [0, 1, 0, 0] + >>> loader = Dataloader(X, y) + >>> mlp = MLP(loader, 10, 0.1) + >>> mlp.get_inout_dim() + (2, 3) + """ + input_dim, output_dim = self.dataloader.get_inout_dim() + + return input_dim, output_dim + + def initialize(self): + """ + Initialize weights using He initialization. + + :return: Tuple of weights (W1, W2) for the network. + + >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] + >>> y = [0, 1, 0, 0] + >>> loader = Dataloader(X, y) + >>> mlp = MLP(loader, 10, 0.1) + >>> W1, W2 = mlp.initialize() + >>> W1.shape + (3, 2) + >>> W2.shape + (2, 3) + """ + + in_dim, out_dim = self.dataloader.get_inout_dim() # in_dim here is image dim + W1 = np.random.randn(in_dim + 1, self.hidden_dim) * 0.01 # (in_dim, hidden) + + W2 = np.random.randn(self.hidden_dim, out_dim) * 0.01 # (hidden, output) + return W1, W2 + + def relu(self, z): + """ + Apply the ReLU activation function element-wise. + + :param z: Input array. + :return: Output array after applying ReLU. + + >>> mlp = MLP(None, 1, 0.1) + >>> mlp.relu(np.array([[-1, 2], [3, -4]])) + array([[0, 2], + [3, 0]]) + """ + return np.maximum(0, z) + + def relu_derivative(self, z): + """ + Compute the derivative of the ReLU function. + + :param z: Input array. + :return: Derivative of ReLU function element-wise. + + >>> mlp = MLP(None, 1, 0.01) + >>> mlp.relu_derivative(np.array([[-1, 2], [3, -4]])) + array([[0., 1.], + [1., 0.]]) + """ + return (z > 0).astype(float) + + + def forward(self, x, W1, W2, no_gradient=False): + + """ + Performs a forward pass through the neural network with one hidden layer. + + Args: + x: Input data, shape (batch_size, input_dim). + W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). + W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + no_gradient: If True, returns output without storing intermediates. + + Returns: + Output of the network after forward pass, shape (batch_size, output_dim). + + Examples: + >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) + >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias + >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) + >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) + >>> output = mlp.forward(x, W1, W2) + >>> output.shape + (1, 2) + """ + + z1 = np.dot(x, W1) + + a1 = self.relu(z1) # relu + + # hidden → output + z2 = np.dot(a1, W2) + a2 = z2 + + + if no_gradient: + # when predict + return a2 + else: + # when training + self.inter_variable = { + "z1": z1, "a1": a1, + "z2": z2, "a2": a2 + } + return a2 + + def back_prop(self, x, y, W1, W2): + """ + Performs backpropagation to compute gradients for the weights. + + Args: + x: Input data, shape (batch_size, input_dim). + y: True labels, shape (batch_size, output_dim). 
+ W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). + W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + + Returns: + Tuple of gradients (grad_W1, grad_W2) for the weight matrices. + + Examples: + >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) + >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias + >>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2 + >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) + >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) + >>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable + >>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2) + >>> grad_W1.shape + (3, 2) + >>> grad_W2.shape + (2, 2) + """ + + + a1 = self.inter_variable["a1"] # (batch_size, hidden_dim) + z1 = self.inter_variable["z1"] + a2 = self.inter_variable["a2"] # (batch_size, output_dim) + z2 = self.inter_variable["z2"] + + batch_size = x.shape[0] + + # 1. output layer error + delta_k = a2 - y + delta_j = np.dot(delta_k, W2.T) * self.relu_derivative(z1) # (batch, hidden_dim) when using relu + + + grad_w2 = np.dot(a1.T, delta_k) / batch_size # (hidden, batch).dot(batch, output) = (hidden, output) + x_flat = x.reshape(x.shape[0], -1) # (batch_size, input_dim) + grad_w1 = np.dot(x_flat.T, delta_j) / batch_size # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) + + + return grad_w1, grad_w2 + + def update_weights(self, W1, W2, grad_W1, grad_W2, learning_rate): + """ + Updates the weight matrices using the computed gradients and learning rate. + + Args: + W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). + W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + grad_W1: Gradient for W1, shape (input_dim + 1, hidden_dim). + grad_W2: Gradient for W2, shape (hidden_dim, output_dim). + learning_rate: Learning rate for weight updates. + + Returns: + Updated weight matrices (W1, W2). + + Examples: + >>> mlp = MLP(None, 1, 0.1) + >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) + >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) + >>> grad_W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) + >>> grad_W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) + >>> learning_rate = 0.1 + >>> new_W1, new_W2 = mlp.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) + >>> new_W1==np.array([[0.09, 0.18], [0.27, 0.36], [0.45, 0.54]]) + array([[ True, True], + [ True, True], + [ True, True]]) + >>> new_W2==np.array([[0.63, 0.72], [0.81, 0.90]]) + array([[ True, True], + [ True, True]]) + """ + W1 -= learning_rate * grad_W1 + W2 -= learning_rate * grad_W2 + return W1, W2 + + def update_learning_rate(self, learning_rate): + """ + Updates the learning rate by applying the decay factor gamma. + + Args: + learning_rate: Current learning rate. + + Returns: + Updated learning rate. + + Examples: + >>> mlp = MLP(None, 1, 0.1, gamma=0.9) + >>> round(mlp.update_learning_rate(0.1), 2) + 0.09 + """ + + return learning_rate * self.gamma + + @staticmethod + def accuracy(label, y_hat): + """ + Computes the accuracy of predictions by comparing predicted and true labels. + + Args: + label: True labels, shape (batch_size, num_classes). + y_hat: Predicted outputs, shape (batch_size, num_classes). + + Returns: + Accuracy as a float between 0 and 1.
+ + Examples: + >>> mlp = MLP(None, 1, 0.01) + >>> label = np.array([[1, 0], [0, 1], [1, 0]]) + >>> y_hat = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]]) + >>> mlp.accuracy(label, y_hat) + 1.0 + """ + return (y_hat.argmax(axis=1) == label.argmax(axis=1)).mean() + + @staticmethod + def loss(output, label): + """ + Computes the mean squared error loss between predictions and true labels. + + Args: + output: Predicted outputs, shape (batch_size, num_classes). + label: True labels, shape (batch_size, num_classes). + + Returns: + Mean squared error loss as a float. + + Examples: + >>> mlp = MLP(None, 1, 0.1) + >>> output = np.array([[0.9, 0.1], [0.2, 0.8]]) + >>> label = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> round(mlp.loss(output, label), 3) + 0.025 + """ + return np.sum((output - label) ** 2) / (2 * label.shape[0]) + + def get_acc_loss(self): + """ + Returns the recorded test accuracy and test loss. + + Returns: + Tuple of (test_accuracy, test_loss) lists. + + Examples: + >>> mlp = MLP(None, 1, 0.1) + >>> mlp.test_accuracy = [0.8, 0.9] + >>> mlp.test_loss = [0.1, 0.05] + >>> acc, loss = mlp.get_acc_loss() + >>> acc + [0.8, 0.9] + >>> loss + [0.1, 0.05] + """ + return self.test_accuracy, self.test_loss + + def train(self): + """ + Trains the MLP model using the provided dataloader for multiple folds and epochs. + + Saves the best model parameters for each fold and records accuracy/loss. + + Examples: + >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] + >>> y = [0, 1, 0, 0] + >>> loader = Dataloader(X, y) + >>> mlp = MLP(loader, epoch=10, learning_rate=0.1, hidden_dim=2) + >>> mlp.train() + Test accuracy: 1.0 + """ + + learning_rate = self.learning_rate + train_data, train_labels, test_data, test_labels = self.dataloader.get_Train_test_data() + + train_data = np.c_[train_data, np.ones(train_data.shape[0])] + test_data = np.c_[test_data, np.ones(test_data.shape[0])] + + + _, total_label_num = self.dataloader.get_inout_dim() + + train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num) + test_labels = self.dataloader.one_hot_encode(test_labels, total_label_num) + + W1, W2 = self.initialize() + + train_accuracy_list, train_loss_list = [], [] + test_accuracy_list, test_loss_list = [], [] + + batch_size = 1 + + for j in tqdm(range(self.epoch)): + for k in range(0, train_data.shape[0], batch_size): # retrieve every image + + batch_imgs = train_data[k: k + batch_size] + batch_labels = train_labels[k: k + batch_size] + + output = self.forward(x=batch_imgs, W1=W1, W2=W2, no_gradient=False) + + grad_W1, grad_W2 = self.back_prop(x=batch_imgs, y=batch_labels, W1=W1, W2=W2) + + W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) + + test_output = self.forward(test_data, W1, W2, no_gradient=True) + test_accuracy = self.accuracy(test_labels, test_output) + test_loss = self.loss(test_output, test_labels) + + test_accuracy_list.append(test_accuracy) + test_loss_list.append(test_loss) + + learning_rate = self.update_learning_rate(learning_rate) + + self.test_accuracy = test_accuracy_list + self.test_loss = test_loss_list + print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list)) + + diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 334be81deb33..8af529ca8694 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -97,13 +97,13 @@ def __init__( elif kernel == "polynomial": if self.degree == 0: raise ValueError("polynomial kernel requires 
degree") -<<<<<<< HEAD + if not isinstance(self.degree, (float, int)) : raise ValueError("degree must be float or int") -======= + if not isinstance(self.degree, int): raise ValueError("degree must be int") ->>>>>>> f56f0285a7e96496e47793e3f7c7440d03e61f69 + if not self.degree > 0: raise ValueError("degree must be > 0") self.kernel = self.__polynomial From f55c264daf3a471985b9ebbee32967a2038e5d02 Mon Sep 17 00:00:00 2001 From: WeiYFan <1521716717@qq.com> Date: Tue, 13 May 2025 23:04:43 +0800 Subject: [PATCH 07/10] add a code file of the multi-layer perceptron classifier from scrach --- machine_learning/support_vector_machines.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 8af529ca8694..6891ca4893fe 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -58,7 +58,7 @@ class SVC: Traceback (most recent call last): ... ValueError: degree must be float or int - >>> SVC(kernel="polynomial",degree=-1) + >>> SVC(kernel="polynomial",degree=-1.0) Traceback (most recent call last): ... ValueError: degree must be > 0 @@ -70,11 +70,7 @@ def __init__( regularization: float = np.inf, kernel: str = "linear", gamma: float = 0.0, -<<<<<<< HEAD - degree: float =0.0, -======= - degree: int = 0.0, ->>>>>>> f56f0285a7e96496e47793e3f7c7440d03e61f69 + degree: float = 0.0, coef0: float = 0.0, ) -> None: self.regularization = regularization @@ -97,13 +93,8 @@ def __init__( elif kernel == "polynomial": if self.degree == 0: raise ValueError("polynomial kernel requires degree") - - if not isinstance(self.degree, (float, int)) : + if not isinstance(self.degree, (float, int)): raise ValueError("degree must be float or int") - - if not isinstance(self.degree, int): - raise ValueError("degree must be int") - if not self.degree > 0: raise ValueError("degree must be > 0") self.kernel = self.__polynomial @@ -253,4 +244,4 @@ def predict(self, observation: ndarray) -> int: if __name__ == "__main__": import doctest - doctest.testmod() + doctest.testmod() \ No newline at end of file From 95ba0fb9d0be0a6eee20539ecc349b286285070d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 15:17:28 +0000 Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ...ayer_perceptron_classifier_from_scratch.py | 190 ++++++++++-------- machine_learning/support_vector_machines.py | 2 +- 2 files changed, 102 insertions(+), 90 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index 0ab5db9321b9..c8af68ee7964 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -1,6 +1,8 @@ import numpy as np from tqdm import tqdm from typing import Tuple, List + + class Dataloader: """ DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting. 
@@ -39,7 +41,9 @@ def __init__(self, X: List[List[float]], y: List[int]) -> None: self.y = np.array(y) self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed - def get_Train_test_data(self) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]: + def get_Train_test_data( + self, + ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]: """ Splits the data into training and testing sets. Here, we manually split the data. @@ -51,13 +55,21 @@ def get_Train_test_data(self) -> Tuple[List[np.ndarray], List[np.ndarray], List[ - Test labels """ # Manually splitting data into training and testing sets - train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training - train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray + train_data = np.array( + [self.X[0], self.X[1], self.X[2]] + ) # First 3 samples for training + train_labels = [ + np.array([self.y[0]]), + np.array([self.y[1]]), + np.array([self.y[2]]), + ] # Labels as np.ndarray test_data = np.array([self.X[3]]) # Last sample for testing test_labels = [np.array([self.y[3]])] # Labels as np.ndarray return train_data, train_labels, test_data, test_labels - def shuffle_data(self, paired_data: List[Tuple[np.ndarray, int]]) -> List[Tuple[np.ndarray, int]]: + def shuffle_data( + self, paired_data: List[Tuple[np.ndarray, int]] + ) -> List[Tuple[np.ndarray, int]]: """ Shuffles the data randomly. @@ -79,14 +91,14 @@ def get_inout_dim(self) -> Tuple[int, int]: @staticmethod def one_hot_encode(labels, num_classes): """ - Perform one-hot encoding for the given labels. + Perform one-hot encoding for the given labels. - Args: - labels: List of integer labels. - num_classes: Total number of classes for encoding. + Args: + labels: List of integer labels. + num_classes: Total number of classes for encoding. - Returns: - A numpy array representing one-hot encoded labels. + Returns: + A numpy array representing one-hot encoded labels. """ one_hot = np.zeros((len(labels), num_classes)) for idx, label in enumerate(labels): @@ -94,43 +106,46 @@ def one_hot_encode(labels, num_classes): return one_hot -class MLP(): +class MLP: """ - A custom MLP class for implementing a simple multi-layer perceptron with - forward propagation, backpropagation. - - Attributes: - learning_rate (float): Learning rate for gradient descent. - gamma (float): Parameter to control learning rate adjustment. - epoch (int): Number of epochs for training. - hidden_dim (int): Dimension of the hidden layer. - batch_size (int): Number of samples per mini-batch. - train_loss (List[float]): List to store training loss for each fold. - train_accuracy (List[float]): List to store training accuracy for each fold. - test_loss (List[float]): List to store test loss for each fold. - test_accuracy (List[float]): List to store test accuracy for each fold. - dataloader (Dataloader): DataLoader object for handling training data. - inter_variable (dict): Dictionary to store intermediate variables for backpropagation. - weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. - best_accuracy (float): Best test accuracy achieved. - patience (int): Patience for early stopping. - epochs_no_improve (int): Counter for epochs without improvement. - - Methods: - get_inout_dim:obtain input dimension and output dimension. - relu: Apply the ReLU activation function. - relu_derivative: Compute the derivative of the ReLU function. 
- forward: Perform a forward pass through the network. - back_prop: Perform backpropagation to compute gradients. - update_weights: Update the weights using gradients. - update_learning_rate: Adjust the learning rate based on test accuracy. - accuracy: Compute accuracy of the model. - loss: Compute weighted MSE loss. - train: Train the MLP over multiple folds with early stopping. - - - """ - def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2): + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. + gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): List to store training loss for each fold. + train_accuracy (List[float]): List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): List to store test accuracy for each fold. + dataloader (Dataloader): DataLoader object for handling training data. + inter_variable (dict): Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. + best_accuracy (float): Best test accuracy achieved. + patience (int): Patience for early stopping. + epochs_no_improve (int): Counter for epochs without improvement. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. + train: Train the MLP over multiple folds with early stopping. + + + """ + + def __init__( + self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2 + ): self.learning_rate = learning_rate # self.gamma = gamma # learning_rate decay hyperparameter gamma self.epoch = epoch @@ -213,9 +228,7 @@ def relu_derivative(self, z): """ return (z > 0).astype(float) - def forward(self, x, W1, W2, no_gradient=False): - """ Performs a forward pass through the neural network with one hidden layer. @@ -246,45 +259,40 @@ def forward(self, x, W1, W2, no_gradient=False): z2 = np.dot(a1, W2) a2 = z2 - if no_gradient: # when predict return a2 else: # when training - self.inter_variable = { - "z1": z1, "a1": a1, - "z2": z2, "a2": a2 - } + self.inter_variable = {"z1": z1, "a1": a1, "z2": z2, "a2": a2} return a2 def back_prop(self, x, y, W1, W2): """ - Performs backpropagation to compute gradients for the weights. - - Args: - x: Input data, shape (batch_size, input_dim). - y: True labels, shape (batch_size, output_dim). - W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). - W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + Performs backpropagation to compute gradients for the weights. - Returns: - Tuple of gradients (grad_W1, grad_W2) for the weight matrices. + Args: + x: Input data, shape (batch_size, input_dim). + y: True labels, shape (batch_size, output_dim). 
+ W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). + W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). - Returns: - Tuple of gradients (grad_W1, grad_W2) for the weight matrices. + Returns: + Tuple of gradients (grad_W1, grad_W2) for the weight matrices. - Examples: - >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) - >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias - >>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2 - >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) - >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) - >>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable - >>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2) - >>> grad_W1.shape - (3, 2) - >>> grad_W2.shape - (2, 2) - """ + Examples: + >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) + >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias + >>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2 + >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) + >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) + >>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable + >>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2) + >>> grad_W1.shape + (3, 2) + >>> grad_W2.shape + (2, 2) + """ a1 = self.inter_variable["a1"] # (batch_size, hidden_dim) z1 = self.inter_variable["z1"] @@ -295,13 +303,17 @@ def back_prop(self, x, y, W1, W2): # 1. output layer error delta_k = a2 - y - delta_j = np.dot(delta_k, W2.T) * self.relu_derivative(z1) # (batch, hidden_dim) when using relu - + delta_j = np.dot(delta_k, W2.T) * self.relu_derivative( + z1 + ) # (batch, hidden_dim) when using relu - grad_w2 = np.dot(a1.T, delta_k) / batch_size # (hidden, batch).dot(batch, output) = (hidden, output) + grad_w2 = ( + np.dot(a1.T, delta_k) / batch_size + ) # (hidden, batch).dot(batch, output) = (hidden, output) x_flat = x.reshape(x.shape[0], -1) # (batch_size, input_dim) - grad_w1 = np.dot(x_flat.T, delta_j) / batch_size # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) - + grad_w1 = ( + np.dot(x_flat.T, delta_j) / batch_size + ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) return grad_w1, grad_w2 @@ -434,12 +446,13 @@ def train(self): """ learning_rate = self.learning_rate - train_data, train_labels, test_data, test_labels = self.dataloader.get_Train_test_data() + train_data, train_labels, test_data, test_labels = ( + self.dataloader.get_Train_test_data() + ) train_data = np.c_[train_data, np.ones(train_data.shape[0])] test_data = np.c_[test_data, np.ones(test_data.shape[0])] - _, total_label_num = self.dataloader.get_inout_dim() train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num) @@ -454,13 +467,14 @@ def train(self): for j in tqdm(range(self.epoch)): for k in range(0, train_data.shape[0], batch_size): # retrieve every image - - batch_imgs = train_data[k: k + batch_size] - batch_labels = train_labels[k: k + batch_size] + batch_imgs = train_data[k : k + batch_size] + batch_labels = train_labels[k : k + batch_size] output = self.forward(x=batch_imgs, W1=W1, W2=W2, no_gradient=False) - grad_W1, grad_W2 = self.back_prop(x=batch_imgs, y=batch_labels, W1=W1, W2=W2) + grad_W1, grad_W2 = self.back_prop( + x=batch_imgs, y=batch_labels, W1=W1, W2=W2 + ) W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) @@ -475,6 +489,4 @@ def train(self): self.test_accuracy = test_accuracy_list self.test_loss = test_loss_list - print(f"Test accuracy:",
sum(test_accuracy_list)/len(test_accuracy_list)) - - + print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list)) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 6891ca4893fe..b9a5d99a1849 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -244,4 +244,4 @@ def predict(self, observation: ndarray) -> int: if __name__ == "__main__": import doctest - doctest.testmod() \ No newline at end of file + doctest.testmod() From 2e72d9d812817cc8def5bb510dffb2944e047f33 Mon Sep 17 00:00:00 2001 From: WeiYFan <150578207+WeiYFan@users.noreply.github.com> Date: Wed, 14 May 2025 16:04:34 +0800 Subject: [PATCH 09/10] Update multilayer_perceptron_classifier_from_scratch.py --- ...ayer_perceptron_classifier_from_scratch.py | 235 +++++++++--------- 1 file changed, 122 insertions(+), 113 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index c8af68ee7964..38e74310caf3 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -1,8 +1,8 @@ import numpy as np from tqdm import tqdm -from typing import Tuple, List - - +from numpy.random import default_rng +from numpy.random import seed +seed(42) class Dataloader: """ DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting. @@ -11,10 +11,15 @@ class Dataloader: >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] >>> y = [0, 1, 0, 0] >>> loader = Dataloader(X, y) - >>> loader.get_Train_test_data() # Returns train and test data - (array([[0., 0.], - [1., 1.], - [1., 0.]]), [array([0]), array([1]), array([0])], array([[0., 1.]]), [array([0])]) + >>> train_X, train_y, test_X, test_y = loader.get_Train_test_data() + >>> train_X.shape + (3, 2) + >>> len(train_y) + 3 + >>> test_X.shape + (1, 2) + >>> len(test_y) + 1 >>> loader.one_hot_encode([0, 1, 0], 2) # Returns one-hot encoded labels array([[0.99, 0. ], [0. , 0.99], @@ -26,24 +31,21 @@ class Dataloader: [0. , 0. , 0.99]]) """ - def __init__(self, X: List[List[float]], y: List[int]) -> None: + def __init__(self, features: list[list[float]], labels: list[int]) -> None: """ - Initializes the Dataloader instance with feature matrix X and labels y. + Initializes the Dataloader instance with feature matrix features and labels labels. Args: - X: Feature matrix of shape (n_samples, n_features). - y: List of labels of shape (n_samples,). + features: Feature matrix of shape (n_samples, n_features). + labels: List of labels of shape (n_samples,). """ # random seed - np.random.seed(42) - - self.X = np.array(X) - self.y = np.array(y) + self.rng = default_rng(42) # Create a random number generator with a seed + self.X = np.array(features) + self.y = np.array(labels) self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed - def get_Train_test_data( - self, - ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]: + def get_Train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: """ Splits the data into training and testing sets. Here, we manually split the data. 
@@ -54,22 +56,13 @@ def get_Train_test_data( - Test data - Test labels """ - # Manually splitting data into training and testing sets - train_data = np.array( - [self.X[0], self.X[1], self.X[2]] - ) # First 3 samples for training - train_labels = [ - np.array([self.y[0]]), - np.array([self.y[1]]), - np.array([self.y[2]]), - ] # Labels as np.ndarray + train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training + train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray test_data = np.array([self.X[3]]) # Last sample for testing test_labels = [np.array([self.y[3]])] # Labels as np.ndarray return train_data, train_labels, test_data, test_labels - def shuffle_data( - self, paired_data: List[Tuple[np.ndarray, int]] - ) -> List[Tuple[np.ndarray, int]]: + def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[np.ndarray, int]]: """ Shuffles the data randomly. @@ -79,17 +72,17 @@ def shuffle_data( Returns: A shuffled list of data-label pairs. """ - np.random.shuffle(paired_data) + default_rng.shuffle(paired_data) # Using the new random number generator return paired_data - def get_inout_dim(self) -> Tuple[int, int]: + def get_inout_dim(self) -> tuple[int, int]: train_data, train_labels, test_data, test_labels = self.get_Train_test_data() in_dim = train_data[0].shape[0] out_dim = len(train_labels) return in_dim, out_dim @staticmethod - def one_hot_encode(labels, num_classes): + def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray: """ Perform one-hot encoding for the given labels. @@ -106,46 +99,40 @@ def one_hot_encode(labels, num_classes): return one_hot -class MLP: +class MLP(): """ - A custom MLP class for implementing a simple multi-layer perceptron with - forward propagation, backpropagation. - - Attributes: - learning_rate (float): Learning rate for gradient descent. - gamma (float): Parameter to control learning rate adjustment. - epoch (int): Number of epochs for training. - hidden_dim (int): Dimension of the hidden layer. - batch_size (int): Number of samples per mini-batch. - train_loss (List[float]): List to store training loss for each fold. - train_accuracy (List[float]): List to store training accuracy for each fold. - test_loss (List[float]): List to store test loss for each fold. - test_accuracy (List[float]): List to store test accuracy for each fold. - dataloader (Dataloader): DataLoader object for handling training data. - inter_variable (dict): Dictionary to store intermediate variables for backpropagation. - weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. - best_accuracy (float): Best test accuracy achieved. - patience (int): Patience for early stopping. - epochs_no_improve (int): Counter for epochs without improvement. - - Methods: - get_inout_dim:obtain input dimension and output dimension. - relu: Apply the ReLU activation function. - relu_derivative: Compute the derivative of the ReLU function. - forward: Perform a forward pass through the network. - back_prop: Perform backpropagation to compute gradients. - update_weights: Update the weights using gradients. - update_learning_rate: Adjust the learning rate based on test accuracy. - accuracy: Compute accuracy of the model. - loss: Compute weighted MSE loss. - train: Train the MLP over multiple folds with early stopping. 
- - - """ - - def __init__( - self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2 - ): + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. + gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): List to store training loss for each fold. + train_accuracy (List[float]): List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): List to store test accuracy for each fold. + dataloader (Dataloader): DataLoader object for handling training data. + inter_variable (dict): Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. + train: Train the MLP over multiple folds with early stopping. + + + """ + def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2): self.learning_rate = learning_rate # self.gamma = gamma # learning_rate decay hyperparameter gamma self.epoch = epoch @@ -160,7 +147,7 @@ def __init__( self.inter_variable = {} self.weights1_list = [] - def get_inout_dim(self): + def get_inout_dim(self) -> tuple[int, int]: """ obtain input dimension and output dimension. @@ -177,7 +164,7 @@ def get_inout_dim(self): return input_dim, output_dim - def initialize(self): + def initialize(self) -> tuple[np.ndarray, np.ndarray]: """ Initialize weights using He initialization. @@ -200,11 +187,11 @@ def initialize(self): W2 = np.random.randn(self.hidden_dim, out_dim) * 0.01 # (hidden, output) return W1, W2 - def relu(self, z): + def relu(self, input_array: np.ndarray) -> np.ndarray: """ Apply the ReLU activation function element-wise. - :param z: Input array. + :param input_array: Input array. :return: Output array after applying ReLU. >>> mlp = MLP(None, 1, 0.1) @@ -212,13 +199,13 @@ def relu(self, z): array([[0, 2], [3, 0]]) """ - return np.maximum(0, z) + return np.maximum(0, input_array) - def relu_derivative(self, z): + def relu_derivative(self, input_array: np.ndarray) -> np.ndarray: """ Compute the derivative of the ReLU function. - :param z: Input array. + :param input_array: Input array. :return: Derivative of ReLU function element-wise. >>> mlp = MLP(None, 1, 0.01) @@ -226,14 +213,21 @@ def relu_derivative(self, z): array([[0., 1.], [1., 0.]]) """ - return (z > 0).astype(float) + return (input_array > 0).astype(float) - def forward(self, x, W1, W2, no_gradient=False): + + def forward( + self, + input_data: np.ndarray, + W1: np.ndarray, + W2: np.ndarray, + no_gradient: bool = False + ) -> np.ndarray: """ Performs a forward pass through the neural network with one hidden layer. Args: - x: Input data, shape (batch_size, input_dim). 
+ input_data: Input data, shape (batch_size, input_dim). W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). no_gradient: If True, returns output without storing intermediates. @@ -250,8 +244,7 @@ def forward(self, x, W1, W2, no_gradient=False): >>> output.shape (1, 2) """ - - z1 = np.dot(x, W1) + z1 = np.dot(input_data, W1) a1 = self.relu(z1) # relu @@ -267,19 +260,24 @@ def forward(self, x, W1, W2, no_gradient=False): self.inter_variable = {"z1": z1, "a1": a1, "z2": z2, "a2": a2} return a2 - def back_prop(self, x, y, W1, W2): + def back_prop( + self, + input_data: np.ndarray, + true_labels: np.ndarray, + W1: np.ndarray, + W2: np.ndarray + ) -> tuple[np.ndarray, np.ndarray]: """ Performs backpropagation to compute gradients for the weights. Args: - x: Input data, shape (batch_size, input_dim). - y: True labels, shape (batch_size, output_dim). + input_data: Input data, shape (batch_size, input_dim). + true_labels: True labels, shape (batch_size, output_dim). W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). Returns: Tuple of gradients (grad_W1, grad_W2) for the weight matrices. - Examples: >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias >>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2 >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) >>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable >>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2) >>> grad_W1.shape (3, 2) >>> grad_W2.shape (2, 2) """ - a1 = self.inter_variable["a1"] # (batch_size, hidden_dim) z1 = self.inter_variable["z1"] a2 = self.inter_variable["a2"] # (batch_size, output_dim) z2 = self.inter_variable["z2"] - batch_size = x.shape[0] + batch_size = input_data.shape[0] # 1. output layer error - delta_k = a2 - y + delta_k = a2 - true_labels delta_j = np.dot(delta_k, W2.T) * self.relu_derivative( z1 ) # (batch, hidden_dim) when using relu @@ -310,14 +307,21 @@ def back_prop(self, x, y, W1, W2): grad_w2 = ( np.dot(a1.T, delta_k) / batch_size ) # (hidden, batch).dot(batch, output) = (hidden, output) - x_flat = x.reshape(x.shape[0], -1) # (batch_size, input_dim) + input_data_flat = input_data.reshape(input_data.shape[0], -1) # (batch_size, input_dim) grad_w1 = ( - np.dot(x_flat.T, delta_j) / batch_size + np.dot(input_data_flat.T, delta_j) / batch_size ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) return grad_w1, grad_w2 - def update_weights(self, W1, W2, grad_W1, grad_W2, learning_rate): + def update_weights( + self, + w1: np.ndarray, + w2: np.ndarray, + grad_w1: np.ndarray, + grad_w2: np.ndarray, + learning_rate: float + ) -> tuple[np.ndarray, np.ndarray]: """ Updates the weight matrices using the computed gradients and learning rate. Args: @@ -347,11 +351,12 @@ def update_weights(self, W1, W2, grad_W1, grad_W2, learning_rate): array([[ True, True], [ True, True]]) """ - W1 -= learning_rate * grad_W1 - W2 -= learning_rate * grad_W2 - return W1, W2 + w1 -= learning_rate * grad_w1 + w2 -= learning_rate * grad_w2 + return w1, w2 + - def update_learning_rate(self, learning_rate): + def update_learning_rate(self, learning_rate: float) -> float: """ Updates the learning rate by applying the decay factor gamma. Args: @@ -370,7 +375,7 @@ def update_learning_rate(self, learning_rate): return learning_rate * self.gamma @staticmethod - def accuracy(label, y_hat): + def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float: """ Computes the accuracy of predictions by comparing predicted and true labels.
@@ -391,7 +396,7 @@ def accuracy(label, y_hat): return (y_hat.argmax(axis=1) == label.argmax(axis=1)).mean() @staticmethod - def loss(output, label): + def loss(output: np.ndarray, label: np.ndarray) -> float: """ Computes the mean squared error loss between predictions and true labels. @@ -411,7 +416,7 @@ def loss(output, label): """ return np.sum((output - label) ** 2) / (2 * label.shape[0]) - def get_acc_loss(self): + def get_acc_loss(self) -> tuple[list[float], list[float]]: """ Returns the recorded test accuracy and test loss. @@ -430,7 +435,7 @@ def get_acc_loss(self): """ return self.test_accuracy, self.test_loss - def train(self): + def train(self) -> None: """ Trains the MLP model using the provided dataloader for multiple folds and epochs. @@ -440,19 +445,18 @@ def train(self): >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] >>> y = [0, 1, 0, 0] >>> loader = Dataloader(X, y) - >>> mlp = MLP(loader, epoch=10, learning_rate=0.1, hidden_dim=2) + >>> mlp = MLP(loader, epoch=2, learning_rate=0.1, hidden_dim=2) >>> mlp.train() Test accuracy: 1.0 """ learning_rate = self.learning_rate - train_data, train_labels, test_data, test_labels = ( - self.dataloader.get_Train_test_data() - ) + train_data, train_labels, test_data, test_labels = self.dataloader.get_Train_test_data() train_data = np.c_[train_data, np.ones(train_data.shape[0])] test_data = np.c_[test_data, np.ones(test_data.shape[0])] + _, total_label_num = self.dataloader.get_inout_dim() train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num) @@ -467,14 +471,13 @@ def train(self): for j in tqdm(range(self.epoch)): for k in range(0, train_data.shape[0], batch_size): # retrieve every image - batch_imgs = train_data[k : k + batch_size] - batch_labels = train_labels[k : k + batch_size] - output = self.forward(x=batch_imgs, W1=W1, W2=W2, no_gradient=False) + batch_imgs = train_data[k: k + batch_size] + batch_labels = train_labels[k: k + batch_size] - grad_W1, grad_W2 = self.back_prop( - x=batch_imgs, y=batch_labels, W1=W1, W2=W2 - ) + output = self.forward(input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False) + + grad_W1, grad_W2 = self.back_prop(input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2) W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) @@ -489,4 +492,10 @@ def train(self): self.test_accuracy = test_accuracy_list self.test_loss = test_loss_list - print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list)) + print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list)) + + +if __name__ == "__main__": + import doctest + + doctest.testmod() From c4f9f76b448b6338e25789126816aee7be8a1693 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 08:04:59 +0000 Subject: [PATCH 10/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ...ayer_perceptron_classifier_from_scratch.py | 148 ++++++++++-------- 1 file changed, 84 insertions(+), 64 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index 38e74310caf3..5f66d6bd00ad 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -2,7 +2,10 @@ from tqdm import tqdm from numpy.random import default_rng from numpy.random import seed + seed(42) + + class Dataloader: """ 
DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting. @@ -45,7 +48,9 @@ def __init__(self, features: list[list[float]], labels: list[int]) -> None: self.y = np.array(labels) self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed - def get_Train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: + def get_Train_test_data( + self, + ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: """ Splits the data into training and testing sets. Here, we manually split the data. @@ -56,13 +61,21 @@ def get_Train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[ - Test data - Test labels """ - train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training - train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray + train_data = np.array( + [self.X[0], self.X[1], self.X[2]] + ) # First 3 samples for training + train_labels = [ + np.array([self.y[0]]), + np.array([self.y[1]]), + np.array([self.y[2]]), + ] # Labels as np.ndarray test_data = np.array([self.X[3]]) # Last sample for testing test_labels = [np.array([self.y[3]])] # Labels as np.ndarray return train_data, train_labels, test_data, test_labels - def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[np.ndarray, int]]: + def shuffle_data( + self, paired_data: list[tuple[np.ndarray, int]] + ) -> list[tuple[np.ndarray, int]]: """ Shuffles the data randomly. @@ -99,40 +112,43 @@ def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray: return one_hot -class MLP(): +class MLP: + """ + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. + gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): List to store training loss for each fold. + train_accuracy (List[float]): List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): List to store test accuracy for each fold. + dataloader (Dataloader): DataLoader object for handling training data. + inter_variable (dict): Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. + train: Train the MLP over multiple folds with early stopping. + + """ - A custom MLP class for implementing a simple multi-layer perceptron with - forward propagation, backpropagation. - - Attributes: - learning_rate (float): Learning rate for gradient descent. - gamma (float): Parameter to control learning rate adjustment. - epoch (int): Number of epochs for training. 
- hidden_dim (int): Dimension of the hidden layer. - batch_size (int): Number of samples per mini-batch. - train_loss (List[float]): List to store training loss for each fold. - train_accuracy (List[float]): List to store training accuracy for each fold. - test_loss (List[float]): List to store test loss for each fold. - test_accuracy (List[float]): List to store test accuracy for each fold. - dataloader (Dataloader): DataLoader object for handling training data. - inter_variable (dict): Dictionary to store intermediate variables for backpropagation. - weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. - - Methods: - get_inout_dim:obtain input dimension and output dimension. - relu: Apply the ReLU activation function. - relu_derivative: Compute the derivative of the ReLU function. - forward: Perform a forward pass through the network. - back_prop: Perform backpropagation to compute gradients. - update_weights: Update the weights using gradients. - update_learning_rate: Adjust the learning rate based on test accuracy. - accuracy: Compute accuracy of the model. - loss: Compute weighted MSE loss. - train: Train the MLP over multiple folds with early stopping. - - - """ - def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2): + + def __init__( + self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2 + ): self.learning_rate = learning_rate # self.gamma = gamma # learning_rate decay hyperparameter gamma self.epoch = epoch @@ -215,13 +231,12 @@ def relu_derivative(self, input_array: np.ndarray) -> np.ndarray: """ return (input_array > 0).astype(float) - def forward( - self, - input_data: np.ndarray, - W1: np.ndarray, - W2: np.ndarray, - no_gradient: bool = False + self, + input_data: np.ndarray, + W1: np.ndarray, + W2: np.ndarray, + no_gradient: bool = False, ) -> np.ndarray: """ Performs a forward pass through the neural network with one hidden layer. @@ -261,11 +276,11 @@ def forward( return a2 def back_prop( - self, - input_data: np.ndarray, - true_labels: np.ndarray, - W1: np.ndarray, - W2: np.ndarray + self, + input_data: np.ndarray, + true_labels: np.ndarray, + W1: np.ndarray, + W2: np.ndarray, ) -> tuple[np.ndarray, np.ndarray]: """ Performs backpropagation to compute gradients for the weights. @@ -307,7 +322,9 @@ def back_prop( grad_w2 = ( np.dot(a1.T, delta_k) / batch_size ) # (hidden, batch).dot(batch, output) = (hidden, output) - input_data_flat = input_data.reshape(input_data.shape[0], -1) # (batch_size, input_dim) + input_data_flat = input_data.reshape( + input_data.shape[0], -1 + ) # (batch_size, input_dim) grad_w1 = ( np.dot(input_data_flat.T, delta_j) / batch_size ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) @@ -315,12 +332,12 @@ def back_prop( return grad_w1, grad_w2 def update_weights( - self, - w1: np.ndarray, - w2: np.ndarray, - grad_w1: np.ndarray, - grad_w2: np.ndarray, - learning_rate: float + self, + w1: np.ndarray, + w2: np.ndarray, + grad_w1: np.ndarray, + grad_w2: np.ndarray, + learning_rate: float, ) -> tuple[np.ndarray, np.ndarray]: """ Updates the weight matrices using the computed gradients and learning rate. @@ -355,7 +372,6 @@ def update_weights( w2 -= learning_rate * grad_w2 return w1, w2 - def update_learning_rate(self, learning_rate: float) -> float: """ Updates the learning rate by applying the decay factor gamma. 
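The hunks above are reflow-only, which makes the back_prop math easy to skim past. As a reading aid, here is a minimal, self-contained NumPy sketch of the same two-layer forward/backward pass. It mirrors the diff's shapes and its relu/relu_derivative helpers, but the linear output layer and the plain (unweighted) MSE gradient are simplifying assumptions: the patched class documents a weighted MSE loss.

import numpy as np


def relu(x: np.ndarray) -> np.ndarray:
    # ReLU activation, as in the patched class.
    return np.maximum(0.0, x)


def relu_derivative(x: np.ndarray) -> np.ndarray:
    # Matches the diff: (input_array > 0).astype(float)
    return (x > 0).astype(float)


def forward(x, w1, w2):
    z1 = x @ w1   # (batch, input) @ (input, hidden) -> (batch, hidden)
    a1 = relu(z1)
    a2 = a1 @ w2  # (batch, hidden) @ (hidden, output) -> (batch, output)
    return z1, a1, a2


def back_prop(x, y, w1, w2):
    # Gradients under a plain MSE loss with a linear output layer
    # (an assumption; the real class uses a weighted MSE).
    batch_size = x.shape[0]
    z1, a1, a2 = forward(x, w1, w2)
    delta_k = a2 - y                                  # output-layer error
    delta_j = (delta_k @ w2.T) * relu_derivative(z1)  # hidden-layer error
    grad_w2 = a1.T @ delta_k / batch_size  # (hidden, batch) @ (batch, output)
    grad_w1 = x.T @ delta_j / batch_size   # (input, batch) @ (batch, hidden)
    return grad_w1, grad_w2


rng = np.random.default_rng(0)
x = rng.normal(size=(4, 3))   # 4 samples; the last column can act as the bias term
y = np.eye(2)[[0, 1, 0, 1]]   # one-hot labels, as produced by one_hot_encode
w1 = 0.1 * rng.normal(size=(3, 2))
w2 = 0.1 * rng.normal(size=(2, 2))
grad_w1, grad_w2 = back_prop(x, y, w1, w2)
print(grad_w1.shape, grad_w2.shape)  # (3, 2) (2, 2)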
@@ -451,12 +467,13 @@ def train(self) -> None:
         """
         learning_rate = self.learning_rate
-        train_data, train_labels, test_data, test_labels = self.dataloader.get_Train_test_data()
+        train_data, train_labels, test_data, test_labels = (
+            self.dataloader.get_Train_test_data()
+        )
 
         train_data = np.c_[train_data, np.ones(train_data.shape[0])]
         test_data = np.c_[test_data, np.ones(test_data.shape[0])]
 
-
         _, total_label_num = self.dataloader.get_inout_dim()
 
         train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num)
@@ -471,13 +488,16 @@ def train(self) -> None:
 
         for j in tqdm(range(self.epoch)):
             for k in range(0, train_data.shape[0], batch_size):  # retrieve every image
+                batch_imgs = train_data[k : k + batch_size]
+                batch_labels = train_labels[k : k + batch_size]
 
-                batch_imgs = train_data[k: k + batch_size]
-                batch_labels = train_labels[k: k + batch_size]
-
-                output = self.forward(input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False)
+                output = self.forward(
+                    input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False
+                )
 
-                grad_W1, grad_W2 = self.back_prop(input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2)
+                grad_W1, grad_W2 = self.back_prop(
+                    input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2
+                )
 
                 W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate)
@@ -492,7 +512,7 @@ def train(self) -> None:
         self.test_accuracy = test_accuracy_list
         self.test_loss = test_loss_list
 
-        print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list))
+        print("Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list))
 
 
 if __name__ == "__main__":
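Two idioms in the reformatted train() hunk are worth seeing in isolation: np.c_ appends a bias column to the feature matrix, and the range(0, n, batch_size) loop yields a short final batch instead of dropping it. The toy array and batch_size below are assumptions for illustration only, not values from the patch.

import numpy as np

# Toy data standing in for the DataLoader output (an assumption).
train_data = np.arange(8.0).reshape(4, 2)                     # 4 samples, 2 features
train_data = np.c_[train_data, np.ones(train_data.shape[0])]  # bias column -> (4, 3)

batch_size = 3  # illustrative only
for k in range(0, train_data.shape[0], batch_size):
    batch = train_data[k : k + batch_size]  # the last batch may be smaller
    print(k, batch.shape)
# 0 (3, 3)
# 3 (1, 3)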