From 755b27fc76a8eec0cb198b0947526d8887bf60d9 Mon Sep 17 00:00:00 2001 From: WeiYFan <1521716717@qq.com> Date: Tue, 13 May 2025 17:57:48 +0800 Subject: [PATCH 01/10] Add the polynomial kernel to the SVM code --- machine_learning/support_vector_machines.py | 42 +++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index d17c9044a3e9..551a970865af 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -49,6 +49,19 @@ class SVC: Traceback (most recent call last): ... ValueError: gamma must be > 0 + + >>> SVC(kernel="polynomial") + Traceback (most recent call last): + ... + ValueError: polynomial kernel requires degree + >>> SVC(kernel="polynomial",degree=None) + Traceback (most recent call last): + ... + ValueError: degree must be int + >>> SVC(kernel="polynomial",degree=-1) + Traceback (most recent call last): + ... + ValueError: degree must be > 0 """ def __init__( @@ -57,9 +70,13 @@ def __init__( regularization: float = np.inf, kernel: str = "linear", gamma: float = 0.0, + degree: int =0.0, + coef0: float = 0.0, ) -> None: self.regularization = regularization self.gamma = gamma + self.degree = degree + self.coef0 = coef0 if kernel == "linear": self.kernel = self.__linear elif kernel == "rbf": @@ -73,6 +90,14 @@ def __init__( # in the future, there could be a default value like in sklearn # sklear: def_gamma = 1/(n_features * X.var()) (wiki) # previously it was 1/(n_features) + elif kernel == "polynomial": + if self.degree == 0: + raise ValueError("polynomial kernel requires degree") + if not isinstance(self.degree, int) : + raise ValueError("degree must be int") + if not self.degree > 0: + raise ValueError("degree must be > 0") + self.kernel = self.__polynomial else: msg = f"Unknown kernel: {kernel}" raise ValueError(msg) @@ -98,6 +123,23 @@ def __rbf(self, vector1: ndarray, vector2: ndarray) -> float: """ return np.exp(-(self.gamma * norm_squared(vector1 - vector2))) + + def __polynomial(self, vector1: ndarray, vector2: ndarray) -> float: + """ + Polynomial kernel: (x . y + coef0)^degree + + Note: for more information see: + https://en.wikipedia.org/wiki/Polynomial_kernel + + Args: + vector1 (ndarray): first vector + vector2 (ndarray): second vector + + Returns: + float: (vector1 . vector2 + coef0)^degree + """ + return (np.dot(vector1, vector2) + self.coef0) ** self.degree + def fit(self, observations: list[ndarray], classes: ndarray) -> None: """ Fits the SVC with a set of observations. 
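The kernel added in PATCH 01 is the textbook polynomial kernel K(x, y) = (x . y + coef0)^degree (see the Wikipedia link in its docstring). A minimal standalone sketch of the same computation, using illustrative vectors and hyperparameters that are not part of the patch:

import numpy as np

# K(x, y) = (x . y + coef0) ** degree, the formula SVC.__polynomial implements
x = np.array([1.0, 2.0])
y = np.array([3.0, 4.0])
coef0, degree = 1.0, 2
print((np.dot(x, y) + coef0) ** degree)  # (11.0 + 1.0) ** 2 = 144.0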
From f56f0285a7e96496e47793e3f7c7440d03e61f69 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 10:07:51 +0000 Subject: [PATCH 02/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/support_vector_machines.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 551a970865af..7ce99a241813 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -70,7 +70,7 @@ def __init__( regularization: float = np.inf, kernel: str = "linear", gamma: float = 0.0, - degree: int =0.0, + degree: int = 0.0, coef0: float = 0.0, ) -> None: self.regularization = regularization @@ -93,7 +93,7 @@ def __init__( elif kernel == "polynomial": if self.degree == 0: raise ValueError("polynomial kernel requires degree") - if not isinstance(self.degree, int) : + if not isinstance(self.degree, int): raise ValueError("degree must be int") if not self.degree > 0: raise ValueError("degree must be > 0") @@ -123,7 +123,6 @@ def __rbf(self, vector1: ndarray, vector2: ndarray) -> float: """ return np.exp(-(self.gamma * norm_squared(vector1 - vector2))) - def __polynomial(self, vector1: ndarray, vector2: ndarray) -> float: """ Polynomial kernel: (x . y + coef0)^degree From cc278280befa80cf29f1ff59f779171cb7e75699 Mon Sep 17 00:00:00 2001 From: WeiYFan <1521716717@qq.com> Date: Tue, 13 May 2025 18:18:40 +0800 Subject: [PATCH 03/10] Change the type of the degree --- machine_learning/support_vector_machines.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 551a970865af..2248e96e87b5 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -57,7 +57,7 @@ class SVC: >>> SVC(kernel="polynomial",degree=None) Traceback (most recent call last): ... - ValueError: degree must be int + ValueError: degree must be float or int >>> SVC(kernel="polynomial",degree=-1) Traceback (most recent call last): ... 
@@ -70,7 +70,7 @@ def __init__( regularization: float = np.inf, kernel: str = "linear", gamma: float = 0.0, - degree: int =0.0, + degree: float =0.0, coef0: float = 0.0, ) -> None: self.regularization = regularization @@ -93,8 +93,8 @@ def __init__( elif kernel == "polynomial": if self.degree == 0: raise ValueError("polynomial kernel requires degree") - if not isinstance(self.degree, int) : - raise ValueError("degree must be int") + if not isinstance(self.degree, (float, int)) : + raise ValueError("degree must be float or int") if not self.degree > 0: raise ValueError("degree must be > 0") self.kernel = self.__polynomial From 2ee9d30a1419da7188e37376678ea7957c3b11a4 Mon Sep 17 00:00:00 2001 From: WeiYFan <150578207+WeiYFan@users.noreply.github.com> Date: Tue, 13 May 2025 18:38:40 +0800 Subject: [PATCH 04/10] Update support_vector_machines.py --- machine_learning/support_vector_machines.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 7ce99a241813..5667ccf7e3da 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -57,8 +57,8 @@ class SVC: >>> SVC(kernel="polynomial",degree=None) Traceback (most recent call last): ... - ValueError: degree must be int - >>> SVC(kernel="polynomial",degree=-1) + ValueError: degree must be float or int + >>> SVC(kernel="polynomial",degree=-1.0) Traceback (most recent call last): ... ValueError: degree must be > 0 @@ -70,7 +70,7 @@ def __init__( regularization: float = np.inf, kernel: str = "linear", gamma: float = 0.0, - degree: int = 0.0, + degree: float=0.0, coef0: float = 0.0, ) -> None: self.regularization = regularization @@ -93,8 +93,8 @@ def __init__( elif kernel == "polynomial": if self.degree == 0: raise ValueError("polynomial kernel requires degree") - if not isinstance(self.degree, int): - raise ValueError("degree must be int") + if not isinstance(self.degree, (float, int)) : + raise ValueError("degree must be float or int") if not self.degree > 0: raise ValueError("degree must be > 0") self.kernel = self.__polynomial @@ -123,6 +123,7 @@ def __rbf(self, vector1: ndarray, vector2: ndarray) -> float: """ return np.exp(-(self.gamma * norm_squared(vector1 - vector2))) + def __polynomial(self, vector1: ndarray, vector2: ndarray) -> float: """ Polynomial kernel: (x . 
y + coef0)^degree From 35c9fad30674f48fb1d48632f3f7d418b0ee7715 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 10:47:05 +0000 Subject: [PATCH 05/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/support_vector_machines.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 5667ccf7e3da..b9a5d99a1849 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -70,7 +70,7 @@ def __init__( regularization: float = np.inf, kernel: str = "linear", gamma: float = 0.0, - degree: float=0.0, + degree: float = 0.0, coef0: float = 0.0, ) -> None: self.regularization = regularization @@ -93,7 +93,7 @@ def __init__( elif kernel == "polynomial": if self.degree == 0: raise ValueError("polynomial kernel requires degree") - if not isinstance(self.degree, (float, int)) : + if not isinstance(self.degree, (float, int)): raise ValueError("degree must be float or int") if not self.degree > 0: raise ValueError("degree must be > 0") @@ -123,7 +123,6 @@ def __rbf(self, vector1: ndarray, vector2: ndarray) -> float: """ return np.exp(-(self.gamma * norm_squared(vector1 - vector2))) - def __polynomial(self, vector1: ndarray, vector2: ndarray) -> float: """ Polynomial kernel: (x . y + coef0)^degree From 64da7b923a30493999e2d91e636927dba8cb23cd Mon Sep 17 00:00:00 2001 From: WeiYFan <1521716717@qq.com> Date: Tue, 13 May 2025 23:03:24 +0800 Subject: [PATCH 06/10] add a code file of the multi-layer perceptron classifier from scratch --- ...ayer_perceptron_classifier_from_scratch.py | 480 ++++++++++++++++++ machine_learning/support_vector_machines.py | 6 +- 2 files changed, 483 insertions(+), 3 deletions(-) create mode 100644 machine_learning/multilayer_perceptron_classifier_from_scratch.py diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py new file mode 100644 index 000000000000..0ab5db9321b9 --- /dev/null +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -0,0 +1,480 @@ +import numpy as np +from tqdm import tqdm +from typing import Tuple, List +class Dataloader: + """ + DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting. + + Example usage: + >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] + >>> y = [0, 1, 0, 0] + >>> loader = Dataloader(X, y) + >>> loader.get_Train_test_data() # Returns train and test data + (array([[0., 0.], + [1., 1.], + [1., 0.]]), [array([0]), array([1]), array([0])], array([[0., 1.]]), [array([0])]) + >>> loader.one_hot_encode([0, 1, 0], 2) # Returns one-hot encoded labels + array([[0.99, 0. ], + [0. , 0.99], + [0.99, 0. ]]) + >>> loader.get_inout_dim() + (2, 3) + >>> loader.one_hot_encode([0, 2], 3) + array([[0.99, 0. , 0. ], + [0. , 0. , 0.99]]) + """ + + def __init__(self, X: List[List[float]], y: List[int]) -> None: + """ + Initializes the Dataloader instance with feature matrix X and labels y. + + Args: + X: Feature matrix of shape (n_samples, n_features). + y: List of labels of shape (n_samples,).
+ """ + # random seed + np.random.seed(42) + + self.X = np.array(X) + self.y = np.array(y) + self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed + + def get_Train_test_data(self) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]: + """ + Splits the data into training and testing sets. Here, we manually split the data. + + Returns: + A tuple containing: + - Train data + - Train labels + - Test data + - Test labels + """ + # Manually splitting data into training and testing sets + train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training + train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray + test_data = np.array([self.X[3]]) # Last sample for testing + test_labels = [np.array([self.y[3]])] # Labels as np.ndarray + return train_data, train_labels, test_data, test_labels + + def shuffle_data(self, paired_data: List[Tuple[np.ndarray, int]]) -> List[Tuple[np.ndarray, int]]: + """ + Shuffles the data randomly. + + Args: + paired_data: List of tuples containing data and corresponding labels. + + Returns: + A shuffled list of data-label pairs. + """ + np.random.shuffle(paired_data) + return paired_data + + def get_inout_dim(self) -> Tuple[int, int]: + train_data, train_labels, test_data, test_labels = self.get_Train_test_data() + in_dim = train_data[0].shape[0] + out_dim = len(train_labels) + return in_dim, out_dim + + @staticmethod + def one_hot_encode(labels, num_classes): + """ + Perform one-hot encoding for the given labels. + + Args: + labels: List of integer labels. + num_classes: Total number of classes for encoding. + + Returns: + A numpy array representing one-hot encoded labels. + """ + one_hot = np.zeros((len(labels), num_classes)) + for idx, label in enumerate(labels): + one_hot[idx, label] = 0.99 + return one_hot + + +class MLP(): + """ + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. + gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): List to store training loss for each fold. + train_accuracy (List[float]): List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): List to store test accuracy for each fold. + dataloader (Dataloader): DataLoader object for handling training data. + inter_variable (dict): Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. + best_accuracy (float): Best test accuracy achieved. + patience (int): Patience for early stopping. + epochs_no_improve (int): Counter for epochs without improvement. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. 
+ train: Train the MLP over multiple folds with early stopping. + + + """ + def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2): + self.learning_rate = learning_rate # + self.gamma = gamma # learning_rate decay hyperparameter gamma + self.epoch = epoch + self.hidden_dim = hidden_dim + + self.train_loss = [] + self.train_accuracy = [] + self.test_loss = [] + self.test_accuracy = [] + + self.dataloader = dataloader + self.inter_variable = {} + self.weights1_list = [] + + def get_inout_dim(self): + """ + obtain input dimension and output dimension. + + :return: Tuple of weights (input_dim, output_dim) for the network. + + >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] + >>> y = [0, 1, 0, 0] + >>> loader = Dataloader(X, y) + >>> mlp = MLP(loader, 10, 0.1) + >>> mlp.get_inout_dim() + (2, 3) + """ + input_dim, output_dim = self.dataloader.get_inout_dim() + + return input_dim, output_dim + + def initialize(self): + """ + Initialize weights using He initialization. + + :return: Tuple of weights (W1, W2) for the network. + + >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] + >>> y = [0, 1, 0, 0] + >>> loader = Dataloader(X, y) + >>> mlp = MLP(loader, 10, 0.1) + >>> W1, W2 = mlp.initialize() + >>> W1.shape + (3, 2) + >>> W2.shape + (2, 3) + """ + + in_dim, out_dim = self.dataloader.get_inout_dim() # in_dim here is image dim + W1 = np.random.randn(in_dim + 1, self.hidden_dim) * 0.01 # (in_dim, hidden) + + W2 = np.random.randn(self.hidden_dim, out_dim) * 0.01 # (hidden, output) + return W1, W2 + + def relu(self, z): + """ + Apply the ReLU activation function element-wise. + + :param z: Input array. + :return: Output array after applying ReLU. + + >>> mlp = MLP(None, 1, 0.1) + >>> mlp.relu(np.array([[-1, 2], [3, -4]])) + array([[0, 2], + [3, 0]]) + """ + return np.maximum(0, z) + + def relu_derivative(self, z): + """ + Compute the derivative of the ReLU function. + + :param z: Input array. + :return: Derivative of ReLU function element-wise. + + >>> mlp = MLP(None, 1, 0.01) + >>> mlp.relu_derivative(np.array([[-1, 2], [3, -4]])) + array([[0., 1.], + [1., 0.]]) + """ + return (z > 0).astype(float) + + + def forward(self, x, W1, W2, no_gradient=False): + + """ + Performs a forward pass through the neural network with one hidden layer. + + Args: + x: Input data, shape (batch_size, input_dim). + W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). + W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + no_gradient: If True, returns output without storing intermediates. + + Returns: + Output of the network after forward pass, shape (batch_size, output_dim). + + Examples: + >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) + >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias + >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) + >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) + >>> output = mlp.forward(x, W1, W2) + >>> output.shape + (1, 2) + """ + + z1 = np.dot(x, W1) + + a1 = self.relu(z1) # relu + + # hidden → output + z2 = np.dot(a1, W2) + a2 = z2 + + + if no_gradient: + # when predict + return a2 + else: + # when training + self.inter_variable = { + "z1": z1, "a1": a1, + "z2": z2, "a2": a2 + } + return a2 + + def back_prop(self, x, y, W1, W2): + """ + Performs backpropagation to compute gradients for the weights. + + Args: + x: Input data, shape (batch_size, input_dim). + y: True labels, shape (batch_size, output_dim). 
+ W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). + W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + + Returns: + Tuple of gradients (grad_W1, grad_W2) for the weight matrices. + + Examples: + >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) + >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias + >>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2 + >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) + >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) + >>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable + >>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2) + >>> grad_W1.shape + (3, 2) + >>> grad_W2.shape + (2, 2) + """ + + + a1 = self.inter_variable["a1"] # (batch_size, hidden_dim) + z1 = self.inter_variable["z1"] + a2 = self.inter_variable["a2"] # (batch_size, output_dim) + z2 = self.inter_variable["z2"] + + batch_size = x.shape[0] + + # 1. output layer error + delta_k = a2 - y + delta_j = np.dot(delta_k, W2.T) * self.relu_derivative(z1) # (batch, hidden_dim) when using relu + + + grad_w2 = np.dot(a1.T, delta_k) / batch_size # (hidden, batch).dot(batch, output) = (hidden, output) + x_flat = x.reshape(x.shape[0], -1) # (batch_size, input_dim) + grad_w1 = np.dot(x_flat.T, delta_j) / batch_size # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) + + + return grad_w1, grad_w2 + + def update_weights(self, W1, W2, grad_W1, grad_W2, learning_rate): + """ + Updates the weight matrices using the computed gradients and learning rate. + + Args: + W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). + W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + grad_W1: Gradient for W1, shape (input_dim + 1, hidden_dim). + grad_W2: Gradient for W2, shape (hidden_dim, output_dim). + learning_rate: Learning rate for weight updates. + + Returns: + Updated weight matrices (W1, W2). + + Examples: + >>> mlp = MLP(None, 1, 0.1) + >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) + >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) + >>> grad_W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) + >>> grad_W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) + >>> learning_rate = 0.1 + >>> new_W1, new_W2 = mlp.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) + >>> new_W1==np.array([[0.09, 0.18], [0.27, 0.36], [0.45, 0.54]]) + array([[ True, True], + [ True, True], + [ True, True]]) + >>> new_W2==np.array([[0.63, 0.72], [0.81, 0.90]]) + array([[ True, True], + [ True, True]]) + """ + W1 -= learning_rate * grad_W1 + W2 -= learning_rate * grad_W2 + return W1, W2 + + def update_learning_rate(self, learning_rate): + """ + Updates the learning rate by applying the decay factor gamma. + + Args: + learning_rate: Current learning rate. + + Returns: + Updated learning rate. + + Examples: + >>> mlp = MLP(None, 1, 0.1, gamma=0.9) + >>> round(mlp.update_learning_rate(0.1), 2) + 0.09 + """ + + return learning_rate * self.gamma + + @staticmethod + def accuracy(label, y_hat): + """ + Computes the accuracy of predictions by comparing predicted and true labels. + + Args: + label: True labels, shape (batch_size, num_classes). + y_hat: Predicted outputs, shape (batch_size, num_classes). + + Returns: + Accuracy as a float between 0 and 1.
+ + Examples: + >>> mlp = MLP(None, 1, 0.01) + >>> label = np.array([[1, 0], [0, 1], [1, 0]]) + >>> y_hat = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]]) + >>> mlp.accuracy(label, y_hat) + 1.0 + """ + return (y_hat.argmax(axis=1) == label.argmax(axis=1)).mean() + + @staticmethod + def loss(output, label): + """ + Computes the mean squared error loss between predictions and true labels. + + Args: + output: Predicted outputs, shape (batch_size, num_classes). + label: True labels, shape (batch_size, num_classes). + + Returns: + Mean squared error loss as a float. + + Examples: + >>> mlp = MLP(None, 1, 0.1) + >>> output = np.array([[0.9, 0.1], [0.2, 0.8]]) + >>> label = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> round(mlp.loss(output, label), 3) + 0.025 + """ + return np.sum((output - label) ** 2) / (2 * label.shape[0]) + + def get_acc_loss(self): + """ + Returns the recorded test accuracy and test loss. + + Returns: + Tuple of (test_accuracy, test_loss) lists. + + Examples: + >>> mlp = MLP(None, 1, 0.1) + >>> mlp.test_accuracy = [0.8, 0.9] + >>> mlp.test_loss = [0.1, 0.05] + >>> acc, loss = mlp.get_acc_loss() + >>> acc + [0.8, 0.9] + >>> loss + [0.1, 0.05] + """ + return self.test_accuracy, self.test_loss + + def train(self): + """ + Trains the MLP model using the provided dataloader for multiple folds and epochs. + + Saves the best model parameters for each fold and records accuracy/loss. + + Examples: + >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] + >>> y = [0, 1, 0, 0] + >>> loader = Dataloader(X, y) + >>> mlp = MLP(loader, epoch=10, learning_rate=0.1, hidden_dim=2) + >>> mlp.train() + Test accuracy: 1.0 + """ + + learning_rate = self.learning_rate + train_data, train_labels, test_data, test_labels = self.dataloader.get_Train_test_data() + + train_data = np.c_[train_data, np.ones(train_data.shape[0])] + test_data = np.c_[test_data, np.ones(test_data.shape[0])] + + + _, total_label_num = self.dataloader.get_inout_dim() + + train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num) + test_labels = self.dataloader.one_hot_encode(test_labels, total_label_num) + + W1, W2 = self.initialize() + + train_accuracy_list, train_loss_list = [], [] + test_accuracy_list, test_loss_list = [], [] + + batch_size = 1 + + for j in tqdm(range(self.epoch)): + for k in range(0, train_data.shape[0], batch_size): # retrieve every image + + batch_imgs = train_data[k: k + batch_size] + batch_labels = train_labels[k: k + batch_size] + + output = self.forward(x=batch_imgs, W1=W1, W2=W2, no_gradient=False) + + grad_W1, grad_W2 = self.back_prop(x=batch_imgs, y=batch_labels, W1=W1, W2=W2) + + W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) + + test_output = self.forward(test_data, W1, W2, no_gradient=True) + test_accuracy = self.accuracy(test_labels, test_output) + test_loss = self.loss(test_output, test_labels) + + test_accuracy_list.append(test_accuracy) + test_loss_list.append(test_loss) + + learning_rate = self.update_learning_rate(learning_rate) + + self.test_accuracy = test_accuracy_list + self.test_loss = test_loss_list + print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list)) + + diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 334be81deb33..8af529ca8694 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -97,13 +97,13 @@ def __init__( elif kernel == "polynomial": if self.degree == 0: raise ValueError("polynomial kernel requires 
degree") -<<<<<<< HEAD + if not isinstance(self.degree, (float, int)) : raise ValueError("degree must be float or int") -======= + if not isinstance(self.degree, int): raise ValueError("degree must be int") ->>>>>>> f56f0285a7e96496e47793e3f7c7440d03e61f69 + if not self.degree > 0: raise ValueError("degree must be > 0") self.kernel = self.__polynomial From f55c264daf3a471985b9ebbee32967a2038e5d02 Mon Sep 17 00:00:00 2001 From: WeiYFan <1521716717@qq.com> Date: Tue, 13 May 2025 23:04:43 +0800 Subject: [PATCH 07/10] add a code file of the multi-layer perceptron classifier from scrach --- machine_learning/support_vector_machines.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 8af529ca8694..6891ca4893fe 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -58,7 +58,7 @@ class SVC: Traceback (most recent call last): ... ValueError: degree must be float or int - >>> SVC(kernel="polynomial",degree=-1) + >>> SVC(kernel="polynomial",degree=-1.0) Traceback (most recent call last): ... ValueError: degree must be > 0 @@ -70,11 +70,7 @@ def __init__( regularization: float = np.inf, kernel: str = "linear", gamma: float = 0.0, -<<<<<<< HEAD - degree: float =0.0, -======= - degree: int = 0.0, ->>>>>>> f56f0285a7e96496e47793e3f7c7440d03e61f69 + degree: float = 0.0, coef0: float = 0.0, ) -> None: self.regularization = regularization @@ -97,13 +93,8 @@ def __init__( elif kernel == "polynomial": if self.degree == 0: raise ValueError("polynomial kernel requires degree") - - if not isinstance(self.degree, (float, int)) : + if not isinstance(self.degree, (float, int)): raise ValueError("degree must be float or int") - - if not isinstance(self.degree, int): - raise ValueError("degree must be int") - if not self.degree > 0: raise ValueError("degree must be > 0") self.kernel = self.__polynomial @@ -253,4 +244,4 @@ def predict(self, observation: ndarray) -> int: if __name__ == "__main__": import doctest - doctest.testmod() + doctest.testmod() \ No newline at end of file From 95ba0fb9d0be0a6eee20539ecc349b286285070d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 15:17:28 +0000 Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ...ayer_perceptron_classifier_from_scratch.py | 190 ++++++++++-------- machine_learning/support_vector_machines.py | 2 +- 2 files changed, 102 insertions(+), 90 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index 0ab5db9321b9..c8af68ee7964 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -1,6 +1,8 @@ import numpy as np from tqdm import tqdm from typing import Tuple, List + + class Dataloader: """ DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting. 
@@ -39,7 +41,9 @@ def __init__(self, X: List[List[float]], y: List[int]) -> None: self.y = np.array(y) self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed - def get_Train_test_data(self) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]: + def get_Train_test_data( + self, + ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]: """ Splits the data into training and testing sets. Here, we manually split the data. @@ -51,13 +55,21 @@ def get_Train_test_data(self) -> Tuple[List[np.ndarray], List[np.ndarray], List[ - Test labels """ # Manually splitting data into training and testing sets - train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training - train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray + train_data = np.array( + [self.X[0], self.X[1], self.X[2]] + ) # First 3 samples for training + train_labels = [ + np.array([self.y[0]]), + np.array([self.y[1]]), + np.array([self.y[2]]), + ] # Labels as np.ndarray test_data = np.array([self.X[3]]) # Last sample for testing test_labels = [np.array([self.y[3]])] # Labels as np.ndarray return train_data, train_labels, test_data, test_labels - def shuffle_data(self, paired_data: List[Tuple[np.ndarray, int]]) -> List[Tuple[np.ndarray, int]]: + def shuffle_data( + self, paired_data: List[Tuple[np.ndarray, int]] + ) -> List[Tuple[np.ndarray, int]]: """ Shuffles the data randomly. @@ -79,14 +91,14 @@ def get_inout_dim(self) -> Tuple[int, int]: @staticmethod def one_hot_encode(labels, num_classes): """ - Perform one-hot encoding for the given labels. + Perform one-hot encoding for the given labels. - Args: - labels: List of integer labels. - num_classes: Total number of classes for encoding. + Args: + labels: List of integer labels. + num_classes: Total number of classes for encoding. - Returns: - A numpy array representing one-hot encoded labels. + Returns: + A numpy array representing one-hot encoded labels. """ one_hot = np.zeros((len(labels), num_classes)) for idx, label in enumerate(labels): @@ -94,43 +106,46 @@ def one_hot_encode(labels, num_classes): return one_hot -class MLP(): +class MLP: """ - A custom MLP class for implementing a simple multi-layer perceptron with - forward propagation, backpropagation. - - Attributes: - learning_rate (float): Learning rate for gradient descent. - gamma (float): Parameter to control learning rate adjustment. - epoch (int): Number of epochs for training. - hidden_dim (int): Dimension of the hidden layer. - batch_size (int): Number of samples per mini-batch. - train_loss (List[float]): List to store training loss for each fold. - train_accuracy (List[float]): List to store training accuracy for each fold. - test_loss (List[float]): List to store test loss for each fold. - test_accuracy (List[float]): List to store test accuracy for each fold. - dataloader (Dataloader): DataLoader object for handling training data. - inter_variable (dict): Dictionary to store intermediate variables for backpropagation. - weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. - best_accuracy (float): Best test accuracy achieved. - patience (int): Patience for early stopping. - epochs_no_improve (int): Counter for epochs without improvement. - - Methods: - get_inout_dim:obtain input dimension and output dimension. - relu: Apply the ReLU activation function. - relu_derivative: Compute the derivative of the ReLU function. 
- forward: Perform a forward pass through the network. - back_prop: Perform backpropagation to compute gradients. - update_weights: Update the weights using gradients. - update_learning_rate: Adjust the learning rate based on test accuracy. - accuracy: Compute accuracy of the model. - loss: Compute weighted MSE loss. - train: Train the MLP over multiple folds with early stopping. - - - """ - def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2): + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. + gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): List to store training loss for each fold. + train_accuracy (List[float]): List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): List to store test accuracy for each fold. + dataloader (Dataloader): DataLoader object for handling training data. + inter_variable (dict): Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. + best_accuracy (float): Best test accuracy achieved. + patience (int): Patience for early stopping. + epochs_no_improve (int): Counter for epochs without improvement. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. + train: Train the MLP over multiple folds with early stopping. + + + """ + + def __init__( + self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2 + ): self.learning_rate = learning_rate # self.gamma = gamma # learning_rate decay hyperparameter gamma self.epoch = epoch @@ -213,9 +228,7 @@ def relu_derivative(self, z): """ return (z > 0).astype(float) - def forward(self, x, W1, W2, no_gradient=False): - """ Performs a forward pass through the neural network with one hidden layer. @@ -246,45 +259,40 @@ def forward(self, x, W1, W2, no_gradient=False): z2 = np.dot(a1, W2) a2 = z2 - if no_gradient: # when predict return a2 else: # when training - self.inter_variable = { - "z1": z1, "a1": a1, - "z2": z2, "a2": a2 - } + self.inter_variable = {"z1": z1, "a1": a1, "z2": z2, "a2": a2} return a2 def back_prop(self, x, y, W1, W2): """ - Performs backpropagation to compute gradients for the weights. - - Args: - x: Input data, shape (batch_size, input_dim). - y: True labels, shape (batch_size, output_dim). - W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). - W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + Performs backpropagation to compute gradients for the weights. - Returns: - Tuple of gradients (grad_W1, grad_W2) for the weight matrices. + Args: + x: Input data, shape (batch_size, input_dim). + y: True labels, shape (batch_size, output_dim). 
+ W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). + W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). - Returns: - Tuple of gradients (grad_W1, grad_W2) for the weight matrices. + Returns: + Tuple of gradients (grad_W1, grad_W2) for the weight matrices. - Examples: - >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) - >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias - >>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2 - >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) - >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) - >>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable - >>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2) - >>> grad_W1.shape - (3, 2) - >>> grad_W2.shape - (2, 2) - """ + Examples: + >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) + >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias + >>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2 + >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) + >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) + >>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable + >>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2) + >>> grad_W1.shape + (3, 2) + >>> grad_W2.shape + (2, 2) + """ a1 = self.inter_variable["a1"] # (batch_size, hidden_dim) z1 = self.inter_variable["z1"] @@ -295,13 +303,17 @@ def back_prop(self, x, y, W1, W2): # 1. output layer error delta_k = a2 - y - delta_j = np.dot(delta_k, W2.T) * self.relu_derivative(z1) # (batch, hidden_dim) when using relu - + delta_j = np.dot(delta_k, W2.T) * self.relu_derivative( + z1 + ) # (batch, hidden_dim) when using relu - grad_w2 = np.dot(a1.T, delta_k) / batch_size # (hidden, batch).dot(batch, output) = (hidden, output) + grad_w2 = ( + np.dot(a1.T, delta_k) / batch_size + ) # (hidden, batch).dot(batch, output) = (hidden, output) x_flat = x.reshape(x.shape[0], -1) # (batch_size, input_dim) - grad_w1 = np.dot(x_flat.T, delta_j) / batch_size # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) - + grad_w1 = ( + np.dot(x_flat.T, delta_j) / batch_size + ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) return grad_w1, grad_w2 @@ -434,12 +446,13 @@ def train(self): """ learning_rate = self.learning_rate - train_data, train_labels, test_data, test_labels = self.dataloader.get_Train_test_data() + train_data, train_labels, test_data, test_labels = ( + self.dataloader.get_Train_test_data() + ) train_data = np.c_[train_data, np.ones(train_data.shape[0])] test_data = np.c_[test_data, np.ones(test_data.shape[0])] - _, total_label_num = self.dataloader.get_inout_dim() train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num) @@ -454,13 +467,14 @@ def train(self): for j in tqdm(range(self.epoch)): for k in range(0, train_data.shape[0], batch_size): # retrieve every image - - batch_imgs = train_data[k: k + batch_size] - batch_labels = train_labels[k: k + batch_size] + batch_imgs = train_data[k : k + batch_size] + batch_labels = train_labels[k : k + batch_size] output = self.forward(x=batch_imgs, W1=W1, W2=W2, no_gradient=False) - grad_W1, grad_W2 = self.back_prop(x=batch_imgs, y=batch_labels, W1=W1, W2=W2) + grad_W1, grad_W2 = self.back_prop( + x=batch_imgs, y=batch_labels, W1=W1, W2=W2 + ) W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) @@ -475,6 +489,4 @@ def train(self): self.test_accuracy = test_accuracy_list self.test_loss = test_loss_list - print(f"Test accuracy:",
sum(test_accuracy_list)/len(test_accuracy_list)) - - + print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list)) diff --git a/machine_learning/support_vector_machines.py b/machine_learning/support_vector_machines.py index 6891ca4893fe..b9a5d99a1849 100644 --- a/machine_learning/support_vector_machines.py +++ b/machine_learning/support_vector_machines.py @@ -244,4 +244,4 @@ def predict(self, observation: ndarray) -> int: if __name__ == "__main__": import doctest - doctest.testmod() \ No newline at end of file + doctest.testmod() From 2e72d9d812817cc8def5bb510dffb2944e047f33 Mon Sep 17 00:00:00 2001 From: WeiYFan <150578207+WeiYFan@users.noreply.github.com> Date: Wed, 14 May 2025 16:04:34 +0800 Subject: [PATCH 09/10] Update multilayer_perceptron_classifier_from_scratch.py --- ...ayer_perceptron_classifier_from_scratch.py | 235 +++++++++--------- 1 file changed, 122 insertions(+), 113 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index c8af68ee7964..38e74310caf3 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -1,8 +1,8 @@ import numpy as np from tqdm import tqdm -from typing import Tuple, List - - +from numpy.random import default_rng +from numpy.random import seed +seed(42) class Dataloader: """ DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting. @@ -11,10 +11,15 @@ class Dataloader: >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] >>> y = [0, 1, 0, 0] >>> loader = Dataloader(X, y) - >>> loader.get_Train_test_data() # Returns train and test data - (array([[0., 0.], - [1., 1.], - [1., 0.]]), [array([0]), array([1]), array([0])], array([[0., 1.]]), [array([0])]) + >>> train_X, train_y, test_X, test_y = loader.get_Train_test_data() + >>> train_X.shape + (3, 2) + >>> len(train_y) + 3 + >>> test_X.shape + (1, 2) + >>> len(test_y) + 1 >>> loader.one_hot_encode([0, 1, 0], 2) # Returns one-hot encoded labels array([[0.99, 0. ], [0. , 0.99], @@ -26,24 +31,21 @@ class Dataloader: [0. , 0. , 0.99]]) """ - def __init__(self, X: List[List[float]], y: List[int]) -> None: + def __init__(self, features: list[list[float]], labels: list[int]) -> None: """ - Initializes the Dataloader instance with feature matrix X and labels y. + Initializes the Dataloader instance with feature matrix features and labels labels. Args: - X: Feature matrix of shape (n_samples, n_features). - y: List of labels of shape (n_samples,). + features: Feature matrix of shape (n_samples, n_features). + labels: List of labels of shape (n_samples,). """ # random seed - np.random.seed(42) - - self.X = np.array(X) - self.y = np.array(y) + self.rng = default_rng(42) # Create a random number generator with a seed + self.X = np.array(features) + self.y = np.array(labels) self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed - def get_Train_test_data( - self, - ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]: + def get_Train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: """ Splits the data into training and testing sets. Here, we manually split the data. 
@@ -54,22 +56,13 @@ def get_Train_test_data( - Test data - Test labels """ - # Manually splitting data into training and testing sets - train_data = np.array( - [self.X[0], self.X[1], self.X[2]] - ) # First 3 samples for training - train_labels = [ - np.array([self.y[0]]), - np.array([self.y[1]]), - np.array([self.y[2]]), - ] # Labels as np.ndarray + train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training + train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray test_data = np.array([self.X[3]]) # Last sample for testing test_labels = [np.array([self.y[3]])] # Labels as np.ndarray return train_data, train_labels, test_data, test_labels - def shuffle_data( - self, paired_data: List[Tuple[np.ndarray, int]] - ) -> List[Tuple[np.ndarray, int]]: + def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[np.ndarray, int]]: """ Shuffles the data randomly. @@ -79,17 +72,17 @@ def shuffle_data( Returns: A shuffled list of data-label pairs. """ - np.random.shuffle(paired_data) + default_rng.shuffle(paired_data) # Using the new random number generator return paired_data - def get_inout_dim(self) -> Tuple[int, int]: + def get_inout_dim(self) -> tuple[int, int]: train_data, train_labels, test_data, test_labels = self.get_Train_test_data() in_dim = train_data[0].shape[0] out_dim = len(train_labels) return in_dim, out_dim @staticmethod - def one_hot_encode(labels, num_classes): + def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray: """ Perform one-hot encoding for the given labels. @@ -106,46 +99,40 @@ def one_hot_encode(labels, num_classes): return one_hot -class MLP: +class MLP(): """ - A custom MLP class for implementing a simple multi-layer perceptron with - forward propagation, backpropagation. - - Attributes: - learning_rate (float): Learning rate for gradient descent. - gamma (float): Parameter to control learning rate adjustment. - epoch (int): Number of epochs for training. - hidden_dim (int): Dimension of the hidden layer. - batch_size (int): Number of samples per mini-batch. - train_loss (List[float]): List to store training loss for each fold. - train_accuracy (List[float]): List to store training accuracy for each fold. - test_loss (List[float]): List to store test loss for each fold. - test_accuracy (List[float]): List to store test accuracy for each fold. - dataloader (Dataloader): DataLoader object for handling training data. - inter_variable (dict): Dictionary to store intermediate variables for backpropagation. - weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. - best_accuracy (float): Best test accuracy achieved. - patience (int): Patience for early stopping. - epochs_no_improve (int): Counter for epochs without improvement. - - Methods: - get_inout_dim:obtain input dimension and output dimension. - relu: Apply the ReLU activation function. - relu_derivative: Compute the derivative of the ReLU function. - forward: Perform a forward pass through the network. - back_prop: Perform backpropagation to compute gradients. - update_weights: Update the weights using gradients. - update_learning_rate: Adjust the learning rate based on test accuracy. - accuracy: Compute accuracy of the model. - loss: Compute weighted MSE loss. - train: Train the MLP over multiple folds with early stopping. 
- - - """ - - def __init__( - self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2 - ): + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. + gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): List to store training loss for each fold. + train_accuracy (List[float]): List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): List to store test accuracy for each fold. + dataloader (Dataloader): DataLoader object for handling training data. + inter_variable (dict): Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. + train: Train the MLP over multiple folds with early stopping. + + + """ + def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2): self.learning_rate = learning_rate # self.gamma = gamma # learning_rate decay hyperparameter gamma self.epoch = epoch @@ -160,7 +147,7 @@ def __init__( self.inter_variable = {} self.weights1_list = [] - def get_inout_dim(self): + def get_inout_dim(self) -> tuple[int, int]: """ obtain input dimension and output dimension. @@ -177,7 +164,7 @@ def get_inout_dim(self): return input_dim, output_dim - def initialize(self): + def initialize(self) -> tuple[np.ndarray, np.ndarray]: """ Initialize weights using He initialization. @@ -200,11 +187,11 @@ def initialize(self): W2 = np.random.randn(self.hidden_dim, out_dim) * 0.01 # (hidden, output) return W1, W2 - def relu(self, z): + def relu(self, input_array: np.ndarray) -> np.ndarray: """ Apply the ReLU activation function element-wise. - :param z: Input array. + :param input_array: Input array. :return: Output array after applying ReLU. >>> mlp = MLP(None, 1, 0.1) @@ -212,13 +199,13 @@ def relu(self, z): array([[0, 2], [3, 0]]) """ - return np.maximum(0, z) + return np.maximum(0, input_array) - def relu_derivative(self, z): + def relu_derivative(self, input_array: np.ndarray) -> np.ndarray: """ Compute the derivative of the ReLU function. - :param z: Input array. + :param input_array: Input array. :return: Derivative of ReLU function element-wise. >>> mlp = MLP(None, 1, 0.01) @@ -226,14 +213,21 @@ def relu_derivative(self, z): array([[0., 1.], [1., 0.]]) """ - return (z > 0).astype(float) + return (input_array > 0).astype(float) - def forward(self, x, W1, W2, no_gradient=False): + + def forward( + self, + input_data: np.ndarray, + W1: np.ndarray, + W2: np.ndarray, + no_gradient: bool = False + ) -> np.ndarray: """ Performs a forward pass through the neural network with one hidden layer. Args: - x: Input data, shape (batch_size, input_dim). 
+ input_data: Input data, shape (batch_size, input_dim). W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). no_gradient: If True, returns output without storing intermediates. @@ -250,8 +244,7 @@ def forward(self, x, W1, W2, no_gradient=False): >>> output.shape (1, 2) """ - - z1 = np.dot(x, W1) + z1 = np.dot(input_data, W1) a1 = self.relu(z1) # relu @@ -267,19 +260,24 @@ def forward(self, x, W1, W2, no_gradient=False): self.inter_variable = {"z1": z1, "a1": a1, "z2": z2, "a2": a2} return a2 - def back_prop(self, x, y, W1, W2): + def back_prop( + self, + input_data: np.ndarray, + true_labels: np.ndarray, + W1: np.ndarray, + W2: np.ndarray + ) -> tuple[np.ndarray, np.ndarray]: """ Performs backpropagation to compute gradients for the weights. Args: - x: Input data, shape (batch_size, input_dim). - y: True labels, shape (batch_size, output_dim). + input_data: Input data, shape (batch_size, input_dim). + true_labels: True labels, shape (batch_size, output_dim). W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). Returns: Tuple of gradients (grad_W1, grad_W2) for the weight matrices. - Examples: >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias >>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2 >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) >>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable >>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2) >>> grad_W1.shape (3, 2) >>> grad_W2.shape (2, 2) """ - a1 = self.inter_variable["a1"] # (batch_size, hidden_dim) z1 = self.inter_variable["z1"] a2 = self.inter_variable["a2"] # (batch_size, output_dim) z2 = self.inter_variable["z2"] - batch_size = x.shape[0] + batch_size = input_data.shape[0] # 1. output layer error - delta_k = a2 - y + delta_k = a2 - true_labels delta_j = np.dot(delta_k, W2.T) * self.relu_derivative( z1 ) # (batch, hidden_dim) when using relu @@ -310,14 +307,21 @@ def back_prop(self, x, y, W1, W2): grad_w2 = ( np.dot(a1.T, delta_k) / batch_size ) # (hidden, batch).dot(batch, output) = (hidden, output) - x_flat = x.reshape(x.shape[0], -1) # (batch_size, input_dim) + input_data_flat = input_data.reshape(input_data.shape[0], -1) # (batch_size, input_dim) grad_w1 = ( - np.dot(x_flat.T, delta_j) / batch_size + np.dot(input_data_flat.T, delta_j) / batch_size ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) return grad_w1, grad_w2 - def update_weights(self, W1, W2, grad_W1, grad_W2, learning_rate): + def update_weights( + self, + w1: np.ndarray, + w2: np.ndarray, + grad_w1: np.ndarray, + grad_w2: np.ndarray, + learning_rate: float + ) -> tuple[np.ndarray, np.ndarray]: """ Updates the weight matrices using the computed gradients and learning rate. Args: @@ -347,11 +351,12 @@ def update_weights(self, W1, W2, grad_W1, grad_W2, learning_rate): array([[ True, True], [ True, True]]) """ - W1 -= learning_rate * grad_W1 - W2 -= learning_rate * grad_W2 - return W1, W2 + w1 -= learning_rate * grad_w1 + w2 -= learning_rate * grad_w2 + return w1, w2 + - def update_learning_rate(self, learning_rate): + def update_learning_rate(self, learning_rate: float) -> float: """ Updates the learning rate by applying the decay factor gamma. Args: @@ -370,7 +375,7 @@ def update_learning_rate(self, learning_rate): return learning_rate * self.gamma @staticmethod - def accuracy(label, y_hat): + def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float: """ Computes the accuracy of predictions by comparing predicted and true labels.
@@ -391,7 +396,7 @@ def accuracy(label, y_hat): return (y_hat.argmax(axis=1) == label.argmax(axis=1)).mean() @staticmethod - def loss(output, label): + def loss(output: np.ndarray, label: np.ndarray) -> float: """ Computes the mean squared error loss between predictions and true labels. @@ -411,7 +416,7 @@ def loss(output, label): """ return np.sum((output - label) ** 2) / (2 * label.shape[0]) - def get_acc_loss(self): + def get_acc_loss(self) -> tuple[list[float], list[float]]: """ Returns the recorded test accuracy and test loss. @@ -430,7 +435,7 @@ def get_acc_loss(self): """ return self.test_accuracy, self.test_loss - def train(self): + def train(self) -> None: """ Trains the MLP model using the provided dataloader for multiple folds and epochs. @@ -440,19 +445,18 @@ def train(self): >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] >>> y = [0, 1, 0, 0] >>> loader = Dataloader(X, y) - >>> mlp = MLP(loader, epoch=10, learning_rate=0.1, hidden_dim=2) + >>> mlp = MLP(loader, epoch=2, learning_rate=0.1, hidden_dim=2) >>> mlp.train() Test accuracy: 1.0 """ learning_rate = self.learning_rate - train_data, train_labels, test_data, test_labels = ( - self.dataloader.get_Train_test_data() - ) + train_data, train_labels, test_data, test_labels = self.dataloader.get_Train_test_data() train_data = np.c_[train_data, np.ones(train_data.shape[0])] test_data = np.c_[test_data, np.ones(test_data.shape[0])] + _, total_label_num = self.dataloader.get_inout_dim() train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num) @@ -467,14 +471,13 @@ def train(self): for j in tqdm(range(self.epoch)): for k in range(0, train_data.shape[0], batch_size): # retrieve every image - batch_imgs = train_data[k : k + batch_size] - batch_labels = train_labels[k : k + batch_size] - output = self.forward(x=batch_imgs, W1=W1, W2=W2, no_gradient=False) + batch_imgs = train_data[k: k + batch_size] + batch_labels = train_labels[k: k + batch_size] - grad_W1, grad_W2 = self.back_prop( - x=batch_imgs, y=batch_labels, W1=W1, W2=W2 - ) + output = self.forward(input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False) + + grad_W1, grad_W2 = self.back_prop(input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2) W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) @@ -489,4 +492,10 @@ def train(self): self.test_accuracy = test_accuracy_list self.test_loss = test_loss_list - print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list)) + print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list)) + + +if __name__ == "__main__": + import doctest + + doctest.testmod() From c4f9f76b448b6338e25789126816aee7be8a1693 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 08:04:59 +0000 Subject: [PATCH 10/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ...ayer_perceptron_classifier_from_scratch.py | 148 ++++++++++-------- 1 file changed, 84 insertions(+), 64 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index 38e74310caf3..5f66d6bd00ad 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -2,7 +2,10 @@ from tqdm import tqdm from numpy.random import default_rng from numpy.random import seed + seed(42) + + class Dataloader: """ 
DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting. @@ -45,7 +48,9 @@ def __init__(self, features: list[list[float]], labels: list[int]) -> None: self.y = np.array(labels) self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed - def get_Train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: + def get_Train_test_data( + self, + ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: """ Splits the data into training and testing sets. Here, we manually split the data. @@ -56,13 +61,21 @@ def get_Train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[ - Test data - Test labels """ - train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training - train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray + train_data = np.array( + [self.X[0], self.X[1], self.X[2]] + ) # First 3 samples for training + train_labels = [ + np.array([self.y[0]]), + np.array([self.y[1]]), + np.array([self.y[2]]), + ] # Labels as np.ndarray test_data = np.array([self.X[3]]) # Last sample for testing test_labels = [np.array([self.y[3]])] # Labels as np.ndarray return train_data, train_labels, test_data, test_labels - def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[np.ndarray, int]]: + def shuffle_data( + self, paired_data: list[tuple[np.ndarray, int]] + ) -> list[tuple[np.ndarray, int]]: """ Shuffles the data randomly. @@ -99,40 +112,43 @@ def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray: return one_hot -class MLP(): +class MLP: + """ + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. + gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): List to store training loss for each fold. + train_accuracy (List[float]): List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): List to store test accuracy for each fold. + dataloader (Dataloader): DataLoader object for handling training data. + inter_variable (dict): Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. + train: Train the MLP over multiple folds with early stopping. + + """ - A custom MLP class for implementing a simple multi-layer perceptron with - forward propagation, backpropagation. - - Attributes: - learning_rate (float): Learning rate for gradient descent. - gamma (float): Parameter to control learning rate adjustment. - epoch (int): Number of epochs for training. 
- hidden_dim (int): Dimension of the hidden layer. - batch_size (int): Number of samples per mini-batch. - train_loss (List[float]): List to store training loss for each fold. - train_accuracy (List[float]): List to store training accuracy for each fold. - test_loss (List[float]): List to store test loss for each fold. - test_accuracy (List[float]): List to store test accuracy for each fold. - dataloader (Dataloader): DataLoader object for handling training data. - inter_variable (dict): Dictionary to store intermediate variables for backpropagation. - weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. - - Methods: - get_inout_dim:obtain input dimension and output dimension. - relu: Apply the ReLU activation function. - relu_derivative: Compute the derivative of the ReLU function. - forward: Perform a forward pass through the network. - back_prop: Perform backpropagation to compute gradients. - update_weights: Update the weights using gradients. - update_learning_rate: Adjust the learning rate based on test accuracy. - accuracy: Compute accuracy of the model. - loss: Compute weighted MSE loss. - train: Train the MLP over multiple folds with early stopping. - - - """ - def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2): + + def __init__( + self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2 + ): self.learning_rate = learning_rate # self.gamma = gamma # learning_rate decay hyperparameter gamma self.epoch = epoch @@ -215,13 +231,12 @@ def relu_derivative(self, input_array: np.ndarray) -> np.ndarray: """ return (input_array > 0).astype(float) - def forward( - self, - input_data: np.ndarray, - W1: np.ndarray, - W2: np.ndarray, - no_gradient: bool = False + self, + input_data: np.ndarray, + W1: np.ndarray, + W2: np.ndarray, + no_gradient: bool = False, ) -> np.ndarray: """ Performs a forward pass through the neural network with one hidden layer. @@ -261,11 +276,11 @@ def forward( return a2 def back_prop( - self, - input_data: np.ndarray, - true_labels: np.ndarray, - W1: np.ndarray, - W2: np.ndarray + self, + input_data: np.ndarray, + true_labels: np.ndarray, + W1: np.ndarray, + W2: np.ndarray, ) -> tuple[np.ndarray, np.ndarray]: """ Performs backpropagation to compute gradients for the weights. @@ -307,7 +322,9 @@ def back_prop( grad_w2 = ( np.dot(a1.T, delta_k) / batch_size ) # (hidden, batch).dot(batch, output) = (hidden, output) - input_data_flat = input_data.reshape(input_data.shape[0], -1) # (batch_size, input_dim) + input_data_flat = input_data.reshape( + input_data.shape[0], -1 + ) # (batch_size, input_dim) grad_w1 = ( np.dot(input_data_flat.T, delta_j) / batch_size ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) @@ -315,12 +332,12 @@ def back_prop( return grad_w1, grad_w2 def update_weights( - self, - w1: np.ndarray, - w2: np.ndarray, - grad_w1: np.ndarray, - grad_w2: np.ndarray, - learning_rate: float + self, + w1: np.ndarray, + w2: np.ndarray, + grad_w1: np.ndarray, + grad_w2: np.ndarray, + learning_rate: float, ) -> tuple[np.ndarray, np.ndarray]: """ Updates the weight matrices using the computed gradients and learning rate. @@ -355,7 +372,6 @@ def update_weights( w2 -= learning_rate * grad_w2 return w1, w2 - def update_learning_rate(self, learning_rate: float) -> float: """ Updates the learning rate by applying the decay factor gamma. 
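The hunks above are reflow-only, which makes the back_prop math easy to skim past. As a reading aid, here is a minimal, self-contained NumPy sketch of the same two-layer forward/backward pass. It mirrors the diff's shapes and its relu/relu_derivative helpers, but the linear output layer and the plain (unweighted) MSE gradient are simplifying assumptions: the patched class documents a weighted MSE loss.

import numpy as np


def relu(x: np.ndarray) -> np.ndarray:
    # ReLU activation, as in the patched class.
    return np.maximum(0.0, x)


def relu_derivative(x: np.ndarray) -> np.ndarray:
    # Matches the diff: (input_array > 0).astype(float)
    return (x > 0).astype(float)


def forward(x, w1, w2):
    z1 = x @ w1   # (batch, input) @ (input, hidden) -> (batch, hidden)
    a1 = relu(z1)
    a2 = a1 @ w2  # (batch, hidden) @ (hidden, output) -> (batch, output)
    return z1, a1, a2


def back_prop(x, y, w1, w2):
    # Gradients under a plain MSE loss with a linear output layer
    # (an assumption; the real class uses a weighted MSE).
    batch_size = x.shape[0]
    z1, a1, a2 = forward(x, w1, w2)
    delta_k = a2 - y                                  # output-layer error
    delta_j = (delta_k @ w2.T) * relu_derivative(z1)  # hidden-layer error
    grad_w2 = a1.T @ delta_k / batch_size  # (hidden, batch) @ (batch, output)
    grad_w1 = x.T @ delta_j / batch_size   # (input, batch) @ (batch, hidden)
    return grad_w1, grad_w2


rng = np.random.default_rng(0)
x = rng.normal(size=(4, 3))   # 4 samples; the last column can act as the bias term
y = np.eye(2)[[0, 1, 0, 1]]   # one-hot labels, as produced by one_hot_encode
w1 = 0.1 * rng.normal(size=(3, 2))
w2 = 0.1 * rng.normal(size=(2, 2))
grad_w1, grad_w2 = back_prop(x, y, w1, w2)
print(grad_w1.shape, grad_w2.shape)  # (3, 2) (2, 2)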
@@ -451,12 +467,13 @@ def train(self) -> None:
         """
         learning_rate = self.learning_rate
-        train_data, train_labels, test_data, test_labels = self.dataloader.get_Train_test_data()
+        train_data, train_labels, test_data, test_labels = (
+            self.dataloader.get_Train_test_data()
+        )
 
         train_data = np.c_[train_data, np.ones(train_data.shape[0])]
         test_data = np.c_[test_data, np.ones(test_data.shape[0])]
 
-
         _, total_label_num = self.dataloader.get_inout_dim()
 
         train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num)
@@ -471,13 +488,16 @@ def train(self) -> None:
 
         for j in tqdm(range(self.epoch)):
             for k in range(0, train_data.shape[0], batch_size):  # retrieve every image
+                batch_imgs = train_data[k : k + batch_size]
+                batch_labels = train_labels[k : k + batch_size]
 
-                batch_imgs = train_data[k: k + batch_size]
-                batch_labels = train_labels[k: k + batch_size]
-
-                output = self.forward(input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False)
+                output = self.forward(
+                    input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False
+                )
 
-                grad_W1, grad_W2 = self.back_prop(input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2)
+                grad_W1, grad_W2 = self.back_prop(
+                    input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2
+                )
 
                 W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate)
@@ -492,7 +512,7 @@ def train(self) -> None:
         self.test_accuracy = test_accuracy_list
         self.test_loss = test_loss_list
 
-        print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list))
+        print("Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list))
 
 
 if __name__ == "__main__":
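Two idioms in the reformatted train() hunk are worth seeing in isolation: np.c_ appends a bias column to the feature matrix, and the range(0, n, batch_size) loop yields a short final batch instead of dropping it. The toy array and batch_size below are assumptions for illustration only, not values from the patch.

import numpy as np

# Toy data standing in for the DataLoader output (an assumption).
train_data = np.arange(8.0).reshape(4, 2)                     # 4 samples, 2 features
train_data = np.c_[train_data, np.ones(train_data.shape[0])]  # bias column -> (4, 3)

batch_size = 3  # illustrative only
for k in range(0, train_data.shape[0], batch_size):
    batch = train_data[k : k + batch_size]  # the last batch may be smaller
    print(k, batch.shape)
# 0 (3, 3)
# 3 (1, 3)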