diff --git a/pydeepflow/__init__.py b/pydeepflow/__init__.py
index 91007c9..b7ab7df 100644
--- a/pydeepflow/__init__.py
+++ b/pydeepflow/__init__.py
@@ -6,7 +6,8 @@
 from .checkpoints import ModelCheckpoint
 from .regularization import Regularization
 from .early_stopping import EarlyStopping
-from .cross_validator import CrossValidator # Add CrossValidator import
+from .cross_validator import CrossValidator
+from .batch_normalization import BatchNormalization
 
 __all__ = [
     "activation",
@@ -20,5 +21,6 @@
     "ModelCheckpoint",
     "Regularization",
     "EarlyStopping",
-    "CrossValidator", # Include CrossValidator in the exported members
+    "CrossValidator",
+    "BatchNormalization",
 ]
diff --git a/pydeepflow/batch_normalization.py b/pydeepflow/batch_normalization.py
index 18df304..d6972a4 100644
--- a/pydeepflow/batch_normalization.py
+++ b/pydeepflow/batch_normalization.py
@@ -1,7 +1,24 @@
 import numpy as np
 
 class BatchNormalization:
+    """
+    A class that implements Batch Normalization for a layer in a neural network.
+
+    Batch Normalization helps stabilize the learning process and accelerate training
+    by normalizing the inputs of each layer. This class can be used during training
+    and inference.
+    """
+
     def __init__(self, layer_size, epsilon=1e-5, momentum=0.9, device=np):
+        """
+        Initializes the BatchNormalization object.
+
+        Parameters:
+            layer_size (int): The size of the layer to which batch normalization is applied.
+            epsilon (float): A small constant added to the variance for numerical stability.
+            momentum (float): The momentum for updating the running mean and variance.
+            device (module): The device module (e.g., numpy) to perform calculations on.
+        """
         self.epsilon = epsilon
         self.momentum = momentum
         self.device = device
@@ -13,6 +30,20 @@ def __init__(self, layer_size, epsilon=1e-5, momentum=0.9, device=np):
         self.running_variance = self.device.ones((1, layer_size))
 
     def normalize(self, Z, training=True):
+        """
+        Normalizes the input data Z.
+
+        During training, it computes the batch mean and variance, and updates the
+        running mean and variance. During inference, it uses the running statistics.
+
+        Parameters:
+            Z (ndarray): The input data of shape (batch_size, layer_size) to normalize.
+            training (bool): A flag indicating whether the model is in training mode.
+                If True, updates running statistics; otherwise uses them.
+
+        Returns:
+            ndarray: The normalized and scaled output data.
+        """
         if training:
             batch_mean = self.device.mean(Z, axis=0, keepdims=True)
             batch_variance = self.device.var(Z, axis=0, keepdims=True)
@@ -29,6 +60,19 @@
         return Z_scaled
 
     def backprop(self, Z, dZ, learning_rate):
+        """
+        Computes the gradients for gamma and beta during backpropagation
+        and updates their values.
+
+        Parameters:
+            Z (ndarray): The input data used for normalization, of shape (batch_size, layer_size).
+            dZ (ndarray): The gradient of the loss with respect to the output of the layer,
+                of shape (batch_size, layer_size).
+            learning_rate (float): The learning rate for updating gamma and beta.
+
+        Returns:
+            ndarray: The gradient of the loss with respect to the input data Z.
+        """
         dgamma = self.device.sum(dZ * Z, axis=0, keepdims=True)
         dbeta = self.device.sum(dZ, axis=0, keepdims=True)
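The new class is self-contained enough to smoke-test on its own. Below is a minimal sketch exercising both methods documented above, using the `from pydeepflow import BatchNormalization` export added in `__init__.py`; the expected ~0 mean / ~1 std assumes gamma and beta start at their conventional values of 1 and 0, which this diff does not show.

```python
import numpy as np
from pydeepflow import BatchNormalization

# A layer of width 4 and a batch of 8 deliberately off-center samples.
bn = BatchNormalization(layer_size=4, epsilon=1e-5, momentum=0.9, device=np)
Z = np.random.randn(8, 4) * 3.0 + 5.0

# Training mode: normalize with batch statistics and update the running stats.
Z_scaled = bn.normalize(Z, training=True)
print(Z_scaled.mean(axis=0))  # ~0 per feature (assuming gamma=1, beta=0)
print(Z_scaled.std(axis=0))   # ~1 per feature (same assumption)

# Inference mode: reuse the running mean/variance instead.
Z_eval = bn.normalize(Z, training=False)

# Backward pass: update gamma/beta and get the gradient w.r.t. the input.
dZ_scaled = np.random.randn(8, 4)  # stand-in upstream gradient
dZ = bn.backprop(Z, dZ_scaled, learning_rate=0.01)
```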
+ """ dgamma = self.device.sum(dZ * Z, axis=0, keepdims=True) dbeta = self.device.sum(dZ, axis=0, keepdims=True) diff --git a/pydeepflow/checkpoints.py b/pydeepflow/checkpoints.py index dd571e0..9593207 100644 --- a/pydeepflow/checkpoints.py +++ b/pydeepflow/checkpoints.py @@ -38,8 +38,8 @@ def save_weights(self, epoch, weights, biases, val_loss): # Prepare data to save data = {} for i, (w, b) in enumerate(zip(weights, biases)): - data[f'weights_layer_{i}'] = w # Correctly saving with this key - data[f'biases_layer_{i}'] = b # Correctly saving with this key + data[f'weights_layer_{i}'] = w + data[f'biases_layer_{i}'] = b # Save as .npz file np.savez(checkpoint_path, **data) diff --git a/pydeepflow/cross_validator.py b/pydeepflow/cross_validator.py index a083ba6..eec61cf 100644 --- a/pydeepflow/cross_validator.py +++ b/pydeepflow/cross_validator.py @@ -43,5 +43,5 @@ def get_metrics(self, y_true, y_pred, metrics): for metric in metrics: if metric == "accuracy": results['accuracy'] = np.mean(y_true == y_pred) - # Add more metrics as needed (e.g., precision, recall) + return results diff --git a/pydeepflow/device.py b/pydeepflow/device.py index 4b4458f..4e9833a 100644 --- a/pydeepflow/device.py +++ b/pydeepflow/device.py @@ -293,3 +293,60 @@ def norm(self, x, ord=None, axis=None, keepdims=False): """ return cp.linalg.norm(x, ord=ord, axis=axis, keepdims=keepdims) if self.use_gpu else np.linalg.norm(x, ord=ord, axis=axis, keepdims=keepdims) + + def ones(self, shape): + """ + Creates an array of ones with the specified shape. + + Parameters: + ----------- + shape : tuple of ints + The shape of the output array. + + Returns: + -------- + np.ndarray or cp.ndarray + An array of ones, either using NumPy or CuPy. + """ + return cp.ones(shape) if self.use_gpu else np.ones(shape) + + + def mean(self, x, axis=None, keepdims=False): + """ + Computes the mean of the input array along the specified axis. + + Parameters: + ----------- + x : np.ndarray or cp.ndarray + The input array. + axis : int or tuple of ints, optional + Axis or axes along which the means are computed. + keepdims : bool, optional + If True, the reduced dimensions are retained. + + Returns: + -------- + np.ndarray or cp.ndarray + The mean of the input array along the specified axis. + """ + return cp.mean(x, axis=axis, keepdims=keepdims) if self.use_gpu else np.mean(x, axis=axis, keepdims=keepdims) + + def var(self, x, axis=None, keepdims=False): + """ + Computes the variance of an array along a specified axis. + + Parameters: + ----------- + x : np.ndarray or cp.ndarray + Input array. + axis : int or None, optional (default=None) + Axis along which the variance is computed. + keepdims : bool, optional (default=False) + If True, the reduced dimensions will be retained. + + Returns: + -------- + np.ndarray or cp.ndarray + The variance of the input array along the specified axis. 
+ """ + return cp.var(x, axis=axis, keepdims=keepdims) if self.use_gpu else np.var(x, axis=axis, keepdims=keepdims) diff --git a/pydeepflow/model.py b/pydeepflow/model.py index 492e414..8eedea7 100644 --- a/pydeepflow/model.py +++ b/pydeepflow/model.py @@ -136,7 +136,7 @@ def backpropagation(self, X, y, activations, Z_values, learning_rate, clip_value # Apply L2 regularization to the weights self.weights[i] -= learning_rate * self.regularization.apply_l2_regularization(self.weights[i], learning_rate, X.shape) - def fit(self, epochs, learning_rate=0.01, lr_scheduler=None, early_stop=None, X_val=None, y_val=None, checkpoint=None, verbose=False, clipping_threshold=None): + def fit(self, epochs, learning_rate=0.01, lr_scheduler=None, early_stop=None, X_val=None, y_val=None, checkpoint=None, verbose=True, clipping_threshold=None): """ Trains the model for a given number of epochs with an optional learning rate scheduler. """ diff --git a/runner.py b/runner.py index 0c01590..8cf0e0b 100644 --- a/runner.py +++ b/runner.py @@ -27,16 +27,16 @@ scaler = StandardScaler() X = scaler.fit_transform(X) - # Ask the user whether to use GPU + # Ask the user whether to use GPU (simulated as False for this example) use_gpu_input = False use_gpu = True if use_gpu_input == 'y' else False # Define the architecture of the network - hidden_layers = [5, 5] - activations = ['relu', 'relu'] + hidden_layers = [5, 5] # Example: two hidden layers with 5 neurons each + activations = ['relu', 'relu'] # ReLU activations for the hidden layers # Initialize the CrossValidator - k_folds = 5 # Set the number of folds for cross-validation + k_folds = 10 # Set the number of folds for cross-validation cross_validator = CrossValidator(n_splits=k_folds) # Perform k-fold cross-validation @@ -48,41 +48,36 @@ X_train, X_val = X[train_index], X[val_index] y_train, y_val = y_one_hot[train_index], y_one_hot[val_index] - # Initialize the ANN for each fold - ann = Multi_Layer_ANN(X_train, y_train, hidden_layers, activations, loss='categorical_crossentropy', use_gpu=use_gpu) + # Initialize the ANN for each fold without batch normalization + ann = Multi_Layer_ANN(X_train, y_train, hidden_layers, activations, + loss='categorical_crossentropy', use_gpu=use_gpu) # Set up model checkpointing checkpoint = ModelCheckpoint(save_dir='./checkpoints', monitor='val_loss', save_best_only=True, save_freq=5) - # Callback functions + # Callback functions lr_scheduler = LearningRateScheduler(initial_lr=0.01, strategy="cyclic") early_stop = EarlyStopping(patience=3) # Train the model and capture history - # ann.fit(epochs=10000, learning_rate=0.01, lr_scheduler=lr_scheduler, early_stop=early_stop, - # X_val=X_val, y_val=y_val, checkpoint=checkpoint) - - ann.fit(epochs=1000, learning_rate=0.01, lr_scheduler=lr_scheduler, X_val=X_val, y_val=y_val) + ann.fit(epochs=1000, learning_rate=0.01, + lr_scheduler=lr_scheduler, + early_stop=early_stop, + X_val=X_val, + y_val=y_val, checkpoint=checkpoint) # Evaluate the model on the validation set y_pred_val = ann.predict(X_val) y_val_labels = np.argmax(y_val, axis=1) # Adjust prediction shape handling for accuracy calculation - if y_pred_val.ndim == 2: - y_pred_val_labels = np.argmax(y_pred_val, axis=1) # Multi-class classification - else: - y_pred_val_labels = (y_pred_val >= 0.5).astype(int) # Binary classification (if applicable) - + y_pred_val_labels = np.argmax(y_pred_val, axis=1) # Multi-class classification + # Calculate and store the accuracy for this fold fold_accuracy = np.mean(y_pred_val_labels == y_val_labels) 
         fold_accuracies.append(fold_accuracy)
         print(f"Fold {fold + 1} Accuracy: {fold_accuracy * 100:.2f}%")
 
-    # Print the average accuracy across all folds
-    average_accuracy = np.mean(fold_accuracies)
-    print(f"Average Accuracy across {k_folds} folds: {average_accuracy * 100:.2f}%")
-
     # Optionally plot training history of the last fold
     plot_utils = Plotting_Utils()
     plot_utils.plot_training_history(ann.history)
diff --git a/setup.py b/setup.py
index c96375b..a9bee55 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name="pydeepflow",
-    version="0.1.9", # Updated version
+    version="1.0.0", # Updated version
     author="Ravin D",
     author_email="ravin.d3107@outlook.com",
     description="A deep learning package optimized for performing Deep Learning Tasks, easy to learn and integrate into projects",
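Finally, the `model.py` hunk flips the `fit` default to `verbose=True`, so training progress is reported unless callers opt out. A minimal sketch under assumed conditions (the import path and the random stand-in data are illustrative, mirroring `runner.py`):

```python
import numpy as np
from pydeepflow.model import Multi_Layer_ANN  # path assumed from the diff's layout

# Stand-in data: 120 samples, 4 features, 3 one-hot classes.
X_train = np.random.randn(120, 4)
y_train = np.eye(3)[np.random.randint(0, 3, size=120)]

ann = Multi_Layer_ANN(X_train, y_train, [5, 5], ['relu', 'relu'],
                      loss='categorical_crossentropy', use_gpu=False)

# verbose now defaults to True; pass verbose=False to restore the old silence.
ann.fit(epochs=100, learning_rate=0.01)
```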