diff --git a/pydeepflow/__init__.py b/pydeepflow/__init__.py
index 91007c9..b7ab7df 100644
--- a/pydeepflow/__init__.py
+++ b/pydeepflow/__init__.py
@@ -6,7 +6,8 @@
 from .checkpoints import ModelCheckpoint
 from .regularization import Regularization
 from .early_stopping import EarlyStopping
-from .cross_validator import CrossValidator # Add CrossValidator import
+from .cross_validator import CrossValidator
+from .batch_normalization import BatchNormalization
 
 __all__ = [
     "activation",
@@ -20,5 +21,6 @@
     "ModelCheckpoint",
     "Regularization",
     "EarlyStopping",
-    "CrossValidator", # Include CrossValidator in the exported members
+    "CrossValidator",
+    "BatchNormalization",
 ]
diff --git a/pydeepflow/batch_normalization.py b/pydeepflow/batch_normalization.py
index 18df304..d6972a4 100644
--- a/pydeepflow/batch_normalization.py
+++ b/pydeepflow/batch_normalization.py
@@ -1,7 +1,24 @@
 import numpy as np
 
 class BatchNormalization:
+    """
+    A class that implements Batch Normalization for a layer in a neural network.
+
+    Batch Normalization helps stabilize the learning process and accelerate training
+    by normalizing the inputs of each layer. This class can be used during training
+    and inference.
+    """
+
     def __init__(self, layer_size, epsilon=1e-5, momentum=0.9, device=np):
+        """
+        Initializes the BatchNormalization object.
+
+        Parameters:
+            layer_size (int): The size of the layer to which batch normalization is applied.
+            epsilon (float): A small constant added to the variance for numerical stability.
+            momentum (float): The momentum for updating the running mean and variance.
+            device (module): The device module (e.g., numpy) to perform calculations on.
+        """
         self.epsilon = epsilon
         self.momentum = momentum
         self.device = device
@@ -13,6 +30,20 @@ def __init__(self, layer_size, epsilon=1e-5, momentum=0.9, device=np):
         self.running_variance = self.device.ones((1, layer_size))
 
     def normalize(self, Z, training=True):
+        """
+        Normalizes the input data Z.
+
+        During training, it computes the batch mean and variance, and updates the
+        running mean and variance. During inference, it uses the running statistics.
+
+        Parameters:
+            Z (ndarray): The input data of shape (batch_size, layer_size) to normalize.
+            training (bool): A flag indicating whether the model is in training mode.
+                If True, updates running statistics; otherwise uses them.
+
+        Returns:
+            ndarray: The normalized and scaled output data.
+        """
         if training:
             batch_mean = self.device.mean(Z, axis=0, keepdims=True)
             batch_variance = self.device.var(Z, axis=0, keepdims=True)
@@ -29,6 +60,19 @@
         return Z_scaled
 
     def backprop(self, Z, dZ, learning_rate):
+        """
+        Computes the gradients for gamma and beta during backpropagation
+        and updates their values.
+
+        Parameters:
+            Z (ndarray): The input data used for normalization, of shape (batch_size, layer_size).
+            dZ (ndarray): The gradient of the loss with respect to the output of the layer,
+                of shape (batch_size, layer_size).
+            learning_rate (float): The learning rate for updating gamma and beta.
+
+        Returns:
+            ndarray: The gradient of the loss with respect to the input data Z.
+        """
         dgamma = self.device.sum(dZ * Z, axis=0, keepdims=True)
         dbeta = self.device.sum(dZ, axis=0, keepdims=True)
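The new class is self-contained enough to smoke-test on its own. Below is a minimal sketch exercising both methods documented above, using the `from pydeepflow import BatchNormalization` export added in `__init__.py`; the expected ~0 mean / ~1 std assumes gamma and beta start at their conventional values of 1 and 0, which this diff does not show.

```python
import numpy as np
from pydeepflow import BatchNormalization

# A layer of width 4 and a batch of 8 deliberately off-center samples.
bn = BatchNormalization(layer_size=4, epsilon=1e-5, momentum=0.9, device=np)
Z = np.random.randn(8, 4) * 3.0 + 5.0

# Training mode: normalize with batch statistics and update the running stats.
Z_scaled = bn.normalize(Z, training=True)
print(Z_scaled.mean(axis=0))  # ~0 per feature (assuming gamma=1, beta=0)
print(Z_scaled.std(axis=0))   # ~1 per feature (same assumption)

# Inference mode: reuse the running mean/variance instead.
Z_eval = bn.normalize(Z, training=False)

# Backward pass: update gamma/beta and get the gradient w.r.t. the input.
dZ_scaled = np.random.randn(8, 4)  # stand-in upstream gradient
dZ = bn.backprop(Z, dZ_scaled, learning_rate=0.01)
```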
+ """ dgamma = self.device.sum(dZ * Z, axis=0, keepdims=True) dbeta = self.device.sum(dZ, axis=0, keepdims=True) diff --git a/pydeepflow/checkpoints.py b/pydeepflow/checkpoints.py index dd571e0..9593207 100644 --- a/pydeepflow/checkpoints.py +++ b/pydeepflow/checkpoints.py @@ -38,8 +38,8 @@ def save_weights(self, epoch, weights, biases, val_loss): # Prepare data to save data = {} for i, (w, b) in enumerate(zip(weights, biases)): - data[f'weights_layer_{i}'] = w # Correctly saving with this key - data[f'biases_layer_{i}'] = b # Correctly saving with this key + data[f'weights_layer_{i}'] = w + data[f'biases_layer_{i}'] = b # Save as .npz file np.savez(checkpoint_path, **data) diff --git a/pydeepflow/cross_validator.py b/pydeepflow/cross_validator.py index a083ba6..eec61cf 100644 --- a/pydeepflow/cross_validator.py +++ b/pydeepflow/cross_validator.py @@ -43,5 +43,5 @@ def get_metrics(self, y_true, y_pred, metrics): for metric in metrics: if metric == "accuracy": results['accuracy'] = np.mean(y_true == y_pred) - # Add more metrics as needed (e.g., precision, recall) + return results diff --git a/pydeepflow/device.py b/pydeepflow/device.py index 4b4458f..4e9833a 100644 --- a/pydeepflow/device.py +++ b/pydeepflow/device.py @@ -293,3 +293,60 @@ def norm(self, x, ord=None, axis=None, keepdims=False): """ return cp.linalg.norm(x, ord=ord, axis=axis, keepdims=keepdims) if self.use_gpu else np.linalg.norm(x, ord=ord, axis=axis, keepdims=keepdims) + + def ones(self, shape): + """ + Creates an array of ones with the specified shape. + + Parameters: + ----------- + shape : tuple of ints + The shape of the output array. + + Returns: + -------- + np.ndarray or cp.ndarray + An array of ones, either using NumPy or CuPy. + """ + return cp.ones(shape) if self.use_gpu else np.ones(shape) + + + def mean(self, x, axis=None, keepdims=False): + """ + Computes the mean of the input array along the specified axis. + + Parameters: + ----------- + x : np.ndarray or cp.ndarray + The input array. + axis : int or tuple of ints, optional + Axis or axes along which the means are computed. + keepdims : bool, optional + If True, the reduced dimensions are retained. + + Returns: + -------- + np.ndarray or cp.ndarray + The mean of the input array along the specified axis. + """ + return cp.mean(x, axis=axis, keepdims=keepdims) if self.use_gpu else np.mean(x, axis=axis, keepdims=keepdims) + + def var(self, x, axis=None, keepdims=False): + """ + Computes the variance of an array along a specified axis. + + Parameters: + ----------- + x : np.ndarray or cp.ndarray + Input array. + axis : int or None, optional (default=None) + Axis along which the variance is computed. + keepdims : bool, optional (default=False) + If True, the reduced dimensions will be retained. + + Returns: + -------- + np.ndarray or cp.ndarray + The variance of the input array along the specified axis. 
+ """ + return cp.var(x, axis=axis, keepdims=keepdims) if self.use_gpu else np.var(x, axis=axis, keepdims=keepdims) diff --git a/pydeepflow/model.py b/pydeepflow/model.py index 492e414..8eedea7 100644 --- a/pydeepflow/model.py +++ b/pydeepflow/model.py @@ -136,7 +136,7 @@ def backpropagation(self, X, y, activations, Z_values, learning_rate, clip_value # Apply L2 regularization to the weights self.weights[i] -= learning_rate * self.regularization.apply_l2_regularization(self.weights[i], learning_rate, X.shape) - def fit(self, epochs, learning_rate=0.01, lr_scheduler=None, early_stop=None, X_val=None, y_val=None, checkpoint=None, verbose=False, clipping_threshold=None): + def fit(self, epochs, learning_rate=0.01, lr_scheduler=None, early_stop=None, X_val=None, y_val=None, checkpoint=None, verbose=True, clipping_threshold=None): """ Trains the model for a given number of epochs with an optional learning rate scheduler. """ diff --git a/runner.py b/runner.py index 0c01590..8cf0e0b 100644 --- a/runner.py +++ b/runner.py @@ -27,16 +27,16 @@ scaler = StandardScaler() X = scaler.fit_transform(X) - # Ask the user whether to use GPU + # Ask the user whether to use GPU (simulated as False for this example) use_gpu_input = False use_gpu = True if use_gpu_input == 'y' else False # Define the architecture of the network - hidden_layers = [5, 5] - activations = ['relu', 'relu'] + hidden_layers = [5, 5] # Example: two hidden layers with 5 neurons each + activations = ['relu', 'relu'] # ReLU activations for the hidden layers # Initialize the CrossValidator - k_folds = 5 # Set the number of folds for cross-validation + k_folds = 10 # Set the number of folds for cross-validation cross_validator = CrossValidator(n_splits=k_folds) # Perform k-fold cross-validation @@ -48,41 +48,36 @@ X_train, X_val = X[train_index], X[val_index] y_train, y_val = y_one_hot[train_index], y_one_hot[val_index] - # Initialize the ANN for each fold - ann = Multi_Layer_ANN(X_train, y_train, hidden_layers, activations, loss='categorical_crossentropy', use_gpu=use_gpu) + # Initialize the ANN for each fold without batch normalization + ann = Multi_Layer_ANN(X_train, y_train, hidden_layers, activations, + loss='categorical_crossentropy', use_gpu=use_gpu) # Set up model checkpointing checkpoint = ModelCheckpoint(save_dir='./checkpoints', monitor='val_loss', save_best_only=True, save_freq=5) - # Callback functions + # Callback functions lr_scheduler = LearningRateScheduler(initial_lr=0.01, strategy="cyclic") early_stop = EarlyStopping(patience=3) # Train the model and capture history - # ann.fit(epochs=10000, learning_rate=0.01, lr_scheduler=lr_scheduler, early_stop=early_stop, - # X_val=X_val, y_val=y_val, checkpoint=checkpoint) - - ann.fit(epochs=1000, learning_rate=0.01, lr_scheduler=lr_scheduler, X_val=X_val, y_val=y_val) + ann.fit(epochs=1000, learning_rate=0.01, + lr_scheduler=lr_scheduler, + early_stop=early_stop, + X_val=X_val, + y_val=y_val, checkpoint=checkpoint) # Evaluate the model on the validation set y_pred_val = ann.predict(X_val) y_val_labels = np.argmax(y_val, axis=1) # Adjust prediction shape handling for accuracy calculation - if y_pred_val.ndim == 2: - y_pred_val_labels = np.argmax(y_pred_val, axis=1) # Multi-class classification - else: - y_pred_val_labels = (y_pred_val >= 0.5).astype(int) # Binary classification (if applicable) - + y_pred_val_labels = np.argmax(y_pred_val, axis=1) # Multi-class classification + # Calculate and store the accuracy for this fold fold_accuracy = np.mean(y_pred_val_labels == y_val_labels) 
         fold_accuracies.append(fold_accuracy)
         print(f"Fold {fold + 1} Accuracy: {fold_accuracy * 100:.2f}%")
 
-    # Print the average accuracy across all folds
-    average_accuracy = np.mean(fold_accuracies)
-    print(f"Average Accuracy across {k_folds} folds: {average_accuracy * 100:.2f}%")
-
     # Optionally plot training history of the last fold
     plot_utils = Plotting_Utils()
     plot_utils.plot_training_history(ann.history)
diff --git a/setup.py b/setup.py
index c96375b..a9bee55 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name="pydeepflow",
-    version="0.1.9", # Updated version
+    version="1.0.0", # Updated version
     author="Ravin D",
     author_email="ravin.d3107@outlook.com",
     description="A deep learning package optimized for performing Deep Learning Tasks, easy to learn and integrate into projects",
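Finally, the `model.py` hunk flips the `fit` default to `verbose=True`, so training progress is reported unless callers opt out. A minimal sketch under assumed conditions (the import path and the random stand-in data are illustrative, mirroring `runner.py`):

```python
import numpy as np
from pydeepflow.model import Multi_Layer_ANN  # path assumed from the diff's layout

# Stand-in data: 120 samples, 4 features, 3 one-hot classes.
X_train = np.random.randn(120, 4)
y_train = np.eye(3)[np.random.randint(0, 3, size=120)]

ann = Multi_Layer_ANN(X_train, y_train, [5, 5], ['relu', 'relu'],
                      loss='categorical_crossentropy', use_gpu=False)

# verbose now defaults to True; pass verbose=False to restore the old silence.
ann.fit(epochs=100, learning_rate=0.01)
```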