import numpy as np
import os
import pickle


# Activation function: Leaky ReLU
def leaky_relu(x, alpha=0.01):
    return np.where(x > 0, x, x * alpha)
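
# Illustrative example: leaky_relu(np.array([-2.0, 3.0])) returns
# array([-0.02, 3.0]) -- negative inputs are scaled by alpha.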

# Derivative of Leaky ReLU (for backpropagation)
def leaky_relu_derivative(x, alpha=0.01):
    return np.where(x > 0, 1, alpha)

# Activation function for the output layer: Softmax
def softmax(x):
    # Subtracting the row-wise max before exponentiating avoids overflow
    # without changing the result (softmax is shift-invariant)
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)
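
# Illustrative sanity check: each softmax row sums to 1, e.g.
#   softmax(np.array([[1.0, 2.0, 3.0]])) -> array([[0.0900, 0.2447, 0.6652]])
# (values rounded to four decimal places).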

# Definition of the Deep Neural Network class
class DeepNeuralNetwork:
    # Initialization of the network
    def __init__(self, layers, learning_rate=0.01, lambda_reg=0.001):
        self.layers = layers
        self.learning_rate = learning_rate
        self.lambda_reg = lambda_reg
        # He initialization: weights scaled by sqrt(2 / fan_in), which suits
        # ReLU-family activations
        self.weights = [np.random.randn(layers[i], layers[i+1]) * np.sqrt(2. / layers[i])
                        for i in range(len(layers)-1)]
        self.biases = [np.zeros((1, layers[i+1])) for i in range(len(layers)-1)]

    # Forward pass through the network
    def feedforward(self, X):
        activations = [X]
        # Iterate over all layers except the last one using Leaky ReLU
        for i in range(len(self.weights) - 1):
            z = np.dot(activations[-1], self.weights[i]) + self.biases[i]
            a = leaky_relu(z)
            activations.append(a)
        # For the last layer, use Softmax
        z = np.dot(activations[-1], self.weights[-1]) + self.biases[-1]
        a = softmax(z)
        activations.append(a)
        return activations
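
    # For example, with layers=[95, 128, 64, 95] and a batch of m inputs,
    # `activations` holds four arrays of shapes (m, 95), (m, 128), (m, 64)
    # and (m, 95): the input followed by one activation per layer.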

    # Compute the loss using Cross-Entropy and L2 Regularization
    def compute_loss(self, Y, Y_hat):
        m = Y.shape[0]
        l2_penalty = sum(np.sum(np.square(w)) for w in self.weights) * (self.lambda_reg / (2 * m))
        # The small constant 1e-8 guards against log(0)
        cross_entropy = -np.sum(Y * np.log(Y_hat + 1e-8)) / m
        return cross_entropy + l2_penalty
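
    # In math form, the loss above is
    #   L = -(1/m) * sum(Y * log(Y_hat)) + (lambda_reg / (2*m)) * sum_l ||W_l||^2
    # where the second term penalizes large weights (L2 regularization).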

    # Backpropagation to update weights and biases
    def backprop(self, X, Y, activations):
        m = X.shape[0]
        Y_hat = activations[-1]
        delta = Y_hat - Y
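        # `Y_hat - Y` is the combined gradient of the cross-entropy loss with
        # respect to the softmax pre-activations, so no separate softmax
        # derivative is needed for the output layer.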
        # Walk backwards through the layers, propagating delta with each
        # layer's pre-update weights before applying that layer's gradient step
        for i in range(len(self.weights) - 1, -1, -1):
            dW = np.dot(activations[i].T, delta) / m
            db = np.sum(delta, axis=0, keepdims=True) / m
            # Add the L2 regularization gradient
            dW += (self.lambda_reg / m) * self.weights[i]
            if i > 0:
                delta = np.dot(delta, self.weights[i].T) * leaky_relu_derivative(activations[i])
            self.weights[i] -= self.learning_rate * dW
            self.biases[i] -= self.learning_rate * db

    # Train the network for a specified number of epochs
    def train(self, X, Y, epochs):
        for epoch in range(epochs):
            activations = self.feedforward(X)
            loss = self.compute_loss(Y, activations[-1])
            self.backprop(X, Y, activations)
            if epoch % 1000 == 0:
                print(f"Epoch {epoch}, loss: {loss}")

    # Predict function to output the class of each input
    def predict(self, X):
        activations = self.feedforward(X)
        return np.argmax(activations[-1], axis=1)
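
    # Illustrative usage: after training, predicting from the one-hot vector
    # for index 0 should recover that index, e.g.
    #   nn.predict(np.eye(95)[0].reshape(1, -1)) -> array([0])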

    # Save the model weights to a file
    def save_weights(self, filename='model_weights95.pkl'):
        with open(filename, 'wb') as f:
            pickle.dump((self.weights, self.biases), f)

    # Load the model weights from a file
    def load_weights(self, filename='model_weights95.pkl'):
        if os.path.exists(filename):
            with open(filename, 'rb') as f:
                self.weights, self.biases = pickle.load(f)
            print("Model weights loaded.")

# Function to generate one-hot encoded data for training
def generate_data():
    n = 95  # Number of characters
    X = np.eye(n)  # Input: identity matrix representing one-hot encoded characters
    Y = np.eye(n)  # Output: same as input (the network learns the identity map)
    return X, Y
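
# With n = 95, row i of X is the one-hot encoding of printable ASCII
# character chr(32 + i), matching the test_characters list used below.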

# Test all characters by predicting each one
def test_all_characters(nn, test_characters):
    print("Testing all characters...")
    for idx, char in enumerate(test_characters):
        prediction = nn.predict(np.eye(95)[idx].reshape(1, -1))
        print(f"Input: {char}, Predicted: {test_characters[prediction[0]]}")

if __name__ == "__main__":
    # Generate synthetic data
    X, Y = generate_data()

    # Characters to test: the 95 printable ASCII characters
    test_characters = [chr(i) for i in range(32, 127)]

    # Initialize the network
    nn = DeepNeuralNetwork(layers=[95, 128, 64, 95], learning_rate=0.01, lambda_reg=0.001)

    # Train the network
    print("Training the network...")
    nn.train(X, Y, epochs=5000)

    # Test the network
    test_all_characters(nn, test_characters)

    # Save the model weights
    nn.save_weights()

    # Optionally, load the model weights back (for demonstration)
    # nn.load_weights()

    # That's all. Congratulations, you have learned the basics!