import numpy as np
import os
import pickle


# Activation function: Leaky ReLU
def leaky_relu(x, alpha=0.01):
    return np.where(x > 0, x, x * alpha)
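
# Illustrative example: leaky_relu(np.array([-2.0, 3.0])) returns
# array([-0.02, 3.0]) -- negative inputs are scaled by alpha.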

# Derivative of Leaky ReLU (for backpropagation)
def leaky_relu_derivative(x, alpha=0.01):
    return np.where(x > 0, 1, alpha)

# Activation function for the output layer: Softmax
def softmax(x):
    # Subtracting the row-wise max before exponentiating avoids overflow
    # without changing the result (softmax is shift-invariant)
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)
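
# Illustrative sanity check: each softmax row sums to 1, e.g.
#   softmax(np.array([[1.0, 2.0, 3.0]])) -> array([[0.0900, 0.2447, 0.6652]])
# (values rounded to four decimal places).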

# Definition of the Deep Neural Network class
class DeepNeuralNetwork:
    # Initialization of the network
    def __init__(self, layers, learning_rate=0.01, lambda_reg=0.001):
        self.layers = layers
        self.learning_rate = learning_rate
        self.lambda_reg = lambda_reg
        # He initialization: weights scaled by sqrt(2 / fan_in), which suits
        # ReLU-family activations
        self.weights = [np.random.randn(layers[i], layers[i+1]) * np.sqrt(2. / layers[i])
                        for i in range(len(layers)-1)]
        self.biases = [np.zeros((1, layers[i+1])) for i in range(len(layers)-1)]

    # Forward pass through the network
    def feedforward(self, X):
        activations = [X]
        # Iterate over all layers except the last one using Leaky ReLU
        for i in range(len(self.weights) - 1):
            z = np.dot(activations[-1], self.weights[i]) + self.biases[i]
            a = leaky_relu(z)
            activations.append(a)
        # For the last layer, use Softmax
        z = np.dot(activations[-1], self.weights[-1]) + self.biases[-1]
        a = softmax(z)
        activations.append(a)
        return activations
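
    # For example, with layers=[95, 128, 64, 95] and a batch of m inputs,
    # `activations` holds four arrays of shapes (m, 95), (m, 128), (m, 64)
    # and (m, 95): the input followed by one activation per layer.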

    # Compute the loss using Cross-Entropy and L2 Regularization
    def compute_loss(self, Y, Y_hat):
        m = Y.shape[0]
        l2_penalty = sum(np.sum(np.square(w)) for w in self.weights) * (self.lambda_reg / (2 * m))
        # The small constant 1e-8 guards against log(0)
        cross_entropy = -np.sum(Y * np.log(Y_hat + 1e-8)) / m
        return cross_entropy + l2_penalty
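
    # In math form, the loss above is
    #   L = -(1/m) * sum(Y * log(Y_hat)) + (lambda_reg / (2*m)) * sum_l ||W_l||^2
    # where the second term penalizes large weights (L2 regularization).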

    # Backpropagation to update weights and biases
    def backprop(self, X, Y, activations):
        m = X.shape[0]
        Y_hat = activations[-1]
        delta = Y_hat - Y
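        # `Y_hat - Y` is the combined gradient of the cross-entropy loss with
        # respect to the softmax pre-activations, so no separate softmax
        # derivative is needed for the output layer.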
        # Walk backwards through the layers, propagating delta with each
        # layer's pre-update weights before applying that layer's gradient step
        for i in range(len(self.weights) - 1, -1, -1):
            dW = np.dot(activations[i].T, delta) / m
            db = np.sum(delta, axis=0, keepdims=True) / m
            # Add the L2 regularization gradient
            dW += (self.lambda_reg / m) * self.weights[i]
            if i > 0:
                delta = np.dot(delta, self.weights[i].T) * leaky_relu_derivative(activations[i])
            self.weights[i] -= self.learning_rate * dW
            self.biases[i] -= self.learning_rate * db

    # Train the network for a specified number of epochs
    def train(self, X, Y, epochs):
        for epoch in range(epochs):
            activations = self.feedforward(X)
            loss = self.compute_loss(Y, activations[-1])
            self.backprop(X, Y, activations)
            if epoch % 1000 == 0:
                print(f"Epoch {epoch}, loss: {loss}")

    # Predict function to output the class of each input
    def predict(self, X):
        activations = self.feedforward(X)
        return np.argmax(activations[-1], axis=1)
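
    # Illustrative usage: after training, predicting from the one-hot vector
    # for index 0 should recover that index, e.g.
    #   nn.predict(np.eye(95)[0].reshape(1, -1)) -> array([0])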

    # Save the model weights to a file
    def save_weights(self, filename='model_weights95.pkl'):
        with open(filename, 'wb') as f:
            pickle.dump((self.weights, self.biases), f)

    # Load the model weights from a file
    def load_weights(self, filename='model_weights95.pkl'):
        if os.path.exists(filename):
            with open(filename, 'rb') as f:
                self.weights, self.biases = pickle.load(f)
            print("Model weights loaded.")

# Function to generate one-hot encoded data for training
def generate_data():
    n = 95  # Number of characters
    X = np.eye(n)  # Input: identity matrix representing one-hot encoded characters
    Y = np.eye(n)  # Output: same as input (the network learns the identity map)
    return X, Y
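
# With n = 95, row i of X is the one-hot encoding of printable ASCII
# character chr(32 + i), matching the test_characters list used below.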

# Test all characters by predicting each one
def test_all_characters(nn, test_characters):
    print("Testing all characters...")
    for idx, char in enumerate(test_characters):
        prediction = nn.predict(np.eye(95)[idx].reshape(1, -1))
        print(f"Input: {char}, Predicted: {test_characters[prediction[0]]}")

if __name__ == "__main__":
    # Generate synthetic data
    X, Y = generate_data()

    # Characters to test: the 95 printable ASCII characters
    test_characters = [chr(i) for i in range(32, 127)]

    # Initialize the network
    nn = DeepNeuralNetwork(layers=[95, 128, 64, 95], learning_rate=0.01, lambda_reg=0.001)

    # Train the network
    print("Training the network...")
    nn.train(X, Y, epochs=5000)

    # Test the network
    test_all_characters(nn, test_characters)

    # Save the model weights
    nn.save_weights()

    # Optionally, load the model weights back (for demonstration)
    # nn.load_weights()

    # That's all. Congratulations, you have learned the basics!