cnn_scratch.py

import numpy as np


class Conv2D:
    def __init__(self, num_filters, filter_size):
        """
        Initialize the convolutional layer with random weights.

        Parameters:
            num_filters (int): Number of filters (or kernels)
            filter_size (int): Size of the square filter (filter_size x filter_size)
        """
        self.num_filters = num_filters
        self.filter_size = filter_size
        # Small random weights; each filter is a single 2D kernel that is
        # shared across input channels (the channel products are summed).
        self.filters = np.random.randn(num_filters, filter_size, filter_size) * 0.01
        self.biases = np.zeros(num_filters)  # One bias per filter

    def forward(self, X):
        """
        Forward pass through the convolutional layer (valid convolution, stride 1).

        Parameters:
            X (array): Input data of shape (batch_size, height, width, channels)

        Returns:
            out (array): Output of shape (batch_size, out_height, out_width, num_filters)
        """
        self.X = X
        batch_size, height, width, channels = X.shape
        out_height = height - self.filter_size + 1
        out_width = width - self.filter_size + 1
        self.out = np.zeros((batch_size, out_height, out_width, self.num_filters))
        # Slide each filter over every valid window; the 2D kernel is
        # broadcast across channels, and the products are summed over the
        # whole (filter_size, filter_size, channels) region.
        for i in range(self.num_filters):
            kernel = self.filters[i][:, :, np.newaxis]  # (fs, fs, 1)
            bias = self.biases[i]
            for b in range(batch_size):
                for h in range(out_height):
                    for w in range(out_width):
                        region = X[b, h:h + self.filter_size, w:w + self.filter_size, :]
                        self.out[b, h, w, i] = np.sum(region * kernel) + bias
        return self.out

    def backward(self, d_out, learning_rate=0.01):
        """
        Backward pass through the convolutional layer to compute gradients.

        Parameters:
            d_out (array): Gradient of the loss w.r.t. the output
            learning_rate (float): Learning rate for weight updates

        Returns:
            d_X (array): Gradient of the loss w.r.t. the input
        """
        batch_size, out_height, out_width, _ = d_out.shape
        d_filters = np.zeros_like(self.filters)
        d_biases = np.zeros_like(self.biases)
        d_X = np.zeros_like(self.X)
        for i in range(self.num_filters):
            kernel = self.filters[i][:, :, np.newaxis]
            d_filter = np.zeros_like(self.filters[i])
            d_bias = 0.0
            for b in range(batch_size):
                for h in range(out_height):
                    for w in range(out_width):
                        region = self.X[b, h:h + self.filter_size, w:w + self.filter_size, :]
                        # The kernel is shared across channels, so its gradient
                        # sums the input region over the channel axis.
                        d_filter += d_out[b, h, w, i] * np.sum(region, axis=2)
                        d_bias += d_out[b, h, w, i]
                        d_X[b, h:h + self.filter_size, w:w + self.filter_size, :] += d_out[b, h, w, i] * kernel
            d_filters[i] = d_filter
            d_biases[i] = d_bias
        self.filters -= learning_rate * d_filters
        self.biases -= learning_rate * d_biases
        return d_X
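
# A quick usage sketch for Conv2D (illustrative shapes only): with 'valid'
# convolution and stride 1, out = in - filter_size + 1, so a 3x3 kernel over
# a 5x5 single-channel image yields a 3x3 map per filter:
#   conv = Conv2D(num_filters=2, filter_size=3)
#   conv.forward(np.zeros((1, 5, 5, 1))).shape  # (1, 3, 3, 2)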

class ReLU:
    def forward(self, X):
        """
        Forward pass through the ReLU activation function.

        Parameters:
            X (array): Input data

        Returns:
            out (array): Output data after ReLU activation
        """
        self.X = X
        return np.maximum(0, X)

    def backward(self, d_out):
        """
        Backward pass through the ReLU activation function.

        Parameters:
            d_out (array): Gradient of the loss w.r.t. the output

        Returns:
            d_X (array): Gradient of the loss w.r.t. the input
        """
        return d_out * (self.X > 0)
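
# Sketch: ReLU passes a gradient only where its input was positive, e.g.
#   relu = ReLU()
#   relu.forward(np.array([[-1.0, 2.0]]))  # -> [[0., 2.]]
#   relu.backward(np.ones((1, 2)))         # -> [[0., 1.]]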

class MaxPooling:
    def __init__(self, pool_size):
        """
        Initialize the max pooling layer.

        Parameters:
            pool_size (int): Size of the square pooling window. Input height and
                width are assumed to be divisible by pool_size; any remainder
                rows or columns are dropped by the floor division below.
        """
        self.pool_size = pool_size

    def forward(self, X):
        """
        Forward pass through the max pooling layer.

        Parameters:
            X (array): Input data of shape (batch_size, height, width, channels)

        Returns:
            out (array): Output data after max pooling
        """
        self.X = X
        batch_size, height, width, channels = X.shape
        out_height = height // self.pool_size
        out_width = width // self.pool_size
        self.out = np.zeros((batch_size, out_height, out_width, channels))
        for b in range(batch_size):
            for h in range(out_height):
                for w in range(out_width):
                    self.out[b, h, w, :] = np.max(
                        X[b, h * self.pool_size:(h + 1) * self.pool_size,
                          w * self.pool_size:(w + 1) * self.pool_size, :],
                        axis=(0, 1),
                    )
        return self.out

    def backward(self, d_out):
        """
        Backward pass through the max pooling layer to compute gradients.
        The gradient flows only to the position(s) that held each window's
        maximum; if a window has ties, every tied position receives it.

        Parameters:
            d_out (array): Gradient of the loss w.r.t. the output

        Returns:
            d_X (array): Gradient of the loss w.r.t. the input
        """
        batch_size, out_height, out_width, channels = d_out.shape
        d_X = np.zeros_like(self.X)
        for b in range(batch_size):
            for h in range(out_height):
                for w in range(out_width):
                    pool_region = self.X[b, h * self.pool_size:(h + 1) * self.pool_size,
                                         w * self.pool_size:(w + 1) * self.pool_size, :]
                    # Mask marking where each channel's maximum occurred
                    max_mask = (pool_region == np.max(pool_region, axis=(0, 1), keepdims=True))
                    d_X[b, h * self.pool_size:(h + 1) * self.pool_size,
                        w * self.pool_size:(w + 1) * self.pool_size, :] += d_out[b, h, w, :] * max_mask
        return d_X
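
# Sketch: 2x2 pooling halves each spatial dimension, and the backward pass
# routes each gradient to the maximum position(s) of its window:
#   pool = MaxPooling(2)
#   pool.forward(np.arange(16.0).reshape(1, 4, 4, 1)).shape  # (1, 2, 2, 1)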

class FullyConnected:
    def __init__(self, input_dim, output_dim):
        """
        Initialize the fully connected layer with random weights.

        Parameters:
            input_dim (int): Number of input features
            output_dim (int): Number of output features
        """
        self.weights = np.random.randn(input_dim, output_dim) * 0.01  # Random weights
        self.biases = np.zeros(output_dim)  # Biases

    def forward(self, X):
        """
        Forward pass through the fully connected layer.

        Parameters:
            X (array): Input data of shape (batch_size, input_dim)

        Returns:
            out (array): Output data after the fully connected layer
        """
        self.X = X
        return np.dot(X, self.weights) + self.biases

    def backward(self, d_out, learning_rate=0.01):
        """
        Backward pass through the fully connected layer to compute gradients.

        Parameters:
            d_out (array): Gradient of the loss w.r.t. the output
            learning_rate (float): Learning rate for weight updates

        Returns:
            d_X (array): Gradient of the loss w.r.t. the input
        """
        d_weights = np.dot(self.X.T, d_out)
        d_biases = np.sum(d_out, axis=0)
        d_X = np.dot(d_out, self.weights.T)
        self.weights -= learning_rate * d_weights
        self.biases -= learning_rate * d_biases
        return d_X
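
# Sketch: the layer computes y = X @ W + b, so for X of shape (batch, in_dim)
# and weights of shape (in_dim, out_dim):
#   fc = FullyConnected(4, 3)
#   fc.forward(np.zeros((2, 4))).shape  # (2, 3)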

# Example Usage
if __name__ == "__main__":
    # Hyperparameters
    input_height, input_width = 28, 28  # MNIST image size
    num_filters = 8
    filter_size = 3
    pool_size = 2
    num_fc_units = 10  # Number of classes in MNIST (digits 0-9)
    learning_rate = 0.01

    # Spatial size after the valid convolution (28 -> 26), then 2x2 pooling (26 -> 13)
    conv_height = input_height - filter_size + 1
    conv_width = input_width - filter_size + 1
    pooled_height = conv_height // pool_size
    pooled_width = conv_width // pool_size

    # Create layers
    conv = Conv2D(num_filters, filter_size)
    relu = ReLU()
    pool = MaxPooling(pool_size)
    fc = FullyConnected(pooled_height * pooled_width * num_filters, num_fc_units)

    # Create a synthetic dataset (batch_size, height, width, channels)
    X = np.random.randn(10, input_height, input_width, 1)  # Batch of 10 images
    y = np.random.randint(0, num_fc_units, size=(10,))  # Random labels

    # Forward pass
    out = conv.forward(X)                # (10, 26, 26, 8)
    out = relu.forward(out)
    out = pool.forward(out)              # (10, 13, 13, 8)
    out = out.reshape(out.shape[0], -1)  # Flatten to (10, 1352)
    out = fc.forward(out)                # (10, 10)
    print("Output shape after fully connected layer:", out.shape)

    # Assuming a dummy loss gradient for demonstration
    d_out = np.random.randn(*out.shape)

    # Backward pass (each layer's backward also applies its SGD update)
    d_out = fc.backward(d_out, learning_rate)
    d_out = d_out.reshape(X.shape[0], pooled_height, pooled_width, num_filters)
    d_out = pool.backward(d_out)
    d_out = relu.backward(d_out)
    d_out = conv.backward(d_out, learning_rate)
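
    # The gradient w.r.t. the input should match the input's shape; this is
    # only a demonstration-level sanity check, not part of training.
    print("Gradient shape w.r.t. input:", d_out.shape)  # (10, 28, 28, 1)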