c4_neural.py
#!/usr/bin/env python
import random

import numpy as np

from c1_softmax import softmax
from c2_gradcheck import gradcheck_naive
from c3_sigmoid import sigmoid, sigmoid_grad
def forward_backward_prop(data, labels, params, dimensions):
    """Forward and backward propagation for a two-layer sigmoid/softmax net.

    Returns the cross-entropy cost and the gradient with respect to the
    flat parameter vector -- the (cost, grad) pair gradcheck_naive expects.
    """
    # Unpack the flat params vector into W1, b1, W2, b2
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))   # W1.shape = (10, 5)
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))         # b1.shape = (1, 5)
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))   # W2.shape = (5, 10)
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))       # b2.shape = (1, 10)

    # Forward propagation
    hidden = sigmoid(data.dot(W1) + b1)                  # hidden.shape = (20, 5)
    prediction = softmax(hidden.dot(W2) + b2)            # prediction.shape = (20, 10)
    # labels is one-hot, so the product keeps only the log-probability
    # of each row's true class
    cost = -np.sum(np.log(prediction) * labels)

    # Backward propagation
    delta = prediction - labels                          # dcost/dscores for softmax + cross-entropy
    gradW2 = hidden.T.dot(delta)
    gradb2 = np.sum(delta, axis=0)
    delta = delta.dot(W2.T) * sigmoid_grad(hidden)       # backprop through W2 and the sigmoid
    gradW1 = data.T.dot(delta)
    gradb1 = np.sum(delta, axis=0)

    # Flatten and concatenate the four gradients in the same order as params
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))  # grad.shape = (115,)

    return cost, grad
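
Two things in forward_backward_prop are worth spelling out. First, for a softmax output layer trained with cross-entropy loss, the gradient of the cost with respect to the pre-softmax scores simplifies to prediction - labels, which is why the backward pass can start directly from that difference. Second, the helpers imported at the top live in sibling files not shown on this page; the sketches below are only a guess at their shape based on how they are called here, and the actual c1_softmax.py and c3_sigmoid.py may differ.

# Hypothetical sketches of the imported helpers, inferred from call sites;
# the real c1_softmax.py and c3_sigmoid.py may differ in details.

def softmax(x):
    # Row-wise softmax; subtracting the row max avoids overflow in np.exp
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

def sigmoid(x):
    # Element-wise logistic function, 1 / (1 + e^-x)
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_grad(s):
    # Sigmoid derivative written in terms of the sigmoid *output* s,
    # which is why forward_backward_prop passes `hidden` rather than
    # the pre-activation values
    return s * (1.0 - s)
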
def sanity_check():
    print("Running sanity check...")
    N = 20
    dimensions = [10, 5, 10]                   # Dx, H, Dy
    data = np.random.randn(N, dimensions[0])   # 20 examples, 10 features each
    labels = np.zeros((N, dimensions[2]))      # labels.shape = (20, 10)
    for i in range(N):
        # One-hot labels: each row gets a single 1 at a random class index,
        # i.e. each example belongs to exactly one class
        labels[i, random.randint(0, dimensions[2] - 1)] = 1

    # (Dx+1)*H weights and biases for layer 1 plus (H+1)*Dy for layer 2
    params = np.random.randn((dimensions[0] + 1) * dimensions[1] +
                             (dimensions[1] + 1) * dimensions[2], )  # params.shape = (115,)
    # For each element of params, compare the analytic gradient from
    # backpropagation against a numerical estimate
    gradcheck_naive(lambda p: forward_backward_prop(data, labels, p, dimensions),
                    params)


if __name__ == "__main__":
    sanity_check()
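
gradcheck_naive comes from c2_gradcheck, which is also not shown here. From the call above we only know its contract: it takes a function mapping a flat parameter vector to a (cost, gradient) pair, plus the vector itself, and checks each analytic partial derivative against a numerical one. A minimal centered-finite-difference sketch under those assumptions, with an arbitrary step size and tolerance, might look like:

# Hypothetical sketch of gradcheck_naive; the real c2_gradcheck.py may
# differ (step size, tolerance, random-state handling, output format).

def gradcheck_naive(f, x, h=1e-4, tol=1e-5):
    # f returns (cost, analytic_gradient) at x
    _, grad = f(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + h
        fxph, _ = f(x)                       # f(x + h)
        x[ix] = old - h
        fxmh, _ = f(x)                       # f(x - h)
        x[ix] = old                          # restore the perturbed entry
        numeric = (fxph - fxmh) / (2 * h)    # centered difference estimate
        if abs(numeric - grad[ix]) > tol * max(1.0, abs(numeric), abs(grad[ix])):
            print("Gradient check failed at %s: analytic %f vs numeric %f"
                  % (str(ix), grad[ix], numeric))
            return
        it.iternext()
    print("Gradient check passed!")

If every component agrees within tolerance, running the script should report a passing check; any mismatch points at a bug in the backward pass.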