-
Notifications
You must be signed in to change notification settings - Fork 0
/
gradient_check.py
125 lines (100 loc) · 3.78 KB
/
gradient_check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import numpy as np
from random import randrange
def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
"""
a naive implementation of numerical gradient of f at x
- f should be a function that takes a single argument
- x is the point (numpy array) to evaluate the gradient at
"""
fx = f(x) # evaluate function value at original point
grad = np.zeros_like(x)
# iterate over all indexes in x
it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
while not it.finished:
# evaluate function at x+h
ix = it.multi_index
oldval = x[ix]
x[ix] = oldval + h # increment by h
fxph = f(x) # evalute f(x + h)
x[ix] = oldval - h
fxmh = f(x) # evaluate f(x - h)
x[ix] = oldval # restore
# compute the partial derivative with centered formula
grad[ix] = (fxph - fxmh) / (2 * h) # the slope
if verbose:
print(ix, grad[ix])
it.iternext() # step to next dimension
return grad
def eval_numerical_gradient_array(f, x, df, h=1e-5):
"""
Evaluate a numeric gradient for a function that accepts a numpy
array and returns a numpy array.
"""
grad = np.zeros_like(x)
it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
while not it.finished:
ix = it.multi_index
oldval = x[ix]
x[ix] = oldval + h
pos = f(x).copy()
x[ix] = oldval - h
neg = f(x).copy()
x[ix] = oldval
grad[ix] = np.sum((pos - neg) * df) / (2 * h)
it.iternext()
return grad
def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
"""
Compute numeric gradients for a function that operates on input
and output blobs.
We assume that f accepts several input blobs as arguments, followed by a
blob where outputs will be written. For example, f might be called like:
f(x, w, out)
where x and w are input Blobs, and the result of f will be written to out.
Inputs:
- f: function
- inputs: tuple of input blobs
- output: output blob
- h: step size
"""
numeric_diffs = []
for input_blob in inputs:
diff = np.zeros_like(input_blob.diffs)
it = np.nditer(input_blob.vals, flags=['multi_index'],
op_flags=['readwrite'])
while not it.finished:
idx = it.multi_index
orig = input_blob.vals[idx]
input_blob.vals[idx] = orig + h
f(*(inputs + (output,)))
pos = np.copy(output.vals)
input_blob.vals[idx] = orig - h
f(*(inputs + (output,)))
neg = np.copy(output.vals)
input_blob.vals[idx] = orig
diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)
it.iternext()
numeric_diffs.append(diff)
return numeric_diffs
def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
return eval_numerical_gradient_blobs(lambda *args: net.forward(),
inputs, output, h=h)
def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
"""
sample a few random elements and only return numerical
in this dimensions.
"""
for i in range(num_checks):
ix = tuple([randrange(m) for m in x.shape])
oldval = x[ix]
x[ix] = oldval + h # increment by h
fxph = f(x) # evaluate f(x + h)
x[ix] = oldval - h # increment by h
fxmh = f(x) # evaluate f(x - h)
x[ix] = oldval # reset
grad_numerical = (fxph - fxmh) / (2 * h)
grad_analytic = analytic_grad[ix]
rel_error = (abs(grad_numerical - grad_analytic) /
(abs(grad_numerical) + abs(grad_analytic)))
print('numerical: %f analytic: %f, relative error: %e'
%(grad_numerical, grad_analytic, rel_error))