-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
29 changed files
with
3,773 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
function checkNNGradients(lambda)
%CHECKNNGRADIENTS Gradient-check backpropagation on a tiny network
%   CHECKNNGRADIENTS(lambda) builds a small network from fixed,
%   deterministic weights and inputs, then prints the gradient returned by
%   nnCostFunction side by side with the finite-difference estimate from
%   computeNumericalGradient, followed by their relative difference.
%
%   lambda is forwarded unchanged to nnCostFunction; it defaults to 0 when
%   omitted or empty.

% Default lambda when the caller did not supply one
if nargin < 1 || isempty(lambda)
    lambda = 0;
end

% Dimensions of the test problem
input_layer_size  = 3;
hidden_layer_size = 5;
num_labels        = 3;
m                 = 5;

% Deterministic weights for both layers
Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size);
Theta2 = debugInitializeWeights(num_labels, hidden_layer_size);

% The same helper doubles as a generator for the inputs; the labels cycle
% through the values 1..num_labels
X = debugInitializeWeights(m, input_layer_size - 1);
y = 1 + mod((1:m)', num_labels);

% Flatten both weight matrices into a single parameter vector
nn_params = [Theta1(:); Theta2(:)];

% Cost as a function of the parameter vector only
costFunc = @(p) nnCostFunction(p, input_layer_size, hidden_layer_size, ...
                               num_labels, X, y, lambda);

% Analytical gradient first, then the numerical estimate
[~, grad] = costFunc(nn_params);
numgrad = computeNumericalGradient(costFunc, nn_params);

% Print both gradients next to each other for visual comparison
disp([numgrad grad]);
fprintf(['The above two columns you get should be very similar.\n' ...
         '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']);

% Norm of the difference relative to the norm of the sum. The printed
% message expects this to be below 1e-9 when computeNumericalGradient uses
% a step of 1e-4.
rel_diff = norm(numgrad - grad) / norm(numgrad + grad);

fprintf(['If your backpropagation implementation is correct, then \n' ...
         'the relative difference will be small (less than 1e-9). \n' ...
         '\nRelative Difference: %g\n'], rel_diff);

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
function numgrad = computeNumericalGradient(J, theta)
%COMPUTENUMERICALGRADIENT Central-difference estimate of a gradient
%   numgrad = COMPUTENUMERICALGRADIENT(J, theta) returns an array of the
%   same size as theta whose i-th entry approximates the partial
%   derivative of J with respect to theta(i), evaluated at theta.
%
%   J is a function handle such that J(theta) returns the function value
%   at theta. It is called twice per element of theta: first at
%   theta - delta, then at theta + delta, where delta is zero everywhere
%   except for a step of 1e-4 in position i.

step = 1e-4;
numgrad = zeros(size(theta));

for idx = 1:numel(theta)
    % Perturbation that touches only the idx-th parameter
    delta = zeros(size(theta));
    delta(idx) = step;

    % Evaluate below the point first, then above it
    f_minus = J(theta - delta);
    f_plus  = J(theta + delta);

    % Symmetric difference quotient
    numgrad(idx) = (f_plus - f_minus) / (2 * step);
end

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
function W = debugInitializeWeights(fan_out, fan_in)
%DEBUGINITIALIZEWEIGHTS Deterministic layer weights for debugging
%   W = DEBUGINITIALIZEWEIGHTS(fan_out, fan_in) returns a
%   fan_out-by-(1 + fan_in) matrix of fixed values for a layer with
%   fan_in incoming connections and fan_out outgoing connections.
%
%   Note the argument order: fan_out first, then fan_in. The extra column
%   (the "1 +" in 1 + fan_in) is there for the "bias" terms, so W has one
%   row per outgoing connection and one column per incoming connection
%   plus one.
%
%   The entries are sin(1), sin(2), ..., sin(numel(W)), laid out in
%   column-major order and divided by 10, so repeated calls with the same
%   arguments always give the same matrix.

% zeros() serves only as a shape template: it fixes numel(W) and size(W)
% for the statement below, which overwrites every entry.
W = zeros(fan_out, 1 + fan_in);

% Fill W using "sin" so the values are reproducible from run to run
W = reshape(sin(1:numel(W)), size(W)) / 10;

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
function [h, display_array] = displayData(X, example_width)
%DISPLAYDATA Display 2D data in a nice grid
%   [h, display_array] = DISPLAYDATA(X, example_width) treats every row of
%   X as one example, reshapes it to example_height-by-example_width
%   (with example_height = size(X, 2) / example_width) and tiles all
%   examples into a single image, separated by a 1-pixel border of -1.
%
%   Each example is divided by its own largest absolute value, so its
%   entries fall in [-1, 1]. An example that is entirely zero is left as
%   zeros instead of being divided by zero.
%
%   example_width defaults to round(sqrt(size(X, 2))) when omitted or
%   empty.
%
%   h is the handle returned by imagesc; display_array is the tiled matrix
%   that was drawn.

% Set example_width automatically if not passed in
if ~exist('example_width', 'var') || isempty(example_width)
    example_width = round(sqrt(size(X, 2)));
end

% Gray Image
colormap(gray);

% Compute rows, cols
[m, n] = size(X);
example_height = n / example_width;

% Compute number of items to display
display_rows = floor(sqrt(m));
display_cols = ceil(m / display_rows);

% Between images padding
pad = 1;

% Setup blank display (-1 everywhere, so the padding maps to the low end
% of the [-1 1] color range used below)
display_array = -ones(pad + display_rows * (example_height + pad), ...
                      pad + display_cols * (example_width + pad));

% Copy each example into a patch on the display array
curr_ex = 1;
for j = 1:display_rows
    for i = 1:display_cols
        if curr_ex > m
            break;
        end

        % Scale by the largest magnitude in this example. Guard the
        % all-zero case: dividing by 0 would fill the patch with NaN.
        max_val = max(abs(X(curr_ex, :)));
        if max_val == 0
            max_val = 1;
        end

        % Rows and columns of the display occupied by this patch
        row_idx = pad + (j - 1) * (example_height + pad) + (1:example_height);
        col_idx = pad + (i - 1) * (example_width + pad) + (1:example_width);

        display_array(row_idx, col_idx) = ...
            reshape(X(curr_ex, :), example_height, example_width) / max_val;
        curr_ex = curr_ex + 1;
    end
    if curr_ex > m
        break;
    end
end

% Display Image
h = imagesc(display_array, [-1 1]);

% Do not show axis
axis image off

drawnow;

end
Oops, something went wrong.