diff --git a/NN/nnoutput.m b/NN/nnoutput.m
new file mode 100644
index 0000000..06e8a2d
--- /dev/null
+++ b/NN/nnoutput.m
@@ -0,0 +1,6 @@
+function predictions = nnoutput(nn, x)
+    nn.testing = 1;
+    nn = nnff(nn, x, zeros(size(x,1), nn.size(end)));
+    nn.testing = 0;
+    predictions = nn.a{end};
+end
diff --git a/NN/nnpredict.m b/NN/nnpredict.m
index 9106d4c..5079649 100644
--- a/NN/nnpredict.m
+++ b/NN/nnpredict.m
@@ -1,8 +1,8 @@
-function labels = nnpredict(nn, x)
+function predictions = nnpredict(nn, x)
     nn.testing = 1;
     nn = nnff(nn, x, zeros(size(x,1), nn.size(end)));
     nn.testing = 0;
 
     [dummy, i] = max(nn.a{end},[],2);
-    labels = i;
+    predictions = i;
 end
diff --git a/README.md b/README.md
index be277e4..a2ade93 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,6 @@
+[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/rasmusbergpalm/deeplearntoolbox/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
+
 DeepLearnToolbox
 ================
 
 
@@ -164,7 +166,9 @@ test_y = double(test_y');
 %% ex1 Train a 6c-2s-12c-2s Convolutional neural network
 %will run 1 epoch in about 200 second and get around 11% error.
 %With 100 epochs you'll get around 1.2% error
+
 rand('state',0)
+
 cnn.layers = {
     struct('type', 'i') %input layer
     struct('type', 'c', 'outputmaps', 6, 'kernelsize', 5) %convolution layer
@@ -172,19 +176,19 @@ cnn.layers = {
     struct('type', 'c', 'outputmaps', 12, 'kernelsize', 5) %convolution layer
     struct('type', 's', 'scale', 2) %subsampling layer
 };
-cnn = cnnsetup(cnn, train_x, train_y);
+
 
 opts.alpha = 1;
 opts.batchsize = 50;
 opts.numepochs = 1;
 
+cnn = cnnsetup(cnn, train_x, train_y);
 cnn = cnntrain(cnn, train_x, train_y, opts);
 
 [er, bad] = cnntest(cnn, test_x, test_y);
 
 %plot mean squared error
 figure; plot(cnn.rL);
-
 assert(er<0.12, 'Too big error');
 
 ```
@@ -289,10 +293,45 @@ nn = nntrain(nn, tx, ty, opts, vx, vy);  %  nntrain takes validation set as last two arguments (optionally)
 
 [er, bad] = nntest(nn, test_x, test_y);
 assert(er < 0.1, 'Too big error');
-```
+%% ex7 neural net for regression problem
+rand('state',0);
+randn('state',0);
+% Some invented nonlinear relationships to get 3 noisy output targets
+% 20000 records with 10 features
+all_x = randn(20000, 10);
+all_y = randn(20000, 3) * 0.01;
+all_y(:,1) = all_y(:,1) + sum( all_x(:,1:5) .* all_x(:, 3:7), 2 );
+all_y(:,2) = all_y(:,2) + sum( all_x(:,5:9) .* all_x(:, 4:8) .* all_x(:, 2:6), 2 );
+all_y(:,3) = all_y(:,3) + log( sum( all_x(:,4:8) .* all_x(:,4:8), 2 ) ) * 3.0;
+train_x = all_x(1:19000,:);
+train_y = all_y(1:19000,:);
+test_x = all_x(19001:20000,:);
+test_y = all_y(19001:20000,:);
+
+% the constructed data is already normalized, but this is usually best practice:
+[train_x, mu, sigma] = zscore(train_x);
+test_x = normalize(test_x, mu, sigma);
+
+%% ex7 network setup
+nn = nnsetup([10 50 50 3]);
+nn.activation_function = 'tanh_opt'; % tanh_opt activation function
+nn.output = 'linear';                % linear is usual choice for regression problems
+nn.learningRate = 0.001;             % Linear output can be sensitive to learning rate
+nn.momentum = 0.95;
+
+opts.numepochs = 20;  % Number of full sweeps through data
+opts.batchsize = 100; % Take a mean gradient step over this many samples
+[nn, L] = nntrain(nn, train_x, train_y, opts);
+
+% nnoutput calculates the predicted regression values
+predictions = nnoutput( nn, test_x );
+
+[er, bad] = nntest(nn, test_x, test_y);
+assert(er < 1.5, 'Too big error');
+
+```
 
 
-[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/rasmusbergpalm/deeplearntoolbox/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
diff --git a/tests/test_example_NN.m b/tests/test_example_NN.m
index c254ee7..93392f8 100644
--- a/tests/test_example_NN.m
+++ b/tests/test_example_NN.m
@@ -92,3 +92,42 @@
 
 [er, bad] = nntest(nn, test_x, test_y);
 assert(er < 0.1, 'Too big error');
+
+%% ex7 neural net for regression problem
+rand('state',0);
+randn('state',0);
+
+% Some invented nonlinear relationships to get 3 noisy output targets
+% 20000 records with 10 features
+all_x = randn(20000, 10);
+all_y = randn(20000, 3) * 0.01;
+all_y(:,1) = all_y(:,1) + sum( all_x(:,1:5) .* all_x(:, 3:7), 2 );
+all_y(:,2) = all_y(:,2) + sum( all_x(:,5:9) .* all_x(:, 4:8) .* all_x(:, 2:6), 2 );
+all_y(:,3) = all_y(:,3) + log( sum( all_x(:,4:8) .* all_x(:,4:8), 2 ) ) * 3.0;
+
+train_x = all_x(1:19000,:);
+train_y = all_y(1:19000,:);
+
+test_x = all_x(19001:20000,:);
+test_y = all_y(19001:20000,:);
+
+% the constructed data is already normalized, but this is usually best practice:
+[train_x, mu, sigma] = zscore(train_x);
+test_x = normalize(test_x, mu, sigma);
+
+%% ex7 network setup
+nn = nnsetup([10 50 50 3]);
+nn.activation_function = 'tanh_opt'; % tanh_opt activation function
+nn.output = 'linear';                % linear is usual choice for regression problems
+nn.learningRate = 0.001;             % Linear output can be sensitive to learning rate
+nn.momentum = 0.95;
+
+opts.numepochs = 20;  % Number of full sweeps through data
+opts.batchsize = 100; % Take a mean gradient step over this many samples
+[nn, L] = nntrain(nn, train_x, train_y, opts);
+
+% nnoutput calculates the predicted regression values
+predictions = nnoutput( nn, test_x );
+
+[er, bad] = nntest(nn, test_x, test_y);
+assert(er < 1.5, 'Too big error');
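
The distinction this patch draws between the two prediction entry points: `nnpredict` collapses the final-layer activations to a single class index via `max`, so it only suits classification, while the new `nnoutput` returns the raw activations of the output layer, which is what a `nn.output = 'linear'` regression setup needs. The sketch below is not part of the patch; it only illustrates that intended usage with the toolbox functions shown above (`nnsetup`, `nntrain`, `nnoutput`, `nnpredict`), reusing `train_x`, `train_y`, `test_x`, `test_y` and `opts` from the ex7 example, and the names `predictions`, `mse` and `labels` are purely illustrative.

```
% Illustrative sketch (not part of the patch), assuming train_x, train_y,
% test_x, test_y and opts are set up as in the ex7 regression example above.

nn        = nnsetup([10 50 50 3]);   % 10 inputs, two hidden layers, 3 outputs
nn.output = 'linear';                % regression: keep the raw output units
nn        = nntrain(nn, train_x, train_y, opts);

% nnoutput returns the activations of the last layer: an N x 3 matrix of
% predicted target values, directly comparable to test_y.
predictions = nnoutput(nn, test_x);
mse         = mean(mean((predictions - test_y).^2));   % simple error measure

% nnpredict, by contrast, applies max() across the output units and returns
% an N x 1 column of indices (1..3 here) -- appropriate for classification
% with one-of-K coded targets, but not for regression targets like these.
labels = nnpredict(nn, test_x);
```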