diff --git a/NN/nn_online_step_train.m b/NN/nn_online_step_train.m
new file mode 100644
index 0000000..816ef61
--- /dev/null
+++ b/NN/nn_online_step_train.m
@@ -0,0 +1,39 @@
+function nn = nn_online_step_train(nn, sample_x, sample_y, opts)
+%NN_ONLINE_STEP_TRAIN performs one online training step of a neural net
+% nn = nn_online_step_train(nn, sample_x, sample_y, opts) trains the neural network nn online
+% on input sample_x and target sample_y;
+% the samples can be split into minibatches of size opts.batchsize.
+% Returns a neural network nn with online-step-updated activations,
+% weights and biases (nn.a, nn.W, nn.b).
+
+assert(nargin == 4, 'number of input arguments must be 4')
+
+m = size(sample_y, 1);
+
+batchsize = opts.batchsize;
+
+numbatches = m / batchsize;
+
+assert(rem(numbatches, 1) == 0, 'numbatches must be an integer');
+
+kk = randperm(m);                          % shuffle the samples
+for l = 1 : numbatches
+    batch_x = sample_x(kk((l - 1) * batchsize + 1 : l * batchsize), :);
+
+    % Add noise to input (for use in denoising autoencoder)
+    if(nn.inputZeroMaskedFraction ~= 0)
+        batch_x = batch_x.*(rand(size(batch_x))>nn.inputZeroMaskedFraction);
+    end
+
+    batch_y = sample_y(kk((l - 1) * batchsize + 1 : l * batchsize), :);
+
+    nn = nnff(nn, batch_x, batch_y);
+    nn = nnbp(nn);
+    nn = nnapplygrads(nn);
+end
+
+nn.learningRate = nn.learningRate * nn.scaling_learningRate;
+
+
+
+
diff --git a/NN/nnbp.m b/NN/nnbp.m
index 1217719..245ec9b 100644
--- a/NN/nnbp.m
+++ b/NN/nnbp.m
@@ -17,6 +17,10 @@
                 d_act = nn.a{i} .* (1 - nn.a{i});
             case 'tanh_opt'
                 d_act = 1.7159 * 2/3 * (1 - 1/(1.7159)^2 * nn.a{i}.^2);
+            case 'relu'
+                d_act = nn.a{i};
+                d_act(d_act>0) = 1;
+                d_act(d_act<=0) = nn.neg_slope;   % slope for non-positive units (leaky ReLU)
         end
         
         if(nn.nonSparsityPenalty>0)
diff --git a/NN/nnff.m b/NN/nnff.m
index 32a2c69..60fcfac 100644
--- a/NN/nnff.m
+++ b/NN/nnff.m
@@ -17,6 +17,8 @@
                 nn.a{i} = sigm(nn.a{i - 1} * nn.W{i - 1}');
             case 'tanh_opt'
                 nn.a{i} = tanh_opt(nn.a{i - 1} * nn.W{i - 1}');
+            case 'relu'
+                nn.a{i} = relu(nn.a{i - 1} * nn.W{i - 1}', nn);
         end
         
         %dropout
diff --git a/NN/nnsetup.m b/NN/nnsetup.m
index b8ec742..0018749 100644
--- a/NN/nnsetup.m
+++ b/NN/nnsetup.m
@@ -6,7 +6,8 @@
     nn.size   = architecture;
     nn.n      = numel(nn.size);
 
-    nn.activation_function              = 'tanh_opt';   %  Activation functions of hidden layers: 'sigm' (sigmoid) or 'tanh_opt' (optimal tanh).
+    nn.activation_function              = 'tanh_opt';   %  Activation functions of hidden layers: 'sigm' (sigmoid), 'tanh_opt' (optimal tanh) or 'relu'.
+    nn.neg_slope                        = 0;            %  negative slope for the 'relu' activation (leaky ReLU); 0 gives standard ReLU
     nn.learningRate                     = 2;            %  learning rate Note: typically needs to be lower when using 'sigm' activation function and non-normalized inputs.
     nn.momentum                         = 0.5;          %  Momentum
     nn.scaling_learningRate             = 1;            %  Scaling factor for the learning rate (each epoch)
diff --git a/README.md b/README.md
index be277e4..aa433da 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@
 Setup
 -----
 1. Download.
-2. addpath(genpath('DeepLearnToolbox'));
+2. run dltb_setup.m
 
 Known errors
 ------------------------------
diff --git a/dltb_setup.m b/dltb_setup.m
new file mode 100644
index 0000000..b35b56f
--- /dev/null
+++ b/dltb_setup.m
@@ -0,0 +1,7 @@
+% DeepLearnToolbox setup: adds the toolbox directories to the MATLAB path
+clc
+clear all
+global DLTB_HOMEDIR
+DLTB_HOMEDIR = pwd;
+
+addpath(genpath(DLTB_HOMEDIR));
diff --git a/util/relu.m b/util/relu.m
index 0000000..d0591a5
new file mode 100644
--- /dev/null
+++ b/util/relu.m
@@ -0,0 +1,9 @@
+function out = relu(X, nn)
+%RELU rectified linear activation with optional negative slope (leaky ReLU)
+% Params:
+%   X - input values; nn - network struct providing nn.neg_slope
+if isempty(nn.neg_slope)
+    nn.neg_slope = 0;                              % default to standard ReLU
+end
+out = max(X, 0) + nn.neg_slope * min(X, 0);        % X for X > 0, neg_slope * X otherwise
+end
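
For context, a minimal usage sketch (not part of the patch) showing how the new 'relu' activation and nn_online_step_train could be exercised; the data variables train_x/train_y, the [784 100 10] architecture, and the hyperparameter values are illustrative assumptions, not taken from the diff:

% assumes train_x is N x 784 and train_y is N x 10, with N divisible by opts.batchsize
run dltb_setup.m                          % put the toolbox on the MATLAB path

nn                     = nnsetup([784 100 10]);
nn.activation_function = 'relu';          % activation added by this patch
nn.neg_slope           = 0.01;            % leaky ReLU; leave at 0 for standard ReLU
nn.learningRate        = 0.1;             % ReLU generally wants a smaller rate than the default 2

opts.batchsize = 100;
nn = nn_online_step_train(nn, train_x, train_y, opts);   % one online pass over this chunk of samples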