diff --git a/NN/nn_online_step_train.m b/NN/nn_online_step_train.m
new file mode 100644
index 0000000..816ef61
--- /dev/null
+++ b/NN/nn_online_step_train.m
@@ -0,0 +1,39 @@
+function nn = nn_online_step_train(nn, sample_x, sample_y, opts)
+%NN_ONLINE_STEP_TRAIN performs one online training step of a neural net
+% nn = nn_online_step_train(nn, sample_x, sample_y, opts) trains the neural network nn online
+% on input sample_x and target sample_y;
+% the samples can be split into minibatches of size opts.batchsize.
+% Returns a neural network nn with online-step-updated activations,
+% weights and biases (nn.a, nn.W, nn.b).
+
+assert(nargin == 4, 'number of input arguments must be 4')
+
+m = size(sample_y, 1);
+
+batchsize = opts.batchsize;
+
+numbatches = m / batchsize;
+
+assert(rem(numbatches, 1) == 0, 'numbatches must be an integer');
+
+kk = randperm(m);                          % shuffle the samples
+for l = 1 : numbatches
+    batch_x = sample_x(kk((l - 1) * batchsize + 1 : l * batchsize), :);
+
+    % Add noise to input (for use in denoising autoencoder)
+    if(nn.inputZeroMaskedFraction ~= 0)
+        batch_x = batch_x.*(rand(size(batch_x))>nn.inputZeroMaskedFraction);
+    end
+
+    batch_y = sample_y(kk((l - 1) * batchsize + 1 : l * batchsize), :);
+
+    nn = nnff(nn, batch_x, batch_y);
+    nn = nnbp(nn);
+    nn = nnapplygrads(nn);
+end
+
+nn.learningRate = nn.learningRate * nn.scaling_learningRate;
+
+
+
+
diff --git a/NN/nnbp.m b/NN/nnbp.m
index 1217719..245ec9b 100644
--- a/NN/nnbp.m
+++ b/NN/nnbp.m
@@ -17,6 +17,10 @@
                 d_act = nn.a{i} .* (1 - nn.a{i});
             case 'tanh_opt'
                 d_act = 1.7159 * 2/3 * (1 - 1/(1.7159)^2 * nn.a{i}.^2);
+            case 'relu'
+                d_act = nn.a{i};
+                d_act(d_act>0) = 1;
+                d_act(d_act<=0) = nn.neg_slope;   % slope for non-positive units (leaky ReLU)
         end
         
         if(nn.nonSparsityPenalty>0)
diff --git a/NN/nnff.m b/NN/nnff.m
index 32a2c69..60fcfac 100644
--- a/NN/nnff.m
+++ b/NN/nnff.m
@@ -17,6 +17,8 @@
                 nn.a{i} = sigm(nn.a{i - 1} * nn.W{i - 1}');
             case 'tanh_opt'
                 nn.a{i} = tanh_opt(nn.a{i - 1} * nn.W{i - 1}');
+            case 'relu'
+                nn.a{i} = relu(nn.a{i - 1} * nn.W{i - 1}', nn);
         end
         
         %dropout
diff --git a/NN/nnsetup.m b/NN/nnsetup.m
index b8ec742..0018749 100644
--- a/NN/nnsetup.m
+++ b/NN/nnsetup.m
@@ -6,7 +6,8 @@
     nn.size   = architecture;
     nn.n      = numel(nn.size);
 
-    nn.activation_function              = 'tanh_opt';   %  Activation functions of hidden layers: 'sigm' (sigmoid) or 'tanh_opt' (optimal tanh).
+    nn.activation_function              = 'tanh_opt';   %  Activation functions of hidden layers: 'sigm' (sigmoid), 'tanh_opt' (optimal tanh) or 'relu'.
+    nn.neg_slope                        = 0;            %  negative slope for the 'relu' activation (leaky ReLU); 0 gives standard ReLU
     nn.learningRate                     = 2;            %  learning rate Note: typically needs to be lower when using 'sigm' activation function and non-normalized inputs.
     nn.momentum                         = 0.5;          %  Momentum
     nn.scaling_learningRate             = 1;            %  Scaling factor for the learning rate (each epoch)
diff --git a/README.md b/README.md
index be277e4..aa433da 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@
 Setup
 -----
 1. Download.
-2. addpath(genpath('DeepLearnToolbox'));
+2. run dltb_setup.m
 
 Known errors
 ------------------------------
diff --git a/dltb_setup.m b/dltb_setup.m
new file mode 100644
index 0000000..b35b56f
--- /dev/null
+++ b/dltb_setup.m
@@ -0,0 +1,7 @@
+% DeepLearnToolbox setup: adds the toolbox directories to the MATLAB path
+clc
+clear all
+global DLTB_HOMEDIR
+DLTB_HOMEDIR = pwd;
+
+addpath(genpath(DLTB_HOMEDIR));
diff --git a/util/relu.m b/util/relu.m
index 0000000..d0591a5
new file mode 100644
--- /dev/null
+++ b/util/relu.m
@@ -0,0 +1,9 @@
+function out = relu(X, nn)
+%RELU rectified linear activation with optional negative slope (leaky ReLU)
+% Params:
+%   X - input values; nn - network struct providing nn.neg_slope
+if isempty(nn.neg_slope)
+    nn.neg_slope = 0;                              % default to standard ReLU
+end
+out = max(X, 0) + nn.neg_slope * min(X, 0);        % X for X > 0, neg_slope * X otherwise
+end
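
For context, a minimal usage sketch (not part of the patch) showing how the new 'relu' activation and nn_online_step_train could be exercised; the data variables train_x/train_y, the [784 100 10] architecture, and the hyperparameter values are illustrative assumptions, not taken from the diff:

% assumes train_x is N x 784 and train_y is N x 10, with N divisible by opts.batchsize
run dltb_setup.m                          % put the toolbox on the MATLAB path

nn                     = nnsetup([784 100 10]);
nn.activation_function = 'relu';          % activation added by this patch
nn.neg_slope           = 0.01;            % leaky ReLU; leave at 0 for standard ReLU
nn.learningRate        = 0.1;             % ReLU generally wants a smaller rate than the default 2

opts.batchsize = 100;
nn = nn_online_step_train(nn, train_x, train_y, opts);   % one online pass over this chunk of samples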