Commit

SdA.java
Yusuke Sugomori committed Mar 21, 2013
1 parent 6d6b412 commit ff41d30
Showing 4 changed files with 526 additions and 0 deletions.
74 changes: 74 additions & 0 deletions java/SdA/src/HiddenLayer.java
@@ -0,0 +1,74 @@
import java.util.Random;

public class HiddenLayer {
    public int N;
    public int n_in;
    public int n_out;
    public double[][] W;
    public double[] b;
    public Random rng;

    // uniform random draw from [min, max)
    public double uniform(double min, double max) {
        return rng.nextDouble() * (max - min) + min;
    }

    // draw a sample from a Binomial(n, p) distribution
    public int binomial(int n, double p) {
        if(p < 0 || p > 1) return 0;

        int c = 0;
        double r;

        for(int i=0; i<n; i++) {
            r = rng.nextDouble();
            if (r < p) c++;
        }

        return c;
    }

    public static double sigmoid(double x) {
        return 1.0 / (1.0 + Math.pow(Math.E, -x));
    }

    public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
        this.N = N;
        this.n_in = n_in;
        this.n_out = n_out;

        if(rng == null) this.rng = new Random(1234);
        else this.rng = rng;

        if(W == null) {
            // no shared weights given: initialize W uniformly in [-1/n_in, 1/n_in]
            this.W = new double[n_out][n_in];
            double a = 1.0 / this.n_in;

            for(int i=0; i<n_out; i++) {
                for(int j=0; j<n_in; j++) {
                    this.W[i][j] = uniform(-a, a);
                }
            }
        } else {
            this.W = W;
        }

        if(b == null) this.b = new double[n_out];
        else this.b = b;
    }

    // activation of a single hidden unit: sigmoid(w . input + b)
    public double output(int[] input, double[] w, double b) {
        double linear_output = 0.0;
        for(int j=0; j<n_in; j++) {
            linear_output += w[j] * input[j];
        }
        linear_output += b;
        return sigmoid(linear_output);
    }

    // fill sample with binary (0/1) draws of the hidden units given the visible input
    public void sample_h_given_v(int[] input, int[] sample) {
        for(int i=0; i<n_out; i++) {
            sample[i] = binomial(1, output(input, W[i], b[i]));
        }
    }
}
73 changes: 73 additions & 0 deletions java/SdA/src/LogisticRegression.java
@@ -0,0 +1,73 @@

public class LogisticRegression {
    public int N;
    public int n_in;
    public int n_out;
    public double[][] W;
    public double[] b;

    public LogisticRegression(int N, int n_in, int n_out) {
        this.N = N;
        this.n_in = n_in;
        this.n_out = n_out;

        W = new double[this.n_out][this.n_in];
        b = new double[this.n_out];
    }

    // train on one example (x, y): W and b move along lr * (y - p_y_given_x) / N
    public void train(int[] x, int[] y, double lr) {
        double[] p_y_given_x = new double[n_out];
        double[] dy = new double[n_out];

        for(int i=0; i<n_out; i++) {
            p_y_given_x[i] = 0;
            for(int j=0; j<n_in; j++) {
                p_y_given_x[i] += W[i][j] * x[j];
            }
            p_y_given_x[i] += b[i];
        }
        softmax(p_y_given_x);

        for(int i=0; i<n_out; i++) {
            dy[i] = y[i] - p_y_given_x[i];

            for(int j=0; j<n_in; j++) {
                W[i][j] += lr * dy[i] * x[j] / N;
            }

            b[i] += lr * dy[i] / N;
        }
    }

    // in-place softmax; the largest (non-negative) input is subtracted before exponentiating
    public void softmax(double[] x) {
        double max = 0.0;
        double sum = 0.0;

        for(int i=0; i<n_out; i++) {
            if(max < x[i]) {
                max = x[i];
            }
        }

        for(int i=0; i<n_out; i++) {
            x[i] = Math.exp(x[i] - max);
            sum += x[i];
        }

        for(int i=0; i<n_out; i++) {
            x[i] /= sum;
        }
    }

    // compute the class probabilities p(y|x) into y
    public void predict(int[] x, double[] y) {
        for(int i=0; i<n_out; i++) {
            y[i] = 0;
            for(int j=0; j<n_in; j++) {
                y[i] += W[i][j] * x[j];
            }
            y[i] += b[i];
        }

        softmax(y);
    }
}
229 changes: 229 additions & 0 deletions java/SdA/src/SdA.java
@@ -0,0 +1,229 @@
import java.util.Random;

public class SdA {
    public int N;
    public int n_ins;
    public int[] hidden_layer_sizes;
    public int n_outs;
    public int n_layers;
    public HiddenLayer[] sigmoid_layers;
    public dA[] dA_layers;
    public LogisticRegression log_layer;
    public Random rng;

    public static double sigmoid(double x) {
        return 1.0 / (1.0 + Math.pow(Math.E, -x));
    }

    public SdA(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) {
        int input_size;

        this.N = N;
        this.n_ins = n_ins;
        this.hidden_layer_sizes = hidden_layer_sizes;
        this.n_outs = n_outs;
        this.n_layers = n_layers;

        this.sigmoid_layers = new HiddenLayer[n_layers];
        this.dA_layers = new dA[n_layers];

        if(rng == null) this.rng = new Random(1234);
        else this.rng = rng;

        // construct multi-layer
        for(int i=0; i<this.n_layers; i++) {
            if(i == 0) {
                input_size = this.n_ins;
            } else {
                input_size = this.hidden_layer_sizes[i-1];
            }

            // construct sigmoid_layer
            this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);

            // construct dA_layer, sharing W and b with the corresponding sigmoid_layer
            this.dA_layers[i] = new dA(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng);
        }

        // layer for output using LogisticRegression
        this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
    }

    // greedy layer-wise pretraining: each dA layer is trained on the input
    // propagated (by sampling) through the sigmoid layers below it
    public void pretrain(int[][] train_X, double lr, double corruption_level, int epochs) {
        int[] layer_input = new int[0];
        int prev_layer_input_size;
        int[] prev_layer_input;

        for(int i=0; i<n_layers; i++) {  // layer-wise
            for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
                for(int n=0; n<N; n++) {  // input x1...xN
                    // layer input
                    for(int l=0; l<=i; l++) {

                        if(l == 0) {
                            layer_input = new int[n_ins];
                            for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j];
                        } else {
                            if(l == 1) prev_layer_input_size = n_ins;
                            else prev_layer_input_size = hidden_layer_sizes[l-2];

                            prev_layer_input = new int[prev_layer_input_size];
                            for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];

                            layer_input = new int[hidden_layer_sizes[l-1]];

                            sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input);
                        }
                    }

                    dA_layers[i].train(layer_input, lr, corruption_level);
                }
            }
        }
    }

    // supervised fine-tuning: each example is propagated (by sampling) through
    // the stacked sigmoid layers, and the logistic regression layer is trained on the result
    public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) {
        int[] layer_input = new int[0];
        // int prev_layer_input_size;
        int[] prev_layer_input = new int[0];

        for(int epoch=0; epoch<epochs; epoch++) {
            for(int n=0; n<N; n++) {

                // layer input
                for(int i=0; i<n_layers; i++) {
                    if(i == 0) {
                        prev_layer_input = new int[n_ins];
                        for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j];
                    } else {
                        prev_layer_input = new int[hidden_layer_sizes[i-1]];
                        for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
                    }

                    layer_input = new int[hidden_layer_sizes[i]];
                    sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input);
                }

                log_layer.train(layer_input, train_Y[n], lr);
            }
            // lr *= 0.95;
        }
    }

    // forward pass with deterministic (non-sampled) activations through the
    // stacked sigmoid layers, followed by the logistic regression output layer
    public void predict(int[] x, double[] y) {
        double[] layer_input = new double[0];
        // int prev_layer_input_size;
        double[] prev_layer_input = new double[n_ins];
        for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];

        double linear_output;

        // layer activation
        for(int i=0; i<n_layers; i++) {
            layer_input = new double[sigmoid_layers[i].n_out];

            for(int k=0; k<sigmoid_layers[i].n_out; k++) {
                linear_output = 0.0;  // reset for each hidden unit
                for(int j=0; j<sigmoid_layers[i].n_in; j++) {
                    linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
                }
                linear_output += sigmoid_layers[i].b[k];
                layer_input[k] = sigmoid(linear_output);
            }

            if(i < n_layers-1) {
                prev_layer_input = new double[sigmoid_layers[i].n_out];
                for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j];
            }
        }

        for(int i=0; i<log_layer.n_out; i++) {
            y[i] = 0;
            for(int j=0; j<log_layer.n_in; j++) {
                y[i] += log_layer.W[i][j] * layer_input[j];
            }
            y[i] += log_layer.b[i];
        }

        log_layer.softmax(y);
    }


    private static void test_sda() {
        Random rng = new Random(123);

        double pretrain_lr = 0.1;
        double corruption_level = 0.3;
        int pretraining_epochs = 1000;
        double finetune_lr = 0.1;
        int finetune_epochs = 500;

        int train_N = 10;
        int test_N = 4;
        int n_ins = 28;
        int n_outs = 2;
        int[] hidden_layer_sizes = {15, 15};
        int n_layers = hidden_layer_sizes.length;

        // training data
        int[][] train_X = {
            {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}
        };

        int[][] train_Y = {
            {1, 0},
            {1, 0},
            {1, 0},
            {1, 0},
            {1, 0},
            {0, 1},
            {0, 1},
            {0, 1},
            {0, 1},
            {0, 1}
        };

        // construct SdA
        SdA sda = new SdA(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng);

        // pretrain
        sda.pretrain(train_X, pretrain_lr, corruption_level, pretraining_epochs);

        // finetune
        sda.finetune(train_X, train_Y, finetune_lr, finetune_epochs);

        // test data
        int[][] test_X = {
            {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}
        };

        double[][] test_Y = new double[test_N][n_outs];

        // test
        for(int i=0; i<test_N; i++) {
            sda.predict(test_X[i], test_Y[i]);
            for(int j=0; j<n_outs; j++) {
                System.out.print(test_Y[i][j] + " ");
            }
            System.out.println();
        }
    }

    public static void main(String[] args) {
        test_sda();
    }
}
(Diff for the fourth changed file did not load on this page.)
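That fourth file is not expanded above, yet SdA.java depends on a dA class for its denoising-autoencoder layers. The stub below is only inferred from how SdA.java constructs dA and calls its train method during pretraining; it is a compilable placeholder, and its field and parameter names (n_visible, n_hidden, hbias, vbias) are assumptions rather than the names actually used in the repository.

import java.util.Random;

// Hypothetical stub inferred from SdA.java's usage of dA; the actual dA.java
// from this commit is not shown in the diff above.
public class dA {
    public int N;
    public int n_visible;
    public int n_hidden;
    public double[][] W;    // shared with the matching HiddenLayer in SdA
    public double[] hbias;  // shared hidden bias
    public double[] vbias;
    public Random rng;

    // SdA calls: new dA(N, input_size, hidden_layer_sizes[i],
    //                   sigmoid_layers[i].W, sigmoid_layers[i].b, null, rng)
    public dA(int N, int n_visible, int n_hidden,
              double[][] W, double[] hbias, double[] vbias, Random rng) {
        this.N = N;
        this.n_visible = n_visible;
        this.n_hidden = n_hidden;
        this.rng = (rng == null) ? new Random(1234) : rng;
        this.W = (W == null) ? new double[n_hidden][n_visible] : W;
        this.hbias = (hbias == null) ? new double[n_hidden] : hbias;
        this.vbias = (vbias == null) ? new double[n_visible] : vbias;
    }

    // SdA calls: dA_layers[i].train(layer_input, lr, corruption_level)
    public void train(int[] x, double lr, double corruption_level) {
        // The real class would perform one denoising-autoencoder update here:
        // corrupt x, encode it, reconstruct it, and adjust W, hbias, and vbias.
    }
}

With the real dA.java present alongside the other three sources, the example should build and run with plain javac and java (compile all files, then run the SdA class), printing the predicted class probabilities for the four test vectors.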
