forked from yusugomori/DeepLearning
Commit
Yusuke Sugomori committed on Mar 21, 2013
1 parent 6d6b412, commit ff41d30
Showing 4 changed files with 526 additions and 0 deletions.
HiddenLayer.java
@@ -0,0 +1,74 @@
import java.util.Random;

public class HiddenLayer {
    public int N;            // number of training samples
    public int n_in;         // number of input units
    public int n_out;        // number of hidden units
    public double[][] W;     // weight matrix (n_out x n_in)
    public double[] b;       // bias vector (n_out)
    public Random rng;

    // draw a sample from the uniform distribution on [min, max)
    public double uniform(double min, double max) {
        return rng.nextDouble() * (max - min) + min;
    }

    // draw a sample from the binomial distribution B(n, p)
    public int binomial(int n, double p) {
        if(p < 0 || p > 1) return 0;

        int c = 0;
        double r;

        for(int i=0; i<n; i++) {
            r = rng.nextDouble();
            if (r < p) c++;
        }

        return c;
    }

    public static double sigmoid(double x) {
        return 1.0 / (1.0 + Math.pow(Math.E, -x));
    }

    public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
        this.N = N;
        this.n_in = n_in;
        this.n_out = n_out;

        if(rng == null) this.rng = new Random(1234);
        else this.rng = rng;

        if(W == null) {
            // initialize weights uniformly in [-1/n_in, 1/n_in]
            this.W = new double[n_out][n_in];
            double a = 1.0 / this.n_in;

            for(int i=0; i<n_out; i++) {
                for(int j=0; j<n_in; j++) {
                    this.W[i][j] = uniform(-a, a);
                }
            }
        } else {
            this.W = W;
        }

        if(b == null) this.b = new double[n_out];
        else this.b = b;
    }

    // sigmoid activation of one hidden unit, given its weight row w and bias b
    public double output(int[] input, double[] w, double b) {
        double linear_output = 0.0;
        for(int j=0; j<n_in; j++) {
            linear_output += w[j] * input[j];
        }
        linear_output += b;
        return sigmoid(linear_output);
    }

    // sample a binary hidden vector given a visible (input) vector
    public void sample_h_given_v(int[] input, int[] sample) {
        for(int i=0; i<n_out; i++) {
            sample[i] = binomial(1, output(input, W[i], b[i]));
        }
    }
}
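Note: HiddenLayer is only driven by SdA further down, but a minimal standalone sketch may help when reading it. Everything below (the class name HiddenLayerExample, the sizes, the input vector) is made up for illustration and is not part of this commit.

// Hypothetical standalone use of HiddenLayer; compile alongside HiddenLayer.java.
import java.util.Random;

public class HiddenLayerExample {
    public static void main(String[] args) {
        Random rng = new Random(123);
        int n_in = 6, n_out = 3;

        // passing null for W and b lets the layer initialize them itself
        HiddenLayer layer = new HiddenLayer(1, n_in, n_out, null, null, rng);

        int[] input  = {1, 1, 0, 0, 1, 0};
        int[] sample = new int[n_out];

        // draw a binary hidden vector h ~ p(h|v) for the given input
        layer.sample_h_given_v(input, sample);

        for (int s : sample) System.out.print(s + " ");
        System.out.println();
    }
}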
LogisticRegression.java
@@ -0,0 +1,73 @@
public class LogisticRegression {
    public int N;            // number of training samples
    public int n_in;         // number of input units
    public int n_out;        // number of output units (classes)
    public double[][] W;     // weight matrix (n_out x n_in)
    public double[] b;       // bias vector (n_out)

    public LogisticRegression(int N, int n_in, int n_out) {
        this.N = N;
        this.n_in = n_in;
        this.n_out = n_out;

        W = new double[this.n_out][this.n_in];
        b = new double[this.n_out];
    }

    // one gradient step on the log-likelihood for a single example (x, y)
    public void train(int[] x, int[] y, double lr) {
        double[] p_y_given_x = new double[n_out];
        double[] dy = new double[n_out];

        for(int i=0; i<n_out; i++) {
            p_y_given_x[i] = 0;
            for(int j=0; j<n_in; j++) {
                p_y_given_x[i] += W[i][j] * x[j];
            }
            p_y_given_x[i] += b[i];
        }
        softmax(p_y_given_x);

        for(int i=0; i<n_out; i++) {
            dy[i] = y[i] - p_y_given_x[i];

            for(int j=0; j<n_in; j++) {
                W[i][j] += lr * dy[i] * x[j] / N;
            }

            b[i] += lr * dy[i] / N;
        }
    }

    // in-place softmax; the max is subtracted for numerical stability
    public void softmax(double[] x) {
        double max = 0.0;
        double sum = 0.0;

        for(int i=0; i<n_out; i++) {
            if(max < x[i]) {
                max = x[i];
            }
        }

        for(int i=0; i<n_out; i++) {
            x[i] = Math.exp(x[i] - max);
            sum += x[i];
        }

        for(int i=0; i<n_out; i++) {
            x[i] /= sum;
        }
    }

    // compute the class probabilities p(y|x) into y
    public void predict(int[] x, double[] y) {
        for(int i=0; i<n_out; i++) {
            y[i] = 0;
            for(int j=0; j<n_in; j++) {
                y[i] += W[i][j] * x[j];
            }
            y[i] += b[i];
        }

        softmax(y);
    }
}
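Note: a minimal standalone sketch of LogisticRegression on a made-up, linearly separable toy set; the class name LogisticRegressionExample and the data below are illustrative, not part of this commit. In SdA the same class sits on top of the stacked layers as the output layer.

// Hypothetical standalone use of LogisticRegression; compile alongside LogisticRegression.java.
public class LogisticRegressionExample {
    public static void main(String[] args) {
        int N = 4, n_in = 2, n_out = 2;
        LogisticRegression classifier = new LogisticRegression(N, n_in, n_out);

        // toy data: the active input position determines the class
        int[][] x = { {1, 0}, {1, 0}, {0, 1}, {0, 1} };
        int[][] y = { {1, 0}, {1, 0}, {0, 1}, {0, 1} };

        // repeated single-example gradient steps with a fixed learning rate
        for (int epoch = 0; epoch < 500; epoch++) {
            for (int n = 0; n < N; n++) {
                classifier.train(x[n], y[n], 0.1);
            }
        }

        // should put most of the probability mass on class 0
        double[] p = new double[n_out];
        classifier.predict(new int[]{1, 0}, p);
        System.out.println(p[0] + " " + p[1]);
    }
}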
SdA.java
@@ -0,0 +1,229 @@
import java.util.Random;

public class SdA {
    public int N;                        // number of training samples
    public int n_ins;                    // number of input units
    public int[] hidden_layer_sizes;
    public int n_outs;                   // number of output units (classes)
    public int n_layers;
    public HiddenLayer[] sigmoid_layers;
    public dA[] dA_layers;
    public LogisticRegression log_layer;
    public Random rng;

    public static double sigmoid(double x) {
        return 1.0 / (1.0 + Math.pow(Math.E, -x));
    }

    public SdA(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) {
        int input_size;

        this.N = N;
        this.n_ins = n_ins;
        this.hidden_layer_sizes = hidden_layer_sizes;
        this.n_outs = n_outs;
        this.n_layers = n_layers;

        this.sigmoid_layers = new HiddenLayer[n_layers];
        this.dA_layers = new dA[n_layers];

        if(rng == null) this.rng = new Random(1234);
        else this.rng = rng;

        // construct multi-layer
        for(int i=0; i<this.n_layers; i++) {
            if(i == 0) {
                input_size = this.n_ins;
            } else {
                input_size = this.hidden_layer_sizes[i-1];
            }

            // construct sigmoid_layer
            this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);

            // construct dA_layer; it shares W and b with the corresponding sigmoid_layer
            this.dA_layers[i] = new dA(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng);
        }

        // layer for output using LogisticRegression
        this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
    }

    // greedy layer-wise pretraining of the denoising autoencoders
    public void pretrain(int[][] train_X, double lr, double corruption_level, int epochs) {
        int[] layer_input = new int[0];
        int prev_layer_input_size;
        int[] prev_layer_input;

        for(int i=0; i<n_layers; i++) {  // layer-wise
            for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
                for(int n=0; n<N; n++) {  // input x1...xN
                    // propagate the input up through the layers below i to get layer i's input
                    for(int l=0; l<=i; l++) {

                        if(l == 0) {
                            layer_input = new int[n_ins];
                            for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j];
                        } else {
                            if(l == 1) prev_layer_input_size = n_ins;
                            else prev_layer_input_size = hidden_layer_sizes[l-2];

                            prev_layer_input = new int[prev_layer_input_size];
                            for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];

                            layer_input = new int[hidden_layer_sizes[l-1]];

                            sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input);
                        }
                    }

                    dA_layers[i].train(layer_input, lr, corruption_level);
                }
            }
        }
    }

    // supervised fine-tuning of the output (logistic regression) layer
    public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) {
        int[] layer_input = new int[0];
        // int prev_layer_input_size;
        int[] prev_layer_input = new int[0];

        for(int epoch=0; epoch<epochs; epoch++) {
            for(int n=0; n<N; n++) {

                // layer input
                for(int i=0; i<n_layers; i++) {
                    if(i == 0) {
                        prev_layer_input = new int[n_ins];
                        for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j];
                    } else {
                        prev_layer_input = new int[hidden_layer_sizes[i-1]];
                        for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
                    }

                    layer_input = new int[hidden_layer_sizes[i]];
                    sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input);
                }

                log_layer.train(layer_input, train_Y[n], lr);
            }
            // lr *= 0.95;
        }
    }

    // forward pass with mean activations (no sampling), then softmax output
    public void predict(int[] x, double[] y) {
        double[] layer_input = new double[0];
        // int prev_layer_input_size;
        double[] prev_layer_input = new double[n_ins];
        for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];

        double linear_output;

        // layer activation
        for(int i=0; i<n_layers; i++) {
            layer_input = new double[sigmoid_layers[i].n_out];

            for(int k=0; k<sigmoid_layers[i].n_out; k++) {
                linear_output = 0.0;  // reset per hidden unit so pre-activations do not accumulate
                for(int j=0; j<sigmoid_layers[i].n_in; j++) {
                    linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
                }
                linear_output += sigmoid_layers[i].b[k];
                layer_input[k] = sigmoid(linear_output);
            }

            if(i < n_layers-1) {
                prev_layer_input = new double[sigmoid_layers[i].n_out];
                for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j];
            }
        }

        // output layer: softmax over the top hidden layer's activations
        for(int i=0; i<log_layer.n_out; i++) {
            y[i] = 0;
            for(int j=0; j<log_layer.n_in; j++) {
                y[i] += log_layer.W[i][j] * layer_input[j];
            }
            y[i] += log_layer.b[i];
        }

        log_layer.softmax(y);
    }

    private static void test_sda() {
        Random rng = new Random(123);

        double pretrain_lr = 0.1;
        double corruption_level = 0.3;
        int pretraining_epochs = 1000;
        double finetune_lr = 0.1;
        int finetune_epochs = 500;

        int train_N = 10;
        int test_N = 4;
        int n_ins = 28;
        int n_outs = 2;
        int[] hidden_layer_sizes = {15, 15};
        int n_layers = hidden_layer_sizes.length;

        // training data
        int[][] train_X = {
            {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}
        };

        int[][] train_Y = {
            {1, 0},
            {1, 0},
            {1, 0},
            {1, 0},
            {1, 0},
            {0, 1},
            {0, 1},
            {0, 1},
            {0, 1},
            {0, 1}
        };

        // construct SdA
        SdA sda = new SdA(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng);

        // pretrain
        sda.pretrain(train_X, pretrain_lr, corruption_level, pretraining_epochs);

        // finetune
        sda.finetune(train_X, train_Y, finetune_lr, finetune_epochs);

        // test data
        int[][] test_X = {
            {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}
        };

        double[][] test_Y = new double[test_N][n_outs];

        // test
        for(int i=0; i<test_N; i++) {
            sda.predict(test_X[i], test_Y[i]);
            for(int j=0; j<n_outs; j++) {
                System.out.print(test_Y[i][j] + " ");
            }
            System.out.println();
        }
    }

    public static void main(String[] args) {
        test_sda();
    }
}
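Note: SdA also depends on a dA (denoising autoencoder) class, presumably the fourth changed file in this commit, which is not rendered on this page. The skeleton below only restates the constructor and method shape that SdA's calls assume; the parameter names are guesses, and the real implementation (input corruption, reconstruction, weight updates) lives in the missing file. With that real dA.java in place, compiling the four classes and running java SdA prints one row of class probabilities per test vector.

// Assumed shape of the dA class, inferred from how SdA uses it above; not the actual file.
import java.util.Random;

public class dA {
    public dA(int N, int n_visible, int n_hidden,
              double[][] W, double[] hbias, double[] vbias, Random rng) {
        // SdA passes the HiddenLayer's W and b here, so the autoencoder
        // and its sigmoid layer share parameters (the sixth argument is passed as null)
    }

    public void train(int[] x, double lr, double corruption_level) {
        // one denoising-autoencoder update on the input vector x
    }
}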