diff --git a/examples/autograd/mlp.py b/examples/autograd/mlp.py
index e90ff1d7df..dfc67b3c79 100755
--- a/examples/autograd/mlp.py
+++ b/examples/autograd/mlp.py
@@ -84,7 +84,7 @@ def to_categorical(y, num_classes):
         x = autograd.relu(x)
         x = autograd.matmul(x, w1)
         x = autograd.add_bias(x, b1)
-        x = autograd.soft_max(x)
+        x = autograd.softmax(x)
         loss = autograd.cross_entropy(x, target)
         for p, gp in autograd.backward(loss):
             sgd.apply(0, gp, p, '')
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 11ad644bb5..e1557ca9e2 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -448,7 +448,7 @@ def backward(self, dy):
         return singa.DefaultTranspose(dx)


-def soft_max(x, axis=0):
+def softmax(x, axis=0):
     return SoftMax(axis)(x)[0]


@@ -540,7 +540,7 @@ def backward(self, dy=1.0):
         pass  # TODO, broadcast elementwise multiply seems not support


-def mean_square_error(x, t):
+def mse_loss(x, t):
     return MeanSquareError()(x, t)[0]


@@ -1076,7 +1076,8 @@ def backward(self, dy):
         return dx1, dx2


-def elemmatmul(x, y):
+def mul(x, y):
+    # do pointwise multiplication
     return ElemMatmul()(x, y)[0]


@@ -1088,7 +1089,7 @@ def add_all(*xs):
     return


-class RNN(Layer):
+class RNN_Base(Layer):

     def __init__(self):
         raise NotImplementedError
@@ -1100,7 +1101,7 @@ def step_forward(self):
         raise NotImplementedError


-class Vanilla_RNN(RNN):
+class RNN(RNN_Base):

     def __init__(self, input_size, hidden_size, num_layers=1, nonlinearity='tanh', bias=True, batch_first=False, dropout=0, bidirectional=False):
         self.nonlinearity = nonlinearity
@@ -1119,7 +1120,10 @@ def __init__(self, input_size, hidden_size, num_layers=1, nonlinearity='tanh', b

         self.params = (self.Wx, self.Wh, self.b)

-    def __call__(self, h0, *xs):
+    def __call__(self, xs, h0):
+        # xs: a tuple or list of input tensors
+        if not isinstance(xs, tuple):
+            xs = tuple(xs)
         inputs = xs + (h0,)
         self.device_check(*inputs)
         #self.device_check(inputs[0], *self.params)
@@ -1148,7 +1152,7 @@ def step_forward(self, x, h, Wx, Wh, b):
         return y


-class LSTM(RNN):
+class LSTM(RNN_Base):

     def __init__(self, input_size, hidden_size, nonlinearity='tanh', num_layers=1, bias=True, batch_first=False, dropout=0, bidirectional=False):
         self.nonlinearity = nonlinearity
@@ -1183,8 +1187,11 @@ def __init__(self, input_size, hidden_size, nonlinearity='tanh', num_layers=1, b

         self.params = self.Wx + self.Wh + self.Bx + self.Bh

-    def __call__(self, h0, c0, *xs):
-        inputs = xs + (h0, c0)
+    def __call__(self, xs, h0_c0):
+        # xs: a tuple or list of input tensors
+        # h0_c0: a tuple of (h0, c0), the initial hidden and cell states
+        h0, c0 = h0_c0
+        inputs = list(xs) + [h0, c0]
         self.device_check(*inputs)
         #self.device_check(inputs[0], *self.params)
         self.device_check(inputs[0], *(self.Wx + self.Wh + self.Bx + self.Bh))
@@ -1229,10 +1236,10 @@ def step_forward(self, x, h, c, Wx, Wh, Bx, Bh):
         g = add(y1, y2)
         g = tanh(g)

-        cout1 = elemmatmul(f, c)
-        cout2 = elemmatmul(i, g)
+        cout1 = mul(f, c)
+        cout2 = mul(i, g)
         cout = add(cout1, cout2)

         hout = tanh(cout)
-        hout = elemmatmul(o, hout)
+        hout = mul(o, hout)
         return hout, cout
diff --git a/test/python/test_operation.py b/test/python/test_operation.py
index 9fb6a57c06..1df89498bf 100755
--- a/test/python/test_operation.py
+++ b/test/python/test_operation.py
@@ -141,9 +141,9 @@ def test_batchnorm2d_gpu(self):
     def test_vanillaRNN_gpu_tiny_ops_shape_check(self):
         # gradients shape check.
         inputs, target, h0 = prepare_inputs_targets_for_rnn_test()
-        rnn = autograd.Vanilla_RNN(3, 2)
+        rnn = autograd.RNN(3, 2)

-        hs, _ = rnn(h0, *inputs)
+        hs, _ = rnn(inputs, h0)

         loss = autograd.softmax_cross_entropy(hs[0], target[0])
         for i in range(1, len(hs)):
@@ -162,7 +162,7 @@ def test_LSTM_gpu_tiny_ops_shape_check(self):

         rnn = autograd.LSTM(3, 2)

-        hs, _, _ = rnn(h0, c0, *inputs)
+        hs, _, _ = rnn(inputs, (h0, c0))

         loss = autograd.softmax_cross_entropy(hs[0], target[0])
         for i in range(1, len(hs)):
@@ -206,10 +206,10 @@ def gradients_check(self, func, param, autograds, h=0.0005, df=1):
     def test_numerical_gradients_check_for_vallina_rnn(self):
         inputs, target, h0 = prepare_inputs_targets_for_rnn_test()

-        rnn = autograd.Vanilla_RNN(3, 2)
+        rnn = autograd.RNN(3, 2)

         def valinna_rnn_forward():
-            hs, _ = rnn(h0, *inputs)
+            hs, _ = rnn(inputs, h0)

             loss = autograd.softmax_cross_entropy(hs[0], target[0])
             for i in range(1, len(hs)):
@@ -234,7 +234,7 @@ def test_numerical_gradients_check_for_lstm(self):
         rnn = autograd.LSTM(3, 2)

         def lstm_forward():
-            hs, _, _ = rnn(h0, c0, *inputs)
+            hs, _, _ = rnn(inputs, (h0, c0))

             loss = autograd.softmax_cross_entropy(hs[0], target[0])
             for i in range(1, len(hs)):
@@ -258,7 +258,7 @@ def test_MeanSquareError(self):
         x.to_device(gpu_dev)
         t.to_device(gpu_dev)

-        loss= autograd.mean_square_error(x,t)
+        loss= autograd.mse_loss(x,t)
         dx=loss.creator.backward()[0]

         loss_np=tensor.to_numpy(loss)
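
For reference, below is a minimal usage sketch of the renamed API; it is not part of the patch. Both recurrent layers now take the input sequence first and the initial state(s) second, with the LSTM's (h0, c0) packed into one tuple, matching the updated tests. The sketch assumes the module-level autograd.training flag and the default host device, and it builds its own toy tensors instead of using the test helper prepare_inputs_targets_for_rnn_test; shapes mirror the tiny RNN tests above (batch size 2, input size 3, hidden size 2, two time steps).

# Minimal usage sketch for the renamed autograd API (not part of the patch).
from singa import autograd, tensor, device

autograd.training = True              # enable gradient recording, as in the test suite
dev = device.get_default_device()     # host device; the tests above use a GPU device

# two time steps of shape (batch=2, input_size=3)
x0 = tensor.Tensor(shape=(2, 3), device=dev)
x1 = tensor.Tensor(shape=(2, 3), device=dev)
x0.gaussian(0.0, 1.0)
x1.gaussian(0.0, 1.0)

# initial states of shape (batch=2, hidden_size=2)
h0 = tensor.Tensor(shape=(2, 2), device=dev)
c0 = tensor.Tensor(shape=(2, 2), device=dev)
h0.set_value(0.0)
c0.set_value(0.0)

rnn = autograd.RNN(3, 2)              # was autograd.Vanilla_RNN
hs, hn = rnn([x0, x1], h0)            # inputs first, then the initial hidden state

lstm = autograd.LSTM(3, 2)
hs, hn, cn = lstm([x0, x1], (h0, c0)) # (h0, c0) passed as a single tuple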