-
Notifications
You must be signed in to change notification settings - Fork 1
/
RNN_Cell.cu
74 lines (62 loc) · 2.3 KB
/
RNN_Cell.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#include "RNN_Cell.h"
#include <cstdlib>
#include <math.h>
__global__ void Tanh(float* data, float* bias1, float* bias2, int height, int width) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i >= height * width) return;
int j = i % width;
data[i] += bias1[j] + bias2[j];
data[i] = tanh(data[i]); // ...
}
void RNN_Cell::initRandom() {
this->w_ih= new cuMatrix<float>(input_size, hidden_size, 1);
this->w_hh= new cuMatrix<float>(hidden_size, hidden_size, 1);
this->b_ih = new cuMatrix<float>(hidden_size, 1, 1);
this->b_hh = new cuMatrix<float>(hidden_size, 1, 1);
for(int i = 0; i < w_ih->getLen(); i++){
w_ih->getHost()[i] = (2.0f * rand() / RAND_MAX - 1.0f);
}
for(int j = 0; j < w_hh->getLen(); j++){
w_hh->getHost()[j] = (2.0f * rand() / RAND_MAX - 1.0f);
}
this->w_ih->toGpu();
this->w_hh->toGpu();
this->b_ih->toGpu();
this->b_hh->toGpu();
}
void RNN_Cell::initParams(float* _w_ih, float* _w_hh, float* _b_ih, float* _b_hh) {
this->w_ih= new cuMatrix<float>(input_size, hidden_size, 1);
this->w_hh= new cuMatrix<float>(hidden_size, hidden_size, 1);
this->b_ih = new cuMatrix<float>(hidden_size, 1, 1);
this->b_hh = new cuMatrix<float>(hidden_size, 1, 1);
for (int j = 0; j < w_ih->getLen(); j++){
w_ih->getHost()[j] = _w_ih[j];
}
for (int j = 0; j < w_hh->getLen(); j++){
w_hh->getHost()[j] = _w_hh[j];
}
for (int j = 0; j < b_ih->getLen(); j++){
b_ih->getHost()[j] = _b_ih[j];
}
for (int j = 0; j < b_hh->getLen(); j++){
b_hh->getHost()[j] = _b_hh[j];
}
this->w_ih->toGpu();
this->w_hh->toGpu();
this->b_ih->toGpu();
this->b_hh->toGpu();
}
/*
* input: [b, input_size], here b is real batch
* pre_hidden: [b, hidden_size]
* output: [b, hidden_size]
*/
cuMatrix<float>* RNN_Cell::forward(cuMatrix<float>* inputs, cuMatrix<float>* pre_hidden, cuMatrix<float>* outputs) {
matrixMul(inputs, w_ih, ih_outputs);
matrixMul(pre_hidden, w_hh, hh_outputs);
matrixAdd(ih_outputs, hh_outputs, outputs, 1);
int blockDim = 256;
int numBlocks = (batch_size * hidden_size + blockDim - 1) / blockDim;
Tanh<<<numBlocks, blockDim>>>(outputs->getDev(), b_hh->getDev(), b_ih->getDev(), batch_size, hidden_size);
return outputs;
}