This repository has been archived by the owner on Sep 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
/
rnn.py
118 lines (104 loc) · 4.19 KB
/
rnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import tensorflow as tf
from tensorflow.python.ops.rnn_cell_impl import RNNCell
class GRU:
    '''
    Hand-rolled GRU layer built from raw TensorFlow variables.

    The three gate projections (reset, update, candidate) are stored packed
    along the last axis of ``w_matrix``, ``U`` and ``bias``, which is why each
    of them has a trailing dimension of ``3 * hidden_dim``.
    '''

    def __init__(self, name, input_dim, hidden_dim):
        '''
        Record the layer configuration; no TensorFlow variables are created
        here -- call ``define_params`` for that.

        name: prefix for variable names; '/gru' is appended to it.
        input_dim: size of the last axis of the inputs fed to ``build``.
        hidden_dim: size of the hidden state.
        '''
        self.name = '/'.join([name, 'gru'])
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        # Populated by define_params().
        self.w_matrix = None
        self.U = None
        self.bias = None

    def define_params(self):
        '''
        Create the input weights, recurrent weights and bias.

        All three are initialised from a normal distribution with
        stddev 0.1. Returns ``self`` so the call can be chained.
        '''
        gates_dim = 3 * self.hidden_dim
        prefix = self.name
        self.w_matrix = tf.Variable(
            tf.random_normal([self.input_dim, gates_dim], stddev=0.1),
            name='/'.join([prefix, 'W']))
        self.U = tf.Variable(
            tf.random_normal([self.hidden_dim, gates_dim], stddev=0.1),
            name='/'.join([prefix, 'U']))
        self.bias = tf.Variable(
            tf.random_normal([1, gates_dim], stddev=0.1),
            name='/'.join([prefix, 'b']))
        return self

    def build(self, x, h, mask=None):
        '''
        Build one GRU step and return the next hidden state.

        x: input for this step (presumably [batch, input_dim] -- confirm
           against the caller; it is matrix-multiplied with ``w_matrix``).
        h: previous hidden state (matrix-multiplied with ``U``).
        mask: optional multiplier; where it is 1 the new state is used,
              where it is 0 the previous state ``h`` is carried over.
        '''
        # Project once, then slice the packed result into the three gates.
        x_reset, x_update, x_cand = tf.split(
            tf.matmul(x, self.w_matrix) + self.bias, 3, 1)
        h_reset, h_update, h_cand = tf.split(tf.matmul(h, self.U), 3, 1)

        r = tf.sigmoid(x_reset + h_reset)
        z = tf.sigmoid(x_update + h_update)
        candidate = tf.tanh(x_cand + r * h_cand)
        next_h = candidate * (1 - z) + h * z
        if mask is not None:
            next_h = next_h * mask + h * (1 - mask)
        return next_h

    def build_sequence(self, xs, masks, init, is_left_to_right):
        '''
        Unroll the GRU over a sequence of per-step inputs.

        xs: sequence of per-step inputs.
        masks: per-step masks indexable like ``xs``, or None to run every
               step unmasked.
        init: initial hidden state.
        is_left_to_right: when true, process ``xs`` from first to last;
                          otherwise from last to first.

        Returns a list of hidden states aligned with ``xs`` (element ``i`` is
        the state produced when consuming ``xs[i]``), whatever the direction.
        '''
        states = []
        last = init
        if is_left_to_right:
            for i, xs_i in enumerate(xs):
                step_mask = None if masks is None else masks[i]
                last = self.build(xs_i, last, step_mask)
                states.append(last)
        else:
            for i in reversed(range(len(xs))):
                step_mask = None if masks is None else masks[i]
                last = self.build(xs[i], last, step_mask)
                states.append(last)
            # Appending and reversing once avoids the quadratic cost of
            # inserting every state at the front of the list.
            states.reverse()
        return states
class XGRUCell(RNNCell):
    '''
    GRU cell implemented as an ``RNNCell`` subclass.

    Weights are fetched with ``tf.get_variable`` inside ``call`` under the
    names "W", "U" and "b"; each packs the reset, update and candidate
    projections along its last axis (``3 * hidden_dim``).
    '''

    def __init__(self, hidden_dim, reuse=None):
        '''
        hidden_dim: size of the hidden state (also the output size).
        reuse: forwarded to the base class as ``_reuse``.
        '''
        # Do not pass `self` explicitly: super() already binds it, and an
        # extra positional argument would be taken as the base constructor's
        # first parameter.
        super(XGRUCell, self).__init__(_reuse=reuse)
        self._num_units = hidden_dim
        self._activation = tf.tanh

    @property
    def state_size(self):
        '''Size of the hidden state.'''
        return self._num_units

    @property
    def output_size(self):
        '''Size of the output, identical to the state size.'''
        return self._num_units

    def call(self, inputs, state):
        '''
        Run one GRU step.

        inputs: input tensor for this step; its last dimension must be
                statically known because it sizes the "W" variable.
        state: previous hidden state.

        Returns ``(output, new_state)``, both being the new hidden state.
        Raises ValueError when the last dimension of ``inputs`` is unknown.
        '''
        # as_list() yields plain ints / None, so the check below is effective
        # whether indexing a TensorShape returns a Dimension object or an int.
        input_dim = inputs.get_shape().as_list()[-1]
        if input_dim is None:
            raise ValueError("input dimension must be defined")

        gates_dim = 3 * self._num_units
        w_in = tf.get_variable(
            name="W", shape=[input_dim, gates_dim], dtype=tf.float32)
        w_rec = tf.get_variable(
            name='U', shape=[self._num_units, gates_dim], dtype=tf.float32)
        bias = tf.get_variable(
            name='b', shape=[1, gates_dim], dtype=tf.float32)

        # Project once, then slice the packed result into the three gates.
        x_reset, x_update, x_cand = tf.split(
            tf.matmul(inputs, w_in) + bias, 3, 1)
        h_reset, h_update, h_cand = tf.split(tf.matmul(state, w_rec), 3, 1)

        r = tf.sigmoid(x_reset + h_reset)
        z = tf.sigmoid(x_update + h_update)
        candidate = self._activation(x_cand + r * h_cand)
        next_h = candidate * (1 - z) + state * z
        return next_h, next_h