Commit

doc
Lingjun Liu committed Sep 14, 2019
1 parent e0e81f0 commit 80c985c
Showing 6 changed files with 50 additions and 176 deletions.
41 changes: 2 additions & 39 deletions tensorlayer/models/transformer/attention_layer.py
@@ -60,6 +60,7 @@ def get_config(self):
}

def build(self, inputs_shape):

# Transformation for linearly projecting the queries, keys, and values.
self.q_transformation = self._get_weights(
"q_project", shape=(self.hidden_size, self.hidden_size), init=tf.initializers.get('glorot_uniform')
@@ -75,20 +76,7 @@ def build(self, inputs_shape):
)

def split_heads(self, x):
"""Split x into different heads, and transpose the resulting value.
The tensor is transposed to ensure the inner dimensions hold the correct
values during the matrix multiplication.

Parameters
-----------
x: A tensor with shape [batch_size, length, hidden_size]
Returns
-----------
A tensor with shape [batch_size, num_heads, length, hidden_size/num_heads]
"""
with tf.name_scope("split_heads"):
batch_size = tf.shape(x)[0]
length = tf.shape(x)[1]
@@ -103,40 +91,15 @@ def split_heads(self, x):
return tf.transpose(x, [0, 2, 1, 3])
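The reshape hidden by the collapsed lines above follows from the docstring. A minimal standalone sketch of the split-head operation, assuming hidden_size is statically known and divisible by num_heads (names here are illustrative, not the layer's attributes):

    import tensorflow as tf

    def split_heads(x, num_heads):
        # x: [batch_size, length, hidden_size] -> [batch_size, num_heads, length, depth]
        batch_size, length = tf.shape(x)[0], tf.shape(x)[1]
        depth = x.shape[-1] // num_heads      # assumes hidden_size is divisible by num_heads
        x = tf.reshape(x, [batch_size, length, num_heads, depth])
        return tf.transpose(x, [0, 2, 1, 3])  # move the head dimension next to the batch dimension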

def combine_heads(self, x):
"""Combine tensor that has been split.
Parameters
-----------
x: A tensor with shape [batch_size, num_heads, length, hidden_size/num_heads]

Returns
-----------
A tensor with shape [batch_size, length, hidden_size]
"""
with tf.name_scope("combine_heads"):
batch_size = tf.shape(x)[0]
length = tf.shape(x)[2]
x = tf.transpose(x, [0, 2, 1, 3]) # --> [batch, length, num_heads, depth]
return tf.reshape(x, [batch_size, length, self.hidden_size])
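For intuition, a quick shape round trip through the split/combine pair, using the split_heads sketch above and the combine logic just shown (illustrative sizes: batch 2, length 10, hidden 512, 8 heads):

    x = tf.random.uniform([2, 10, 512])                           # [batch, length, hidden]
    h = split_heads(x, num_heads=8)                               # [2, 8, 10, 64]
    y = tf.reshape(tf.transpose(h, [0, 2, 1, 3]), [2, 10, 512])   # combine_heads: back to [2, 10, 512]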

def forward(self, x, y, mask, cache=None):
"""Apply attention mechanism to x and y.
Parameters
-----------
x: a tensor with shape [batch_size, length_x, hidden_size]
y: a tensor with shape [batch_size, length_y, hidden_size]
mask: attention bias that will be added to the result of the dot product.
cache: (used during prediction) dictionary with tensors containing results
of previous attentions. The dictionary must have the items:
{"k": tensor with shape [batch_size, i, key_channels],
"v": tensor with shape [batch_size, i, value_channels]}
where i is the current decoded length.

Returns
-----------
Attention layer output with shape [batch_size, length_x, hidden_size]
Attention weights with shape [batch_size, num_heads, length_x, length_y]
"""
"""Apply attention mechanism to x and y."""
# Linearly project the query (q), key (k) and value (v) using different
# learned projections. This is in preparation of splitting them into
# multiple heads. Multi-head attention uses multiple queries, keys, and
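The rest of the forward pass is collapsed in the diff. For orientation, a hedged sketch of the multi-head attention flow the docstring describes, under the usual Transformer formulation; the helper itself, its weight arguments, and the depth parameter are assumptions, not the layer's actual API:

    import tensorflow as tf

    def multihead_attention(x, y, mask, wq, wk, wv, wo, num_heads, depth):
        # Project q/k/v, split into heads, apply scaled dot-product attention, merge, project out.
        def split(t):
            b, l = tf.shape(t)[0], tf.shape(t)[1]
            return tf.transpose(tf.reshape(t, [b, l, num_heads, depth]), [0, 2, 1, 3])

        q = split(tf.tensordot(x, wq, axes=1))               # [batch, heads, len_x, depth]
        k = split(tf.tensordot(y, wk, axes=1))               # [batch, heads, len_y, depth]
        v = split(tf.tensordot(y, wv, axes=1))

        logits = tf.matmul(q, k, transpose_b=True) / tf.sqrt(tf.cast(depth, tf.float32))
        logits += mask                                       # attention bias added to the dot product
        weights = tf.nn.softmax(logits)                      # [batch, heads, len_x, len_y]
        context = tf.matmul(weights, v)                      # [batch, heads, len_x, depth]

        b, lx = tf.shape(x)[0], tf.shape(x)[1]
        merged = tf.reshape(tf.transpose(context, [0, 2, 1, 3]), [b, lx, num_heads * depth])
        return tf.tensordot(merged, wo, axes=1), weights     # output and attention weights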
@@ -72,10 +72,11 @@ def sequence_beam_search(
eos_id: int
id of eos token, used to determine when a sequence has finished
Returns
Notes
-------
Top decoded sequences [batch_size, beam_size, max_decode_length]
sequence scores [batch_size, beam_size]
This function returns:
Top decoded sequences [batch_size, beam_size, max_decode_length]
sequence scores [batch_size, beam_size]
"""

batch_size = tf.shape(initial_ids)[0]
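For context on the collapsed body: the search is driven by a user-supplied symbols_to_logits_fn. In the reference TensorFlow implementation this callback takes the ids decoded so far, the current index, and the cache, and returns per-token logits plus the updated cache; treat the exact signature below as an assumption, not a guarantee about this codebase:

    import tensorflow as tf

    def symbols_to_logits_fn(ids, i, cache):
        # ids: [batch_size * beam_size, i + 1] tokens decoded so far; return next-token logits.
        vocab_size = 32000                                    # illustrative size
        logits = tf.zeros([tf.shape(ids)[0], vocab_size])     # stand-in for a real decoder call
        return logits, cache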
42 changes: 4 additions & 38 deletions tensorlayer/models/transformer/beamsearchHelper/beam_search_v1.py
@@ -166,15 +166,6 @@ def _continue_search(self, state):
2) when the worst score in the finished sequences is better than the best
score in the alive sequences (i.e. the finished sequences are provably
unchanging)
Parameters
-----------
state: A dictionary with the current loop state.
Returns
-----------
Bool tensor with value True if the loop should continue, False if it should
terminate.
"""
i = state[_StateKeys.CUR_INDEX]
alive_log_probs = state[_StateKeys.ALIVE_LOG_PROBS]
@@ -216,13 +207,6 @@ def _search_step(self, state):
by the length normalization factor. Without length normalization, the
search is more likely to return shorter sequences.
Parameters
-----------
state: A dictionary with the current loop state.
Returns
-----------
New state dictionary.
"""
# Grow alive sequences by one token.
new_seq, new_log_probs, new_cache = self._grow_alive_seq(state)
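The length-normalization factor mentioned above is, in the usual Transformer/GNMT formulation (an assumption about this implementation), computed as follows; finished sequences are ranked by their log-probability divided by this factor:

    def length_normalization(alpha, length):
        # Larger alpha favors longer sequences; alpha = 0 disables normalization.
        return ((5.0 + float(length)) / 6.0) ** alpha

    # score = log_prob / length_normalization(alpha, sequence_length)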
@@ -241,20 +225,9 @@ def _grow_alive_seq(self, state):

def _grow_alive_seq(self, state):
"""Grow alive sequences by one token, and collect top 2*beam_size sequences.
2*beam_size sequences are collected because some sequences may have reached
the EOS token. 2*beam_size ensures that at least beam_size sequences are
still alive.
Parameters
-----------
state: A dictionary with the current loop state.
Returns
-----------
Tuple of
(Top 2*beam_size sequences [batch_size, 2 * beam_size, cur_index + 1],
Scores of returned sequences [batch_size, 2 * beam_size],
New alive cache, for each of the 2 * beam_size sequences)
"""
i = state[_StateKeys.CUR_INDEX]
alive_seq = state[_StateKeys.ALIVE_SEQ]
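A small self-contained illustration of why 2 * beam_size candidates are collected at each step (shapes and scores below are hypothetical):

    import tensorflow as tf

    beam_size, vocab_size = 4, 100
    # Flattened scores of every (beam, next-token) candidate for one batch element.
    flat_log_probs = tf.random.uniform([beam_size * vocab_size])
    # Keep 2 * beam_size: even if beam_size of them just emitted EOS and move to the
    # finished set, beam_size candidates remain alive for the next step.
    topk_log_probs, topk_ids = tf.math.top_k(flat_log_probs, k=2 * beam_size)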
@@ -384,10 +357,11 @@ def sequence_beam_search(
eos_id: int
id of eos token, used to determine when a sequence has finished
Returns
Notes
-------
Top decoded sequences [batch_size, beam_size, max_decode_length]
sequence scores [batch_size, beam_size]
This function returns:
Top decoded sequences [batch_size, beam_size, max_decode_length]
sequence scores [batch_size, beam_size]
"""
batch_size = tf.shape(initial_ids)[0]
sbs = SequenceBeamSearch(symbols_to_logits_fn, vocab_size, batch_size, beam_size, alpha, max_decode_length, eos_id)
@@ -449,14 +423,6 @@ def _get_shape_keep_last_dim(tensor):

def _flatten_beam_dim(tensor):
"""Reshapes the first two dimensions into a single dimension.
Parameters
-----------
tensor: Tensor to reshape of shape [A, B, ...]
Returns
-----------
Reshaped tensor of shape [A*B, ...]
"""
shape = _shape_list(tensor)
shape[0] *= shape[1]
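A minimal sketch of the reshape this helper performs, assuming statically known batch and beam dimensions (the real helper handles dynamic shapes via _shape_list, as shown above):

    import tensorflow as tf

    def flatten_beam_dim(t):
        # [batch, beam, ...] -> [batch * beam, ...]
        shape = t.shape.as_list()
        return tf.reshape(t, [shape[0] * shape[1]] + shape[2:])

    x = tf.zeros([8, 4, 50])            # [batch=8, beam=4, length=50]
    print(flatten_beam_dim(x).shape)    # (32, 50)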
31 changes: 6 additions & 25 deletions tensorlayer/models/transformer/embedding_layer.py
@@ -30,8 +30,10 @@ def __init__(self, vocab_size, hidden_size):
Parameters
-----------
vocab_size: Number of tokens in the embedding. (Typically ~32,000)
hidden_size: Dimensionality of the embedding. (Typically 512 or 1024)
vocab_size : int
Number of tokens in the embedding. (Typically ~32,000)
hidden_size : int
Dimensionality of the embedding. (Typically 512 or 1024)
"""
super(EmbeddingLayer, self).__init__()
self.vocab_size = vocab_size
@@ -56,20 +58,7 @@ def get_config(self):
}

def forward(self, inputs, mode="embedding"):
"""Get token embeddings of inputs.
Parameters
-----------
inputs: An int64 tensor with shape [batch_size, length]
mode: string, either "embedding" or "linear".

Returns
-----------
outputs: (1) if mode == "embedding", an embedding tensor, float32 with
shape [batch_size, length, embedding_size]; (2) if mode == "linear", a
linear (logits) tensor, float32 with shape [batch_size, length, vocab_size].

Raises
-----------
ValueError: if mode is not valid.
"""
"""Get token embeddings of inputs."""
if mode == "embedding":
return self._embedding(inputs)
elif mode == "linear":
@@ -89,15 +78,7 @@ def _embedding(self, inputs):
return embeddings

def _linear(self, inputs):
"""Computes logits by running inputs through a linear layer.
Parameters
-----------
inputs: A float32 tensor with shape [batch_size, length, hidden_size]
Returns
-----------
float32 tensor with shape [batch_size, length, vocab_size].
"""
"""Computes logits by running inputs through a linear layer."""
with tf.name_scope("presoftmax_linear"):
batch_size = tf.shape(inputs)[0]
length = tf.shape(inputs)[1]
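The collapsed body reuses the embedding matrix as the pre-softmax projection ("linear" mode), i.e. weight tying as in the reference Transformer. A minimal sketch with illustrative sizes:

    import tensorflow as tf

    vocab_size, hidden_size = 32000, 512
    W = tf.random.normal([vocab_size, hidden_size])       # shared embedding / output matrix

    def embed(token_ids):
        # [batch, length] int ids -> [batch, length, hidden_size]
        return tf.gather(W, token_ids)

    def linear(hidden_states):
        # [batch, length, hidden_size] -> [batch, length, vocab_size] logits, reusing W
        return tf.tensordot(hidden_states, W, axes=[[2], [1]])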
22 changes: 7 additions & 15 deletions tensorlayer/models/transformer/feedforward_layer.py
@@ -30,9 +30,12 @@ def __init__(self, hidden_size, filter_size, keep_prob):
Parameters
-----------
hidden_size: int, output dim of hidden layer.
filter_size: int, filter size for the inner (first) dense layer.
relu_dropout: float, dropout rate for training.
hidden_size: int
output dim of hidden layer.
filter_size: int
filter size for the inner (first) dense layer.
keep_prob: float
keep probability for dropout during training.
"""
super(TransformerFeedForwardLayer, self).__init__()
self.hidden_size = hidden_size
@@ -60,18 +63,7 @@ def get_config(self):
}

def forward(self, inputs):
"""Return outputs of the feedforward network.
Parameters
-----------
inputs: a tensor with shape [batch_size, length, hidden_size]

Returns
-----------
Output of the feedforward network:
a tensor with shape [batch_size, length, hidden_size]
"""
"""Return outputs of the feedforward network."""
# Retrieve dynamically known shapes
x = inputs
batch_size = tf.shape(x)[0]
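The collapsed remainder applies the standard position-wise feed-forward computation: an inner dense layer of width filter_size with ReLU, dropout, then a dense layer back to hidden_size. A hedged sketch, treating keep_prob as a keep probability (an assumption) and with all weight arguments illustrative:

    import tensorflow as tf

    def feed_forward(x, w1, b1, w2, b2, keep_prob, training=True):
        # x: [batch, length, hidden_size]; w1: [hidden_size, filter_size]; w2: [filter_size, hidden_size]
        h = tf.nn.relu(tf.tensordot(x, w1, axes=1) + b1)    # inner (filter) layer with ReLU
        if training:
            h = tf.nn.dropout(h, rate=1.0 - keep_prob)      # assumes keep_prob is a keep probability
        return tf.tensordot(h, w2, axes=1) + b2             # project back to hidden_size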
