forked from xixiaoyao/CS224n-winter-together
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
hw5.ipynb - for running sanity checks. Colab GPU training time ~6 hours; BLEU 36.38.
Showing 25 changed files with 169,396 additions and 0 deletions.
20,030 changes: 20,030 additions & 0 deletions
Assignments/assignment5/Herais/Ans 1 Coding.mht
20,397 changes: 20,397 additions & 0 deletions
Assignments/assignment5/Herais/Ans 1 Written.mht
@@ -0,0 +1,6 @@
# Assignment #5 Written and Coding
- Ans 1 Written.mht
- Ans 1 Coding.mht
- Ans 2 Coding.mht
- Ans 3.mht
- hw5 jupyter
Empty file.
@@ -0,0 +1,186 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
CS224N 2019-20: Homework 5
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
from vocab import VocabEntry
import numpy as np
import re


class CharDecoder(nn.Module):
    def __init__(self, hidden_size, char_embedding_size=50, target_vocab=None):
        """ Init Character Decoder.
        @param hidden_size (int): Hidden size of the decoder LSTM
        @param char_embedding_size (int): dimensionality of character embeddings
        @param target_vocab (VocabEntry): vocabulary for the target language. See vocab.py for documentation.
        """
        super(CharDecoder, self).__init__()
        self.target_vocab = target_vocab
        self.charDecoder = nn.LSTM(char_embedding_size, hidden_size)
        self.char_output_projection = nn.Linear(hidden_size, len(self.target_vocab.char2id))
        self.decoderCharEmb = nn.Embedding(len(self.target_vocab.char2id), char_embedding_size,
                                           padding_idx=self.target_vocab.char_pad)

    def forward(self, input, dec_hidden=None):
        """ Forward pass of character decoder.
        @param input (Tensor): tensor of integers, shape (length, batch_size)
        @param dec_hidden (tuple(Tensor, Tensor)): internal state of the LSTM before reading the input characters. A tuple of two tensors of shape (1, batch, hidden_size)
        @returns scores (Tensor): called s_t in the PDF, shape (length, batch_size, self.vocab_size)
        @returns dec_hidden (tuple(Tensor, Tensor)): internal state of the LSTM after reading the input characters. A tuple of two tensors of shape (1, batch, hidden_size)
        """
        ### YOUR CODE HERE for part 2a
        ### TODO - Implement the forward pass of the character decoder.

        # input: [length, b] ==> decoderCharEmb ==> X: [length, b, char_embed_size]
        X = self.decoderCharEmb(input)

        # X: [length, b, char_embed_size], dec_hidden = (h_n, c_n): ([1, b, h], [1, b, h])
        # ==> charDecoder ==>
        # h_t: [length, b, h], dec_hidden = (h_n, c_n): ([1, b, h], [1, b, h])
        h_t, dec_hidden = self.charDecoder(X, dec_hidden)

        # h_t: [length, b, h] ==> char_output_projection ==> scores = s_t: [length, b, self.vocab_size]
        scores = self.char_output_projection(h_t)

        return scores, dec_hidden
        ### END YOUR CODE

    def train_forward(self, char_sequence, dec_hidden=None):
        """ Forward computation during training.
        @param char_sequence (Tensor): tensor of integers, shape (length, batch_size). Note that "length" here and in forward() need not be the same.
        @param dec_hidden (tuple(Tensor, Tensor)): initial internal state of the LSTM, obtained from the output of the word-level decoder. A tuple of two tensors of shape (1, batch_size, hidden_size)
        @returns The cross-entropy loss (Tensor), computed as the *sum* of cross-entropy losses of all the words in the batch.
        """
        ### YOUR CODE HERE for part 2b
        ### TODO - Implement training forward pass.
        ###
        ### Hint: - Make sure padding characters do not contribute to the cross-entropy loss. Check vocab.py to find the padding token's index.
        ###       - char_sequence corresponds to the sequence x_1 ... x_{n+1} (e.g., <START>,m,u,s,i,c,<END>). Read the handout about how to construct the input and target sequences of the CharDecoderLSTM.
        ###       - Carefully read the documentation for nn.CrossEntropyLoss and our handout to see what this criterion has already included:
        ###         https://pytorch.org/docs/stable/nn.html#crossentropyloss

        # char_sequence: [length, b] ==> drop the end token ==> X_input: [length-1, b]
        X_input = char_sequence[:-1]

        # char_sequence: [length, b] ==> drop the start token ==> X_target: [length-1, b]
        X_target = char_sequence[1:]

        # X_input: [length-1, b], dec_hidden = (h_n, c_n): ([1, b, h], [1, b, h])
        # ==> self.forward ==>
        # s_t: [length-1, b, self.vocab_size], dec_hidden = (h_n, c_n): ([1, b, h], [1, b, h])
        s_t, dec_hidden = self.forward(X_input, dec_hidden)

        # Look up the padding character's index in the target vocabulary (0 by convention).
        idx_char_pad = self.target_vocab.char_pad

        # Instantiate CrossEntropyLoss, which combines log-softmax and NLL loss;
        # padding positions are ignored and the per-character losses are summed.
        compute_loss = nn.CrossEntropyLoss(ignore_index=idx_char_pad,
                                           reduction='sum')

        # Reshape s_t for compute_loss.
        # length = length of a word, b = batch size, (length-1)*b = number of scored characters in the batch
        # s_t: [length-1, b, self.vocab_size] ==> s_t: [(length-1)*b, self.vocab_size] = [N, C]
        s_t = s_t.reshape(s_t.shape[0] * s_t.shape[1], -1)

        # Reshape X_target for compute_loss.
        # X_target: [length-1, b] ==> X_target: [(length-1)*b] = [N]
        X_target = X_target.reshape(-1)

        # s_t: [N, C], X_target: [N] ==> compute_loss ==> loss_char_dec (scalar)
        loss_char_dec = compute_loss(s_t, X_target)

        return loss_char_dec
        ### END YOUR CODE

    def decode_greedy(self, initialStates, device, max_length=21):
        """ Greedy decoding
        @param initialStates (tuple(Tensor, Tensor)): initial internal state of the LSTM, a tuple of two tensors of size (1, batch_size, hidden_size)
        @param device: torch.device (indicates whether the model is on CPU or GPU)
        @param max_length (int): maximum length of words to decode
        @returns decodedWords (List[str]): a list (of length batch_size) of strings, each of which has length <= max_length.
                 The decoded strings should NOT contain the start-of-word and end-of-word characters.
        """
        ### YOUR CODE HERE for part 2c
        ### TODO - Implement greedy decoding.
        ### Hints:
        ###      - Use initialStates to get batch_size = b.
        ###      - Use target_vocab.char2id and target_vocab.id2char to convert between integers and characters
        ###      - Use torch.tensor(..., device=device) to turn a list of character indices into a tensor.
        ###      - You may find torch.argmax useful
        ###      - We use curly brackets as start-of-word and end-of-word characters. That is, use the character '{' for <START> and '}' for <END>.
        ###        Their indices are self.target_vocab.start_of_word and self.target_vocab.end_of_word, respectively.

        # initialStates (tuple(Tensor, Tensor)): ([1, batch_size, hidden_size], [1, batch_size, hidden_size])
        # ==> read ==> batch_size (int)
        batch_size = initialStates[0].shape[1]

        # initialStates ==> dec_hidden = (h0, c0)
        # (tuple(Tensor, Tensor)): ([1, batch_size, hidden_size], [1, batch_size, hidden_size])
        dec_hidden = initialStates

        # Initialize output_word as an empty tensor, output_word (Tensor): [0, batch_size]
        output_word = torch.empty(0, batch_size, dtype=torch.long, device=device)

        # Use the target vocabulary for character-index lookups
        # (vocab_entry.start_of_word = index of <START> = '{')
        vocab_entry = self.target_vocab

        # Initialize current_char (Tensor): [1, batch_size] with the start-of-word index
        current_char = torch.tensor([vocab_entry.start_of_word] * batch_size,
                                    dtype=torch.long, device=device).reshape(1, -1).contiguous()

        # Keep predicting the next character until reaching the maximum word length.
        for i in range(0, max_length - 1):

            # current_char (Tensor): [1, b], dec_hidden = (h_n, c_n) (tuple(Tensor, Tensor)): ([1, b, h], [1, b, h])
            # ==> self.forward ==>
            # s_t (Tensor): [1, b, self.vocab_size], dec_hidden (tuple(Tensor, Tensor)): ([1, b, h], [1, b, h])
            s_t, dec_hidden = self.forward(current_char, dec_hidden)

            # s_t (Tensor): [1, b, self.vocab_size] ==> softmax ==> p_t (Tensor): [1, b, self.vocab_size]
            p_t = F.softmax(s_t, dim=2)

            # p_t (Tensor): [1, b, self.vocab_size] ==> argmax ==> current_char (Tensor): [1, b]
            current_char = torch.argmax(p_t, dim=2)

            # current_char (Tensor): [1, b] ==> append ==> output_word (Tensor): [steps so far, b]
            output_word = torch.cat((output_word, current_char), dim=0)

        # output_word (Tensor): [max_length-1, b] ==> output_word (List[List[int]]): [b, max_length-1]
        output_word = output_word.permute(1, 0).tolist()

        # Truncate each word in the batch at the first end-of-word token (<END> = '}').
        # output_word (List[List[int]]): [b, max_length-1] ==> [b, length <= max_length]
        output_word = [cids[0:cids.index(vocab_entry.end_of_word)] if vocab_entry.end_of_word in cids else cids
                       for cids in output_word]

        # Convert character indices to characters.
        # output_word (List[List[int]]): [b, length <= max_length]
        # ==> decodedWords (List[List[str]]): [b, length <= max_length]
        decodedWords = [[vocab_entry.id2char[cid] for cid in word] for word in output_word]

        # decodedWords (List[List[str]]) ==> decodedWords (List[str]): [b]
        decodedWords = [''.join(chars) for chars in decodedWords]

        return decodedWords
        ### END YOUR CODE
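As a quick sanity check of the three methods above, the snippet below instantiates a CharDecoder on the default character vocabulary and pushes a tiny batch through forward, train_forward, and decode_greedy, checking only shapes and types. It is a hedged sketch rather than part of the submitted files: it assumes the module is saved as char_decoder.py as in the starter code, that vocab.VocabEntry (imported in the file above) builds its default character vocabulary when called with no arguments, and the sizes (hidden_size=64, batch of 4, length 6) are arbitrary.

import torch
from vocab import VocabEntry
from char_decoder import CharDecoder  # assumed module name

hidden_size, length, batch_size = 64, 6, 4
vocab = VocabEntry()  # default character vocabulary
decoder = CharDecoder(hidden_size, char_embedding_size=50, target_vocab=vocab)

# forward: character indices (length, batch) -> scores (length, batch, |char vocab|)
chars = torch.randint(0, len(vocab.char2id), (length, batch_size))
scores, (h_n, c_n) = decoder(chars)
assert scores.shape == (length, batch_size, len(vocab.char2id))
assert h_n.shape == c_n.shape == (1, batch_size, hidden_size)

# train_forward: summed cross-entropy over the non-pad target characters (a scalar)
init = (torch.zeros(1, batch_size, hidden_size), torch.zeros(1, batch_size, hidden_size))
loss = decoder.train_forward(chars, dec_hidden=init)
assert loss.dim() == 0

# decode_greedy: one string per batch element, without the '{' / '}' markers
words = decoder.decode_greedy(init, device=torch.device('cpu'), max_length=21)
assert len(words) == batch_size and all(len(w) <= 21 for w in words)
print(words)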
@@ -0,0 +1,87 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
CS224N 2019-20: Homework 5
"""

import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """
    Convolutional network that applies a 1-D kernel over x_reshaped
    to compute x_conv_out.
    """
    ### YOUR CODE HERE for part 1g
    def __init__(self, word_embed_size, char_embed_size=50, k=5, padding=1):
        """
        Init CNN Layers.
        @param word_embed_size (int): size of the word embedding (number of output channels)
        @param char_embed_size (int): size of the character embedding (number of input channels), default 50
        @param k (int): kernel size for the convolution, default 5
        @param padding (int): zero-padding applied to both sides of x_reshaped, default 1
        """
        super(CNN, self).__init__()  # Initialize self._modules as OrderedDict

        self.word_embed_size = word_embed_size
        self.char_embed_size = char_embed_size
        self.k = k
        self.padding = padding

        # Initialize the convolution layer.
        """
        torch.nn.Conv1d(in_channels,        # char_embed_size
                        out_channels,       # f = number of filters = word_embed_size
                        kernel_size,        # k = 5
                        stride=1,
                        padding=0,          # padding = 1
                        dilation=1,
                        groups=1,
                        bias=True,
                        padding_mode='zeros')
        """
        self.apply_conv = nn.Conv1d(in_channels=self.char_embed_size,
                                    out_channels=self.word_embed_size,
                                    kernel_size=self.k,
                                    padding=self.padding)

    def forward(self, x):
        """
        @param x (tensor): x_reshaped of shape (b, char_embed_size, m_word),
                where b = batch size,
                      char_embed_size = size of the character embedding, and
                      m_word = length of the longest word in the batch.
        @return x_conv_out (tensor): tensor of shape (b, word_embed_size)
        """
        # x: (b, char_embed_size, m_word) ==> x_conv: (b, word_embed_size, m_word + 2*padding - k + 1)
        x_conv = self.apply_conv(x)

        m_word = x.shape[2]
        """
        torch.nn.MaxPool1d(kernel_size,
                           stride=None,
                           padding=0,
                           dilation=1,
                           return_indices=False,
                           ceil_mode=False)
        """
        # Max-pool over the whole convolved word length, then drop the singleton dimension:
        # (b, word_embed_size, m_word + 2*padding - k + 1) ==> (b, word_embed_size)
        apply_maxpool = nn.MaxPool1d(kernel_size=m_word + 2*self.padding - self.k + 1)
        x_conv_out = apply_maxpool(F.relu(x_conv)).squeeze(2)

        return x_conv_out

    ### END YOUR CODE
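A quick shape check for the convolution block: with the default kernel size k=5 and padding=1, a batch of reshaped character embeddings of shape (b, char_embed_size, m_word) should come out as (b, word_embed_size) after the convolution, ReLU, and max-pool over the word dimension. This is a hedged sketch assuming the file is saved as cnn.py as in the starter code; the concrete sizes are arbitrary.

import torch
from cnn import CNN  # assumed module name

batch_size, char_embed_size, word_embed_size, m_word = 8, 50, 256, 12
cnn = CNN(word_embed_size, char_embed_size=char_embed_size, k=5, padding=1)

# x_reshaped: (b, char_embed_size, m_word) -> x_conv_out: (b, word_embed_size)
x_reshaped = torch.randn(batch_size, char_embed_size, m_word)
x_conv_out = cnn(x_reshaped)
assert x_conv_out.shape == (batch_size, word_embed_size)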
@@ -0,0 +1,2 @@
rm -f assignment5.zip
zip -r assignment5.zip *.py ./en_es_data ./sanity_check_en_es_data ./outputs
@@ -0,0 +1,3 @@
nltk
docopt
tqdm==4.29.1
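nltk is presumably listed here for the BLEU evaluation behind the "36.38" in the commit message. The sketch below shows corpus-level BLEU with nltk's corpus_bleu on two made-up, pre-tokenized sentence pairs; the assignment's actual scoring script may tokenize or smooth differently, so this is only an illustration of the metric.

from nltk.translate.bleu_score import corpus_bleu

# One list of reference translations per hypothesis; everything pre-tokenized.
references = [[['the', 'cat', 'sits', 'on', 'the', 'mat']],
              [['there', 'is', 'a', 'dog', 'in', 'the', 'garden']]]
hypotheses = [['the', 'cat', 'sat', 'on', 'the', 'mat'],
              ['there', 'is', 'a', 'dog', 'in', 'the', 'garden']]

bleu = corpus_bleu(references, hypotheses)  # uniform 1- to 4-gram weights by default
print('corpus BLEU = {:.2f}'.format(bleu * 100))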
@@ -0,0 +1,55 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
CS224N 2019-20: Homework 5
"""

import torch
import torch.nn as nn
import torch.nn.functional as F

class Highway(nn.Module):
    """
    Highway layer that computes x_highway from x_conv_out.
    """
    ### YOUR CODE HERE for part 1f

    def __init__(self, word_embed_size):
        """
        Init Highway Layer.
        @param word_embed_size (int): embedding size (dimensionality) of a word
        """
        super(Highway, self).__init__()  # Initialize self._modules as OrderedDict
        self.word_embed_size = word_embed_size

        # Initialize the projection and gate layers.
        # torch.nn.Linear(in_features, out_features, bias=True)
        self.w_proj = nn.Linear(word_embed_size, word_embed_size, bias=True)  # W_proj
        self.w_gate = nn.Linear(word_embed_size, word_embed_size, bias=True)  # W_gate

    def forward(self, x):
        """Maps x_conv_out to x_highway.
        @param x (tensor): x_conv_out tensor of shape (b, word_embed_size),
                where b = batch size
        @returns x_highway (tensor): tensor of shape (b, word_embed_size)
        """
        x_proj = F.relu(self.w_proj(x))         # (b, word_embed_size)
        x_gate = torch.sigmoid(self.w_gate(x))  # (b, word_embed_size)

        # Element-wise interpolation between the projection and the skip connection: * or mul()
        x_highway = x_gate * x_proj + (1 - x_gate) * x  # (b, word_embed_size)

        return x_highway

    ### END YOUR CODE
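The highway layer is a gated interpolation, x_highway = x_gate * x_proj + (1 - x_gate) * x_conv_out, so it preserves the (b, word_embed_size) shape and reduces to a plain skip connection when the gate closes. The sketch below checks both properties; as before it is hedged (assumes the file is saved as highway.py as in the starter code, arbitrary sizes).

import torch
from highway import Highway  # assumed module name

batch_size, word_embed_size = 8, 256
highway = Highway(word_embed_size)

x_conv_out = torch.randn(batch_size, word_embed_size)
x_highway = highway(x_conv_out)
assert x_highway.shape == (batch_size, word_embed_size)

# Push the gate bias strongly negative so sigmoid(W_gate x + b) -> 0;
# the block should then pass its input straight through (the carry path).
with torch.no_grad():
    highway.w_gate.bias.fill_(-1e4)
print(torch.allclose(highway(x_conv_out), x_conv_out))  # expected: True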
@@ -0,0 +1,14 @@
name: local_nmt
channels:
  - soumith
  - defaults
dependencies:
  - pytorch=1.0.0
  - python=3.5
  - numpy
  - scipy
  - tqdm
  - docopt
  - pytorch
  - nltk
  - torchvision