forked from xixiaoyao/CS224n-winter-together
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
hw5.ipynb - for running sanity checks. Colab GPU training time ~6 hours; BLEU 36.38.
Showing 25 changed files with 169,396 additions and 0 deletions.
20,030 changes: 20,030 additions & 0 deletions
Assignments/assignment5/Herais/Ans 1 Coding.mht
20,397 changes: 20,397 additions & 0 deletions
Assignments/assignment5/Herais/Ans 1 Written.mht
@@ -0,0 +1,6 @@
# Assignment #5 Written and Coding
- Ans 1 Written.mht
- Ans 1 Coding.mht
- Ans 2 Coding.mht
- Ans 3.mht
- hw5 jupyter
Empty file.
@@ -0,0 +1,186 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
CS224N 2019-20: Homework 5
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
from vocab import VocabEntry
import numpy as np
import re


class CharDecoder(nn.Module):
    def __init__(self, hidden_size, char_embedding_size=50, target_vocab=None):
        """ Init Character Decoder.
        @param hidden_size (int): Hidden size of the decoder LSTM
        @param char_embedding_size (int): dimensionality of character embeddings
        @param target_vocab (VocabEntry): vocabulary for the target language. See vocab.py for documentation.
        """
        super(CharDecoder, self).__init__()
        self.target_vocab = target_vocab
        self.charDecoder = nn.LSTM(char_embedding_size, hidden_size)
        self.char_output_projection = nn.Linear(hidden_size, len(self.target_vocab.char2id))
        self.decoderCharEmb = nn.Embedding(len(self.target_vocab.char2id), char_embedding_size,
                                           padding_idx=self.target_vocab.char_pad)

    def forward(self, input, dec_hidden=None):
        """ Forward pass of character decoder.
        @param input (Tensor): tensor of integers, shape (length, batch_size)
        @param dec_hidden (tuple(Tensor, Tensor)): internal state of the LSTM before reading the input characters. A tuple of two tensors of shape (1, batch, hidden_size)
        @returns scores (Tensor): called s_t in the PDF, shape (length, batch_size, self.vocab_size)
        @returns dec_hidden (tuple(Tensor, Tensor)): internal state of the LSTM after reading the input characters. A tuple of two tensors of shape (1, batch, hidden_size)
        """
        ### YOUR CODE HERE for part 2a
        ### TODO - Implement the forward pass of the character decoder.

        # input: [length, b] ==> decoderCharEmb ==> X: [length, b, char_embed_size]
        X = self.decoderCharEmb(input)

        # X: [length, b, char_embed_size], dec_hidden = (h_n, c_n): ([1, b, h], [1, b, h])
        # ==> charDecoder ==>
        # h_t: [length, b, h], dec_hidden = (h_n, c_n): ([1, b, h], [1, b, h])
        h_t, dec_hidden = self.charDecoder(X, dec_hidden)

        # h_t: [length, b, h] ==> char_output_projection ==> scores = s_t: [length, b, self.vocab_size]
        scores = self.char_output_projection(h_t)

        return scores, dec_hidden
        ### END YOUR CODE

    def train_forward(self, char_sequence, dec_hidden=None):
        """ Forward computation during training.
        @param char_sequence (Tensor): tensor of integers, shape (length, batch_size). Note that "length" here and in forward() need not be the same.
        @param dec_hidden (tuple(Tensor, Tensor)): initial internal state of the LSTM, obtained from the output of the word-level decoder. A tuple of two tensors of shape (1, batch_size, hidden_size)
        @returns The cross-entropy loss (Tensor), computed as the *sum* of cross-entropy losses of all the words in the batch.
        """
        ### YOUR CODE HERE for part 2b
        ### TODO - Implement training forward pass.
        ###
        ### Hint: - Make sure padding characters do not contribute to the cross-entropy loss. Check vocab.py to find the padding token's index.
        ###       - char_sequence corresponds to the sequence x_1 ... x_{n+1} (e.g., <START>,m,u,s,i,c,<END>). Read the handout about how to construct the input and target sequences of the CharDecoderLSTM.
        ###       - Carefully read the documentation for nn.CrossEntropyLoss and our handout to see what this criterion has already included:
        ###         https://pytorch.org/docs/stable/nn.html#crossentropyloss

        # char_sequence: [length, b] ==> drop the end token ==> X_input: [length-1, b]
        X_input = char_sequence[:-1]

        # char_sequence: [length, b] ==> drop the start token ==> X_target: [length-1, b]
        X_target = char_sequence[1:]

        # X_input: [length-1, b], dec_hidden = (h_n, c_n): ([1, b, h], [1, b, h])
        # ==> self.forward ==>
        # s_t: [length-1, b, self.vocab_size], dec_hidden = (h_n, c_n): ([1, b, h], [1, b, h])
        s_t, dec_hidden = self.forward(X_input, dec_hidden)

        # Look up the padding character's index in the target vocabulary (0 by convention).
        idx_char_pad = self.target_vocab.char_pad

        # Instantiate CrossEntropyLoss, which combines log-softmax and NLL loss;
        # padding positions are ignored and the per-character losses are summed.
        compute_loss = nn.CrossEntropyLoss(ignore_index=idx_char_pad,
                                           reduction='sum')

        # Reshape s_t for compute_loss.
        # length = length of a word, b = batch size, (length-1)*b = number of scored characters in the batch
        # s_t: [length-1, b, self.vocab_size] ==> s_t: [(length-1)*b, self.vocab_size] = [N, C]
        s_t = s_t.reshape(s_t.shape[0] * s_t.shape[1], -1)

        # Reshape X_target for compute_loss.
        # X_target: [length-1, b] ==> X_target: [(length-1)*b] = [N]
        X_target = X_target.reshape(-1)

        # s_t: [N, C], X_target: [N] ==> compute_loss ==> loss_char_dec (scalar)
        loss_char_dec = compute_loss(s_t, X_target)

        return loss_char_dec
        ### END YOUR CODE

    def decode_greedy(self, initialStates, device, max_length=21):
        """ Greedy decoding
        @param initialStates (tuple(Tensor, Tensor)): initial internal state of the LSTM, a tuple of two tensors of size (1, batch_size, hidden_size)
        @param device: torch.device (indicates whether the model is on CPU or GPU)
        @param max_length (int): maximum length of words to decode
        @returns decodedWords (List[str]): a list (of length batch_size) of strings, each of which has length <= max_length.
                 The decoded strings should NOT contain the start-of-word and end-of-word characters.
        """
        ### YOUR CODE HERE for part 2c
        ### TODO - Implement greedy decoding.
        ### Hints:
        ###      - Use initialStates to get batch_size = b.
        ###      - Use target_vocab.char2id and target_vocab.id2char to convert between integers and characters
        ###      - Use torch.tensor(..., device=device) to turn a list of character indices into a tensor.
        ###      - You may find torch.argmax useful
        ###      - We use curly brackets as start-of-word and end-of-word characters. That is, use the character '{' for <START> and '}' for <END>.
        ###        Their indices are self.target_vocab.start_of_word and self.target_vocab.end_of_word, respectively.

        # initialStates (tuple(Tensor, Tensor)): ([1, batch_size, hidden_size], [1, batch_size, hidden_size])
        # ==> read ==> batch_size (int)
        batch_size = initialStates[0].shape[1]

        # initialStates ==> dec_hidden = (h0, c0)
        # (tuple(Tensor, Tensor)): ([1, batch_size, hidden_size], [1, batch_size, hidden_size])
        dec_hidden = initialStates

        # Initialize output_word as an empty tensor, output_word (Tensor): [0, batch_size]
        output_word = torch.empty(0, batch_size, dtype=torch.long, device=device)

        # Use the target vocabulary for character-index lookups
        # (vocab_entry.start_of_word = index of <START> = '{')
        vocab_entry = self.target_vocab

        # Initialize current_char (Tensor): [1, batch_size] with the start-of-word index
        current_char = torch.tensor([vocab_entry.start_of_word] * batch_size,
                                    dtype=torch.long, device=device).reshape(1, -1).contiguous()

        # Keep predicting the next character until reaching the maximum word length.
        for i in range(0, max_length - 1):

            # current_char (Tensor): [1, b], dec_hidden = (h_n, c_n) (tuple(Tensor, Tensor)): ([1, b, h], [1, b, h])
            # ==> self.forward ==>
            # s_t (Tensor): [1, b, self.vocab_size], dec_hidden (tuple(Tensor, Tensor)): ([1, b, h], [1, b, h])
            s_t, dec_hidden = self.forward(current_char, dec_hidden)

            # s_t (Tensor): [1, b, self.vocab_size] ==> softmax ==> p_t (Tensor): [1, b, self.vocab_size]
            p_t = F.softmax(s_t, dim=2)

            # p_t (Tensor): [1, b, self.vocab_size] ==> argmax ==> current_char (Tensor): [1, b]
            current_char = torch.argmax(p_t, dim=2)

            # current_char (Tensor): [1, b] ==> append ==> output_word (Tensor): [steps so far, b]
            output_word = torch.cat((output_word, current_char), dim=0)

        # output_word (Tensor): [max_length-1, b] ==> output_word (List[List[int]]): [b, max_length-1]
        output_word = output_word.permute(1, 0).tolist()

        # Truncate each word in the batch at the first end-of-word token (<END> = '}').
        # output_word (List[List[int]]): [b, max_length-1] ==> [b, length <= max_length]
        output_word = [cids[0:cids.index(vocab_entry.end_of_word)] if vocab_entry.end_of_word in cids else cids
                       for cids in output_word]

        # Convert character indices to characters.
        # output_word (List[List[int]]): [b, length <= max_length]
        # ==> decodedWords (List[List[str]]): [b, length <= max_length]
        decodedWords = [[vocab_entry.id2char[cid] for cid in word] for word in output_word]

        # decodedWords (List[List[str]]) ==> decodedWords (List[str]): [b]
        decodedWords = [''.join(chars) for chars in decodedWords]

        return decodedWords
        ### END YOUR CODE
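As a quick sanity check of the three methods above, the snippet below instantiates a CharDecoder on the default character vocabulary and pushes a tiny batch through forward, train_forward, and decode_greedy, checking only shapes and types. It is a hedged sketch rather than part of the submitted files: it assumes the module is saved as char_decoder.py as in the starter code, that vocab.VocabEntry (imported in the file above) builds its default character vocabulary when called with no arguments, and the sizes (hidden_size=64, batch of 4, length 6) are arbitrary.

import torch
from vocab import VocabEntry
from char_decoder import CharDecoder  # assumed module name

hidden_size, length, batch_size = 64, 6, 4
vocab = VocabEntry()  # default character vocabulary
decoder = CharDecoder(hidden_size, char_embedding_size=50, target_vocab=vocab)

# forward: character indices (length, batch) -> scores (length, batch, |char vocab|)
chars = torch.randint(0, len(vocab.char2id), (length, batch_size))
scores, (h_n, c_n) = decoder(chars)
assert scores.shape == (length, batch_size, len(vocab.char2id))
assert h_n.shape == c_n.shape == (1, batch_size, hidden_size)

# train_forward: summed cross-entropy over the non-pad target characters (a scalar)
init = (torch.zeros(1, batch_size, hidden_size), torch.zeros(1, batch_size, hidden_size))
loss = decoder.train_forward(chars, dec_hidden=init)
assert loss.dim() == 0

# decode_greedy: one string per batch element, without the '{' / '}' markers
words = decoder.decode_greedy(init, device=torch.device('cpu'), max_length=21)
assert len(words) == batch_size and all(len(w) <= 21 for w in words)
print(words)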
@@ -0,0 +1,87 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
CS224N 2019-20: Homework 5
"""

import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """
    Convolutional network that applies a 1-D kernel over x_reshaped
    to compute x_conv_out.
    """
    ### YOUR CODE HERE for part 1g
    def __init__(self, word_embed_size, char_embed_size=50, k=5, padding=1):
        """
        Init CNN Layers.
        @param word_embed_size (int): size of the word embedding (number of output channels)
        @param char_embed_size (int): size of the character embedding (number of input channels), default 50
        @param k (int): kernel size for the convolution, default 5
        @param padding (int): zero-padding applied to both sides of x_reshaped, default 1
        """
        super(CNN, self).__init__()  # Initialize self._modules as OrderedDict

        self.word_embed_size = word_embed_size
        self.char_embed_size = char_embed_size
        self.k = k
        self.padding = padding

        # Initialize the convolution layer.
        """
        torch.nn.Conv1d(in_channels,        # char_embed_size
                        out_channels,       # f = number of filters = word_embed_size
                        kernel_size,        # k = 5
                        stride=1,
                        padding=0,          # padding = 1
                        dilation=1,
                        groups=1,
                        bias=True,
                        padding_mode='zeros')
        """
        self.apply_conv = nn.Conv1d(in_channels=self.char_embed_size,
                                    out_channels=self.word_embed_size,
                                    kernel_size=self.k,
                                    padding=self.padding)

    def forward(self, x):
        """
        @param x (tensor): x_reshaped of shape (b, char_embed_size, m_word),
                where b = batch size,
                      char_embed_size = size of the character embedding, and
                      m_word = length of the longest word in the batch.
        @return x_conv_out (tensor): tensor of shape (b, word_embed_size)
        """
        # x: (b, char_embed_size, m_word) ==> x_conv: (b, word_embed_size, m_word + 2*padding - k + 1)
        x_conv = self.apply_conv(x)

        m_word = x.shape[2]
        """
        torch.nn.MaxPool1d(kernel_size,
                           stride=None,
                           padding=0,
                           dilation=1,
                           return_indices=False,
                           ceil_mode=False)
        """
        # Max-pool over the whole convolved word length, then drop the singleton dimension:
        # (b, word_embed_size, m_word + 2*padding - k + 1) ==> (b, word_embed_size)
        apply_maxpool = nn.MaxPool1d(kernel_size=m_word + 2*self.padding - self.k + 1)
        x_conv_out = apply_maxpool(F.relu(x_conv)).squeeze(2)

        return x_conv_out

    ### END YOUR CODE
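A quick shape check for the convolution block: with the default kernel size k=5 and padding=1, a batch of reshaped character embeddings of shape (b, char_embed_size, m_word) should come out as (b, word_embed_size) after the convolution, ReLU, and max-pool over the word dimension. This is a hedged sketch assuming the file is saved as cnn.py as in the starter code; the concrete sizes are arbitrary.

import torch
from cnn import CNN  # assumed module name

batch_size, char_embed_size, word_embed_size, m_word = 8, 50, 256, 12
cnn = CNN(word_embed_size, char_embed_size=char_embed_size, k=5, padding=1)

# x_reshaped: (b, char_embed_size, m_word) -> x_conv_out: (b, word_embed_size)
x_reshaped = torch.randn(batch_size, char_embed_size, m_word)
x_conv_out = cnn(x_reshaped)
assert x_conv_out.shape == (batch_size, word_embed_size)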
@@ -0,0 +1,2 @@
rm -f assignment5.zip
zip -r assignment5.zip *.py ./en_es_data ./sanity_check_en_es_data ./outputs
@@ -0,0 +1,3 @@
nltk
docopt
tqdm==4.29.1
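nltk is presumably listed here for the BLEU evaluation behind the "36.38" in the commit message. The sketch below shows corpus-level BLEU with nltk's corpus_bleu on two made-up, pre-tokenized sentence pairs; the assignment's actual scoring script may tokenize or smooth differently, so this is only an illustration of the metric.

from nltk.translate.bleu_score import corpus_bleu

# One list of reference translations per hypothesis; everything pre-tokenized.
references = [[['the', 'cat', 'sits', 'on', 'the', 'mat']],
              [['there', 'is', 'a', 'dog', 'in', 'the', 'garden']]]
hypotheses = [['the', 'cat', 'sat', 'on', 'the', 'mat'],
              ['there', 'is', 'a', 'dog', 'in', 'the', 'garden']]

bleu = corpus_bleu(references, hypotheses)  # uniform 1- to 4-gram weights by default
print('corpus BLEU = {:.2f}'.format(bleu * 100))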
@@ -0,0 +1,55 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
CS224N 2019-20: Homework 5
"""

import torch
import torch.nn as nn
import torch.nn.functional as F

class Highway(nn.Module):
    """
    Highway layer that computes x_highway from x_conv_out.
    """
    ### YOUR CODE HERE for part 1f

    def __init__(self, word_embed_size):
        """
        Init Highway Layer.
        @param word_embed_size (int): embedding size (dimensionality) of a word
        """
        super(Highway, self).__init__()  # Initialize self._modules as OrderedDict
        self.word_embed_size = word_embed_size

        # Initialize the projection and gate layers.
        # torch.nn.Linear(in_features, out_features, bias=True)
        self.w_proj = nn.Linear(word_embed_size, word_embed_size, bias=True)  # W_proj
        self.w_gate = nn.Linear(word_embed_size, word_embed_size, bias=True)  # W_gate

    def forward(self, x):
        """Maps x_conv_out to x_highway.
        @param x (tensor): x_conv_out tensor of shape (b, word_embed_size),
                where b = batch size
        @returns x_highway (tensor): tensor of shape (b, word_embed_size)
        """
        x_proj = F.relu(self.w_proj(x))         # (b, word_embed_size)
        x_gate = torch.sigmoid(self.w_gate(x))  # (b, word_embed_size)

        # Element-wise interpolation between the projection and the skip connection: * or mul()
        x_highway = x_gate * x_proj + (1 - x_gate) * x  # (b, word_embed_size)

        return x_highway

    ### END YOUR CODE
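The highway layer is a gated interpolation, x_highway = x_gate * x_proj + (1 - x_gate) * x_conv_out, so it preserves the (b, word_embed_size) shape and reduces to a plain skip connection when the gate closes. The sketch below checks both properties; as before it is hedged (assumes the file is saved as highway.py as in the starter code, arbitrary sizes).

import torch
from highway import Highway  # assumed module name

batch_size, word_embed_size = 8, 256
highway = Highway(word_embed_size)

x_conv_out = torch.randn(batch_size, word_embed_size)
x_highway = highway(x_conv_out)
assert x_highway.shape == (batch_size, word_embed_size)

# Push the gate bias strongly negative so sigmoid(W_gate x + b) -> 0;
# the block should then pass its input straight through (the carry path).
with torch.no_grad():
    highway.w_gate.bias.fill_(-1e4)
print(torch.allclose(highway(x_conv_out), x_conv_out))  # expected: True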
@@ -0,0 +1,14 @@
name: local_nmt
channels:
  - soumith
  - defaults
dependencies:
  - pytorch=1.0.0
  - python=3.5
  - numpy
  - scipy
  - tqdm
  - docopt
  - pytorch
  - nltk
  - torchvision