More modular approach and some novelties in params and training #16

Open · wants to merge 27 commits into master
4 changes: 4 additions & 0 deletions .gitignore
@@ -0,0 +1,4 @@
__pycache__/
.idea/
*.pt
Save/
68 changes: 58 additions & 10 deletions README.md
@@ -9,28 +9,36 @@ Download [this Shakespeare dataset](https://raw.githubusercontent.com/karpathy/c
Run `train.py` with the dataset filename to train and save the network:

```
> python train.py shakespeare.txt
> python train.py --train shakespeare.txt

Training for 2000 epochs...
(... 10 minutes later ...)
Saved as shakespeare.pt
```
After training, the model will be saved as `[filename].pt`.
In addition, every `--print_every` epochs a model checkpoint is saved to the `Save/` folder, which must exist in the directory from which `train.py` is run.
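As a rough sketch of what that checkpointing amounts to (illustrative only, not necessarily the exact code in `train.py`):

```
# Illustrative checkpointing sketch; the names epoch, print_every,
# model_name and decoder are hypothetical stand-ins for what train.py uses.
import os
import torch

if epoch % print_every == 0:
    checkpoint_path = os.path.join('Save', '%s_epoch_%d.pt' % (model_name, epoch))
    torch.save(decoder, checkpoint_path)  # Save/ must already exist
```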

### Training options

```
Usage: train.py [filename] [options]
Usage: train.py [options]

Options:
--model Whether to use LSTM or GRU units gru
--n_epochs Number of epochs to train 2000
--print_every Log learning rate at this interval 100
--hidden_size Hidden size of GRU 50
--n_layers Number of GRU layers 2
--learning_rate Learning rate 0.01
--chunk_len Length of training chunks 200
--batch_size Number of examples per batch 100
--train Train data
--valid Validation data
--model Whether to use LSTM or GRU units gru
--n_epochs Number of epochs to train 10
--print_every Log learning rate at this interval 100
--hidden_size Hidden size of GRU 50
--n_layers Number of GRU layers 2
--learning_rate Learning rate 0.01
--chunk_len Length of training chunks 200
--batch_size Number of examples per batch 100
--batch_type Batch random (0) or sequential (1) 0
--drop_out Dropout rate between recurrent layers 0
--early_stopping Number of validation steps with no improvement 10
--model_name Model (session) name, used in checkpoint files

--cuda Use CUDA
```
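The `--valid` and `--early_stopping` options work together: training stops once the validation loss has not improved for the given number of validation steps. A minimal sketch of that logic (an assumption, not necessarily the PR's exact implementation):

```
# Patience-based early stopping as suggested by --early_stopping.
# decoder.train(...) follows the CharRNN.train signature in model.py;
# the *_inp/*_target tensors and early_stopping value are hypothetical.
best_val_loss = float('inf')
steps_without_improvement = 0

for epoch in range(1, n_epochs + 1):
    train_loss = decoder.train(train_inp, train_target, validation=False)
    val_loss = decoder.train(valid_inp, valid_target, validation=True)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        steps_without_improvement = 0
    else:
        steps_without_improvement += 1
        if steps_without_improvement >= early_stopping:
            print('Stopping early at epoch %d' % epoch)
            break
```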

@@ -59,3 +67,43 @@ Options:
--cuda Use CUDA
```


### Grid search
```
Usage: search_params.py --train [options]

Hard-coded params:
 -learning_rate : [0.001, 0.01]
 -max_epochs : [500]
 -n_batch_size : [32, 1024] (should be changed according to available memory)
 -batch_type : [0, 1] (random vs. sequential sampling)
 -model_type : [lstm, gru]

Options:
--train training file
--valid validation file
--hidden_size_init 50
--hidden_size_end 800
--hidden_size_step 200
--n_layer_init 1
--n_layer_end 4
--n_layer_step 1
--chunk_len_init 20
--chunk_len_end 90
--chunk_len_step 10
--early_stopping 10
--optimizer adam
--cuda
```
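The search itself is essentially a nested sweep over the hard-coded lists and the ranged options above; a minimal sketch (not the exact code in `search_params.py`) looks like this:

```
# Illustrative grid over the parameters listed above; the real
# search_params.py may structure this differently.
import itertools

learning_rates = [0.001, 0.01]
batch_sizes = [32, 1024]
batch_types = [0, 1]            # 0 = random chunks, 1 = sequential chunks
model_types = ['lstm', 'gru']
hidden_sizes = range(50, 800 + 1, 200)
layer_counts = range(1, 4 + 1, 1)
chunk_lens = range(20, 90 + 1, 10)

for lr, bs, bt, mt, hs, nl, cl in itertools.product(
        learning_rates, batch_sizes, batch_types, model_types,
        hidden_sizes, layer_counts, chunk_lens):
    # Build a CharRNN with this configuration, train it with early
    # stopping on the validation file, and log the losses to file.
    print('lr=%s batch=%s type=%s model=%s hidden=%s layers=%s chunk=%s'
          % (lr, bs, bt, mt, hs, nl, cl))
```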

### TODO

* [ ] Grid search (needs improvement)

## DONE
* [x] Early stopping
* [x] Add Dropout (p)
* [x] Add Validation set to monitor overfitting
* [x] Saving model at checkpoint
* [x] Saving train and validation error, with training params to file
* [x] Refactor to a more object-oriented paradigm
22 changes: 17 additions & 5 deletions generate.py
@@ -1,37 +1,49 @@
#!/usr/bin/env python
# https://github.com/spro/char-rnn.pytorch
# https://github.com/zutotonno/char-rnn.pytorch

import torch
import os
import argparse
import string

from helpers import *
from model import *

all_characters = string.printable
n_characters = len(all_characters)

def generate(decoder, prime_str='A', predict_len=100, temperature=0.8, cuda=False):
hidden = decoder.init_hidden(1)
prime_input = Variable(char_tensor(prime_str).unsqueeze(0))

if cuda:
hidden = hidden.cuda()
if isinstance(hidden, tuple):
hidden = (hidden[0].cuda(), hidden[1].cuda())
else:
hidden = hidden.cuda()
prime_input = prime_input.cuda()
predicted = prime_str

# Use priming string to "build up" hidden state
for p in range(len(prime_str) - 1):
_, hidden = decoder(prime_input[:,p], hidden)
_, hidden = decoder(prime_input[:, p], hidden)

inp = prime_input[:,-1]
inp = prime_input[:, -1]

for p in range(predict_len):


output, hidden = decoder(inp, hidden)

# Sample from the network as a multinomial distribution
output_dist = output.data.view(-1).div(temperature).exp()
top_i = torch.multinomial(output_dist, 1)[0]

# Add predicted character to string and use as next input
predicted_char = all_characters[top_i]
# if(predicted_char=='\n'):
# break
# else:
predicted += predicted_char
inp = Variable(char_tensor(predicted_char).unsqueeze(0))
if cuda:
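For reference, a checkpoint produced by `train.py` can be fed back into `generate()` roughly like this (the checkpoint name and priming string are illustrative):

```
# Hypothetical usage of generate() with a saved model.
import torch
from generate import generate

decoder = torch.load('shakespeare.pt')
print(generate(decoder, prime_str='Wh', predict_len=200,
               temperature=0.8, cuda=False))
```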
6 changes: 3 additions & 3 deletions helpers.py
@@ -1,19 +1,19 @@
# https://github.com/spro/char-rnn.pytorch

import unidecode
import string
import random
import time
import math
import torch
import string

# Reading and un-unicode-encoding data

all_characters = string.printable

n_characters = len(all_characters)

def read_file(filename):
file = unidecode.unidecode(open(filename).read())
file = unidecode.unidecode(open(filename, encoding="utf8").read())
return file, len(file)

# Turning a string into a tensor
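For reference, `read_file()` returns the whole ASCII-transliterated corpus as a single string together with its length:

```
# Assumed usage of read_file(); the filename is illustrative.
from helpers import read_file

file, file_len = read_file('shakespeare.txt')
print('loaded %d characters' % file_len)
```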
45 changes: 40 additions & 5 deletions model.py
@@ -1,24 +1,36 @@
# https://github.com/spro/char-rnn.pytorch
# https://github.com/zutotonno/char-rnn.pytorch

import torch
import torch.nn as nn
from torch.autograd import Variable

class CharRNN(nn.Module):
def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1):
def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1,
dropout = 0, gpu = True, batch_size = 32, chunk_len = 30, learning_rate = 0.001, optimizer = "adam"):
super(CharRNN, self).__init__()
self.model = model.lower()
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
self.n_layers = n_layers
self.gpu = gpu
self.batch_size = batch_size
self.chunk_len = chunk_len
self.optimizer = optimizer

self.encoder = nn.Embedding(input_size, hidden_size)
if self.model == "gru":
self.rnn = nn.GRU(hidden_size, hidden_size, n_layers)
self.rnn = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout)
elif self.model == "lstm":
self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers)
self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers, dropout=dropout)
self.decoder = nn.Linear(hidden_size, output_size)
if self.optimizer == "adam":
self.optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
elif self.optimizer == "rms":
self.optimizer = torch.optim.RMSprop(self.parameters(), lr=learning_rate)
self.criterion = nn.CrossEntropyLoss()
if self.gpu:
self.cuda()

def forward(self, input, hidden):
batch_size = input.size(0)
@@ -35,7 +47,30 @@ def forward2(self, input, hidden):

def init_hidden(self, batch_size):
if self.model == "lstm":
return (Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)),
return (Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)),
Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)))
return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size))


def train(self, inp, target, validation):
self.zero_grad()
loss = 0
hidden = self.init_hidden(self.batch_size)
if self.gpu:
if self.model == "gru":
hidden = hidden.cuda()
else:
hidden = (hidden[0].cuda(), hidden[1].cuda())
for c in range(self.chunk_len):
output, hidden = self(inp[:, c], hidden)
loss += self.criterion(output.view(self.batch_size, -1), target[:, c])
### The losses are averaged across observations for each minibatch (see doc CrossEntropyLoss)
if not validation:
loss.backward()
self.optimizer.step()
currentLoss = loss.item()/ self.chunk_len
return currentLoss




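To make the new `train()` method concrete, a minimal driver loop might look like the following (the `random_training_set` helper and the hyperparameter values are assumptions, not taken verbatim from the PR):

```
# Hypothetical driver for CharRNN.train(); random_training_set is assumed
# to return (input, target) LongTensors of shape (batch_size, chunk_len),
# as in the original char-rnn.pytorch.
import string

n_characters = len(string.printable)   # as in helpers.py

decoder = CharRNN(n_characters, hidden_size=50, output_size=n_characters,
                  model='gru', n_layers=2, dropout=0.1, gpu=False,
                  batch_size=100, chunk_len=200, learning_rate=0.01,
                  optimizer='adam')

for epoch in range(1, 11):
    inp, target = random_training_set(chunk_len=200, batch_size=100)
    loss = decoder.train(inp, target, validation=False)  # backward + optimizer step
    print('epoch %d  train loss %.4f' % (epoch, loss))
```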