From b1df0f485dc341f5f4526218dd9f1885c4a4b3b1 Mon Sep 17 00:00:00 2001
From: Antonio Ritacco
Date: Wed, 29 May 2019 19:40:33 +0200
Subject: [PATCH 01/27] Modifying to be compliant with new torch version. Want
 to add denoising and sparsity

---
 README.md   |  7 +++++++
 generate.py | 15 ++++++++++-----
 helpers.py  |  2 +-
 model.py    |  1 +
 train.py    | 19 ++++++++++++++++---
 5 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 21a04b8..e142da6 100644
--- a/README.md
+++ b/README.md
@@ -59,3 +59,10 @@ Options:
 --cuda           Use CUDA
 ```
 
+### TODO
+```
+[] Add Dropout (p)
+[] Add Validation set to monitor overfitting
+
+```
+
diff --git a/generate.py b/generate.py
index 0fdf414..21ab812 100755
--- a/generate.py
+++ b/generate.py
@@ -24,18 +24,23 @@ def generate(decoder, prime_str='A', predict_len=100, temperature=0.8, cuda=Fals
     inp = prime_input[:,-1]
 
     for p in range(predict_len):
+
+
         output, hidden = decoder(inp, hidden)
-
+
         # Sample from the network as a multinomial distribution
         output_dist = output.data.view(-1).div(temperature).exp()
         top_i = torch.multinomial(output_dist, 1)[0]
 
         # Add predicted character to string and use as next input
         predicted_char = all_characters[top_i]
-        predicted += predicted_char
-        inp = Variable(char_tensor(predicted_char).unsqueeze(0))
-        if cuda:
-            inp = inp.cuda()
+        if(predicted_char=='\n'):
+            break
+        else:
+            predicted += predicted_char
+            inp = Variable(char_tensor(predicted_char).unsqueeze(0))
+            if cuda:
+                inp = inp.cuda()
 
     return predicted
 
diff --git a/helpers.py b/helpers.py
index abbd56a..de7f221 100644
--- a/helpers.py
+++ b/helpers.py
@@ -13,7 +13,7 @@ n_characters = len(all_characters)
 
 def read_file(filename):
-    file = unidecode.unidecode(open(filename).read())
+    file = unidecode.unidecode(open(filename, encoding="utf8").read())
     return file, len(file)
 
 # Turning a string into a tensor
diff --git a/model.py b/model.py
index b619634..b2a1ae7 100644
--- a/model.py
+++ b/model.py
@@ -18,6 +18,7 @@ def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1
             self.rnn = nn.GRU(hidden_size, hidden_size, n_layers)
         elif self.model == "lstm":
             self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers)
+
         self.decoder = nn.Linear(hidden_size, output_size)
 
     def forward(self, input, hidden):
diff --git a/train.py b/train.py
index 4d47bba..dcd0125 100755
--- a/train.py
+++ b/train.py
@@ -38,8 +38,17 @@ def random_training_set(chunk_len, batch_size):
     target = torch.LongTensor(batch_size, chunk_len)
     for bi in range(batch_size):
         start_index = random.randint(0, file_len - chunk_len)
+
+        while(file[start_index]!='\n'): # first word should be the actual start of a sentence.
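+            # NOTE: this scan stops ON the '\n' itself, so the chunk actually
+            # begins with the newline character rather than the first word after
+            # it, and it assumes a '\n' occurs between start_index and the end
+            # of the file.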
+ start_index = start_index+1 + end_index = start_index + chunk_len + 1 + + if(end_index>file_len): # if we ended after the last char of the file, come back to get a correct chunk len + start_index = file_len-chunk_len-1 + chunk = file[start_index:end_index] + inp[bi] = char_tensor(chunk[:-1]) target[bi] = char_tensor(chunk[1:]) inp = Variable(inp) @@ -62,14 +71,18 @@ def train(inp, target): loss.backward() decoder_optimizer.step() - - return loss.data[0] / args.chunk_len + currentLoss = loss.item() / args.chunk_len + return currentLoss def save(): save_filename = os.path.splitext(os.path.basename(args.filename))[0] + '.pt' torch.save(decoder, save_filename) print('Saved as %s' % save_filename) + + + + # Initialize models and start training decoder = CharRNN( @@ -97,7 +110,7 @@ def save(): if epoch % args.print_every == 0: print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss)) - print(generate(decoder, 'Wh', 100, cuda=args.cuda), '\n') + print(generate(decoder, 'Renzi', 100, cuda=args.cuda), '\n') print("Saving...") save() From 7766b24b01564287ff8a6b7f5f0aade9891adcb1 Mon Sep 17 00:00:00 2001 From: zutotonno Date: Thu, 30 May 2019 10:38:18 +0200 Subject: [PATCH 02/27] gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c18dd8d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__/ From 4481f2bf8cdfc09e000528c72287f63da18b120c Mon Sep 17 00:00:00 2001 From: zutotonno Date: Thu, 30 May 2019 10:40:22 +0200 Subject: [PATCH 03/27] prova --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index dcd0125..ea89300 100755 --- a/train.py +++ b/train.py @@ -110,7 +110,7 @@ def save(): if epoch % args.print_every == 0: print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss)) - print(generate(decoder, 'Renzi', 100, cuda=args.cuda), '\n') + print(generate(decoder, 'Renzi', 200, cuda=args.cuda), '\n') print("Saving...") save() From 2cb40531d88f85f40a27423b881949158f234845 Mon Sep 17 00:00:00 2001 From: Antonio Ritacco Date: Thu, 30 May 2019 11:24:50 +0200 Subject: [PATCH 04/27] ignore pycharm files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c18dd8d..8ae8ce5 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ __pycache__/ +.idea/ From 61f03ee7b5c446b64e861527c83fc3960d328ead Mon Sep 17 00:00:00 2001 From: Antonio Ritacco Date: Thu, 30 May 2019 12:55:47 +0200 Subject: [PATCH 05/27] Epoch equals to train on entire training set Initial implementation one epoch was equal to one batch --- train.py | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/train.py b/train.py index ea89300..f4e9174 100755 --- a/train.py +++ b/train.py @@ -58,7 +58,30 @@ def random_training_set(chunk_len, batch_size): target = target.cuda() return inp, target -def train(inp, target): + +def getBatch(chunk_len, batch_size, chunk_start): + inp = torch.LongTensor(batch_size, chunk_len) + target = torch.LongTensor(batch_size, chunk_len) + end_index = chunk_start + for bi in range(batch_size): + start_index = end_index + end_index = start_index + chunk_len + 1 + + if (end_index > file_len): # if we ended after the last char of the file, come back to get a correct chunk len + start_index = file_len - chunk_len - 1 + + chunk = file[start_index:end_index] + + inp[bi] = char_tensor(chunk[:-1]) + target[bi] 
= char_tensor(chunk[1:]) + inp = Variable(inp) + target = Variable(target) + if args.cuda: + inp = inp.cuda() + target = target.cuda() + return inp, target, end_index + +def train(inp, target, end_index): hidden = decoder.init_hidden(args.batch_size) if args.cuda: hidden = hidden.cuda() @@ -66,13 +89,13 @@ def train(inp, target): loss = 0 for c in range(args.chunk_len): - output, hidden = decoder(inp[:,c], hidden) + output, hidden = decoder(inp[:, c], hidden) loss += criterion(output.view(args.batch_size, -1), target[:,c]) loss.backward() decoder_optimizer.step() currentLoss = loss.item() / args.chunk_len - return currentLoss + return currentLoss, end_index def save(): save_filename = os.path.splitext(os.path.basename(args.filename))[0] + '.pt' @@ -105,8 +128,11 @@ def save(): try: print("Training for %d epochs..." % args.n_epochs) for epoch in tqdm(range(1, args.n_epochs + 1)): - loss = train(*random_training_set(args.chunk_len, args.batch_size)) - loss_avg += loss + end_index = 0 + while(end_index < file_len) : + # loss = train(*random_training_set(args.chunk_len, args.batch_size)) + loss, end_index = train(*getBatch(args.chunk_len, args.batch_size, end_index)) + loss_avg += loss if epoch % args.print_every == 0: print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss)) From 3baa440b24e917c407e9e95b288ae5b3a7eda9e7 Mon Sep 17 00:00:00 2001 From: Antonio Ritacco Date: Thu, 30 May 2019 12:57:36 +0200 Subject: [PATCH 06/27] Ignoring .pt --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8ae8ce5..879eb50 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ __pycache__/ .idea/ +*.pt From 977398695b4a69731c99c5af5beef6081d2ad07a Mon Sep 17 00:00:00 2001 From: Antonio Ritacco Date: Thu, 30 May 2019 18:31:35 +0200 Subject: [PATCH 07/27] Code readability improved --- generate.py | 8 ++++++-- helpers.py | 4 ---- train.py | 9 +++++++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/generate.py b/generate.py index 21ab812..97397ea 100755 --- a/generate.py +++ b/generate.py @@ -4,10 +4,14 @@ import torch import os import argparse +import string from helpers import * from model import * +all_characters = string.printable +n_characters = len(all_characters) + def generate(decoder, prime_str='A', predict_len=100, temperature=0.8, cuda=False): hidden = decoder.init_hidden(1) prime_input = Variable(char_tensor(prime_str).unsqueeze(0)) @@ -19,9 +23,9 @@ def generate(decoder, prime_str='A', predict_len=100, temperature=0.8, cuda=Fals # Use priming string to "build up" hidden state for p in range(len(prime_str) - 1): - _, hidden = decoder(prime_input[:,p], hidden) + _, hidden = decoder(prime_input[:, p], hidden) - inp = prime_input[:,-1] + inp = prime_input[:, -1] for p in range(predict_len): diff --git a/helpers.py b/helpers.py index de7f221..3a2997c 100644 --- a/helpers.py +++ b/helpers.py @@ -1,7 +1,6 @@ # https://github.com/spro/char-rnn.pytorch import unidecode -import string import random import time import math @@ -9,9 +8,6 @@ # Reading and un-unicode-encoding data -all_characters = string.printable -n_characters = len(all_characters) - def read_file(filename): file = unidecode.unidecode(open(filename, encoding="utf8").read()) return file, len(file) diff --git a/train.py b/train.py index f4e9174..79f3a52 100755 --- a/train.py +++ b/train.py @@ -8,6 +8,7 @@ import os from tqdm import tqdm +import string from helpers import * from model import * @@ -39,8 +40,8 @@ def random_training_set(chunk_len, 
batch_size): for bi in range(batch_size): start_index = random.randint(0, file_len - chunk_len) - while(file[start_index]!='\n'): # first word should be the actual start of a sentence. - start_index = start_index+1 + # while(file[start_index]!='\n'): # first word should be the actual start of a sentence. + # start_index = start_index+1 end_index = start_index + chunk_len + 1 @@ -108,6 +109,9 @@ def save(): # Initialize models and start training +all_characters = string.printable +n_characters = len(all_characters) + decoder = CharRNN( n_characters, args.hidden_size, @@ -130,6 +134,7 @@ def save(): for epoch in tqdm(range(1, args.n_epochs + 1)): end_index = 0 while(end_index < file_len) : + # loss = train(*random_training_set(args.chunk_len, args.batch_size)) loss, end_index = train(*getBatch(args.chunk_len, args.batch_size, end_index)) loss_avg += loss From 802b298a9c54a133fa6fe2e723fdb3df68c1b32b Mon Sep 17 00:00:00 2001 From: zutotonno Date: Fri, 31 May 2019 18:24:22 +0200 Subject: [PATCH 08/27] Added params model name, minor refact --- helpers.py | 3 +++ model.py | 3 ++- train.py | 7 ++++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/helpers.py b/helpers.py index 3a2997c..280bc33 100644 --- a/helpers.py +++ b/helpers.py @@ -5,8 +5,11 @@ import time import math import torch +import string # Reading and un-unicode-encoding data +all_characters = string.printable +n_characters = len(all_characters) def read_file(filename): file = unidecode.unidecode(open(filename, encoding="utf8").read()) diff --git a/model.py b/model.py index b2a1ae7..d209631 100644 --- a/model.py +++ b/model.py @@ -18,13 +18,14 @@ def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1 self.rnn = nn.GRU(hidden_size, hidden_size, n_layers) elif self.model == "lstm": self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers) - + # self.drop_layer = nn.Dropout(p=0.3) self.decoder = nn.Linear(hidden_size, output_size) def forward(self, input, hidden): batch_size = input.size(0) encoded = self.encoder(input) output, hidden = self.rnn(encoded.view(1, batch_size, -1), hidden) + # outputDropped = self.drop_layer(output) output = self.decoder(output.view(batch_size, -1)) return output, hidden diff --git a/train.py b/train.py index 79f3a52..768feab 100755 --- a/train.py +++ b/train.py @@ -27,12 +27,14 @@ argparser.add_argument('--batch_size', type=int, default=100) argparser.add_argument('--shuffle', action='store_true') argparser.add_argument('--cuda', action='store_true') +argparser.add_argument('--modelname', type=str, default=None) args = argparser.parse_args() if args.cuda: print("Using CUDA") file, file_len = read_file(args.filename) +modelName = args.modelname def random_training_set(chunk_len, batch_size): inp = torch.LongTensor(batch_size, chunk_len) @@ -99,7 +101,10 @@ def train(inp, target, end_index): return currentLoss, end_index def save(): - save_filename = os.path.splitext(os.path.basename(args.filename))[0] + '.pt' + if modelName is not None: + save_filename = os.path.splitext(os.path.basename(args.filename))[0] +modelName+ '.pt' + else: + save_filename = os.path.splitext(os.path.basename(args.filename))[0] + '.pt' torch.save(decoder, save_filename) print('Saved as %s' % save_filename) From 7f36e6f18fbae995ee83ae802506e5d0b10a7810 Mon Sep 17 00:00:00 2001 From: zutotonno Date: Sat, 1 Jun 2019 09:46:59 +0200 Subject: [PATCH 09/27] Adding dropout --- model.py | 11 +++++------ train.py | 3 ++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/model.py b/model.py 
index d209631..776a04e 100644 --- a/model.py +++ b/model.py @@ -5,7 +5,7 @@ from torch.autograd import Variable class CharRNN(nn.Module): - def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1): + def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1, dropout = 0.3): super(CharRNN, self).__init__() self.model = model.lower() self.input_size = input_size @@ -18,14 +18,14 @@ def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1 self.rnn = nn.GRU(hidden_size, hidden_size, n_layers) elif self.model == "lstm": self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers) - # self.drop_layer = nn.Dropout(p=0.3) + self.drop_layer = nn.Dropout(p=dropout) self.decoder = nn.Linear(hidden_size, output_size) def forward(self, input, hidden): batch_size = input.size(0) encoded = self.encoder(input) output, hidden = self.rnn(encoded.view(1, batch_size, -1), hidden) - # outputDropped = self.drop_layer(output) + output = self.drop_layer(output) output = self.decoder(output.view(batch_size, -1)) return output, hidden @@ -37,7 +37,6 @@ def forward2(self, input, hidden): def init_hidden(self, batch_size): if self.model == "lstm": - return (Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)), - Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size))) - return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)) + return Variable(torch.zeros(1, batch_size, self.hidden_size)) + return Variable(torch.zeros(1, batch_size, self.hidden_size)) diff --git a/train.py b/train.py index 768feab..f7c5506 100755 --- a/train.py +++ b/train.py @@ -22,6 +22,7 @@ argparser.add_argument('--print_every', type=int, default=100) argparser.add_argument('--hidden_size', type=int, default=100) argparser.add_argument('--n_layers', type=int, default=2) +argparser.add_argument('--dropout', type=float, default=0.3) argparser.add_argument('--learning_rate', type=float, default=0.01) argparser.add_argument('--chunk_len', type=int, default=200) argparser.add_argument('--batch_size', type=int, default=100) @@ -123,10 +124,10 @@ def save(): n_characters, model=args.model, n_layers=args.n_layers, + dropout=args.dropout ) decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate) criterion = nn.CrossEntropyLoss() - if args.cuda: decoder.cuda() From cc1153de54015000b41f96c0234c011d025614e0 Mon Sep 17 00:00:00 2001 From: Antonio Ritacco Date: Sat, 1 Jun 2019 12:49:50 +0200 Subject: [PATCH 10/27] adding sequential batching --- model.py | 10 ++++------ train.py | 52 +++++++++++++++++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/model.py b/model.py index 776a04e..8fc027e 100644 --- a/model.py +++ b/model.py @@ -15,17 +15,15 @@ def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1 self.encoder = nn.Embedding(input_size, hidden_size) if self.model == "gru": - self.rnn = nn.GRU(hidden_size, hidden_size, n_layers) + self.rnn = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout) elif self.model == "lstm": - self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers) - self.drop_layer = nn.Dropout(p=dropout) + self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers, dropout=dropout) self.decoder = nn.Linear(hidden_size, output_size) def forward(self, input, hidden): batch_size = input.size(0) encoded = self.encoder(input) output, hidden = self.rnn(encoded.view(1, batch_size, -1), hidden) - output = self.drop_layer(output) output = 
self.decoder(output.view(batch_size, -1)) return output, hidden @@ -37,6 +35,6 @@ def forward2(self, input, hidden): def init_hidden(self, batch_size): if self.model == "lstm": - return Variable(torch.zeros(1, batch_size, self.hidden_size)) - return Variable(torch.zeros(1, batch_size, self.hidden_size)) + return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)) + return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)) diff --git a/train.py b/train.py index f7c5506..8abb3cc 100755 --- a/train.py +++ b/train.py @@ -26,7 +26,7 @@ argparser.add_argument('--learning_rate', type=float, default=0.01) argparser.add_argument('--chunk_len', type=int, default=200) argparser.add_argument('--batch_size', type=int, default=100) -argparser.add_argument('--shuffle', action='store_true') +argparser.add_argument('--batch_type', type=int, default=0) argparser.add_argument('--cuda', action='store_true') argparser.add_argument('--modelname', type=str, default=None) args = argparser.parse_args() @@ -63,17 +63,22 @@ def random_training_set(chunk_len, batch_size): return inp, target -def getBatch(chunk_len, batch_size, chunk_start): +def consequent_training_set(chunk_len, batch_size, num_batches): inp = torch.LongTensor(batch_size, chunk_len) target = torch.LongTensor(batch_size, chunk_len) - end_index = chunk_start + end_index = chunk_len*num_batches*batch_size + (batch_size*num_batches) + end_reached = False for bi in range(batch_size): start_index = end_index - end_index = start_index + chunk_len + 1 - if (end_index > file_len): # if we ended after the last char of the file, come back to get a correct chunk len + if (end_reached == True): + start_index = random.randint(0, file_len - chunk_len - 1) + + if (start_index + chunk_len + 1 > file_len): # if we ended after the last char of the file, come back to get a correct chunk len start_index = file_len - chunk_len - 1 + end_reached = True + end_index = start_index + chunk_len + 1 # Adding 1 to create target chunk = file[start_index:end_index] inp[bi] = char_tensor(chunk[:-1]) @@ -83,23 +88,25 @@ def getBatch(chunk_len, batch_size, chunk_start): if args.cuda: inp = inp.cuda() target = target.cuda() - return inp, target, end_index + return inp, target + +def train(inp, target): -def train(inp, target, end_index): + decoder.zero_grad() + loss = 0 hidden = decoder.init_hidden(args.batch_size) if args.cuda: hidden = hidden.cuda() - decoder.zero_grad() - loss = 0 - for c in range(args.chunk_len): output, hidden = decoder(inp[:, c], hidden) - loss += criterion(output.view(args.batch_size, -1), target[:,c]) + loss += criterion(output.view(args.batch_size, -1), target[:, c]) + + ### The losses are averaged across observations for each minibatch (see doc CrossEntropyLoss) loss.backward() decoder_optimizer.step() - currentLoss = loss.item() / args.chunk_len - return currentLoss, end_index + currentLoss = loss.item() + return currentLoss def save(): if modelName is not None: @@ -126,25 +133,32 @@ def save(): n_layers=args.n_layers, dropout=args.dropout ) + + decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate) criterion = nn.CrossEntropyLoss() if args.cuda: decoder.cuda() +batch_type = args.batch_type + start = time.time() all_losses = [] loss_avg = 0 try: print("Training for %d epochs..." 
% args.n_epochs) + numFileBatches = math.ceil(file_len/((args.batch_size*args.chunk_len)+args.batch_size)) for epoch in tqdm(range(1, args.n_epochs + 1)): - end_index = 0 - while(end_index < file_len) : - - # loss = train(*random_training_set(args.chunk_len, args.batch_size)) - loss, end_index = train(*getBatch(args.chunk_len, args.batch_size, end_index)) + # end_index = 0 + numBatches = 0 + while(numBatches < numFileBatches) : + if(batch_type == 0): ### Sampling batches at random + loss = train(*random_training_set(args.chunk_len, args.batch_size)) + elif(batch_type == 1): ### Get consequent batches of chars without replacement + loss = train(*consequent_training_set(args.chunk_len, args.batch_size, numBatches)) loss_avg += loss - + numBatches += 1 if epoch % args.print_every == 0: print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss)) print(generate(decoder, 'Renzi', 200, cuda=args.cuda), '\n') From 9a951fa53e920b5c34ea21f9782ab6a6d4aee98c Mon Sep 17 00:00:00 2001 From: zutotonno Date: Sun, 2 Jun 2019 12:39:55 +0200 Subject: [PATCH 11/27] 1) Fixed support to lstm 2) Now support validation during training 3) Save model, train validation errors, json with train params --- generate.py | 5 +++- model.py | 3 ++- train.py | 70 ++++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 59 insertions(+), 19 deletions(-) diff --git a/generate.py b/generate.py index 97397ea..0c0ddb5 100755 --- a/generate.py +++ b/generate.py @@ -17,7 +17,10 @@ def generate(decoder, prime_str='A', predict_len=100, temperature=0.8, cuda=Fals prime_input = Variable(char_tensor(prime_str).unsqueeze(0)) if cuda: - hidden = hidden.cuda() + if isinstance(hidden, tuple): + hidden = (hidden[0].cuda(), hidden[1].cuda()) + else: + hidden = hidden.cuda() prime_input = prime_input.cuda() predicted = prime_str diff --git a/model.py b/model.py index 8fc027e..3234f9a 100644 --- a/model.py +++ b/model.py @@ -35,6 +35,7 @@ def forward2(self, input, hidden): def init_hidden(self, batch_size): if self.model == "lstm": - return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)) + return (Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)), + Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size))) return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)) diff --git a/train.py b/train.py index 8abb3cc..f3e25c2 100755 --- a/train.py +++ b/train.py @@ -6,7 +6,8 @@ from torch.autograd import Variable import argparse import os - +import json +import numpy as np from tqdm import tqdm import string @@ -16,7 +17,8 @@ # Parse command line arguments argparser = argparse.ArgumentParser() -argparser.add_argument('filename', type=str) +argparser.add_argument('--train', type=str) +argparser.add_argument('--valid', type=str) argparser.add_argument('--model', type=str, default="gru") argparser.add_argument('--n_epochs', type=int, default=2000) argparser.add_argument('--print_every', type=int, default=100) @@ -34,10 +36,12 @@ if args.cuda: print("Using CUDA") -file, file_len = read_file(args.filename) +fileTrain, file_lenTrain = read_file(args.train) +fileValid, file_lenValid = read_file(args.valid) modelName = args.modelname -def random_training_set(chunk_len, batch_size): + +def random_dataset(chunk_len, batch_size,file,file_len): inp = torch.LongTensor(batch_size, chunk_len) target = torch.LongTensor(batch_size, chunk_len) for bi in range(batch_size): @@ -72,14 +76,14 @@ def consequent_training_set(chunk_len, batch_size, num_batches): 
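+        # Each batch element begins where the previous slice ended; every
+        # slice spans chunk_len+1 chars, so inp (chunk[:-1]) and target
+        # (chunk[1:]) are the same window shifted by one character.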
start_index = end_index if (end_reached == True): - start_index = random.randint(0, file_len - chunk_len - 1) + start_index = random.randint(0, file_lenTrain - chunk_len - 1) - if (start_index + chunk_len + 1 > file_len): # if we ended after the last char of the file, come back to get a correct chunk len - start_index = file_len - chunk_len - 1 + if (start_index + chunk_len + 1 > file_lenTrain): # if we ended after the last char of the file, come back to get a correct chunk len + start_index = file_lenTrain - chunk_len - 1 end_reached = True end_index = start_index + chunk_len + 1 # Adding 1 to create target - chunk = file[start_index:end_index] + chunk = fileTrain[start_index:end_index] inp[bi] = char_tensor(chunk[:-1]) target[bi] = char_tensor(chunk[1:]) @@ -96,7 +100,10 @@ def train(inp, target): loss = 0 hidden = decoder.init_hidden(args.batch_size) if args.cuda: - hidden = hidden.cuda() + if args.model == "gru": + hidden = hidden.cuda() + else: + hidden = (hidden[0].cuda(), hidden[1].cuda()) for c in range(args.chunk_len): output, hidden = decoder(inp[:, c], hidden) loss += criterion(output.view(args.batch_size, -1), target[:, c]) @@ -105,14 +112,36 @@ def train(inp, target): loss.backward() decoder_optimizer.step() - currentLoss = loss.item() + currentLoss = loss.item()/ args.chunk_len + return currentLoss + +def valid(inp,target): + decoder.zero_grad() + loss = 0 + hidden = decoder.init_hidden(args.batch_size) + if args.cuda: + if args.model == "gru": + hidden = hidden.cuda() + else: + hidden = (hidden[0].cuda(), hidden[1].cuda()) + for c in range(args.chunk_len): + output, hidden = decoder(inp[:, c], hidden) + loss += criterion(output.view(args.batch_size, -1), target[:, c]) + currentLoss = loss.item() / args.chunk_len return currentLoss def save(): + save_filename = 'Save/' if modelName is not None: - save_filename = os.path.splitext(os.path.basename(args.filename))[0] +modelName+ '.pt' + save_filename += os.path.splitext(os.path.basename(args.train))[0] +'_'+modelName+ '.pt' else: - save_filename = os.path.splitext(os.path.basename(args.filename))[0] + '.pt' + save_filename += os.path.splitext(os.path.basename(args.train))[0] + '.pt' + + jsonName = save_filename + '.json' + with open(jsonName, 'w') as json_file: + json.dump(vars(args), json_file) + saveLossesName = save_filename+'.csv' + np.savetxt(saveLossesName, np.column_stack((train_losses, valid_losses)), delimiter=",", fmt='%s', header='Train,Valid') torch.save(decoder, save_filename) print('Saved as %s' % save_filename) @@ -143,24 +172,31 @@ def save(): batch_type = args.batch_type start = time.time() -all_losses = [] +train_losses = [] +valid_losses = [] loss_avg = 0 - +valid_loss_avg = 0 try: print("Training for %d epochs..." 
% args.n_epochs) - numFileBatches = math.ceil(file_len/((args.batch_size*args.chunk_len)+args.batch_size)) + numFileBatches = math.ceil(file_lenTrain/((args.batch_size*args.chunk_len)+args.batch_size)) for epoch in tqdm(range(1, args.n_epochs + 1)): # end_index = 0 numBatches = 0 + while(numBatches < numFileBatches) : if(batch_type == 0): ### Sampling batches at random - loss = train(*random_training_set(args.chunk_len, args.batch_size)) + loss = train(*random_dataset(args.chunk_len, args.batch_size,fileTrain,file_lenTrain)) elif(batch_type == 1): ### Get consequent batches of chars without replacement loss = train(*consequent_training_set(args.chunk_len, args.batch_size, numBatches)) loss_avg += loss numBatches += 1 + loss_avg /= numFileBatches + valid_loss_avg = valid(*random_dataset(args.chunk_len, args.batch_size,fileValid,file_lenValid)) + + train_losses.append(loss_avg) + valid_losses.append(valid_loss_avg) if epoch % args.print_every == 0: - print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss)) + print('[%s (%d %d%%) Train: %.4f Valid: %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss_avg, valid_loss_avg)) print(generate(decoder, 'Renzi', 200, cuda=args.cuda), '\n') print("Saving...") From 9e169ab31f769e92db954dd042b145452319490c Mon Sep 17 00:00:00 2001 From: zutotonno Date: Sun, 2 Jun 2019 12:57:11 +0200 Subject: [PATCH 12/27] saving model every epoch --- .gitignore | 1 + train.py | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/.gitignore b/.gitignore index 879eb50..6d63ec3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__/ .idea/ *.pt +Save/ diff --git a/train.py b/train.py index f3e25c2..d8c2f6c 100755 --- a/train.py +++ b/train.py @@ -145,6 +145,17 @@ def save(): torch.save(decoder, save_filename) print('Saved as %s' % save_filename) +def savemodel(epoch): + save_filename = 'Save/' + directoryCheckpoint = 'Save/'+modelName + if not os.path.exists(directoryCheckpoint): + os.makedirs(directoryCheckpoint) + if modelName is not None: + directoryCheckpoint +='/'+ os.path.splitext(os.path.basename(args.train))[0] +'_'+modelName+ '_'+str(epoch) +'.pt' + else: + directoryCheckpoint +='/'+ os.path.splitext(os.path.basename(args.train))[0] + '_'+str(epoch)+'.pt' + + torch.save(decoder, directoryCheckpoint) @@ -198,6 +209,7 @@ def save(): if epoch % args.print_every == 0: print('[%s (%d %d%%) Train: %.4f Valid: %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss_avg, valid_loss_avg)) print(generate(decoder, 'Renzi', 200, cuda=args.cuda), '\n') + savemodel(epoch) print("Saving...") save() From c51aa38e2318dc780128f3b7dfdb529ddabb524b Mon Sep 17 00:00:00 2001 From: Antonio Ritacco Date: Sun, 2 Jun 2019 13:06:51 +0200 Subject: [PATCH 13/27] Update README.md --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e142da6..63bf805 100644 --- a/README.md +++ b/README.md @@ -59,10 +59,14 @@ Options: --cuda Use CUDA ``` + ### TODO ``` -[] Add Dropout (p) -[] Add Validation set to monitor overfitting +[] Early stopping (?) 
Maybe not in few days +[x] Add Dropout (p) +[x] Add Validation set to monitor overfitting +[x] Saving model at checkpoint +[x] Saving train and validation error, with training params to file ``` From 0a6128e0aa840f7b7fd8667ff511135a7a6b30c7 Mon Sep 17 00:00:00 2001 From: zutotonno Date: Mon, 3 Jun 2019 13:35:18 +0200 Subject: [PATCH 14/27] refact to Object Oriented style --- generate.py | 14 ++-- model.py | 33 +++++++- train.py | 234 ++++++++++++++++++++++------------------------------ 3 files changed, 139 insertions(+), 142 deletions(-) diff --git a/generate.py b/generate.py index 0c0ddb5..9c24df2 100755 --- a/generate.py +++ b/generate.py @@ -41,13 +41,13 @@ def generate(decoder, prime_str='A', predict_len=100, temperature=0.8, cuda=Fals # Add predicted character to string and use as next input predicted_char = all_characters[top_i] - if(predicted_char=='\n'): - break - else: - predicted += predicted_char - inp = Variable(char_tensor(predicted_char).unsqueeze(0)) - if cuda: - inp = inp.cuda() + # if(predicted_char=='\n'): + # break + # else: + predicted += predicted_char + inp = Variable(char_tensor(predicted_char).unsqueeze(0)) + if cuda: + inp = inp.cuda() return predicted diff --git a/model.py b/model.py index 3234f9a..b79eb42 100644 --- a/model.py +++ b/model.py @@ -5,13 +5,17 @@ from torch.autograd import Variable class CharRNN(nn.Module): - def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1, dropout = 0.3): + def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1, + dropout = 0.3, gpu = True, batch_size = 32, chunk_len = 30, learning_rate = 0.001): super(CharRNN, self).__init__() self.model = model.lower() self.input_size = input_size self.hidden_size = hidden_size self.output_size = output_size self.n_layers = n_layers + self.gpu = gpu + self.batch_size = batch_size + self.chunk_len = chunk_len self.encoder = nn.Embedding(input_size, hidden_size) if self.model == "gru": @@ -19,6 +23,10 @@ def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1 elif self.model == "lstm": self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers, dropout=dropout) self.decoder = nn.Linear(hidden_size, output_size) + self.optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate) + self.criterion = nn.CrossEntropyLoss() + if self.gpu: + self.cuda() def forward(self, input, hidden): batch_size = input.size(0) @@ -39,3 +47,26 @@ def init_hidden(self, batch_size): Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size))) return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)) + + def train(self,inp, target, validation): + self.zero_grad() + loss = 0 + hidden = self.init_hidden(self.batch_size) + if self.cuda: + if self.model == "gru": + hidden = hidden.cuda() + else: + hidden = (hidden[0].cuda(), hidden[1].cuda()) + for c in range(self.chunk_len): + output, hidden = self(inp[:, c], hidden) + loss += self.criterion(output.view(self.batch_size, -1), target[:, c]) + ### The losses are averaged across observations for each minibatch (see doc CrossEntropyLoss) + if not validation: + loss.backward() + self.optimizer.step() + currentLoss = loss.item()/ self.chunk_len + return currentLoss + + + + diff --git a/train.py b/train.py index d8c2f6c..af9032b 100755 --- a/train.py +++ b/train.py @@ -15,45 +15,19 @@ from model import * from generate import * -# Parse command line arguments -argparser = argparse.ArgumentParser() -argparser.add_argument('--train', type=str) -argparser.add_argument('--valid', 
type=str) -argparser.add_argument('--model', type=str, default="gru") -argparser.add_argument('--n_epochs', type=int, default=2000) -argparser.add_argument('--print_every', type=int, default=100) -argparser.add_argument('--hidden_size', type=int, default=100) -argparser.add_argument('--n_layers', type=int, default=2) -argparser.add_argument('--dropout', type=float, default=0.3) -argparser.add_argument('--learning_rate', type=float, default=0.01) -argparser.add_argument('--chunk_len', type=int, default=200) -argparser.add_argument('--batch_size', type=int, default=100) -argparser.add_argument('--batch_type', type=int, default=0) -argparser.add_argument('--cuda', action='store_true') -argparser.add_argument('--modelname', type=str, default=None) -args = argparser.parse_args() - -if args.cuda: - print("Using CUDA") - -fileTrain, file_lenTrain = read_file(args.train) -fileValid, file_lenValid = read_file(args.valid) -modelName = args.modelname - - -def random_dataset(chunk_len, batch_size,file,file_len): - inp = torch.LongTensor(batch_size, chunk_len) - target = torch.LongTensor(batch_size, chunk_len) - for bi in range(batch_size): - start_index = random.randint(0, file_len - chunk_len) +def random_dataset(args,file,file_len): + inp = torch.LongTensor(args.batch_size, args.chunk_len) + target = torch.LongTensor(args.batch_size, args.chunk_len) + for bi in range(args.batch_size): + start_index = random.randint(0, file_len - args.chunk_len) # while(file[start_index]!='\n'): # first word should be the actual start of a sentence. # start_index = start_index+1 - end_index = start_index + chunk_len + 1 + end_index = start_index + args.chunk_len + 1 if(end_index>file_len): # if we ended after the last char of the file, come back to get a correct chunk len - start_index = file_len-chunk_len-1 + start_index = file_len-args.chunk_len-1 chunk = file[start_index:end_index] @@ -67,22 +41,22 @@ def random_dataset(chunk_len, batch_size,file,file_len): return inp, target -def consequent_training_set(chunk_len, batch_size, num_batches): - inp = torch.LongTensor(batch_size, chunk_len) - target = torch.LongTensor(batch_size, chunk_len) - end_index = chunk_len*num_batches*batch_size + (batch_size*num_batches) +def consequent_training_set(args, num_batches, fileTrain, file_lenTrain): + inp = torch.LongTensor(args.batch_size, args.chunk_len) + target = torch.LongTensor(args.batch_size, args.chunk_len) + end_index = args.chunk_len*args.num_batches*args.batch_size + (args.batch_size*args.num_batches) end_reached = False - for bi in range(batch_size): + for bi in range(args.batch_size): start_index = end_index if (end_reached == True): - start_index = random.randint(0, file_lenTrain - chunk_len - 1) + start_index = random.randint(0, file_lenTrain - args.chunk_len - 1) if (start_index + chunk_len + 1 > file_lenTrain): # if we ended after the last char of the file, come back to get a correct chunk len - start_index = file_lenTrain - chunk_len - 1 + start_index = file_lenTrain - args.chunk_len - 1 end_reached = True - end_index = start_index + chunk_len + 1 # Adding 1 to create target + end_index = start_index + args.chunk_len + 1 # Adding 1 to create target chunk = fileTrain[start_index:end_index] inp[bi] = char_tensor(chunk[:-1]) @@ -94,43 +68,7 @@ def consequent_training_set(chunk_len, batch_size, num_batches): target = target.cuda() return inp, target -def train(inp, target): - - decoder.zero_grad() - loss = 0 - hidden = decoder.init_hidden(args.batch_size) - if args.cuda: - if args.model == "gru": - hidden = 
hidden.cuda() - else: - hidden = (hidden[0].cuda(), hidden[1].cuda()) - for c in range(args.chunk_len): - output, hidden = decoder(inp[:, c], hidden) - loss += criterion(output.view(args.batch_size, -1), target[:, c]) - - ### The losses are averaged across observations for each minibatch (see doc CrossEntropyLoss) - - loss.backward() - decoder_optimizer.step() - currentLoss = loss.item()/ args.chunk_len - return currentLoss - -def valid(inp,target): - decoder.zero_grad() - loss = 0 - hidden = decoder.init_hidden(args.batch_size) - if args.cuda: - if args.model == "gru": - hidden = hidden.cuda() - else: - hidden = (hidden[0].cuda(), hidden[1].cuda()) - for c in range(args.chunk_len): - output, hidden = decoder(inp[:, c], hidden) - loss += criterion(output.view(args.batch_size, -1), target[:, c]) - currentLoss = loss.item() / args.chunk_len - return currentLoss - -def save(): +def save(args): save_filename = 'Save/' if modelName is not None: save_filename += os.path.splitext(os.path.basename(args.train))[0] +'_'+modelName+ '.pt' @@ -145,7 +83,7 @@ def save(): torch.save(decoder, save_filename) print('Saved as %s' % save_filename) -def savemodel(epoch): +def savemodel(args, epoch): save_filename = 'Save/' directoryCheckpoint = 'Save/'+modelName if not os.path.exists(directoryCheckpoint): @@ -162,59 +100,87 @@ def savemodel(epoch): # Initialize models and start training -all_characters = string.printable -n_characters = len(all_characters) - -decoder = CharRNN( - n_characters, - args.hidden_size, - n_characters, - model=args.model, - n_layers=args.n_layers, - dropout=args.dropout -) - - -decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate) -criterion = nn.CrossEntropyLoss() -if args.cuda: - decoder.cuda() - -batch_type = args.batch_type - -start = time.time() -train_losses = [] -valid_losses = [] -loss_avg = 0 -valid_loss_avg = 0 -try: - print("Training for %d epochs..." 
% args.n_epochs) - numFileBatches = math.ceil(file_lenTrain/((args.batch_size*args.chunk_len)+args.batch_size)) - for epoch in tqdm(range(1, args.n_epochs + 1)): - # end_index = 0 - numBatches = 0 - - while(numBatches < numFileBatches) : - if(batch_type == 0): ### Sampling batches at random - loss = train(*random_dataset(args.chunk_len, args.batch_size,fileTrain,file_lenTrain)) - elif(batch_type == 1): ### Get consequent batches of chars without replacement - loss = train(*consequent_training_set(args.chunk_len, args.batch_size, numBatches)) - loss_avg += loss - numBatches += 1 - loss_avg /= numFileBatches - valid_loss_avg = valid(*random_dataset(args.chunk_len, args.batch_size,fileValid,file_lenValid)) - - train_losses.append(loss_avg) - valid_losses.append(valid_loss_avg) - if epoch % args.print_every == 0: - print('[%s (%d %d%%) Train: %.4f Valid: %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss_avg, valid_loss_avg)) - print(generate(decoder, 'Renzi', 200, cuda=args.cuda), '\n') - savemodel(epoch) - - print("Saving...") - save() - -except KeyboardInterrupt: - print("Saving before quit...") - save() +if __name__ == '__main__': + + # Parse command line arguments + argparser = argparse.ArgumentParser() + argparser.add_argument('--train', type=str) + argparser.add_argument('--valid', type=str) + argparser.add_argument('--model', type=str, default="gru") + argparser.add_argument('--n_epochs', type=int, default=2000) + argparser.add_argument('--print_every', type=int, default=100) + argparser.add_argument('--hidden_size', type=int, default=100) + argparser.add_argument('--n_layers', type=int, default=2) + argparser.add_argument('--dropout', type=float, default=0.3) + argparser.add_argument('--learning_rate', type=float, default=0.01) + argparser.add_argument('--chunk_len', type=int, default=200) + argparser.add_argument('--batch_size', type=int, default=100) + argparser.add_argument('--batch_type', type=int, default=0) + argparser.add_argument('--cuda', action='store_true') + argparser.add_argument('--modelname', type=str, default=None) + args = argparser.parse_args() + + if args.cuda: + print("Using CUDA") + + fileTrain, file_lenTrain = read_file(args.train) + fileValid, file_lenValid = read_file(args.valid) + modelName = args.modelname + + all_characters = string.printable + n_characters = len(all_characters) + + decoder = CharRNN( + n_characters, + args.hidden_size, + n_characters, + model=args.model, + n_layers=args.n_layers, + dropout=args.dropout, + learning_rate=args.learning_rate, + chunk_len= args.chunk_len, + batch_size=args.batch_size, + gpu = args.cuda + ) + + + + + batch_type = args.batch_type + + start = time.time() + train_losses = [] + valid_losses = [] + loss_avg = 0 + valid_loss_avg = 0 + try: + print("Training for %d epochs..." 
% args.n_epochs) + numFileBatches = math.ceil(file_lenTrain/((args.batch_size*args.chunk_len)+args.batch_size)) + for epoch in tqdm(range(1, args.n_epochs + 1)): + # end_index = 0 + numBatches = 0 + + while(numBatches < numFileBatches) : + if(batch_type == 0): ### Sampling batches at random + loss = decoder.train(*random_dataset(args,fileTrain,file_lenTrain),validation=False) + elif(batch_type == 1): ### Get consequent batches of chars without replacement + loss = decoder.train(*consequent_training_set(args, numBatches,fileTrain, file_lenTrain),validation=False) + loss_avg += loss + numBatches += 1 + loss_avg /= numFileBatches + valid_loss_avg = decoder.train(*random_dataset(args,fileValid,file_lenValid),validation=True) + + train_losses.append(loss_avg) + valid_losses.append(valid_loss_avg) + if epoch % args.print_every == 0: + print('[%s (%d %d%%) Train: %.4f Valid: %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss_avg, valid_loss_avg)) + print(generate(decoder, 'Renzi', 200, cuda=args.cuda), '\n') + savemodel(args, epoch) + + print("Saving...") + save(args) + + except KeyboardInterrupt: + print("Saving before quit...") + save(args) From 67728a6d6860b803ce56d89e13b94fb7f19f5ca8 Mon Sep 17 00:00:00 2001 From: zutotonno Date: Mon, 3 Jun 2019 15:13:08 +0200 Subject: [PATCH 15/27] update readme --- README.md | 28 ++++++++++++++++++---------- train.py | 13 ++++++++----- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 63bf805..986df44 100644 --- a/README.md +++ b/README.md @@ -9,28 +9,35 @@ Download [this Shakespeare dataset](https://raw.githubusercontent.com/karpathy/c Run `train.py` with the dataset filename to train and save the network: ``` -> python train.py shakespeare.txt +> python train.py --train shakespeare.txt Training for 2000 epochs... (... 10 minutes later ...) Saved as shakespeare.pt ``` After training the model will be saved as `[filename].pt`. 
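+
+A fuller example invocation (the data files and session name here are
+placeholders):
+```
+> python train.py --train shakespeare_train.txt --valid shakespeare_valid.txt --modelname shakespeare_gru
+```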
+According to the --print_every arg, model checkpoints will be saved in the `Save/` folder, which should be located in the same folder from which the train.py script is called
 
 ### Training options
 ```
-Usage: train.py [filename] [options]
+Usage: train.py [options]
 
 Options:
---model            Whether to use LSTM or GRU units       gru
---n_epochs         Number of epochs to train              2000
---print_every      Log learning rate at this interval     100
---hidden_size      Hidden size of GRU                     50
---n_layers         Number of GRU layers                   2
---learning_rate    Learning rate                          0.01
---chunk_len        Length of training chunks              200
---batch_size       Number of examples per batch           100
+--train            Train data
+--valid            Validation data
+--model            Whether to use LSTM or GRU units       gru
+--n_epochs         Number of epochs to train              2000
+--print_every      Log learning rate at this interval     100
+--hidden_size      Hidden size of GRU                     50
+--n_layers         Number of GRU layers                   2
+--learning_rate    Learning rate                          0.01
+--chunk_len        Length of training chunks              200
+--batch_size       Number of examples per batch           100
+--batch_type       Batch random (0) or sequential (1)     0
+--dropout          Dropout rate between recurrent layers  0.3
+--modelname        Model (session) name, used for checkpoints
+
 --cuda             Use CUDA
 ```
@@ -67,6 +74,7 @@ Options:
 [x] Add Validation set to monitor overfitting
 [x] Saving model at checkpoint
 [x] Saving train and validation error, with training params to file
+[x] Refactor to a more OO paradigm
 
 ```
 
diff --git a/train.py b/train.py
index af9032b..6bd9ee4 100755
--- a/train.py
+++ b/train.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# https://github.com/spro/char-rnn.pytorch
+# https://github.com/zutotonno/char-rnn.pytorch
 
 import torch
 import torch.nn as nn
@@ -79,7 +79,10 @@ def save(args):
     jsonName = save_filename + '.json'
     with open(jsonName, 'w') as json_file:
         json.dump(vars(args), json_file)
     saveLossesName = save_filename+'.csv'
-    np.savetxt(saveLossesName, np.column_stack((train_losses, valid_losses)), delimiter=",", fmt='%s', header='Train,Valid')
+    if(args.valid is not none):
+        np.savetxt(saveLossesName, np.column_stack((train_losses, valid_losses)), delimiter=",", fmt='%s', header='Train,Valid')
+    else:
+        np.savetxt(saveLossesName, train_losses, delimiter=",", fmt='%s', header='Train')
     torch.save(decoder, save_filename)
     print('Saved as %s' % save_filename)
@@ -168,10 +171,10 @@ def savemodel(args, epoch):
                 loss_avg += loss
                 numBatches += 1
             loss_avg /= numFileBatches
-            valid_loss_avg = decoder.train(*random_dataset(args,fileValid,file_lenValid),validation=True)
-
+            if args.valid is not None:
+                valid_loss_avg = decoder.train(*random_dataset(args,fileValid,file_lenValid),validation=True)
+                valid_losses.append(valid_loss_avg)
             train_losses.append(loss_avg)
             if epoch % args.print_every == 0:
                 print('[%s (%d %d%%) Train: %.4f Valid: %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss_avg, valid_loss_avg))
                 print(generate(decoder, 'Renzi', 200, cuda=args.cuda), '\n')

From df46f11b6dbd99697672c33b28e9f4db8612754b Mon Sep 17 00:00:00 2001
From: zutotonno
Date: Mon, 3 Jun 2019 15:18:53 +0200
Subject: [PATCH 16/27] fix flies

---
 train.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/train.py b/train.py
index 6bd9ee4..3db6f52 100755
--- a/train.py
+++ b/train.py
@@ -44,7 +44,7 @@ def consequent_training_set(args, num_batches, fileTrain, file_lenTrain):
     inp = torch.LongTensor(args.batch_size, args.chunk_len)
     target = torch.LongTensor(args.batch_size, args.chunk_len)
-
end_index = args.chunk_len*num_batches*args.batch_size + (args.batch_size*num_batches) end_reached = False for bi in range(args.batch_size): start_index = end_index @@ -52,7 +52,7 @@ def consequent_training_set(args, num_batches, fileTrain, file_lenTrain): if (end_reached == True): start_index = random.randint(0, file_lenTrain - args.chunk_len - 1) - if (start_index + chunk_len + 1 > file_lenTrain): # if we ended after the last char of the file, come back to get a correct chunk len + if (start_index + args.chunk_len + 1 > file_lenTrain): # if we ended after the last char of the file, come back to get a correct chunk len start_index = file_lenTrain - args.chunk_len - 1 end_reached = True @@ -162,7 +162,6 @@ def savemodel(args, epoch): for epoch in tqdm(range(1, args.n_epochs + 1)): # end_index = 0 numBatches = 0 - while(numBatches < numFileBatches) : if(batch_type == 0): ### Sampling batches at random loss = decoder.train(*random_dataset(args,fileTrain,file_lenTrain),validation=False) From 827af08edd9db4b403606f0a7da035d10241be8e Mon Sep 17 00:00:00 2001 From: zutotonno Date: Mon, 3 Jun 2019 15:47:43 +0200 Subject: [PATCH 17/27] fix flies --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 3db6f52..5f63c1c 100755 --- a/train.py +++ b/train.py @@ -79,7 +79,7 @@ def save(args): with open(jsonName, 'w') as json_file: json.dump(vars(args), json_file) saveLossesName = save_filename+'.csv' - if(args.valid is not none): + if(args.valid is not None): np.savetxt(saveLossesName, np.column_stack((train_losses, valid_losses)), delimiter=",", fmt='%s', header='Train,Valid') else: np.savetxt(saveLossesName, train_losses, delimiter=",", fmt='%s', header='Train') From 73da77903df4fee1126bf00179062ba9003ef06e Mon Sep 17 00:00:00 2001 From: zutotonno Date: Mon, 3 Jun 2019 17:40:52 +0200 Subject: [PATCH 18/27] fix some flies around --- train.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index 5f63c1c..462b662 100755 --- a/train.py +++ b/train.py @@ -127,7 +127,12 @@ def savemodel(args, epoch): print("Using CUDA") fileTrain, file_lenTrain = read_file(args.train) - fileValid, file_lenValid = read_file(args.valid) + try: + fileValid, file_lenValid = read_file(args.valid) + except: + print('No validation data supplied') + if(args.modelname is None): + print('No model name supplied -> Model checkpoint disabled') modelName = args.modelname all_characters = string.printable @@ -177,7 +182,8 @@ def savemodel(args, epoch): if epoch % args.print_every == 0: print('[%s (%d %d%%) Train: %.4f Valid: %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss_avg, valid_loss_avg)) print(generate(decoder, 'Renzi', 200, cuda=args.cuda), '\n') - savemodel(args, epoch) + if(args.modelname is not None): + savemodel(args, epoch) print("Saving...") save(args) From 15643de7fc9bc419a1981394846d0557d1e31375 Mon Sep 17 00:00:00 2001 From: zutotonno Date: Tue, 4 Jun 2019 09:17:11 +0200 Subject: [PATCH 19/27] optimizer rmsprop and flies fixing --- generate.py | 2 +- model.py | 10 +++++++--- train.py | 4 +++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/generate.py b/generate.py index 9c24df2..4731141 100755 --- a/generate.py +++ b/generate.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# https://github.com/spro/char-rnn.pytorch +# https://github.com/zutotonno/char-rnn.pytorch import torch import os diff --git a/model.py b/model.py index b79eb42..51ff5ad 100644 --- a/model.py +++ b/model.py @@ -1,4 +1,4 @@ -# 
https://github.com/spro/char-rnn.pytorch +# https://github.com/zutotonno/char-rnn.pytorch import torch import torch.nn as nn @@ -6,7 +6,7 @@ class CharRNN(nn.Module): def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1, - dropout = 0.3, gpu = True, batch_size = 32, chunk_len = 30, learning_rate = 0.001): + dropout = 0.3, gpu = True, batch_size = 32, chunk_len = 30, learning_rate = 0.001, optimizer = "adam"): super(CharRNN, self).__init__() self.model = model.lower() self.input_size = input_size @@ -16,6 +16,7 @@ def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1 self.gpu = gpu self.batch_size = batch_size self.chunk_len = chunk_len + self.optimizer = optimizer self.encoder = nn.Embedding(input_size, hidden_size) if self.model == "gru": @@ -23,7 +24,10 @@ def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1 elif self.model == "lstm": self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers, dropout=dropout) self.decoder = nn.Linear(hidden_size, output_size) - self.optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate) + if self.optimizer == "adam": + self.optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate) + elif self.optimizer == "rms": + self.optimizer = torch.optim.RMSprop(self.parameters(), lr=learning_rate) self.criterion = nn.CrossEntropyLoss() if self.gpu: self.cuda() diff --git a/train.py b/train.py index 462b662..da45960 100755 --- a/train.py +++ b/train.py @@ -121,6 +121,7 @@ def savemodel(args, epoch): argparser.add_argument('--batch_type', type=int, default=0) argparser.add_argument('--cuda', action='store_true') argparser.add_argument('--modelname', type=str, default=None) + argparser.add_argument('--optimizer', type=str, default="adam") args = argparser.parse_args() if args.cuda: @@ -148,7 +149,8 @@ def savemodel(args, epoch): learning_rate=args.learning_rate, chunk_len= args.chunk_len, batch_size=args.batch_size, - gpu = args.cuda + gpu = args.cuda, + optimizer = args.optimizer ) From f309e99beb091208b9a4f897c31de899bd93fc7b Mon Sep 17 00:00:00 2001 From: zutotonno Date: Tue, 4 Jun 2019 12:07:01 +0200 Subject: [PATCH 20/27] added grid_search script --- model.py | 2 +- search_params.py | 216 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 217 insertions(+), 1 deletion(-) create mode 100755 search_params.py diff --git a/model.py b/model.py index 51ff5ad..f6f4e85 100644 --- a/model.py +++ b/model.py @@ -6,7 +6,7 @@ class CharRNN(nn.Module): def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1, - dropout = 0.3, gpu = True, batch_size = 32, chunk_len = 30, learning_rate = 0.001, optimizer = "adam"): + dropout = 0, gpu = True, batch_size = 32, chunk_len = 30, learning_rate = 0.001, optimizer = "adam"): super(CharRNN, self).__init__() self.model = model.lower() self.input_size = input_size diff --git a/search_params.py b/search_params.py new file mode 100755 index 0000000..1766e22 --- /dev/null +++ b/search_params.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python +# https://github.com/zutotonno/char-rnn.pytorch + +import torch +import torch.nn as nn +from torch.autograd import Variable +import argparse +import os +import json +import numpy as np +from tqdm import tqdm +import string +import itertools + +from helpers import * +from model import * +from generate import * + +def random_dataset(program_args,args,file,file_len): + inp = torch.LongTensor(args['batch_size'], args['chunk_len']) + target = torch.LongTensor(args['batch_size'], 
args['chunk_len']) + for bi in range(args['batch_size']): + start_index = random.randint(0, file_len -args['chunk_len']) + + # while(file[start_index]!='\n'): # first word should be the actual start of a sentence. + # start_index = start_index+1 + + end_index = start_index + args['chunk_len'] + 1 + + if(end_index>file_len): # if we ended after the last char of the file, come back to get a correct chunk len + start_index = file_len-args['chunk_len']-1 + + chunk = file[start_index:end_index] + + inp[bi] = char_tensor(chunk[:-1]) + target[bi] = char_tensor(chunk[1:]) + inp = Variable(inp) + target = Variable(target) + if program_args.cuda: + inp = inp.cuda() + target = target.cuda() + return inp, target + + +def consequent_training_set(program_args,args, num_batches, fileTrain, file_lenTrain): + inp = torch.LongTensor(args['batch_size'], args['chunk_len']) + target = torch.LongTensor(args['batch_size'], args['chunk_len']) + end_index = args['chunk_len']*num_batches*args['batch_size'] + (args['batch_size']*num_batches) + end_reached = False + for bi in range(args['batch_size']): + start_index = end_index + + if (end_reached == True): + start_index = random.randint(0, file_lenTrain - args.chunk_len - 1) + + if (start_index + args['chunk_len'] + 1 > file_lenTrain): # if we ended after the last char of the file, come back to get a correct chunk len + start_index = file_lenTrain - args['chunk_len'] - 1 + end_reached = True + + end_index = start_index + args['chunk_len'] + 1 # Adding 1 to create target + chunk = fileTrain[start_index:end_index] + + inp[bi] = char_tensor(chunk[:-1]) + target[bi] = char_tensor(chunk[1:]) + inp = Variable(inp) + target = Variable(target) + if program_args.cuda: + inp = inp.cuda() + target = target.cuda() + return inp, target + + +def save(modelName,params,train_losses,valid_losses): + save_filename = 'Save/' + save_filename += modelName + + jsonName = save_filename + '.json' + with open(jsonName, 'w') as json_file: + json.dump(vars(params), json_file) + saveLossesName = save_filename+'.csv' + if(valid_losses is not None): + np.savetxt(saveLossesName, np.column_stack((train_losses, valid_losses)), delimiter=",", fmt='%s', header='Train,Valid') + else: + np.savetxt(saveLossesName, train_losses, delimiter=",", fmt='%s', header='Train') + print('Saved as %s' % save_filename) + + +# Initialize models and start training + +if __name__ == '__main__': + + # Parse command line arguments + argparser = argparse.ArgumentParser() + argparser.add_argument('--train', type=str) + argparser.add_argument('--valid', type=str) + + argparser.add_argument('--hidden_size_init', type=int, default=50) + argparser.add_argument('--hidden_size_end', type=int, default=300) + argparser.add_argument('--hidden_size_step', type=int, default=50) + + argparser.add_argument('--n_layers_init', type=int, default=1) + argparser.add_argument('--n_layers_end', type=int, default=4) + argparser.add_argument('--n_layers_step', type=int, default=1) + + + argparser.add_argument('--chunk_len_init', type=int, default=20) + argparser.add_argument('--chunk_len_end', type=int, default=90) + argparser.add_argument('--chunk_len_step', type=int, default=10) + + + argparser.add_argument('--cuda', action='store_true') + argparser.add_argument('--optimizer', type=str, default="adam") + argparser.add_argument('--print_every', type=int, default=10) + args = argparser.parse_args() + + if args.cuda: + print("Using CUDA") + + fileTrain, file_lenTrain = read_file(args.train) + try: + fileValid, file_lenValid = 
read_file(args.valid) + except: + print('No validation data supplied') + + all_characters = string.printable + n_characters = len(all_characters) + + params_list = [] + + ##0 + n_epochs_list = [30] + params_list.append(n_epochs_list) + ##1 + n_hidden_list = list(range(args.hidden_size_init,args.hidden_size_end,args.hidden_size_step)) + params_list.append(n_hidden_list) + ##2 + n_layers_list = list(range(args.n_layers_init,args.n_layers_end,args.n_layers_step)) + params_list.append(n_layers_list) + + # n_dropout_list = [0,0.3] + # params_list.append(n_dropout_list) + + ##3 + n_chunk_len_list = list(range(args.chunk_len_init,args.chunk_len_end,args.chunk_len_step)) + params_list.append(n_chunk_len_list) + ##4 + n_batch_size_list = [512,1024] + params_list.append(n_batch_size_list) + ##5 + n_learning_rate_list = [0.001,0.01] + params_list.append(n_learning_rate_list) + ##6 + batch_type = [0,1] + params_list.append(batch_type) + ##7 + model_type = ['lstm'] + params_list.append(model_type) + + param_combinations = list(itertools.product(*params_list)) + + currentCombination = 1 + for params in param_combinations : + param_dict = dict() + param_dict['model'] = params[-1] + param_dict['hidden_size'] = params[1] + param_dict['n_layers'] = params[2] + param_dict['learning_rate'] = params[5] + param_dict['chunk_len'] = params[3] + param_dict['batch_size'] = params[4] + + decoder = CharRNN( + input_size =n_characters, + output_size = n_characters, + **param_dict + ) + + + param_dict['batch_type'] = params[6] + param_dict['epochs'] = params[0] + train_losses = [] + valid_losses = [] + loss_avg = 0 + valid_loss_avg = 0 + start = time.time() + + try: + print("Training for %d epochs..." % param_dict['epochs']) + print(param_dict) + numFileBatches = math.ceil(file_lenTrain/((param_dict['batch_size']*param_dict['chunk_len'])+param_dict['batch_size'])) + for epoch in tqdm(range(1, param_dict['epochs'] + 1)): + # end_index = 0 + numBatches = 0 + while(numBatches < numFileBatches) : + if(param_dict['batch_type'] == 0): ### Sampling batches at random + loss = decoder.train(*random_dataset(args,param_dict,fileTrain,file_lenTrain),validation=False) + elif(batch_type == 1): ### Get consequent batches of chars without replacement + loss = decoder.train(*consequent_training_set(args,param_dict, numBatches,fileTrain, file_lenTrain),validation=False) + loss_avg += loss + numBatches += 1 + loss_avg /= numFileBatches + if args.valid is not None: + valid_loss_avg = decoder.train(*random_dataset(args,param_dict,fileValid,file_lenValid),validation=True) + valid_losses.append(valid_loss_avg) + train_losses.append(loss_avg) + if epoch % args.print_every == 0: + print('[%s (%d %d%%) Train: %.4f Valid: %.4f]' % (time_since(start), epoch, epoch / param_dict['epochs'] * 100, loss_avg, valid_loss_avg)) + print(generate(decoder, 'Renzi', 200, cuda=args.cuda), '\n') + + print("Saving...") + modelName = str(currentCombination) + save(modelName,params,train_losses,valid_losses) + currentCombination += 1 + except KeyboardInterrupt: + print("Saving before quit...") + save(args) +
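The grid expansion in search_params.py above comes down to `itertools.product` over the per-hyperparameter value lists, with each resulting tuple unpacked positionally (`params[0]` is the epoch count, `params[1]` the hidden size, and so on). A minimal runnable sketch of the same pattern, using a dict instead of positional indexing and illustrative value lists rather than the script's exact ranges:

```
import itertools

# Illustrative grid; the script builds these lists from CLI ranges such as
# --hidden_size_init / --hidden_size_end / --hidden_size_step.
param_grid = {
    'model': ['lstm'],
    'hidden_size': [50, 250, 450],
    'n_layers': [1, 2, 3],
    'chunk_len': [20, 50, 80],
    'batch_size': [512, 1024],
    'learning_rate': [0.001, 0.01],
}

keys = list(param_grid)
for values in itertools.product(*param_grid.values()):
    param_dict = dict(zip(keys, values))
    # search_params.py builds CharRNN(input_size=n_characters,
    # output_size=n_characters, **param_dict) here and trains it
    print(param_dict)
```

Keying the grid by name lets each combination feed `CharRNN` via `**param_dict` directly, and avoids the positional `params[i]` bookkeeping breaking when a list is added or reordered.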
From 71502ab799fd1fb145980f559a03def920a53b6b Mon Sep 17 00:00:00 2001 From: zutotonno Date: Wed, 5 Jun 2019 00:18:35 +0200 Subject: [PATCH 21/27] Attempt to implement early stopping; Nvidia drivers are broken, so training on CPU for now. --- search_params.py | 2 +- train.py | 43 +++++++++++++++++++++++++++++++------------ 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/search_params.py b/search_params.py index 1766e22..78b27e8 100755 --- a/search_params.py +++ b/search_params.py @@ -76,7 +76,7 @@ def save(modelName,params,train_losses,valid_losses): jsonName = save_filename + '.json' with open(jsonName, 'w') as json_file: - json.dump(vars(params), json_file) + json.dump(params, json_file) saveLossesName = save_filename+'.csv' if(valid_losses is not None): np.savetxt(saveLossesName, np.column_stack((train_losses, valid_losses)), delimiter=",", fmt='%s', header='Train,Valid') diff --git a/train.py b/train.py index da45960..d88d5b9 100755 --- a/train.py +++ b/train.py @@ -41,7 +41,7 @@ def random_dataset(args,file,file_len): return inp, target -def consequent_training_set(args, num_batches, fileTrain, file_lenTrain): +def consequent_dataset(args, num_batches, file, file_len): inp = torch.LongTensor(args.batch_size, args.chunk_len) target = torch.LongTensor(args.batch_size, args.chunk_len) end_index = args.chunk_len*num_batches*args.batch_size + (args.batch_size*num_batches) @@ -50,14 +50,14 @@ def consequent_training_set(args, num_batches, fileTrain, file_lenTrain): start_index = end_index if (end_reached == True): - start_index = random.randint(0, file_lenTrain - args.chunk_len - 1) + start_index = random.randint(0, file_len - args.chunk_len - 1) - if (start_index + args.chunk_len + 1 > file_lenTrain): # if we ended after the last char of the file, come back to get a correct chunk len - start_index = file_lenTrain - args.chunk_len - 1 + if (start_index + args.chunk_len + 1 > file_len): # if we ended after the last char of the file, come back to get a correct chunk len + start_index = file_len - args.chunk_len - 1 end_reached = True end_index = start_index + args.chunk_len + 1 # Adding 1 to create target - chunk = fileTrain[start_index:end_index] + chunk = file[start_index:end_index] inp[bi] = char_tensor(chunk[:-1]) target[bi] = char_tensor(chunk[1:]) @@ -119,17 +119,18 @@ def savemodel(args, epoch): argparser.add_argument('--chunk_len', type=int, default=200) argparser.add_argument('--batch_size', type=int, default=100) argparser.add_argument('--batch_type', type=int, default=0) + argparser.add_argument('--early_stopping', type=int, default=10) + argparser.add_argument('--optimizer', type=str, default="adam") argparser.add_argument('--cuda', action='store_true') argparser.add_argument('--modelname', type=str, default=None) - argparser.add_argument('--optimizer', type=str, default="adam") args = argparser.parse_args() - if args.cuda: print("Using CUDA") fileTrain, file_lenTrain = read_file(args.train) try: fileValid, file_lenValid = read_file(args.valid) + early_stopping_patience = args.early_stopping except: print('No validation data supplied') if(args.modelname is None): @@ -163,29 +164,47 @@ def savemodel(args, epoch): valid_losses = [] loss_avg = 0 valid_loss_avg = 0 + valid_loss_best = np.inf + patience = 1 try: print("Training for %d epochs..." 
% args.n_epochs) numFileBatches = math.ceil(file_lenTrain/((args.batch_size*args.chunk_len)+args.batch_size)) + numValidBatches = math.ceil(file_lenValid/((args.batch_size*args.chunk_len)+args.batch_size)) + for epoch in tqdm(range(1, args.n_epochs + 1)): # end_index = 0 numBatches = 0 + numBatchesValid = 0 while(numBatches < numFileBatches) : if(batch_type == 0): ### Sampling batches at random loss = decoder.train(*random_dataset(args,fileTrain,file_lenTrain),validation=False) elif(batch_type == 1): ### Get consequent batches of chars without replacement - loss = decoder.train(*consequent_training_set(args, numBatches,fileTrain, file_lenTrain),validation=False) + loss = decoder.train(*consequent_dataset(args, numBatches,fileTrain, file_lenTrain),validation=False) loss_avg += loss numBatches += 1 loss_avg /= numFileBatches + train_losses.append(loss_avg) if args.valid is not None: - valid_loss_avg = decoder.train(*random_dataset(args,fileValid,file_lenValid),validation=True) + while(numBatchesValid < numValidBatches) : + valid_loss_avg = decoder.train(*consequent_dataset(args,numBatchesValid,fileValid,file_lenValid),validation=True) + numBatchesValid += 1 + valid_loss_avg /= numBatchesValid valid_losses.append(valid_loss_avg) - train_losses.append(loss_avg) + if valid_loss_avg < valid_loss_best: + if(args.modelname is not None): + print("New best checkpoint: %.4f, old: %.4f" % (valid_loss_avg,valid_loss_best)) + savemodel(args, epoch) + valid_loss_best = valid_loss_avg + args.early_stopping = valid_loss_best + patience = 1 + else: + patience += 1 + if(patience >= early_stopping_patience): + break + if epoch % args.print_every == 0: print('[%s (%d %d%%) Train: %.4f Valid: %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss_avg, valid_loss_avg)) print(generate(decoder, 'Renzi', 200, cuda=args.cuda), '\n') - if(args.modelname is not None): - savemodel(args, epoch) print("Saving...") save(args)
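The early stopping wired in above follows the standard patience pattern: track the best validation loss seen so far, checkpoint whenever it improves, and stop once `--early_stopping` consecutive epochs pass without improvement. A self-contained sketch of just that control flow, with the per-epoch training and validation passes stubbed out by a synthetic loss in place of the real `decoder.train(...)` batch loops:

```
import random

def run_epoch(validation):
    # stand-in for one pass over the data (the batch loop around decoder.train)
    return random.random()

early_stopping_patience = 10   # the --early_stopping argument
valid_loss_best = float('inf')
patience = 1

for epoch in range(1, 501):
    run_epoch(validation=False)                  # training pass
    valid_loss_avg = run_epoch(validation=True)  # validation pass
    if valid_loss_avg < valid_loss_best:
        # improvement: checkpoint here (savemodel in train.py) and reset the counter
        valid_loss_best = valid_loss_avg
        patience = 1
    else:
        patience += 1
        if patience >= early_stopping_patience:
            print('early stop at epoch %d, best valid loss %.4f' % (epoch, valid_loss_best))
            break
```

Note that `patience` starts at 1 and the loop stops at `patience >= early_stopping_patience`, so `--early_stopping 10` tolerates nine epochs without improvement, matching the patch's bookkeeping.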
From 0fa1f96dafb6ce221f133e77f734dd30060ec713 Mon Sep 17 00:00:00 2001 From: Antonio Ritacco Date: Wed, 5 Jun 2019 00:46:16 +0200 Subject: [PATCH 22/27] minor changes: keep a single checkpoint file per model and average the validation loss correctly --- train.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/train.py b/train.py index d88d5b9..62547c2 100755 --- a/train.py +++ b/train.py @@ -86,15 +86,15 @@ def save(args): torch.save(decoder, save_filename) print('Saved as %s' % save_filename) -def savemodel(args, epoch): +def savemodel(args): save_filename = 'Save/' directoryCheckpoint = 'Save/'+modelName if not os.path.exists(directoryCheckpoint): os.makedirs(directoryCheckpoint) if modelName is not None: - directoryCheckpoint +='/'+ os.path.splitext(os.path.basename(args.train))[0] +'_'+modelName+ '_'+str(epoch) +'.pt' + directoryCheckpoint +='/'+ os.path.splitext(os.path.basename(args.train))[0] +'_'+modelName+ '_Checkpoint' +'.pt' else: - directoryCheckpoint +='/'+ os.path.splitext(os.path.basename(args.train))[0] + '_'+str(epoch)+'.pt' + directoryCheckpoint +='/'+ os.path.splitext(os.path.basename(args.train))[0] + '_Checkpoint'+'.pt' torch.save(decoder, directoryCheckpoint) @@ -163,7 +163,6 @@ def savemodel(args, epoch): train_losses = [] valid_losses = [] loss_avg = 0 - valid_loss_avg = 0 valid_loss_best = np.inf patience = 1 try: @@ -185,15 +184,16 @@ def savemodel(args, epoch): loss_avg /= numFileBatches train_losses.append(loss_avg) if args.valid is not None: + valid_loss_avg = 0 while(numBatchesValid < numValidBatches) : - valid_loss_avg = decoder.train(*consequent_dataset(args,numBatchesValid,fileValid,file_lenValid),validation=True) + valid_loss_avg += decoder.train(*consequent_dataset(args,numBatchesValid,fileValid,file_lenValid),validation=True) numBatchesValid += 1 - valid_loss_avg /= numBatchesValid + valid_loss_avg /= numValidBatches valid_losses.append(valid_loss_avg) if valid_loss_avg < valid_loss_best: if(args.modelname is not None): print("New best checkpoint: %.4f, old: %.4f" % (valid_loss_avg,valid_loss_best)) - savemodel(args, epoch) + savemodel(args) valid_loss_best = valid_loss_avg args.early_stopping = valid_loss_best patience = 1 From 58692388d64bb78ee2239f219acff119831f43f4 Mon Sep 17 00:00:00 2001 From: zutotonno Date: Wed, 5 Jun 2019 02:38:54 +0200 Subject: [PATCH 23/27] fix minor issues --- search_params.py | 67 ++++++++++++++++++++++++++++++++++++------------ train.py | 4 +-- 2 files changed, 53 insertions(+), 18 deletions(-) diff --git a/search_params.py b/search_params.py index 78b27e8..d54694a 100755 --- a/search_params.py +++ b/search_params.py @@ -42,7 +42,7 @@ def random_dataset(program_args,args,file,file_len): return inp, target -def consequent_training_set(program_args,args, num_batches, fileTrain, file_lenTrain): +def consequent_dataset(program_args,args, num_batches, file, file_len): inp = torch.LongTensor(args['batch_size'], args['chunk_len']) target = torch.LongTensor(args['batch_size'], args['chunk_len']) end_index = args['chunk_len']*num_batches*args['batch_size'] + (args['batch_size']*num_batches) @@ -51,14 +51,14 @@ def consequent_training_set(program_args,args, num_batches, fileTrain, file_lenT start_index = end_index if (end_reached == True): - start_index = random.randint(0, file_lenTrain - args.chunk_len - 1) + start_index = random.randint(0, file_len - args['chunk_len'] - 1) - if (start_index + args['chunk_len'] + 1 > file_lenTrain): # if we ended after the last char of the file, come back to get a correct chunk len - start_index = file_lenTrain - args['chunk_len'] - 1 + if (start_index + args['chunk_len'] + 1 > file_len): # if we ended after the last char of the file, come back to get a correct chunk len + start_index = file_len - args['chunk_len'] - 1 end_reached = True end_index = start_index + args['chunk_len'] + 1 # Adding 1 to create target - chunk = fileTrain[start_index:end_index] + chunk = file[start_index:end_index] inp[bi] = char_tensor(chunk[:-1]) target[bi] = char_tensor(chunk[1:]) @@ -70,6 +70,19 @@ def consequent_training_set(program_args,args, num_batches, fileTrain, file_lenT return inp, target +def savemodel(modelName,args): + save_filename = 'Save/' + directoryCheckpoint = 'Save/'+modelName + if not os.path.exists(directoryCheckpoint): + os.makedirs(directoryCheckpoint) + if modelName is not None: + directoryCheckpoint +='/'+ os.path.splitext(os.path.basename(args.train))[0] +'_'+modelName+ '_Checkpoint' +'.pt' + else: + directoryCheckpoint +='/'+ os.path.splitext(os.path.basename(args.train))[0] + '_Checkpoint'+'.pt' + + torch.save(decoder, directoryCheckpoint) + + def save(modelName,params,train_losses,valid_losses): save_filename = 'Save/' save_filename += modelName @@ -95,8 +108,8 @@ def save(modelName,params,train_losses,valid_losses): argparser.add_argument('--valid', type=str) argparser.add_argument('--hidden_size_init', type=int, default=50) - argparser.add_argument('--hidden_size_end', type=int, default=300) - argparser.add_argument('--hidden_size_step', type=int, default=50) + argparser.add_argument('--hidden_size_end', type=int, default=800) + 
argparser.add_argument('--hidden_size_step', type=int, default=200) argparser.add_argument('--n_layers_init', type=int, default=1) argparser.add_argument('--n_layers_end', type=int, default=4) @@ -107,6 +120,7 @@ def save(modelName,params,train_losses,valid_losses): argparser.add_argument('--chunk_len_end', type=int, default=90) argparser.add_argument('--chunk_len_step', type=int, default=10) + argparser.add_argument('--early_stopping', type=int, default=10) argparser.add_argument('--cuda', action='store_true') argparser.add_argument('--optimizer', type=str, default="adam") @@ -119,6 +133,7 @@ def save(modelName,params,train_losses,valid_losses): fileTrain, file_lenTrain = read_file(args.train) try: fileValid, file_lenValid = read_file(args.valid) + early_stopping_patience = args.early_stopping except: print('No validation data supplied') @@ -128,7 +143,7 @@ def save(modelName,params,train_losses,valid_losses): params_list = [] ##0 - n_epochs_list = [30] + n_epochs_list = [500] params_list.append(n_epochs_list) ##1 n_hidden_list = list(range(args.hidden_size_init,args.hidden_size_end,args.hidden_size_step)) @@ -144,13 +159,13 @@ def save(modelName,params,train_losses,valid_losses): n_chunk_len_list = list(range(args.chunk_len_init,args.chunk_len_end,args.chunk_len_step)) params_list.append(n_chunk_len_list) ##4 - n_batch_size_list = [512,1024] + n_batch_size_list = [1024,2048] params_list.append(n_batch_size_list) ##5 n_learning_rate_list = [0.001,0.01] params_list.append(n_learning_rate_list) ##6 - batch_type = [0,1] + batch_type = [0] params_list.append(batch_type) ##7 model_type = ['lstm'] @@ -169,7 +184,7 @@ def save(modelName,params,train_losses,valid_losses): param_dict['batch_size'] = params[4] decoder = CharRNN( - input_size =n_characters, + input_size = n_characters, output_size = n_characters, **param_dict ) @@ -182,32 +197,52 @@ def save(modelName,params,train_losses,valid_losses): loss_avg = 0 valid_loss_avg = 0 start = time.time() + valid_loss_best = np.inf + patience = 1 try: print("Training for %d epochs..." 
% param_dict['epochs']) + modelName = str(currentCombination) print(param_dict) numFileBatches = math.ceil(file_lenTrain/((param_dict['batch_size']*param_dict['chunk_len'])+param_dict['batch_size'])) + numValidBatches = math.ceil(file_lenValid/((param_dict['batch_size']*param_dict['chunk_len'])+param_dict['batch_size'])) for epoch in tqdm(range(1, param_dict['epochs'] + 1)): # end_index = 0 numBatches = 0 + numBatchesValid = 0 + loss_avg = 0 while(numBatches < numFileBatches) : if(param_dict['batch_type'] == 0): ### Sampling batches at random loss = decoder.train(*random_dataset(args,param_dict,fileTrain,file_lenTrain),validation=False) - elif(batch_type == 1): ### Get consequent batches of chars without replacement - loss = decoder.train(*consequent_training_set(args,param_dict, numBatches,fileTrain, file_lenTrain),validation=False) + elif(param_dict['batch_type'] == 1): ### Get consequent batches of chars without replacement + loss = decoder.train(*consequent_dataset(args, param_dict, numBatches,fileTrain, file_lenTrain),validation=False) loss_avg += loss numBatches += 1 loss_avg /= numFileBatches + train_losses.append(loss_avg) if args.valid is not None: - valid_loss_avg = decoder.train(*random_dataset(args,param_dict,fileValid,file_lenValid),validation=True) + valid_loss_avg = 0 + while(numBatchesValid < numValidBatches) : + valid_loss_avg += decoder.train(*consequent_dataset(args,param_dict,numBatchesValid,fileValid,file_lenValid),validation=True) + numBatchesValid += 1 + valid_loss_avg /= numValidBatches valid_losses.append(valid_loss_avg) - train_losses.append(loss_avg) + if valid_loss_avg < valid_loss_best: + print("New best checkpoint: %.4f, old: %.4f" % (valid_loss_avg,valid_loss_best)) + savemodel(modelName, args) + valid_loss_best = valid_loss_avg + args.early_stopping = epoch + patience = 1 + else: + patience += 1 + if(patience >= early_stopping_patience): + break if epoch % args.print_every == 0: print('[%s (%d %d%%) Train: %.4f Valid: %.4f]' % (time_since(start), epoch, epoch / param_dict['epochs'] * 100, loss_avg, valid_loss_avg)) print(generate(decoder, 'Renzi', 200, cuda=args.cuda), '\n') print("Saving...") - modelName = str(currentCombination) + params['early_stopping'] = args.early_stopping save(modelName,params,train_losses,valid_losses) currentCombination += 1 except KeyboardInterrupt: diff --git a/train.py b/train.py index 62547c2..a2989d9 100755 --- a/train.py +++ b/train.py @@ -162,7 +162,6 @@ def savemodel(args): start = time.time() train_losses = [] valid_losses = [] - loss_avg = 0 valid_loss_best = np.inf patience = 1 try: @@ -174,6 +173,7 @@ def savemodel(args): # end_index = 0 numBatches = 0 numBatchesValid = 0 + loss_avg = 0 while(numBatches < numFileBatches) : if(batch_type == 0): ### Sampling batches at random loss = decoder.train(*random_dataset(args,fileTrain,file_lenTrain),validation=False) @@ -195,7 +195,7 @@ def savemodel(args): print("New best checkpoint: %.4f, old: %.4f" % (valid_loss_avg,valid_loss_best)) savemodel(args) valid_loss_best = valid_loss_avg - args.early_stopping = valid_loss_best + args.early_stopping = epoch patience = 1 else: patience += 1 From 49eafb6f241670a7b93bb489493f98e6a3af467c Mon Sep 17 00:00:00 2001 From: zutotonno Date: Wed, 5 Jun 2019 12:28:41 +0200 Subject: [PATCH 24/27] last fixes --- helpers.py | 1 + search_params.py | 6 +++--- train.py | 10 +++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/helpers.py b/helpers.py index 280bc33..6169ac5 100644 --- a/helpers.py +++ b/helpers.py @@ -9,6 +9,7 @@ # 
Reading and un-unicode-encoding data all_characters = string.printable + n_characters = len(all_characters) def read_file(filename): diff --git a/search_params.py b/search_params.py index d54694a..9f93806 100755 --- a/search_params.py +++ b/search_params.py @@ -242,10 +242,10 @@ def save(modelName,params,train_losses,valid_losses): print(generate(decoder, 'Renzi', 200, cuda=args.cuda), '\n') print("Saving...") - params['early_stopping'] = args.early_stopping - save(modelName,params,train_losses,valid_losses) + param_dict['early_stopping'] = args.early_stopping + save(modelName,param_dict,train_losses,valid_losses) currentCombination += 1 except KeyboardInterrupt: print("Saving before quit...") - save(args) + save(param_dict) diff --git a/train.py b/train.py index a2989d9..b77f44a 100755 --- a/train.py +++ b/train.py @@ -128,8 +128,11 @@ def savemodel(args): print("Using CUDA") fileTrain, file_lenTrain = read_file(args.train) + + numFileBatches = math.ceil(file_lenTrain/((args.batch_size*args.chunk_len)+args.batch_size)) try: fileValid, file_lenValid = read_file(args.valid) + numValidBatches = math.ceil(file_lenValid/((args.batch_size*args.chunk_len)+args.batch_size)) early_stopping_patience = args.early_stopping except: print('No validation data supplied') @@ -166,8 +169,6 @@ def savemodel(args): patience = 1 try: print("Training for %d epochs..." % args.n_epochs) - numFileBatches = math.ceil(file_lenTrain/((args.batch_size*args.chunk_len)+args.batch_size)) - numValidBatches = math.ceil(file_lenValid/((args.batch_size*args.chunk_len)+args.batch_size)) for epoch in tqdm(range(1, args.n_epochs + 1)): # end_index = 0 @@ -203,7 +204,10 @@ def savemodel(args): break if epoch % args.print_every == 0: - print('[%s (%d %d%%) Train: %.4f Valid: %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss_avg, valid_loss_avg)) + if args.valid is not None: + print('[%s (%d %d%%) Train: %.4f Valid: %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss_avg, valid_loss_avg)) + else: + print('[%s (%d %d%%) Train: %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss_avg)) print(generate(decoder, 'Renzi', 200, cuda=args.cuda), '\n') print("Saving...") From d8261dbaf7b3d5be2f49c1ac12e9d12cb800a05e Mon Sep 17 00:00:00 2001 From: Antonio Ritacco Date: Wed, 5 Jun 2019 13:08:08 +0200 Subject: [PATCH 25/27] Update search_params.py --- search_params.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/search_params.py b/search_params.py index 9f93806..029d49d 100755 --- a/search_params.py +++ b/search_params.py @@ -151,7 +151,7 @@ def save(modelName,params,train_losses,valid_losses): ##2 n_layers_list = list(range(args.n_layers_init,args.n_layers_end,args.n_layers_step)) params_list.append(n_layers_list) - + ### TODO: # n_dropout_list = [0,0.3] # params_list.append(n_dropout_list) @@ -159,16 +159,16 @@ def save(modelName,params,train_losses,valid_losses): n_chunk_len_list = list(range(args.chunk_len_init,args.chunk_len_end,args.chunk_len_step)) params_list.append(n_chunk_len_list) ##4 - n_batch_size_list = [1024,2048] + n_batch_size_list = [32,1024] params_list.append(n_batch_size_list) ##5 n_learning_rate_list = [0.001,0.01] params_list.append(n_learning_rate_list) ##6 - batch_type = [0] + batch_type = [0,1] params_list.append(batch_type) ##7 - model_type = ['lstm'] + model_type = ['lstm','gru'] params_list.append(model_type) param_combinations = list(itertools.product(*params_list))
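Throughout these patches, both train.py and search_params.py size an epoch as `numFileBatches = math.ceil(file_len/((batch_size*chunk_len)+batch_size))`. The denominator is the number of corpus characters one batch consumes: each of the `batch_size` sequences takes `chunk_len` input characters plus one extra character for the one-step-shifted target, i.e. `batch_size * (chunk_len + 1)` characters in total. A quick check of the arithmetic:

```
import math

def num_batches(file_len, batch_size, chunk_len):
    # each batch consumes batch_size * (chunk_len + 1) characters:
    # chunk_len inputs plus 1 shifted-target character per sequence
    return math.ceil(file_len / (batch_size * chunk_len + batch_size))

# with train.py's defaults (batch_size=100, chunk_len=200),
# a corpus of 1,000,000 characters yields 50 batches per epoch
print(num_batches(1000000, 100, 200))  # -> 50
```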
From bc046110355eb17f469fd80008d70d5cf491bd29 Mon Sep 17 00:00:00 2001 From: Antonio Ritacco Date: Wed, 5 Jun 2019 13:09:27 +0200 Subject: [PATCH 26/27] Update README.md --- README.md | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 986df44..3fe5471 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ Options: --batch_size Number of examples per batch 100 --batch_type Batch random (0) or sequential (1) 0 --drop_out drop-out rate between Recurrent layers 0 +--early_stopping Validation epochs without improvement before stopping 10 --model_name model(session) name, used in checkpoints --cuda Use CUDA @@ -67,9 +68,39 @@ ``` -### TODO +### Grid search +``` +Usage: search_params.py --train [options] + +Hard-coded params: + -learning_rate : [0.001,0.01] + -max_epochs : [500] + -n_batch_size : [32,1024] (should be changed according to available memory) + -batch_type : [0,1] (random vs consequent sampling) + -model_type : [lstm, gru] + +Options: +--train training file +--valid validation file +--hidden_size_init 50 +--hidden_size_end 800 +--hidden_size_step 200 +--n_layers_init 1 +--n_layers_end 4 +--n_layers_step 1 +--chunk_len_init 20 +--chunk_len_end 90 +--chunk_len_step 10 +--early_stopping 10 +--optimizer adam +--cuda ``` -[] Early stopping (?) Maybe not in few days + +### TODO +[x] Grid search + -[ ] Add dropout to the grid search (configurations with fewer than 2 layers don't need dropout) + -[ ] Adapt batch_size to available memory +[x] Early stopping [x] Add Dropout (p) [x] Add Validation set to monitor overfitting [x] Saving model at checkpoint From dbef9101f95a928490213707389afd26b01286b9 Mon Sep 17 00:00:00 2001 From: Antonio Ritacco Date: Wed, 5 Jun 2019 13:12:58 +0200 Subject: [PATCH 27/27] Update README.md --- README.md | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 3fe5471..e5182bc 100644 --- a/README.md +++ b/README.md @@ -97,15 +97,13 @@ Options: ``` ### TODO -[x] Grid search - -[ ] Add dropout to the grid search (configurations with fewer than 2 layers don't need dropout) - -[ ] Adapt batch_size to available memory -[x] Early stopping -[x] Add Dropout (p) -[x] Add Validation set to monitor overfitting -[x] Saving model at checkpoint -[x] Saving train and validation error, with training params to file -[x] Refact to more OO paradigm -``` +* [ ] Grid search (needs improvement) +## DONE +* [x] Early stopping +* [x] Add Dropout (p) +* [x] Add Validation set to monitor overfitting +* [x] Saving model at checkpoint +* [x] Saving train and validation error, with training params to file +* [x] Refactor to a more OO paradigm
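For reference, the grid search section above corresponds to an invocation along these lines (the data paths are illustrative, not files shipped with the repository):

```
python search_params.py --train data/train.txt --valid data/valid.txt \
    --hidden_size_init 50 --hidden_size_end 800 --hidden_size_step 200 \
    --n_layers_init 1 --n_layers_end 4 --n_layers_step 1 \
    --early_stopping 10 --optimizer adam --cuda
```

Each combination is numbered, trained with early stopping, and written under Save/: a .pt checkpoint per model plus a JSON of its parameters and a CSV of its train/validation losses.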