From b2b8af22eff5960f787f9863d44591f4fd0c6a45 Mon Sep 17 00:00:00 2001
From: phildue
Date: Sat, 16 Jun 2018 14:37:24 +0200
Subject: [PATCH] Add a function to create a model from hyperparameters;
 prepare the architecture experiments

---
 experiments/architecture.py | 177 ++++++++++++++++++++++++++++++++++++
 training/models.py          |  50 +++++++++-
 training/train.py           |  39 ++++----
 3 files changed, 247 insertions(+), 19 deletions(-)
 create mode 100644 experiments/architecture.py

diff --git a/experiments/architecture.py b/experiments/architecture.py
new file mode 100644
index 0000000..b118eb6
--- /dev/null
+++ b/experiments/architecture.py
@@ -0,0 +1,177 @@
+from training.models import create_model
+from training.train import train, MASK_VALUE
+
+batch_size = 8
+n_stations = 41
+radius = 1000
+t_train_h = 7 * 24
+t_pred_d = 3
+t_pred_resolution_h = 1
+filenames_train = ['2015', '2014', '2013', '2012', '2011', '2010', '2009', '2008']
+filenames_valid = ['2016']
+features_train = ['air_temperature']
+features_predict = ['air_temperature']
+
+"""
+Experiment I: Preprocessing Layers
+"""
+n_dense_pres = [1, 2, 4, 8]
+n_node = 1.0
+act = 'relu'
+n_memory = 1
+n_dense_pos = 3
+memory_unit = 'lstm'
+for n_dense_pre in n_dense_pres:
+    model = create_model(batch_size=batch_size,
+                         t_train=t_train_h,
+                         t_pred=int(t_pred_d * 24 / t_pred_resolution_h),
+                         n_features_train=len(features_train),
+                         n_stations=n_stations,
+                         memory_unit=memory_unit,
+                         width=n_node,  # node-count multiplier for the dense layers
+                         n_layers_memory=n_memory,
+                         n_layers_preprocessing=n_dense_pre,
+                         n_layers_postprocessing=n_dense_pos,
+                         n_features_pred=len(features_predict),
+                         activation=act,
+                         mask_value=MASK_VALUE)
+
+    # '{layer_pre}x{n_nodes}*{act}->[{memory}]{n_lstm}->{layer_pos}{n_nodes}*{act}'
+    log_dir = 'out/{}-{}-{}-{}{}-{}-{}-{}'.format(n_dense_pre, int(n_node * 10), act, memory_unit, n_memory,
+                                                  n_dense_pos, int(n_stations / n_dense_pre), act)
+
+    train(radius=radius,
+          batch_size=batch_size,
+          log_dir=log_dir,
+          t_train_h=t_train_h,
+          t_pred_d=t_pred_d,
+          t_pred_resolution_h=t_pred_resolution_h,
+          model_name=model,
+          filenames_train=filenames_train,
+          filenames_valid=filenames_valid,
+          features_train=features_train,
+          features_predict=features_predict,
+          )
+
+"""
+Experiment II: Postprocessing Layers
+"""
+n_dense_poss = [1, 2, 4, 8]
+n_node = 1.0
+act = 'relu'
+n_memory = 1
+n_dense_pre = 3
+memory_unit = 'lstm'
+for n_dense_pos in n_dense_poss:
+    model = create_model(batch_size=batch_size,
+                         t_train=t_train_h,
+                         t_pred=int(t_pred_d * 24 / t_pred_resolution_h),
+                         n_features_train=len(features_train),
+                         n_stations=n_stations,
+                         memory_unit=memory_unit,
+                         width=n_node,
+                         n_layers_memory=n_memory,
+                         n_layers_preprocessing=n_dense_pre,
+                         n_layers_postprocessing=n_dense_pos,
+                         n_features_pred=len(features_predict),
+                         activation=act,
+                         mask_value=MASK_VALUE)
+
+    # '{layer_pre}x{n_nodes}*{act}->[{memory}]{n_lstm}->{layer_pos}{n_nodes}*{act}'
+    log_dir = 'out/{}-{}-{}-{}{}-{}-{}-{}'.format(n_dense_pre, int(n_node * 10), act, memory_unit, n_memory,
+                                                  n_dense_pos, int(n_stations / n_dense_pre), act)
+
+    train(radius=radius,
+          batch_size=batch_size,
+          log_dir=log_dir,
+          t_train_h=t_train_h,
+          t_pred_d=t_pred_d,
+          t_pred_resolution_h=t_pred_resolution_h,
+          model_name=model,
+          filenames_train=filenames_train,
+          filenames_valid=filenames_valid,
+          features_train=features_train,
+          features_predict=features_predict,
+          )
+
+"""
+Experiment III: Nodes per Layer
+"""
+n_nodes = [0.2, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]
+act = 'relu'
+n_memory = 1
+n_dense_pre = 3
+n_dense_pos = 3
+memory_unit = 'lstm'
+for n_node in n_nodes:
+    model = create_model(batch_size=batch_size,
+                         t_train=t_train_h,
+                         t_pred=int(t_pred_d * 24 / t_pred_resolution_h),
+                         n_features_train=len(features_train),
+                         n_stations=n_stations,
+                         memory_unit=memory_unit,
+                         width=n_node,
+                         n_layers_memory=n_memory,
+                         n_layers_preprocessing=n_dense_pre,
+                         n_layers_postprocessing=n_dense_pos,
+                         n_features_pred=len(features_predict),
+                         activation=act,
+                         mask_value=MASK_VALUE)
+
+    # '{layer_pre}x{n_nodes}*{act}->[{memory}]{n_lstm}->{layer_pos}{n_nodes}*{act}'
+    log_dir = 'out/{}-{}-{}-{}{}-{}-{}-{}'.format(n_dense_pre, int(n_node * 10), act, memory_unit, n_memory,
+                                                  n_dense_pos, int(n_stations / n_dense_pre), act)
+
+    train(radius=radius,
+          batch_size=batch_size,
+          log_dir=log_dir,
+          t_train_h=t_train_h,
+          t_pred_d=t_pred_d,
+          t_pred_resolution_h=t_pred_resolution_h,
+          model_name=model,
+          filenames_train=filenames_train,
+          filenames_valid=filenames_valid,
+          features_train=features_train,
+          features_predict=features_predict,
+          )
+
+"""
+Experiment IV: Memory Depth
+"""
+n_lstms = [1, 2, 4, 8]
+n_node = 1.0
+act = 'relu'
+n_dense_pre = 3
+n_dense_pos = 3
+memory_unit = 'lstm'
+for n_memory in n_lstms:
+    model = create_model(batch_size=batch_size,
+                         t_train=t_train_h,
+                         t_pred=int(t_pred_d * 24 / t_pred_resolution_h),
+                         n_features_train=len(features_train),
+                         n_stations=n_stations,
+                         memory_unit=memory_unit,
+                         width=n_node,
+                         n_layers_memory=n_memory,
+                         n_layers_preprocessing=n_dense_pre,
+                         n_layers_postprocessing=n_dense_pos,
+                         n_features_pred=len(features_predict),
+                         activation=act,
+                         mask_value=MASK_VALUE)
+
+    # '{layer_pre}x{n_nodes}*{act}->[{memory}]{n_lstm}->{layer_pos}{n_nodes}*{act}'
+    log_dir = 'out/{}-{}-{}-{}{}-{}-{}-{}'.format(n_dense_pre, int(n_node * 10), act, memory_unit, n_memory,
+                                                  n_dense_pos, int(n_stations / n_dense_pre), act)
+
+    train(radius=radius,
+          batch_size=batch_size,
+          log_dir=log_dir,
+          t_train_h=t_train_h,
+          t_pred_d=t_pred_d,
+          t_pred_resolution_h=t_pred_resolution_h,
+          model_name=model,
+          filenames_train=filenames_train,
+          filenames_valid=filenames_valid,
+          features_train=features_train,
+          features_predict=features_predict,
+          )
diff --git a/training/models.py b/training/models.py
index dd8b732..c260f77 100644
--- a/training/models.py
+++ b/training/models.py
@@ -1,6 +1,6 @@
 from keras import Input, Model
 from keras.layers import Dense, LSTM, Conv1D, MaxPooling1D, np, GRU, Dropout, regularizers, Lambda, BatchNormalization, \
-    Activation, Masking
+    Activation, Masking, LeakyReLU
 
 
 def meijer_net(batch_size=8, n_features=1, n_stations=21, seq_len_train=7 * 24,
@@ -345,6 +345,54 @@ def m2m_lstm_norm(batch_size=8, n_features=1, n_stations=21, seq_len_train=7 * 2
     return Model(input, out)
 
 
+def create_preprocessing(netin, n_stations, activation, depth, width):
+    conn = netin
+    for i in range(1, depth + 1):
+        conn = Dense(units=int(max(n_stations / i * width, 1)))(conn)  # taper width with depth, keep at least one unit
+        if activation == 'leaky_relu':
+            conn = LeakyReLU()(conn)
+        else:
+            conn = Activation(activation)(conn)
+    return conn
+
+
+def create_postprocessing(netin, activation, depth, width):
+    conn = netin
+    for i in range(depth):
+        conn = Dense(int(width))(conn)
+        if activation == 'leaky_relu':
+            conn = LeakyReLU()(conn)
+        else:
+            conn = Activation(activation)(conn)
+    return conn
+
+
+def create_memory(netin, memory, depth, width):
+    conn = netin
+    for i in range(depth):
+        cell = LSTM if memory == 'lstm' else GRU  # select the recurrent cell class by name
+        conn = cell(width, return_sequences=True)(conn)
+    return conn
+
+
+def create_model(batch_size, t_train, n_features_train, n_stations, width,
+                 n_layers_preprocessing, activation, n_layers_memory, memory_unit,
+                 n_layers_postprocessing, t_pred, mask_value, n_features_pred):
+    netin = Input(batch_shape=(batch_size, t_train + t_pred, n_features_train * n_stations))
+    mask = Masking(mask_value=mask_value)(netin)
+
+    preprocessing = create_preprocessing(mask, n_stations, activation, n_layers_preprocessing, width)
+    memory = create_memory(preprocessing, memory_unit, n_layers_memory, int(n_stations / n_layers_preprocessing))
+    shift = Lambda(lambda x: x[:, -t_pred:, :])(memory)
+    postprocessing = create_postprocessing(shift, activation, n_layers_postprocessing,
+                                           int(n_stations / n_layers_preprocessing))
+    out = Dense(n_features_pred)(postprocessing)
+    model = Model(netin, out)
+
+    return model
+
+
 if __name__ == '__main__':
     monday_til_saturday = np.array([
         [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6]],
diff --git a/training/train.py b/training/train.py
index d11c58a..2d701b7 100644
--- a/training/train.py
+++ b/training/train.py
@@ -3,7 +3,7 @@
 import random
 import sys
 import csv
-
+
 sys.path.extend(['../', './'])
 
 # from downloadData.functions.file_utils import create_dirs, save_file
@@ -43,7 +43,7 @@
 # default values
 
 # DATA_DIR = '../data/RADIUS500KM_PROCESSED/'
-RADIUS = 100
+RADIUS = 500
 LOG_DIR = '../out/m2m_lstm/'
 BATCH_SIZE = 4
 MODEL_NAME = 'm2m_lstm'
@@ -72,7 +72,8 @@ def train(batch_size=BATCH_SIZE,
           position=POSITION,
           features_train=FEATURES_TRAIN,
           features_predict=FEATURES_PREDICT,
-          mask_value=MASK_VALUE):
+          mask_value=MASK_VALUE,
+          log_dir=None):
     """
     Script to start a training.
 
@@ -87,11 +88,11 @@ def train(batch_size=BATCH_SIZE,
     Run this script from terminal with :
     'python train.py --model_name X --data_dir X --batch_size X --n_samples X --log_dir X/X'
     """
-
+
     data_dir = '../data/RADIUS' + str(radius) + 'KM_PROCESSED/'
-    log_dir = '../out/' + model_name + '_'.join(features_train) + '/' + str(radius) + '/'
-
-
+    if log_dir is None:
+        log_dir = '../out/' + model_name + '_'.join(features_train) + '/' + str(radius) + '/'
+
     t_pred = int(t_pred_d * 24 / t_pred_resolution_h)
 
     if n_samples is None:
@@ -115,9 +116,12 @@ def train(batch_size=BATCH_SIZE,
     """
     Create Model
    """
-    model = models[model_name](n_stations=n_stations, batch_size=batch_size, seq_len_pred=t_pred,
-                               seq_len_train=t_train_h, n_features=len(features_train),
-                               n_features_pred=len(features_predict), mask_value=mask_value, padding=t_pred_d * 24)
+    if isinstance(model_name, str):  # model_name may also be a prebuilt Model instance
+        model = models[model_name](n_stations=n_stations, batch_size=batch_size, seq_len_pred=t_pred,
+                                   seq_len_train=t_train_h, n_features=len(features_train),
+                                   n_features_pred=len(features_predict), mask_value=mask_value, padding=t_pred_d * 24)
+    else:
+        model = model_name
     print('Training training: ', model_name)
     print('Storing files at: ', log_dir)
     print('Reading data from: ', data_dir)
@@ -172,12 +176,11 @@ def train(batch_size=BATCH_SIZE,
     print("Dataset statistics: {} +- {}".format(mean, std))
     print("Number of samples: ", n_samples)
 
-
     with open(log_dir + 'data_stat.csv', 'w') as csvfile:
         writer = csv.writer(csvfile)
         writer.writerow([mean[0], std[0], n_samples])
-
+
     """
     Configure Training
     """
 
@@ -207,7 +210,7 @@ def train(batch_size=BATCH_SIZE,
     pprint(summary)
 
     save_file(summary, name='summary.txt', path=log_dir)
-    save_file(summary, name='summary.pkl', path=log_dir)
+    # save_file(summary, name='summary.pkl', path=log_dir)
 
     """
     Oppaa!
@@ -229,8 +232,8 @@ def train(batch_size=BATCH_SIZE, argparser.add_argument('--n_samples', help='Amount of samples to train', default=None) args = argparser.parse_args() train() - # train(batch_size=args.batch_size, - # log_dir=args.log_dir, - # data_dir=args.data_dir, - # model_name=args.model_name, - # n_samples=args.n_samples) +# train(batch_size=args.batch_size, +# log_dir=args.log_dir, +# data_dir=args.data_dir, +# model_name=args.model_name, +# n_samples=args.n_samples)
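
A minimal usage sketch of the new API, assuming the repository layout above. The
hyperparameter values and the 'out/example/' log directory are illustrative and
not part of the patch; the input shape follows the Input definition in
create_model: (batch_size, t_train + t_pred, n_features_train * n_stations).

    from training.models import create_model
    from training.train import train, MASK_VALUE

    # Build a model with 3 preprocessing layers, 1 LSTM layer, 3 postprocessing layers.
    model = create_model(batch_size=8,
                         t_train=7 * 24,             # one week of hourly history
                         t_pred=3 * 24,              # three days of hourly predictions
                         n_features_train=1,
                         n_stations=41,
                         memory_unit='lstm',
                         width=1.0,                  # node multiplier per dense layer
                         n_layers_preprocessing=3,
                         n_layers_memory=1,
                         n_layers_postprocessing=3,
                         n_features_pred=1,
                         activation='relu',
                         mask_value=MASK_VALUE)
    model.summary()

    # train() accepts either a registered model name (str) or the Model built above.
    train(model_name=model,
          log_dir='out/example/',    # hypothetical output directory
          radius=500,
          batch_size=8)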
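Design note: routing a prebuilt Model through the existing model_name parameter
keeps the command-line path ('--model_name X') unchanged while letting the
experiment scripts inject create_model instances directly; the isinstance check
in train() distinguishes the two cases. The optional log_dir parameter works the
same way: when it is None, train() falls back to the previous naming scheme, so
existing callers are unaffected.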