From 588c323d402b778bc03acafb20dd4b7af56a6687 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Mon, 9 Aug 2021 10:37:06 +0200 Subject: [PATCH 01/17] [GIT] add PyCharm files to gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 22c1ad65..8e898fb3 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,10 @@ docs/_build/ # Jupyter Notebook .ipynb_checkpoints +# PyCharm +.idea/ +.coverage + # Distribution / packaging .Python env/ From c0846913c4d3a3c4930beeb4b57582c921ef33ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Mon, 9 Aug 2021 14:20:15 +0200 Subject: [PATCH 02/17] [ADD] Tolstoi Char RNN testproblem --- deepobs/pytorch/testproblems/__init__.py | 1 + .../pytorch/testproblems/tolstoi_char_rnn.py | 33 +++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 deepobs/pytorch/testproblems/tolstoi_char_rnn.py diff --git a/deepobs/pytorch/testproblems/__init__.py b/deepobs/pytorch/testproblems/__init__.py index 3b8bd446..9cc5ac2a 100644 --- a/deepobs/pytorch/testproblems/__init__.py +++ b/deepobs/pytorch/testproblems/__init__.py @@ -21,3 +21,4 @@ from .svhn_3c3d import svhn_3c3d from .svhn_wrn164 import svhn_wrn164 from .testproblem import TestProblem +from .tolstoi_char_rnn import tolstoi_char_rnn diff --git a/deepobs/pytorch/testproblems/tolstoi_char_rnn.py b/deepobs/pytorch/testproblems/tolstoi_char_rnn.py new file mode 100644 index 00000000..68cd8ad9 --- /dev/null +++ b/deepobs/pytorch/testproblems/tolstoi_char_rnn.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +"""A vanilla RNN architecture for Tolstoi.""" +from torch import nn + +from deepobs.pytorch.testproblems.testproblem import WeightRegularizedTestproblem +from .testproblems_modules import net_char_rnn +from ..datasets.tolstoi import tolstoi + + +class tolstoi_char_rnn(WeightRegularizedTestproblem): + """DeepOBS test problem class for char_rnn network on Tolstoi. + + TODO: add some more details how the test problem works + """ + def __init__(self, batch_size, l2_reg=0.0005): + """Create a new char_rnn test problem instance on Tolstoi. + + Args: + batch_size (int): Batch size to use. + l2_reg (float): L2-regularization factor. L2-Regularization (weight decay) + is used on the weights but not the biases. + Defaults to ``5e-4``. + """ + print(f"batch_size={batch_size}") + super(tolstoi_char_rnn, self).__init__(batch_size, l2_reg) + + def set_up(self): + """Set up the Char RNN test problem on Tolstoi.""" + self.data = tolstoi(self._batch_size) + self.loss_function = nn.CrossEntropyLoss + self.net = net_char_rnn(hidden_dim=10, num_layers=2, seq_len=50, vocab_size=100) + self.net.to(self._device) + self.regularization_groups = self.get_regularization_groups() From e5db1afb3448a2bb8e4b817f7a90ce60fd410656 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Mon, 9 Aug 2021 14:21:41 +0200 Subject: [PATCH 03/17] [FIX] Tolstoi dataset --- deepobs/pytorch/datasets/tolstoi.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/deepobs/pytorch/datasets/tolstoi.py b/deepobs/pytorch/datasets/tolstoi.py index 562b104a..f577809a 100644 --- a/deepobs/pytorch/datasets/tolstoi.py +++ b/deepobs/pytorch/datasets/tolstoi.py @@ -7,8 +7,8 @@ import torch from torch.utils import data as dat -from .. import config from . 
import dataset +from ...config import get_data_dir class tolstoi(dataset.DataSet): @@ -43,7 +43,7 @@ def __init__(self, batch_size, seq_length=50, train_eval_size=653237): self._train_eval_size = train_eval_size super(tolstoi, self).__init__(batch_size) - def _make_dataloader(self, filepath): + def _make_tolstoi_dataloader(self, filepath): # Load the array of character ids, determine the number of batches that # can be produced, given batch size and sequence lengh arr = np.load(filepath) @@ -79,8 +79,8 @@ def _make_dataloader(self, filepath): return dataset def _make_train_dataloader(self): - filepath = os.path.join(config.get_data_dir(), "tolstoi", "train.npy") - return self._make_dataloader(filepath) + filepath = os.path.join(get_data_dir(), "tolstoi", "train.npy") + return self._make_tolstoi_dataloader(filepath) def _make_train_eval_dataloader(self): indices = np.arange( @@ -90,5 +90,13 @@ def _make_train_eval_dataloader(self): return dat.TensorDataset(train_eval_set[0], train_eval_set[1]) def _make_test_dataloader(self): - filepath = os.path.join(config.get_data_dir(), "tolstoi", "test.npy") - return self._make_dataloader(filepath) + filepath = os.path.join(get_data_dir(), "tolstoi", "test.npy") + return self._make_tolstoi_dataloader(filepath) + + def _make_train_and_valid_dataloader(self): + # TODO check whether this is intended usage + """return self._make_train_and_valid_dataloader_helper( + self._make_train_dataloader(), + self._make_train_dataloader(), + )""" + return self._make_train_dataloader(), self._make_train_dataloader() From 6701e1a75ac366a9b282c8e729ba2534b38d6f17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Mon, 9 Aug 2021 14:22:36 +0200 Subject: [PATCH 04/17] [ADD] net_char_rnn: debug with print --- deepobs/pytorch/testproblems/testproblems_modules.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/deepobs/pytorch/testproblems/testproblems_modules.py b/deepobs/pytorch/testproblems/testproblems_modules.py index 682284bd..84387c63 100644 --- a/deepobs/pytorch/testproblems/testproblems_modules.py +++ b/deepobs/pytorch/testproblems/testproblems_modules.py @@ -725,14 +725,18 @@ def __init__(self, seq_len, hidden_dim, vocab_size, num_layers): def forward(self, x, state=None): """state is a tuple for hidden and cell state for initialisation of the lstm""" + print("net_char_rnn:forward()") + print(f"x.shape = {x.shape}") + # print(f"x={x}") x = self.embedding(x) # if no state is provided, default the state to zeros if state is None: x, new_state = self.lstm(x) else: x, new_state = self.lstm(x, state) - x = self.dense(x) - return x, new_state + output = self.dense(x) + print(f"output.shape={output.shape}") + return output # , new_state class net_quadratic_deep(nn.Sequential): From a7e407160963cd51889579b8f32d8797b6fef8c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Mon, 16 Aug 2021 11:23:34 +0200 Subject: [PATCH 05/17] [ADD] add TODO, fix parameters --- deepobs/pytorch/testproblems/tolstoi_char_rnn.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/deepobs/pytorch/testproblems/tolstoi_char_rnn.py b/deepobs/pytorch/testproblems/tolstoi_char_rnn.py index 68cd8ad9..b104ef67 100644 --- a/deepobs/pytorch/testproblems/tolstoi_char_rnn.py +++ b/deepobs/pytorch/testproblems/tolstoi_char_rnn.py @@ -12,6 +12,17 @@ class tolstoi_char_rnn(WeightRegularizedTestproblem): TODO: add some more details how the test problem works """ + + # TODO check differences compared to tensorflow + # - lstm 
layer has two bias -> "_check_parameters()" + # - loss function: + # - tensorflow: mean across time, sum across batch + # - pytorch: mean across all + # - lstm parameters counted separately (weight_{ih|hh}_l{i}) + # - dropout layers + # - tensorflow: dropout before and after each layer with keep=0.8 + # - pytorch: dropout in-between LSTM + dropout before and after LSTM + def __init__(self, batch_size, l2_reg=0.0005): """Create a new char_rnn test problem instance on Tolstoi. @@ -28,6 +39,6 @@ def set_up(self): """Set up the Char RNN test problem on Tolstoi.""" self.data = tolstoi(self._batch_size) self.loss_function = nn.CrossEntropyLoss - self.net = net_char_rnn(hidden_dim=10, num_layers=2, seq_len=50, vocab_size=100) + self.net = net_char_rnn(hidden_dim=128, num_layers=2, seq_len=50, vocab_size=83) self.net.to(self._device) self.regularization_groups = self.get_regularization_groups() From 8970099df314c8e3bec5d1f610f9e18ff61c98bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Mon, 16 Aug 2021 11:24:22 +0200 Subject: [PATCH 06/17] [ADD] fix network, remove print --- .../pytorch/testproblems/testproblems_modules.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/deepobs/pytorch/testproblems/testproblems_modules.py b/deepobs/pytorch/testproblems/testproblems_modules.py index 84387c63..3b95b73b 100644 --- a/deepobs/pytorch/testproblems/testproblems_modules.py +++ b/deepobs/pytorch/testproblems/testproblems_modules.py @@ -713,6 +713,7 @@ def __init__(self, seq_len, hidden_dim, vocab_size, num_layers): self.embedding = nn.Embedding( num_embeddings=vocab_size, embedding_dim=hidden_dim ) + self.dropout = nn.Dropout(p=0.2) self.lstm = nn.LSTM( input_size=hidden_dim, hidden_size=hidden_dim, @@ -720,22 +721,28 @@ def __init__(self, seq_len, hidden_dim, vocab_size, num_layers): dropout=0.2, batch_first=True, ) + """new_bias_l0 = torch.zeros_like(self.lstm.bias_ih_l0, device=self.lstm.bias_ih_l0.device) + new_bias_l1 = torch.zeros_like(self.lstm.bias_ih_l1, device=self.lstm.bias_ih_l1.device) + del self.lstm.bias_ih_l0 + del self.lstm.bias_ih_l1 + self.lstm.bias_ih_l0 = new_bias_l0 + self.lstm.bias_ih_l1 = new_bias_l1""" + self.dense = nn.Linear(in_features=hidden_dim, out_features=vocab_size) # TODO init layers? 
def forward(self, x, state=None): """state is a tuple for hidden and cell state for initialisation of the lstm""" - print("net_char_rnn:forward()") - print(f"x.shape = {x.shape}") - # print(f"x={x}") x = self.embedding(x) # if no state is provided, default the state to zeros + x = self.dropout(x) if state is None: x, new_state = self.lstm(x) else: x, new_state = self.lstm(x, state) + x = self.dropout(x) output = self.dense(x) - print(f"output.shape={output.shape}") + output = output.transpose(1, 2) return output # , new_state From cfa7f8f2cc50842260126639a1b8c3dd101f931f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Mon, 16 Aug 2021 11:25:13 +0200 Subject: [PATCH 07/17] [ADD] LSTM PyTorch: different parameter count --- tests/test_testproblems.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_testproblems.py b/tests/test_testproblems.py index 10a8ad95..2f35912e 100644 --- a/tests/test_testproblems.py +++ b/tests/test_testproblems.py @@ -147,8 +147,13 @@ def _check_parameters(tproblem, framework): num_param = [] if framework == "pytorch": - for parameter in tproblem.net.parameters(): - num_param.append(parameter.numel()) + for name, parameter in tproblem.net.named_parameters(): + if "weight_hh_l" in name: + num_param[-1] += parameter.numel() + elif "bias_hh_l" in name: + pass + else: + num_param.append(parameter.numel()) elif framework == "tensorflow": num_param = [np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()] From a899ed29d41d76236a85720b74fe3cede9d864cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Mon, 16 Aug 2021 13:49:50 +0200 Subject: [PATCH 08/17] [ADD] SGD Runner --- examples/runner_sgd_pytorch.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 examples/runner_sgd_pytorch.py diff --git a/examples/runner_sgd_pytorch.py b/examples/runner_sgd_pytorch.py new file mode 100644 index 00000000..38fee120 --- /dev/null +++ b/examples/runner_sgd_pytorch.py @@ -0,0 +1,11 @@ +"""StandardRunner: Default SGD.""" + +from torch.optim import SGD + +from deepobs import pytorch as pt + +optimizer_class = SGD +hyperparams = {"lr": {"type": float}} + +runner = pt.runners.StandardRunner(optimizer_class, hyperparams) +runner.run() From b46ccf35989abb714956d40246a31ce8463e197c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Fri, 17 Sep 2021 11:40:22 +0200 Subject: [PATCH 09/17] [REF] adjust NR_PT_TESTPROBLEMS to 21 --- tests/test_testproblems.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_testproblems.py b/tests/test_testproblems.py index 2f35912e..0258624b 100644 --- a/tests/test_testproblems.py +++ b/tests/test_testproblems.py @@ -19,7 +19,7 @@ # Basic Settings of the Test BATCH_SIZE = 8 -NR_PT_TESTPROBLEMS = 20 +NR_PT_TESTPROBLEMS = 21 NR_TF_TESTPROBLEMS = 27 DEVICES = ["cpu", "cuda:0"] if torch.cuda.is_available() else ["cpu"] FRAMEWORKS = ["pytorch", "tensorflow"] From 6ba92d0d96c68aee86346aad434b0f377387864b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Fri, 17 Sep 2021 11:51:18 +0200 Subject: [PATCH 10/17] [ADD] Tolstoi: PyTorch: redundant bias: set to zero and requires_grad=False --- deepobs/pytorch/testproblems/testproblems_modules.py | 11 ++++------- tests/test_testproblems.py | 6 +++--- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/deepobs/pytorch/testproblems/testproblems_modules.py b/deepobs/pytorch/testproblems/testproblems_modules.py index 3b95b73b..dfd07841 100644 --- 
a/deepobs/pytorch/testproblems/testproblems_modules.py +++ b/deepobs/pytorch/testproblems/testproblems_modules.py @@ -721,15 +721,12 @@ def __init__(self, seq_len, hidden_dim, vocab_size, num_layers): dropout=0.2, batch_first=True, ) - """new_bias_l0 = torch.zeros_like(self.lstm.bias_ih_l0, device=self.lstm.bias_ih_l0.device) - new_bias_l1 = torch.zeros_like(self.lstm.bias_ih_l1, device=self.lstm.bias_ih_l1.device) - del self.lstm.bias_ih_l0 - del self.lstm.bias_ih_l1 - self.lstm.bias_ih_l0 = new_bias_l0 - self.lstm.bias_ih_l1 = new_bias_l1""" + self.lstm.bias_ih_l0.data = torch.zeros_like(self.lstm.bias_ih_l0, device=self.lstm.bias_ih_l0.device) + self.lstm.bias_ih_l1.data = torch.zeros_like(self.lstm.bias_ih_l1, device=self.lstm.bias_ih_l0.device) + self.lstm.bias_ih_l0.requires_grad = False + self.lstm.bias_ih_l1.requires_grad = False self.dense = nn.Linear(in_features=hidden_dim, out_features=vocab_size) - # TODO init layers? def forward(self, x, state=None): """state is a tuple for hidden and cell state for initialisation of the lstm""" diff --git a/tests/test_testproblems.py b/tests/test_testproblems.py index 0258624b..a12bb353 100644 --- a/tests/test_testproblems.py +++ b/tests/test_testproblems.py @@ -148,10 +148,10 @@ def _check_parameters(tproblem, framework): if framework == "pytorch": for name, parameter in tproblem.net.named_parameters(): - if "weight_hh_l" in name: + if parameter.requires_grad is False: + continue + elif "weight_hh_l" in name: num_param[-1] += parameter.numel() - elif "bias_hh_l" in name: - pass else: num_param.append(parameter.numel()) elif framework == "tensorflow": From a46b8ef809dd875388cd67a906316f6af20302c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Fri, 17 Sep 2021 11:55:44 +0200 Subject: [PATCH 11/17] [REF] Tolstoi, PyTorch: adjust dropout probability to tensorflow --- deepobs/pytorch/testproblems/testproblems_modules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepobs/pytorch/testproblems/testproblems_modules.py b/deepobs/pytorch/testproblems/testproblems_modules.py index dfd07841..dc125157 100644 --- a/deepobs/pytorch/testproblems/testproblems_modules.py +++ b/deepobs/pytorch/testproblems/testproblems_modules.py @@ -718,7 +718,7 @@ def __init__(self, seq_len, hidden_dim, vocab_size, num_layers): input_size=hidden_dim, hidden_size=hidden_dim, num_layers=num_layers, - dropout=0.2, + dropout=0.36, # tensorflow two dropouts with keep=0.8 each -> dropout=1-0.8*0.8=0.36 batch_first=True, ) self.lstm.bias_ih_l0.data = torch.zeros_like(self.lstm.bias_ih_l0, device=self.lstm.bias_ih_l0.device) From ba2f00206c0f252fc3a32a83385bf42ae6c584d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Fri, 17 Sep 2021 12:02:57 +0200 Subject: [PATCH 12/17] [REF] adjust TODO --- deepobs/pytorch/testproblems/tolstoi_char_rnn.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/deepobs/pytorch/testproblems/tolstoi_char_rnn.py b/deepobs/pytorch/testproblems/tolstoi_char_rnn.py index b104ef67..a6f90967 100644 --- a/deepobs/pytorch/testproblems/tolstoi_char_rnn.py +++ b/deepobs/pytorch/testproblems/tolstoi_char_rnn.py @@ -14,14 +14,10 @@ class tolstoi_char_rnn(WeightRegularizedTestproblem): """ # TODO check differences compared to tensorflow - # - lstm layer has two bias -> "_check_parameters()" + # - often the test on cuda fails: acc is greater than 1.0 # - loss function: # - tensorflow: mean across time, sum across batch # - pytorch: mean across all - # - lstm parameters counted separately 
(weight_{ih|hh}_l{i}) - # - dropout layers - # - tensorflow: dropout before and after each layer with keep=0.8 - # - pytorch: dropout in-between LSTM + dropout before and after LSTM def __init__(self, batch_size, l2_reg=0.0005): """Create a new char_rnn test problem instance on Tolstoi. From 523e4a8270eba83e7d9c898951b2822c4b804628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Thu, 7 Oct 2021 11:08:15 +0200 Subject: [PATCH 13/17] [REF] cleanup --- deepobs/pytorch/datasets/tolstoi.py | 6 +--- .../pytorch/testproblems/tolstoi_char_rnn.py | 32 +++++++++++++++++-- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/deepobs/pytorch/datasets/tolstoi.py b/deepobs/pytorch/datasets/tolstoi.py index f577809a..ba973cd1 100644 --- a/deepobs/pytorch/datasets/tolstoi.py +++ b/deepobs/pytorch/datasets/tolstoi.py @@ -94,9 +94,5 @@ def _make_test_dataloader(self): return self._make_tolstoi_dataloader(filepath) def _make_train_and_valid_dataloader(self): - # TODO check whether this is intended usage - """return self._make_train_and_valid_dataloader_helper( - self._make_train_dataloader(), - self._make_train_dataloader(), - )""" + # TODO validation data set return self._make_train_dataloader(), self._make_train_dataloader() diff --git a/deepobs/pytorch/testproblems/tolstoi_char_rnn.py b/deepobs/pytorch/testproblems/tolstoi_char_rnn.py index a6f90967..93c6ddb1 100644 --- a/deepobs/pytorch/testproblems/tolstoi_char_rnn.py +++ b/deepobs/pytorch/testproblems/tolstoi_char_rnn.py @@ -8,9 +8,37 @@ class tolstoi_char_rnn(WeightRegularizedTestproblem): - """DeepOBS test problem class for char_rnn network on Tolstoi. + """DeepOBS test problem class for a two-layer LSTM for character-level language + modelling (Char RNN) on Tolstoi's War and Peace. - TODO: add some more details how the test problem works + Some network characteristics: + + - ``128`` hidden units per LSTM cell + - sequence length ``50`` + - cell state is automatically stored in variables between subsequent steps + - when the phase placeholder switches its value from one step to the next, + the cell state is set to its zero value (meaning that we set to zero state + after each round of evaluation, it is therefore important to set the + evaluation interval such that we evaluate after a full epoch.) + + Working training parameters are: + + - batch size ``50`` + - ``200`` epochs + - SGD with a learning rate of :math:`\\approx 0.1` works + + Args: + batch_size (int): Batch size to use. + l2_reg (float): L2-regularization factor. L2-Regularization (weight decay) + is used on the weights but not the biases. + Defaults to ``5e-4``. + + Attributes: + _batch_size: Batch_size for the data of this test problem. + _l2_reg: The regularization factor for this test problem + data: The dataset used by the test problem (datasets.DataSet instance). + loss_function: The loss function for this test problem. + net: The torch module (the neural network) that is trained. 
""" # TODO check differences compared to tensorflow From b71d6730c612bd3f7a37287cc73c44d10bd3eda2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Thu, 7 Oct 2021 11:25:07 +0200 Subject: [PATCH 14/17] [FIX] denominator for 2d labels (like in Tolstoi) --- deepobs/pytorch/testproblems/testproblem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepobs/pytorch/testproblems/testproblem.py b/deepobs/pytorch/testproblems/testproblem.py index b661a50e..d6d689b2 100644 --- a/deepobs/pytorch/testproblems/testproblem.py +++ b/deepobs/pytorch/testproblems/testproblem.py @@ -143,7 +143,7 @@ def forward_func(): loss = self.loss_function(reduction=reduction)(outputs, labels) _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) + total += labels.numel() correct += (predicted == labels).sum().item() accuracy = correct / total From 3edd50527ca2b1b6843e9312a1ebc538b7381b73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Thu, 7 Oct 2021 11:36:11 +0200 Subject: [PATCH 15/17] [REF] cleanup --- deepobs/pytorch/testproblems/testproblems_modules.py | 5 +++-- deepobs/pytorch/testproblems/tolstoi_char_rnn.py | 9 --------- tests/test_testproblems.py | 2 +- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/deepobs/pytorch/testproblems/testproblems_modules.py b/deepobs/pytorch/testproblems/testproblems_modules.py index dc125157..877a0f8a 100644 --- a/deepobs/pytorch/testproblems/testproblems_modules.py +++ b/deepobs/pytorch/testproblems/testproblems_modules.py @@ -718,9 +718,10 @@ def __init__(self, seq_len, hidden_dim, vocab_size, num_layers): input_size=hidden_dim, hidden_size=hidden_dim, num_layers=num_layers, - dropout=0.36, # tensorflow two dropouts with keep=0.8 each -> dropout=1-0.8*0.8=0.36 + dropout=0.36, # tensorflow two dropouts with keep=0.8 each -> dropout=1-0.8*0.8=0.36 batch_first=True, ) + # deactivate redundant bias self.lstm.bias_ih_l0.data = torch.zeros_like(self.lstm.bias_ih_l0, device=self.lstm.bias_ih_l0.device) self.lstm.bias_ih_l1.data = torch.zeros_like(self.lstm.bias_ih_l1, device=self.lstm.bias_ih_l0.device) self.lstm.bias_ih_l0.requires_grad = False @@ -731,8 +732,8 @@ def __init__(self, seq_len, hidden_dim, vocab_size, num_layers): def forward(self, x, state=None): """state is a tuple for hidden and cell state for initialisation of the lstm""" x = self.embedding(x) - # if no state is provided, default the state to zeros x = self.dropout(x) + # if no state is provided, default the state to zeros if state is None: x, new_state = self.lstm(x) else: diff --git a/deepobs/pytorch/testproblems/tolstoi_char_rnn.py b/deepobs/pytorch/testproblems/tolstoi_char_rnn.py index 93c6ddb1..7375d293 100644 --- a/deepobs/pytorch/testproblems/tolstoi_char_rnn.py +++ b/deepobs/pytorch/testproblems/tolstoi_char_rnn.py @@ -34,19 +34,11 @@ class tolstoi_char_rnn(WeightRegularizedTestproblem): Defaults to ``5e-4``. Attributes: - _batch_size: Batch_size for the data of this test problem. - _l2_reg: The regularization factor for this test problem data: The dataset used by the test problem (datasets.DataSet instance). loss_function: The loss function for this test problem. net: The torch module (the neural network) that is trained. 
""" - # TODO check differences compared to tensorflow - # - often the test on cuda fails: acc is greater than 1.0 - # - loss function: - # - tensorflow: mean across time, sum across batch - # - pytorch: mean across all - def __init__(self, batch_size, l2_reg=0.0005): """Create a new char_rnn test problem instance on Tolstoi. @@ -56,7 +48,6 @@ def __init__(self, batch_size, l2_reg=0.0005): is used on the weights but not the biases. Defaults to ``5e-4``. """ - print(f"batch_size={batch_size}") super(tolstoi_char_rnn, self).__init__(batch_size, l2_reg) def set_up(self): diff --git a/tests/test_testproblems.py b/tests/test_testproblems.py index a12bb353..1fac964f 100644 --- a/tests/test_testproblems.py +++ b/tests/test_testproblems.py @@ -150,7 +150,7 @@ def _check_parameters(tproblem, framework): for name, parameter in tproblem.net.named_parameters(): if parameter.requires_grad is False: continue - elif "weight_hh_l" in name: + elif "weight_hh_l" in name: # LSTM parameters counted separately in PyTorch num_param[-1] += parameter.numel() else: num_param.append(parameter.numel()) From 7be0020ad9342b78f25172f5684bf8fdb11e3c1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Thu, 7 Oct 2021 11:44:46 +0200 Subject: [PATCH 16/17] [DEL] remove default sgd --- examples/runner_sgd_pytorch.py | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 examples/runner_sgd_pytorch.py diff --git a/examples/runner_sgd_pytorch.py b/examples/runner_sgd_pytorch.py deleted file mode 100644 index 38fee120..00000000 --- a/examples/runner_sgd_pytorch.py +++ /dev/null @@ -1,11 +0,0 @@ -"""StandardRunner: Default SGD.""" - -from torch.optim import SGD - -from deepobs import pytorch as pt - -optimizer_class = SGD -hyperparams = {"lr": {"type": float}} - -runner = pt.runners.StandardRunner(optimizer_class, hyperparams) -runner.run() From 883202f099f9dd29383e1e1e3e8c0e78d78d8db6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Sch=C3=A4fer?= Date: Thu, 7 Oct 2021 14:52:41 +0200 Subject: [PATCH 17/17] [REF] separate training and validation data --- deepobs/pytorch/datasets/tolstoi.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/deepobs/pytorch/datasets/tolstoi.py b/deepobs/pytorch/datasets/tolstoi.py index ba973cd1..ffcb23d7 100644 --- a/deepobs/pytorch/datasets/tolstoi.py +++ b/deepobs/pytorch/datasets/tolstoi.py @@ -43,10 +43,9 @@ def __init__(self, batch_size, seq_length=50, train_eval_size=653237): self._train_eval_size = train_eval_size super(tolstoi, self).__init__(batch_size) - def _make_tolstoi_dataloader(self, filepath): - # Load the array of character ids, determine the number of batches that - # can be produced, given batch size and sequence lengh - arr = np.load(filepath) + def _make_tolstoi_dataloader(self, arr): + # determine the number of batches that can be produced, given batch size + # and sequence lengh num_batches = int( np.floor((np.size(arr) - 1) / (self._batch_size * self._seq_length)) ) @@ -80,7 +79,7 @@ def _make_tolstoi_dataloader(self, filepath): def _make_train_dataloader(self): filepath = os.path.join(get_data_dir(), "tolstoi", "train.npy") - return self._make_tolstoi_dataloader(filepath) + return self._make_tolstoi_dataloader(np.load(filepath)) def _make_train_eval_dataloader(self): indices = np.arange( @@ -91,8 +90,11 @@ def _make_train_eval_dataloader(self): def _make_test_dataloader(self): filepath = os.path.join(get_data_dir(), "tolstoi", "test.npy") - return self._make_tolstoi_dataloader(filepath) + return 
self._make_tolstoi_dataloader(np.load(filepath)) def _make_train_and_valid_dataloader(self): - # TODO validation data set - return self._make_train_dataloader(), self._make_train_dataloader() + filepath = os.path.join(get_data_dir(), "tolstoi", "train.npy") + data = np.load(filepath) + valid_data = data[0: self._train_eval_size] + train_data = data[self._train_eval_size:] + return self._make_tolstoi_dataloader(valid_data), self._make_tolstoi_dataloader(train_data)
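
Notes on the series (illustrative sketches in plain Python, not taken from the patches themselves):

PyTorch's nn.LSTM carries two bias vectors per layer (bias_ih_l{i} and bias_hh_l{i}), while the TensorFlow reference network has a single bias per layer. Patches 06/10 zero out and freeze the redundant bias_ih_l{i}, and patches 07/15 merge weight_ih_l{i} and weight_hh_l{i} when counting parameters so the count matches the TensorFlow layout. A minimal sketch of both ideas, assuming a plain two-layer nn.LSTM like the one in net_char_rnn:

    import torch
    from torch import nn

    lstm = nn.LSTM(input_size=128, hidden_size=128, num_layers=2, batch_first=True)

    # deactivate the redundant input-hidden bias of every layer
    for layer in range(lstm.num_layers):
        bias_ih = getattr(lstm, f"bias_ih_l{layer}")
        bias_ih.data.zero_()           # bias_hh_l{layer} already provides a bias
        bias_ih.requires_grad = False  # keep it out of training and out of the count

    # count parameters the way the adjusted _check_parameters does
    num_param = []
    for name, parameter in lstm.named_parameters():
        if not parameter.requires_grad:
            continue
        if "weight_hh_l" in name:
            num_param[-1] += parameter.numel()  # merge with weight_ih_l{layer}
        else:
            num_param.append(parameter.numel())
    print(num_param)  # one weight entry and one bias entry per LSTM layer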
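The dropout value in patch 11 comes from folding the two TensorFlow dropout layers (keep probability 0.8 before and after each LSTM layer) into the single dropout slot that nn.LSTM offers between layers: a unit survives both drops with probability 0.8 * 0.8 = 0.64, so the equivalent single dropout probability is 1 - 0.64 = 0.36. The arithmetic, spelled out:

    keep = 0.8
    combined_keep = keep * keep         # 0.64: survive both dropout layers
    equivalent_p = 1.0 - combined_keep  # 0.36, the value passed to nn.LSTM(dropout=...)
    assert abs(equivalent_p - 0.36) < 1e-12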
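Patch 14 fixes the accuracy denominator for sequence labels. For the Char RNN the network output is transposed to (batch_size, vocab_size, seq_len), so torch.max(outputs.data, 1) yields one predicted character per time step, and the labels have shape (batch_size, seq_len). Dividing the number of correct predictions by labels.size(0) (the batch size) instead of labels.numel() (batch_size * seq_len) is what let the evaluation report accuracies greater than 1.0. A small sketch with assumed shapes:

    import torch

    batch_size, vocab_size, seq_len = 4, 83, 50
    outputs = torch.randn(batch_size, vocab_size, seq_len)     # like net_char_rnn's output
    labels = torch.randint(vocab_size, (batch_size, seq_len))  # one target character per step

    _, predicted = torch.max(outputs.data, 1)    # argmax over the vocab dimension
    correct = (predicted == labels).sum().item()
    total = labels.numel()                       # batch_size * seq_len, not labels.size(0)
    accuracy = correct / total                   # now always within [0, 1]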
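The Tolstoi loader (patches 03 and 17) works on a flat array of character ids: it determines how many batch_size x seq_length blocks fit, forms input sequences, and uses the same text shifted by one character as the targets; patch 17 additionally carves the first train_eval_size ids out of train.npy as the validation split. The reshaping below is a self-contained sketch of that batching idea under those assumptions, not a copy of the DeepOBS implementation:

    import numpy as np

    def make_char_batches(arr: np.ndarray, batch_size: int, seq_length: int):
        """Split a flat array of character ids into inputs x and targets y (y shifted by one)."""
        num_batches = (arr.size - 1) // (batch_size * seq_length)
        used = num_batches * batch_size * seq_length
        x = arr[:used].reshape(batch_size, num_batches, seq_length)
        y = arr[1:used + 1].reshape(batch_size, num_batches, seq_length)
        # reorder to one (batch_size, seq_length) block per training step
        return x.transpose(1, 0, 2), y.transpose(1, 0, 2)

    ids = np.arange(100_000)  # stand-in for np.load(".../tolstoi/train.npy")
    x, y = make_char_batches(ids, batch_size=50, seq_length=50)
    assert x.shape == y.shape == (39, 50, 50)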