From ec9be6c30e9cfce451dd9449c772dbdda820b53f Mon Sep 17 00:00:00 2001 From: Aayush Garg Date: Fri, 6 May 2022 12:59:19 +0530 Subject: [PATCH] add pl---hydra implementation --- pl-hydra/configs/callbacks/default.yaml | 28 +++ pl-hydra/configs/callbacks/none.yaml | 0 pl-hydra/configs/datamodule/cifar10.yaml | 11 + pl-hydra/configs/debug/default.yaml | 28 +++ pl-hydra/configs/debug/limit_batches.yaml | 12 ++ pl-hydra/configs/debug/overfit.yaml | 10 + pl-hydra/configs/debug/profiler.yaml | 12 ++ pl-hydra/configs/debug/step.yaml | 9 + pl-hydra/configs/debug/test_only.yaml | 9 + pl-hydra/configs/experiment/example.yaml | 38 ++++ .../configs/hparams_search/mnist_optuna.yaml | 60 ++++++ pl-hydra/configs/local/.gitkeep | 0 pl-hydra/configs/log_dir/debug.yaml | 8 + pl-hydra/configs/log_dir/default.yaml | 15 ++ pl-hydra/configs/log_dir/evaluation.yaml | 8 + pl-hydra/configs/logger/comet.yaml | 7 + pl-hydra/configs/logger/csv.yaml | 7 + pl-hydra/configs/logger/many_loggers.yaml | 9 + pl-hydra/configs/logger/mlflow.yaml | 9 + pl-hydra/configs/logger/neptune.yaml | 11 + pl-hydra/configs/logger/tensorboard.yaml | 11 + pl-hydra/configs/logger/wandb.yaml | 15 ++ pl-hydra/configs/model/cifar10_densenet.yaml | 10 + pl-hydra/configs/model/cifar10_googlenet.yaml | 6 + pl-hydra/configs/model/cifar10_resnet.yaml | 8 + pl-hydra/configs/model/cifar10_vgg11.yaml | 6 + pl-hydra/configs/model/cifar10_vit.yaml | 13 ++ pl-hydra/configs/optim/optim_adam.yaml | 11 + pl-hydra/configs/optim/optim_adam_vit.yaml | 10 + pl-hydra/configs/optim/optim_sgd.yaml | 12 ++ pl-hydra/configs/test.yaml | 32 +++ pl-hydra/configs/train.yaml | 71 +++++++ pl-hydra/configs/trainer/ddp.yaml | 6 + pl-hydra/configs/trainer/default.yaml | 12 ++ pl-hydra/notebooks/.gitkeep | 0 pl-hydra/notebooks/make_vgg11.ipynb | 196 +++++++++++++++++ pl-hydra/scripts/schedule.sh | 7 + pl-hydra/setup.cfg | 36 ++++ pl-hydra/src/__init__.py | 0 pl-hydra/src/datamodules/__init__.py | 0 .../src/datamodules/cifar10_datamodule.py | 128 +++++++++++ .../src/datamodules/components/__init__.py | 0 pl-hydra/src/datamodules/mnist_datamodule.py | 106 ++++++++++ pl-hydra/src/models/__init__.py | 0 pl-hydra/src/models/cifar10_module.py | 198 ++++++++++++++++++ pl-hydra/src/models/components/__init__.py | 0 pl-hydra/src/models/components/densenet.py | 153 ++++++++++++++ pl-hydra/src/models/components/googlenet.py | 132 ++++++++++++ pl-hydra/src/models/components/resnet.py | 135 ++++++++++++ pl-hydra/src/models/components/vgg.py | 111 ++++++++++ pl-hydra/src/models/components/vit.py | 128 +++++++++++ pl-hydra/src/testing_pipeline.py | 57 +++++ pl-hydra/src/training_pipeline.py | 126 +++++++++++ pl-hydra/src/utils/__init__.py | 164 +++++++++++++++ pl-hydra/src/utils/plotter.py | 37 ++++ pl-hydra/src/vendor/__init__.py | 1 + pl-hydra/test.py | 26 +++ pl-hydra/tests/__init__.py | 0 pl-hydra/tests/helpers/__init__.py | 0 pl-hydra/tests/helpers/module_available.py | 28 +++ pl-hydra/tests/helpers/run_command.py | 15 ++ pl-hydra/tests/helpers/runif.py | 104 +++++++++ pl-hydra/tests/shell/__init__.py | 0 pl-hydra/tests/shell/test_basic_commands.py | 58 +++++ pl-hydra/tests/shell/test_debug_configs.py | 35 ++++ pl-hydra/tests/shell/test_sweeps.py | 44 ++++ pl-hydra/tests/unit/__init__.py | 0 pl-hydra/tests/unit/test_mnist_datamodule.py | 36 ++++ pl-hydra/train.py | 33 +++ 69 files changed, 2608 insertions(+) create mode 100644 pl-hydra/configs/callbacks/default.yaml create mode 100644 pl-hydra/configs/callbacks/none.yaml create mode 100644 pl-hydra/configs/datamodule/cifar10.yaml create 
mode 100644 pl-hydra/configs/debug/default.yaml create mode 100644 pl-hydra/configs/debug/limit_batches.yaml create mode 100644 pl-hydra/configs/debug/overfit.yaml create mode 100644 pl-hydra/configs/debug/profiler.yaml create mode 100644 pl-hydra/configs/debug/step.yaml create mode 100644 pl-hydra/configs/debug/test_only.yaml create mode 100644 pl-hydra/configs/experiment/example.yaml create mode 100644 pl-hydra/configs/hparams_search/mnist_optuna.yaml create mode 100644 pl-hydra/configs/local/.gitkeep create mode 100644 pl-hydra/configs/log_dir/debug.yaml create mode 100644 pl-hydra/configs/log_dir/default.yaml create mode 100644 pl-hydra/configs/log_dir/evaluation.yaml create mode 100644 pl-hydra/configs/logger/comet.yaml create mode 100644 pl-hydra/configs/logger/csv.yaml create mode 100644 pl-hydra/configs/logger/many_loggers.yaml create mode 100644 pl-hydra/configs/logger/mlflow.yaml create mode 100644 pl-hydra/configs/logger/neptune.yaml create mode 100644 pl-hydra/configs/logger/tensorboard.yaml create mode 100644 pl-hydra/configs/logger/wandb.yaml create mode 100644 pl-hydra/configs/model/cifar10_densenet.yaml create mode 100644 pl-hydra/configs/model/cifar10_googlenet.yaml create mode 100644 pl-hydra/configs/model/cifar10_resnet.yaml create mode 100644 pl-hydra/configs/model/cifar10_vgg11.yaml create mode 100644 pl-hydra/configs/model/cifar10_vit.yaml create mode 100644 pl-hydra/configs/optim/optim_adam.yaml create mode 100644 pl-hydra/configs/optim/optim_adam_vit.yaml create mode 100644 pl-hydra/configs/optim/optim_sgd.yaml create mode 100644 pl-hydra/configs/test.yaml create mode 100644 pl-hydra/configs/train.yaml create mode 100644 pl-hydra/configs/trainer/ddp.yaml create mode 100644 pl-hydra/configs/trainer/default.yaml create mode 100644 pl-hydra/notebooks/.gitkeep create mode 100644 pl-hydra/notebooks/make_vgg11.ipynb create mode 100644 pl-hydra/scripts/schedule.sh create mode 100644 pl-hydra/setup.cfg create mode 100644 pl-hydra/src/__init__.py create mode 100644 pl-hydra/src/datamodules/__init__.py create mode 100644 pl-hydra/src/datamodules/cifar10_datamodule.py create mode 100644 pl-hydra/src/datamodules/components/__init__.py create mode 100644 pl-hydra/src/datamodules/mnist_datamodule.py create mode 100644 pl-hydra/src/models/__init__.py create mode 100644 pl-hydra/src/models/cifar10_module.py create mode 100644 pl-hydra/src/models/components/__init__.py create mode 100644 pl-hydra/src/models/components/densenet.py create mode 100644 pl-hydra/src/models/components/googlenet.py create mode 100644 pl-hydra/src/models/components/resnet.py create mode 100644 pl-hydra/src/models/components/vgg.py create mode 100644 pl-hydra/src/models/components/vit.py create mode 100644 pl-hydra/src/testing_pipeline.py create mode 100644 pl-hydra/src/training_pipeline.py create mode 100644 pl-hydra/src/utils/__init__.py create mode 100644 pl-hydra/src/utils/plotter.py create mode 100644 pl-hydra/src/vendor/__init__.py create mode 100644 pl-hydra/test.py create mode 100644 pl-hydra/tests/__init__.py create mode 100644 pl-hydra/tests/helpers/__init__.py create mode 100644 pl-hydra/tests/helpers/module_available.py create mode 100644 pl-hydra/tests/helpers/run_command.py create mode 100644 pl-hydra/tests/helpers/runif.py create mode 100644 pl-hydra/tests/shell/__init__.py create mode 100644 pl-hydra/tests/shell/test_basic_commands.py create mode 100644 pl-hydra/tests/shell/test_debug_configs.py create mode 100644 pl-hydra/tests/shell/test_sweeps.py create mode 100644 
pl-hydra/tests/unit/__init__.py create mode 100644 pl-hydra/tests/unit/test_mnist_datamodule.py create mode 100644 pl-hydra/train.py diff --git a/pl-hydra/configs/callbacks/default.yaml b/pl-hydra/configs/callbacks/default.yaml new file mode 100644 index 0000000..29fa956 --- /dev/null +++ b/pl-hydra/configs/callbacks/default.yaml @@ -0,0 +1,28 @@ +model_checkpoint: + _target_: pytorch_lightning.callbacks.ModelCheckpoint + monitor: "val/acc" # name of the logged metric which determines when model is improving + mode: "max" # "max" means higher metric value is better, can be also "min" + save_top_k: 3 # save k best models (determined by above metric) + save_last: True # additionaly always save model from last epoch + verbose: False + dirpath: "checkpoints/" + filename: "epoch_{epoch:03d}" + auto_insert_metric_name: False + +early_stopping: + _target_: pytorch_lightning.callbacks.EarlyStopping + monitor: "val/acc" # name of the logged metric which determines when model is improving + mode: "max" # "max" means higher metric value is better, can be also "min" + patience: 100 # how many validation epochs of not improving until training stops + min_delta: 0 # minimum change in the monitored metric needed to qualify as an improvement + +model_summary: + _target_: pytorch_lightning.callbacks.RichModelSummary + max_depth: -1 + +rich_progress_bar: + _target_: pytorch_lightning.callbacks.RichProgressBar + +learning_rate_monitor: + _target_: pytorch_lightning.callbacks.LearningRateMonitor + logging_interval: epoch diff --git a/pl-hydra/configs/callbacks/none.yaml b/pl-hydra/configs/callbacks/none.yaml new file mode 100644 index 0000000..e69de29 diff --git a/pl-hydra/configs/datamodule/cifar10.yaml b/pl-hydra/configs/datamodule/cifar10.yaml new file mode 100644 index 0000000..f5528b5 --- /dev/null +++ b/pl-hydra/configs/datamodule/cifar10.yaml @@ -0,0 +1,11 @@ +_target_: src.datamodules.cifar10_datamodule.CIFAR10DataModule + +data_dir: ${data_dir} # data_dir is specified in config.yaml +batch_size: 128 +num_workers: 4 +pin_memory: True +data_mean: [0.49421428, 0.48513139, 0.45040909] +data_std: [0.24665252, 0.24289226, 0.26159238] +image_size: [32, 32] +scale_bounds: [0.8, 1.0] +aspect_bounds: [0.9, 1.1] diff --git a/pl-hydra/configs/debug/default.yaml b/pl-hydra/configs/debug/default.yaml new file mode 100644 index 0000000..8dfb104 --- /dev/null +++ b/pl-hydra/configs/debug/default.yaml @@ -0,0 +1,28 @@ +# @package _global_ + +# default debugging setup, runs 1 full epoch +# other debugging configs can inherit from this one + +defaults: + - override /log_dir: debug.yaml + +trainer: + max_epochs: 1 + gpus: 0 # debuggers don't like gpus + detect_anomaly: true # raise exception if NaN or +/-inf is detected in any tensor + track_grad_norm: 2 # track gradient norm with loggers + +datamodule: + num_workers: 0 # debuggers don't like multiprocessing + pin_memory: False # disable gpu memory pin + +# sets level of all command line loggers to 'DEBUG' +# https://hydra.cc/docs/tutorials/basic/running_your_app/logging/ +hydra: + verbose: True + + # use this to set level of only chosen command line loggers to 'DEBUG': + # verbose: [src.train, src.utils] + +# config is already printed by hydra when `hydra/verbose: True` +print_config: False diff --git a/pl-hydra/configs/debug/limit_batches.yaml b/pl-hydra/configs/debug/limit_batches.yaml new file mode 100644 index 0000000..cc28852 --- /dev/null +++ b/pl-hydra/configs/debug/limit_batches.yaml @@ -0,0 +1,12 @@ +# @package _global_ + +# uses only 1% of the training data 
and 5% of validation/test data + +defaults: + - default.yaml + +trainer: + max_epochs: 3 + limit_train_batches: 0.01 + limit_val_batches: 0.05 + limit_test_batches: 0.05 diff --git a/pl-hydra/configs/debug/overfit.yaml b/pl-hydra/configs/debug/overfit.yaml new file mode 100644 index 0000000..2ce654b --- /dev/null +++ b/pl-hydra/configs/debug/overfit.yaml @@ -0,0 +1,10 @@ +# @package _global_ + +# overfits to 3 batches + +defaults: + - default.yaml + +trainer: + max_epochs: 20 + overfit_batches: 3 diff --git a/pl-hydra/configs/debug/profiler.yaml b/pl-hydra/configs/debug/profiler.yaml new file mode 100644 index 0000000..e18df1c --- /dev/null +++ b/pl-hydra/configs/debug/profiler.yaml @@ -0,0 +1,12 @@ +# @package _global_ + +# runs with execution time profiling + +defaults: + - default.yaml + +trainer: + max_epochs: 1 + profiler: "simple" + # profiler: "advanced" + # profiler: "pytorch" diff --git a/pl-hydra/configs/debug/step.yaml b/pl-hydra/configs/debug/step.yaml new file mode 100644 index 0000000..98eba22 --- /dev/null +++ b/pl-hydra/configs/debug/step.yaml @@ -0,0 +1,9 @@ +# @package _global_ + +# runs 1 train, 1 validation and 1 test step + +defaults: + - default.yaml + +trainer: + fast_dev_run: true diff --git a/pl-hydra/configs/debug/test_only.yaml b/pl-hydra/configs/debug/test_only.yaml new file mode 100644 index 0000000..79dc34a --- /dev/null +++ b/pl-hydra/configs/debug/test_only.yaml @@ -0,0 +1,9 @@ +# @package _global_ + +# runs only test epoch + +defaults: + - default.yaml + +train: False +test: True diff --git a/pl-hydra/configs/experiment/example.yaml b/pl-hydra/configs/experiment/example.yaml new file mode 100644 index 0000000..305d96c --- /dev/null +++ b/pl-hydra/configs/experiment/example.yaml @@ -0,0 +1,38 @@ +# @package _global_ + +# to execute this experiment run: +# python train.py experiment=example + +defaults: + - override /datamodule: mnist.yaml + - override /model: mnist.yaml + - override /callbacks: default.yaml + - override /logger: null + - override /trainer: default.yaml + +# all parameters below will be merged with parameters from default configurations set above +# this allows you to overwrite only specified parameters + +# name of the run determines folder name in logs +name: "simple_dense_net" + +seed: 12345 + +trainer: + min_epochs: 10 + max_epochs: 10 + gradient_clip_val: 0.5 + +model: + lr: 0.002 + net: + lin1_size: 128 + lin2_size: 256 + lin3_size: 64 + +datamodule: + batch_size: 64 + +logger: + wandb: + tags: ["mnist", "${name}"] diff --git a/pl-hydra/configs/hparams_search/mnist_optuna.yaml b/pl-hydra/configs/hparams_search/mnist_optuna.yaml new file mode 100644 index 0000000..7de2b44 --- /dev/null +++ b/pl-hydra/configs/hparams_search/mnist_optuna.yaml @@ -0,0 +1,60 @@ +# @package _global_ + +# example hyperparameter optimization of some experiment with Optuna: +# python train.py -m hparams_search=mnist_optuna experiment=example + +defaults: + - override /hydra/sweeper: optuna + +# choose metric which will be optimized by Optuna +# make sure this is the correct name of some metric logged in lightning module! 
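The `optimized_metric` key that follows names the metric Optuna will optimize, and it has to match a metric the LightningModule actually logs. For context, a minimal sketch of the contract the sweeper relies on: the function decorated with `@hydra.main` returns that metric's value. This snippet is illustrative only; the `config_name` and the `build_from_config` helper are assumptions, not code from this patch.

# Illustrative sketch: how an Optuna sweep consumes the logged metric.
# `build_from_config` is a hypothetical helper; the actual entry point is train.py.
import hydra
from omegaconf import DictConfig

@hydra.main(config_path="configs/", config_name="train")
def main(config: DictConfig) -> float:
    trainer, model, datamodule = build_from_config(config)  # hypothetical: instantiate via hydra.utils
    trainer.fit(model, datamodule=datamodule)
    # Must name a logged metric, e.g. "val/acc_best"; the sweeper optimizes this return value.
    return trainer.callback_metrics[config.optimized_metric].item()

if __name__ == "__main__":
    main()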
+optimized_metric: "val/acc_best" + +# here we define Optuna hyperparameter search +# it optimizes for value returned from function with @hydra.main decorator +# docs: https://hydra.cc/docs/next/plugins/optuna_sweeper +hydra: + sweeper: + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + + # storage URL to persist optimization results + # for example, you can use SQLite if you set 'sqlite:///example.db' + storage: null + + # name of the study to persist optimization results + study_name: null + + # number of parallel workers + n_jobs: 1 + + # 'minimize' or 'maximize' the objective + direction: maximize + + # total number of runs that will be executed + n_trials: 25 + + # choose Optuna hyperparameter sampler + # docs: https://optuna.readthedocs.io/en/stable/reference/samplers.html + sampler: + _target_: optuna.samplers.TPESampler + seed: 12345 + n_startup_trials: 10 # number of random sampling runs before optimization starts + + # define range of hyperparameters + search_space: + datamodule.batch_size: + type: categorical + choices: [32, 64, 128] + model.lr: + type: float + low: 0.0001 + high: 0.2 + model.net.lin1_size: + type: categorical + choices: [32, 64, 128, 256, 512] + model.net.lin2_size: + type: categorical + choices: [32, 64, 128, 256, 512] + model.net.lin3_size: + type: categorical + choices: [32, 64, 128, 256, 512] diff --git a/pl-hydra/configs/local/.gitkeep b/pl-hydra/configs/local/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/pl-hydra/configs/log_dir/debug.yaml b/pl-hydra/configs/log_dir/debug.yaml new file mode 100644 index 0000000..83db732 --- /dev/null +++ b/pl-hydra/configs/log_dir/debug.yaml @@ -0,0 +1,8 @@ +# @package _global_ + +hydra: + run: + dir: logs/debugs/runs/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} + sweep: + dir: logs/debugs/multiruns/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} + subdir: ${hydra.job.num} diff --git a/pl-hydra/configs/log_dir/default.yaml b/pl-hydra/configs/log_dir/default.yaml new file mode 100644 index 0000000..3868729 --- /dev/null +++ b/pl-hydra/configs/log_dir/default.yaml @@ -0,0 +1,15 @@ +# @package _global_ + +hydra: + run: + dir: logs/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} + sweep: + dir: logs/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} + subdir: ${hydra.job.num} + +# hydra: +# run: +# dir: logs/experiments/runs/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} +# sweep: +# dir: logs/experiments/multiruns/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} +# subdir: ${hydra.job.num} diff --git a/pl-hydra/configs/log_dir/evaluation.yaml b/pl-hydra/configs/log_dir/evaluation.yaml new file mode 100644 index 0000000..a8de069 --- /dev/null +++ b/pl-hydra/configs/log_dir/evaluation.yaml @@ -0,0 +1,8 @@ +# @package _global_ + +hydra: + run: + dir: logs/evaluations/runs/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} + sweep: + dir: logs/evaluations/multiruns/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} + subdir: ${hydra.job.num} diff --git a/pl-hydra/configs/logger/comet.yaml b/pl-hydra/configs/logger/comet.yaml new file mode 100644 index 0000000..6ac99f4 --- /dev/null +++ b/pl-hydra/configs/logger/comet.yaml @@ -0,0 +1,7 @@ +# https://www.comet.ml + +comet: + _target_: pytorch_lightning.loggers.comet.CometLogger + api_key: ${oc.env:COMET_API_TOKEN} # api key is loaded from environment variable + project_name: "template-tests" + experiment_name: ${name} diff --git a/pl-hydra/configs/logger/csv.yaml b/pl-hydra/configs/logger/csv.yaml new file mode 100644 index 0000000..aaec6d7 --- /dev/null +++ b/pl-hydra/configs/logger/csv.yaml @@ -0,0 
+1,7 @@ +# csv logger built in lightning + +csv: + _target_: pytorch_lightning.loggers.csv_logs.CSVLogger + save_dir: "." + name: "csv/" + prefix: "" diff --git a/pl-hydra/configs/logger/many_loggers.yaml b/pl-hydra/configs/logger/many_loggers.yaml new file mode 100644 index 0000000..801444d --- /dev/null +++ b/pl-hydra/configs/logger/many_loggers.yaml @@ -0,0 +1,9 @@ +# train with many loggers at once + +defaults: + # - comet.yaml + - csv.yaml + # - mlflow.yaml + # - neptune.yaml + - tensorboard.yaml + - wandb.yaml diff --git a/pl-hydra/configs/logger/mlflow.yaml b/pl-hydra/configs/logger/mlflow.yaml new file mode 100644 index 0000000..130d3de --- /dev/null +++ b/pl-hydra/configs/logger/mlflow.yaml @@ -0,0 +1,9 @@ +# https://mlflow.org + +mlflow: + _target_: pytorch_lightning.loggers.mlflow.MLFlowLogger + experiment_name: ${name} + tracking_uri: ${original_work_dir}/logs/mlflow/mlruns # run `mlflow ui` command inside the `logs/mlflow/` dir to open the UI + tags: null + prefix: "" + artifact_location: null diff --git a/pl-hydra/configs/logger/neptune.yaml b/pl-hydra/configs/logger/neptune.yaml new file mode 100644 index 0000000..117af93 --- /dev/null +++ b/pl-hydra/configs/logger/neptune.yaml @@ -0,0 +1,11 @@ +# https://neptune.ai + +neptune: + _target_: pytorch_lightning.loggers.neptune.NeptuneLogger + api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable + project_name: your_name/template-tests + close_after_fit: True + offline_mode: False + experiment_name: ${name} + experiment_id: null + prefix: "" diff --git a/pl-hydra/configs/logger/tensorboard.yaml b/pl-hydra/configs/logger/tensorboard.yaml new file mode 100644 index 0000000..730c8e9 --- /dev/null +++ b/pl-hydra/configs/logger/tensorboard.yaml @@ -0,0 +1,11 @@ +# https://www.tensorflow.org/tensorboard/ + +tensorboard: + _target_: pytorch_lightning.loggers.tensorboard.TensorBoardLogger + save_dir: "tensorboard/" + name: null + version: ${name} + log_graph: True + default_hp_metric: True + prefix: "" + \ No newline at end of file diff --git a/pl-hydra/configs/logger/wandb.yaml b/pl-hydra/configs/logger/wandb.yaml new file mode 100644 index 0000000..df6024b --- /dev/null +++ b/pl-hydra/configs/logger/wandb.yaml @@ -0,0 +1,15 @@ +# https://wandb.ai + +wandb: + _target_: pytorch_lightning.loggers.wandb.WandbLogger + project: "template-tests" + # name: ${name} + save_dir: "." + offline: False # set True to store all logs only locally + id: null # pass correct id to resume experiment! 
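Each logger config in this group maps a short name (csv, wandb, tensorboard, ...) to a `_target_` class plus its constructor arguments. A minimal sketch of the pattern, assuming the composed `logger` group arrives as a DictConfig; this is not the utility code shipped in src/utils, just an illustration.

# Sketch: turn a composed `logger` config group into PyTorch Lightning logger objects.
# Assumes a mapping like {"wandb": {...}, "tensorboard": {...}}; not taken from this patch.
import hydra
from omegaconf import DictConfig

def instantiate_loggers(logger_cfg: DictConfig) -> list:
    loggers = []
    for _, conf in logger_cfg.items():
        if isinstance(conf, DictConfig) and "_target_" in conf:
            # hydra.utils.instantiate builds the class named in `_target_` with the remaining keys
            loggers.append(hydra.utils.instantiate(conf))
    return loggers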
+ # entity: "" # set to name of your wandb team + log_model: False + prefix: "" + job_type: "train" + group: "" + tags: [] diff --git a/pl-hydra/configs/model/cifar10_densenet.yaml b/pl-hydra/configs/model/cifar10_densenet.yaml new file mode 100644 index 0000000..be9f459 --- /dev/null +++ b/pl-hydra/configs/model/cifar10_densenet.yaml @@ -0,0 +1,10 @@ +_target_: src.models.cifar10_module.CIFAR10LitModule + +net: + _target_: src.models.components.densenet.DenseNet + num_classes: 10 + num_layers: [6,6,6,6] + bn_size: 2 + growth_rate: 16 + act_fn_by_name: relu + diff --git a/pl-hydra/configs/model/cifar10_googlenet.yaml b/pl-hydra/configs/model/cifar10_googlenet.yaml new file mode 100644 index 0000000..8efd5c0 --- /dev/null +++ b/pl-hydra/configs/model/cifar10_googlenet.yaml @@ -0,0 +1,6 @@ +_target_: src.models.cifar10_module.CIFAR10LitModule + +net: + _target_: src.models.components.googlenet.GoogleNet + num_classes: 10 + act_fn_by_name: relu diff --git a/pl-hydra/configs/model/cifar10_resnet.yaml b/pl-hydra/configs/model/cifar10_resnet.yaml new file mode 100644 index 0000000..9770ba5 --- /dev/null +++ b/pl-hydra/configs/model/cifar10_resnet.yaml @@ -0,0 +1,8 @@ +_target_: src.models.cifar10_module.CIFAR10LitModule + +net: + _target_: src.models.components.resnet.ResNet + num_classes: 10 + num_blocks: [3,3,3] + c_hidden: [16,32,64] + act_fn_by_name: relu diff --git a/pl-hydra/configs/model/cifar10_vgg11.yaml b/pl-hydra/configs/model/cifar10_vgg11.yaml new file mode 100644 index 0000000..6e1defc --- /dev/null +++ b/pl-hydra/configs/model/cifar10_vgg11.yaml @@ -0,0 +1,6 @@ +_target_: src.models.cifar10_module.CIFAR10LitModule + +net: + _target_: src.models.components.vgg.VGG11 + num_classes: 10 + act_fn_by_name: relu diff --git a/pl-hydra/configs/model/cifar10_vit.yaml b/pl-hydra/configs/model/cifar10_vit.yaml new file mode 100644 index 0000000..24e4bc9 --- /dev/null +++ b/pl-hydra/configs/model/cifar10_vit.yaml @@ -0,0 +1,13 @@ +_target_: src.models.cifar10_module.CIFAR10LitModule + +net: + _target_: src.models.components.vit.VisionTransformer + num_classes: 10 + num_heads: 8 + num_layers: 6 + num_channels: 3 + num_patches: 64 + patch_size: 4 + embed_dim: 256 + hidden_dim: 512 + dropout: 0.2 \ No newline at end of file diff --git a/pl-hydra/configs/optim/optim_adam.yaml b/pl-hydra/configs/optim/optim_adam.yaml new file mode 100644 index 0000000..212af78 --- /dev/null +++ b/pl-hydra/configs/optim/optim_adam.yaml @@ -0,0 +1,11 @@ +optimizer: + _target_: torch.optim.AdamW + lr: 1e-3 + weight_decay: 1e-4 + +use_lr_scheduler: True + +lr_scheduler: + _target_: torch.optim.lr_scheduler.MultiStepLR + milestones: [90,130] + gamma: 0.1 \ No newline at end of file diff --git a/pl-hydra/configs/optim/optim_adam_vit.yaml b/pl-hydra/configs/optim/optim_adam_vit.yaml new file mode 100644 index 0000000..2f3ab04 --- /dev/null +++ b/pl-hydra/configs/optim/optim_adam_vit.yaml @@ -0,0 +1,10 @@ +optimizer: + _target_: torch.optim.AdamW + lr: 3e-4 + +use_lr_scheduler: True + +lr_scheduler: + _target_: torch.optim.lr_scheduler.MultiStepLR + milestones: [90,130] + gamma: 0.1 \ No newline at end of file diff --git a/pl-hydra/configs/optim/optim_sgd.yaml b/pl-hydra/configs/optim/optim_sgd.yaml new file mode 100644 index 0000000..24a3ccf --- /dev/null +++ b/pl-hydra/configs/optim/optim_sgd.yaml @@ -0,0 +1,12 @@ +optimizer: + _target_: torch.optim.SGD + lr: 0.1 + weight_decay: 1e-4 + momentum: 0.9 + +use_lr_scheduler: True + +lr_scheduler: + _target_: torch.optim.lr_scheduler.MultiStepLR + milestones: [90,130] + gamma: 
0.1 diff --git a/pl-hydra/configs/test.yaml b/pl-hydra/configs/test.yaml new file mode 100644 index 0000000..1e10d5c --- /dev/null +++ b/pl-hydra/configs/test.yaml @@ -0,0 +1,32 @@ +# @package _global_ + +# specify here default evaluation configuration +defaults: + - _self_ + - datamodule: mnist.yaml # choose the datamodule for evaluation + - model: mnist.yaml + - callbacks: null + - logger: null + - trainer: default.yaml + - log_dir: evaluation.yaml + + - experiment: null + + # enable color logging + - override hydra/hydra_logging: colorlog + - override hydra/job_logging: colorlog + +original_work_dir: ${hydra:runtime.cwd} + +data_dir: ${original_work_dir}/data/ + +print_config: True + +ignore_warnings: True + +seed: null + +name: "default" + +# passing checkpoint path is necessary +ckpt_path: ??? diff --git a/pl-hydra/configs/train.yaml b/pl-hydra/configs/train.yaml new file mode 100644 index 0000000..1632f5c --- /dev/null +++ b/pl-hydra/configs/train.yaml @@ -0,0 +1,71 @@ +# @package _global_ + +# specify here default training configuration +defaults: + - _self_ + - datamodule: cifar10.yaml + # for resnet + - model : cifar10_resnet.yaml + - optim: optim_sgd.yaml + # # for googlenet + # - model : cifar10_googlenet.yaml + # - optim: optim_adam.yaml + # # for densenet + # - model : cifar10_densenet.yaml + # - optim: optim_adam.yaml + # for vgg11 + # - model : cifar10_vgg11.yaml + # - optim: optim_adam.yaml + # # for Vit + # - model : cifar10_vit.yaml + # - optim: optim_adam_vit.yaml + # - callbacks: default.yaml + - logger: tensorboard.yaml # set logger here or use command line (e.g. `python train.py logger=tensorboard`) + # - trainer: ddp.yaml + - trainer: default.yaml + - log_dir: default.yaml + # experiment configs allow for version control of specific configurations + # e.g. best hyperparameters for each combination of model and datamodule + - experiment: null + + # debugging config (enable through command line, e.g. 
`python train.py debug=default) + - debug: null + + # config for hyperparameter optimization + - hparams_search: null + + # optional local config for machine/user specific settings + # it's optional since it doesn't need to exist and is excluded from version control + - optional local: default.yaml + + # enable color logging + - override hydra/hydra_logging: colorlog + - override hydra/job_logging: colorlog + +# default name for the experiment, determines logging folder path +# (you can overwrite this name in experiment configs) +name: "resnet" + +# path to original working directory +# hydra hijacks working directory by changing it to the new log directory +# https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory +original_work_dir: ${hydra:runtime.cwd} + +# path to folder with data +data_dir: ${original_work_dir}/../../data/ + +# pretty print config at the start of the run using Rich library +print_config: True + +# disable python warnings if they annoy you +ignore_warnings: True + +# set False to skip model training +train: True + +# evaluate on test set, using best model weights achieved during training +# lightning chooses best weights based on the metric specified in checkpoint callback +test: True + +# seed for random number generators in pytorch, numpy and python.random +seed: 100 \ No newline at end of file diff --git a/pl-hydra/configs/trainer/ddp.yaml b/pl-hydra/configs/trainer/ddp.yaml new file mode 100644 index 0000000..8a11249 --- /dev/null +++ b/pl-hydra/configs/trainer/ddp.yaml @@ -0,0 +1,6 @@ +defaults: + - default.yaml + +gpus: 4 +strategy: ddp +sync_batchnorm: True diff --git a/pl-hydra/configs/trainer/default.yaml b/pl-hydra/configs/trainer/default.yaml new file mode 100644 index 0000000..2a1ee74 --- /dev/null +++ b/pl-hydra/configs/trainer/default.yaml @@ -0,0 +1,12 @@ +_target_: pytorch_lightning.Trainer + +gpus: 1 + +min_epochs: 1 +max_epochs: 150 + +# number of validation steps to execute at the beginning of the training +# num_sanity_val_steps: 0 + +# ckpt path +resume_from_checkpoint: null diff --git a/pl-hydra/notebooks/.gitkeep b/pl-hydra/notebooks/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/pl-hydra/notebooks/make_vgg11.ipynb b/pl-hydra/notebooks/make_vgg11.ipynb new file mode 100644 index 0000000..3d43ffa --- /dev/null +++ b/pl-hydra/notebooks/make_vgg11.ipynb @@ -0,0 +1,196 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python38264bitpytorchcondad338a0d9609a4ea7a86cafca05238e80", + "display_name": "Python 3.8.2 64-bit ('PYTORCH': conda)" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Standard libraries\n", + "import os\n", + "\n", + "## PyTorch\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.utils.data as data\n", + "import torch.optim as optim\n", + "from types import SimpleNamespace\n", + "\n", + "import math\n", + "import torch.nn.init as init" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Files already downloaded and verified\n" + } + ], + "source": [ + "## 
classes\n", + "name_classes = ['Airplane', 'Automobile', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck']\n", + "\n", + "# Act fns\n", + "act_fn_by_name = {\n", + " \"tanh\": nn.Tanh,\n", + " \"relu\": nn.ReLU,\n", + " \"leakyrelu\": nn.LeakyReLU,\n", + " \"gelu\": nn.GELU\n", + "}\n", + "# Path to the folder where the datasets are/should be downloaded (e.g. CIFAR10)\n", + "DATASET_PATH = \"../data\"\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": {}, + "outputs": [], + "source": [ + "## VGG network\n", + "class CnnBlock(nn.Module):\n", + "\n", + " def __init__(self, c_in, c_out, act_fn):\n", + " \"\"\"\n", + " Inputs:\n", + " c_in - Number of input feature maps from the previous layers\n", + " c_out - Number of output feature maps\n", + " act_fn - Activation class constructor (e.g. nn.ReLU)\n", + " \"\"\"\n", + " super().__init__()\n", + "\n", + " self.conv = nn.Sequential(\n", + " nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),\n", + " nn.BatchNorm2d(c_out),\n", + " act_fn()\n", + " )\n", + "\n", + " def forward(self, x):\n", + " return self.conv(x)\n", + "\n", + "class VGG11(nn.Module):\n", + " '''\n", + " VGG model \n", + " '''\n", + " def __init__(self,\n", + " num_classes: int=10,\n", + " act_fn_name = \"relu\",\n", + " **kwargs):\n", + " super().__init__()\n", + " self.hparams = SimpleNamespace(num_classes=num_classes,\n", + " act_fn_name=act_fn_name)\n", + " #print(self.hparams)\n", + " self._create_network()\n", + " self._init_params()\n", + "\n", + "\n", + " def _create_network(self):\n", + " \n", + " # Creating the features map\n", + " self.vgg_blocks = nn.Sequential(\n", + " CnnBlock(3, 64, act_fn=act_fn_by_name[self.hparams.act_fn_name]),\n", + " nn.MaxPool2d(kernel_size=2, stride=2),\n", + " CnnBlock(64, 128, act_fn=act_fn_by_name[self.hparams.act_fn_name]),\n", + " nn.MaxPool2d(kernel_size=2, stride=2),\n", + " CnnBlock(128, 256, act_fn=act_fn_by_name[self.hparams.act_fn_name]),\n", + " #CnnBlock(256, 256, act_fn=act_fn_by_name[self.hparams.act_fn_name]),\n", + " nn.MaxPool2d(kernel_size=2, stride=2),\n", + " CnnBlock(256, 512, act_fn=act_fn_by_name[self.hparams.act_fn_name]),\n", + " #CnnBlock(512, 512, act_fn=act_fn_by_name[self.hparams.act_fn_name]),\n", + " nn.MaxPool2d(kernel_size=2, stride=2),\n", + " CnnBlock(512, 512, act_fn=act_fn_by_name[self.hparams.act_fn_name]),\n", + " #CnnBlock(512, 512, act_fn=act_fn_by_name[self.hparams.act_fn_name]),\n", + " nn.MaxPool2d(kernel_size=2, stride=2),\n", + " )\n", + " \n", + " # Mapping to classification output\n", + " self.output_net = nn.Sequential(nn.Flatten(),\n", + " nn.Linear(512, 512),\n", + " act_fn_by_name[self.hparams.act_fn_name](),\n", + " nn.Linear(512, 512),\n", + " act_fn_by_name[self.hparams.act_fn_name](),\n", + " nn.Linear(512, self.hparams.num_classes),\n", + " )\n", + "\n", + " def _init_params(self):\n", + " for m in self.modules():\n", + " if isinstance(m, nn.Conv2d):\n", + " nn.init.kaiming_normal_(\n", + " m.weight, \n", + " nonlinearity=self.hparams.act_fn_name)\n", + " elif isinstance(m, nn.BatchNorm2d):\n", + " nn.init.constant_(m.weight, 1)\n", + " nn.init.constant_(m.bias, 0)\n", + "\n", + "\n", + " def forward(self, x):\n", + " x = self.vgg_blocks(x)\n", + " x = self.output_net(x)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") # PyTorch v0.4.0\n", + "model = VGG11().to(device)" + ] + }, 
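The next cell prints a layer-by-layer summary via `summary(model, (3, 32, 32))`, but no matching import appears in the cells shown above; presumably it comes from the third-party torchsummary package. A hedged equivalent, meant to run after the cells above and assuming torchsummary is installed:

# Assumes the third-party `torchsummary` package; the import is missing from the notebook as shown.
from torchsummary import summary

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGG11().to(device)
summary(model, input_size=(3, 32, 32))  # prints the per-layer table shown in the next cell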
+ { + "cell_type": "code", + "execution_count": 144, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "----------------------------------------------------------------\n Layer (type) Output Shape Param #\n================================================================\n Conv2d-1 [-1, 64, 32, 32] 1,792\n BatchNorm2d-2 [-1, 64, 32, 32] 128\n ReLU-3 [-1, 64, 32, 32] 0\n CnnBlock-4 [-1, 64, 32, 32] 0\n MaxPool2d-5 [-1, 64, 16, 16] 0\n Conv2d-6 [-1, 128, 16, 16] 73,856\n BatchNorm2d-7 [-1, 128, 16, 16] 256\n ReLU-8 [-1, 128, 16, 16] 0\n CnnBlock-9 [-1, 128, 16, 16] 0\n MaxPool2d-10 [-1, 128, 8, 8] 0\n Conv2d-11 [-1, 256, 8, 8] 295,168\n BatchNorm2d-12 [-1, 256, 8, 8] 512\n ReLU-13 [-1, 256, 8, 8] 0\n CnnBlock-14 [-1, 256, 8, 8] 0\n MaxPool2d-15 [-1, 256, 4, 4] 0\n Conv2d-16 [-1, 512, 4, 4] 1,180,160\n BatchNorm2d-17 [-1, 512, 4, 4] 1,024\n ReLU-18 [-1, 512, 4, 4] 0\n CnnBlock-19 [-1, 512, 4, 4] 0\n MaxPool2d-20 [-1, 512, 2, 2] 0\n Conv2d-21 [-1, 512, 2, 2] 2,359,808\n BatchNorm2d-22 [-1, 512, 2, 2] 1,024\n ReLU-23 [-1, 512, 2, 2] 0\n CnnBlock-24 [-1, 512, 2, 2] 0\n MaxPool2d-25 [-1, 512, 1, 1] 0\n Flatten-26 [-1, 512] 0\n Linear-27 [-1, 512] 262,656\n ReLU-28 [-1, 512] 0\n Linear-29 [-1, 512] 262,656\n ReLU-30 [-1, 512] 0\n Linear-31 [-1, 10] 5,130\n================================================================\nTotal params: 4,444,170\nTrainable params: 4,444,170\nNon-trainable params: 0\n----------------------------------------------------------------\nInput size (MB): 0.01\nForward/backward pass size (MB): 4.07\nParams size (MB): 16.95\nEstimated Total Size (MB): 21.04\n----------------------------------------------------------------\n" + } + ], + "source": [ + "summary(model, (3, 32, 32))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/pl-hydra/scripts/schedule.sh b/pl-hydra/scripts/schedule.sh new file mode 100644 index 0000000..a3fbaaa --- /dev/null +++ b/pl-hydra/scripts/schedule.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# Shedule execution of many runs +# Run from root folder with: bash scripts/schedule.sh + +python train.py trainer.max_epochs=5 + +python train.py trainer.max_epochs=10 logger=csv diff --git a/pl-hydra/setup.cfg b/pl-hydra/setup.cfg new file mode 100644 index 0000000..a205e67 --- /dev/null +++ b/pl-hydra/setup.cfg @@ -0,0 +1,36 @@ +[isort] +line_length = 99 +profile = black +filter_files = True + + +[flake8] +max_line_length = 99 +show_source = True +format = pylint +ignore = + F401 # Module imported but unused + W504 # Line break occurred after a binary operator + F841 # Local variable name is assigned to but never used + E501 # Line too long +exclude = + .git + __pycache__ + data/* + tests/* + notebooks/* + logs/* + + +[tool:pytest] +testpaths = tests/ +log_cli = True +markers = + slow +addopts = + --durations=0 + --strict-markers + --doctest-modules +filterwarnings = + ignore::DeprecationWarning + ignore::UserWarning diff --git a/pl-hydra/src/__init__.py b/pl-hydra/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pl-hydra/src/datamodules/__init__.py b/pl-hydra/src/datamodules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pl-hydra/src/datamodules/cifar10_datamodule.py b/pl-hydra/src/datamodules/cifar10_datamodule.py new file mode 100644 index 0000000..6da4c37 --- /dev/null +++ b/pl-hydra/src/datamodules/cifar10_datamodule.py @@ -0,0 +1,128 @@ +from 
typing import Optional, Tuple
+
+import torch
+from pytorch_lightning import LightningDataModule
+from torch.utils.data import ConcatDataset, DataLoader, Dataset, random_split
+from torchvision.datasets import CIFAR10
+from torchvision.transforms import transforms
+
+
+class CIFAR10DataModule(LightningDataModule):
+    """Example of LightningDataModule for CIFAR10 dataset.
+
+    A DataModule implements 5 key methods:
+        - prepare_data (things to do on 1 GPU/TPU, not on every GPU/TPU in distributed mode)
+        - setup (things to do on every accelerator in distributed mode)
+        - train_dataloader (the training dataloader)
+        - val_dataloader (the validation dataloader(s))
+        - test_dataloader (the test dataloader(s))
+
+    This allows you to share a full dataset without explaining how to download,
+    split, transform and process the data.
+
+    Read the docs:
+        https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html
+    """
+
+    def __init__(
+        self,
+        data_dir: str = "data/",
+        train_val_split: Tuple[int, int, int] = (55_000, 5_000, 10_000),
+        batch_size: int = 128,
+        num_workers: int = 4,
+        pin_memory: bool = True,
+        data_mean: Tuple[float, float, float] = (0.0, 0.0, 0.0),
+        data_std: Tuple[float, float, float] = (0.0, 0.0, 0.0),
+        image_size: Tuple[int, int] = (32, 32),
+        scale_bounds: Tuple[float, float] = (0.8, 1.0),
+        aspect_bounds: Tuple[float, float] = (0.9, 1.1)
+    ):
+        super().__init__()
+
+        # this line allows to access init params with 'self.hparams' attribute
+        self.save_hyperparameters(logger=False)
+
+        # data transformations
+        self.train_transforms = transforms.Compose([
+            transforms.RandomHorizontalFlip(),
+            transforms.RandomResizedCrop(image_size, scale=scale_bounds, ratio=aspect_bounds),
+            transforms.ToTensor(),
+            transforms.Normalize(data_mean, data_std)
+        ])
+
+        self.test_transforms = transforms.Compose([transforms.ToTensor(),
+                                                   transforms.Normalize(data_mean, data_std)
+                                                   ])
+
+        self.data_train: Optional[Dataset] = None
+        self.data_val: Optional[Dataset] = None
+        self.data_test: Optional[Dataset] = None
+
+    @property
+    def num_classes(self) -> int:
+        return 10
+
+    def prepare_data(self):
+        """Download data if needed.
+
+        This method is called only from a single GPU.
+        Do not use it to assign state (self.x = y).
+        """
+        CIFAR10(self.hparams.data_dir, train=True, download=True)
+        CIFAR10(self.hparams.data_dir, train=False, download=True)
+
+
+    def setup(self, stage: Optional[str] = None):
+        """Load data. Set variables: `self.data_train`, `self.data_val`, `self.data_test`.
+
+        This method is called by lightning when doing `trainer.fit()` and `trainer.test()`,
+        so be careful not to execute the random split twice! The `stage` can be used to
+        differentiate whether it's called before `trainer.fit()` or `trainer.test()`.
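As a usage aside (not part of the patch), a minimal sketch of driving this datamodule directly, without Hydra; the constructor values mirror configs/datamodule/cifar10.yaml, and the printed shapes assume the CIFAR10 download succeeds:

# Standalone sanity check; values mirror configs/datamodule/cifar10.yaml.
from src.datamodules.cifar10_datamodule import CIFAR10DataModule

dm = CIFAR10DataModule(
    data_dir="data/",
    batch_size=128,
    num_workers=4,
    data_mean=(0.49421428, 0.48513139, 0.45040909),
    data_std=(0.24665252, 0.24289226, 0.26159238),
)
dm.prepare_data()                  # downloads CIFAR10 into data_dir if needed
dm.setup()                         # assigns data_train / data_val / data_test
images, labels = next(iter(dm.train_dataloader()))
print(images.shape, labels.shape)  # torch.Size([128, 3, 32, 32]) torch.Size([128])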
+ """ + + # load datasets only if they're not loaded already + if not self.data_train and not self.data_val and not self.data_test: + trainset = CIFAR10(self.hparams.data_dir, + train=True, + transform=self.train_transforms) + valset = CIFAR10(self.hparams.data_dir, + train=False, + transform=self.test_transforms) + testset = CIFAR10(self.hparams.data_dir, + train=False, + transform=self.test_transforms) + + self.data_train = trainset + self.data_val = valset + self.data_test = testset + + #print(type(self.data_train.dataset)) + + def train_dataloader(self): + return DataLoader( + dataset=self.data_train, + batch_size=self.hparams.batch_size, + num_workers=self.hparams.num_workers, + pin_memory=self.hparams.pin_memory, + shuffle=True, + drop_last=True + ) + + def val_dataloader(self): + return DataLoader( + dataset=self.data_val, + batch_size=self.hparams.batch_size, + num_workers=self.hparams.num_workers, + shuffle=False, + drop_last=False + ) + + + def test_dataloader(self): + return DataLoader( + dataset=self.data_test, + batch_size=self.hparams.batch_size, + num_workers=self.hparams.num_workers, + shuffle=False, + drop_last=False + ) diff --git a/pl-hydra/src/datamodules/components/__init__.py b/pl-hydra/src/datamodules/components/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pl-hydra/src/datamodules/mnist_datamodule.py b/pl-hydra/src/datamodules/mnist_datamodule.py new file mode 100644 index 0000000..377c537 --- /dev/null +++ b/pl-hydra/src/datamodules/mnist_datamodule.py @@ -0,0 +1,106 @@ +from typing import Optional, Tuple + +import torch +from pytorch_lightning import LightningDataModule +from torch.utils.data import ConcatDataset, DataLoader, Dataset, random_split +from torchvision.datasets import MNIST +from torchvision.transforms import transforms + + +class MNISTDataModule(LightningDataModule): + """Example of LightningDataModule for MNIST dataset. + + A DataModule implements 5 key methods: + - prepare_data (things to do on 1 GPU/TPU, not on every GPU/TPU in distributed mode) + - setup (things to do on every accelerator in distributed mode) + - train_dataloader (the training dataloader) + - val_dataloader (the validation dataloader(s)) + - test_dataloader (the test dataloader(s)) + + This allows you to share a full dataset without explaining how to download, + split, transform and process the data. + + Read the docs: + https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html + """ + + def __init__( + self, + data_dir: str = "data/", + train_val_test_split: Tuple[int, int, int] = (55_000, 5_000, 10_000), + batch_size: int = 64, + num_workers: int = 0, + pin_memory: bool = False, + ): + super().__init__() + + # this line allows to access init params with 'self.hparams' attribute + self.save_hyperparameters(logger=False) + + # data transformations + self.transforms = transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ) + + self.data_train: Optional[Dataset] = None + self.data_val: Optional[Dataset] = None + self.data_test: Optional[Dataset] = None + + @property + def num_classes(self) -> int: + return 10 + + def prepare_data(self): + """Download data if needed. + + This method is called only from a single GPU. + Do not use it to assign state (self.x = y). + """ + MNIST(self.hparams.data_dir, train=True, download=True) + MNIST(self.hparams.data_dir, train=False, download=True) + + def setup(self, stage: Optional[str] = None): + """Load data. 
Set variables: `self.data_train`, `self.data_val`, `self.data_test`.
+
+        This method is called by lightning when doing `trainer.fit()` and `trainer.test()`,
+        so be careful not to execute the random split twice! The `stage` can be used to
+        differentiate whether it's called before `trainer.fit()` or `trainer.test()`.
+        """
+
+        # load datasets only if they're not loaded already
+        if not self.data_train and not self.data_val and not self.data_test:
+            trainset = MNIST(self.hparams.data_dir, train=True, transform=self.transforms)
+            testset = MNIST(self.hparams.data_dir, train=False, transform=self.transforms)
+            dataset = ConcatDataset(datasets=[trainset, testset])
+            self.data_train, self.data_val, self.data_test = random_split(
+                dataset=dataset,
+                lengths=self.hparams.train_val_test_split,
+                generator=torch.Generator().manual_seed(42),
+            )
+
+    def train_dataloader(self):
+        return DataLoader(
+            dataset=self.data_train,
+            batch_size=self.hparams.batch_size,
+            num_workers=self.hparams.num_workers,
+            pin_memory=self.hparams.pin_memory,
+            shuffle=True,
+        )
+
+    def val_dataloader(self):
+        return DataLoader(
+            dataset=self.data_val,
+            batch_size=self.hparams.batch_size,
+            num_workers=self.hparams.num_workers,
+            pin_memory=self.hparams.pin_memory,
+            shuffle=False,
+        )
+
+    def test_dataloader(self):
+        return DataLoader(
+            dataset=self.data_test,
+            batch_size=self.hparams.batch_size,
+            num_workers=self.hparams.num_workers,
+            pin_memory=self.hparams.pin_memory,
+            shuffle=False,
+        )
diff --git a/pl-hydra/src/models/__init__.py b/pl-hydra/src/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pl-hydra/src/models/cifar10_module.py b/pl-hydra/src/models/cifar10_module.py
new file mode 100644
index 0000000..d45fc25
--- /dev/null
+++ b/pl-hydra/src/models/cifar10_module.py
@@ -0,0 +1,198 @@
+from typing import Any, List
+
+import torch
+from pytorch_lightning import LightningModule
+from torchmetrics import MaxMetric
+from torchmetrics.classification.accuracy import Accuracy
+from torchmetrics.functional import confusion_matrix
+
+from src.models.components.resnet import ResNet
+#import torch.optim as optim
+import torch.nn as nn
+
+import seaborn as sns
+import pandas as pd
+import matplotlib.pylab as plt
+import numpy as np
+from src.utils.plotter import plot_cm, plot_preds
+
+import hydra
+
+## classes
+name_classes = ['Airplane', 'Automobile', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck']
+num_classes = len(name_classes)
+data_mean = [0.49421428, 0.48513139, 0.45040909]
+data_std = [0.24665252, 0.24289226, 0.26159238]
+
+class CIFAR10LitModule(LightningModule):
+    """Example of LightningModule for CIFAR10 classification.
+
+    A LightningModule organizes your PyTorch code into 5 sections:
+        - Computations (init).
+ - Train loop (training_step) + - Validation loop (validation_step) + - Test loop (test_step) + - Optimizers (configure_optimizers) + + Read the docs: + https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html + """ + + def __init__(self,*args,**kwargs): + super().__init__() + + # this line allows to access init params with 'self.hparams' attribute + # it also ensures init params will be stored in ckpt + print('here') + + self.save_hyperparameters() + # network + self.net = hydra.utils.instantiate(self.hparams['net']) + + # loss function + self.criterion = nn.CrossEntropyLoss() + + # Example input for visualizing the graph in Tensorboard + self.example_input_array = torch.zeros((1, 3, 32, 32), dtype=torch.float32) + + # use separate metric instance for train, val and test step + # to ensure a proper reduction over the epoch + self.train_acc = Accuracy() + self.val_acc = Accuracy() + self.test_acc = Accuracy() + + # for logging best so far validation accuracy + self.val_acc_best = MaxMetric() + + def forward(self, x: torch.Tensor): + return self.net(x) + + def step(self, batch: Any): + x, y = batch + logits = self.forward(x) + loss = self.criterion(logits, y) + preds = torch.argmax(logits, dim=1) + return loss, preds, y + + def training_step(self, batch: Any, batch_idx: int): + loss, preds, targets = self.step(batch) + + # log train metrics + acc = self.train_acc(preds, targets) + self.log("train/loss", loss, on_step=False, on_epoch=True, prog_bar=False) + self.log("train/acc", acc, on_step=False, on_epoch=True, prog_bar=True) + + # we can return here dict with any tensors + # and then read it in some callback or in `training_epoch_end()`` below + # remember to always return loss from `training_step()` or else backpropagation will fail! 
+ return {"loss": loss, "preds": preds, "targets": targets} + + def training_epoch_end(self, outputs: List[Any]): + # `outputs` is a list of dicts returned from `training_step()` + # plot the confusion matrix at the end of each epoch + preds = torch.cat([tmp['preds'] for tmp in outputs]) + targets = torch.cat([tmp['targets'] for tmp in outputs]) + + # plot confusion matrix + cm = confusion_matrix(targets, preds, num_classes) + fig_ = plot_cm(cm, name_classes) + plt.close(fig_) + self.logger.experiment.add_figure("confusion_matrix_train", fig_, self.current_epoch) + + def validation_step(self, batch: Any, batch_idx: int): + loss, preds, targets = self.step(batch) + + # plot figures + if batch_idx == 0: + images, _ = batch + fig_ = plot_preds(images.cpu().numpy(), + targets.cpu().numpy(), + preds.cpu().numpy(), + name_classes, + nimg=32, + ncols=8, + data_mean=data_mean, + data_std=data_std) + self.logger.experiment.add_figure( + "examples_val_batch_idx_" + str(batch_idx), + fig_, + self.current_epoch) + + # log val metrics + acc = self.val_acc(preds, targets) + self.log("val/loss", loss, on_step=False, on_epoch=True, prog_bar=False) + self.log("val/acc", acc, on_step=False, on_epoch=True, prog_bar=True) + #print(preds) + return {"loss": loss, "preds": preds, "targets": targets} + + def validation_epoch_end(self, outputs: List[Any]): + acc = self.val_acc.compute() # get val accuracy from current epoch + self.val_acc_best.update(acc) + self.log("val/acc_best", self.val_acc_best.compute(), on_epoch=True, prog_bar=True) + + # plot the confusion matrix at the end of each epoch + preds = torch.cat([tmp['preds'] for tmp in outputs]) + targets = torch.cat([tmp['targets'] for tmp in outputs]) + cm = confusion_matrix(targets, preds, num_classes) + fig_ = plot_cm(cm, name_classes) + plt.close(fig_) + self.logger.experiment.add_figure("confusion_matrix_val", fig_, self.current_epoch) + + + def test_step(self, batch: Any, batch_idx: int): + loss, preds, targets = self.step(batch) + + # plot figures + if batch_idx == 0: + images, _ = batch + fig_ = plot_preds(images.cpu().numpy(), + targets.cpu().numpy(), + preds.cpu().numpy(), + name_classes, + nimg=32, + ncols=8, + data_mean=data_mean, + data_std=data_std) + self.logger.experiment.add_figure( + "examples_test_batch_idx_" + str(batch_idx), + fig_, + self.current_epoch) + + # log test metrics + acc = self.test_acc(preds, targets) + self.log("test/loss", loss, on_step=False, on_epoch=True) + self.log("test/acc", acc, on_step=False, on_epoch=True) + + return {"loss": loss, "preds": preds, "targets": targets} + + def test_epoch_end(self, outputs: List[Any]): + # plot the confusion matrix at the end of each epoch + preds = torch.cat([tmp['preds'] for tmp in outputs]) + targets = torch.cat([tmp['targets'] for tmp in outputs]) + cm = confusion_matrix(targets, preds, num_classes) + fig_ = plot_cm(cm, name_classes) + plt.close(fig_) + + self.logger.experiment.add_figure("confusion_matrix_test", fig_, self.current_epoch) + + def on_epoch_end(self): + # reset metrics at the end of every epoch + self.train_acc.reset() + self.test_acc.reset() + self.val_acc.reset() + + + def configure_optimizers(self): + optimizer=hydra.utils.instantiate( + self.hparams.optim["optimizer"], + params=self.net.parameters() + ) + + if(self.hparams.optim['use_lr_scheduler']==True): + scheduler=hydra.utils.instantiate( + self.hparams.optim['lr_scheduler'], + optimizer=optimizer + ) + return [optimizer],[scheduler] + else: + return optimizer \ No newline at end of file diff --git 
a/pl-hydra/src/models/components/__init__.py b/pl-hydra/src/models/components/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pl-hydra/src/models/components/densenet.py b/pl-hydra/src/models/components/densenet.py new file mode 100644 index 0000000..43b3670 --- /dev/null +++ b/pl-hydra/src/models/components/densenet.py @@ -0,0 +1,153 @@ +''' +Modified from https://github.com/phlippe/uvadlc_notebooks.git +''' + +## Standard libraries +import os + +## PyTorch +import torch +import torch.nn as nn +import torch.utils.data as data +import torch.optim as optim +from types import SimpleNamespace + + +act_fn_by_name = { + "tanh": nn.Tanh, + "relu": nn.ReLU, + "leakyrelu": nn.LeakyReLU, + "gelu": nn.GELU +} + +class DenseLayer(nn.Module): + + def __init__(self, c_in, bn_size, growth_rate, act_fn): + """ + Inputs: + c_in - Number of input channels + bn_size - Bottleneck size (factor of growth rate) for the output of the 1x1 convolution. Typically between 2 and 4. + growth_rate - Number of output channels of the 3x3 convolution + act_fn - Activation class constructor (e.g. nn.ReLU) + """ + super().__init__() + self.net = nn.Sequential( + nn.BatchNorm2d(c_in), + act_fn(), + nn.Conv2d(c_in, bn_size * growth_rate, kernel_size=1, bias=False), + nn.BatchNorm2d(bn_size * growth_rate), + act_fn(), + nn.Conv2d(bn_size * growth_rate, growth_rate, kernel_size=3, padding=1, bias=False) + ) + + def forward(self, x): + out = self.net(x) + out = torch.cat([out, x], dim=1) + return out + +# Block +class DenseBlock(nn.Module): + + def __init__(self, c_in, num_layers, bn_size, growth_rate, act_fn): + """ + Inputs: + c_in - Number of input channels + num_layers - Number of dense layers to apply in the block + bn_size - Bottleneck size to use in the dense layers + growth_rate - Growth rate to use in the dense layers + act_fn - Activation function to use in the dense layers + """ + super().__init__() + layers = [] + for layer_idx in range(num_layers): + layers.append( + DenseLayer(c_in=c_in + layer_idx * growth_rate, # Input channels are original plus the feature maps from previous layers + bn_size=bn_size, + growth_rate=growth_rate, + act_fn=act_fn) + ) + self.block = nn.Sequential(*layers) + + def forward(self, x): + out = self.block(x) + return out + +class TransitionLayer(nn.Module): + + def __init__(self, c_in, c_out, act_fn): + super().__init__() + self.transition = nn.Sequential( + nn.BatchNorm2d(c_in), + act_fn(), + nn.Conv2d(c_in, c_out, kernel_size=1, bias=False), + nn.AvgPool2d(kernel_size=2, stride=2) # Average the output for each 2x2 pixel group + ) + + def forward(self, x): + return self.transition(x) + +class DenseNet(nn.Module): + + def __init__(self, num_classes=10, num_layers=[6,6,6,6], bn_size=2, growth_rate=16, act_fn_name="relu", **kwargs): + super().__init__() + self.hparams = SimpleNamespace(num_classes=num_classes, + num_layers=num_layers, + bn_size=bn_size, + growth_rate=growth_rate, + act_fn_name=act_fn_name, + ) + self._create_network() + self._init_params() + + def _create_network(self): + c_hidden = self.hparams.growth_rate * self.hparams.bn_size # The start number of hidden channels + + # A first convolution on the original image to scale up the channel size + self.input_net = nn.Sequential( + nn.Conv2d(3, c_hidden, kernel_size=3, padding=1) # No batch norm or activation function as done inside the Dense layers + ) + + # Creating the dense blocks, eventually including transition layers + blocks = [] + for block_idx, num_layers in enumerate(self.hparams.num_layers): + 
blocks.append( + DenseBlock(c_in=c_hidden, + num_layers=num_layers, + bn_size=self.hparams.bn_size, + growth_rate=self.hparams.growth_rate, + act_fn=act_fn_by_name[self.hparams.act_fn_name]) + ) + c_hidden = c_hidden + num_layers * self.hparams.growth_rate # Overall output of the dense block + if block_idx < len(self.hparams.num_layers)-1: # Don't apply transition layer on last block + blocks.append( + TransitionLayer(c_in=c_hidden, + c_out=c_hidden // 2, + act_fn=act_fn_by_name[self.hparams.act_fn_name])) + c_hidden = c_hidden // 2 + + self.blocks = nn.Sequential(*blocks) + + # Mapping to classification output + self.output_net = nn.Sequential( + nn.BatchNorm2d(c_hidden), # The features have not passed a non-linearity until here. + act_fn_by_name[self.hparams.act_fn_name](), + nn.AdaptiveAvgPool2d((1,1)), + nn.Flatten(), + nn.Linear(c_hidden, self.hparams.num_classes) + ) + + def _init_params(self): + # Based on our discussion in Tutorial 4, we should initialize the convolutions according to the activation function + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, nonlinearity=self.hparams.act_fn_name) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def forward(self, x): + x = self.input_net(x) + x = self.blocks(x) + x = self.output_net(x) + return x + diff --git a/pl-hydra/src/models/components/googlenet.py b/pl-hydra/src/models/components/googlenet.py new file mode 100644 index 0000000..0745736 --- /dev/null +++ b/pl-hydra/src/models/components/googlenet.py @@ -0,0 +1,132 @@ +''' +Modified from https://github.com/phlippe/uvadlc_notebooks.git +''' + +## Standard libraries +import os + +## PyTorch +import torch +import torch.nn as nn +import torch.utils.data as data +import torch.optim as optim +from types import SimpleNamespace + + +act_fn_by_name = { + "tanh": nn.Tanh, + "relu": nn.ReLU, + "leakyrelu": nn.LeakyReLU, + "gelu": nn.GELU +} + + +# Block +class InceptionBlock(nn.Module): + + def __init__(self, c_in, c_red : dict, c_out : dict, act_fn): + """ + Inputs: + c_in - Number of input feature maps from the previous layers + c_red - Dictionary with keys "3x3" and "5x5" specifying the output of the dimensionality reducing 1x1 convolutions + c_out - Dictionary with keys "1x1", "3x3", "5x5", and "max" + act_fn - Activation class constructor (e.g. 
nn.ReLU) + """ + super().__init__() + + # 1x1 convolution branch + self.conv_1x1 = nn.Sequential( + nn.Conv2d(c_in, c_out["1x1"], kernel_size=1), + nn.BatchNorm2d(c_out["1x1"]), + act_fn() + ) + + # 3x3 convolution branch + self.conv_3x3 = nn.Sequential( + nn.Conv2d(c_in, c_red["3x3"], kernel_size=1), + nn.BatchNorm2d(c_red["3x3"]), + act_fn(), + nn.Conv2d(c_red["3x3"], c_out["3x3"], kernel_size=3, padding=1), + nn.BatchNorm2d(c_out["3x3"]), + act_fn() + ) + + # 5x5 convolution branch + self.conv_5x5 = nn.Sequential( + nn.Conv2d(c_in, c_red["5x5"], kernel_size=1), + nn.BatchNorm2d(c_red["5x5"]), + act_fn(), + nn.Conv2d(c_red["5x5"], c_out["5x5"], kernel_size=5, padding=2), + nn.BatchNorm2d(c_out["5x5"]), + act_fn() + ) + + # Max-pool branch + self.max_pool = nn.Sequential( + nn.MaxPool2d(kernel_size=3, padding=1, stride=1), + nn.Conv2d(c_in, c_out["max"], kernel_size=1), + nn.BatchNorm2d(c_out["max"]), + act_fn() + ) + + def forward(self, x): + x_1x1 = self.conv_1x1(x) + x_3x3 = self.conv_3x3(x) + x_5x5 = self.conv_5x5(x) + x_max = self.max_pool(x) + x_out = torch.cat([x_1x1, x_3x3, x_5x5, x_max], dim=1) + return x_out + + +class GoogleNet(nn.Module): + + def __init__(self, num_classes=10, act_fn_name="relu", **kwargs): + super().__init__() + self.hparams = SimpleNamespace(num_classes=num_classes, + act_fn_name=act_fn_name,) + self._create_network() + self._init_params() + + def _create_network(self): + # A first convolution on the original image to scale up the channel size + self.input_net = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=3, padding=1), + nn.BatchNorm2d(64), + act_fn_by_name[self.hparams.act_fn_name]() + ) + # Stacking inception blocks + self.inception_blocks = nn.Sequential( + InceptionBlock(64, c_red={"3x3": 32, "5x5": 16}, c_out={"1x1": 16, "3x3": 32, "5x5": 8, "max": 8}, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + InceptionBlock(64, c_red={"3x3": 32, "5x5": 16}, c_out={"1x1": 24, "3x3": 48, "5x5": 12, "max": 12}, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + nn.MaxPool2d(3, stride=2, padding=1), # 32x32 => 16x16 + InceptionBlock(96, c_red={"3x3": 32, "5x5": 16}, c_out={"1x1": 24, "3x3": 48, "5x5": 12, "max": 12}, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + InceptionBlock(96, c_red={"3x3": 32, "5x5": 16}, c_out={"1x1": 16, "3x3": 48, "5x5": 16, "max": 16}, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + InceptionBlock(96, c_red={"3x3": 32, "5x5": 16}, c_out={"1x1": 16, "3x3": 48, "5x5": 16, "max": 16}, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + InceptionBlock(96, c_red={"3x3": 32, "5x5": 16}, c_out={"1x1": 32, "3x3": 48, "5x5": 24, "max": 24}, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + nn.MaxPool2d(3, stride=2, padding=1), # 16x16 => 8x8 + InceptionBlock(128, c_red={"3x3": 48, "5x5": 16}, c_out={"1x1": 32, "3x3": 64, "5x5": 16, "max": 16}, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + InceptionBlock(128, c_red={"3x3": 48, "5x5": 16}, c_out={"1x1": 32, "3x3": 64, "5x5": 16, "max": 16}, act_fn=act_fn_by_name[self.hparams.act_fn_name]) + ) + # Mapping to classification output + self.output_net = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + nn.Flatten(), + nn.Linear(128, self.hparams.num_classes) + ) + + def _init_params(self): + # Based on our discussion in Tutorial 4, we should initialize the convolutions according to the activation function + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, + nonlinearity=self.hparams.act_fn_name) + elif isinstance(m, nn.BatchNorm2d): + 
nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def forward(self, x): + x = self.input_net(x) + x = self.inception_blocks(x) + x = self.output_net(x) + return x diff --git a/pl-hydra/src/models/components/resnet.py b/pl-hydra/src/models/components/resnet.py new file mode 100644 index 0000000..5004e2a --- /dev/null +++ b/pl-hydra/src/models/components/resnet.py @@ -0,0 +1,135 @@ +''' +Modified from https://github.com/phlippe/uvadlc_notebooks.git +''' + +## Standard libraries +import os + +## PyTorch +import torch +import torch.nn as nn +import torch.utils.data as data +import torch.optim as optim +from types import SimpleNamespace + + +act_fn_by_name = { + "tanh": nn.Tanh, + "relu": nn.ReLU, + "leakyrelu": nn.LeakyReLU, + "gelu": nn.GELU +} + + +# Block +class ResNetBlock(nn.Module): + + def __init__(self, c_in, act_fn, subsample=False, c_out=-1): + """ + Inputs: + c_in - Number of input features + act_fn - Activation class constructor (e.g. nn.ReLU) + subsample - If True, we want to apply a stride inside the block and reduce the output shape by 2 in height and width + c_out - Number of output features. Note that this is only relevant if subsample is True, as otherwise, c_out = c_in + """ + super().__init__() + if not subsample: + c_out = c_in + + # Network representing F + self.net = nn.Sequential( + nn.Conv2d(c_in, c_out, kernel_size=3, padding=1, stride=1 if not subsample else 2, bias=False), # No bias needed as the Batch Norm handles it + nn.BatchNorm2d(c_out), + act_fn(), + nn.Conv2d(c_out, c_out, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(c_out) + ) + + # 1x1 convolution with stride 2 means we take the upper left value, and transform it to new output size + self.downsample = nn.Conv2d(c_in, c_out, kernel_size=1, stride=2) if subsample else None + self.act_fn = act_fn() + #print(self.act_fn) + + def forward(self, x): + z = self.net(x) + if self.downsample is not None: + x = self.downsample(x) + out = z + x + out = self.act_fn(out) + return out + + +# Residual Network +class ResNet(nn.Module): + + def __init__(self, + num_classes: int=10, + num_blocks: list=[3,3,3], + c_hidden: list=[16,32,64], + act_fn_name = "relu", + **kwargs): + """ + Inputs: + num_classes - Number of classification outputs (10 for CIFAR10) + num_blocks - List with the number of ResNet blocks to use. The first block of each group uses downsampling, except the first. + c_hidden - List with the hidden dimensionalities in the different blocks. Usually multiplied by 2 the deeper we go. + act_fn_name - Name of the activation function to use, looked up in "act_fn_by_name" + block_name - Name of the ResNet block, looked up in "resnet_blocks_by_name" + """ + super().__init__() + self.hparams = SimpleNamespace(num_classes=num_classes, + c_hidden=c_hidden, + num_blocks=num_blocks, + act_fn_name=act_fn_name) + #print(self.hparams) + self._create_network() + self._init_params() + + def _create_network(self): + c_hidden = self.hparams.c_hidden + + # A first convolution on the original image to scale up the channel size + self.input_net = nn.Sequential( + nn.Conv2d(3, c_hidden[0], kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(c_hidden[0]), + act_fn_by_name[self.hparams.act_fn_name]() + ) + #print(self.input_net) + + # Creating the ResNet blocks + blocks = [] + for block_idx, block_count in enumerate(self.hparams.num_blocks): + for bc in range(block_count): + subsample = (bc == 0 and block_idx > 0) # Subsample the first block of each group, except the very first one. 
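+ # a subsampling block still receives the previous group's channel count, hence block_idx-1 below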
+ blocks.append( + ResNetBlock(c_in=c_hidden[block_idx if not subsample else (block_idx-1)], + act_fn=act_fn_by_name[self.hparams.act_fn_name], + subsample=subsample, + c_out=c_hidden[block_idx]) + ) + self.blocks = nn.Sequential(*blocks) + + # Mapping to classification output + self.output_net = nn.Sequential( + nn.AdaptiveAvgPool2d((1,1)), + nn.Flatten(), + nn.Linear(c_hidden[-1], self.hparams.num_classes) + ) + + def _init_params(self): + # Based on our discussion in Tutorial 4, we should initialize the convolutions according to the activation function + # Fan-out focuses on the gradient distribution, and is commonly used in ResNets + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, + mode='fan_out', + nonlinearity=self.hparams.act_fn_name) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def forward(self, x): + x = self.input_net(x) + x = self.blocks(x) + x = self.output_net(x) + return x diff --git a/pl-hydra/src/models/components/vgg.py b/pl-hydra/src/models/components/vgg.py new file mode 100644 index 0000000..ccd0926 --- /dev/null +++ b/pl-hydra/src/models/components/vgg.py @@ -0,0 +1,111 @@ +''' +Modified from https://github.com/chengyangfu/pytorch-vgg-cifar10.git +''' + +## Standard libraries +import os + +## PyTorch +import torch +import torch.nn as nn +import torch.utils.data as data +import torch.optim as optim +from types import SimpleNamespace + + +act_fn_by_name = { + "tanh": nn.Tanh, + "relu": nn.ReLU, + "leakyrelu": nn.LeakyReLU, + "gelu": nn.GELU +} + + +import math + +import torch.nn as nn +import torch.nn.init as init + + +class CnnBlock(nn.Module): + + def __init__(self, c_in, c_out, act_fn): + """ + Inputs: + c_in - Number of input feature maps from the previous layers + c_out - Number of output feature maps + act_fn - Activation class constructor (e.g. 
nn.ReLU) + """ + super().__init__() + + self.conv = nn.Sequential( + nn.Conv2d(c_in, c_out, kernel_size=3, padding=1), + nn.BatchNorm2d(c_out), + act_fn() + ) + + def forward(self, x): + return self.conv(x) + +class VGG11(nn.Module): + ''' + VGG model + ''' + def __init__(self, + num_classes: int=10, + act_fn_name = "relu", + **kwargs): + super().__init__() + self.hparams = SimpleNamespace(num_classes=num_classes, + act_fn_name=act_fn_name) + #print(self.hparams) + self._create_network() + self._init_params() + + + def _create_network(self): + + # Creating the features map + self.vgg_blocks = nn.Sequential( + CnnBlock(3, 64, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + nn.MaxPool2d(kernel_size=2, stride=2), + CnnBlock(64, 128, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + nn.MaxPool2d(kernel_size=2, stride=2), + CnnBlock(128, 256, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + CnnBlock(256, 256, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + nn.MaxPool2d(kernel_size=2, stride=2), + CnnBlock(256, 512, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + CnnBlock(512, 512, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + nn.MaxPool2d(kernel_size=2, stride=2), + CnnBlock(512, 512, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + CnnBlock(512, 512, act_fn=act_fn_by_name[self.hparams.act_fn_name]), + nn.MaxPool2d(kernel_size=2, stride=2), + ) + + # Mapping to classification output + self.output_net = nn.Sequential(nn.Flatten(), + nn.Dropout(0.4), + nn.Linear(512, 512), + act_fn_by_name[self.hparams.act_fn_name](), + nn.Dropout(0.4), + nn.Linear(512, 512), + act_fn_by_name[self.hparams.act_fn_name](), + nn.Linear(512, self.hparams.num_classes), + ) + + def _init_params(self): + # Based on our discussion in Tutorial 4, we should initialize the convolutions according to the activation function + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, + nonlinearity=self.hparams.act_fn_name) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + + def forward(self, x): + x = self.vgg_blocks(x) + x = self.output_net(x) + return x \ No newline at end of file diff --git a/pl-hydra/src/models/components/vit.py b/pl-hydra/src/models/components/vit.py new file mode 100644 index 0000000..1d365b2 --- /dev/null +++ b/pl-hydra/src/models/components/vit.py @@ -0,0 +1,128 @@ +''' +Modified from https://github.com/phlippe/uvadlc_notebooks.git +''' + +## Standard libraries +import os + +## PyTorch +import torch +import torch.nn as nn +import torch.utils.data as data +import torch.optim as optim +from types import SimpleNamespace + + +act_fn_by_name = { + "tanh": nn.Tanh, + "relu": nn.ReLU, + "leakyrelu": nn.LeakyReLU, + "gelu": nn.GELU +} + +# helper function +def img_to_patch(x, patch_size, flatten_channels=True): + """ + Inputs: + x - torch.Tensor representing the image of shape [B, C, H, W] + patch_size - Number of pixels per dimension of the patches (integer) + flatten_channels - If True, the patches will be returned in a flattened format + as a feature vector instead of a image grid. 
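+ Output shape: [B, H'*W', C*p_H*p_W] if flatten_channels is True, otherwise [B, H'*W', C, p_H, p_W].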
+ """ + B, C, H, W = x.shape + x = x.reshape(B, C, H//patch_size, patch_size, W//patch_size, patch_size) + x = x.permute(0, 2, 4, 1, 3, 5) # [B, H', W', C, p_H, p_W] + x = x.flatten(1,2) # [B, H'*W', C, p_H, p_W] + if flatten_channels: + x = x.flatten(2,4) # [B, H'*W', C*p_H*p_W] + return x + + +# Attention block +class AttentionBlock(nn.Module): + + def __init__(self, embed_dim, hidden_dim, num_heads, dropout=0.0): + """ + Inputs: + embed_dim - Dimensionality of input and attention feature vectors + hidden_dim - Dimensionality of hidden layer in feed-forward network + (usually 2-4x larger than embed_dim) + num_heads - Number of heads to use in the Multi-Head Attention block + dropout - Amount of dropout to apply in the feed-forward network + """ + super().__init__() + + self.layer_norm_1 = nn.LayerNorm(embed_dim) + self.attn = nn.MultiheadAttention(embed_dim, num_heads) + self.layer_norm_2 = nn.LayerNorm(embed_dim) + self.linear = nn.Sequential( + nn.Linear(embed_dim, hidden_dim), + nn.GELU(), + nn.Dropout(dropout), + nn.Linear(hidden_dim, embed_dim), + nn.Dropout(dropout) + ) + + + def forward(self, x): + inp_x = self.layer_norm_1(x) + x = x + self.attn(inp_x, inp_x, inp_x)[0] + x = x + self.linear(self.layer_norm_2(x)) + return x + + +class VisionTransformer(nn.Module): + + def __init__(self, embed_dim, hidden_dim, num_channels, num_heads, num_layers, num_classes, patch_size, num_patches, dropout=0.0): + """ + Inputs: + embed_dim - Dimensionality of the input feature vectors to the Transformer + hidden_dim - Dimensionality of the hidden layer in the feed-forward networks + within the Transformer + num_channels - Number of channels of the input (3 for RGB) + num_heads - Number of heads to use in the Multi-Head Attention block + num_layers - Number of layers to use in the Transformer + num_classes - Number of classes to predict + patch_size - Number of pixels that the patches have per dimension + num_patches - Maximum number of patches an image can have + dropout - Amount of dropout to apply in the feed-forward network and + on the input encoding + """ + super().__init__() + + self.patch_size = patch_size + + # Layers/Networks + self.input_layer = nn.Linear(num_channels*(patch_size**2), embed_dim) + self.transformer = nn.Sequential(*[AttentionBlock(embed_dim, hidden_dim, num_heads, dropout=dropout) for _ in range(num_layers)]) + self.mlp_head = nn.Sequential( + nn.LayerNorm(embed_dim), + nn.Linear(embed_dim, num_classes) + ) + self.dropout = nn.Dropout(dropout) + + # Parameters/Embeddings + self.cls_token = nn.Parameter(torch.randn(1,1,embed_dim)) + self.pos_embedding = nn.Parameter(torch.randn(1,1+num_patches,embed_dim)) + + + def forward(self, x): + # Preprocess input + x = img_to_patch(x, self.patch_size) + B, T, _ = x.shape + x = self.input_layer(x) + + # Add CLS token and positional encoding + cls_token = self.cls_token.repeat(B, 1, 1) + x = torch.cat([cls_token, x], dim=1) + x = x + self.pos_embedding[:,:T+1] + + # Apply Transforrmer + x = self.dropout(x) + x = x.transpose(0, 1) + x = self.transformer(x) + + # Perform classification prediction + cls = x[0] + out = self.mlp_head(cls) + return out \ No newline at end of file diff --git a/pl-hydra/src/testing_pipeline.py b/pl-hydra/src/testing_pipeline.py new file mode 100644 index 0000000..abd030a --- /dev/null +++ b/pl-hydra/src/testing_pipeline.py @@ -0,0 +1,57 @@ +import os +from typing import List + +import hydra +from omegaconf import DictConfig +from pytorch_lightning import LightningDataModule, LightningModule, Trainer, 
seed_everything +from pytorch_lightning.loggers import LightningLoggerBase + +from src import utils + +log = utils.get_logger(__name__) + + +def test(config: DictConfig) -> None: + """Contains minimal example of the testing pipeline. Evaluates given checkpoint on a testset. + + Args: + config (DictConfig): Configuration composed by Hydra. + + Returns: + None + """ + + # Set seed for random number generators in pytorch, numpy and python.random + if config.get("seed"): + seed_everything(config.seed, workers=True) + + # Convert relative ckpt path to absolute path if necessary + if not os.path.isabs(config.ckpt_path): + config.ckpt_path = os.path.join(hydra.utils.get_original_cwd(), config.ckpt_path) + + # Init lightning datamodule + log.info(f"Instantiating datamodule <{config.datamodule._target_}>") + datamodule: LightningDataModule = hydra.utils.instantiate(config.datamodule) + + # Init lightning model + log.info(f"Instantiating model <{config.model._target_}>") + model: LightningModule = hydra.utils.instantiate(config.model) + + # Init lightning loggers + logger: List[LightningLoggerBase] = [] + if "logger" in config: + for _, lg_conf in config.logger.items(): + if "_target_" in lg_conf: + log.info(f"Instantiating logger <{lg_conf._target_}>") + logger.append(hydra.utils.instantiate(lg_conf)) + + # Init lightning trainer + log.info(f"Instantiating trainer <{config.trainer._target_}>") + trainer: Trainer = hydra.utils.instantiate(config.trainer, logger=logger) + + # Log hyperparameters + if trainer.logger: + trainer.logger.log_hyperparams({"ckpt_path": config.ckpt_path}) + + log.info("Starting testing!") + trainer.test(model=model, datamodule=datamodule, ckpt_path=config.ckpt_path) diff --git a/pl-hydra/src/training_pipeline.py b/pl-hydra/src/training_pipeline.py new file mode 100644 index 0000000..fea28c1 --- /dev/null +++ b/pl-hydra/src/training_pipeline.py @@ -0,0 +1,126 @@ +import os +from typing import List, Optional + +import hydra +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import ( + Callback, + LightningDataModule, + LightningModule, + Trainer, + seed_everything, +) +from pytorch_lightning.loggers import LightningLoggerBase + +from src import utils + +log = utils.get_logger(__name__) + + +def train(config: DictConfig) -> Optional[float]: + """Contains the training pipeline. Can additionally evaluate model on a testset, using best + weights achieved during training. + + Args: + config (DictConfig): Configuration composed by Hydra. + + Returns: + Optional[float]: Metric score for hyperparameter optimization. 
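+ The score is read from trainer.callback_metrics via the `optimized_metric` key; None is returned when no metric is configured.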
+ """ + + # Set seed for random number generators in pytorch, numpy and python.random + if config.get("seed"): + seed_everything(config.seed, workers=True) + + # Convert relative ckpt path to absolute path if necessary + ckpt_path = config.trainer.get("resume_from_checkpoint") + if ckpt_path and not os.path.isabs(ckpt_path): + config.trainer.resume_from_checkpoint = os.path.join( + hydra.utils.get_original_cwd(), ckpt_path + ) + + # Init lightning datamodule + log.info(f"Instantiating datamodule <{config.datamodule._target_}>") + datamodule: LightningDataModule = hydra.utils.instantiate(config.datamodule) + + # Init lightning model + log.info(f"Instantiating model <{config.model._target_}>") + model: LightningModule = hydra.utils.instantiate(config.model, + optim=config.optim, + _recursive_=False, + _convert_ = "partial") + + # Init lightning callbacks + callbacks: List[Callback] = [] + if "callbacks" in config: + for _, cb_conf in config.callbacks.items(): + if "_target_" in cb_conf: + log.info(f"Instantiating callback <{cb_conf._target_}>") + callbacks.append(hydra.utils.instantiate(cb_conf)) + + # Init lightning loggers + logger: List[LightningLoggerBase] = [] + if "logger" in config: + for _, lg_conf in config.logger.items(): + if "_target_" in lg_conf: + log.info(f"Instantiating logger <{lg_conf._target_}>") + logger.append(hydra.utils.instantiate(lg_conf)) + + # Init lightning trainer + log.info(f"Instantiating trainer <{config.trainer._target_}>") + trainer: Trainer = hydra.utils.instantiate( + config.trainer, + callbacks=callbacks, + logger=logger, + _convert_="partial") + + # Send some parameters from config to all lightning loggers + log.info("Logging hyperparameters!") + utils.log_hyperparameters( + config=config, + model=model, + datamodule=datamodule, + trainer=trainer, + callbacks=callbacks, + logger=logger, + ) + + # Train the model + if config.get("train"): + log.info("Starting training!") + trainer.fit(model=model, datamodule=datamodule) + + # Get metric score for hyperparameter optimization + optimized_metric = config.get("optimized_metric") + if optimized_metric and optimized_metric not in trainer.callback_metrics: + raise Exception( + "Metric for hyperparameter optimization not found! " + "Make sure the `optimized_metric` in `hparams_search` config is correct!" 
+ ) + score = trainer.callback_metrics.get(optimized_metric) + + # Test the model + if config.get("test"): + ckpt_path = "best" + if not config.get("train") or config.trainer.get("fast_dev_run"): + ckpt_path = None + log.info("Starting testing!") + trainer.test(model=model, datamodule=datamodule, ckpt_path=ckpt_path) + + # Make sure everything closed properly + log.info("Finalizing!") + utils.finish( + config=config, + model=model, + datamodule=datamodule, + trainer=trainer, + callbacks=callbacks, + logger=logger, + ) + + # Print path to best checkpoint + if not config.trainer.get("fast_dev_run") and config.get("train"): + log.info(f"Best model ckpt at {trainer.checkpoint_callback.best_model_path}") + + # Return metric score for hyperparameter optimization + return score diff --git a/pl-hydra/src/utils/__init__.py b/pl-hydra/src/utils/__init__.py new file mode 100644 index 0000000..b31f39a --- /dev/null +++ b/pl-hydra/src/utils/__init__.py @@ -0,0 +1,164 @@ +import logging +import warnings +from typing import List, Sequence + +import pytorch_lightning as pl +import rich.syntax +import rich.tree +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning.utilities import rank_zero_only + + +def get_logger(name=__name__) -> logging.Logger: + """Initializes multi-GPU-friendly python command line logger.""" + + logger = logging.getLogger(name) + + # this ensures all logging levels get marked with the rank zero decorator + # otherwise logs would get multiplied for each GPU process in multi-GPU setup + for level in ( + "debug", + "info", + "warning", + "error", + "exception", + "fatal", + "critical", + ): + setattr(logger, level, rank_zero_only(getattr(logger, level))) + + return logger + + +log = get_logger(__name__) + + +def extras(config: DictConfig) -> None: + """Applies optional utilities, controlled by config flags. + + Utilities: + - Ignoring python warnings + - Rich config printing + """ + + # disable python warnings if + if config.get("ignore_warnings"): + log.info("Disabling python warnings! ") + warnings.filterwarnings("ignore") + + # pretty print config tree using Rich library if + if config.get("print_config"): + log.info("Printing config tree with Rich! ") + print_config(config, resolve=True) + + +@rank_zero_only +def print_config( + config: DictConfig, + print_order: Sequence[str] = ( + "datamodule", + "model", + "callbacks", + "logger", + "trainer", + ), + resolve: bool = True, +) -> None: + """Prints content of DictConfig using Rich library and its tree structure. + + Args: + config (DictConfig): Configuration composed by Hydra. + print_order (Sequence[str], optional): Determines in what order config components are printed. + resolve (bool, optional): Whether to resolve reference fields of DictConfig. 
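+ The rendered tree is also written to `config_tree.log` in the current working directory (the Hydra run directory).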
+ """ + + style = "dim" + tree = rich.tree.Tree("CONFIG", style=style, guide_style=style) + + quee = [] + + for field in print_order: + quee.append(field) if field in config else log.info(f"Field '{field}' not found in config") + + for field in config: + if field not in quee: + quee.append(field) + + for field in quee: + branch = tree.add(field, style=style, guide_style=style) + + config_group = config[field] + if isinstance(config_group, DictConfig): + branch_content = OmegaConf.to_yaml(config_group, resolve=resolve) + else: + branch_content = str(config_group) + + branch.add(rich.syntax.Syntax(branch_content, "yaml")) + + rich.print(tree) + + with open("config_tree.log", "w") as file: + rich.print(tree, file=file) + + +@rank_zero_only +def log_hyperparameters( + config: DictConfig, + model: pl.LightningModule, + datamodule: pl.LightningDataModule, + trainer: pl.Trainer, + callbacks: List[pl.Callback], + logger: List[pl.loggers.LightningLoggerBase], +) -> None: + """Controls which config parts are saved by Lightning loggers. + + Additionaly saves: + - number of model parameters + """ + + if not trainer.logger: + return + + hparams = {} + + # choose which parts of hydra config will be saved to loggers + hparams["model"] = config["model"] + + # save number of model parameters + hparams["model/params/total"] = sum(p.numel() for p in model.parameters()) + hparams["model/params/trainable"] = sum( + p.numel() for p in model.parameters() if p.requires_grad + ) + hparams["model/params/non_trainable"] = sum( + p.numel() for p in model.parameters() if not p.requires_grad + ) + + hparams["datamodule"] = config["datamodule"] + hparams["trainer"] = config["trainer"] + + if "seed" in config: + hparams["seed"] = config["seed"] + if "callbacks" in config: + hparams["callbacks"] = config["callbacks"] + + hparams["optim"] = config["optim"] + # send hparams to all loggers + trainer.logger.log_hyperparams(hparams) + + +def finish( + config: DictConfig, + model: pl.LightningModule, + datamodule: pl.LightningDataModule, + trainer: pl.Trainer, + callbacks: List[pl.Callback], + logger: List[pl.loggers.LightningLoggerBase], +) -> None: + """Makes sure everything closed properly.""" + + # without this sweeps with wandb logger might crash! 
+ for lg in logger: + if isinstance(lg, pl.loggers.wandb.WandbLogger): + import wandb + + wandb.finish() diff --git a/pl-hydra/src/utils/plotter.py b/pl-hydra/src/utils/plotter.py new file mode 100644 index 0000000..8ef3541 --- /dev/null +++ b/pl-hydra/src/utils/plotter.py @@ -0,0 +1,37 @@ +import pandas as pd +import numpy as np +import seaborn as sns +import matplotlib.pyplot as plt + +def plot_cm(cm, name_classes): + cm_np = cm.cpu().numpy() + df_cm = pd.DataFrame((cm_np/np.sum(cm_np))*10, + index = [i for i in name_classes], + columns= [i for i in name_classes]) + + plt.figure(figsize = (10,7)) + fig_ = sns.heatmap(df_cm, annot=True, cmap=None).get_figure() + return fig_ + +def plot_preds(images, labels, preds, name_classes, nimg=32, ncols=8, + data_mean=[], data_std=[]): + nrows = nimg//ncols + # define figure + fig_, axes=plt.subplots(nrows, ncols, figsize=(12, 8)) + axes = axes.ravel() + + #print(np.min(images), np.max(images)) + for i in range(nimg): + label_name = name_classes[labels[i]] + pred_name = name_classes[preds[i]] + image = images[i] + image[0] = image[0]*data_std[0] + data_mean[0] + image[1] = image[1]*data_std[1] + data_mean[1] + image[2] = image[2]*data_std[2] + data_mean[2] + #print(np.min(image), np.max(image)) + image = np.transpose((image*255).astype('uint8'), (1,2,0)) + axes[i].imshow(image) + axes[i].set_title(f'label: {label_name} \n pred: {pred_name}', fontsize=8) + axes[i].axis('off') + plt.subplots_adjust(hspace=0.2) + return fig_ \ No newline at end of file diff --git a/pl-hydra/src/vendor/__init__.py b/pl-hydra/src/vendor/__init__.py new file mode 100644 index 0000000..203cb9c --- /dev/null +++ b/pl-hydra/src/vendor/__init__.py @@ -0,0 +1 @@ +# use this folder for storing third party code that cannot be installed using pip/conda diff --git a/pl-hydra/test.py b/pl-hydra/test.py new file mode 100644 index 0000000..ee02d04 --- /dev/null +++ b/pl-hydra/test.py @@ -0,0 +1,26 @@ +import dotenv +import hydra +from omegaconf import DictConfig + +# load environment variables from `.env` file if it exists +# recursively searches for `.env` in all folders starting from work dir +dotenv.load_dotenv(override=True) + + +@hydra.main(config_path="configs/", config_name="test.yaml") +def main(config: DictConfig): + + # Imports can be nested inside @hydra.main to optimize tab completion + # https://github.com/facebookresearch/hydra/issues/934 + from src import utils + from src.testing_pipeline import test + + # Applies optional utilities + utils.extras(config) + + # Evaluate model + return test(config) + + +if __name__ == "__main__": + main() diff --git a/pl-hydra/tests/__init__.py b/pl-hydra/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pl-hydra/tests/helpers/__init__.py b/pl-hydra/tests/helpers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pl-hydra/tests/helpers/module_available.py b/pl-hydra/tests/helpers/module_available.py new file mode 100644 index 0000000..d3137f3 --- /dev/null +++ b/pl-hydra/tests/helpers/module_available.py @@ -0,0 +1,28 @@ +import platform +from importlib.util import find_spec + +""" +Adapted from: + https://github.com/PyTorchLightning/pytorch-lightning/blob/master/pytorch_lightning/utilities/imports.py +""" + + +def _module_available(module_path: str) -> bool: + """Check if a path is available in your environment.
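+ Returns False instead of raising if any package along the dotted path is missing.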
+ + >>> _module_available('os') + True + >>> _module_available('bla.bla') + False + """ + try: + return find_spec(module_path) is not None + except ModuleNotFoundError: + # Python 3.7+ + return False + + +_IS_WINDOWS = platform.system() == "Windows" +_DEEPSPEED_AVAILABLE = not _IS_WINDOWS and _module_available("deepspeed") +_FAIRSCALE_AVAILABLE = not _IS_WINDOWS and _module_available("fairscale.nn") +_RPC_AVAILABLE = not _IS_WINDOWS and _module_available("torch.distributed.rpc") diff --git a/pl-hydra/tests/helpers/run_command.py b/pl-hydra/tests/helpers/run_command.py new file mode 100644 index 0000000..1670988 --- /dev/null +++ b/pl-hydra/tests/helpers/run_command.py @@ -0,0 +1,15 @@ +from typing import List + +import pytest +import sh + + +def run_command(command: List[str]): + """Default method for executing shell commands with pytest.""" + msg = None + try: + sh.python(command) + except sh.ErrorReturnCode as e: + msg = e.stderr.decode() + if msg: + pytest.fail(msg=msg) diff --git a/pl-hydra/tests/helpers/runif.py b/pl-hydra/tests/helpers/runif.py new file mode 100644 index 0000000..36d73e1 --- /dev/null +++ b/pl-hydra/tests/helpers/runif.py @@ -0,0 +1,104 @@ +import sys +from typing import Optional + +import pytest +import torch +from packaging.version import Version +from pkg_resources import get_distribution + +""" +Adapted from: + https://github.com/PyTorchLightning/pytorch-lightning/blob/master/tests/helpers/runif.py +""" + +from tests.helpers.module_available import ( + _DEEPSPEED_AVAILABLE, + _FAIRSCALE_AVAILABLE, + _IS_WINDOWS, + _RPC_AVAILABLE, +) + + +class RunIf: + """RunIf wrapper for conditional skipping of tests. + + Fully compatible with `@pytest.mark`. + + Example: + + @RunIf(min_torch="1.8") + @pytest.mark.parametrize("arg1", [1.0, 2.0]) + def test_wrapper(arg1): + assert arg1 > 0 + """ + + def __new__( + self, + min_gpus: int = 0, + min_torch: Optional[str] = None, + max_torch: Optional[str] = None, + min_python: Optional[str] = None, + skip_windows: bool = False, + rpc: bool = False, + fairscale: bool = False, + deepspeed: bool = False, + **kwargs, + ): + """ + Args: + min_gpus: min number of gpus required to run test + min_torch: minimum pytorch version to run test + max_torch: maximum pytorch version to run test + min_python: minimum python version required to run test + skip_windows: skip test for Windows platform + rpc: requires Remote Procedure Call (RPC) + fairscale: if `fairscale` module is required to run the test + deepspeed: if `deepspeed` module is required to run the test + kwargs: native pytest.mark.skipif keyword arguments + """ + conditions = [] + reasons = [] + + if min_gpus: + conditions.append(torch.cuda.device_count() < min_gpus) + reasons.append(f"GPUs>={min_gpus}") + + if min_torch: + torch_version = get_distribution("torch").version + conditions.append(Version(torch_version) < Version(min_torch)) + reasons.append(f"torch>={min_torch}") + + if max_torch: + torch_version = get_distribution("torch").version + conditions.append(Version(torch_version) >= Version(max_torch)) + reasons.append(f"torch<{max_torch}") + + if min_python: + py_version = ( + f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + ) + conditions.append(Version(py_version) < Version(min_python)) + reasons.append(f"python>={min_python}") + + if skip_windows: + conditions.append(_IS_WINDOWS) + reasons.append("does not run on Windows") + + if rpc: + conditions.append(not _RPC_AVAILABLE) + reasons.append("RPC") + + if fairscale: + conditions.append(not 
_FAIRSCALE_AVAILABLE) + reasons.append("Fairscale") + + if deepspeed: + conditions.append(not _DEEPSPEED_AVAILABLE) + reasons.append("Deepspeed") + + reasons = [rs for cond, rs in zip(conditions, reasons) if cond] + return pytest.mark.skipif( + condition=any(conditions), + reason=f"Requires: [{' + '.join(reasons)}]", + **kwargs, + ) diff --git a/pl-hydra/tests/shell/__init__.py b/pl-hydra/tests/shell/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pl-hydra/tests/shell/test_basic_commands.py b/pl-hydra/tests/shell/test_basic_commands.py new file mode 100644 index 0000000..f708ede --- /dev/null +++ b/pl-hydra/tests/shell/test_basic_commands.py @@ -0,0 +1,58 @@ +import pytest + +from tests.helpers.run_command import run_command +from tests.helpers.runif import RunIf + +""" +A couple of sanity checks to make sure the model doesn't crash with different running options. +""" + + +def test_fast_dev_run(): + """Test running for 1 train, val and test batch.""" + command = ["train.py", "++trainer.fast_dev_run=true"] + run_command(command) + + +@pytest.mark.slow +def test_cpu(): + """Test running 1 epoch on CPU.""" + command = ["train.py", "++trainer.max_epochs=1", "++trainer.gpus=0"] + run_command(command) + + +# use RunIf to skip execution of some tests, e.g. when no gpus are available +@RunIf(min_gpus=1) +@pytest.mark.slow +def test_gpu(): + """Test running 1 epoch on GPU.""" + command = [ + "train.py", + "++trainer.max_epochs=1", + "++trainer.gpus=1", + ] + run_command(command) + + +@RunIf(min_gpus=1) +@pytest.mark.slow +def test_mixed_precision(): + """Test running 1 epoch with pytorch native automatic mixed precision (AMP).""" + command = [ + "train.py", + "++trainer.max_epochs=1", + "++trainer.gpus=1", + "++trainer.precision=16", + ] + run_command(command) + + +@pytest.mark.slow +def test_double_validation_loop(): + """Test running 1 epoch with validation loop twice per epoch.""" + command = [ + "train.py", + "++trainer.max_epochs=1", + "++trainer.val_check_interval=0.5", + ] + run_command(command) diff --git a/pl-hydra/tests/shell/test_debug_configs.py b/pl-hydra/tests/shell/test_debug_configs.py new file mode 100644 index 0000000..a73dda8 --- /dev/null +++ b/pl-hydra/tests/shell/test_debug_configs.py @@ -0,0 +1,35 @@ +import pytest + +from tests.helpers.run_command import run_command + + +@pytest.mark.slow +def test_debug_default(): + command = ["train.py", "debug=default"] + run_command(command) + + +def test_debug_limit_batches(): + command = ["train.py", "debug=limit_batches"] + run_command(command) + + +def test_debug_overfit(): + command = ["train.py", "debug=overfit"] + run_command(command) + + +@pytest.mark.slow +def test_debug_profiler(): + command = ["train.py", "debug=profiler"] + run_command(command) + + +def test_debug_step(): + command = ["train.py", "debug=step"] + run_command(command) + + +def test_debug_test_only(): + command = ["train.py", "debug=test_only"] + run_command(command) diff --git a/pl-hydra/tests/shell/test_sweeps.py b/pl-hydra/tests/shell/test_sweeps.py new file mode 100644 index 0000000..10a298d --- /dev/null +++ b/pl-hydra/tests/shell/test_sweeps.py @@ -0,0 +1,44 @@ +import pytest + +from tests.helpers.run_command import run_command + +""" +A couple of tests executing hydra sweeps. 
+ +Use the following command to skip slow tests: + pytest -k "not slow" +""" + + +@pytest.mark.slow +def test_experiments(): + """Test running all available experiment configs for 1 epoch.""" + command = ["train.py", "-m", "experiment=glob(*)", "++trainer.max_epochs=1"] + run_command(command) + + +@pytest.mark.slow +def test_default_sweep(): + """Test default Hydra sweeper.""" + command = [ + "train.py", + "-m", + "datamodule.batch_size=64,128", + "model.lr=0.01,0.02", + "trainer=default", + "++trainer.fast_dev_run=true", + ] + run_command(command) + + +@pytest.mark.slow +def test_optuna_sweep(): + """Test Optuna sweeper.""" + command = [ + "train.py", + "-m", + "hparams_search=mnist_optuna", + "trainer=default", + "++trainer.fast_dev_run=true", + ] + run_command(command) diff --git a/pl-hydra/tests/unit/__init__.py b/pl-hydra/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pl-hydra/tests/unit/test_mnist_datamodule.py b/pl-hydra/tests/unit/test_mnist_datamodule.py new file mode 100644 index 0000000..91e6182 --- /dev/null +++ b/pl-hydra/tests/unit/test_mnist_datamodule.py @@ -0,0 +1,36 @@ +import os + +import pytest +import torch + +from src.datamodules.mnist_datamodule import MNISTDataModule + + +@pytest.mark.parametrize("batch_size", [32, 128]) +def test_mnist_datamodule(batch_size): + datamodule = MNISTDataModule(batch_size=batch_size) + datamodule.prepare_data() + + assert not datamodule.data_train and not datamodule.data_val and not datamodule.data_test + + assert os.path.exists(os.path.join("data", "MNIST")) + assert os.path.exists(os.path.join("data", "MNIST", "raw")) + + datamodule.setup() + + assert datamodule.data_train and datamodule.data_val and datamodule.data_test + assert ( + len(datamodule.data_train) + len(datamodule.data_val) + len(datamodule.data_test) == 70_000 + ) + + assert datamodule.train_dataloader() + assert datamodule.val_dataloader() + assert datamodule.test_dataloader() + + batch = next(iter(datamodule.train_dataloader())) + x, y = batch + + assert len(x) == batch_size + assert len(y) == batch_size + assert x.dtype == torch.float32 + assert y.dtype == torch.int64 diff --git a/pl-hydra/train.py b/pl-hydra/train.py new file mode 100644 index 0000000..05b78f3 --- /dev/null +++ b/pl-hydra/train.py @@ -0,0 +1,33 @@ +''' +Modified from https://github.com/phlippe/uvadlc_notebooks.git +''' + +import dotenv +import hydra +from omegaconf import DictConfig +import os +# Set the visible GPUs (curent machine has 16 GPUS [0-15]) +os.environ["CUDA_VISIBLE_DEVICES"]="11" + +# load environment variables from `.env` file if it exists +# recursively searches for `.env` in all folders starting from work dir +dotenv.load_dotenv(override=True) + + +@hydra.main(config_path="configs/", config_name="train.yaml") +def main(config: DictConfig): + + # Imports can be nested inside @hydra.main to optimize tab completion + # https://github.com/facebookresearch/hydra/issues/934 + from src import utils + from src.training_pipeline import train + + # Applies optional utilities + utils.extras(config) + + # Train model + return train(config) + + +if __name__ == "__main__": + main()
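A minimal usage sketch, assuming the commands are run from the pl-hydra/ directory with the configs added above (checkpoint path below is a placeholder; ckpt_path is assumed to be set in configs/test.yaml or overridden on the command line):

    python train.py ++trainer.fast_dev_run=true                                    # smoke test: one train/val/test batch
    python train.py trainer=default ++trainer.max_epochs=1 ++trainer.gpus=0        # short CPU run with the default trainer
    python train.py -m hparams_search=mnist_optuna trainer=default ++trainer.fast_dev_run=true   # Optuna sweep (multirun)
    python test.py ckpt_path=<path/to/checkpoint.ckpt>                              # evaluate a saved checkpoint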