diff --git a/.github/workflows/cpu-tests.yml b/.github/workflows/cpu-tests.yml index 48f0a456c..48dc91a34 100644 --- a/.github/workflows/cpu-tests.yml +++ b/.github/workflows/cpu-tests.yml @@ -35,6 +35,8 @@ jobs: steps: - name: Check out repository uses: actions/checkout@v4 + with: + fetch-depth: '1' - name: Set environment variables run: | @@ -62,7 +64,7 @@ jobs: fi - name: Pull latest image from container registry - run: docker pull $IMAGE_REPO/$IMAGE_NAME || true + run: docker pull $IMAGE_REPO/$IMAGE_NAME --quiet || true - name: Build temporary Docker image run: | @@ -131,12 +133,12 @@ jobs: - name: "Prepare environment: Load Docker image from cache" if: env.DOCKER_TAG != 'latest' - run: docker load -i $DOCKER_CACHE_PATH/docker-image.tar.gz + run: docker load -i $DOCKER_CACHE_PATH/docker-image.tar.gz --quiet - name: "Prepare environment: Pull latest image from container registry" if: env.DOCKER_TAG == 'latest' run: | - docker pull $IMAGE_REPO/$IMAGE_NAME:latest + docker pull $IMAGE_REPO/$IMAGE_NAME:latest --quiet docker image tag $IMAGE_REPO/$IMAGE_NAME:latest $IMAGE_NAME:latest - name: "Prepare environment: Run Docker container" @@ -155,6 +157,8 @@ jobs: - name: Check out repository (mala) uses: actions/checkout@v4 + with: + fetch-depth: '1' - name: Install mala package # Exec all commands inside the mala-cpu container @@ -174,7 +178,13 @@ jobs: # if comparison fails, `install/mala_cpu_[base]_environment.yml` needs to be aligned with # `requirements.txt` and/or extra dependencies are missing in the Docker Conda environment - diff --side-by-side --color=always env_before.yml env_after.yml + + if diff --brief env_before.yml env_after.yml + then + echo "Files env_before.yml and env_after.yml do not differ." + else + diff --side-by-side --color=always env_before.yml env_after.yml + fi - name: Download test data repository from RODARE shell: 'bash -c "docker exec -i mala-cpu python < {0}"' @@ -229,9 +239,6 @@ jobs: ((contains(github.ref_name, 'develop') || contains(github.ref_name, 'master')) && needs.build-docker-image-cpu.outputs.docker-tag != 'latest') || startsWith(github.ref, 'refs/tags/') steps: - - name: Check out repository - uses: actions/checkout@v4 - - name: "Prepare environment: Restore cache" if: env.DOCKER_TAG != 'latest' uses: actions/cache@v4 @@ -242,21 +249,19 @@ jobs: - name: "Prepare environment: Load Docker image from cache" if: env.DOCKER_TAG != 'latest' - run: docker load -i $DOCKER_CACHE_PATH/docker-image.tar.gz + run: docker load -i $DOCKER_CACHE_PATH/docker-image.tar.gz --quiet - name: "Prepare environment: Pull latest image from container registry" if: env.DOCKER_TAG == 'latest' - run: docker pull $IMAGE_REPO/$IMAGE_NAME:latest + run: docker pull $IMAGE_REPO/$IMAGE_NAME:latest --quiet - name: Tag Docker image run: | # Execute on change of Docker image if [[ "$DOCKER_TAG" != 'latest' ]]; then - GIT_SHA=${GITHUB_REF_NAME}-$(git rev-parse --short "$GITHUB_SHA") - echo "GIT_SHA=$GIT_SHA" docker tag $IMAGE_NAME:$GITHUB_RUN_ID $IMAGE_REPO/$IMAGE_NAME:latest - docker tag $IMAGE_NAME:$GITHUB_RUN_ID $IMAGE_REPO/$IMAGE_NAME:$GIT_SHA + docker tag $IMAGE_NAME:$GITHUB_RUN_ID $IMAGE_REPO/$IMAGE_NAME:${GITHUB_REF_NAME}-${GITHUB_SHA:0:7} fi # Execute on push of git tag @@ -272,4 +277,4 @@ jobs: run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - name: Push Docker image - run: docker push $IMAGE_REPO/$IMAGE_NAME --all-tags + run: docker push $IMAGE_REPO/$IMAGE_NAME --all-tags | grep -v -E 'Waiting|Layer already|Preparing|Pushed'
diff --git a/docs/source/advanced_usage/hyperparameters.rst b/docs/source/advanced_usage/hyperparameters.rst index 4240250e7..5c0665b44 100644 --- a/docs/source/advanced_usage/hyperparameters.rst +++ b/docs/source/advanced_usage/hyperparameters.rst @@ -114,7 +114,7 @@ a physical validation metric such as .. code-block:: python - parameters.running.after_before_training_metric = "band_energy" + parameters.running.after_training_metric = "band_energy" Advanced optimization algorithms ******************************** diff --git a/docs/source/advanced_usage/predictions.rst b/docs/source/advanced_usage/predictions.rst index 7058f17de..20e82494b 100644 --- a/docs/source/advanced_usage/predictions.rst +++ b/docs/source/advanced_usage/predictions.rst @@ -40,6 +40,8 @@ Likewise, you can adjust the inference temperature via calculator.data_handler.target_calculator.temperature = ... +.. _production_gpu: + Predictions on GPU ******************* @@ -137,4 +139,3 @@ With the exception of the electronic density, which is saved into the ``.cube`` format for visualization with regular electronic structure visualization software, all of these observables can be plotted with Python based visualization libraries such as ``matplotlib``. - diff --git a/docs/source/advanced_usage/trainingmodel.rst b/docs/source/advanced_usage/trainingmodel.rst index 52e50ec50..290aa15f3 100644 --- a/docs/source/advanced_usage/trainingmodel.rst +++ b/docs/source/advanced_usage/trainingmodel.rst @@ -77,7 +77,7 @@ Specifically, when setting .. code-block:: python - parameters.running.after_before_training_metric = "band_energy" + parameters.running.after_training_metric = "band_energy" the error in the band energy between actual and predicted LDOS will be calculated and printed before and after network training (in meV/atom). @@ -205,21 +205,21 @@ visualization prior to training via # 0: No visualizatuon, 1: loss and learning rate, 2: like 1, # but additionally weights and biases are saved - parameters.running.visualisation = 1 - parameters.running.visualisation_dir = "mala_vis" + parameters.running.logging = 1 + parameters.running.logging_dir = "mala_vis" -where ``visualisation_dir`` specifies some directory in which to save the -MALA visualization data. Afterwards, you can run the training without any +where ``logging_dir`` specifies some directory in which to save the +MALA logging data. Afterwards, you can run the training without any other modifications. Once training is finished (or during training, in case you want to use tensorboard to monitor progress), you can launch tensorboard via .. code-block:: bash - tensorboard --logdir path_to_visualization + tensorboard --logdir path_to_log_directory -The full path for ``path_to_visualization`` can be accessed via -``trainer.full_visualization_path``. +The full path for ``path_to_log_directory`` can be accessed via +``trainer.full_logging_path``. Training in parallel diff --git a/docs/source/basic_usage/hyperparameters.rst b/docs/source/basic_usage/hyperparameters.rst index 11742932d..d10bb440e 100644 --- a/docs/source/basic_usage/hyperparameters.rst +++ b/docs/source/basic_usage/hyperparameters.rst @@ -118,9 +118,9 @@ properties of the ``Parameters`` class: during the optimization. - ``network.layer_sizes`` - ``"int"``, ``"categorical"`` - * - ``"trainingtype"`` + * - ``"optimizer"`` - Optimization algorithm used during the NN optimization. 
- - ``running.trainingtype`` + - ``running.optimizer`` - ``"categorical"`` * - ``"mini_batch_size"`` - Size of the mini batches used to calculate the gradient during diff --git a/docs/source/basic_usage/trainingmodel.rst b/docs/source/basic_usage/trainingmodel.rst index 3995865e6..e6bc8c967 100644 --- a/docs/source/basic_usage/trainingmodel.rst +++ b/docs/source/basic_usage/trainingmodel.rst @@ -35,7 +35,7 @@ options to train a simple network with example data, namely parameters.running.max_number_epochs = 100 parameters.running.mini_batch_size = 40 parameters.running.learning_rate = 0.00001 - parameters.running.trainingtype = "Adam" + parameters.running.optimizer = "Adam" parameters.verbosity = 1 # level of output; 1 is standard, 0 is low, 2 is debug. Here, we can see that the ``Parameters`` object contains multiple diff --git a/docs/source/install/installing_lammps.rst b/docs/source/install/installing_lammps.rst index 50fb41cef..28affb950 100644 --- a/docs/source/install/installing_lammps.rst +++ b/docs/source/install/installing_lammps.rst @@ -41,18 +41,24 @@ The MALA team recommends to build LAMMPS with ``cmake``. To do so * ``Kokkos_ARCH_GPUARCH=???``: Your GPU architecture (see see `Kokkos instructions `_) * ``CMAKE_CXX_COMPILER=???``: Path to the ``nvcc_wrapper`` executable shipped with the LAMMPS code, should be at ``/your/path/to/lammps/lib/kokkos/bin/nvcc_wrapper`` -* For example, this configures the LAMMPS cmake build with Kokkos support - for an Intel Haswell CPU and an Nvidia Volta GPU, with MPI support: + + For example, this configures the LAMMPS cmake build with Kokkos support + for an Intel Haswell CPU and an Nvidia Volta GPU, with MPI support: .. code-block:: bash cmake ../cmake -D PKG_KOKKOS=yes -D BUILD_MPI=yes -D PKG_ML-SNAP=yes -D Kokkos_ENABLE_CUDA=yes -D Kokkos_ARCH_HSW=yes -D Kokkos_ARCH_VOLTA70=yes -D CMAKE_CXX_COMPILER=/path/to/lammps/lib/kokkos/bin/nvcc_wrapper -D BUILD_SHARED_LIBS=yes + .. note:: + When using a GPU by setting ``parameters.use_gpu = True``, you *need* to + have a GPU version of ``LAMMPS`` installed. See :ref:`production_gpu` for + details. * Build the library and executable with ``cmake --build .`` (Add ``--parallel=8`` for a faster build) + Installing the Python extension ******************************** diff --git a/docs/source/install/installing_qe.rst b/docs/source/install/installing_qe.rst index 3b426ba48..9ff514c7a 100644 --- a/docs/source/install/installing_qe.rst +++ b/docs/source/install/installing_qe.rst @@ -4,24 +4,25 @@ Installing Quantum ESPRESSO (total energy module) Prerequisites ************* -To run the total energy module, you need a full Quantum ESPRESSO installation, -for which to install the Python bindings. This module has been tested with -version ``7.2.``, the most recent version at the time of this release of MALA. -Newer versions may work (untested), but installation instructions may vary. +To build and run the total energy module, you need a full Quantum ESPRESSO +installation, for which to install the Python bindings. This module has been +tested with version ``7.2.``, the most recent version at the time of this +release of MALA. Newer versions may work (untested), but installation +instructions may vary. Make sure you have an (MPI-aware) F90 compiler such as ``mpif90`` (e.g. Debian-ish machine: ``apt install openmpi-bin``, on an HPC cluster something like ``module load openmpi gcc``). Make sure to use the same compiler for QE and the extension. 
This should be the default case, but if problems arise you can manually select the compiler via -``--f90exec=`` in ``build_total_energy_energy_module.sh`` +``--f90exec=`` in ``build_total_energy_module.sh`` We assume that QE's ``configure`` script will find your system libs, e.g. use ``-lblas``, ``-llapack`` and ``-lfftw3``. We use those by default in -``build_total_energy_energy_module.sh``. If you have, say, the MKL library, +``build_total_energy_module.sh``. If you have, say, the MKL library, you may see ``configure`` use something like ``-lmkl_intel_lp64 -lmkl_sequential -lmkl_core`` when building QE. In this case you have to modify -``build_total_energy_energy_module.sh`` to use the same libraries! +``build_total_energy_module.sh`` to use the same libraries! Build Quantum ESPRESSO ********************** @@ -35,10 +36,16 @@ Build Quantum ESPRESSO * Change to the ``external_modules/total_energy_module`` directory of the MALA repository +.. note:: + At the moment, building QE using ``cmake`` `doesn't work together with the + build_total_energy_module.sh script + `_. Please use the + ``configure`` + ``make`` build workflow. + Installing the Python extension ******************************** -* Run ``build_total_energy_energy_module.sh /path/to/your/q-e``. +* Run ``build_total_energy_module.sh /path/to/your/q-e``. * If the build is successful, a file named something like ``total_energy.cpython-39m-x86_64-linux-gnu.so`` will be generated. This is diff --git a/examples/advanced/ex01_checkpoint_training.py b/examples/advanced/ex01_checkpoint_training.py index 01bb9b486..5222a5232 100644 --- a/examples/advanced/ex01_checkpoint_training.py +++ b/examples/advanced/ex01_checkpoint_training.py @@ -26,7 +26,7 @@ def initial_setup(): parameters.running.max_number_epochs = 9 parameters.running.mini_batch_size = 8 parameters.running.learning_rate = 0.00001 - parameters.running.trainingtype = "Adam" + parameters.running.optimizer = "Adam" # We checkpoint the training every 5 epochs and save the results # as "ex07". diff --git a/examples/advanced/ex03_tensor_board.py b/examples/advanced/ex03_tensor_board.py index b15239495..43a066aaf 100644 --- a/examples/advanced/ex03_tensor_board.py +++ b/examples/advanced/ex03_tensor_board.py @@ -18,7 +18,7 @@ parameters.running.max_number_epochs = 100 parameters.running.mini_batch_size = 40 parameters.running.learning_rate = 0.001 -parameters.running.trainingtype = "Adam" +parameters.running.optimizer = "Adam" # Turn the visualization on and select a folder to save the visualization # files into. 
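The renames running through this diff (``trainingtype`` → ``optimizer``, ``visualisation``/``visualisation_dir`` → ``logging``/``logging_dir``, ``full_visualization_path`` → ``full_logging_path``) touch many files. As a quick reference for reviewers, here is a minimal sketch of how the post-rename options fit together; it is assembled from the renamed settings shown in this diff and is not copied from any single example script:

.. code-block:: python

    import mala

    parameters = mala.Parameters()

    # Formerly parameters.running.trainingtype.
    parameters.running.optimizer = "Adam"

    # Formerly visualisation / visualisation_dir.
    # 0: no logging, 1: loss and learning rate, 2: like 1, but weights
    # and biases are additionally saved.
    parameters.running.logging = 1
    parameters.running.logging_dir = "mala_vis"

    # ... set up data_handler, network and trainer as in the basic examples ...
    # After training, the tensorboard log directory is available as
    # trainer.full_logging_path (formerly trainer.full_visualization_path):
    #     tensorboard --logdir <trainer.full_logging_path>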
@@ -45,6 +45,6 @@ trainer.train_network() printout( 'Run finished, launch tensorboard with "tensorboard --logdir ' - + trainer.full_visualization_path + + trainer.full_logging_path + '"' ) diff --git a/examples/advanced/ex05_checkpoint_hyperparameter_optimization.py b/examples/advanced/ex05_checkpoint_hyperparameter_optimization.py index cef7c8f4f..99a92fa35 100644 --- a/examples/advanced/ex05_checkpoint_hyperparameter_optimization.py +++ b/examples/advanced/ex05_checkpoint_hyperparameter_optimization.py @@ -21,7 +21,7 @@ def initial_setup(): parameters.running.max_number_epochs = 10 parameters.running.mini_batch_size = 40 parameters.running.learning_rate = 0.00001 - parameters.running.trainingtype = "Adam" + parameters.running.optimizer = "Adam" parameters.hyperparameters.n_trials = 9 parameters.hyperparameters.checkpoints_each_trial = 5 parameters.hyperparameters.checkpoint_name = "ex05_checkpoint" diff --git a/examples/advanced/ex06_distributed_hyperparameter_optimization.py b/examples/advanced/ex06_distributed_hyperparameter_optimization.py index b34f9bb8b..215dd1ab2 100644 --- a/examples/advanced/ex06_distributed_hyperparameter_optimization.py +++ b/examples/advanced/ex06_distributed_hyperparameter_optimization.py @@ -28,7 +28,7 @@ parameters.running.max_number_epochs = 5 parameters.running.mini_batch_size = 40 parameters.running.learning_rate = 0.00001 -parameters.running.trainingtype = "Adam" +parameters.running.optimizer = "Adam" parameters.hyperparameters.n_trials = 10 parameters.hyperparameters.checkpoints_each_trial = -1 parameters.hyperparameters.checkpoint_name = "ex06" @@ -44,7 +44,7 @@ parameters.targets.ldos_gridspacing_ev = 2.5 parameters.targets.ldos_gridoffset_ev = -5 parameters.hyperparameters.number_training_per_trial = 3 -parameters.running.after_before_training_metric = "band_energy" +parameters.running.after_training_metric = "band_energy" data_handler = mala.DataHandler(parameters) diff --git a/examples/advanced/ex07_advanced_hyperparameter_optimization.py b/examples/advanced/ex07_advanced_hyperparameter_optimization.py index 8165ef01e..242ffd7dd 100644 --- a/examples/advanced/ex07_advanced_hyperparameter_optimization.py +++ b/examples/advanced/ex07_advanced_hyperparameter_optimization.py @@ -21,7 +21,7 @@ def optimize_hyperparameters(hyper_optimizer): parameters.running.max_number_epochs = 10 parameters.running.mini_batch_size = 40 parameters.running.learning_rate = 0.00001 - parameters.running.trainingtype = "Adam" + parameters.running.optimizer = "Adam" parameters.hyperparameters.n_trials = 8 parameters.hyperparameters.hyper_opt_method = hyper_optimizer @@ -64,7 +64,7 @@ def optimize_hyperparameters(hyper_optimizer): data_handler.output_dimension, ] hyperoptimizer.add_hyperparameter( - "categorical", "trainingtype", choices=["Adam", "SGD"] + "categorical", "optimizer", choices=["Adam", "SGD"] ) hyperoptimizer.add_hyperparameter( "categorical", "layer_activation_00", choices=["ReLU", "Sigmoid"] diff --git a/examples/basic/ex01_train_network.py b/examples/basic/ex01_train_network.py index 95eb2d51b..1eca8c6b7 100644 --- a/examples/basic/ex01_train_network.py +++ b/examples/basic/ex01_train_network.py @@ -28,7 +28,7 @@ parameters.running.max_number_epochs = 100 parameters.running.mini_batch_size = 40 parameters.running.learning_rate = 0.00001 -parameters.running.trainingtype = "Adam" +parameters.running.optimizer = "Adam" # These parameters characterize how the LDOS and bispectrum descriptors # were calculated. 
They are _technically_ not needed to train a simple # network. However, it is useful to define them prior to training. Then, diff --git a/examples/basic/ex02_test_network.py b/examples/basic/ex02_test_network.py index 2e4b8953c..0d90dfe7f 100644 --- a/examples/basic/ex02_test_network.py +++ b/examples/basic/ex02_test_network.py @@ -21,15 +21,15 @@ # It is recommended to enable the "lazy-loading" feature, so that # data is loaded into memory one snapshot at a time during testing - this # helps keep RAM requirement down. Furthermore, you have to decide which -# observables to test (usual choices are "band_energy", "total_energy" and -# "number_of_electrons") and whether you want the results per snapshot +# observables to test (usual choices are "band_energy", "total_energy") +# and whether you want the results per snapshot # (output_format="list") or as an averaged value (output_format="mae") #################### parameters, network, data_handler, tester = mala.Tester.load_run( run_name=model_name, path=model_path ) -tester.observables_to_test = ["band_energy", "number_of_electrons"] +tester.observables_to_test = ["band_energy", "density"] tester.output_format = "list" parameters.data.use_lazy_loading = True diff --git a/examples/basic/ex04_hyperparameter_optimization.py b/examples/basic/ex04_hyperparameter_optimization.py index 4c68179c2..cebb4c42e 100644 --- a/examples/basic/ex04_hyperparameter_optimization.py +++ b/examples/basic/ex04_hyperparameter_optimization.py @@ -22,7 +22,7 @@ parameters.data.output_rescaling_type = "normal" parameters.running.max_number_epochs = 20 parameters.running.mini_batch_size = 40 -parameters.running.trainingtype = "Adam" +parameters.running.optimizer = "Adam" parameters.hyperparameters.n_trials = 20 #################### diff --git a/mala/common/parameters.py b/mala/common/parameters.py index 51e5ac937..c9b1b826c 100644 --- a/mala/common/parameters.py +++ b/mala/common/parameters.py @@ -733,7 +733,7 @@ def __init__(self): self.learning_rate_decay = 0.1 self.learning_rate_patience = 0 self._during_training_metric = "ldos" - self._after_before_training_metric = "ldos" + self._after_training_metric = "ldos" self.use_compression = False self.num_workers = 0 self.use_shuffling_for_samplers = True @@ -755,7 +755,7 @@ def __init__(self): def _update_ddp(self, new_ddp): super(ParametersRunning, self)._update_ddp(new_ddp) self.during_training_metric = self.during_training_metric - self.after_before_training_metric = self.after_before_training_metric + self.after_training_metric = self.after_training_metric @property def during_training_metric(self): @@ -783,7 +783,7 @@ def during_training_metric(self, value): self._during_training_metric = value @property - def after_before_training_metric(self): + def after_training_metric(self): """ Get the metric used during training. @@ -795,17 +795,17 @@ def after_before_training_metric(self): DFT results. Of these, the mean average error in eV/atom will be calculated. """ - return self._after_before_training_metric + return self._after_training_metric - @after_before_training_metric.setter - def after_before_training_metric(self, value): + @after_training_metric.setter + def after_training_metric(self, value): if value != "ldos": if self._configuration["ddp"]: raise Exception( "Currently, MALA can only operate with the " '"ldos" metric for ddp runs.' 
) - self._after_before_training_metric = value + self._after_training_metric = value @during_training_metric.setter def during_training_metric(self, value): diff --git a/mala/network/hyper_opt_naswot.py b/mala/network/hyper_opt_naswot.py index ae27f7d13..9a11e1ca0 100644 --- a/mala/network/hyper_opt_naswot.py +++ b/mala/network/hyper_opt_naswot.py @@ -39,7 +39,7 @@ def __init__(self, params, data): self.trial_list = None self.ignored_hyperparameters = [ "learning_rate", - "trainingtype", + "optimizer", "mini_batch_size", "early_stopping_epochs", "learning_rate_patience", diff --git a/mala/network/objective_base.py b/mala/network/objective_base.py index 52d0d9464..2fbf29503 100644 --- a/mala/network/objective_base.py +++ b/mala/network/objective_base.py @@ -231,8 +231,8 @@ def parse_trial_optuna(self, trial: Trial): turned_off_layers.append(layer_counter) layer_counter += 1 - elif "trainingtype" == par.name: - self.params.running.trainingtype = par.get_parameter(trial) + elif "optimizer" == par.name: + self.params.running.optimizer = par.get_parameter(trial) elif "mini_batch_size" == par.name: self.params.running.mini_batch_size = par.get_parameter(trial) @@ -358,8 +358,8 @@ def parse_trial_oat(self, trial): turned_off_layers.append(layer_counter) layer_counter += 1 - elif "trainingtype" == par.name: - self.params.running.trainingtype = par.get_parameter( + elif "optimizer" == par.name: + self.params.running.optimizer = par.get_parameter( trial, factor_idx ) elif "mini_batch_size" == par.name: diff --git a/mala/network/runner.py b/mala/network/runner.py index f62bd2b9c..beb7c6c17 100644 --- a/mala/network/runner.py +++ b/mala/network/runner.py @@ -12,6 +12,7 @@ import mala from mala.common.parallelizer import get_rank from mala.common.parameters import ParametersRunning +from mala.datahandling.fast_tensor_dataset import FastTensorDataset from mala.network.network import Network from mala.datahandling.data_scaler import DataScaler from mala.datahandling.data_handler import DataHandler @@ -78,38 +79,21 @@ def _calculate_errors( non_energy_metrics = [ metric for metric in metrics if "energy" not in metric ] - errors = self._calculate_energy_errors( - actual_outputs, predicted_outputs, energy_metrics, snapshot_number - ) + if len(energy_metrics) > 0: + errors = self._calculate_energy_errors( + actual_outputs, + predicted_outputs, + energy_metrics, + snapshot_number, + ) + else: + errors = {} for metric in non_energy_metrics: try: if metric == "ldos": error = np.mean((predicted_outputs - actual_outputs) ** 2) errors[metric] = error - elif metric == "number_of_electrons": - target_calculator = self.data.target_calculator - if ( - not isinstance(target_calculator, LDOS) - and not isinstance(target_calculator, DOS) - and not isinstance(target_calculator, Density) - ): - raise Exception( - "Cannot calculate the band energy from this observable." 
- ) - target_calculator.read_additional_calculation_data( - self.data.get_snapshot_calculation_output( - snapshot_number - ) - ) - actual = target_calculator.get_number_of_electrons( - actual_outputs - ) - predicted = target_calculator.get_number_of_electrons( - predicted_outputs - ) - errors[metric] = actual - predicted - elif metric == "density": target_calculator = self.data.target_calculator if not isinstance( @@ -179,7 +163,7 @@ def _calculate_errors( errors[metric] = np.abs(actual - predicted).mean() - elif metric == "dos_realtive": + elif metric == "dos_relative": target_calculator = self.data.target_calculator if not isinstance( target_calculator, LDOS @@ -211,9 +195,11 @@ def _calculate_errors( ).mean() * 100 ) + else: + raise Exception(f"Invalid metric ({metric}) requested.") except ValueError as e: printout( - f"Error calculating observable: {observable} for snapshot {snapshot_number}", + f"Error calculating observable: {metric} for snapshot {snapshot_number}", min_verbosity=0, ) printout(e, min_verbosity=2) @@ -241,9 +227,14 @@ def _calculate_energy_errors( Snapshot number for which the errors are calculated. """ target_calculator = self.data.target_calculator - target_calculator.read_additional_calculation_data( - self.data.get_snapshot_calculation_output(snapshot_number) + output_file = self.data.get_snapshot_calculation_output( + snapshot_number ) + if not output_file: + raise Exception( + "Output file needed for energy error calculations." + ) + target_calculator.read_additional_calculation_data(output_file) errors = {} fe_dft = target_calculator.fermi_energy_dft @@ -737,28 +728,51 @@ def _forward_entire_snapshot( from_index += snapshot.grid_size grid_size = to_index - from_index - if self.data.parameters.use_lazy_loading: - data_set.return_outputs_directly = True - actual_outputs = (data_set[from_index:to_index])[1] - else: - actual_outputs = self.data.output_data_scaler.inverse_transform( - (data_set[from_index:to_index])[1], as_numpy=True + if isinstance(data_set, FastTensorDataset): + predicted_outputs = np.zeros( + (grid_size, self.data.output_dimension) ) - - predicted_outputs = np.zeros((grid_size, self.data.output_dimension)) - - for i in range(0, number_of_batches_per_snapshot): - inputs, outputs = data_set[ - from_index - + (i * batch_size) : from_index - + ((i + 1) * batch_size) - ] - inputs = inputs.to(self.parameters._configuration["device"]) - predicted_outputs[i * batch_size : (i + 1) * batch_size, :] = ( - self.data.output_data_scaler.inverse_transform( + actual_outputs = np.zeros((grid_size, self.data.output_dimension)) + + for i in range(len(data_set)): + inputs, outputs = data_set[from_index + i] + inputs = inputs.to(self.parameters._configuration["device"]) + predicted_outputs[ + i * data_set.batch_size : (i + 1) * data_set.batch_size, : + ] = self.data.output_data_scaler.inverse_transform( self.network(inputs).to("cpu"), as_numpy=True ) + actual_outputs[ + i * data_set.batch_size : (i + 1) * data_set.batch_size, : + ] = self.data.output_data_scaler.inverse_transform( + torch.tensor(outputs), as_numpy=True + ) + else: + if self.data.parameters.use_lazy_loading: + data_set.return_outputs_directly = True + actual_outputs = (data_set[from_index:to_index])[1] + else: + actual_outputs = ( + self.data.output_data_scaler.inverse_transform( + (data_set[from_index:to_index])[1], as_numpy=True + ) + ) + + predicted_outputs = np.zeros( + (grid_size, self.data.output_dimension) ) + for i in range(0, number_of_batches_per_snapshot): + inputs, outputs = data_set[ + 
from_index + + (i * batch_size) : from_index + + ((i + 1) * batch_size) + ] + inputs = inputs.to(self.parameters._configuration["device"]) + predicted_outputs[i * batch_size : (i + 1) * batch_size, :] = ( + self.data.output_data_scaler.inverse_transform( + self.network(inputs).to("cpu"), as_numpy=True + ) + ) # Restricting the actual quantities to physical meaningful values, # i.e. restricting the (L)DOS to positive values. diff --git a/mala/network/trainer.py b/mala/network/trainer.py index a30820ea0..c37add951 100644 --- a/mala/network/trainer.py +++ b/mala/network/trainer.py @@ -55,8 +55,6 @@ def __init__(self, params, network, data, optimizer_dict=None): self.network = DDP(self.network) torch.cuda.current_stream().wait_stream(s) - self.final_test_loss = float("inf") - self.initial_test_loss = float("inf") self.final_validation_loss = float("inf") self.initial_validation_loss = float("inf") self.optimizer = None @@ -66,11 +64,9 @@ def __init__(self, params, network, data, optimizer_dict=None): self.last_loss = None self.training_data_loaders = [] self.validation_data_loaders = [] - self.test_data_loaders = [] # Samplers for the ddp case. self.train_sampler = None - self.test_sampler = None self.validation_sampler = None self.__prepare_to_train(optimizer_dict) @@ -267,12 +263,10 @@ def train_network(self): # CALCULATE INITIAL METRICS ############################ - tloss = float("inf") vloss = float("inf") # Save losses for later use. self.initial_validation_loss = vloss - self.initial_test_loss = tloss # Initialize all the counters. checkpoint_counter = 0 @@ -422,8 +416,6 @@ def train_network(self): t1 = time.time() printout(f"training time: {t1 - t0}", min_verbosity=2) - training_loss = training_loss_sum.item() / batchid - # Calculate the validation loss. and output it. torch.cuda.synchronize( self.parameters._configuration["device"] @@ -442,7 +434,6 @@ def train_network(self): self.network, inputs, outputs ) batchid += 1 - training_loss = training_loss_sum.item() / batchid dataset_fractions = ["validation"] if self.parameters.validate_on_training_data: dataset_fractions.append("train") @@ -457,6 +448,12 @@ def train_network(self): vloss = errors["validation"][ self.parameters.during_training_metric ] + if self.parameters_full.use_ddp: + vloss = self.__average_validation( + vloss, + "average_loss", + self.parameters._configuration["device"], + ) if self.parameters_full.verbosity > 1: printout("Errors:", errors, min_verbosity=2) else: @@ -558,12 +555,144 @@ def train_network(self): min_verbosity=2, ) + ############################ + # CALCULATE FINAL METRICS + ############################ + if self.parameters.after_training_metric in errors["validation"]: + self.final_validation_loss = errors["validation"][ + self.parameters.after_training_metric + ] + else: + final_errors = self._validate_network( + ["validation"], [self.parameters.after_training_metric] + ) + vloss = np.mean( + final_errors["validation"][ + self.parameters.after_training_metric + ] + ) + + if self.parameters_full.use_ddp: + vloss = self.__average_validation( + vloss, + "average_loss", + self.parameters._configuration["device"], + ) + self.final_validation_loss = vloss + # Clean-up for pre-fetching lazy loading. 
if self.data.parameters.use_lazy_loading_prefetch: self.training_data_loaders.cleanup() self.validation_data_loaders.cleanup() - if len(self.data.test_data_sets) > 0: - self.test_data_loaders.cleanup() + + def _validate_network(self, data_set_fractions, metrics): + # """Validate a network, using train or validation data.""" + self.network.eval() + errors = {} + for data_set_type in data_set_fractions: + if data_set_type == "train": + data_loaders = self.training_data_loaders + data_sets = self.data.training_data_sets + number_of_snapshots = self.data.nr_training_snapshots + offset_snapshots = 0 + + elif data_set_type == "validation": + data_loaders = self.validation_data_loaders + data_sets = self.data.validation_data_sets + number_of_snapshots = self.data.nr_validation_snapshots + offset_snapshots = self.data.nr_training_snapshots + + elif data_set_type == "test": + raise Exception( + "You should not look at test set results during training" + ) + else: + raise Exception( + f"Dataset type ({data_set_type}) not recognized." + ) + + errors[data_set_type] = {} + for metric in metrics: + errors[data_set_type][metric] = [] + + if isinstance(data_loaders, MultiLazyLoadDataLoader): + loader_id = 0 + for loader in data_loaders: + grid_size = self.data.parameters.snapshot_directories_list[ + loader_id + offset_snapshots + ].grid_size + + actual_outputs = np.zeros( + (grid_size, self.data.output_dimension) + ) + predicted_outputs = np.zeros( + (grid_size, self.data.output_dimension) + ) + last_start = 0 + + for x, y in loader: + + x = x.to(self.parameters._configuration["device"]) + length = int(x.size()[0]) + predicted_outputs[ + last_start : last_start + length, : + ] = self.data.output_data_scaler.inverse_transform( + self.network(x).to("cpu"), as_numpy=True + ) + actual_outputs[last_start : last_start + length, :] = ( + self.data.output_data_scaler.inverse_transform( + y, as_numpy=True + ) + ) + + last_start += length + errors[data_set_type] = self._calculate_errors( + actual_outputs, + predicted_outputs, + metrics, + loader_id + offset_snapshots, + ) + loader_id += 1 + else: + with torch.no_grad(): + for snapshot_number in trange( + offset_snapshots, + number_of_snapshots + offset_snapshots, + desc="Validation", + disable=self.parameters_full.verbosity < 2, + ): + # Get optimal batch size and number of batches per snapshotss + grid_size = ( + self.data.parameters.snapshot_directories_list[ + snapshot_number + ].grid_size + ) + + optimal_batch_size = ( + self._correct_batch_size_for_testing( + grid_size, self.parameters.mini_batch_size + ) + ) + number_of_batches_per_snapshot = int( + grid_size / optimal_batch_size + ) + + actual_outputs, predicted_outputs = ( + self._forward_entire_snapshot( + snapshot_number, + data_sets[0], + data_set_type[0:2], + number_of_batches_per_snapshot, + optimal_batch_size, + ) + ) + errors[data_set_type] = self._calculate_errors( + actual_outputs, + predicted_outputs, + metrics, + snapshot_number, + ) + return errors def _validate_network(self, data_set_fractions, metrics): # """Validate a network, using train, test or validation data.""" @@ -732,16 +861,6 @@ def __prepare_to_train(self, optimizer_dict): ) ) - if self.data.test_data_sets: - self.test_sampler = ( - torch.utils.data.distributed.DistributedSampler( - self.data.test_data_sets[0], - num_replicas=dist.get_world_size(), - rank=dist.get_rank(), - shuffle=False, - ) - ) - # Instantiate the learning rate scheduler, if necessary. 
if self.parameters.learning_rate_scheduler == "ReduceLROnPlateau": self.scheduler = optim.lr_scheduler.ReduceLROnPlateau( @@ -829,21 +948,6 @@ def __prepare_to_train(self, optimizer_dict): ) ) - if self.data.test_data_sets: - if isinstance(self.data.test_data_sets[0], LazyLoadDatasetSingle): - self.test_data_loaders = MultiLazyLoadDataLoader( - self.data.test_data_sets, **kwargs - ) - else: - self.test_data_loaders.append( - DataLoader( - self.data.test_data_sets[0], - batch_size=self.parameters.mini_batch_size * 1, - sampler=self.test_sampler, - **kwargs, - ) - ) - def __process_mini_batch(self, network, input_data, target_data): """Process a mini batch.""" if self.parameters._configuration["gpu"]: @@ -1007,17 +1111,14 @@ def __create_training_checkpoint(self): torch.save( save_dict, optimizer_name, _use_new_zipfile_serialization=False ) - if self.parameters.run_name != '': + if self.parameters.run_name != "": self.save_run( self.parameters.checkpoint_name, save_runner=True, save_path=self.parameters.run_name, ) else: - self.save_run( - self.parameters.checkpoint_name, - save_runner=True - ) + self.save_run(self.parameters.checkpoint_name, save_runner=True) @staticmethod def __average_validation(val, name, device="cpu"): diff --git a/test/all_lazy_loading_test.py b/test/all_lazy_loading_test.py index 065cbb86e..351c98292 100644 --- a/test/all_lazy_loading_test.py +++ b/test/all_lazy_loading_test.py @@ -38,7 +38,7 @@ def test_scaling(self): test_parameters.running.max_number_epochs = 3 test_parameters.running.mini_batch_size = 512 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.comment = "Lazy loading test." test_parameters.network.nn_type = "feed-forward" test_parameters.running.use_gpu = True @@ -157,10 +157,7 @@ def test_scaling(self): test_parameters, test_network, data_handler ) test_trainer.train_network() - training_tester.append( - test_trainer.final_test_loss - - test_trainer.initial_test_loss - ) + training_tester.append(test_trainer.final_validation_loss) elif scalingtype == "feature-wise-standard": # The lazy-loading STD equation (and to a smaller amount the @@ -269,7 +266,7 @@ def test_performance_horovod(self): test_parameters.network.layer_activations = ["LeakyReLU"] test_parameters.running.max_number_epochs = 20 test_parameters.running.mini_batch_size = 500 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.comment = "Horovod / lazy loading benchmark." 
test_parameters.network.nn_type = "feed-forward" test_parameters.manual_seed = 2021 @@ -352,8 +349,8 @@ def test_performance_horovod(self): [ hvdstring, llstring, - test_trainer.initial_test_loss, - test_trainer.final_test_loss, + test_trainer.initial_validation_loss, + test_trainer.final_validation_loss, time.time() - start_time, ] ) @@ -400,8 +397,8 @@ def _train_lazy_loading(prefetching): test_parameters.running.max_number_epochs = 100 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" - test_parameters.verbosity = 2 + test_parameters.running.optimizer = "Adam" + test_parameters.verbosity = 1 test_parameters.data.use_lazy_loading = True test_parameters.data.use_lazy_loading_prefetch = prefetching diff --git a/test/basic_gpu_test.py b/test/basic_gpu_test.py index dcd588ad1..514a70f21 100644 --- a/test/basic_gpu_test.py +++ b/test/basic_gpu_test.py @@ -91,7 +91,7 @@ def __run(use_gpu): test_parameters.running.max_number_epochs = 100 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.manual_seed = 1002 test_parameters.running.use_shuffling_for_samplers = False test_parameters.use_gpu = use_gpu @@ -150,4 +150,4 @@ def __run(use_gpu): starttime = time.time() test_trainer.train_network() - return test_trainer.final_test_loss, time.time() - starttime + return test_trainer.final_validation_loss, time.time() - starttime diff --git a/test/checkpoint_hyperopt_test.py b/test/checkpoint_hyperopt_test.py index 28889c2df..a1909f21b 100644 --- a/test/checkpoint_hyperopt_test.py +++ b/test/checkpoint_hyperopt_test.py @@ -67,7 +67,7 @@ def __original_setup(n_trials): test_parameters.running.max_number_epochs = 10 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" # Specify the number of trials, the hyperparameter optimizer should run # and the type of hyperparameter. diff --git a/test/checkpoint_training_test.py b/test/checkpoint_training_test.py index 4c56ed8eb..3bc5e83e3 100644 --- a/test/checkpoint_training_test.py +++ b/test/checkpoint_training_test.py @@ -20,7 +20,7 @@ def test_general(self): # First run the entire test. trainer = self.__original_setup(test_checkpoint_name, 40) trainer.train_network() - original_final_test_loss = trainer.final_test_loss + original_final_validation_loss = trainer.final_validation_loss # Now do the same, but cut at epoch 22 and see if it recovers the # correct result. 
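The test changes above and below replace the removed ``initial_test_loss`` / ``final_test_loss`` attributes with their validation counterparts. A minimal sketch of the post-change pattern, with names taken from this diff; ``parameters``, ``network`` and ``data_handler`` are assumed to be set up as in the basic training example:

.. code-block:: python

    # The after-training metric (default "ldos") now also determines what
    # trainer.final_validation_loss reports once training has finished.
    parameters.running.after_training_metric = "band_energy"

    trainer = mala.Trainer(parameters, network, data_handler)
    trainer.train_network()

    # initial_test_loss / final_test_loss no longer exist on the Trainer;
    # the validation losses are used instead.
    print(trainer.initial_validation_loss, trainer.final_validation_loss)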
@@ -28,9 +28,11 @@ def test_general(self): trainer.train_network() trainer = self.__resume_checkpoint(test_checkpoint_name, 40) trainer.train_network() - new_final_test_loss = trainer.final_test_loss + new_final_validation_loss = trainer.final_validation_loss assert np.isclose( - original_final_test_loss, new_final_test_loss, atol=accuracy + original_final_validation_loss, + new_final_validation_loss, + atol=accuracy, ) def test_learning_rate(self): @@ -144,7 +146,7 @@ def __original_setup( test_parameters.running.max_number_epochs = maxepochs test_parameters.running.mini_batch_size = 38 test_parameters.running.learning_rate = learning_rate - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.running.learning_rate_scheduler = ( learning_rate_scheduler ) diff --git a/test/complete_interfaces_test.py b/test/complete_interfaces_test.py index 65a26c26b..8aa7da85d 100644 --- a/test/complete_interfaces_test.py +++ b/test/complete_interfaces_test.py @@ -114,7 +114,7 @@ def test_ase_calculator(self): test_parameters.running.max_number_epochs = 100 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.targets.target_type = "LDOS" test_parameters.targets.ldos_gridsize = 11 test_parameters.targets.ldos_gridspacing_ev = 2.5 diff --git a/test/examples_test.py b/test/examples_test.py index b5aa9143a..4a83dd538 100644 --- a/test/examples_test.py +++ b/test/examples_test.py @@ -6,6 +6,7 @@ import pytest + @pytest.mark.examples class TestExamples: dir_path = os.path.dirname(__file__) @@ -13,96 +14,85 @@ class TestExamples: def test_basic_ex01(self, tmp_path): os.chdir(tmp_path) runpy.run_path( - self.dir_path + - "/../examples/basic/ex01_train_network.py" + self.dir_path + "/../examples/basic/ex01_train_network.py" ) @pytest.mark.order(after="test_basic_ex01") def test_basic_ex02(self, tmp_path): os.chdir(tmp_path) runpy.run_path( - self.dir_path + - "/../examples/basic/ex02_test_network.py" + self.dir_path + "/../examples/basic/ex02_test_network.py" ) @pytest.mark.order(after="test_basic_ex01") def test_basic_ex03(self, tmp_path): os.chdir(tmp_path) runpy.run_path( - self.dir_path + - "/../examples/basic/ex03_preprocess_data.py" + self.dir_path + "/../examples/basic/ex03_preprocess_data.py" ) @pytest.mark.order(after="test_basic_ex01") def test_basic_ex04(self, tmp_path): os.chdir(tmp_path) runpy.run_path( - self.dir_path + - "/../examples/basic/ex04_hyperparameter_optimization.py" + self.dir_path + + "/../examples/basic/ex04_hyperparameter_optimization.py" ) @pytest.mark.order(after="test_basic_ex01") def test_basic_ex05(self, tmp_path): os.chdir(tmp_path) runpy.run_path( - self.dir_path + - "/../examples/basic/ex05_run_predictions.py" + self.dir_path + "/../examples/basic/ex05_run_predictions.py" ) @pytest.mark.order(after="test_basic_ex01") def test_basic_ex06(self, tmp_path): os.chdir(tmp_path) runpy.run_path( - self.dir_path + - "/../examples/basic/ex06_ase_calculator.py" + self.dir_path + "/../examples/basic/ex06_ase_calculator.py" ) @pytest.mark.order(after="test_basic_ex01") def test_advanced_ex01(self, tmp_path): os.chdir(tmp_path) runpy.run_path( - self.dir_path + - "/../examples/advanced/ex01_checkpoint_training.py" + self.dir_path + "/../examples/advanced/ex01_checkpoint_training.py" ) @pytest.mark.order(after="test_basic_ex01") def test_advanced_ex02(self, tmp_path): os.chdir(tmp_path) 
runpy.run_path( - self.dir_path + - "/../examples/advanced/ex02_shuffle_data.py" + self.dir_path + "/../examples/advanced/ex02_shuffle_data.py" ) @pytest.mark.order(after="test_basic_ex01") def test_advanced_ex03(self, tmp_path): os.chdir(tmp_path) runpy.run_path( - self.dir_path + - "/../examples/advanced/ex03_tensor_board.py" + self.dir_path + "/../examples/advanced/ex03_tensor_board.py" ) @pytest.mark.order(after="test_basic_ex01") def test_advanced_ex04(self, tmp_path): os.chdir(tmp_path) - runpy.run_path( - self.dir_path + - "/../examples/advanced/ex04_acsd.py" - ) + runpy.run_path(self.dir_path + "/../examples/advanced/ex04_acsd.py") @pytest.mark.order(after="test_basic_ex01") def test_advanced_ex05(self, tmp_path): os.chdir(tmp_path) runpy.run_path( - self.dir_path + - "/../examples/advanced/ex05_checkpoint_hyperparameter_optimization.py" + self.dir_path + + "/../examples/advanced/ex05_checkpoint_hyperparameter_optimization.py" ) @pytest.mark.order(after="test_basic_ex01") def test_advanced_ex06(self, tmp_path): os.chdir(tmp_path) runpy.run_path( - self.dir_path + - "/../examples/advanced/ex06_distributed_hyperparameter_optimization.py" + self.dir_path + + "/../examples/advanced/ex06_distributed_hyperparameter_optimization.py" ) @pytest.mark.skipif( @@ -113,14 +103,14 @@ def test_advanced_ex06(self, tmp_path): def test_advanced_ex07(self, tmp_path): os.chdir(tmp_path) runpy.run_path( - self.dir_path + - "/../examples/advanced/ex07_advanced_hyperparameter_optimization.py" + self.dir_path + + "/../examples/advanced/ex07_advanced_hyperparameter_optimization.py" ) @pytest.mark.order(after="test_basic_ex01") def test_advanced_ex08(self, tmp_path): os.chdir(tmp_path) runpy.run_path( - self.dir_path + - "/../examples/advanced/ex08_visualize_observables.py" + self.dir_path + + "/../examples/advanced/ex08_visualize_observables.py" ) diff --git a/test/hyperopt_test.py b/test/hyperopt_test.py index bb003082a..77b0b9896 100644 --- a/test/hyperopt_test.py +++ b/test/hyperopt_test.py @@ -42,7 +42,7 @@ def test_hyperopt(self): test_parameters.running.max_number_epochs = 20 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.hyperparameters.n_trials = 20 test_parameters.hyperparameters.hyper_opt_method = "optuna" @@ -133,7 +133,7 @@ def test_distributed_hyperopt(self): test_parameters.running.max_number_epochs = 5 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.hyperparameters.n_trials = 20 test_parameters.hyperparameters.hyper_opt_method = "optuna" test_parameters.hyperparameters.study_name = "test_ho" @@ -242,7 +242,7 @@ def test_naswot_eigenvalues(self): test_parameters.running.max_number_epochs = 10 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.hyperparameters.n_trials = 8 test_parameters.hyperparameters.hyper_opt_method = "naswot" @@ -310,7 +310,7 @@ def __optimize_hyperparameters(hyper_optimizer): test_parameters.running.max_number_epochs = 20 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" 
test_parameters.hyperparameters.n_trials = 8 test_parameters.hyperparameters.hyper_opt_method = hyper_optimizer @@ -352,7 +352,7 @@ def __optimize_hyperparameters(hyper_optimizer): # If we do a NASWOT run currently we can provide an input # array of trials. test_hp_optimizer.add_hyperparameter( - "categorical", "trainingtype", choices=["Adam", "SGD"] + "categorical", "optimizer", choices=["Adam", "SGD"] ) test_hp_optimizer.add_hyperparameter( "categorical", "layer_activation_00", choices=["ReLU", "Sigmoid"] @@ -375,7 +375,7 @@ def __optimize_hyperparameters(hyper_optimizer): ) test_trainer.train_network() test_parameters.show() - return test_trainer.final_test_loss + return test_trainer.final_validation_loss def test_hyperopt_optuna_requeue_zombie_trials(self, tmp_path): @@ -391,7 +391,7 @@ def test_hyperopt_optuna_requeue_zombie_trials(self, tmp_path): test_parameters.running.max_number_epochs = 2 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.hyperparameters.n_trials = 2 test_parameters.hyperparameters.hyper_opt_method = "optuna" test_parameters.hyperparameters.study_name = "test_ho" diff --git a/test/shuffling_test.py b/test/shuffling_test.py index e637c7d2b..72d28d6ef 100644 --- a/test/shuffling_test.py +++ b/test/shuffling_test.py @@ -124,7 +124,7 @@ def test_training(self): test_parameters.running.max_number_epochs = 50 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.verbosity = 1 test_parameters.data.use_lazy_loading = True @@ -168,7 +168,7 @@ def test_training(self): test_parameters.running.max_number_epochs = 50 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.verbosity = 1 test_parameters.data.use_lazy_loading = True data_shuffler = mala.DataShuffler(test_parameters) @@ -220,7 +220,7 @@ def test_training_openpmd(self): test_parameters.running.max_number_epochs = 50 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.verbosity = 1 test_parameters.data.use_lazy_loading = True @@ -266,7 +266,7 @@ def test_training_openpmd(self): test_parameters.running.max_number_epochs = 50 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.verbosity = 1 test_parameters.data.use_lazy_loading = True diff --git a/test/workflow_test.py b/test/workflow_test.py index fa7dee018..8cc33faf6 100644 --- a/test/workflow_test.py +++ b/test/workflow_test.py @@ -29,28 +29,19 @@ def test_network_training(self): """Test whether MALA can train a NN.""" test_trainer = self.__simple_training() - assert ( - desired_loss_improvement_factor * test_trainer.initial_test_loss - > test_trainer.final_test_loss - ) + assert test_trainer.final_validation_loss < np.inf def test_network_training_openpmd(self): """Test whether MALA can train a NN.""" test_trainer = self.__simple_training(use_openpmd_data=True) - assert ( - desired_loss_improvement_factor * 
test_trainer.initial_test_loss - > test_trainer.final_test_loss - ) + assert test_trainer.final_validation_loss < np.inf def test_network_training_fast_dataset(self): """Test whether MALA can train a NN.""" test_trainer = self.__simple_training(use_fast_tensor_dataset=True) - assert ( - desired_loss_improvement_factor * test_trainer.initial_test_loss - > test_trainer.final_test_loss - ) + assert test_trainer.final_validation_loss < np.inf def test_preprocessing(self): """ @@ -191,16 +182,8 @@ def test_postprocessing_from_dos(self): self_consistent_fermi_energy = dos.get_self_consistent_fermi_energy( dos_data ) - number_of_electrons = dos.get_number_of_electrons( - dos_data, fermi_energy=self_consistent_fermi_energy - ) band_energy = dos.get_band_energy(dos_data) - assert np.isclose( - number_of_electrons, - dos.number_of_electrons_exact, - atol=accuracy_electrons, - ) assert np.isclose( band_energy, dos.band_energy_dft_calculation, @@ -232,18 +215,10 @@ def test_postprocessing(self): self_consistent_fermi_energy = ldos.get_self_consistent_fermi_energy( ldos_data ) - number_of_electrons = ldos.get_number_of_electrons( - ldos_data, fermi_energy=self_consistent_fermi_energy - ) band_energy = ldos.get_band_energy( ldos_data, fermi_energy=self_consistent_fermi_energy ) - assert np.isclose( - number_of_electrons, - ldos.number_of_electrons_exact, - atol=accuracy_electrons, - ) assert np.isclose( band_energy, ldos.band_energy_dft_calculation, @@ -403,13 +378,12 @@ def test_training_with_postprocessing_data_repo(self): data_handler.prepare_data(reparametrize_scaler=False) # Instantiate and use a Tester object. - tester.observables_to_test = ["band_energy", "number_of_electrons"] + tester.observables_to_test = ["band_energy"] errors = tester.test_snapshot(0) # Check whether the prediction is accurate enough. - assert np.isclose(errors["band_energy"], 0, atol=accuracy_predictions) assert np.isclose( - errors["number_of_electrons"], 0, atol=accuracy_predictions + errors["band_energy"], 0, atol=accuracy_predictions * 1000 ) @pytest.mark.skipif( @@ -460,9 +434,6 @@ def test_predictions(self): band_energy_tester_class = ldos_calculator.get_band_energy( predicted_ldos ) - nr_electrons_tester_class = ldos_calculator.get_number_of_electrons( - predicted_ldos - ) #################### # Now, use the predictor class to make the same prediction. @@ -478,12 +449,6 @@ def test_predictions(self): ldos_calculator.read_additional_calculation_data( os.path.join(data_path, "Be_snapshot3.out"), "espresso-out" ) - - nr_electrons_predictor_class = ( - data_handler.target_calculator.get_number_of_electrons( - predicted_ldos - ) - ) band_energy_predictor_class = ( data_handler.target_calculator.get_band_energy(predicted_ldos) ) @@ -493,11 +458,6 @@ def test_predictions(self): band_energy_tester_class, atol=accuracy_strict, ) - assert np.isclose( - nr_electrons_predictor_class, - nr_electrons_tester_class, - atol=accuracy_strict, - ) @pytest.mark.skipif( importlib.util.find_spec("total_energy") is None @@ -568,7 +528,7 @@ def __simple_training( test_parameters.running.max_number_epochs = 400 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 - test_parameters.running.trainingtype = "Adam" + test_parameters.running.optimizer = "Adam" test_parameters.data.use_fast_tensor_data_set = use_fast_tensor_dataset # Load data.