Commit

Merge upstream

nerkulec committed Jul 22, 2024
2 parents 7163fa3 + 47f0dda commit dea71eb
Showing 30 changed files with 338 additions and 255 deletions.
31 changes: 18 additions & 13 deletions .github/workflows/cpu-tests.yml
@@ -35,6 +35,8 @@ jobs:
steps:
- name: Check out repository
uses: actions/checkout@v4
with:
fetch-depth: '1'

- name: Set environment variables
run: |
@@ -62,7 +64,7 @@ jobs:
fi
- name: Pull latest image from container registry
run: docker pull $IMAGE_REPO/$IMAGE_NAME || true
run: docker pull $IMAGE_REPO/$IMAGE_NAME --quiet || true

- name: Build temporary Docker image
run: |
@@ -131,12 +133,12 @@ jobs:

- name: "Prepare environment: Load Docker image from cache"
if: env.DOCKER_TAG != 'latest'
run: docker load -i $DOCKER_CACHE_PATH/docker-image.tar.gz
run: docker load -i $DOCKER_CACHE_PATH/docker-image.tar.gz --quiet

- name: "Prepare environment: Pull latest image from container registry"
if: env.DOCKER_TAG == 'latest'
run: |
docker pull $IMAGE_REPO/$IMAGE_NAME:latest
docker pull $IMAGE_REPO/$IMAGE_NAME:latest --quiet
docker image tag $IMAGE_REPO/$IMAGE_NAME:latest $IMAGE_NAME:latest
- name: "Prepare environment: Run Docker container"
@@ -155,6 +157,8 @@ jobs:
- name: Check out repository (mala)
uses: actions/checkout@v4
with:
fetch-depth: '1'

- name: Install mala package
# Exec all commands inside the mala-cpu container
@@ -174,7 +178,13 @@ jobs:
# if comparison fails, `install/mala_cpu_[base]_environment.yml` needs to be aligned with
# `requirements.txt` and/or extra dependencies are missing in the Docker Conda environment
diff --side-by-side --color=always env_before.yml env_after.yml
if diff --brief env_before.yml env_after.yml
then
echo "Files env_before.yml and env_after.yml do not differ."
else
diff --side-by-side --color=always env_before.yml env_after.yml
fi
- name: Download test data repository from RODARE
shell: 'bash -c "docker exec -i mala-cpu python < {0}"'
@@ -229,9 +239,6 @@ jobs:
((contains(github.ref_name, 'develop') || contains(github.ref_name, 'master')) && needs.build-docker-image-cpu.outputs.docker-tag != 'latest')
|| startsWith(github.ref, 'refs/tags/')
steps:
- name: Check out repository
uses: actions/checkout@v4

- name: "Prepare environment: Restore cache"
if: env.DOCKER_TAG != 'latest'
uses: actions/cache@v4
@@ -242,21 +249,19 @@ jobs:

- name: "Prepare environment: Load Docker image from cache"
if: env.DOCKER_TAG != 'latest'
run: docker load -i $DOCKER_CACHE_PATH/docker-image.tar.gz
run: docker load -i $DOCKER_CACHE_PATH/docker-image.tar.gz --quiet

- name: "Prepare environment: Pull latest image from container registry"
if: env.DOCKER_TAG == 'latest'
run: docker pull $IMAGE_REPO/$IMAGE_NAME:latest
run: docker pull $IMAGE_REPO/$IMAGE_NAME:latest --quiet

- name: Tag Docker image
run: |
# Execute on change of Docker image
if [[ "$DOCKER_TAG" != 'latest' ]]; then
GIT_SHA=${GITHUB_REF_NAME}-$(git rev-parse --short "$GITHUB_SHA")
echo "GIT_SHA=$GIT_SHA"
docker tag $IMAGE_NAME:$GITHUB_RUN_ID $IMAGE_REPO/$IMAGE_NAME:latest
docker tag $IMAGE_NAME:$GITHUB_RUN_ID $IMAGE_REPO/$IMAGE_NAME:$GIT_SHA
docker tag $IMAGE_NAME:$GITHUB_RUN_ID $IMAGE_REPO/$IMAGE_NAME:${GITHUB_REF_NAME}-${GITHUB_SHA:0:7}
fi
# Execute on push of git tag
@@ -272,4 +277,4 @@ jobs:
run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin

- name: Push Docker image
run: docker push $IMAGE_REPO/$IMAGE_NAME --all-tags
run: docker push $IMAGE_REPO/$IMAGE_NAME --all-tags | grep -v -E 'Waiting|Layer already|Preparing|Pushed'
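For reference, a rough Python analogue of the env_before.yml / env_after.yml comparison step in the workflow above. This is a sketch only, not part of the workflow; it assumes PyYAML is available and that both exported environment files sit in the working directory.

    import sys

    import yaml  # PyYAML

    with open("env_before.yml") as f:
        before = yaml.safe_load(f)
    with open("env_after.yml") as f:
        after = yaml.safe_load(f)

    if before == after:
        print("Files env_before.yml and env_after.yml do not differ.")
    else:
        # Show which conda dependencies changed between the two exports.
        deps_before = set(map(str, before.get("dependencies", [])))
        deps_after = set(map(str, after.get("dependencies", [])))
        print("Only in env_before.yml:", sorted(deps_before - deps_after))
        print("Only in env_after.yml:", sorted(deps_after - deps_before))
        sys.exit(1)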
2 changes: 1 addition & 1 deletion docs/source/advanced_usage/hyperparameters.rst
@@ -114,7 +114,7 @@ a physical validation metric such as

.. code-block:: python
parameters.running.after_before_training_metric = "band_energy"
parameters.running.after_training_metric = "band_energy"
Advanced optimization algorithms
********************************
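For context, a minimal sketch of the renamed option above (``after_before_training_metric`` becomes ``after_training_metric``). The surrounding data and training setup is omitted.

    import mala

    parameters = mala.Parameters()
    # Report the band-energy error before and after training instead of the
    # plain validation loss.
    parameters.running.after_training_metric = "band_energy"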
3 changes: 2 additions & 1 deletion docs/source/advanced_usage/predictions.rst
@@ -40,6 +40,8 @@ Likewise, you can adjust the inference temperature via
calculator.data_handler.target_calculator.temperature = ...
.. _production_gpu:

Predictions on GPU
*******************

@@ -137,4 +139,3 @@ With the exception of the electronic density, which is saved into the ``.cube``
format for visualization with regular electronic structure visualization
software, all of these observables can be plotted with Python based
visualization libraries such as ``matplotlib``.
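As a small illustration of the last sentence, a sketch of plotting such an observable with ``matplotlib``; the arrays below are placeholders, not MALA output.

    import matplotlib.pyplot as plt
    import numpy as np

    # Placeholder data standing in for an observable such as the DOS.
    energy_grid = np.linspace(-5.0, 15.0, 200)
    dos = np.exp(-0.5 * (energy_grid - 5.0) ** 2)

    plt.plot(energy_grid, dos)
    plt.xlabel("Energy [eV]")
    plt.ylabel("DOS [arb. units]")
    plt.savefig("dos.png")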

16 changes: 8 additions & 8 deletions docs/source/advanced_usage/trainingmodel.rst
@@ -77,7 +77,7 @@ Specifically, when setting

.. code-block:: python
parameters.running.after_before_training_metric = "band_energy"
parameters.running.after_training_metric = "band_energy"
the error in the band energy between actual and predicted LDOS will be
calculated and printed before and after network training (in meV/atom).
@@ -205,21 +205,21 @@ visualization prior to training via
# 0: No visualization, 1: loss and learning rate, 2: like 1,
# but additionally weights and biases are saved
parameters.running.visualisation = 1
parameters.running.visualisation_dir = "mala_vis"
parameters.running.logging = 1
parameters.running.logging_dir = "mala_vis"
where ``visualisation_dir`` specifies some directory in which to save the
MALA visualization data. Afterwards, you can run the training without any
where ``logging_dir`` specifies some directory in which to save the
MALA logging data. Afterwards, you can run the training without any
other modifications. Once training is finished (or during training, in case
you want to use tensorboard to monitor progress), you can launch tensorboard
via

.. code-block:: bash
tensorboard --logdir path_to_visualization
tensorboard --logdir path_to_log_directory
The full path for ``path_to_visualization`` can be accessed via
``trainer.full_visualization_path``.
The full path for ``path_to_log_directory`` can be accessed via
``trainer.full_logging_path``.


Training in parallel
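A minimal sketch of the renamed logging interface documented above (``visualisation`` to ``logging``, ``visualisation_dir`` to ``logging_dir``). Data loading and trainer construction are omitted.

    import mala

    parameters = mala.Parameters()
    # 1: log loss and learning rate; 2: additionally log weights and biases.
    parameters.running.logging = 1
    parameters.running.logging_dir = "mala_vis"

    # After training, the directory to pass to `tensorboard --logdir` is
    # available as trainer.full_logging_path (trainer construction not shown).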
4 changes: 2 additions & 2 deletions docs/source/basic_usage/hyperparameters.rst
@@ -118,9 +118,9 @@ properties of the ``Parameters`` class:
during the optimization.
- ``network.layer_sizes``
- ``"int"``, ``"categorical"``
* - ``"trainingtype"``
* - ``"optimizer"``
- Optimization algorithm used during the NN optimization.
- ``running.trainingtype``
- ``running.optimizer``
- ``"categorical"``
* - ``"mini_batch_size"``
- Size of the mini batches used to calculate the gradient during
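A minimal sketch of the renamed hyperparameter above ("trainingtype" becomes "optimizer", mapping to ``running.optimizer``). The ``mala.HyperOpt`` entry point and the omitted snapshot setup follow the bundled examples and are assumptions here, not part of this diff.

    import mala

    parameters = mala.Parameters()
    parameters.running.optimizer = "Adam"  # starting value before optimization

    # Snapshot registration is omitted in this sketch.
    data_handler = mala.DataHandler(parameters)

    hyperoptimizer = mala.HyperOpt(parameters, data_handler)
    hyperoptimizer.add_hyperparameter(
        "categorical", "optimizer", choices=["Adam", "SGD"]
    )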
2 changes: 1 addition & 1 deletion docs/source/basic_usage/trainingmodel.rst
@@ -35,7 +35,7 @@ options to train a simple network with example data, namely
parameters.running.max_number_epochs = 100
parameters.running.mini_batch_size = 40
parameters.running.learning_rate = 0.00001
parameters.running.trainingtype = "Adam"
parameters.running.optimizer = "Adam"
parameters.verbosity = 1 # level of output; 1 is standard, 0 is low, 2 is debug.
Here, we can see that the ``Parameters`` object contains multiple
10 changes: 8 additions & 2 deletions docs/source/install/installing_lammps.rst
@@ -41,18 +41,24 @@ The MALA team recommends to build LAMMPS with ``cmake``. To do so
* ``Kokkos_ARCH_GPUARCH=???``: Your GPU architecture (see `Kokkos instructions <https://docs.lammps.org/Build_extras.html#kokkos-package>`_)
* ``CMAKE_CXX_COMPILER=???``: Path to the ``nvcc_wrapper`` executable
shipped with the LAMMPS code, should be at ``/your/path/to/lammps/lib/kokkos/bin/nvcc_wrapper``
* For example, this configures the LAMMPS cmake build with Kokkos support
for an Intel Haswell CPU and an Nvidia Volta GPU, with MPI support:

For example, this configures the LAMMPS cmake build with Kokkos support
for an Intel Haswell CPU and an Nvidia Volta GPU, with MPI support:

.. code-block:: bash
cmake ../cmake -D PKG_KOKKOS=yes -D BUILD_MPI=yes -D PKG_ML-SNAP=yes -D Kokkos_ENABLE_CUDA=yes -D Kokkos_ARCH_HSW=yes -D Kokkos_ARCH_VOLTA70=yes -D CMAKE_CXX_COMPILER=/path/to/lammps/lib/kokkos/bin/nvcc_wrapper -D BUILD_SHARED_LIBS=yes
.. note::
When using a GPU by setting ``parameters.use_gpu = True``, you *need* to
have a GPU version of ``LAMMPS`` installed. See :ref:`production_gpu` for
details.

* Build the library and executable with ``cmake --build .``
(Add ``--parallel=8`` for a faster build)



Installing the Python extension
********************************

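To connect the note above with the MALA side, a minimal sketch of the flag it refers to; nothing here builds or locates LAMMPS.

    import mala

    parameters = mala.Parameters()
    # With use_gpu enabled, descriptor calculation through LAMMPS expects the
    # Kokkos/GPU build described in this section.
    parameters.use_gpu = True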
23 changes: 15 additions & 8 deletions docs/source/install/installing_qe.rst
@@ -4,24 +4,25 @@ Installing Quantum ESPRESSO (total energy module)
Prerequisites
*************

To run the total energy module, you need a full Quantum ESPRESSO installation,
for which to install the Python bindings. This module has been tested with
version ``7.2.``, the most recent version at the time of this release of MALA.
Newer versions may work (untested), but installation instructions may vary.
To build and run the total energy module, you need a full Quantum ESPRESSO
installation, for which to install the Python bindings. This module has been
tested with version ``7.2.``, the most recent version at the time of this
release of MALA. Newer versions may work (untested), but installation
instructions may vary.

Make sure you have an (MPI-aware) F90 compiler such as ``mpif90`` (e.g.
Debian-ish machine: ``apt install openmpi-bin``, on an HPC cluster something
like ``module load openmpi gcc``). Make sure to use the same compiler
for QE and the extension. This should be the default case, but if problems
arise you can manually select the compiler via
``--f90exec=`` in ``build_total_energy_energy_module.sh``
``--f90exec=`` in ``build_total_energy_module.sh``

We assume that QE's ``configure`` script will find your system libs, e.g. use
``-lblas``, ``-llapack`` and ``-lfftw3``. We use those by default in
``build_total_energy_energy_module.sh``. If you have, say, the MKL library,
``build_total_energy_module.sh``. If you have, say, the MKL library,
you may see ``configure`` use something like ``-lmkl_intel_lp64 -lmkl_sequential -lmkl_core``
when building QE. In this case you have to modify
``build_total_energy_energy_module.sh`` to use the same libraries!
``build_total_energy_module.sh`` to use the same libraries!

Build Quantum ESPRESSO
**********************
@@ -35,10 +36,16 @@ Build Quantum ESPRESSO
* Change to the ``external_modules/total_energy_module`` directory of the
MALA repository

.. note::
At the moment, building QE using ``cmake`` `doesn't work together with the
build_total_energy_module.sh script
<https://github.com/mala-project/mala/issues/468>`_. Please use the
``configure`` + ``make`` build workflow.

Installing the Python extension
********************************

* Run ``build_total_energy_energy_module.sh /path/to/your/q-e``.
* Run ``build_total_energy_module.sh /path/to/your/q-e``.

* If the build is successful, a file named something like
``total_energy.cpython-39m-x86_64-linux-gnu.so`` will be generated. This is
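A quick way to check the build result mentioned above, assuming the directory containing the generated ``.so`` file is on ``PYTHONPATH`` (the module name follows from the file name).

    # Run from external_modules/total_energy_module, or add that directory to
    # PYTHONPATH first.
    import total_energy

    print("total_energy module imported successfully")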
2 changes: 1 addition & 1 deletion examples/advanced/ex01_checkpoint_training.py
@@ -26,7 +26,7 @@ def initial_setup():
parameters.running.max_number_epochs = 9
parameters.running.mini_batch_size = 8
parameters.running.learning_rate = 0.00001
parameters.running.trainingtype = "Adam"
parameters.running.optimizer = "Adam"

# We checkpoint the training every 5 epochs and save the results
# as "ex07".
4 changes: 2 additions & 2 deletions examples/advanced/ex03_tensor_board.py
@@ -18,7 +18,7 @@
parameters.running.max_number_epochs = 100
parameters.running.mini_batch_size = 40
parameters.running.learning_rate = 0.001
parameters.running.trainingtype = "Adam"
parameters.running.optimizer = "Adam"

# Turn the visualization on and select a folder to save the visualization
# files into.
@@ -45,6 +45,6 @@
trainer.train_network()
printout(
'Run finished, launch tensorboard with "tensorboard --logdir '
+ trainer.full_visualization_path
+ trainer.full_logging_path
+ '"'
)
@@ -21,7 +21,7 @@ def initial_setup():
parameters.running.max_number_epochs = 10
parameters.running.mini_batch_size = 40
parameters.running.learning_rate = 0.00001
parameters.running.trainingtype = "Adam"
parameters.running.optimizer = "Adam"
parameters.hyperparameters.n_trials = 9
parameters.hyperparameters.checkpoints_each_trial = 5
parameters.hyperparameters.checkpoint_name = "ex05_checkpoint"
@@ -28,7 +28,7 @@
parameters.running.max_number_epochs = 5
parameters.running.mini_batch_size = 40
parameters.running.learning_rate = 0.00001
parameters.running.trainingtype = "Adam"
parameters.running.optimizer = "Adam"
parameters.hyperparameters.n_trials = 10
parameters.hyperparameters.checkpoints_each_trial = -1
parameters.hyperparameters.checkpoint_name = "ex06"
@@ -44,7 +44,7 @@
parameters.targets.ldos_gridspacing_ev = 2.5
parameters.targets.ldos_gridoffset_ev = -5
parameters.hyperparameters.number_training_per_trial = 3
parameters.running.after_before_training_metric = "band_energy"
parameters.running.after_training_metric = "band_energy"

data_handler = mala.DataHandler(parameters)

@@ -21,7 +21,7 @@ def optimize_hyperparameters(hyper_optimizer):
parameters.running.max_number_epochs = 10
parameters.running.mini_batch_size = 40
parameters.running.learning_rate = 0.00001
parameters.running.trainingtype = "Adam"
parameters.running.optimizer = "Adam"
parameters.hyperparameters.n_trials = 8
parameters.hyperparameters.hyper_opt_method = hyper_optimizer

@@ -64,7 +64,7 @@ def optimize_hyperparameters(hyper_optimizer):
data_handler.output_dimension,
]
hyperoptimizer.add_hyperparameter(
"categorical", "trainingtype", choices=["Adam", "SGD"]
"categorical", "optimizer", choices=["Adam", "SGD"]
)
hyperoptimizer.add_hyperparameter(
"categorical", "layer_activation_00", choices=["ReLU", "Sigmoid"]
2 changes: 1 addition & 1 deletion examples/basic/ex01_train_network.py
@@ -28,7 +28,7 @@
parameters.running.max_number_epochs = 100
parameters.running.mini_batch_size = 40
parameters.running.learning_rate = 0.00001
parameters.running.trainingtype = "Adam"
parameters.running.optimizer = "Adam"
# These parameters characterize how the LDOS and bispectrum descriptors
# were calculated. They are _technically_ not needed to train a simple
# network. However, it is useful to define them prior to training. Then,
6 changes: 3 additions & 3 deletions examples/basic/ex02_test_network.py
@@ -21,15 +21,15 @@
# It is recommended to enable the "lazy-loading" feature, so that
# data is loaded into memory one snapshot at a time during testing - this
# helps keep RAM requirement down. Furthermore, you have to decide which
# observables to test (usual choices are "band_energy", "total_energy" and
# "number_of_electrons") and whether you want the results per snapshot
# observables to test (usual choices are "band_energy", "total_energy")
# and whether you want the results per snapshot
# (output_format="list") or as an averaged value (output_format="mae")
####################

parameters, network, data_handler, tester = mala.Tester.load_run(
run_name=model_name, path=model_path
)
tester.observables_to_test = ["band_energy", "number_of_electrons"]
tester.observables_to_test = ["band_energy", "density"]
tester.output_format = "list"
parameters.data.use_lazy_loading = True

2 changes: 1 addition & 1 deletion examples/basic/ex04_hyperparameter_optimization.py
@@ -22,7 +22,7 @@
parameters.data.output_rescaling_type = "normal"
parameters.running.max_number_epochs = 20
parameters.running.mini_batch_size = 40
parameters.running.trainingtype = "Adam"
parameters.running.optimizer = "Adam"
parameters.hyperparameters.n_trials = 20

####################