Merge branch 'develop' into feature_uncertainty
elcorto committed Jun 18, 2024
2 parents d92d92b + d59a57f commit cbca55c
Showing 38 changed files with 248 additions and 193 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/cleanup-caches.yml
@@ -3,12 +3,19 @@ on:
pull_request_target:
types:
- closed
push:
# Trigger on pushes to master or develop and for git tag pushes
branches:
- master
- develop
tags:
- v*

jobs:
cleanup:
runs-on: ubuntu-latest
steps:
- name: Cleanup
- name: Cleanup caches
run: |
gh extension install actions/gh-actions-cache
69 changes: 45 additions & 24 deletions .github/workflows/cpu-tests.yml
@@ -1,6 +1,7 @@
name: CPU tests

on:
workflow_dispatch:
pull_request:
# Trigger on pull requests to master or develop that are
# marked as "ready for review" (non-draft PRs)
@@ -33,7 +34,7 @@ jobs:
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Set environment variables
run: |
@@ -45,7 +46,7 @@ jobs:
echo "IMAGE_REPO=$IMAGE_REPO"
- name: Restore cache
uses: actions/cache@v3
uses: actions/cache@v4
id: cache-docker
with:
path: ${{ env.DOCKER_CACHE_PATH }}
@@ -73,7 +74,7 @@ jobs:
CACHE=$IMAGE_REPO/$IMAGE_NAME:latest
fi
docker build . --file Dockerfile --tag $IMAGE_NAME:local --cache-from=$CACHE --build-arg DEVICE=cpu
DOCKER_BUILDKIT=0 docker build . --file Dockerfile --tag $IMAGE_NAME:local --cache-from=$CACHE --build-arg DEVICE=cpu
# Show images
docker images --filter=reference=$IMAGE_NAME --filter=reference=$IMAGE_REPO/$IMAGE_NAME
@@ -122,7 +123,7 @@ jobs:
steps:
- name: "Prepare environment: Restore cache"
if: env.DOCKER_TAG != 'latest'
uses: actions/cache@v3
uses: actions/cache@v4
id: cache-docker
with:
path: ${{ env.DOCKER_CACHE_PATH }}
@@ -153,14 +154,14 @@ jobs:
[[ $(docker inspect --format '{{json .State.Running}}' mala-cpu) == 'true' ]]
- name: Check out repository (mala)
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Install mala package
# Exec all commands inside the mala-cpu container
shell: 'bash -c "docker exec -i mala-cpu bash < {0}"'
run: |
# epxort Docker image Conda environment for a later comparison
conda env export -n mala-cpu > env_1.yml
# export Docker image Conda environment for a later comparison
conda env export -n mala-cpu > env_before.yml
# install mala package
pip --no-cache-dir install -e .[opt,test] --no-build-isolation
@@ -169,26 +170,46 @@ jobs:
shell: 'bash -c "docker exec -i mala-cpu bash < {0}"'
run: |
# export Conda environment _with_ mala package installed in it (and extra dependencies)
conda env export -n mala-cpu > env_2.yml
conda env export -n mala-cpu > env_after.yml
# if comparison fails, `install/mala_cpu_[base]_environment.yml` needs to be aligned with
# `requirements.txt` and/or extra dependencies are missing in the Docker Conda environment
diff env_1.yml env_2.yml
diff --side-by-side --color=always env_before.yml env_after.yml
- name: Download test data repository
shell: 'bash -c "docker exec -i mala-cpu bash < {0}"'
- name: Download test data repository from RODARE
shell: 'bash -c "docker exec -i mala-cpu python < {0}"'
run: |
# Download test data repository from RODARE. If the version changes
# this URL has to be adapted (the number after /record/ and the
# version have to be incremented)
wget "https://rodare.hzdr.de/record/2999/files/mala-project/test-data-1.8.0.zip"
# Once downloaded, we have to unzip the file. The name of the root
# folder in the zip file has to be updated for data repository
# updates as well - the string at the end is the hash of the data
# repository commit.
unzip -q test-data-1.8.0.zip
mv mala-project-test-data-d5694c7 mala_data
import requests, shutil, zipfile
# This DOI represents all versions, and will always resolve to the latest one
DOI = "https://doi.org/10.14278/rodare.2900"
# Resolve DOI and get record ID and the associated API URL
response = requests.get(DOI)
*_, record_id = response.url.split("/")
api_url = f"https://rodare.hzdr.de/api/records/{record_id}"
# Download record from API and get the first file
response = requests.get(api_url)
record = response.json()
size = record["files"][0]["size"]
download_link = record["files"][0]["links"]["self"]
print(size, "bytes", "--", download_link)
# TODO: implement some sort of auto retry for failed HTTP requests
response = requests.get(download_link)
# Saving downloaded content to a file
with open("test-data.zip", mode="wb") as file:
file.write(response.content)
# Get top level directory name
dir_name = zipfile.ZipFile("test-data.zip").namelist()[0]
shutil.unpack_archive("test-data.zip", ".")
print(f"Rename {dir_name} to mala_data")
shutil.move(dir_name, "mala_data")
- name: Test mala
shell: 'bash -c "docker exec -i mala-cpu bash < {0}"'
@@ -209,11 +230,11 @@ jobs:
|| startsWith(github.ref, 'refs/tags/')
steps:
- name: Check out repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: "Prepare environment: Restore cache"
if: env.DOCKER_TAG != 'latest'
uses: actions/cache@v3
uses: actions/cache@v4
id: cache-docker
with:
path: ${{ env.DOCKER_CACHE_PATH }}
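The new download step above leaves a TODO about retrying failed HTTP requests open. A minimal sketch of how that could look, reusing the requests package already imported in the script; fetch_with_retry is a hypothetical helper, not part of this workflow:

import time
import requests

def fetch_with_retry(url, attempts=3, backoff=5.0):
    # Retry transient HTTP failures with a growing pause between attempts.
    for attempt in range(1, attempts + 1):
        try:
            response = requests.get(url, timeout=60)
            response.raise_for_status()
            return response
        except requests.RequestException as error:
            if attempt == attempts:
                raise
            print(f"Attempt {attempt} failed ({error}), retrying ...")
            time.sleep(backoff * attempt)

# Drop-in replacement for the plain requests.get(download_link) call above:
# response = fetch_with_retry(download_link)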
23 changes: 12 additions & 11 deletions .github/workflows/mirror-to-casus.yml
@@ -1,18 +1,19 @@
name: mirror
name: Mirror to CASUS

on: [push, delete]

jobs:
mirror-to-CASUS:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: mirror-repository
uses: spyoungtech/[email protected]
with:
REMOTE: 'ssh://[email protected]/casus/mala.git'
GIT_SSH_PRIVATE_KEY: ${{ secrets.GIT_SSH_KEY }}
GIT_SSH_NO_VERIFY_HOST: "true"
DEBUG: "true"
- name: Check out repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: mirror-repository
uses: spyoungtech/[email protected]
with:
REMOTE: 'ssh://[email protected]/casus/mala.git'
GIT_SSH_PRIVATE_KEY: ${{ secrets.GIT_SSH_KEY }}
GIT_SSH_NO_VERIFY_HOST: "true"
DEBUG: "true"
1 change: 0 additions & 1 deletion Dockerfile
@@ -21,7 +21,6 @@ RUN conda env create -f mala_${DEVICE}_environment.yml && rm -rf /opt/conda/pkgs
RUN /opt/conda/envs/mala-${DEVICE}/bin/pip install --no-input --no-cache-dir \
pytest \
oapackage==2.6.8 \
openpmd-api==0.15.1 \
pqkmeans

RUN echo "source activate mala-${DEVICE}" > ~/.bashrc
4 changes: 1 addition & 3 deletions examples/advanced/ex01_checkpoint_training.py
@@ -3,9 +3,7 @@
import mala
from mala import printout

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
Shows how a training run can be paused and
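This and the following example files all replace the hand-built path (os.path.join(data_repo_path, "Be2")) with a single data_path import. A plausible sketch of what mala.datahandling.data_repo provides after this change; the environment variable name and the hard-coded Be2 subdirectory are assumptions, not confirmed by this diff:

# Hypothetical sketch of mala/datahandling/data_repo.py after this commit.
import os

# Root of the unpacked test data repository (see the CI download step).
data_repo_path = os.environ.get("MALA_DATA_REPO", "mala_data")

# Centralized here so the examples no longer assemble the path themselves.
data_path = os.path.join(data_repo_path, "Be2")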
8 changes: 3 additions & 5 deletions examples/advanced/ex02_shuffle_data.py
@@ -2,14 +2,12 @@

import mala

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
Shows how data can be shuffled amongst multiple
snapshots, which is very useful in the lazy loading case, where this cannot be
easily done in memory.
snapshots, which is very useful in the lazy loading case, where this cannot be
easily done in memory.
"""


7 changes: 2 additions & 5 deletions examples/advanced/ex03_tensor_board.py
@@ -3,13 +3,10 @@
import mala
from mala import printout

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")

from mala.datahandling.data_repo import data_path

"""
Shows how a NN training by MALA can be visualized using
Shows how a NN training by MALA can be visualized using
tensorboard. The training is a basic MALA network training.
"""

6 changes: 2 additions & 4 deletions examples/advanced/ex04_acsd.py
@@ -1,13 +1,11 @@
import os

import mala
from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
Shows how MALA can be used to optimize descriptor
parameters based on the ACSD analysis (see hyperparameter paper in the
parameters based on the ACSD analysis (see hyperparameter paper in the
documentation for mathematical details).
"""

10 changes: 4 additions & 6 deletions examples/advanced/ex05_checkpoint_hyperparameter_optimization.py
@@ -2,16 +2,14 @@

import mala

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
Shows how a hyperparameter optimization run can
Shows how a hyperparameter optimization run can
be paused and resumed. Delete all ex04_*.pkl and ex04_*.pth prior to execution.
Afterwards, execute this script twice to see how MALA progresses from a
Afterwards, execute this script twice to see how MALA progresses from a
checkpoint. As the number of trials cannot be divided by the number
of epochs after which a checkpoint is created without residual, this will
of epochs after which a checkpoint is created without residual, this will
lead to MALA performing the missing trials again.
"""

@@ -2,14 +2,12 @@

import mala

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
ex09_distributed_hyperopt.py: Shows how a hyperparameter
ex09_distributed_hyperopt.py: Shows how a hyperparameter
optimization can be sped up using a RDB storage. Ideally this should be done
using a database server system, such as PostgreSQL or MySQL.
using a database server system, such as PostgreSQL or MySQL.
For this easy example, sqlite will be used. It is highly advisory not to
to use this for actual, at-scale calculations!
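MALA's hyperparameter search uses Optuna under the hood, so the "RDB storage" in this docstring corresponds to an Optuna storage URL. As a rough standalone analogy, a minimal Optuna sketch with sqlite storage; the objective and study name are illustrative and not MALA's API:

import optuna

def objective(trial):
    # Toy objective; a real study would train and score a MALA network.
    x = trial.suggest_float("x", -10.0, 10.0)
    return (x - 2.0) ** 2

# sqlite is fine for an easy example; prefer PostgreSQL/MySQL at scale.
study = optuna.create_study(
    study_name="distributed_hyperopt_demo",
    storage="sqlite:///hyperopt_demo.db",
    load_if_exists=True,
)
study.optimize(objective, n_trials=20)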
@@ -3,12 +3,10 @@
import mala
from mala import printout

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
Shows how recent developments in hyperparameter optimization techniques can be
Shows how recent developments in hyperparameter optimization techniques can be
used (OAT / training-free NAS).
REQUIRES OAPACKAGE.
13 changes: 5 additions & 8 deletions examples/advanced/ex08_visualize_observables.py
@@ -2,18 +2,15 @@

import mala

from mala.datahandling.data_repo import data_repo_path
from mala.datahandling.data_repo import data_path

atoms_path = os.path.join(
os.path.join(data_repo_path, "Be2"), "Be_snapshot1.out"
)
ldos_path = os.path.join(
os.path.join(data_repo_path, "Be2"), "Be_snapshot1.out.npy"
)
"""
Shows how MALA can be used to visualize observables of interest.
Shows how MALA can be used to visualize observables of interest.
"""

atoms_path = os.path.join(data_path, "Be_snapshot1.out")
ldos_path = os.path.join(data_path, "Be_snapshot1.out.npy")

####################
# 1. READ ELECTRONIC STRUCTURE DATA
# This data may be read as part of an ML-DFT model inference.
7 changes: 2 additions & 5 deletions examples/basic/ex01_train_network.py
@@ -2,17 +2,14 @@

import mala

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
This example shows how a neural network can be trained on material
data using this framework. It uses preprocessed data, that is read in
from *.npy files.
"""


####################
# 1. PARAMETERS
# The first step of each MALA workflow is to define a parameters object and
@@ -93,5 +90,5 @@
test_trainer.train_network()
additional_calculation_data = os.path.join(data_path, "Be_snapshot0.out")
test_trainer.save_run(
"be_model", additional_calculation_data=additional_calculation_data
"Be_model", additional_calculation_data=additional_calculation_data
)
11 changes: 6 additions & 5 deletions examples/basic/ex02_test_network.py
@@ -3,17 +3,16 @@
import mala
from mala import printout

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
This example shows how a trained network can be tested
with additional test snapshots. Either execute ex01 before executing this one
or download the appropriate model from the provided test data repo.
"""
assert os.path.exists("be_model.zip"), "Be model missing, run ex01 first."

model_name = "Be_model"
model_path = "./" if os.path.exists("Be_model.zip") else data_path

####################
# 1. LOADING A NETWORK
@@ -27,7 +26,9 @@
# (output_format="list") or as an averaged value (output_format="mae")
####################

parameters, network, data_handler, tester = mala.Tester.load_run("be_model")
parameters, network, data_handler, tester = mala.Tester.load_run(
run_name=model_name, path=model_path
)
tester.observables_to_test = ["band_energy", "number_of_electrons"]
tester.output_format = "list"
parameters.data.use_lazy_loading = True
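The truncated remainder of ex02 presumably runs the actual test. A hedged sketch of a possible continuation, where test_all_snapshots is an assumed entry point whose exact name may differ between MALA versions:

# Hypothetical continuation of the example; the method name is assumed.
results = tester.test_all_snapshots()
printout("Test results:", results)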
(The diff for the remaining changed files is not shown.)
