Merge branch 'develop' into feature_uncertainty
elcorto committed Jun 18, 2024
2 parents d92d92b + d59a57f commit cbca55c
Showing 38 changed files with 248 additions and 193 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/cleanup-caches.yml
@@ -3,12 +3,19 @@ on:
pull_request_target:
types:
- closed
push:
# Trigger on pushes to master or develop and for git tag pushes
branches:
- master
- develop
tags:
- v*

jobs:
cleanup:
runs-on: ubuntu-latest
steps:
- name: Cleanup
- name: Cleanup caches
run: |
gh extension install actions/gh-actions-cache
69 changes: 45 additions & 24 deletions .github/workflows/cpu-tests.yml
@@ -1,6 +1,7 @@
name: CPU tests

on:
workflow_dispatch:
pull_request:
# Trigger on pull requests to master or develop that are
# marked as "ready for review" (non-draft PRs)
@@ -33,7 +34,7 @@ jobs:
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Set environment variables
run: |
@@ -45,7 +46,7 @@ jobs:
echo "IMAGE_REPO=$IMAGE_REPO"
- name: Restore cache
uses: actions/cache@v3
uses: actions/cache@v4
id: cache-docker
with:
path: ${{ env.DOCKER_CACHE_PATH }}
@@ -73,7 +74,7 @@ jobs:
CACHE=$IMAGE_REPO/$IMAGE_NAME:latest
fi
docker build . --file Dockerfile --tag $IMAGE_NAME:local --cache-from=$CACHE --build-arg DEVICE=cpu
DOCKER_BUILDKIT=0 docker build . --file Dockerfile --tag $IMAGE_NAME:local --cache-from=$CACHE --build-arg DEVICE=cpu
# Show images
docker images --filter=reference=$IMAGE_NAME --filter=reference=$IMAGE_REPO/$IMAGE_NAME
@@ -122,7 +123,7 @@ jobs:
steps:
- name: "Prepare environment: Restore cache"
if: env.DOCKER_TAG != 'latest'
uses: actions/cache@v3
uses: actions/cache@v4
id: cache-docker
with:
path: ${{ env.DOCKER_CACHE_PATH }}
@@ -153,14 +154,14 @@ jobs:
[[ $(docker inspect --format '{{json .State.Running}}' mala-cpu) == 'true' ]]
- name: Check out repository (mala)
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Install mala package
# Exec all commands inside the mala-cpu container
shell: 'bash -c "docker exec -i mala-cpu bash < {0}"'
run: |
# epxort Docker image Conda environment for a later comparison
conda env export -n mala-cpu > env_1.yml
# export Docker image Conda environment for a later comparison
conda env export -n mala-cpu > env_before.yml
# install mala package
pip --no-cache-dir install -e .[opt,test] --no-build-isolation
@@ -169,26 +170,46 @@ jobs:
shell: 'bash -c "docker exec -i mala-cpu bash < {0}"'
run: |
# export Conda environment _with_ mala package installed in it (and extra dependencies)
conda env export -n mala-cpu > env_2.yml
conda env export -n mala-cpu > env_after.yml
# if comparison fails, `install/mala_cpu_[base]_environment.yml` needs to be aligned with
# `requirements.txt` and/or extra dependencies are missing in the Docker Conda environment
diff env_1.yml env_2.yml
diff --side-by-side --color=always env_before.yml env_after.yml
- name: Download test data repository
shell: 'bash -c "docker exec -i mala-cpu bash < {0}"'
- name: Download test data repository from RODARE
shell: 'bash -c "docker exec -i mala-cpu python < {0}"'
run: |
# Download test data repository from RODARE. If the version changes
# this URL has to be adapted (the number after /record/ and the
# version have to be incremented)
wget "https://rodare.hzdr.de/record/2999/files/mala-project/test-data-1.8.0.zip"
# Once downloaded, we have to unzip the file. The name of the root
# folder in the zip file has to be updated for data repository
# updates as well - the string at the end is the hash of the data
# repository commit.
unzip -q test-data-1.8.0.zip
mv mala-project-test-data-d5694c7 mala_data
import requests, shutil, zipfile
# This DOI represents all versions, and will always resolve to the latest one
DOI = "https://doi.org/10.14278/rodare.2900"
# Resolve DOI and get record ID and the associated API URL
response = requests.get(DOI)
*_, record_id = response.url.split("/")
api_url = f"https://rodare.hzdr.de/api/records/{record_id}"
# Download record from API and get the first file
response = requests.get(api_url)
record = response.json()
size = record["files"][0]["size"]
download_link = record["files"][0]["links"]["self"]
print(size, "bytes", "--", download_link)
# TODO: implement some sort of auto retry for failed HTTP requests
response = requests.get(download_link)
# Saving downloaded content to a file
with open("test-data.zip", mode="wb") as file:
file.write(response.content)
# Get top level directory name
dir_name = zipfile.ZipFile("test-data.zip").namelist()[0]
shutil.unpack_archive("test-data.zip", ".")
print(f"Rename {dir_name} to mala_data")
shutil.move(dir_name, "mala_data")
- name: Test mala
shell: 'bash -c "docker exec -i mala-cpu bash < {0}"'
@@ -209,11 +230,11 @@ jobs:
|| startsWith(github.ref, 'refs/tags/')
steps:
- name: Check out repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: "Prepare environment: Restore cache"
if: env.DOCKER_TAG != 'latest'
uses: actions/cache@v3
uses: actions/cache@v4
id: cache-docker
with:
path: ${{ env.DOCKER_CACHE_PATH }}
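The new download step above leaves a TODO about retrying failed HTTP requests open. A minimal sketch of how that could look, reusing the requests package already imported in the script; fetch_with_retry is a hypothetical helper, not part of this workflow:

import time
import requests

def fetch_with_retry(url, attempts=3, backoff=5.0):
    # Retry transient HTTP failures with a growing pause between attempts.
    for attempt in range(1, attempts + 1):
        try:
            response = requests.get(url, timeout=60)
            response.raise_for_status()
            return response
        except requests.RequestException as error:
            if attempt == attempts:
                raise
            print(f"Attempt {attempt} failed ({error}), retrying ...")
            time.sleep(backoff * attempt)

# Drop-in replacement for the plain requests.get(download_link) call above:
# response = fetch_with_retry(download_link)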
23 changes: 12 additions & 11 deletions .github/workflows/mirror-to-casus.yml
@@ -1,18 +1,19 @@
name: mirror
name: Mirror to CASUS

on: [push, delete]

jobs:
mirror-to-CASUS:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: mirror-repository
uses: spyoungtech/[email protected]
with:
REMOTE: 'ssh://[email protected]/casus/mala.git'
GIT_SSH_PRIVATE_KEY: ${{ secrets.GIT_SSH_KEY }}
GIT_SSH_NO_VERIFY_HOST: "true"
DEBUG: "true"
- name: Check out repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: mirror-repository
uses: spyoungtech/[email protected]
with:
REMOTE: 'ssh://[email protected]/casus/mala.git'
GIT_SSH_PRIVATE_KEY: ${{ secrets.GIT_SSH_KEY }}
GIT_SSH_NO_VERIFY_HOST: "true"
DEBUG: "true"
1 change: 0 additions & 1 deletion Dockerfile
@@ -21,7 +21,6 @@ RUN conda env create -f mala_${DEVICE}_environment.yml && rm -rf /opt/conda/pkgs
RUN /opt/conda/envs/mala-${DEVICE}/bin/pip install --no-input --no-cache-dir \
pytest \
oapackage==2.6.8 \
openpmd-api==0.15.1 \
pqkmeans

RUN echo "source activate mala-${DEVICE}" > ~/.bashrc
4 changes: 1 addition & 3 deletions examples/advanced/ex01_checkpoint_training.py
@@ -3,9 +3,7 @@
import mala
from mala import printout

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
Shows how a training run can be paused and
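This and the following example files all replace the hand-built path (os.path.join(data_repo_path, "Be2")) with a single data_path import. A plausible sketch of what mala.datahandling.data_repo provides after this change; the environment variable name and the hard-coded Be2 subdirectory are assumptions, not confirmed by this diff:

# Hypothetical sketch of mala/datahandling/data_repo.py after this commit.
import os

# Root of the unpacked test data repository (see the CI download step).
data_repo_path = os.environ.get("MALA_DATA_REPO", "mala_data")

# Centralized here so the examples no longer assemble the path themselves.
data_path = os.path.join(data_repo_path, "Be2")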
8 changes: 3 additions & 5 deletions examples/advanced/ex02_shuffle_data.py
@@ -2,14 +2,12 @@

import mala

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
Shows how data can be shuffled amongst multiple
snapshots, which is very useful in the lazy loading case, where this cannot be
easily done in memory.
snapshots, which is very useful in the lazy loading case, where this cannot be
easily done in memory.
"""


7 changes: 2 additions & 5 deletions examples/advanced/ex03_tensor_board.py
@@ -3,13 +3,10 @@
import mala
from mala import printout

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")

from mala.datahandling.data_repo import data_path

"""
Shows how a NN training by MALA can be visualized using
Shows how a NN training by MALA can be visualized using
tensorboard. The training is a basic MALA network training.
"""

6 changes: 2 additions & 4 deletions examples/advanced/ex04_acsd.py
@@ -1,13 +1,11 @@
import os

import mala
from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
Shows how MALA can be used to optimize descriptor
parameters based on the ACSD analysis (see hyperparameter paper in the
parameters based on the ACSD analysis (see hyperparameter paper in the
documentation for mathematical details).
"""

10 changes: 4 additions & 6 deletions examples/advanced/ex05_checkpoint_hyperparameter_optimization.py
@@ -2,16 +2,14 @@

import mala

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
Shows how a hyperparameter optimization run can
Shows how a hyperparameter optimization run can
be paused and resumed. Delete all ex04_*.pkl and ex04_*.pth prior to execution.
Afterwards, execute this script twice to see how MALA progresses from a
Afterwards, execute this script twice to see how MALA progresses from a
checkpoint. As the number of trials cannot be divided by the number
of epochs after which a checkpoint is created without residual, this will
of epochs after which a checkpoint is created without residual, this will
lead to MALA performing the missing trials again.
"""

@@ -2,14 +2,12 @@

import mala

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
ex09_distributed_hyperopt.py: Shows how a hyperparameter
ex09_distributed_hyperopt.py: Shows how a hyperparameter
optimization can be sped up using a RDB storage. Ideally this should be done
using a database server system, such as PostgreSQL or MySQL.
using a database server system, such as PostgreSQL or MySQL.
For this easy example, sqlite will be used. It is highly advisory not to
to use this for actual, at-scale calculations!
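MALA's hyperparameter search uses Optuna under the hood, so the "RDB storage" in this docstring corresponds to an Optuna storage URL. As a rough standalone analogy, a minimal Optuna sketch with sqlite storage; the objective and study name are illustrative and not MALA's API:

import optuna

def objective(trial):
    # Toy objective; a real study would train and score a MALA network.
    x = trial.suggest_float("x", -10.0, 10.0)
    return (x - 2.0) ** 2

# sqlite is fine for an easy example; prefer PostgreSQL/MySQL at scale.
study = optuna.create_study(
    study_name="distributed_hyperopt_demo",
    storage="sqlite:///hyperopt_demo.db",
    load_if_exists=True,
)
study.optimize(objective, n_trials=20)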
@@ -3,12 +3,10 @@
import mala
from mala import printout

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
Shows how recent developments in hyperparameter optimization techniques can be
Shows how recent developments in hyperparameter optimization techniques can be
used (OAT / training-free NAS).
REQUIRES OAPACKAGE.
13 changes: 5 additions & 8 deletions examples/advanced/ex08_visualize_observables.py
@@ -2,18 +2,15 @@

import mala

from mala.datahandling.data_repo import data_repo_path
from mala.datahandling.data_repo import data_path

atoms_path = os.path.join(
os.path.join(data_repo_path, "Be2"), "Be_snapshot1.out"
)
ldos_path = os.path.join(
os.path.join(data_repo_path, "Be2"), "Be_snapshot1.out.npy"
)
"""
Shows how MALA can be used to visualize observables of interest.
Shows how MALA can be used to visualize observables of interest.
"""

atoms_path = os.path.join(data_path, "Be_snapshot1.out")
ldos_path = os.path.join(data_path, "Be_snapshot1.out.npy")

####################
# 1. READ ELECTRONIC STRUCTURE DATA
# This data may be read as part of an ML-DFT model inference.
7 changes: 2 additions & 5 deletions examples/basic/ex01_train_network.py
@@ -2,17 +2,14 @@

import mala

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
This example shows how a neural network can be trained on material
data using this framework. It uses preprocessed data, that is read in
from *.npy files.
"""


####################
# 1. PARAMETERS
# The first step of each MALA workflow is to define a parameters object and
@@ -93,5 +90,5 @@
test_trainer.train_network()
additional_calculation_data = os.path.join(data_path, "Be_snapshot0.out")
test_trainer.save_run(
"be_model", additional_calculation_data=additional_calculation_data
"Be_model", additional_calculation_data=additional_calculation_data
)
11 changes: 6 additions & 5 deletions examples/basic/ex02_test_network.py
@@ -3,17 +3,16 @@
import mala
from mala import printout

from mala.datahandling.data_repo import data_repo_path

data_path = os.path.join(data_repo_path, "Be2")
from mala.datahandling.data_repo import data_path

"""
This example shows how a trained network can be tested
with additional test snapshots. Either execute ex01 before executing this one
or download the appropriate model from the provided test data repo.
"""
assert os.path.exists("be_model.zip"), "Be model missing, run ex01 first."

model_name = "Be_model"
model_path = "./" if os.path.exists("Be_model.zip") else data_path

####################
# 1. LOADING A NETWORK
@@ -27,7 +26,9 @@
# (output_format="list") or as an averaged value (output_format="mae")
####################

parameters, network, data_handler, tester = mala.Tester.load_run("be_model")
parameters, network, data_handler, tester = mala.Tester.load_run(
run_name=model_name, path=model_path
)
tester.observables_to_test = ["band_energy", "number_of_electrons"]
tester.output_format = "list"
parameters.data.use_lazy_loading = True
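The truncated remainder of ex02 presumably runs the actual test. A hedged sketch of a possible continuation, where test_all_snapshots is an assumed entry point whose exact name may differ between MALA versions:

# Hypothetical continuation of the example; the method name is assumed.
results = tester.test_all_snapshots()
printout("Test results:", results)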
(The diff for the remaining changed files is not shown.)
