Skip to content

Commit

Permalink
Merge pull request #530 from franzpoeschel/convert-numpy-openpmd
Browse files Browse the repository at this point in the history
DataConverter class: Add reading logic for openPMD and numpy
  • Loading branch information
RandomDefaultUser authored Nov 15, 2024
2 parents 9a45d69 + 09ed574 commit 68f2a2d
Show file tree
Hide file tree
Showing 5 changed files with 228 additions and 4 deletions.
100 changes: 100 additions & 0 deletions examples/advanced/ex10_convert_numpy_openpmd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""
Example: converting snapshot data between Numpy and openPMD files.

The script performs three conversions with ``mala.DataConverter``:

1. Numpy files from ``data_path`` are written as openPMD (``.bp5``) files.
2. Those openPMD files are converted back to Numpy and compared against the
   original Numpy arrays with ``np.testing.assert_allclose``.
3. openPMD (``.h5``) files from ``data_path`` are converted to Numpy.

All output is written to the current working directory.
"""

import os

import numpy as np

import mala
from mala.datahandling.data_repo import data_path

parameters = mala.Parameters()
parameters.descriptors.descriptors_contain_xyz = False

# First, convert from Numpy files to openPMD.

data_converter = mala.DataConverter(parameters)

for snapshot in range(2):
    data_converter.add_snapshot(
        descriptor_input_type="numpy",
        descriptor_input_path=os.path.join(
            data_path, "Be_snapshot{}.in.npy".format(snapshot)
        ),
        target_input_type="numpy",
        target_input_path=os.path.join(
            data_path, "Be_snapshot{}.out.npy".format(snapshot)
        ),
        additional_info_input_type=None,
        additional_info_input_path=None,
        target_units=None,
    )

data_converter.convert_snapshots(
    descriptor_save_path="./",
    target_save_path="./",
    additional_info_save_path="./",
    naming_scheme="converted_from_numpy_*.bp5",
    descriptor_calculation_kwargs={"working_directory": "./"},
)

# Convert those files back to Numpy to verify the data stays the same.

data_converter = mala.DataConverter(parameters)

for snapshot in range(2):
    data_converter.add_snapshot(
        descriptor_input_type="openpmd",
        descriptor_input_path="converted_from_numpy_{}.in.bp5".format(
            snapshot
        ),
        target_input_type="openpmd",
        target_input_path="converted_from_numpy_{}.out.bp5".format(snapshot),
        additional_info_input_type=None,
        additional_info_input_path=None,
        target_units=None,
    )

data_converter.convert_snapshots(
    descriptor_save_path="./",
    target_save_path="./",
    additional_info_save_path="./",
    naming_scheme="verify_against_original_numpy_data_*.npy",
    descriptor_calculation_kwargs={"working_directory": "./"},
)

# Compare every round-tripped array (descriptors "in" and targets "out")
# against the Numpy file it originated from.
for snapshot in range(2):
    for i_o in ["in", "out"]:
        original = os.path.join(
            data_path, "Be_snapshot{}.{}.npy".format(snapshot, i_o)
        )
        roundtrip = "verify_against_original_numpy_data_{}.{}.npy".format(
            snapshot, i_o
        )

        original_a = np.load(original)
        roundtrip_a = np.load(roundtrip)
        np.testing.assert_allclose(original_a, roundtrip_a)

# Now, convert some openPMD data back to Numpy.

data_converter = mala.DataConverter(parameters)

for snapshot in range(2):
    data_converter.add_snapshot(
        descriptor_input_type="openpmd",
        descriptor_input_path=os.path.join(
            data_path, "Be_snapshot{}.in.h5".format(snapshot)
        ),
        target_input_type="openpmd",
        target_input_path=os.path.join(
            data_path, "Be_snapshot{}.out.h5".format(snapshot)
        ),
        additional_info_input_type=None,
        additional_info_input_path=None,
        target_units=None,
    )

data_converter.convert_snapshots(
    descriptor_save_path="./",
    target_save_path="./",
    additional_info_save_path="./",
    naming_scheme="converted_from_openpmd_*.npy",
    descriptor_calculation_kwargs={"working_directory": "./"},
)
13 changes: 11 additions & 2 deletions mala/common/physical_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,11 @@ def write_to_openpmd_iteration(
atoms_openpmd["position"][str(atom)].unit_SI = 1.0e-10
atoms_openpmd["positionOffset"][str(atom)].unit_SI = 1.0e-10

if any(i == 0 for i in self.grid_dimensions) and not isinstance(
array, self.SkipArrayWriting
):
self.grid_dimensions = array.shape[0:-1]

dataset = (
array.dataset
if isinstance(array, self.SkipArrayWriting)
Expand All @@ -564,8 +569,12 @@ def write_to_openpmd_iteration(
# Global feature sizes:
feature_global_from = 0
feature_global_to = self.feature_size
if feature_global_to == 0 and isinstance(array, self.SkipArrayWriting):
feature_global_to = array.feature_size
if feature_global_to == 0:
feature_global_to = (
array.feature_size
if isinstance(array, self.SkipArrayWriting)
else array.shape[-1]
)

# First loop: Only metadata, write metadata equivalently across ranks
for current_feature in range(feature_global_from, feature_global_to):
Expand Down
40 changes: 38 additions & 2 deletions mala/datahandling/data_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from mala.targets.target import Target
from mala.version import __version__ as mala_version

descriptor_input_types = ["espresso-out"]
target_input_types = [".cube", ".xsf"]
descriptor_input_types = ["espresso-out", "openpmd", "numpy"]
target_input_types = [".cube", ".xsf", "openpmd", "numpy"]
additional_info_input_types = ["espresso-out"]


Expand Down Expand Up @@ -547,6 +547,20 @@ def __convert_single_snapshot(
)
)

elif description["input"] == "openpmd":
if self.parameters_full.descriptors.descriptors_contain_xyz:
printout(
"[Warning] parameters.descriptors.descriptors_contain_xyz is True, will be ignored since this mode is unimplemented for openPMD data."
)
self.descriptor_calculator._feature_mask = lambda: 0
tmp_input = self.descriptor_calculator.read_from_openpmd_file(
snapshot["input"], units=original_units["input"]
)
elif description["input"] == "numpy":
tmp_input = self.descriptor_calculator.read_from_numpy_file(
snapshot["input"], units=original_units["input"]
)

elif description["input"] is None:
# In this case, only the output is processed.
pass
Expand Down Expand Up @@ -617,6 +631,17 @@ def __convert_single_snapshot(
snapshot["output"], **target_calculator_kwargs
)

elif description["output"] == "openpmd":
tmp_output = self.target_calculator.read_from_openpmd_file(
snapshot["output"], units=original_units["output"]
)
elif description["output"] == "numpy":
tmp_output = (
self.target_calculator.read_from_numpy_file(
snapshot["output"], units=original_units["output"]
)
)

elif description["output"] is None:
# In this case, only the input is processed.
pass
Expand Down Expand Up @@ -657,6 +682,17 @@ def __convert_single_snapshot(
snapshot["output"], **target_calculator_kwargs
)

elif description["output"] == "openpmd":
tmp_output = self.target_calculator.read_from_openpmd_file(
snapshot["output"], units=original_units["output"]
)
elif description["output"] == "numpy":
tmp_output = (
self.target_calculator.read_from_numpy_file(
snapshot["output"]
)
)

elif description["output"] is None:
# In this case, only the input is processed.
pass
Expand Down
71 changes: 71 additions & 0 deletions test/complete_interfaces_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,77 @@ def test_openpmd_io(self):
rtol=accuracy_fine,
)

@pytest.mark.skipif(
    importlib.util.find_spec("openpmd_api") is None,
    reason="No OpenPMD found on this machine, skipping " "test.",
)
def test_convert_numpy_openpmd(self):
    """
    Check that a Numpy -> openPMD -> Numpy round trip preserves the data.

    Two snapshots are converted from the Numpy files in ``data_path`` to
    openPMD (``.bp5``) files, converted back to Numpy, and the resulting
    arrays are compared with the originals via
    ``np.testing.assert_allclose``. All intermediate files are written to
    the current working directory.
    """
    # Imported once at function scope instead of on every pass through the
    # nested comparison loop below.
    import numpy as np

    parameters = mala.Parameters()
    parameters.descriptors.descriptors_contain_xyz = False

    # Numpy -> openPMD.
    data_converter = mala.DataConverter(parameters)
    for snapshot in range(2):
        data_converter.add_snapshot(
            descriptor_input_type="numpy",
            descriptor_input_path=os.path.join(
                data_path, "Be_snapshot{}.in.npy".format(snapshot)
            ),
            target_input_type="numpy",
            target_input_path=os.path.join(
                data_path, "Be_snapshot{}.out.npy".format(snapshot)
            ),
            additional_info_input_type=None,
            additional_info_input_path=None,
            target_units=None,
        )

    data_converter.convert_snapshots(
        descriptor_save_path="./",
        target_save_path="./",
        additional_info_save_path="./",
        naming_scheme="converted_from_numpy_*.bp5",
        descriptor_calculation_kwargs={"working_directory": "./"},
    )

    # Convert those files back to Numpy to verify the data stays the same.

    data_converter = mala.DataConverter(parameters)

    for snapshot in range(2):
        data_converter.add_snapshot(
            descriptor_input_type="openpmd",
            descriptor_input_path="converted_from_numpy_{}.in.bp5".format(
                snapshot
            ),
            target_input_type="openpmd",
            target_input_path="converted_from_numpy_{}.out.bp5".format(
                snapshot
            ),
            additional_info_input_type=None,
            additional_info_input_path=None,
            target_units=None,
        )

    data_converter.convert_snapshots(
        descriptor_save_path="./",
        target_save_path="./",
        additional_info_save_path="./",
        naming_scheme="verify_against_original_numpy_data_*.npy",
        descriptor_calculation_kwargs={"working_directory": "./"},
    )

    # Compare each round-tripped file ("in" and "out" per snapshot) with
    # the Numpy file it was generated from.
    for snapshot in range(2):
        for i_o in ["in", "out"]:
            original = os.path.join(
                data_path, "Be_snapshot{}.{}.npy".format(snapshot, i_o)
            )
            roundtrip = "verify_against_original_numpy_data_{}.{}.npy".format(
                snapshot, i_o
            )

            original_a = np.load(original)
            roundtrip_a = np.load(roundtrip)
            np.testing.assert_allclose(original_a, roundtrip_a)

@pytest.mark.skipif(
importlib.util.find_spec("total_energy") is None
or importlib.util.find_spec("lammps") is None,
Expand Down
8 changes: 8 additions & 0 deletions test/examples_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ def test_advanced_ex06(self, tmp_path):
+ "/../examples/advanced/ex06_distributed_hyperparameter_optimization.py"
)

@pytest.mark.order(after="test_basic_ex01")
def test_advanced_ex09(self, tmp_path):
    """Run the Numpy/openPMD conversion example from inside ``tmp_path``."""
    # NOTE(review): the method name says ex09 while the script executed is
    # ex10_convert_numpy_openpmd.py - confirm which number is intended.
    # Change directory first so the example runs with tmp_path as its
    # current working directory.
    os.chdir(tmp_path)
    example_script = (
        self.dir_path + "/../examples/advanced/ex10_convert_numpy_openpmd.py"
    )
    runpy.run_path(example_script)

@pytest.mark.skipif(
importlib.util.find_spec("oapackage") is None,
reason="No OAT found on this machine, skipping this " "test.",
Expand Down

0 comments on commit 68f2a2d

Please sign in to comment.