diff --git a/examples/advanced/ex10_convert_numpy_openpmd.py b/examples/advanced/ex10_convert_numpy_openpmd.py
new file mode 100644
index 000000000..45369ff89
--- /dev/null
+++ b/examples/advanced/ex10_convert_numpy_openpmd.py
@@ -0,0 +1,108 @@
+"""
+Convert MALA snapshot data between the Numpy and openPMD file formats.
+
+Numpy snapshots are first written as openPMD files, then read back and
+compared against the originals to confirm the round trip preserves the
+data. Finally, openPMD (.h5) snapshots are converted to Numpy.
+"""
+
+import os
+
+import mala
+import numpy as np
+
+from mala.datahandling.data_repo import data_path
+
+parameters = mala.Parameters()
+parameters.descriptors.descriptors_contain_xyz = False
+
+# First, convert from Numpy files to openPMD.
+
+data_converter = mala.DataConverter(parameters)
+
+for snapshot in range(2):
+    data_converter.add_snapshot(
+        descriptor_input_type="numpy",
+        descriptor_input_path=os.path.join(
+            data_path, "Be_snapshot{}.in.npy".format(snapshot)
+        ),
+        target_input_type="numpy",
+        target_input_path=os.path.join(
+            data_path, "Be_snapshot{}.out.npy".format(snapshot)
+        ),
+        additional_info_input_type=None,
+        additional_info_input_path=None,
+        target_units=None,
+    )
+
+data_converter.convert_snapshots(
+    descriptor_save_path="./",
+    target_save_path="./",
+    additional_info_save_path="./",
+    naming_scheme="converted_from_numpy_*.bp5",
+    descriptor_calculation_kwargs={"working_directory": "./"},
+)
+
+# Convert those files back to Numpy to verify the data stays the same.
+
+data_converter = mala.DataConverter(parameters)
+
+for snapshot in range(2):
+    data_converter.add_snapshot(
+        descriptor_input_type="openpmd",
+        descriptor_input_path="converted_from_numpy_{}.in.bp5".format(
+            snapshot
+        ),
+        target_input_type="openpmd",
+        target_input_path="converted_from_numpy_{}.out.bp5".format(snapshot),
+        additional_info_input_type=None,
+        additional_info_input_path=None,
+        target_units=None,
+    )
+
+data_converter.convert_snapshots(
+    descriptor_save_path="./",
+    target_save_path="./",
+    additional_info_save_path="./",
+    naming_scheme="verify_against_original_numpy_data_*.npy",
+    descriptor_calculation_kwargs={"working_directory": "./"},
+)
+
+for snapshot in range(2):
+    for i_o in ["in", "out"]:
+        original = os.path.join(
+            data_path, "Be_snapshot{}.{}.npy".format(snapshot, i_o)
+        )
+        roundtrip = "verify_against_original_numpy_data_{}.{}.npy".format(
+            snapshot, i_o
+        )
+        original_a = np.load(original)
+        roundtrip_a = np.load(roundtrip)
+        np.testing.assert_allclose(original_a, roundtrip_a)
+
+# Now, convert some openPMD data back to Numpy.
+
+data_converter = mala.DataConverter(parameters)
+
+for snapshot in range(2):
+    data_converter.add_snapshot(
+        descriptor_input_type="openpmd",
+        descriptor_input_path=os.path.join(
+            data_path, "Be_snapshot{}.in.h5".format(snapshot)
+        ),
+        target_input_type="openpmd",
+        target_input_path=os.path.join(
+            data_path, "Be_snapshot{}.out.h5".format(snapshot)
+        ),
+        additional_info_input_type=None,
+        additional_info_input_path=None,
+        target_units=None,
+    )
+
+data_converter.convert_snapshots(
+    descriptor_save_path="./",
+    target_save_path="./",
+    additional_info_save_path="./",
+    naming_scheme="converted_from_openpmd_*.npy",
+    descriptor_calculation_kwargs={"working_directory": "./"},
+)
diff --git a/mala/common/physical_data.py b/mala/common/physical_data.py
index 7ec85623d..629378829 100644
--- a/mala/common/physical_data.py
+++ b/mala/common/physical_data.py
@@ -555,6 +555,14 @@ def write_to_openpmd_iteration(
                 atoms_openpmd["position"][str(atom)].unit_SI = 1.0e-10
                 atoms_openpmd["positionOffset"][str(atom)].unit_SI = 1.0e-10
 
+        # If any grid dimension is still 0 and an actual array (not a
+        # SkipArrayWriting object) was passed, take the grid dimensions from
+        # the array's shape, leaving out the last (feature) axis.
+        if any(i == 0 for i in self.grid_dimensions) and not isinstance(
+            array, self.SkipArrayWriting
+        ):
+            self.grid_dimensions = array.shape[0:-1]
+
         dataset = (
             array.dataset
             if isinstance(array, self.SkipArrayWriting)
@@ -564,8 +572,15 @@ def write_to_openpmd_iteration(
         # Global feature sizes:
         feature_global_from = 0
         feature_global_to = self.feature_size
-        if feature_global_to == 0 and isinstance(array, self.SkipArrayWriting):
-            feature_global_to = array.feature_size
+        if feature_global_to == 0:
+            # No feature size is set on this object: use the one stored on
+            # the SkipArrayWriting object, or else the length of the
+            # array's last axis.
+            feature_global_to = (
+                array.feature_size
+                if isinstance(array, self.SkipArrayWriting)
+                else array.shape[-1]
+            )
 
         # First loop: Only metadata, write metadata equivalently across ranks
         for current_feature in range(feature_global_from, feature_global_to):
diff --git a/mala/datahandling/data_converter.py b/mala/datahandling/data_converter.py
index 5a97ec06c..5b22e2293 100644
--- a/mala/datahandling/data_converter.py
+++ b/mala/datahandling/data_converter.py
@@ -10,8 +10,8 @@ from mala.targets.target import Target
 from mala.version import __version__ as mala_version
 
 
-descriptor_input_types = ["espresso-out"]
-target_input_types = [".cube", ".xsf"]
+descriptor_input_types = ["espresso-out", "openpmd", "numpy"]
+target_input_types = [".cube", ".xsf", "openpmd", "numpy"]
 additional_info_input_types = ["espresso-out"]
 
 
@@ -547,6 +547,22 @@ def __convert_single_snapshot(
                 )
             )
 
+        elif description["input"] == "openpmd":
+            if self.parameters_full.descriptors.descriptors_contain_xyz:
+                printout(
+                    "[Warning] parameters.descriptors.descriptors_contain_xyz is True, will be ignored since this mode is unimplemented for openPMD data."
+                )
+                # Pin the feature mask to 0; presumably this is what makes
+                # the reader ignore the xyz setting -- TODO confirm.
+                self.descriptor_calculator._feature_mask = lambda: 0
+            tmp_input = self.descriptor_calculator.read_from_openpmd_file(
+                snapshot["input"], units=original_units["input"]
+            )
+        elif description["input"] == "numpy":
+            tmp_input = self.descriptor_calculator.read_from_numpy_file(
+                snapshot["input"], units=original_units["input"]
+            )
+
         elif description["input"] is None:
             # In this case, only the output is processed.
             pass
@@ -617,6 +633,17 @@ def __convert_single_snapshot(
                             snapshot["output"], **target_calculator_kwargs
                         )
 
+                    elif description["output"] == "openpmd":
+                        tmp_output = self.target_calculator.read_from_openpmd_file(
+                            snapshot["output"], units=original_units["output"]
+                        )
+                    elif description["output"] == "numpy":
+                        tmp_output = (
+                            self.target_calculator.read_from_numpy_file(
+                                snapshot["output"], units=original_units["output"]
+                            )
+                        )
+
                     elif description["output"] is None:
                         # In this case, only the input is processed.
                         pass
@@ -657,6 +684,17 @@ def __convert_single_snapshot(
                             snapshot["output"], **target_calculator_kwargs
                         )
 
+                    elif description["output"] == "openpmd":
+                        tmp_output = self.target_calculator.read_from_openpmd_file(
+                            snapshot["output"], units=original_units["output"]
+                        )
+                    elif description["output"] == "numpy":
+                        tmp_output = (
+                            self.target_calculator.read_from_numpy_file(
+                                snapshot["output"], units=original_units["output"]
+                            )
+                        )
+
                     elif description["output"] is None:
                         # In this case, only the input is processed.
                         pass
diff --git a/test/complete_interfaces_test.py b/test/complete_interfaces_test.py
index 8aa7da85d..4ceb691d8 100644
--- a/test/complete_interfaces_test.py
+++ b/test/complete_interfaces_test.py
@@ -89,6 +89,78 @@ def test_openpmd_io(self):
                 rtol=accuracy_fine,
             )
 
+    @pytest.mark.skipif(
+        importlib.util.find_spec("openpmd_api") is None,
+        reason="No OpenPMD found on this machine, skipping " "test.",
+    )
+    def test_convert_numpy_openpmd(self):
+        """Check that a Numpy to openPMD to Numpy round trip keeps the data."""
+        import numpy as np
+
+        parameters = mala.Parameters()
+        parameters.descriptors.descriptors_contain_xyz = False
+
+        data_converter = mala.DataConverter(parameters)
+        for snapshot in range(2):
+            data_converter.add_snapshot(
+                descriptor_input_type="numpy",
+                descriptor_input_path=os.path.join(
+                    data_path, "Be_snapshot{}.in.npy".format(snapshot)
+                ),
+                target_input_type="numpy",
+                target_input_path=os.path.join(
+                    data_path, "Be_snapshot{}.out.npy".format(snapshot)
+                ),
+                additional_info_input_type=None,
+                additional_info_input_path=None,
+                target_units=None,
+            )
+
+        data_converter.convert_snapshots(
+            descriptor_save_path="./",
+            target_save_path="./",
+            additional_info_save_path="./",
+            naming_scheme="converted_from_numpy_*.bp5",
+            descriptor_calculation_kwargs={"working_directory": "./"},
+        )
+
+        # Convert those files back to Numpy to verify the data stays the same.
+
+        data_converter = mala.DataConverter(parameters)
+
+        for snapshot in range(2):
+            data_converter.add_snapshot(
+                descriptor_input_type="openpmd",
+                descriptor_input_path="converted_from_numpy_{}.in.bp5".format(
+                    snapshot
+                ),
+                target_input_type="openpmd",
+                target_input_path="converted_from_numpy_{}.out.bp5".format(snapshot),
+                additional_info_input_type=None,
+                additional_info_input_path=None,
+                target_units=None,
+            )
+
+        data_converter.convert_snapshots(
+            descriptor_save_path="./",
+            target_save_path="./",
+            additional_info_save_path="./",
+            naming_scheme="verify_against_original_numpy_data_*.npy",
+            descriptor_calculation_kwargs={"working_directory": "./"},
+        )
+
+        for snapshot in range(2):
+            for i_o in ["in", "out"]:
+                original = os.path.join(
+                    data_path, "Be_snapshot{}.{}.npy".format(snapshot, i_o)
+                )
+                roundtrip = "verify_against_original_numpy_data_{}.{}.npy".format(
+                    snapshot, i_o
+                )
+                original_a = np.load(original)
+                roundtrip_a = np.load(roundtrip)
+                np.testing.assert_allclose(original_a, roundtrip_a)
+
     @pytest.mark.skipif(
         importlib.util.find_spec("total_energy") is None
         or importlib.util.find_spec("lammps") is None,
diff --git a/test/examples_test.py b/test/examples_test.py
index 4a83dd538..8834ad8b7 100644
--- a/test/examples_test.py
+++ b/test/examples_test.py
@@ -95,6 +95,18 @@ def test_advanced_ex06(self, tmp_path):
             + "/../examples/advanced/ex06_distributed_hyperparameter_optimization.py"
         )
 
+    @pytest.mark.skipif(
+        importlib.util.find_spec("openpmd_api") is None,
+        reason="No OpenPMD found on this machine, skipping this test.",
+    )
+    @pytest.mark.order(after="test_basic_ex01")
+    def test_advanced_ex10(self, tmp_path):
+        os.chdir(tmp_path)
+        runpy.run_path(
+            self.dir_path
+            + "/../examples/advanced/ex10_convert_numpy_openpmd.py"
+        )
+
     @pytest.mark.skipif(
         importlib.util.find_spec("oapackage") is None,
         reason="No OAT found on this machine, skipping this " "test.",