Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement on-the-fly descriptor calculation #630

Draft
wants to merge 17 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/source/basic_usage/more_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,16 +99,16 @@ and fill it with data, e.g., by
descriptor_input_path=outfile,
target_input_type=".cube",
target_input_path=ldosfile,
additional_info_input_type="espresso-out",
additional_info_input_path=outfile,
simulation_output_type="espresso-out",
simulation_output_path=outfile,
target_units="1/(Ry*Bohr^3)")

The ``add_snapshot`` function can be called multiple times to add
multiple snapshots to MALA.
For regular Quantum ESPRESSO calculations, the ``descriptor_input_type``
and ``target_input_type`` will always be ``"espresso-out"`` and ``".cube"``,
respectively, and the ``target_units`` will always be ``"1/(Ry*Bohr^3)"``.
The paths have to be modified accordingly. ``additional_info_input_*`` refers
The paths have to be modified accordingly. ``simulation_output_*`` refers
to the calculation output file - MALA provides an interface to condense
the entire, verbose simulation output to ``.json`` files for further
processing. In the preceding section, we had to specify calculation output
Expand All @@ -121,7 +121,7 @@ Once data is provided, the conversion itself is simple.

data_converter.convert_snapshots(descriptor_save_path="./",
target_save_path="./",
additional_info_save_path="./",
simulation_output_save_path="./",
naming_scheme="Be_snapshot*.npy",
descriptor_calculation_kwargs=
{"working_directory": data_path})
Expand Down
18 changes: 9 additions & 9 deletions examples/advanced/ex10_convert_numpy_openpmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@
target_input_path=os.path.join(
data_path, "Be_snapshot{}.out.npy".format(snapshot)
),
additional_info_input_type=None,
additional_info_input_path=None,
simulation_output_type=None,
simulation_output_path=None,
target_units=None,
)

data_converter.convert_snapshots(
descriptor_save_path="./",
target_save_path="./",
additional_info_save_path="./",
simulation_output_save_path="./",
naming_scheme="converted_from_numpy_*.h5",
descriptor_calculation_kwargs={"working_directory": "./"},
)
Expand All @@ -43,15 +43,15 @@
descriptor_input_path="converted_from_numpy_{}.in.h5".format(snapshot),
target_input_type="openpmd",
target_input_path="converted_from_numpy_{}.out.h5".format(snapshot),
additional_info_input_type=None,
additional_info_input_path=None,
simulation_output_type=None,
simulation_output_path=None,
target_units=None,
)

data_converter.convert_snapshots(
descriptor_save_path="./",
target_save_path="./",
additional_info_save_path="./",
simulation_output_save_path="./",
naming_scheme="verify_against_original_numpy_data_*.npy",
descriptor_calculation_kwargs={"working_directory": "./"},
)
Expand Down Expand Up @@ -84,15 +84,15 @@
target_input_path=os.path.join(
data_path, "Be_snapshot{}.out.h5".format(snapshot)
),
additional_info_input_type=None,
additional_info_input_path=None,
simulation_output_type=None,
simulation_output_path=None,
target_units=None,
)

data_converter.convert_snapshots(
descriptor_save_path="./",
target_save_path="./",
additional_info_save_path="./",
simulation_output_save_path="./",
naming_scheme="converted_from_openpmd_*.npy",
descriptor_calculation_kwargs={"working_directory": "./"},
)
10 changes: 5 additions & 5 deletions examples/basic/ex03_preprocess_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@
# Data conversion itself is simple. We select input and output data
# to be converted, add this data snapshot-wise and tell MALA to
# convert snapshots. Inputs and outputs can be processed individually.
# Further, via the additional_info_input_* keywords, calculation output
# Further, via the simulation_output_* keywords, calculation output
# can be processed from the original simulation *.out output files into
# more convenient *.json files that can be used in their stead. This saves
# on disk space.
# To only process parts of the data, omit/add descriptor_input*, target_input_*
# and additional_info_input_* at your leisure.
# and simulation_output_* at your leisure.
# Make sure to set the correct units - for QE, this should always be
# 1/(Ry*Bohr^3).
####################
Expand All @@ -65,8 +65,8 @@
descriptor_input_path=outfile,
target_input_type=".cube",
target_input_path=ldosfile,
additional_info_input_type="espresso-out",
additional_info_input_path=outfile,
simulation_output_type="espresso-out",
simulation_output_path=outfile,
target_units="1/(Ry*Bohr^3)",
)

Expand All @@ -84,7 +84,7 @@
data_converter.convert_snapshots(
descriptor_save_path="./",
target_save_path="./",
additional_info_save_path="./",
simulation_output_save_path="./",
naming_scheme="Be_snapshot*.npy",
descriptor_calculation_kwargs={"working_directory": data_path},
)
Expand Down
64 changes: 32 additions & 32 deletions mala/datahandling/data_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

descriptor_input_types = ["espresso-out", "openpmd", "numpy"]
target_input_types = [".cube", ".xsf", "openpmd", "numpy"]
additional_info_input_types = ["espresso-out"]
simulation_output_types = ["espresso-out"]


class DataConverter:
Expand Down Expand Up @@ -78,16 +78,16 @@ def __init__(
# Keep track of what has to be done by this data converter.
self.__process_descriptors = False
self.__process_targets = False
self.__process_additional_info = False
self.__process_simulation_output = False

def add_snapshot(
self,
descriptor_input_type=None,
descriptor_input_path=None,
target_input_type=None,
target_input_path=None,
additional_info_input_type=None,
additional_info_input_path=None,
simulation_output_type=None,
simulation_output_path=None,
descriptor_units=None,
metadata_input_type=None,
metadata_input_path=None,
Expand All @@ -114,22 +114,22 @@ def add_snapshot(
target_input_path : string
Path of target data to be processed.

additional_info_input_type : string
simulation_output_type : string
Type of simulation output data to be processed.
See mala.datahandling.data_converter.additional_info_input_types
See mala.datahandling.data_converter.simulation_output_types
for options.

additional_info_input_path : string
simulation_output_path : string
Path of simulation output data to be processed.

metadata_input_type : string
Type of additional metadata to be processed.
See mala.datahandling.data_converter.additional_info_input_types
See mala.datahandling.data_converter.simulation_output_types
for options.
This is essentially the same as additional_info_input_type,
This is essentially the same as simulation_output_type,
but will not affect saving; i.e., the data given here will
only be saved in OpenPMD files, not saved separately.
If additional_info_input_type is set, this argument will be
If simulation_output_type is set, this argument will be
ignored.

metadata_input_path : string
Expand Down Expand Up @@ -161,28 +161,28 @@ def add_snapshot(
raise Exception("Cannot process this type of target data.")
self.__process_targets = True

if additional_info_input_type is not None:
metadata_input_type = additional_info_input_type
if additional_info_input_path is None:
if simulation_output_type is not None:
metadata_input_type = simulation_output_type
if simulation_output_path is None:
raise Exception(
"Cannot process additional info data with "
"no path given."
)
if additional_info_input_type not in additional_info_input_types:
if simulation_output_type not in simulation_output_types:
raise Exception(
"Cannot process this type of additional info data."
)
self.__process_additional_info = True
self.__process_simulation_output = True

metadata_input_path = additional_info_input_path
metadata_input_path = simulation_output_path

if metadata_input_type is not None:
if metadata_input_path is None:
raise Exception(
"Cannot process additional info data with "
"no path given."
)
if metadata_input_type not in additional_info_input_types:
if metadata_input_type not in simulation_output_types:
raise Exception(
"Cannot process this type of additional info data."
)
Expand All @@ -192,15 +192,15 @@ def add_snapshot(
{
"input": descriptor_input_path,
"output": target_input_path,
"additional_info": additional_info_input_path,
"simulation_output": simulation_output_path,
"metadata": metadata_input_path,
}
)
self.__snapshot_description.append(
{
"input": descriptor_input_type,
"output": target_input_type,
"additional_info": additional_info_input_type,
"simulation_output": simulation_output_type,
"metadata": metadata_input_type,
}
)
Expand All @@ -213,7 +213,7 @@ def convert_snapshots(
complete_save_path=None,
descriptor_save_path=None,
target_save_path=None,
additional_info_save_path=None,
simulation_output_save_path=None,
naming_scheme="ELEM_snapshot*.npy",
starts_at=0,
file_based_communication=False,
Expand All @@ -231,15 +231,15 @@ def convert_snapshots(
complete_save_path : string
If not None: the directory in which all snapshots will be saved.
Overwrites descriptor_save_path, target_save_path and
additional_info_save_path if set.
simulation_output_save_path if set.

descriptor_save_path : string
Directory in which to save descriptor data.

target_save_path : string
Directory in which to save target data.

additional_info_save_path : string
simulation_output_save_path : string
Directory in which to save the processed simulation output
(written as ``.info.json`` files).

naming_scheme : string
Expand Down Expand Up @@ -304,7 +304,7 @@ def convert_snapshots(
if complete_save_path is not None:
descriptor_save_path = complete_save_path
target_save_path = complete_save_path
additional_info_save_path = complete_save_path
simulation_output_save_path = complete_save_path
else:
if self.__process_targets is True and target_save_path is None:
raise Exception(
Expand All @@ -318,8 +318,8 @@ def convert_snapshots(
"No descriptor path specified, cannot process data."
)
if (
self.__process_additional_info is True
and additional_info_save_path is None
self.__process_simulation_output is True
and simulation_output_save_path is None
):
raise Exception(
"No additional info path specified, cannot "
Expand Down Expand Up @@ -393,9 +393,9 @@ def convert_snapshots(
snapshot_name = snapshot_name.replace("*", str(snapshot_number))

# Create the paths as needed.
if self.__process_additional_info:
if self.__process_simulation_output:
info_path = os.path.join(
additional_info_save_path, snapshot_name + ".info.json"
simulation_output_save_path, snapshot_name + ".info.json"
)
else:
info_path = None
Expand Down Expand Up @@ -454,7 +454,7 @@ def convert_snapshots(
use_memmap=memmap,
input_iteration=input_iteration,
output_iteration=output_iteration,
additional_info_path=info_path,
simulation_output_info_path=info_path,
use_fp64=use_fp64,
)

Expand All @@ -479,7 +479,7 @@ def __convert_single_snapshot(
target_calculator_kwargs,
input_path=None,
output_path=None,
additional_info_path=None,
simulation_output_info_path=None,
use_memmap=None,
output_iteration=None,
input_iteration=None,
Expand Down Expand Up @@ -719,11 +719,11 @@ def __convert_single_snapshot(
del tmp_output

# Parse and/or calculate the additional info.
if description["additional_info"] is not None:
if description["simulation_output"] is not None:
# Parsing and saving is done using the target calculator.
self.target_calculator.read_additional_calculation_data(
snapshot["additional_info"], description["additional_info"]
snapshot["simulation_output"], description["simulation_output"]
)
self.target_calculator.write_additional_calculation_data(
additional_info_path
simulation_output_info_path
)
Loading