From 6d5dcab9b0389d3ce7b161faa91639bd84cd5854 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 21 Feb 2024 18:59:42 +0100 Subject: [PATCH 1/4] Remove hardcoded iteration number from data shuffler --- mala/datahandling/data_shuffler.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mala/datahandling/data_shuffler.py b/mala/datahandling/data_shuffler.py index 935847276..62d6e11a3 100644 --- a/mala/datahandling/data_shuffler.py +++ b/mala/datahandling/data_shuffler.py @@ -131,6 +131,10 @@ def __shuffle_numpy( ) # Do the actual shuffling. + target_name_openpmd = os.path.join(target_save_path, + save_name.replace("*", "%T")) + descriptor_name_openpmd = os.path.join(descriptor_save_path, + save_name.replace("*", "%T")) for i in range(0, number_of_new_snapshots): new_descriptors = np.zeros( (int(np.prod(shuffle_dimensions)), self.input_dimension), @@ -209,7 +213,7 @@ def __shuffle_numpy( shuffle_dimensions ) self.descriptor_calculator.write_to_openpmd_file( - descriptor_name + ".in." + file_ending, + descriptor_name_openpmd + ".in." + file_ending, new_descriptors, additional_attributes={ "global_shuffling_seed": self.parameters.shuffling_seed, @@ -219,7 +223,7 @@ def __shuffle_numpy( internal_iteration_number=i, ) self.target_calculator.write_to_openpmd_file( - target_name + ".out." + file_ending, + target_name_openpmd + ".out." + file_ending, array=new_targets, additional_attributes={ "global_shuffling_seed": self.parameters.shuffling_seed, @@ -359,12 +363,12 @@ def from_chunk_i(i, n, dset, slice_dimension=0): import json # Do the actual shuffling. + name_prefix = os.path.join( + dot.save_path, save_name.replace("*", "%T") + ) for i in range(my_items_start, my_items_end): # We check above that in the non-numpy case, OpenPMD will work. dot.calculator.grid_dimensions = list(shuffle_dimensions) - name_prefix = os.path.join( - dot.save_path, save_name.replace("*", str(i)) - ) # do NOT open with MPI shuffled_snapshot_series = io.Series( name_prefix + dot.name_infix + file_ending, From 976a84ebe2c4d114defe3b8908c84e7f434eb0ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 30 May 2024 10:56:52 +0200 Subject: [PATCH 2/4] Also do this inside mala/common/physical_data.py --- mala/common/physical_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mala/common/physical_data.py b/mala/common/physical_data.py index 26bb12675..e756e96d1 100644 --- a/mala/common/physical_data.py +++ b/mala/common/physical_data.py @@ -418,7 +418,8 @@ def write_to_openpmd_file( import openpmd_api as io if isinstance(path, str): - file_name = os.path.basename(path) + directory, file_name = os.path.split(path) + path = os.path.join(directory, file_name.replace("*", "%T")) file_ending = file_name.split(".")[-1] if file_name == file_ending: path += ".h5" From ea3fbeefe84be3947127c62314450902fb90d37c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 30 May 2024 11:00:12 +0200 Subject: [PATCH 3/4] Use %06T instead of %T, i.e. 6-digit padding --- mala/common/physical_data.py | 2 +- mala/datahandling/data_shuffler.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mala/common/physical_data.py b/mala/common/physical_data.py index e756e96d1..9fa271670 100644 --- a/mala/common/physical_data.py +++ b/mala/common/physical_data.py @@ -419,7 +419,7 @@ def write_to_openpmd_file( if isinstance(path, str): directory, file_name = os.path.split(path) - path = os.path.join(directory, file_name.replace("*", "%T")) + path = os.path.join(directory, file_name.replace("*", "%06T")) file_ending = file_name.split(".")[-1] if file_name == file_ending: path += ".h5" diff --git a/mala/datahandling/data_shuffler.py b/mala/datahandling/data_shuffler.py index 62d6e11a3..6b5e04c61 100644 --- a/mala/datahandling/data_shuffler.py +++ b/mala/datahandling/data_shuffler.py @@ -132,9 +132,9 @@ def __shuffle_numpy( # Do the actual shuffling. target_name_openpmd = os.path.join(target_save_path, - save_name.replace("*", "%T")) + save_name.replace("*", "%06T")) descriptor_name_openpmd = os.path.join(descriptor_save_path, - save_name.replace("*", "%T")) + save_name.replace("*", "%06T")) for i in range(0, number_of_new_snapshots): new_descriptors = np.zeros( (int(np.prod(shuffle_dimensions)), self.input_dimension), @@ -364,7 +364,7 @@ def from_chunk_i(i, n, dset, slice_dimension=0): # Do the actual shuffling. name_prefix = os.path.join( - dot.save_path, save_name.replace("*", "%T") + dot.save_path, save_name.replace("*", "%06T") ) for i in range(my_items_start, my_items_end): # We check above that in the non-numpy case, OpenPMD will work. From 845c0219c46bccc94c8defc95b81016331b34e6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 30 May 2024 11:55:09 +0200 Subject: [PATCH 4/4] Revert "Use %06T instead of %T, i.e. 6-digit padding" This reverts commit ea3fbeefe84be3947127c62314450902fb90d37c. --- mala/common/physical_data.py | 2 +- mala/datahandling/data_shuffler.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mala/common/physical_data.py b/mala/common/physical_data.py index 9fa271670..e756e96d1 100644 --- a/mala/common/physical_data.py +++ b/mala/common/physical_data.py @@ -419,7 +419,7 @@ def write_to_openpmd_file( if isinstance(path, str): directory, file_name = os.path.split(path) - path = os.path.join(directory, file_name.replace("*", "%06T")) + path = os.path.join(directory, file_name.replace("*", "%T")) file_ending = file_name.split(".")[-1] if file_name == file_ending: path += ".h5" diff --git a/mala/datahandling/data_shuffler.py b/mala/datahandling/data_shuffler.py index 6b5e04c61..62d6e11a3 100644 --- a/mala/datahandling/data_shuffler.py +++ b/mala/datahandling/data_shuffler.py @@ -132,9 +132,9 @@ def __shuffle_numpy( # Do the actual shuffling. target_name_openpmd = os.path.join(target_save_path, - save_name.replace("*", "%06T")) + save_name.replace("*", "%T")) descriptor_name_openpmd = os.path.join(descriptor_save_path, - save_name.replace("*", "%06T")) + save_name.replace("*", "%T")) for i in range(0, number_of_new_snapshots): new_descriptors = np.zeros( (int(np.prod(shuffle_dimensions)), self.input_dimension), @@ -364,7 +364,7 @@ def from_chunk_i(i, n, dset, slice_dimension=0): # Do the actual shuffling. name_prefix = os.path.join( - dot.save_path, save_name.replace("*", "%06T") + dot.save_path, save_name.replace("*", "%T") ) for i in range(my_items_start, my_items_end): # We check above that in the non-numpy case, OpenPMD will work.