From 30d74e0929bc9a205611df9ff87b05c94ea5145b Mon Sep 17 00:00:00 2001 From: RandomDefaultUser Date: Thu, 21 Nov 2024 16:42:48 +0000 Subject: [PATCH] deploy: a402f7905d3a8cd1347be5887a21e466087bc6bb --- _modules/mala/datahandling/data_shuffler.html | 111 ++++++++---------- objects.inv | Bin 6356 -> 6356 bytes 2 files changed, 52 insertions(+), 59 deletions(-) diff --git a/_modules/mala/datahandling/data_shuffler.html b/_modules/mala/datahandling/data_shuffler.html index af72693d..aafdbcb4 100644 --- a/_modules/mala/datahandling/data_shuffler.html +++ b/_modules/mala/datahandling/data_shuffler.html @@ -222,7 +222,10 @@

Source code for mala.datahandling.data_shuffler

< # if the number of new snapshots is not a divisor of the grid size # then we have to trim the original snapshots to size # the indicies to be removed are selected at random - if self.data_points_to_remove is not None: + if ( + self.data_points_to_remove is not None + and np.sum(self.data_points_to_remove) > 0 + ): if self.parameters.shuffling_seed is not None: np.random.seed(idx * self.parameters.shuffling_seed) ngrid = ( @@ -638,74 +641,64 @@

Source code for mala.datahandling.data_shuffler

< self.data_points_to_remove = None if number_of_shuffled_snapshots is None: number_of_shuffled_snapshots = self.nr_snapshots - number_of_new_snapshots = number_of_shuffled_snapshots - - if snapshot_type == "openpmd": - import math - import functools - specified_number_of_new_snapshots = number_of_new_snapshots - number_of_new_snapshots = functools.reduce( - math.gcd, + # Currently, the openPMD interface is not feature-complete. + if snapshot_type == "openpmd" and np.any( + np.array( [ - snapshot.grid_dimension[0] + snapshot.grid_dimension[0] % number_of_shuffled_snapshots for snapshot in self.parameters.snapshot_directories_list - ], - number_of_new_snapshots, + ] + ) + != 0 + ): + raise ValueError( + "Shuffling from OpenPMD files currently only " + "supported if first dimension of all snapshots " + "can evenly be divided by number of snapshots. " + "Please select a different number of shuffled " + "snapshots or use the numpy interface. " ) - if number_of_new_snapshots != specified_number_of_new_snapshots: - print( - f"[openPMD shuffling] Reduced the number of output snapshots to " - f"{number_of_new_snapshots} because of the dataset dimensions." - ) - del specified_number_of_new_snapshots - elif snapshot_type == "numpy": - # Implement all of the below for OpenPMD later. - # We need to check if we need to reduce the overall grid size - # because the individual snapshots may not contain enough data - # points - shuffled_gridsizes = snapshot_size_list // number_of_new_snapshots - - if np.any( - np.array(snapshot_size_list) - - ( - (np.array(snapshot_size_list) // number_of_new_snapshots) - * number_of_new_snapshots - ) - > 0 - ): - number_of_data_points = int( - np.sum(shuffled_gridsizes) * number_of_new_snapshots - ) - self.data_points_to_remove = [] - for i in range(0, self.nr_snapshots): - self.data_points_to_remove.append( - snapshot_size_list[i] - - shuffled_gridsizes[i] * number_of_new_snapshots - ) - tot_points_missing = sum(self.data_points_to_remove) - - if tot_points_missing > 0: - printout( - "Warning: number of requested snapshots is not a divisor of", - "the original grid sizes.\n", - f"{tot_points_missing} / {number_of_data_points} data points", - "will be left out of the shuffled snapshots.", - ) + shuffled_gridsizes = snapshot_size_list // number_of_shuffled_snapshots - else: - raise Exception("Invalid snapshot type.") + if np.any( + np.array(snapshot_size_list) + - ( + (np.array(snapshot_size_list) // number_of_shuffled_snapshots) + * number_of_shuffled_snapshots + ) + > 0 + ): + number_of_data_points = int( + np.sum(shuffled_gridsizes) * number_of_shuffled_snapshots + ) + + self.data_points_to_remove = [] + for i in range(0, self.nr_snapshots): + self.data_points_to_remove.append( + snapshot_size_list[i] + - shuffled_gridsizes[i] * number_of_shuffled_snapshots + ) + tot_points_missing = sum(self.data_points_to_remove) + + if tot_points_missing > 0: + printout( + "Warning: number of requested snapshots is not a divisor of", + "the original grid sizes.\n", + f"{tot_points_missing} / {number_of_data_points} data points", + "will be left out of the shuffled snapshots.", + ) shuffle_dimensions = [ - int(number_of_data_points / number_of_new_snapshots), + int(number_of_data_points / number_of_shuffled_snapshots), 1, 1, ] printout( "Data shuffler will generate", - number_of_new_snapshots, + number_of_shuffled_snapshots, "new snapshots.", ) printout("Shuffled snapshot dimension will be ", shuffle_dimensions) @@ -713,7 +706,7 @@

Source code for mala.datahandling.data_shuffler

< # Prepare permutations. permutations = [] seeds = [] - for i in range(0, number_of_new_snapshots): + for i in range(0, number_of_shuffled_snapshots): # This makes the shuffling deterministic, if specified by the user. if self.parameters.shuffling_seed is not None: np.random.seed(i * self.parameters.shuffling_seed) @@ -723,7 +716,7 @@

Source code for mala.datahandling.data_shuffler

< if snapshot_type == "numpy": self.__shuffle_numpy( - number_of_new_snapshots, + number_of_shuffled_snapshots, shuffle_dimensions, descriptor_save_path, save_name, @@ -742,7 +735,7 @@

Source code for mala.datahandling.data_shuffler

< ) self.__shuffle_openpmd( descriptor, - number_of_new_snapshots, + number_of_shuffled_snapshots, shuffle_dimensions, save_name, permutations, @@ -758,7 +751,7 @@

Source code for mala.datahandling.data_shuffler

< ) self.__shuffle_openpmd( target, - number_of_new_snapshots, + number_of_shuffled_snapshots, shuffle_dimensions, save_name, permutations, diff --git a/objects.inv b/objects.inv index eaaf5e23c2dd7124c84de0826df42b489d8eeb20..dc991bf1e7a5e98ca18e2bd6a45affa75fbad0f0 100644 GIT binary patch delta 22 dcmca&c*Sr+BDbY(dZLMeQJT4>!N!~n2>@Ln2W|iW delta 22 dcmca&c*Sr+BDbk-x`CmwkwIEw>c*T52>@It2af;%