diff --git a/mala/descriptors/atomic_density.py b/mala/descriptors/atomic_density.py index a81c1d384..cda944b13 100755 --- a/mala/descriptors/atomic_density.py +++ b/mala/descriptors/atomic_density.py @@ -134,10 +134,15 @@ def __calculate_lammps(self, outdir, **kwargs): use_fp64 = kwargs.get("use_fp64", False) return_directly = kwargs.get("return_directly", False) + keep_logs = kwargs.get("keep_logs", False) lammps_format = "lammps-data" - ase_out_path = os.path.join(outdir, "lammps_input.tmp") - ase.io.write(ase_out_path, self.atoms, format=lammps_format) + self.lammps_temporary_input = os.path.join( + outdir, "lammps_input_" + self.calculation_timestamp + ".tmp" + ) + ase.io.write( + self.lammps_temporary_input, self.atoms, format=lammps_format + ) nx = self.grid_dimensions[0] ny = self.grid_dimensions[1] @@ -151,30 +156,35 @@ def __calculate_lammps(self, outdir, **kwargs): ) # Create LAMMPS instance. - lammps_dict = {} - lammps_dict["sigma"] = self.parameters.atomic_density_sigma - lammps_dict["rcutfac"] = self.parameters.atomic_density_cutoff - lammps_dict["atom_config_fname"] = ase_out_path - lmp = self._setup_lammps( - nx, - ny, - nz, + lammps_dict = { + "sigma": self.parameters.atomic_density_sigma, + "rcutfac": self.parameters.atomic_density_cutoff, + } + self.lammps_temporary_log = os.path.join( outdir, - lammps_dict, - log_file_name="lammps_ggrid_log.tmp", + "lammps_ggrid_log_" + self.calculation_timestamp + ".tmp", ) + lmp = self._setup_lammps(nx, ny, nz, lammps_dict) # For now the file is chosen automatically, because this is used # mostly under the hood anyway. 
filepath = __file__.split("atomic_density")[0] if self.parameters._configuration["mpi"]: if self.parameters.use_z_splitting: - runfile = os.path.join(filepath, "in.ggrid.python") + self.parameters.lammps_compute_file = os.path.join( + filepath, "in.ggrid.python" + ) else: - runfile = os.path.join(filepath, "in.ggrid_defaultproc.python") + self.parameters.lammps_compute_file = os.path.join( + filepath, "in.ggrid_defaultproc.python" + ) else: - runfile = os.path.join(filepath, "in.ggrid_defaultproc.python") - lmp.file(runfile) + self.parameters.lammps_compute_file = os.path.join( + filepath, "in.ggrid_defaultproc.python" + ) + + # Do the LAMMPS calculation and clean up. + lmp.file(self.parameters.lammps_compute_file) # Extract the data. nrows_ggrid = extract_compute_np( @@ -198,7 +208,7 @@ def __calculate_lammps(self, outdir, **kwargs): array_shape=(nrows_ggrid, ncols_ggrid), use_fp64=use_fp64, ) - lmp.close() + self._clean_calculation(lmp, keep_logs) # In comparison to SNAP, the atomic density always returns # in the "local mode". 
Thus we have to make some slight adjustments diff --git a/mala/descriptors/bispectrum.py b/mala/descriptors/bispectrum.py index 3f75ecc8e..66860b29b 100755 --- a/mala/descriptors/bispectrum.py +++ b/mala/descriptors/bispectrum.py @@ -138,10 +138,15 @@ def __calculate_lammps(self, outdir, **kwargs): from lammps import constants as lammps_constants use_fp64 = kwargs.get("use_fp64", False) + keep_logs = kwargs.get("keep_logs", False) lammps_format = "lammps-data" - ase_out_path = os.path.join(outdir, "lammps_input.tmp") - ase.io.write(ase_out_path, self.atoms, format=lammps_format) + self.lammps_temporary_input = os.path.join( + outdir, "lammps_input_" + self.calculation_timestamp + ".tmp" + ) + ase.io.write( + self.lammps_temporary_input, self.atoms, format=lammps_format + ) nx = self.grid_dimensions[0] ny = self.grid_dimensions[1] @@ -151,16 +156,13 @@ def __calculate_lammps(self, outdir, **kwargs): lammps_dict = { "twojmax": self.parameters.bispectrum_twojmax, "rcutfac": self.parameters.bispectrum_cutoff, - "atom_config_fname": ase_out_path, } - lmp = self._setup_lammps( - nx, - ny, - nz, + + self.lammps_temporary_log = os.path.join( outdir, - lammps_dict, - log_file_name="lammps_bgrid_log.tmp", + "lammps_bgrid_log_" + self.calculation_timestamp + ".tmp", ) + lmp = self._setup_lammps(nx, ny, nz, lammps_dict) # An empty string means that the user wants to use the standard input. # What that is differs depending on serial/parallel execution. @@ -180,7 +182,7 @@ def __calculate_lammps(self, outdir, **kwargs): filepath, "in.bgrid.python" ) - # Do the LAMMPS calculation. + # Do the LAMMPS calculation and clean up. lmp.file(self.parameters.lammps_compute_file) # Set things not accessible from LAMMPS @@ -225,7 +227,7 @@ def __calculate_lammps(self, outdir, **kwargs): array_shape=(nrows_local, ncols_local), use_fp64=use_fp64, ) - lmp.close() + self._clean_calculation(lmp, keep_logs) # Copy the grid dimensions only at the end. 
self.grid_dimensions = [nx, ny, nz] @@ -241,7 +243,7 @@ def __calculate_lammps(self, outdir, **kwargs): (nz, ny, nx, self.fingerprint_length), use_fp64=use_fp64, ) - lmp.close() + self._clean_calculation(lmp, keep_logs) # switch from x-fastest to z-fastest order (swaps 0th and 2nd # dimension) diff --git a/mala/descriptors/descriptor.py b/mala/descriptors/descriptor.py index 131037ba8..bf74f9ca5 100644 --- a/mala/descriptors/descriptor.py +++ b/mala/descriptors/descriptor.py @@ -1,6 +1,8 @@ """Base class for all descriptor calculators.""" from abc import abstractmethod +from datetime import datetime +from functools import cached_property import os import ase @@ -122,6 +124,12 @@ def __init__(self, parameters): self.atoms = None self.voxel = None + # If we ever have NON LAMMPS descriptors, these parameters have no + # meaning anymore and should probably be moved to an intermediate + # DescriptorsLAMMPS class, from which the LAMMPS descriptors inherit. + self.lammps_temporary_input = None + self.lammps_temporary_log = None + ############################## # Properties ############################## @@ -155,6 +163,26 @@ def descriptors_contain_xyz(self): def descriptors_contain_xyz(self, value): self.parameters.descriptors_contain_xyz = value + @cached_property + def calculation_timestamp(self): + """ + Timestamp of calculation start. + + Used to distinguish multiple LAMMPS runs performed in the same + directory. 
Since the interface is file based, this timestamp prevents + problems with slightly overlapping runs overwriting each other. + """ + if get_rank() == 0: + timestamp = datetime.timestamp(datetime.utcnow()) + else: + timestamp = None + + if self.parameters._configuration["mpi"]: + timestamp = get_comm().bcast(timestamp, root=0) + return datetime.fromtimestamp(timestamp).strftime("%F-%H-%M-%S-%f")[ + :-3 + ] + ############################## # Methods ############################## @@ -273,6 +301,17 @@ def calculate_from_qe_out( Usually the local directory should suffice, given that there are no multiple instances running in the same directory. + kwargs : dict + A collection of keyword arguments, that are mainly used for + debugging and development. Different types of descriptors + may support different keyword arguments. Commonly supported + are + + - "use_fp64": To enforce 64-bit floating point precision for + descriptors. + - "keep_logs": To not delete temporary files created during + LAMMPS calculation of descriptors. + Returns ------- descriptors : numpy.array @@ -334,6 +373,17 @@ def calculate_from_atoms( Usually the local directory should suffice, given that there are no multiple instances running in the same directory. + kwargs : dict + A collection of keyword arguments, that are mainly used for + debugging and development. Different types of descriptors + may support different keyword arguments. Commonly supported + are + + - "use_fp64": To enforce 64-bit floating point precision for + descriptors. + - "keep_logs": To not delete temporary files created during + LAMMPS calculation of descriptors. + Returns ------- descriptors : numpy.array @@ -542,9 +592,7 @@ def _feature_mask(self): else: return 0 - def _setup_lammps( - self, nx, ny, nz, outdir, lammps_dict, log_file_name="lammps_log.tmp" - ): + def _setup_lammps(self, nx, ny, nz, lammps_dict): """ Set up the lammps processor grid. 
@@ -552,20 +600,14 @@ def _setup_lammps( """ from lammps import lammps - parallel_warn( - "Using LAMMPS for descriptor calculation. " - "Do not initialize more than one pre-processing " - "calculation in the same directory at the same time. " - "Data may be over-written." - ) - # Build LAMMPS arguments from the data we read. lmp_cmdargs = [ "-screen", "none", "-log", - os.path.join(outdir, log_file_name), + self.lammps_temporary_log, ] + lammps_dict["atom_config_fname"] = self.lammps_temporary_input if self.parameters._configuration["mpi"]: size = get_size() @@ -778,6 +820,17 @@ def _setup_lammps( return lmp + def _clean_calculation(self, lmp, keep_logs): + lmp.close() + if not keep_logs: + if get_rank() == 0: + os.remove(self.lammps_temporary_log) + os.remove(self.lammps_temporary_input) + + # Reset timestamp for potential next calculation using same LAMMPS + # object. + del self.calculation_timestamp + def _setup_atom_list(self): """ Set up a list of atoms potentially relevant for descriptor calculation. diff --git a/mala/descriptors/minterpy_descriptors.py b/mala/descriptors/minterpy_descriptors.py index 3722260c3..2964fb494 100755 --- a/mala/descriptors/minterpy_descriptors.py +++ b/mala/descriptors/minterpy_descriptors.py @@ -91,6 +91,8 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs): # general LAMMPS import. from lammps import constants as lammps_constants + keep_logs = kwargs.get("keep_logs", False) + nx = grid_dimensions[0] ny = grid_dimensions[1] nz = grid_dimensions[2] @@ -161,22 +163,23 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs): # The rest is the stanfard LAMMPS atomic density stuff. 
lammps_format = "lammps-data" - ase_out_path = os.path.join(outdir, "lammps_input.tmp") - ase.io.write(ase_out_path, atoms_copied, format=lammps_format) + self.lammps_temporary_input = os.path.join( + outdir, "lammps_input_" + self.calculation_timestamp + ".tmp" + ) + ase.io.write( + self.lammps_temporary_input, atoms_copied, format=lammps_format + ) # Create LAMMPS instance. - lammps_dict = {} - lammps_dict["sigma"] = self.parameters.atomic_density_sigma - lammps_dict["rcutfac"] = self.parameters.atomic_density_cutoff - lammps_dict["atom_config_fname"] = ase_out_path - lmp = self._setup_lammps( - nx, - ny, - nz, + lammps_dict = { + "sigma": self.parameters.atomic_density_sigma, + "rcutfac": self.parameters.atomic_density_cutoff, + } + self.lammps_temporary_log = os.path.join( outdir, - lammps_dict, - log_file_name="lammps_mgrid_log.tmp", + "lammps_mgrid_log_" + self.calculation_timestamp + ".tmp", ) + lmp = self._setup_lammps(nx, ny, nz, lammps_dict) # For now the file is chosen automatically, because this is used # mostly under the hood anyway. @@ -191,8 +194,12 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs): # else: # runfile = os.path.join(filepath, "in.ggrid_defaultproc.python") else: - runfile = os.path.join(filepath, "in.ggrid_defaultproc.python") - lmp.file(runfile) + self.parameters.lammps_compute_file = os.path.join( + filepath, "in.ggrid_defaultproc.python" + ) + + # Do the LAMMPS calculation and clean up. + lmp.file(self.parameters.lammps_compute_file) # Extract the data. 
nrows_ggrid = extract_compute_np( @@ -216,7 +223,7 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs): array_shape=(nrows_ggrid, ncols_ggrid), ) - lmp.close() + self._clean_calculation(lmp, keep_logs) gaussian_descriptors_np = gaussian_descriptors_np.reshape( ( diff --git a/mala/targets/target.py b/mala/targets/target.py index 23212470b..4621c6542 100644 --- a/mala/targets/target.py +++ b/mala/targets/target.py @@ -649,7 +649,7 @@ def get_target(self): @abstractmethod def invalidate_target(self): """ - Invalidates the saved target wuantity. + Invalidates the saved target quantity. This is the generic interface for cached target quantities. It should work for all implemented targets.