Merge pull request #518 from mala-project/multiple_lammps_runs

Timestamping LAMMPS temporary files
mala-project · May 31, 2024 · 035dd2a · 035dd2a
2 parents a9c219e + fe911a2
commit 035dd2a
Show file tree

Hide file tree

Showing 5 changed files with 128 additions and 56 deletions.
diff --git a/mala/descriptors/atomic_density.py b/mala/descriptors/atomic_density.py
@@ -134,10 +134,15 @@ def __calculate_lammps(self, outdir, **kwargs):
 
         use_fp64 = kwargs.get("use_fp64", False)
         return_directly = kwargs.get("return_directly", False)
+        keep_logs = kwargs.get("keep_logs", False)
 
         lammps_format = "lammps-data"
-        ase_out_path = os.path.join(outdir, "lammps_input.tmp")
-        ase.io.write(ase_out_path, self.atoms, format=lammps_format)
+        self.lammps_temporary_input = os.path.join(
+            outdir, "lammps_input_" + self.calculation_timestamp + ".tmp"
+        )
+        ase.io.write(
+            self.lammps_temporary_input, self.atoms, format=lammps_format
+        )
 
         nx = self.grid_dimensions[0]
         ny = self.grid_dimensions[1]
@@ -151,30 +156,35 @@ def __calculate_lammps(self, outdir, **kwargs):
             )
 
         # Create LAMMPS instance.
-        lammps_dict = {}
-        lammps_dict["sigma"] = self.parameters.atomic_density_sigma
-        lammps_dict["rcutfac"] = self.parameters.atomic_density_cutoff
-        lammps_dict["atom_config_fname"] = ase_out_path
-        lmp = self._setup_lammps(
-            nx,
-            ny,
-            nz,
+        lammps_dict = {
+            "sigma": self.parameters.atomic_density_sigma,
+            "rcutfac": self.parameters.atomic_density_cutoff,
+        }
+        self.lammps_temporary_log = os.path.join(
             outdir,
-            lammps_dict,
-            log_file_name="lammps_ggrid_log.tmp",
+            "lammps_ggrid_log_" + self.calculation_timestamp + ".tmp",
         )
+        lmp = self._setup_lammps(nx, ny, nz, lammps_dict)
 
         # For now the file is chosen automatically, because this is used
         # mostly under the hood anyway.
         filepath = __file__.split("atomic_density")[0]
         if self.parameters._configuration["mpi"]:
             if self.parameters.use_z_splitting:
-                runfile = os.path.join(filepath, "in.ggrid.python")
+                self.parameters.lammps_compute_file = os.path.join(
+                    filepath, "in.ggrid.python"
+                )
             else:
-                runfile = os.path.join(filepath, "in.ggrid_defaultproc.python")
+                self.parameters.lammps_compute_file = os.path.join(
+                    filepath, "in.ggrid_defaultproc.python"
+                )
         else:
-            runfile = os.path.join(filepath, "in.ggrid_defaultproc.python")
-        lmp.file(runfile)
+            self.parameters.lammps_compute_file = os.path.join(
+                filepath, "in.ggrid_defaultproc.python"
+            )
+
+        # Do the LAMMPS calculation and clean up.
+        lmp.file(self.parameters.lammps_compute_file)
 
         # Extract the data.
         nrows_ggrid = extract_compute_np(
@@ -198,7 +208,7 @@ def __calculate_lammps(self, outdir, **kwargs):
             array_shape=(nrows_ggrid, ncols_ggrid),
             use_fp64=use_fp64,
         )
-        lmp.close()
+        self._clean_calculation(lmp, keep_logs)
 
         # In comparison to SNAP, the atomic density always returns
         # in the "local mode". Thus we have to make some slight adjustments

diff --git a/mala/descriptors/bispectrum.py b/mala/descriptors/bispectrum.py
@@ -138,10 +138,15 @@ def __calculate_lammps(self, outdir, **kwargs):
         from lammps import constants as lammps_constants
 
         use_fp64 = kwargs.get("use_fp64", False)
+        keep_logs = kwargs.get("keep_logs", False)
 
         lammps_format = "lammps-data"
-        ase_out_path = os.path.join(outdir, "lammps_input.tmp")
-        ase.io.write(ase_out_path, self.atoms, format=lammps_format)
+        self.lammps_temporary_input = os.path.join(
+            outdir, "lammps_input_" + self.calculation_timestamp + ".tmp"
+        )
+        ase.io.write(
+            self.lammps_temporary_input, self.atoms, format=lammps_format
+        )
 
         nx = self.grid_dimensions[0]
         ny = self.grid_dimensions[1]
@@ -151,16 +156,13 @@ def __calculate_lammps(self, outdir, **kwargs):
         lammps_dict = {
             "twojmax": self.parameters.bispectrum_twojmax,
             "rcutfac": self.parameters.bispectrum_cutoff,
-            "atom_config_fname": ase_out_path,
         }
-        lmp = self._setup_lammps(
-            nx,
-            ny,
-            nz,
+
+        self.lammps_temporary_log = os.path.join(
             outdir,
-            lammps_dict,
-            log_file_name="lammps_bgrid_log.tmp",
+            "lammps_bgrid_log_" + self.calculation_timestamp + ".tmp",
         )
+        lmp = self._setup_lammps(nx, ny, nz, lammps_dict)
 
         # An empty string means that the user wants to use the standard input.
         # What that is differs depending on serial/parallel execution.
@@ -180,7 +182,7 @@ def __calculate_lammps(self, outdir, **kwargs):
                     filepath, "in.bgrid.python"
                 )
 
-        # Do the LAMMPS calculation.
+        # Do the LAMMPS calculation and clean up.
         lmp.file(self.parameters.lammps_compute_file)
 
         # Set things not accessible from LAMMPS
@@ -225,7 +227,7 @@ def __calculate_lammps(self, outdir, **kwargs):
                 array_shape=(nrows_local, ncols_local),
                 use_fp64=use_fp64,
             )
-            lmp.close()
+            self._clean_calculation(lmp, keep_logs)
 
             # Copy the grid dimensions only at the end.
             self.grid_dimensions = [nx, ny, nz]
@@ -241,7 +243,7 @@ def __calculate_lammps(self, outdir, **kwargs):
                 (nz, ny, nx, self.fingerprint_length),
                 use_fp64=use_fp64,
             )
-            lmp.close()
+            self._clean_calculation(lmp, keep_logs)
 
             # switch from x-fastest to z-fastest order (swaps 0th and 2nd
             # dimension)

diff --git a/mala/descriptors/descriptor.py b/mala/descriptors/descriptor.py
@@ -1,6 +1,8 @@
 """Base class for all descriptor calculators."""
 
 from abc import abstractmethod
+from datetime import datetime
+from functools import cached_property
 import os
 
 import ase
@@ -122,6 +124,12 @@ def __init__(self, parameters):
         self.atoms = None
         self.voxel = None
 
+        # If we ever have NON LAMMPS descriptors, these parameters have no
+        # meaning anymore and should probably be moved to an intermediate
+        # DescriptorsLAMMPS class, from which the LAMMPS descriptors inherit.
+        self.lammps_temporary_input = None
+        self.lammps_temporary_log = None
+
     ##############################
     # Properties
     ##############################
@@ -155,6 +163,26 @@ def descriptors_contain_xyz(self):
     def descriptors_contain_xyz(self, value):
         self.parameters.descriptors_contain_xyz = value
 
+    @cached_property
+    def calculation_timestamp(self):
+        """
+        Timestamp of calculation start.
+
+        Used to distinguish multiple LAMMPS runs performed in the same
+        directory. Since the interface is file based, this timestamp prevents
+        simultaneous runs from overwriting each other's temporary files.
+        """
+        if get_rank() == 0:
+            timestamp = datetime.timestamp(datetime.utcnow())
+        else:
+            timestamp = None
+
+        if self.parameters._configuration["mpi"]:
+            timestamp = get_comm().bcast(timestamp, root=0)
+        return datetime.fromtimestamp(timestamp).strftime("%F-%H-%M-%S-%f")[
+            :-3
+        ]
+
     ##############################
     # Methods
     ##############################
@@ -273,6 +301,17 @@ def calculate_from_qe_out(
             Usually the local directory should suffice, given that there
             are no multiple instances running in the same directory.
 
+        kwargs : dict
+            A collection of keyword arguments, that are mainly used for
+            debugging and development. Different types of descriptors
+            may support different keyword arguments. Commonly supported
+            are
+
+            - "use_fp64": To enforce floating point 64 precision for
+              descriptors.
+            - "keep_logs": To not delete temporary files created during
+              LAMMPS calculation of descriptors.
+
         Returns
         -------
         descriptors : numpy.array
@@ -334,6 +373,17 @@ def calculate_from_atoms(
             Usually the local directory should suffice, given that there
             are no multiple instances running in the same directory.
 
+        kwargs : dict
+            A collection of keyword arguments, that are mainly used for
+            debugging and development. Different types of descriptors
+            may support different keyword arguments. Commonly supported
+            are
+
+            - "use_fp64": To enforce floating point 64 precision for
+              descriptors.
+            - "keep_logs": To not delete temporary files created during
+              LAMMPS calculation of descriptors.
+
         Returns
         -------
         descriptors : numpy.array
@@ -542,30 +592,22 @@ def _feature_mask(self):
         else:
             return 0
 
-    def _setup_lammps(
-        self, nx, ny, nz, outdir, lammps_dict, log_file_name="lammps_log.tmp"
-    ):
+    def _setup_lammps(self, nx, ny, nz, lammps_dict):
         """
         Set up the lammps processor grid.
 
         Takes into account y/z-splitting.
         """
         from lammps import lammps
 
-        parallel_warn(
-            "Using LAMMPS for descriptor calculation. "
-            "Do not initialize more than one pre-processing "
-            "calculation in the same directory at the same time. "
-            "Data may be over-written."
-        )
-
         # Build LAMMPS arguments from the data we read.
         lmp_cmdargs = [
             "-screen",
             "none",
             "-log",
-            os.path.join(outdir, log_file_name),
+            self.lammps_temporary_log,
         ]
+        lammps_dict["atom_config_fname"] = self.lammps_temporary_input
 
         if self.parameters._configuration["mpi"]:
             size = get_size()
@@ -778,6 +820,17 @@ def _setup_lammps(
 
         return lmp
 
+    def _clean_calculation(self, lmp, keep_logs):
+        lmp.close()
+        if not keep_logs:
+            if get_rank() == 0:
+                os.remove(self.lammps_temporary_log)
+                os.remove(self.lammps_temporary_input)
+
+        # Reset timestamp for potential next calculation using same LAMMPS
+        # object.
+        del self.calculation_timestamp
+
     def _setup_atom_list(self):
         """
         Set up a list of atoms potentially relevant for descriptor calculation.

diff --git a/mala/descriptors/minterpy_descriptors.py b/mala/descriptors/minterpy_descriptors.py
@@ -91,6 +91,8 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs):
         # general LAMMPS import.
         from lammps import constants as lammps_constants
 
+        keep_logs = kwargs.get("keep_logs", False)
+
         nx = grid_dimensions[0]
         ny = grid_dimensions[1]
         nz = grid_dimensions[2]
@@ -161,22 +163,23 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs):
 
             # The rest is the stanfard LAMMPS atomic density stuff.
             lammps_format = "lammps-data"
-            ase_out_path = os.path.join(outdir, "lammps_input.tmp")
-            ase.io.write(ase_out_path, atoms_copied, format=lammps_format)
+            self.lammps_temporary_input = os.path.join(
+                outdir, "lammps_input_" + self.calculation_timestamp + ".tmp"
+            )
+            ase.io.write(
+                self.lammps_temporary_input, self.atoms, format=lammps_format
+            )
 
             # Create LAMMPS instance.
-            lammps_dict = {}
-            lammps_dict["sigma"] = self.parameters.atomic_density_sigma
-            lammps_dict["rcutfac"] = self.parameters.atomic_density_cutoff
-            lammps_dict["atom_config_fname"] = ase_out_path
-            lmp = self._setup_lammps(
-                nx,
-                ny,
-                nz,
+            lammps_dict = {
+                "sigma": self.parameters.atomic_density_sigma,
+                "rcutfac": self.parameters.atomic_density_cutoff,
+            }
+            self.lammps_temporary_log = os.path.join(
                 outdir,
-                lammps_dict,
-                log_file_name="lammps_mgrid_log.tmp",
+                "lammps_bgrid_log_" + self.calculation_timestamp + ".tmp",
             )
+            lmp = self._setup_lammps(nx, ny, nz, lammps_dict)
 
             # For now the file is chosen automatically, because this is used
             # mostly under the hood anyway.
@@ -191,8 +194,12 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs):
                 # else:
                 #     runfile = os.path.join(filepath, "in.ggrid_defaultproc.python")
             else:
-                runfile = os.path.join(filepath, "in.ggrid_defaultproc.python")
-            lmp.file(runfile)
+                self.parameters.lammps_compute_file = os.path.join(
+                    filepath, "in.ggrid_defaultproc.python"
+                )
+
+            # Do the LAMMPS calculation and clean up.
+            lmp.file(self.parameters.lammps_compute_file)
 
             # Extract the data.
             nrows_ggrid = extract_compute_np(
@@ -216,7 +223,7 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs):
                 array_shape=(nrows_ggrid, ncols_ggrid),
             )
 
-            lmp.close()
+            self._clean_calculation(lmp, keep_logs)
 
             gaussian_descriptors_np = gaussian_descriptors_np.reshape(
                 (

diff --git a/mala/targets/target.py b/mala/targets/target.py
@@ -649,7 +649,7 @@ def get_target(self):
     @abstractmethod
     def invalidate_target(self):
         """
-        Invalidates the saved target wuantity.
+        Invalidates the saved target quantity.
 
         This is the generic interface for cached target quantities.
         It should work for all implemented targets.