Skip to content

Commit

Permalink
Merge pull request #518 from mala-project/multiple_lammps_runs
Browse files Browse the repository at this point in the history
Timestamping LAMMPS temporary files
  • Loading branch information
RandomDefaultUser authored May 31, 2024
2 parents a9c219e + fe911a2 commit 035dd2a
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 56 deletions.
44 changes: 27 additions & 17 deletions mala/descriptors/atomic_density.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,15 @@ def __calculate_lammps(self, outdir, **kwargs):

use_fp64 = kwargs.get("use_fp64", False)
return_directly = kwargs.get("return_directly", False)
keep_logs = kwargs.get("keep_logs", False)

lammps_format = "lammps-data"
ase_out_path = os.path.join(outdir, "lammps_input.tmp")
ase.io.write(ase_out_path, self.atoms, format=lammps_format)
self.lammps_temporary_input = os.path.join(
outdir, "lammps_input_" + self.calculation_timestamp + ".tmp"
)
ase.io.write(
self.lammps_temporary_input, self.atoms, format=lammps_format
)

nx = self.grid_dimensions[0]
ny = self.grid_dimensions[1]
Expand All @@ -151,30 +156,35 @@ def __calculate_lammps(self, outdir, **kwargs):
)

# Create LAMMPS instance.
lammps_dict = {}
lammps_dict["sigma"] = self.parameters.atomic_density_sigma
lammps_dict["rcutfac"] = self.parameters.atomic_density_cutoff
lammps_dict["atom_config_fname"] = ase_out_path
lmp = self._setup_lammps(
nx,
ny,
nz,
lammps_dict = {
"sigma": self.parameters.atomic_density_sigma,
"rcutfac": self.parameters.atomic_density_cutoff,
}
self.lammps_temporary_log = os.path.join(
outdir,
lammps_dict,
log_file_name="lammps_ggrid_log.tmp",
"lammps_ggrid_log_" + self.calculation_timestamp + ".tmp",
)
lmp = self._setup_lammps(nx, ny, nz, lammps_dict)

# For now the file is chosen automatically, because this is used
# mostly under the hood anyway.
filepath = __file__.split("atomic_density")[0]
if self.parameters._configuration["mpi"]:
if self.parameters.use_z_splitting:
runfile = os.path.join(filepath, "in.ggrid.python")
self.parameters.lammps_compute_file = os.path.join(
filepath, "in.ggrid.python"
)
else:
runfile = os.path.join(filepath, "in.ggrid_defaultproc.python")
self.parameters.lammps_compute_file = os.path.join(
filepath, "in.ggrid_defaultproc.python"
)
else:
runfile = os.path.join(filepath, "in.ggrid_defaultproc.python")
lmp.file(runfile)
self.parameters.lammps_compute_file = os.path.join(
filepath, "in.ggrid_defaultproc.python"
)

# Do the LAMMPS calculation and clean up.
lmp.file(self.parameters.lammps_compute_file)

# Extract the data.
nrows_ggrid = extract_compute_np(
Expand All @@ -198,7 +208,7 @@ def __calculate_lammps(self, outdir, **kwargs):
array_shape=(nrows_ggrid, ncols_ggrid),
use_fp64=use_fp64,
)
lmp.close()
self._clean_calculation(lmp, keep_logs)

# In comparison to SNAP, the atomic density always returns
# in the "local mode". Thus we have to make some slight adjustments
Expand Down
26 changes: 14 additions & 12 deletions mala/descriptors/bispectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,15 @@ def __calculate_lammps(self, outdir, **kwargs):
from lammps import constants as lammps_constants

use_fp64 = kwargs.get("use_fp64", False)
keep_logs = kwargs.get("keep_logs", False)

lammps_format = "lammps-data"
ase_out_path = os.path.join(outdir, "lammps_input.tmp")
ase.io.write(ase_out_path, self.atoms, format=lammps_format)
self.lammps_temporary_input = os.path.join(
outdir, "lammps_input_" + self.calculation_timestamp + ".tmp"
)
ase.io.write(
self.lammps_temporary_input, self.atoms, format=lammps_format
)

nx = self.grid_dimensions[0]
ny = self.grid_dimensions[1]
Expand All @@ -151,16 +156,13 @@ def __calculate_lammps(self, outdir, **kwargs):
lammps_dict = {
"twojmax": self.parameters.bispectrum_twojmax,
"rcutfac": self.parameters.bispectrum_cutoff,
"atom_config_fname": ase_out_path,
}
lmp = self._setup_lammps(
nx,
ny,
nz,

self.lammps_temporary_log = os.path.join(
outdir,
lammps_dict,
log_file_name="lammps_bgrid_log.tmp",
"lammps_bgrid_log_" + self.calculation_timestamp + ".tmp",
)
lmp = self._setup_lammps(nx, ny, nz, lammps_dict)

# An empty string means that the user wants to use the standard input.
# What that is differs depending on serial/parallel execution.
Expand All @@ -180,7 +182,7 @@ def __calculate_lammps(self, outdir, **kwargs):
filepath, "in.bgrid.python"
)

# Do the LAMMPS calculation.
# Do the LAMMPS calculation and clean up.
lmp.file(self.parameters.lammps_compute_file)

# Set things not accessible from LAMMPS
Expand Down Expand Up @@ -225,7 +227,7 @@ def __calculate_lammps(self, outdir, **kwargs):
array_shape=(nrows_local, ncols_local),
use_fp64=use_fp64,
)
lmp.close()
self._clean_calculation(lmp, keep_logs)

# Copy the grid dimensions only at the end.
self.grid_dimensions = [nx, ny, nz]
Expand All @@ -241,7 +243,7 @@ def __calculate_lammps(self, outdir, **kwargs):
(nz, ny, nx, self.fingerprint_length),
use_fp64=use_fp64,
)
lmp.close()
self._clean_calculation(lmp, keep_logs)

# switch from x-fastest to z-fastest order (swaps 0th and 2nd
# dimension)
Expand Down
75 changes: 64 additions & 11 deletions mala/descriptors/descriptor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Base class for all descriptor calculators."""

from abc import abstractmethod
from datetime import datetime
from functools import cached_property
import os

import ase
Expand Down Expand Up @@ -122,6 +124,12 @@ def __init__(self, parameters):
self.atoms = None
self.voxel = None

# If we ever have NON LAMMPS descriptors, these parameters have no
# meaning anymore and should probably be moved to an intermediate
# DescriptorsLAMMPS class, from which the LAMMPS descriptors inherit.
self.lammps_temporary_input = None
self.lammps_temporary_log = None

##############################
# Properties
##############################
Expand Down Expand Up @@ -155,6 +163,26 @@ def descriptors_contain_xyz(self):
def descriptors_contain_xyz(self, value):
self.parameters.descriptors_contain_xyz = value

@cached_property
def calculation_timestamp(self):
    """
    Timestamp of calculation start.

    Used to distinguish multiple LAMMPS runs performed in the same
    directory. Since the interface is file based, this timestamp is
    embedded in the names of the temporary LAMMPS files, which prevents
    problems with runs started at slightly different times using the
    same file names.

    The value is determined on rank 0 and, if MPI is enabled, broadcast
    to all other ranks, so that every rank works with the same string.
    Being a cached property, it is computed once and kept until the
    attribute is deleted from the instance.

    Returns
    -------
    timestamp : str
        UTC time in the form "YYYY-MM-DD-HH-MM-SS-mmm" (millisecond
        resolution).
    """
    # Imported locally so this property does not rely on the module-level
    # import list providing ``timezone``.
    from datetime import timezone

    if get_rank() == 0:
        # Timezone-aware replacement for the deprecated
        # datetime.utcnow(); yields a proper POSIX timestamp.
        timestamp = datetime.now(timezone.utc).timestamp()
    else:
        timestamp = None

    if self.parameters._configuration["mpi"]:
        timestamp = get_comm().bcast(timestamp, root=0)

    # "%F" is a platform-specific shorthand for "%Y-%m-%d"; spell it out
    # so the format works everywhere. The slice drops the last three
    # microsecond digits, leaving milliseconds.
    return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime(
        "%Y-%m-%d-%H-%M-%S-%f"
    )[:-3]

##############################
# Methods
##############################
Expand Down Expand Up @@ -273,6 +301,17 @@ def calculate_from_qe_out(
Usually the local directory should suffice, given that there
are no multiple instances running in the same directory.
kwargs : dict
A collection of keyword arguments, that are mainly used for
debugging and development. Different types of descriptors
may support different keyword arguments. Commonly supported
are
- "use_fp64": To enforce 64-bit floating point precision for
descriptors.
- "keep_logs": To not delete temporary files created during
LAMMPS calculation of descriptors.
Returns
-------
descriptors : numpy.array
Expand Down Expand Up @@ -334,6 +373,17 @@ def calculate_from_atoms(
Usually the local directory should suffice, given that there
are no multiple instances running in the same directory.
kwargs : dict
A collection of keyword arguments, that are mainly used for
debugging and development. Different types of descriptors
may support different keyword arguments. Commonly supported
are
- "use_fp64": To enforce 64-bit floating point precision for
descriptors.
- "keep_logs": To not delete temporary files created during
LAMMPS calculation of descriptors.
Returns
-------
descriptors : numpy.array
Expand Down Expand Up @@ -542,30 +592,22 @@ def _feature_mask(self):
else:
return 0

def _setup_lammps(
self, nx, ny, nz, outdir, lammps_dict, log_file_name="lammps_log.tmp"
):
def _setup_lammps(self, nx, ny, nz, lammps_dict):
"""
Set up the lammps processor grid.
Takes into account y/z-splitting.
"""
from lammps import lammps

parallel_warn(
"Using LAMMPS for descriptor calculation. "
"Do not initialize more than one pre-processing "
"calculation in the same directory at the same time. "
"Data may be over-written."
)

# Build LAMMPS arguments from the data we read.
lmp_cmdargs = [
"-screen",
"none",
"-log",
os.path.join(outdir, log_file_name),
self.lammps_temporary_log,
]
lammps_dict["atom_config_fname"] = self.lammps_temporary_input

if self.parameters._configuration["mpi"]:
size = get_size()
Expand Down Expand Up @@ -778,6 +820,17 @@ def _setup_lammps(

return lmp

def _clean_calculation(self, lmp, keep_logs):
    """
    Close LAMMPS and clean up after a descriptor calculation.

    Parameters
    ----------
    lmp : lammps.lammps
        LAMMPS instance (as returned by _setup_lammps) that is closed
        here.

    keep_logs : bool
        If False, the temporary LAMMPS log and input files are deleted
        (on rank 0 only). If True, they are left in place.
    """
    lmp.close()
    if not keep_logs and get_rank() == 0:
        os.remove(self.lammps_temporary_log)
        os.remove(self.lammps_temporary_input)

    # Reset timestamp for potential next calculation using same LAMMPS
    # object. cached_property stores its value in the instance __dict__;
    # popping with a default (instead of ``del``) keeps the cleanup
    # idempotent if the timestamp has not been computed or was already
    # reset.
    self.__dict__.pop("calculation_timestamp", None)

def _setup_atom_list(self):
"""
Set up a list of atoms potentially relevant for descriptor calculation.
Expand Down
37 changes: 22 additions & 15 deletions mala/descriptors/minterpy_descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs):
# general LAMMPS import.
from lammps import constants as lammps_constants

keep_logs = kwargs.get("keep_logs", False)

nx = grid_dimensions[0]
ny = grid_dimensions[1]
nz = grid_dimensions[2]
Expand Down Expand Up @@ -161,22 +163,23 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs):

# The rest is the standard LAMMPS atomic density stuff.
lammps_format = "lammps-data"
ase_out_path = os.path.join(outdir, "lammps_input.tmp")
ase.io.write(ase_out_path, atoms_copied, format=lammps_format)
self.lammps_temporary_input = os.path.join(
outdir, "lammps_input_" + self.calculation_timestamp + ".tmp"
)
ase.io.write(
self.lammps_temporary_input, self.atoms, format=lammps_format
)

# Create LAMMPS instance.
lammps_dict = {}
lammps_dict["sigma"] = self.parameters.atomic_density_sigma
lammps_dict["rcutfac"] = self.parameters.atomic_density_cutoff
lammps_dict["atom_config_fname"] = ase_out_path
lmp = self._setup_lammps(
nx,
ny,
nz,
lammps_dict = {
"sigma": self.parameters.atomic_density_sigma,
"rcutfac": self.parameters.atomic_density_cutoff,
}
self.lammps_temporary_log = os.path.join(
outdir,
lammps_dict,
log_file_name="lammps_mgrid_log.tmp",
"lammps_bgrid_log_" + self.calculation_timestamp + ".tmp",
)
lmp = self._setup_lammps(nx, ny, nz, lammps_dict)

# For now the file is chosen automatically, because this is used
# mostly under the hood anyway.
Expand All @@ -191,8 +194,12 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs):
# else:
# runfile = os.path.join(filepath, "in.ggrid_defaultproc.python")
else:
runfile = os.path.join(filepath, "in.ggrid_defaultproc.python")
lmp.file(runfile)
self.parameters.lammps_compute_file = os.path.join(
filepath, "in.ggrid_defaultproc.python"
)

# Do the LAMMPS calculation and clean up.
lmp.file(self.parameters.lammps_compute_file)

# Extract the data.
nrows_ggrid = extract_compute_np(
Expand All @@ -216,7 +223,7 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs):
array_shape=(nrows_ggrid, ncols_ggrid),
)

lmp.close()
self._clean_calculation(lmp, keep_logs)

gaussian_descriptors_np = gaussian_descriptors_np.reshape(
(
Expand Down
2 changes: 1 addition & 1 deletion mala/targets/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ def get_target(self):
@abstractmethod
def invalidate_target(self):
"""
Invalidates the saved target wuantity.
Invalidates the saved target quantity.
This is the generic interface for cached target quantities.
It should work for all implemented targets.
Expand Down

0 comments on commit 035dd2a

Please sign in to comment.