Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Timestamping LAMMPS temporary files #518

Merged
merged 5 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 27 additions & 17 deletions mala/descriptors/atomic_density.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,15 @@ def __calculate_lammps(self, outdir, **kwargs):

use_fp64 = kwargs.get("use_fp64", False)
return_directly = kwargs.get("return_directly", False)
keep_logs = kwargs.get("keep_logs", False)

lammps_format = "lammps-data"
ase_out_path = os.path.join(outdir, "lammps_input.tmp")
ase.io.write(ase_out_path, self.atoms, format=lammps_format)
self.lammps_temporary_input = os.path.join(
outdir, "lammps_input_" + self.calculation_timestamp + ".tmp"
)
ase.io.write(
self.lammps_temporary_input, self.atoms, format=lammps_format
)

nx = self.grid_dimensions[0]
ny = self.grid_dimensions[1]
Expand All @@ -151,30 +156,35 @@ def __calculate_lammps(self, outdir, **kwargs):
)

# Create LAMMPS instance.
lammps_dict = {}
lammps_dict["sigma"] = self.parameters.atomic_density_sigma
lammps_dict["rcutfac"] = self.parameters.atomic_density_cutoff
lammps_dict["atom_config_fname"] = ase_out_path
lmp = self._setup_lammps(
nx,
ny,
nz,
lammps_dict = {
"sigma": self.parameters.atomic_density_sigma,
"rcutfac": self.parameters.atomic_density_cutoff,
}
self.lammps_temporary_log = os.path.join(
outdir,
lammps_dict,
log_file_name="lammps_ggrid_log.tmp",
"lammps_ggrid_log_" + self.calculation_timestamp + ".tmp",
)
lmp = self._setup_lammps(nx, ny, nz, lammps_dict)

# For now the file is chosen automatically, because this is used
# mostly under the hood anyway.
filepath = __file__.split("atomic_density")[0]
if self.parameters._configuration["mpi"]:
if self.parameters.use_z_splitting:
runfile = os.path.join(filepath, "in.ggrid.python")
self.parameters.lammps_compute_file = os.path.join(
filepath, "in.ggrid.python"
)
else:
runfile = os.path.join(filepath, "in.ggrid_defaultproc.python")
self.parameters.lammps_compute_file = os.path.join(
filepath, "in.ggrid_defaultproc.python"
)
else:
runfile = os.path.join(filepath, "in.ggrid_defaultproc.python")
lmp.file(runfile)
self.parameters.lammps_compute_file = os.path.join(
filepath, "in.ggrid_defaultproc.python"
)

# Do the LAMMPS calculation and clean up.
lmp.file(self.parameters.lammps_compute_file)

# Extract the data.
nrows_ggrid = extract_compute_np(
Expand All @@ -198,7 +208,7 @@ def __calculate_lammps(self, outdir, **kwargs):
array_shape=(nrows_ggrid, ncols_ggrid),
use_fp64=use_fp64,
)
lmp.close()
self._clean_calculation(lmp, keep_logs)

# In comparison to SNAP, the atomic density always returns
# in the "local mode". Thus we have to make some slight adjustments
Expand Down
26 changes: 14 additions & 12 deletions mala/descriptors/bispectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,15 @@ def __calculate_lammps(self, outdir, **kwargs):
from lammps import constants as lammps_constants

use_fp64 = kwargs.get("use_fp64", False)
keep_logs = kwargs.get("keep_logs", False)

lammps_format = "lammps-data"
ase_out_path = os.path.join(outdir, "lammps_input.tmp")
ase.io.write(ase_out_path, self.atoms, format=lammps_format)
self.lammps_temporary_input = os.path.join(
outdir, "lammps_input_" + self.calculation_timestamp + ".tmp"
)
ase.io.write(
self.lammps_temporary_input, self.atoms, format=lammps_format
)

nx = self.grid_dimensions[0]
ny = self.grid_dimensions[1]
Expand All @@ -151,16 +156,13 @@ def __calculate_lammps(self, outdir, **kwargs):
lammps_dict = {
"twojmax": self.parameters.bispectrum_twojmax,
"rcutfac": self.parameters.bispectrum_cutoff,
"atom_config_fname": ase_out_path,
}
lmp = self._setup_lammps(
nx,
ny,
nz,

self.lammps_temporary_log = os.path.join(
outdir,
lammps_dict,
log_file_name="lammps_bgrid_log.tmp",
"lammps_bgrid_log_" + self.calculation_timestamp + ".tmp",
)
lmp = self._setup_lammps(nx, ny, nz, lammps_dict)

# An empty string means that the user wants to use the standard input.
# What that is differs depending on serial/parallel execution.
Expand All @@ -180,7 +182,7 @@ def __calculate_lammps(self, outdir, **kwargs):
filepath, "in.bgrid.python"
)

# Do the LAMMPS calculation.
# Do the LAMMPS calculation and clean up.
lmp.file(self.parameters.lammps_compute_file)

# Set things not accessible from LAMMPS
Expand Down Expand Up @@ -225,7 +227,7 @@ def __calculate_lammps(self, outdir, **kwargs):
array_shape=(nrows_local, ncols_local),
use_fp64=use_fp64,
)
lmp.close()
self._clean_calculation(lmp, keep_logs)

# Copy the grid dimensions only at the end.
self.grid_dimensions = [nx, ny, nz]
Expand All @@ -241,7 +243,7 @@ def __calculate_lammps(self, outdir, **kwargs):
(nz, ny, nx, self.fingerprint_length),
use_fp64=use_fp64,
)
lmp.close()
self._clean_calculation(lmp, keep_logs)

# switch from x-fastest to z-fastest order (swaps 0th and 2nd
# dimension)
Expand Down
75 changes: 64 additions & 11 deletions mala/descriptors/descriptor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Base class for all descriptor calculators."""

from abc import abstractmethod
from datetime import datetime
from functools import cached_property
import os

import ase
Expand Down Expand Up @@ -122,6 +124,12 @@ def __init__(self, parameters):
self.atoms = None
self.voxel = None

# If we ever have NON LAMMPS descriptors, these parameters have no
# meaning anymore and should probably be moved to an intermediate
# DescriptorsLAMMPS class, from which the LAMMPS descriptors inherit.
self.lammps_temporary_input = None
self.lammps_temporary_log = None

##############################
# Properties
##############################
Expand Down Expand Up @@ -155,6 +163,26 @@ def descriptors_contain_xyz(self):
def descriptors_contain_xyz(self, value):
self.parameters.descriptors_contain_xyz = value

@cached_property
def calculation_timestamp(self):
    """
    Timestamp of calculation start.

    Used to distinguish multiple LAMMPS runs performed in the same
    directory. Since the interface is file based, embedding this timestamp
    in the names of the temporary files prevents runs that share a
    directory from overwriting each other's files.

    The time is taken on rank 0 only and, if MPI is enabled, broadcast to
    all other ranks so that every rank uses the same string. The value is
    cached on first access; deleting the attribute causes a new timestamp
    to be generated on the next access.

    Returns
    -------
    timestamp : str
        UTC time formatted as ``YYYY-MM-DD-HH-MM-SS-mmm`` (millisecond
        resolution).
    """
    # Imported locally because only ``datetime`` itself is imported at
    # module level.
    from datetime import timezone

    if get_rank() == 0:
        # Timezone-aware "now" avoids the deprecated datetime.utcnow() and
        # the naive-UTC -> local time round trip it required.
        timestamp = datetime.now(timezone.utc).timestamp()
    else:
        timestamp = None

    if self.parameters._configuration["mpi"]:
        timestamp = get_comm().bcast(timestamp, root=0)

    # "%Y-%m-%d" is the portable spelling of the platform-specific "%F".
    # "%f" yields microseconds (6 digits); dropping the last three leaves
    # milliseconds.
    formatted = datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime(
        "%Y-%m-%d-%H-%M-%S-%f"
    )
    return formatted[:-3]

##############################
# Methods
##############################
Expand Down Expand Up @@ -273,6 +301,17 @@ def calculate_from_qe_out(
Usually the local directory should suffice, given that there
are no multiple instances running in the same directory.

kwargs : dict
A collection of keyword arguments, that are mainly used for
debugging and development. Different types of descriptors
may support different keyword arguments. Commonly supported
are

- "use_fp64": To enforce 64-bit floating point precision for
descriptors.
- "keep_logs": To not delete temporary files created during
LAMMPS calculation of descriptors.

Returns
-------
descriptors : numpy.array
Expand Down Expand Up @@ -334,6 +373,17 @@ def calculate_from_atoms(
Usually the local directory should suffice, given that there
are no multiple instances running in the same directory.

kwargs : dict
A collection of keyword arguments, that are mainly used for
debugging and development. Different types of descriptors
may support different keyword arguments. Commonly supported
are

- "use_fp64": To enforce 64-bit floating point precision for
descriptors.
- "keep_logs": To not delete temporary files created during
LAMMPS calculation of descriptors.

Returns
-------
descriptors : numpy.array
Expand Down Expand Up @@ -542,30 +592,22 @@ def _feature_mask(self):
else:
return 0

def _setup_lammps(
self, nx, ny, nz, outdir, lammps_dict, log_file_name="lammps_log.tmp"
):
def _setup_lammps(self, nx, ny, nz, lammps_dict):
"""
Set up the lammps processor grid.

Takes into account y/z-splitting.
"""
from lammps import lammps

parallel_warn(
"Using LAMMPS for descriptor calculation. "
"Do not initialize more than one pre-processing "
"calculation in the same directory at the same time. "
"Data may be over-written."
)

# Build LAMMPS arguments from the data we read.
lmp_cmdargs = [
"-screen",
"none",
"-log",
os.path.join(outdir, log_file_name),
self.lammps_temporary_log,
]
lammps_dict["atom_config_fname"] = self.lammps_temporary_input

if self.parameters._configuration["mpi"]:
size = get_size()
Expand Down Expand Up @@ -778,6 +820,17 @@ def _setup_lammps(

return lmp

def _clean_calculation(self, lmp, keep_logs):
    """
    Close the LAMMPS instance and clean up after a calculation.

    Parameters
    ----------
    lmp : object
        LAMMPS instance used for the calculation (as returned by
        ``_setup_lammps``). Its ``close()`` method is called here.

    keep_logs : bool
        If True, the temporary LAMMPS input and log files are left on
        disk. Otherwise they are deleted by rank 0.
    """
    try:
        lmp.close()
        # Only one rank deletes the files, which are shared by all ranks.
        if not keep_logs and get_rank() == 0:
            os.remove(self.lammps_temporary_log)
            os.remove(self.lammps_temporary_input)
    finally:
        # Reset timestamp for potential next calculation using same LAMMPS
        # object. The cached value lives in the instance dictionary;
        # popping it (rather than ``del``) also works if it was never
        # computed or has already been reset, and doing so in ``finally``
        # keeps a failed cleanup from leaking a stale timestamp into the
        # next run.
        self.__dict__.pop("calculation_timestamp", None)

def _setup_atom_list(self):
"""
Set up a list of atoms potentially relevant for descriptor calculation.
Expand Down
37 changes: 22 additions & 15 deletions mala/descriptors/minterpy_descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs):
# general LAMMPS import.
from lammps import constants as lammps_constants

keep_logs = kwargs.get("keep_logs", False)

nx = grid_dimensions[0]
ny = grid_dimensions[1]
nz = grid_dimensions[2]
Expand Down Expand Up @@ -161,22 +163,23 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs):

# The rest is the standard LAMMPS atomic density stuff.
lammps_format = "lammps-data"
ase_out_path = os.path.join(outdir, "lammps_input.tmp")
ase.io.write(ase_out_path, atoms_copied, format=lammps_format)
self.lammps_temporary_input = os.path.join(
outdir, "lammps_input_" + self.calculation_timestamp + ".tmp"
)
ase.io.write(
self.lammps_temporary_input, self.atoms, format=lammps_format
)

# Create LAMMPS instance.
lammps_dict = {}
lammps_dict["sigma"] = self.parameters.atomic_density_sigma
lammps_dict["rcutfac"] = self.parameters.atomic_density_cutoff
lammps_dict["atom_config_fname"] = ase_out_path
lmp = self._setup_lammps(
nx,
ny,
nz,
lammps_dict = {
"sigma": self.parameters.atomic_density_sigma,
"rcutfac": self.parameters.atomic_density_cutoff,
}
self.lammps_temporary_log = os.path.join(
outdir,
lammps_dict,
log_file_name="lammps_mgrid_log.tmp",
"lammps_bgrid_log_" + self.calculation_timestamp + ".tmp",
)
lmp = self._setup_lammps(nx, ny, nz, lammps_dict)

# For now the file is chosen automatically, because this is used
# mostly under the hood anyway.
Expand All @@ -191,8 +194,12 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs):
# else:
# runfile = os.path.join(filepath, "in.ggrid_defaultproc.python")
else:
runfile = os.path.join(filepath, "in.ggrid_defaultproc.python")
lmp.file(runfile)
self.parameters.lammps_compute_file = os.path.join(
filepath, "in.ggrid_defaultproc.python"
)

# Do the LAMMPS calculation and clean up.
lmp.file(self.parameters.lammps_compute_file)

# Extract the data.
nrows_ggrid = extract_compute_np(
Expand All @@ -216,7 +223,7 @@ def _calculate(self, atoms, outdir, grid_dimensions, **kwargs):
array_shape=(nrows_ggrid, ncols_ggrid),
)

lmp.close()
self._clean_calculation(lmp, keep_logs)

gaussian_descriptors_np = gaussian_descriptors_np.reshape(
(
Expand Down
2 changes: 1 addition & 1 deletion mala/targets/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ def get_target(self):
@abstractmethod
def invalidate_target(self):
"""
Invalidates the saved target wuantity.
Invalidates the saved target quantity.

This is the generic interface for cached target quantities.
It should work for all implemented targets.
Expand Down
Loading