diff --git a/eessi/testsuite/tests/apps/espresso/__init__.py b/eessi/testsuite/tests/apps/espresso/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py
new file mode 100644
index 00000000..a1675afd
--- /dev/null
+++ b/eessi/testsuite/tests/apps/espresso/espresso.py
@@ -0,0 +1,125 @@
+"""
+This module tests ESPResSo, using any available module whose name contains the substring 'ESPResSo' (not to be
+confused with Quantum ESPRESSO). Tests included:
+- P3M benchmark - Ionic crystals
+    - Weak scaling
+    - Strong scaling
+Weak or strong scaling must be selected via an option passed to the benchmark script: the system is either scaled
+with the number of cores (weak scaling) or kept at a constant size (strong scaling).
+"""
+
+import reframe as rfm
+import reframe.utility.sanity as sn
+
+# imported only to make the linter happy; ReFrame injects these builtins into the class body
+from reframe.core.builtins import deferrable, parameter, performance_function, run_after, sanity_function
+
+from eessi.testsuite import hooks, utils
+from eessi.testsuite.constants import *
+from eessi.testsuite.utils import find_modules, log
+
+
+def filter_scales_P3M():
+    """
+    Filtering function for the scales of the P3M test.
+    This is currently required because the 16-node test takes far too long and always fails due to the time limit.
+    Once the mesh tuning algorithm allows specifying the mesh sizes for a particular scale, this function can be
+    removed.
+    """
+    return [
+        k for (k, v) in SCALES.items()
+        if v['num_nodes'] != 16
+    ]
+
+
+@rfm.simple_test
+class EESSI_ESPRESSO_P3M_IONIC_CRYSTALS(rfm.RunOnlyRegressionTest):
+
+    scale = parameter(filter_scales_P3M())
+    valid_prog_environs = ['default']
+    valid_systems = ['*']
+    time_limit = '300m'
+    # Need to check if QuantumESPRESSO also gets listed.
+    module_name = parameter(find_modules('ESPResSo'))
+    # device type is parameterized for an impending CUDA ESPResSo module.
+    device_type = parameter([DEVICE_TYPES[CPU]])
+
+    executable = 'python3 madelung.py'
+
+    default_strong_scaling_system_size = 9
+    default_weak_scaling_system_size = 6
+
+    benchmark_info = parameter([
+        ('mpi.ionic_crystals.p3m', 'p3m'),
+    ], fmt=lambda x: x[0], loggable=True)
+
+    @run_after('init')
+    def run_after_init(self):
+        """Hooks to run after the init phase"""
+        # Filter on which scales are supported by the partitions defined in the ReFrame configuration
+        hooks.filter_supported_scales(self)
+
+        hooks.filter_valid_systems_by_device_type(self, required_device_type=self.device_type)
+
+        hooks.set_modules(self)
+
+        # Set scales as tags
+        hooks.set_tag_scale(self)
+
+    @run_after('init')
+    def set_tag_ci(self):
+        """Set the CI tag on the small-scale P3M tests."""
+        if (self.benchmark_info[0] in ['mpi.ionic_crystals.p3m'] and SCALES[self.scale]['num_nodes'] < 2):
+            self.tags.add('CI')
+            log(f'tags set to {self.tags}')
+
+        if (self.benchmark_info[0] == 'mpi.ionic_crystals.p3m'):
+            self.tags.add('ionic_crystals_p3m')
+
+    @run_after('init')
+    def set_executable_opts(self):
+        """Set executable opts based on device_type parameter"""
+        num_default = 0  # If this test already has executable opts, they must have come from the command line
+        hooks.check_custom_executable_opts(self, num_default=num_default)
+        if not self.has_custom_executable_opts:
+            # By default we run weak scaling, since the strong-scaling system size would need to change with the
+            # maximum node count and a corresponding minimum node size would have to be chosen.
+            self.executable_opts += ['--size', str(self.default_weak_scaling_system_size), '--weak-scaling']
+            utils.log(f'executable_opts set to {self.executable_opts}')
+
+    @run_after('setup')
+    def set_num_tasks_per_node(self):
+        """Set the number of tasks per node and the number of cpus per task. This sets num_cpus_per_task for the
+        1-node and 2-node scales, where full nodes are requested."""
+        hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT[CPU])
+
+    @run_after('setup')
+    def set_mem(self):
+        """Request an explicit amount of memory for the job. The assumption made here is that HPC systems provide
+        at least 1 GB of memory per core."""
+        mem_required_per_node = self.num_tasks_per_node * 0.9
+        hooks.req_memory_per_node(test=self, app_mem_req=mem_required_per_node)
+
+    @deferrable
+    def assert_completion(self):
+        '''Check completion'''
+        cao = sn.extractsingle(r'^resulting parameters:.*cao: (?P<cao>\S+),', self.stdout, 'cao', int)
+        return (sn.assert_found(r'^Algorithm executed.', self.stdout) and cao)
+
+    @deferrable
+    def assert_convergence(self):
+        '''Check convergence'''
+        check_string = sn.assert_found(r'Final convergence met with tolerances:', self.stdout)
+        energy = sn.extractsingle(r'^\s+energy:\s+(?P<energy>\S+)', self.stdout, 'energy', float)
+        return (check_string and (energy != 0.0))
+
+    @sanity_function
+    def assert_sanity(self):
+        '''Check all sanity criteria'''
+        return sn.all([
+            self.assert_completion(),
+            self.assert_convergence(),
+        ])
+
+    @performance_function('s/step')
+    def perf(self):
+        return sn.extractsingle(r'^Performance:\s+(?P<perf>\S+)', self.stdout, 'perf', float)
diff --git a/eessi/testsuite/tests/apps/espresso/src/__init__.py b/eessi/testsuite/tests/apps/espresso/src/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/eessi/testsuite/tests/apps/espresso/src/madelung.py b/eessi/testsuite/tests/apps/espresso/src/madelung.py
new file mode 100644
index 00000000..3f73b5d5
--- /dev/null
+++ b/eessi/testsuite/tests/apps/espresso/src/madelung.py
@@ -0,0 +1,148 @@
+#
+# Copyright (C) 2013-2024 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+
+import espressomd
+import espressomd.version
+import espressomd.electrostatics
+import argparse
+import time
+import numpy as np
+
+parser = argparse.ArgumentParser(description="Benchmark P3M simulations.")
+parser.add_argument("--size", metavar="S", action="store",
+                    default=9, required=False, type=int,
+                    help="Problem size, such that the number of particles N is "
+                         "equal to (2*S)^3; with --weak-scaling this number N "
+                         "is multiplied by the number of cores!")
+parser.add_argument("--gpu", action=argparse.BooleanOptionalAction,
+                    default=False, required=False, help="Use GPU implementation")
+parser.add_argument("--topology", metavar=("X", "Y", "Z"), nargs=3, action="store",
+                    default=None, required=False, type=int, help="Cartesian topology")
+group = parser.add_mutually_exclusive_group()
+group.add_argument("--weak-scaling", action="store_true",
+                   help="Weak scaling benchmark (Gustafson's law: constant work per core)")
+group.add_argument("--strong-scaling", action="store_true",
+                   help="Strong scaling benchmark (Amdahl's law: constant total work)")
+args = parser.parse_args()


+def get_reference_values_per_ion(base_vector):
+    madelung_constant = -1.74756459463318219
+    base_tensor = base_vector * np.eye(3)
+    ref_energy = madelung_constant
+    ref_pressure = madelung_constant * base_tensor / np.trace(base_tensor)
+    return ref_energy, ref_pressure


+def get_normalized_values_per_ion(system):
+    energy = system.analysis.energy()["coulomb"]
+    p_scalar = system.analysis.pressure()["coulomb"]
+    p_tensor = system.analysis.pressure_tensor()["coulomb"]
+    N = len(system.part)
+    V = system.volume()
+    return 2. * energy / N, 2. * p_scalar * V / N, 2. * p_tensor * V / N


+# initialize system
+system = espressomd.System(box_l=[100., 100., 100.])
+system.time_step = 0.01
+system.cell_system.skin = 0.4
+
+# set MPI Cartesian topology
+node_grid = system.cell_system.node_grid.copy()
+n_cores = int(np.prod(node_grid))
+if args.topology:
+    system.cell_system.node_grid = node_grid = args.topology
+
+# place ions on a cubic lattice
+base_vector = np.array([1., 1., 1.])
+lattice_size = 3 * [2 * args.size]
+if args.weak_scaling:
+    lattice_size = np.multiply(lattice_size, node_grid)
+system.box_l = np.multiply(lattice_size, base_vector)
+for var_j in range(lattice_size[0]):
+    for var_k in range(lattice_size[1]):
+        for var_l in range(lattice_size[2]):
+            _ = system.part.add(pos=np.multiply([var_j, var_k, var_l], base_vector),
+                                q=(-1.)**(var_j + var_k + var_l), fix=3 * [True])
+
+# setup P3M algorithm
+algorithm = espressomd.electrostatics.P3M
+if args.gpu:
+    algorithm = espressomd.electrostatics.P3MGPU
+solver = algorithm(prefactor=1., accuracy=1e-6)
+if (espressomd.version.major(), espressomd.version.minor()) == (4, 2):
+    system.actors.add(solver)
+else:
+    system.electrostatics.solver = solver


+print("Algorithm executed. \n")
+
+# The original rtol_pressure of 2e-5 caused failures, especially at high node counts, and was therefore
+# increased to a larger value.
+
+atol_energy = atol_pressure = 1e-12
+atol_forces = 1e-5
+atol_abs_forces = 2e-6
+
+rtol_energy = 5e-6
+rtol_pressure = 1e-4
+rtol_forces = 0.
+rtol_abs_forces = 0.
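+
+# The checks below compare the per-ion Coulomb energy and pressure tensor against the analytical Madelung
+# reference values from get_reference_values_per_ion(), and verify that the forces on the fixed lattice ions
+# vanish to within the tolerances defined above.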
+# run checks
+print("Executing sanity checks...\n")
+forces = np.copy(system.part.all().f)
+energy, p_scalar, p_tensor = get_normalized_values_per_ion(system)
+ref_energy, ref_pressure = get_reference_values_per_ion(base_vector)
+np.testing.assert_allclose(energy, ref_energy, atol=atol_energy, rtol=rtol_energy)
+np.testing.assert_allclose(p_scalar, np.trace(ref_pressure) / 3.,
+                           atol=atol_pressure, rtol=rtol_pressure)
+np.testing.assert_allclose(p_tensor, ref_pressure, atol=atol_pressure, rtol=rtol_pressure)
+np.testing.assert_allclose(forces, 0., atol=atol_forces, rtol=rtol_forces)
+np.testing.assert_allclose(np.median(np.abs(forces)), 0., atol=atol_abs_forces, rtol=rtol_abs_forces)
+
+print("Final convergence met with tolerances: \n\
+    energy: ", atol_energy, "\n\
+    p_scalar: ", atol_pressure, "\n\
+    p_tensor: ", atol_pressure, "\n\
+    forces: ", atol_forces, "\n\
+    abs_forces: ", atol_abs_forces, "\n")
+
+print("Sampling runtime...\n")
+# sample runtime
+n_steps = 10
+timings = []
+for _ in range(10):
+    tick = time.time()
+    system.integrator.run(n_steps)
+    tock = time.time()
+    timings.append((tock - tick) / n_steps)
+
+print("10 steps executed...\n")
+# report results as CSV on stdout (ReFrame captures stdout for the sanity and performance checks)
+header = '"mode","cores","mpi.x","mpi.y","mpi.z","particles","mean","std"\n'
+report = f'''"{"weak scaling" if args.weak_scaling else "strong scaling"}",\
+{n_cores},{node_grid[0]},{node_grid[1]},{node_grid[2]},{len(system.part)},\
+{np.mean(timings):.3e},{np.std(timings, ddof=1):.3e}\n'''
+print(header)
+print(report)
+
+print(f"Performance: {np.mean(timings):.3e} \n")
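
For reference, the sanity and performance checks in `espresso.py` only parse a handful of lines from the stdout of `madelung.py`. The snippet below is a minimal, standalone sketch of that coupling: it applies the same regular expressions eagerly with Python's `re` module to an illustrative, made-up stdout fragment (the mesh, cao and timing values are placeholders), instead of deferring them via `reframe.utility.sanity`.

```python
import re

# Illustrative stdout fragment in the shape produced by madelung.py (values are made up).
sample_stdout = """\
resulting parameters: mesh: [32, 32, 32], cao: 7, r_cut: 4.5, alpha: 0.8
Algorithm executed. 

Final convergence met with tolerances: 
    energy:  1e-12 
    p_scalar:  1e-12 

Performance: 1.234e-02 
"""

# Same patterns as assert_completion(), assert_convergence() and perf() in espresso.py,
# evaluated eagerly here instead of as deferred ReFrame sanity expressions.
cao = int(re.search(r'^resulting parameters:.*cao: (?P<cao>\S+),', sample_stdout, re.M).group('cao'))
completed = re.search(r'^Algorithm executed.', sample_stdout, re.M) is not None
energy_tol = float(re.search(r'^\s+energy:\s+(?P<energy>\S+)', sample_stdout, re.M).group('energy'))
perf = float(re.search(r'^Performance:\s+(?P<perf>\S+)', sample_stdout, re.M).group('perf'))

print(cao, completed, energy_tol, perf)  # -> 7 True 1e-12 0.01234
```

This also shows why the named groups (`(?P<cao>...)`, `(?P<energy>...)`, `(?P<perf>...)`) matter: `sn.extractsingle` looks up the capture by its group name before converting it with `int` or `float`.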