diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index 20ea5e7e..a87b22d1 100644 --- a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -18,9 +18,9 @@ from eessi.testsuite.utils import find_modules, log -def filter_scales_P3M(): +def filter_scales(): """ - Filtering function for filtering scales for P3M test. + Filtering function for filtering scales for P3M test and the LJ test. This is currently required because the 16 node test takes way too long and always fails due to time limit. Once a solution to mesh tuning algorithm is found, where we can specify the mesh sizes for a particular scale, this function can be removed. @@ -31,27 +31,14 @@ def filter_scales_P3M(): ] -@rfm.simple_test -class EESSI_ESPRESSO_P3M_IONIC_CRYSTALS(rfm.RunOnlyRegressionTest): - - scale = parameter(filter_scales_P3M()) +class EESSI_ESPRESSO(rfm.RunOnlyRegressionTest): valid_prog_environs = ['default'] valid_systems = ['*'] - time_limit = '300m' # Need to check if QuantumESPRESSO also gets listed. module_name = parameter(find_modules('ESPResSo')) # device type is parameterized for an impending CUDA ESPResSo module. device_type = parameter([DEVICE_TYPES[CPU]]) - executable = 'python3 madelung.py' - - default_strong_scaling_system_size = 9 - default_weak_scaling_system_size = 6 - - benchmark_info = parameter([ - ('mpi.ionic_crystals.p3m', 'p3m'), - ], fmt=lambda x: x[0], loggable=True) - @run_after('init') def run_after_init(self): """hooks to run after init phase""" @@ -65,27 +52,6 @@ def run_after_init(self): # Set scales as tags hooks.set_tag_scale(self) - @run_after('init') - def set_tag_ci(self): - """ Setting tests under CI tag. 
""" - if (self.benchmark_info[0] in ['mpi.ionic_crystals.p3m'] and SCALES[self.scale]['num_nodes'] < 2): - self.tags.add('CI') - log(f'tags set to {self.tags}') - - if (self.benchmark_info[0] == 'mpi.ionic_crystals.p3m'): - self.tags.add('ionic_crystals_p3m') - - @run_after('init') - def set_executable_opts(self): - """Set executable opts based on device_type parameter""" - num_default = 0 # If this test already has executable opts, they must have come from the command line - hooks.check_custom_executable_opts(self, num_default=num_default) - if not self.has_custom_executable_opts: - # By default we run weak scaling since the strong scaling sizes need to change based on max node size and a - # corresponding min node size has to be chozen. - self.executable_opts += ['--size', str(self.default_weak_scaling_system_size), '--weak-scaling'] - utils.log(f'executable_opts set to {self.executable_opts}') - @run_after('setup') def set_num_tasks_per_node(self): """ Setting number of tasks per node and cpus per task in this function. 
This function sets num_cpus_per_task @@ -102,14 +68,23 @@ def set_mem(self): @deferrable def assert_completion(self): '''Check completion''' - cao = sn.extractsingle(r'^resulting parameters:.*cao: (?P\S+),', self.stdout, 'cao', int) - return (sn.assert_found(r'^Algorithm executed.', self.stdout) and cao) + if self.benchmark_info[0] in ['mpi.ionic_crystals.p3m']: + cao = sn.extractsingle(r'^resulting parameters:.*cao: (?P\S+),', self.stdout, 'cao', int) + return (sn.assert_found(r'^Algorithm executed.', self.stdout) and cao) + elif self.benchmark_info[0] in ['mpi.particles.lj']: + return (sn.assert_found(r'^Algorithm executed.', self.stdout)) @deferrable def assert_convergence(self): '''Check convergence''' - check_string = sn.assert_found(r'Final convergence met with tolerances:', self.stdout) - energy = sn.extractsingle(r'^\s+energy:\s+(?P\S+)', self.stdout, 'energy', float) + check_string = False + energy = 0.0 + if self.benchmark_info[0] in ['mpi.ionic_crystals.p3m']: + check_string = sn.assert_found(r'Final convergence met with tolerances:', self.stdout) + energy = sn.extractsingle(r'^\s+energy:\s+(?P\S+)', self.stdout, 'energy', float) + elif self.benchmark_info[0] in ['mpi.particles.lj']: + check_string = sn.assert_found(r'Final convergence met with relative tolerances:', self.stdout) + energy = sn.extractsingle(r'^\s+sim_energy:\s+(?P\S+)', self.stdout, 'energy', float) return (check_string and (energy != 0.0)) @sanity_function @@ -123,3 +98,99 @@ def assert_sanity(self): @performance_function('s/step') def perf(self): return sn.extractsingle(r'^Performance:\s+(?P\S+)', self.stdout, 'perf', float) + + +@rfm.simple_test +class EESSI_ESPRESSO_P3M_IONIC_CRYSTALS(EESSI_ESPRESSO): + scale = parameter(filter_scales()) + time_limit = '300m' + + executable = 'python3 madelung.py' + + default_weak_scaling_system_size = 6 + + @run_after('init') + def set_tag_ci(self): + """ Setting tests under CI tag. 
""" + if SCALES[self.scale]['num_nodes'] < 2: + self.tags.add('CI') + log(f'tags set to {self.tags}') + + self.tags.add('ionic_crystals_p3m') + + @run_after('init') + def set_executable_opts(self): + """Set executable opts based on device_type parameter""" + num_default = 0 # If this test already has executable opts, they must have come from the command line + hooks.check_custom_executable_opts(self, num_default=num_default) + # By default we run weak scaling since the strong scaling sizes need to change based on max node size and a + # corresponding min node size has to be chozen. + self.executable_opts += ['--size', str(self.default_weak_scaling_system_size), '--weak-scaling'] + utils.log(f'executable_opts set to {self.executable_opts}') + + @run_after('setup') + def set_mem(self): + """ Setting an extra job option of memory. Here the assumption made is that HPC systems will contain at + least 1 GB per core of memory.""" + mem_required_per_node = self.num_tasks_per_node * 0.9 + hooks.req_memory_per_node(test=self, app_mem_req=mem_required_per_node) + + @deferrable + def assert_completion(self): + '''Check completion''' + cao = sn.extractsingle(r'^resulting parameters:.*cao: (?P\S+),', self.stdout, 'cao', int) + return (sn.assert_found(r'^Algorithm executed.', self.stdout) and cao) + + @deferrable + def assert_convergence(self): + '''Check convergence''' + check_string = False + energy = 0.0 + check_string = sn.assert_found(r'Final convergence met with tolerances:', self.stdout) + energy = sn.extractsingle(r'^\s+energy:\s+(?P\S+)', self.stdout, 'energy', float) + return (check_string and (energy != 0.0)) + + +@rfm.simple_test +class EESSI_ESPRESSO_LJ_PARTICLES(EESSI_ESPRESSO): + scale = parameter(filter_scales()) + time_limit = '300m' + + executable = 'python3 lj.py' + + @run_after('init') + def set_tag_ci(self): + """ Setting tests under CI tag. 
""" + if SCALES[self.scale]['num_nodes'] < 2: + self.tags.add('CI') + log(f'tags set to {self.tags}') + + self.tags.add('particles_lj') + + @run_after('init') + def set_executable_opts(self): + """Allow executable opts to be overwritten from command line""" + num_default = 0 # If this test already has executable opts, they must have come from the command line + hooks.check_custom_executable_opts(self, num_default=num_default) + + @run_after('setup') + def set_mem(self): + """ Setting an extra job option of memory. Here the assumption made is that HPC systems will contain at + least 1 GB per core of memory. LJ requires much lesser memory than P3M. 200 MB per core is as per measurement, + therefore 300 should be more than enough. """ + mem_required_per_node = self.num_tasks_per_node * 0.3 + hooks.req_memory_per_node(test=self, app_mem_req=mem_required_per_node) + + @deferrable + def assert_completion(self): + '''Check completion''' + return (sn.assert_found(r'^Algorithm executed.', self.stdout)) + + @deferrable + def assert_convergence(self): + '''Check convergence''' + check_string = False + energy = 0.0 + check_string = sn.assert_found(r'Final convergence met with relative tolerances:', self.stdout) + energy = sn.extractsingle(r'^\s+sim_energy:\s+(?P\S+)', self.stdout, 'energy', float) + return (check_string and (energy != 0.0)) diff --git a/eessi/testsuite/tests/apps/espresso/src/lj.py b/eessi/testsuite/tests/apps/espresso/src/lj.py new file mode 100644 index 00000000..ad924b46 --- /dev/null +++ b/eessi/testsuite/tests/apps/espresso/src/lj.py @@ -0,0 +1,161 @@ +# +# Copyright (C) 2018-2024 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
#
# ESPResSo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
"""Weak-scaling Lennard-Jones (LJ) benchmark for ESPResSo.

The script creates ``--particles-per-core`` particles per node reported by
the cell system, relaxes the random start configuration with steepest
descent, equilibrates under a Langevin thermostat, and then times
``--sample-size`` batches of integration steps.

Lines printed to stdout that downstream tooling greps for:

* ``Algorithm executed.`` once sampling has finished,
* ``Final convergence met with relative tolerances:`` followed by indented
  ``sim_energy:`` / ``sim_pressure:`` lines once the sampled energy and
  pressure agree with the polynomial reference values,
* ``Performance: <seconds per integration step>``.
"""

import argparse
import time

import numpy as np

required_features = ["LENNARD_JONES"]

# Densest packing of equal spheres, pi / (3 * sqrt(2)) ~ 0.74.
MAX_VOLUME_FRACTION = np.pi / (3. * np.sqrt(2.))

# Relative tolerance used when comparing sampled observables with the
# reference fit; the same number is echoed in the convergence report.
RELATIVE_TOLERANCE = 0.1

# (exclusive upper bound on particles per core, integration steps per sample),
# ordered from the smallest to the largest system: small systems integrate
# quickly, so they get more steps per timing sample.
MEASUREMENT_STEPS_TABLE = (
    (260, 6000),
    (600, 4000),
    (1000, 2000),
    (5000, 1000),
    (10000, 500),
    (16000, 200),
)
DEFAULT_MEASUREMENT_STEPS = 100


def get_measurement_steps(particles_per_core):
    """Return the number of integration steps timed per sample.

    The first table entry whose bound exceeds ``particles_per_core`` wins;
    systems with 16000 or more particles per core use
    ``DEFAULT_MEASUREMENT_STEPS``.
    """
    for upper_bound, steps in MEASUREMENT_STEPS_TABLE:
        if particles_per_core < upper_bound:
            return steps
    return DEFAULT_MEASUREMENT_STEPS


def create_parser():
    """Build the command-line parser of the benchmark."""
    parser = argparse.ArgumentParser(description="Benchmark LJ simulations.")
    parser.add_argument("--particles-per-core", metavar="N", action="store",
                        type=int, default=2000, required=False,
                        help="Number of particles in the simulation box")
    parser.add_argument("--sample-size", metavar="S", action="store",
                        type=int, default=30, required=False,
                        help="Sample size")
    parser.add_argument("--volume-fraction", metavar="FRAC", action="store",
                        type=float, default=0.50, required=False,
                        help="Fraction of the simulation box volume occupied by "
                        "particles (range: [0.01-0.74], default: 0.50)")
    return parser


def validate_args(args):
    """Check the parsed arguments for physically meaningful values.

    Raises:
        ValueError: if a count is not positive or the volume fraction lies
            outside ``(0, MAX_VOLUME_FRACTION)``.
    """
    if args.particles_per_core < 1:
        raise ValueError("particles_per_core must be a positive integer")
    if args.sample_size < 1:
        raise ValueError("sample_size must be a positive integer")
    # Written as ``not (x > 0)`` so that NaN is rejected as well.
    if not args.volume_fraction > 0.:
        raise ValueError("volume_fraction must be a positive number")
    if not args.volume_fraction < MAX_VOLUME_FRACTION:
        raise ValueError(
            "volume_fraction exceeds the physical limit of sphere packing (~0.74)")


def parse_args(argv=None):
    """Parse and validate the command line.

    ``argv`` defaults to ``sys.argv[1:]``. Invalid values are reported through
    ``parser.error`` (usage message on stderr, exit status 2) instead of
    ``assert``, which would be skipped when Python runs with ``-O``.
    """
    parser = create_parser()
    args = parser.parse_args(argv)
    try:
        validate_args(args)
    except ValueError as err:
        parser.error(str(err))
    return args


def get_reference_values_per_atom(x):
    """Return the reference ``(energy, pressure)`` for volume fraction ``x``."""
    # result of a polynomial fit in the range from 0.01 to 0.55
    energy = 54.2 * x**3 - 23.8 * x**2 + 4.6 * x - 0.09
    pressure = 377. * x**3 - 149. * x**2 + 32.2 * x - 0.58
    return energy, pressure


def get_normalized_values_per_atom(system):
    """Return the non-bonded ``(energy, pressure * volume)`` of ``system``,
    each doubled and divided by the particle count."""
    energy = system.analysis.energy()["non_bonded"]
    pressure = system.analysis.pressure()["non_bonded"]
    N = len(system.part)
    V = system.volume()
    return 2. * energy / N, 2. * pressure * V / N


def main(argv=None):
    """Run the benchmark and print the timing report."""
    # Imported here so the helpers above stay importable on machines without
    # ESPResSo; the feature check still happens before anything else.
    import espressomd
    espressomd.assert_features(required_features)

    args = parse_args(argv)
    measurement_steps = get_measurement_steps(args.particles_per_core)

    # make simulation deterministic
    np.random.seed(42)

    system = espressomd.System(box_l=[10., 10., 10.])
    system.time_step = 0.01
    system.cell_system.skin = 0.5

    lj_eps = 1.0  # LJ epsilon
    lj_sig = 1.0  # particle diameter
    lj_cut = lj_sig * 2**(1. / 6.)  # cutoff distance

    n_proc = system.cell_system.get_state()["n_nodes"]
    n_part = n_proc * args.particles_per_core
    node_grid = np.array(system.cell_system.node_grid)
    # volume of N spheres with radius r: N * (4/3*pi*r^3)
    sphere_volume = 4. / 3. * np.pi * (lj_sig / 2.)**3
    box_v = args.particles_per_core * 4. / 3. * \
        np.pi * (lj_sig / 2.)**3 / args.volume_fraction
    # box_v = (x * n) * x * x for a column
    system.box_l = float((box_v)**(1. / 3.)) * node_grid
    actual_fraction = n_part * sphere_volume / np.prod(system.box_l)
    if not np.abs(actual_fraction - args.volume_fraction) < 0.1:
        raise RuntimeError(
            f"box volume fraction {actual_fraction:.3g} deviates from the "
            f"requested {args.volume_fraction:.3g}")

    system.non_bonded_inter[0, 0].lennard_jones.set_params(
        epsilon=lj_eps, sigma=lj_sig, cutoff=lj_cut, shift="auto")

    system.part.add(pos=np.random.random((n_part, 3)) * system.box_l)

    # energy minimization
    max_steps = 1000
    # particle forces for volume fractions between 0.1 and 0.5 follow a polynomial
    target_f_max = 20. * args.volume_fraction**2
    system.integrator.set_steepest_descent(
        f_max=target_f_max, gamma=0.001, max_displacement=0.01 * lj_sig)
    n_steps = system.integrator.run(max_steps)
    if not n_steps < max_steps:
        # Explicit raise: the run must stop here even under ``python -O``.
        raise RuntimeError(f'''energy minimization failed: \
E = {system.analysis.energy()["total"] / len(system.part):.3g} per particle, \
f_max = {np.max(np.linalg.norm(system.part.all().f, axis=1)):.2g}, \
target f_max = {target_f_max:.2g}''')

    # warmup
    system.integrator.set_vv()
    system.thermostat.set_langevin(kT=1.0, gamma=1.0, seed=42)

    # tuning and equilibration
    min_skin = 0.2
    max_skin = 1.0
    print("Tune skin: {:.3f}".format(system.cell_system.tune_skin(
        min_skin=min_skin, max_skin=max_skin, tol=0.05, int_steps=100)))
    print("Equilibration")
    system.integrator.run(min(5 * measurement_steps, 60000))
    print("Tune skin: {:.3f}".format(system.cell_system.tune_skin(
        min_skin=min_skin, max_skin=max_skin, tol=0.05, int_steps=100)))
    print("Equilibration")
    system.integrator.run(min(10 * measurement_steps, 60000))

    print("Sampling runtime...")
    timings = []
    energies = []
    pressures = []
    for _ in range(args.sample_size):
        # perf_counter is monotonic, so a wall-clock adjustment during the
        # run cannot distort a sample.
        tick = time.perf_counter()
        system.integrator.run(measurement_steps)
        tock = time.perf_counter()
        timings.append((tock - tick) / measurement_steps)
        energy, pressure = get_normalized_values_per_atom(system)
        energies.append(energy)
        pressures.append(pressure)

    sim_energy = np.mean(energies)
    sim_pressure = np.mean(pressures)
    ref_energy, ref_pressure = get_reference_values_per_atom(args.volume_fraction)

    print("Algorithm executed. \n")
    np.testing.assert_allclose(
        sim_energy, ref_energy, atol=0., rtol=RELATIVE_TOLERANCE)
    np.testing.assert_allclose(
        sim_pressure, ref_pressure, atol=0., rtol=RELATIVE_TOLERANCE)

    # The numbers printed here are the tolerances, not the sampled values.
    # NOTE(review): the width of the indentation before "sim_energy:" /
    # "sim_pressure:" is assumed to be four spaces - confirm against the
    # regular expressions that parse this output.
    print("Final convergence met with relative tolerances: \n"
          "    sim_energy: ", RELATIVE_TOLERANCE, "\n"
          "    sim_pressure: ", RELATIVE_TOLERANCE, "\n")

    header = '"mode","cores","mpi.x","mpi.y","mpi.z","particles","volume_fraction","mean","std"'
    report = f'''"weak scaling",{n_proc},{node_grid[0]},{node_grid[1]},\
{node_grid[2]},{len(system.part)},{args.volume_fraction:.4f},\
{np.mean(timings):.3e},{np.std(timings, ddof=1):.3e}'''
    print(header)
    print(report)
    print(f"Performance: {np.mean(timings):.3e}")


if __name__ == "__main__":
    main()