From e882e6ddeed5041059287402f3fa67107723c28e Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Tue, 14 May 2024 09:37:24 +0200 Subject: [PATCH 01/17] First commit ESPResSo test. Still WIP and needs more polishing. --- .../tests/apps/espresso/benchmarks.csv | 27 ++++ .../testsuite/tests/apps/espresso/espresso.py | 97 +++++++++++++ eessi/testsuite/tests/apps/espresso/job.sh | 10 ++ .../testsuite/tests/apps/espresso/madelung.py | 132 ++++++++++++++++++ eessi/testsuite/tests/apps/espresso/plot.py | 39 ++++++ .../apps/espresso/scripts_Espresso.tar.gz | Bin 0 -> 3089 bytes 6 files changed, 305 insertions(+) create mode 100644 eessi/testsuite/tests/apps/espresso/benchmarks.csv create mode 100644 eessi/testsuite/tests/apps/espresso/espresso.py create mode 100644 eessi/testsuite/tests/apps/espresso/job.sh create mode 100644 eessi/testsuite/tests/apps/espresso/madelung.py create mode 100644 eessi/testsuite/tests/apps/espresso/plot.py create mode 100644 eessi/testsuite/tests/apps/espresso/scripts_Espresso.tar.gz diff --git a/eessi/testsuite/tests/apps/espresso/benchmarks.csv b/eessi/testsuite/tests/apps/espresso/benchmarks.csv new file mode 100644 index 00000000..95724751 --- /dev/null +++ b/eessi/testsuite/tests/apps/espresso/benchmarks.csv @@ -0,0 +1,27 @@ +"mode","cores","mpi.x","mpi.y","mpi.z","particles","mean","std" +"weak scaling",4,2,2,1,6912,2.341e-01,8.081e-03 +"strong scaling",4,2,2,1,5832,2.496e-01,9.019e-03 +"weak scaling",16,4,2,2,27648,2.417e+00,9.576e-02 +"strong scaling",16,4,2,2,5832,3.853e-02,1.991e-03 +"weak scaling",32,4,4,2,55296,4.263e+00,1.161e+00 +"strong scaling",32,4,4,2,5832,2.194e-02,7.303e-04 +"weak scaling",1,1,1,1,1728,7.655e-02,3.434e-03 +"weak scaling",2,2,1,1,3456,1.456e-01,4.679e-03 +"strong scaling",2,2,1,1,5832,3.936e-01,1.098e-02 +"strong scaling",1,1,1,1,5832,6.333e-01,1.194e-01 +"strong scaling",64,4,4,4,5832,1.910e-02,6.132e-04 +"weak scaling",1,1,1,1,1728,9.482e-02,2.956e-03 +"weak scaling",2,2,1,1,3456,2.111e-01,6.614e-03 +"strong 
scaling",1,1,1,1,5832,9.133e-01,2.868e-02 +"strong scaling",16,4,2,2,5832,4.285e-02,1.327e-03 +"strong scaling",64,4,4,4,5832,1.715e-02,5.776e-04 +"strong scaling",128,8,4,4,5832,1.980e-02,7.013e-04 +"weak scaling",64,4,4,4,110592,4.375e-01,1.414e-02 +"weak scaling",100,5,5,4,172800,4.450e-01,1.437e-02 +"weak scaling",128,8,4,4,221184,8.720e+00,2.753e-01 +"weak scaling",128,8,4,4,221184,8.760e+00,3.110e-01 +"weak scaling",4,2,2,1,6912,2.626e-01,8.142e-03 +"weak scaling",4,2,2,1,6912,2.780e-01,8.683e-03 +"weak scaling",4,2,2,1,6912,2.627e-01,8.391e-03 +"weak scaling",4,2,2,1,6912,2.617e-01,8.155e-03 +"weak scaling",2,2,1,1,3456,2.028e-01,6.255e-03 diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py new file mode 100644 index 00000000..494abf67 --- /dev/null +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -0,0 +1,97 @@ +""" +This module tests Espresso in available modules containing substring 'ESPResSo' which is different from Quantum Espresso. +Tests included: +- P3M benchmark - Ionic crystals + - Weak scaling + - Strong scaling +Weak and strong scaling are options that are needed to be provided tothe script and the system is either scaled based on +number of cores or kept constant. +""" + +import reframe as rfm +from reframe.core.builtins import parameter, run_after # added only to make the linter happy +from reframe.utility import reframe + +from hpctestlib.microbenchmarks.mpi.osu import osu_benchmark + +from eessi.testsuite import hooks, utils +from eessi.testsuite.constants import * +from eessi.testsuite.utils import find_modules, log + +@rfm.simple_test +class EESSI_ESPRESSO_P3M_IONIC_CRYSTALS(rfm.RunOnlyRegressionTest): + '''''' + scale = parameter(SCALES.keys()) + valid_prog_environs = ['default'] + valid_systems = ['*'] + time_limit = '30m' + # Need to check if QuantumESPRESSO also gets listed. 
+ module_name = parameter(find_modules('ESPResSo')) + # device type is parameterized for an impending CUDA ESPResSo module. + device_type = parameter([DEVICE_TYPES[CPU]]) + + executable = 'python3 madelung.py' + + default_strong_scaling_system_size = 9 + default_weak_scaling_system_size = 6 + + benchmark_info = parameter([ + ('mpi.ionic_crystals.p3m'), + ], fmt=lambda x: x[0], loggable=True) + + + @run_after('init') + def run_after_init(self): + """hooks to run after init phase""" + + # Filter on which scales are supported by the partitions defined in the ReFrame configuration + hooks.filter_supported_scales(self) + + hooks.filter_valid_systems_by_device_type(self, required_device_type=self.device_type) + + hooks.set_modules(self) + + # Set scales as tags + hooks.set_tag_scale(self) + + @run_after('init') + def set_tag_ci(self): + """ Setting tests under CI tag. """ + if (self.benchmark_info[0] in ['mpi.ionic_crystals.p3m']): + self.tags.add('CI') + log(f'tags set to {self.tags}') + + if (self.benchmark_info[0] == 'mpi.ionic_crystals.p3m'): + self.tags.add('ionic_crystals_p3m') + + + @run_after('init') + def set_mem(self): + """ Setting an extra job option of memory. """ + self.extra_resources = {'memory': {'size': '50GB'}} + + @run_after('init') + def set_executable_opts(self): + """Set executable opts based on device_type parameter""" + num_default = 0 # If this test already has executable opts, they must have come from the command line + hooks.check_custom_executable_opts(self, num_default=num_default) + if not self.has_custom_executable_opts: + # By default we run weak scaling since the strong scaling sizes need to change based on max node size and a + # corresponding min node size has to be chozen. 
+ self.executable_opts += ['--size', self.default_weak_scaling_system_size, '--weak-scaling'] + utils.log(f'executable_opts set to {self.executable_opts}') + + @run_after('setup') + def set_num_tasks_per_node(self): + """ Setting number of tasks per node and cpus per task in this function. This function sets num_cpus_per_task + for 1 node and 2 node options where the request is for full nodes.""" + hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT[CPU]) + + @sanity_function + def assert_sanity(self): + '''Check all sanity criteria''' + return sn.all([ + self.assert_completion(), + self.assert_convergence(), + ]) + diff --git a/eessi/testsuite/tests/apps/espresso/job.sh b/eessi/testsuite/tests/apps/espresso/job.sh new file mode 100644 index 00000000..17399c52 --- /dev/null +++ b/eessi/testsuite/tests/apps/espresso/job.sh @@ -0,0 +1,10 @@ +#!/bin/bash +#SBATCH --time=00:40:00 +#SBATCH --output %j.stdout +#SBATCH --error %j.stderr +module load spack/default gcc/12.3.0 cuda/12.3.0 openmpi/4.1.6 \ + fftw/3.3.10 boost/1.83.0 python/3.12.1 +source ../espresso-4.3/venv/bin/activate +srun --cpu-bind=cores python3 madelung.py --size 6 --weak-scaling +srun --cpu-bind=cores python3 madelung.py --size 9 --strong-scaling +deactivate diff --git a/eessi/testsuite/tests/apps/espresso/madelung.py b/eessi/testsuite/tests/apps/espresso/madelung.py new file mode 100644 index 00000000..4bfb1df1 --- /dev/null +++ b/eessi/testsuite/tests/apps/espresso/madelung.py @@ -0,0 +1,132 @@ +# +# Copyright (C) 2013-2024 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import espressomd +import espressomd.version +import espressomd.electrostatics +import argparse +import pathlib +import time +import numpy as np + +parser = argparse.ArgumentParser(description="Benchmark P3M simulations.") +parser.add_argument("--size", metavar="S", action="store", + default=9, required=False, type=int, + help="Problem size, such that the number of particles N is " + "equal to (2*S)^2; with --weak-scaling this number N " + "is multiplied by the number of cores!") +parser.add_argument("--gpu", action=argparse.BooleanOptionalAction, + default=False, required=False, help="Use GPU implementation") +parser.add_argument("--topology", metavar=("X", "Y", "Z"), nargs=3, action="store", + default=None, required=False, type=int, help="Cartesian topology") +parser.add_argument("--output", metavar="FILEPATH", action="store", + type=str, required=False, default="benchmarks.csv", + help="Output file (default: benchmarks.csv)") +group = parser.add_mutually_exclusive_group() +group.add_argument("--weak-scaling", action="store_true", + help="Weak scaling benchmark (Gustafson's law: constant work per core)") +group.add_argument("--strong-scaling", action="store_true", + help="Strong scaling benchmark (Amdahl's law: constant total work)") +args = parser.parse_args() + +def get_reference_values_per_ion(base_vector): + madelung_constant = -1.74756459463318219 + base_tensor = base_vector * np.eye(3) + ref_energy = madelung_constant + ref_pressure = madelung_constant * base_tensor / np.trace(base_tensor) + return ref_energy, ref_pressure + +def get_normalized_values_per_ion(system): + energy = 
system.analysis.energy()["coulomb"] + p_scalar = system.analysis.pressure()["coulomb"] + p_tensor = system.analysis.pressure_tensor()["coulomb"] + N = len(system.part) + V = system.volume() + return 2. * energy / N, 2. * p_scalar * V / N, 2. * p_tensor * V / N + +# initialize system +system = espressomd.System(box_l=[100., 100., 100.]) +system.time_step = 0.01 +system.cell_system.skin = 0.4 + +# set MPI Cartesian topology +node_grid = system.cell_system.node_grid.copy() +n_cores = int(np.prod(node_grid)) +if args.topology: + system.cell_system.node_grid = node_grid = args.topology + +# place ions on a cubic lattice +base_vector = np.array([1., 1., 1.]) +lattice_size = 3 * [2 * args.size] +if args.weak_scaling: + lattice_size = np.multiply(lattice_size, node_grid) +system.box_l = np.multiply(lattice_size, base_vector) +for j in range(lattice_size[0]): + for k in range(lattice_size[1]): + for l in range(lattice_size[2]): + _ = system.part.add(pos=np.multiply([j, k, l], base_vector), + q=(-1.)**(j + k + l), fix=3 * [True]) + +# setup P3M algorithm +algorithm = espressomd.electrostatics.P3M +if args.gpu: + algorithm = espressomd.electrostatics.P3MGPU +solver = algorithm(prefactor=1., accuracy=1e-6) +if (espressomd.version.major(), espressomd.version.minor()) == (4, 2): + system.actors.add(solver) +else: + system.electrostatics.solver = solver + +# run checks +forces = np.copy(system.part.all().f) +energy, p_scalar, p_tensor = get_normalized_values_per_ion(system) +ref_energy, ref_pressure = get_reference_values_per_ion(base_vector) +np.testing.assert_allclose(energy, ref_energy, atol=1e-12, rtol=5e-6) +np.testing.assert_allclose(p_scalar, np.trace(ref_pressure) / 3., + atol=1e-12, rtol=2e-5) +np.testing.assert_allclose(p_tensor, ref_pressure, atol=1e-12, rtol=2e-5) +np.testing.assert_allclose(forces, 0., atol=1e-5, rtol=0.) +np.testing.assert_allclose(np.median(np.abs(forces)), 0., atol=2e-6, rtol=0.) 
+ + +print("Executing sanity ...\n") +print (np.all([np.allclose(energy, ref_energy, atol=1e-12, rtol=5e-6), + np.allclose(p_scalar, np.trace(ref_pressure) / 3., + atol=1e-12, rtol=2e-5), + np.allclose(p_tensor, ref_pressure, atol=1e-12, rtol=2e-5), + np.allclose(forces, 0., atol=1e-5, rtol=0.), + np.allclose(np.median(np.abs(forces)), 0., atol=2e-6, rtol=0.)])) + +print("Sanity checking ...\n") +# sample runtime +n_steps = 10 +timings = [] +for _ in range(10): + tick = time.time() + system.integrator.run(n_steps) + tock = time.time() + timings.append((tock - tick) / n_steps) + +# write results to file +header = '"mode","cores","mpi.x","mpi.y","mpi.z","particles","mean","std"\n' +report = f'"{"weak scaling" if args.weak_scaling else "strong scaling"}",{n_cores},{node_grid[0]},{node_grid[1]},{node_grid[2]},{len(system.part)},{np.mean(timings):.3e},{np.std(timings, ddof=1):.3e}\n' +if pathlib.Path(args.output).is_file(): + header = "" +with open(args.output, "a") as f: + f.write(header + report) diff --git a/eessi/testsuite/tests/apps/espresso/plot.py b/eessi/testsuite/tests/apps/espresso/plot.py new file mode 100644 index 00000000..c9a023c4 --- /dev/null +++ b/eessi/testsuite/tests/apps/espresso/plot.py @@ -0,0 +1,39 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import matplotlib.ticker as mtick + +df = pd.read_csv("benchmarks.csv") +df = df.sort_values(by=["mode", "cores", "mpi.x", "mpi.y", "mpi.z"]) + +group = df.query(f"mode == 'strong scaling'") + +fig = plt.figure(figsize=(12, 6)) +ax = fig.subplots().axes +xdata = group["cores"].to_numpy() +ydata = group["mean"].to_numpy() +ax.axline((xdata[0], xdata[0]), slope=1, linestyle="--", color="grey", label="Theoretical maximum") +ax.plot(xdata, ydata[0] / ydata, "o-", label="Measurements") +ax.set_title("Strong scaling") +ax.set_xlabel("Number of cores") +ax.set_ylabel("Speed-up") +ax.set_xscale("log", base=2) +ax.set_yscale("log", base=10) +ax.legend() +plt.show() + +group = 
df.query(f"mode == 'weak scaling'") + +fig = plt.figure(figsize=(12, 6)) +ax = fig.subplots().axes +xdata = group["cores"].to_numpy() +ydata = group["mean"].to_numpy() +ax.axline((-np.inf, 1), slope=0, linestyle="--", color="grey", label="Theoretical maximum") +ax.plot(xdata, ydata[0] / ydata, "o-", label="Measurements") +ax.set_title("Weak scaling") +ax.set_xlabel("Number of cores") +ax.set_ylabel("Efficiency") +ax.set_xscale("log", base=2) +ax.yaxis.set_major_formatter(mtick.PercentFormatter(1)) +ax.legend() +plt.show() diff --git a/eessi/testsuite/tests/apps/espresso/scripts_Espresso.tar.gz b/eessi/testsuite/tests/apps/espresso/scripts_Espresso.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24e2621fec80e082c1830617209e16fa63c8df4e GIT binary patch literal 3089 zcmV+s4DRzEiwFP!000001MM1XZ`(MspYRr0(9qv~LhNs*Q44Y*X-3GCt9eGlNARK` z{T}LkZ+NObC9il|WYK&fiSf!LuI+ez*LHn!zF_3_*~uvrXPl%NzhZ&Zkl=g~2{Maf zM&O##OcFk8FfE1B&>?2V7!iCXZ)wK%Nx^dx(1c_xjD*ahX)b}0Bs2*JJR?gUMzaD~ zz)+rqEF%*1kStpY6oU_Mjz5w&EMXaq$w@wqqkz1N0+t9yC@6#@V!=W(MK(xqh#H(# z8jwQ{e5s6h(jzQ__ZhiinSf{F)gmf>J;F2KVNi)`XN0FnW`eE-iK$#sZq9l&xsXJO zV!Gfd%wz$~U~acj9Fr*{xnQ$A?g2I6k{^%G-+uUbP7aPgksl9EPY;gIKm8qW<$}Xo zcEd_e(K3xAU<~ugXd(+x8yLKQefsJxP#nBGdUtgG33MliN9V_{&(6rh_82K0~Eb*4N6#tWIGQ7p>&egant&@2wgg5EH2X~3cz z(2J4)g6hlu0v`nkTu%v-uz{ zd5FP3Q5y|x!XXryNHZn`Uxu3_R^_cNZ&(b`opHzpG73bEK(jf-H|SYCr&KQDXj(tY zXxY3<@+Hg|;x9=xO%Y{e+%T{XvU$E_i9AtnjF5>Si&Qj?^_MIO7E794lN0Yf5z#V_ zRd@XSQLGj?3JXn+gjcx`DOkvXCbj%4+5HmrJ zaVh9q87k002Z##EiQzs!Gym(B!A|=9TSl+@BA{^u;h`d<;(z=oKfnX?$*5c()8d|H z0a!--@UU6uX}&UKvb#AIvE3HrK^&+f5+&sXiRQ-cerz}=R#bBSTGcaK`^Ni;y ztk>48mbrw`jEf1o3*uZrUYjT^qr%>pdOKq8Pj(`+{DB#liv9@1qz?B+KVrPW24yCA z@=QR*x!s2dPGBT~0>a^a3So^g-OOP#Q9xEp=D%C7Gey2lvx8+w7x9K#l1o_7s2u8q zLAf%kx}bIfjHrP-$ehVZ#%3%7zHD+s21ON% z9E#u;B;eVWH$Vw8qX9Eo!?iS-XGu$FZ;kuPFbU6=VD_I`c;7Oj5Rxq`qcobAqZWk} zRftGfdchYPNNv*^-qQ;aD5mgwQw8uacvs!Ho16i8lUn_1m 
zJcgJx)=cDDVAg7S19IFe2O9OC1ATkAQm`7mb&1(xgDKJYiQMC)OAu}5gUC=0nMuTRWS~SCULs+$lGCCgP4b0m2Yt*QI8bbgR z>_iAO$I#T}Q|R5GIf3fVwDyV`W4vQCS{N4&8czKK(^be5Y#0E`1Mgh8@S|km;ANvX z=Fdr$XiDAh5rc@TwiibGRc|$m##hRjkJ7ZWi-!H}753@SPS4pI=7oJ(Ei^>BegMg- zk!r;F0YtZrh%i|N04@wn;6}>DxUKocRgYZvNPM}j_V&uxhR&T)*9M1vrQTw07B)*q@sH#VDkYe}y#`Si9l{MIr`@SRDzhR@Q`k?OBgT^M*{9^>@A?voz(pUAAcdMNXwNdju;&O}$eQ+t#C`xQJN@i68Ii zR8+i7v&9YM4_n-DF;d(ZU_7ZEW}=D?Mw4S}Fam<`;o?%It;s6UICd4*5TVzAfz+yM zRb{mbLMR{%_SoQh3q%+d`w~U+t(28ii>4_{Lc>sKeI*B-*RTNPZ(#-FQGkHtC2(hl z&9}B-G*rvwnZAT%t@m_Q&fvOCBkQiZ7S;7=^6J4zJvih%j@0Y5o{t9<>34wgS_UNoXy7+j%nL!NnH2Ar)l(}zt ztQ-cd>rvRmgnTx3%C|U`XjUI8SSN5blpP)ubH@1i`LFFbzIX5Z*Y#Y#JOBM2@P95wZGG|e z{GJPcPH#AZ?CM&c51G<-dV7|=hj*TKe+@PiJ9{2d_@kks7(shIDk;__IKzs>-5vUS zNa5_V7q$%)JG)5hZpvtAln~F_+wl;w=UAhWvrPzK`HEy`#~lH&{_0U;{1)7)PK9~9$*c3b`-v6`JTT`snSkP&+~VNpa}dbOZnDtceLHM zHAOY;kylbUmOa{gz*5zpq8M79=T#KtY@AJsq2DY26bV?=vDGw&mgBi<+JAJ2ckl?L zF_+)++aJOg^1thO8{hwI*Xi>A_kb-QFPcvsNeBb@L9O|cvZN9N2Ah?%z&rIimRsIq zX9yh!ykFw2riC*+2MnzYT9FC#8pi!&T(d+8XRx(`7P{Q87}H{WQ57jtS1D4LDN@%d zQWYxP%hpG^!02<%vci}tdffRvThCt4K&Cc}=BQXKEx5yzFSy`FeQe+cVQ88fy~9p3 znp+~D;`9V;-sl|@+FeK`#f=t9yr{Ihv?QM>dqV46tYfIkxrRXRfEDP%3`22)=CeoY zJJ=+M7<#<116zN<5weKcSnu~?!U2zAbF9xZhSQJfl*MEHe8E5mus4mt7kAMzU+PK* zs$B}`k)q-bZF^CU=)Aw;@t#rKC*kq2D2WB^BV{C^550F9yn217hzb2+{bdxR*N z^@!6r-2SsTy!~0?cewlYY!(F(tn=b49A3Z#ghEj#Ig=S4en`nOL!}Vwgu&K1k%!H& zV?G!Pe{^2`tH2eXg2}$Ej4#xGd+6M+|2DMpUH$(a@brhlG)e}r3K!bbvzG_wuilb= z9}j-Uw!QD$`>@qnjhCkk3G$bOW!S$f}{7U9Q zFmPaVXxSvlLt5YCVFpz3fp0n1ko>fE7&x2B+kppYj!mYV3psGCJtRsCx!?)B0w#_o zcn&FoSk|CE0`B{kH@IQRjhZHY`n;i%X#$UG`~4uz`!F7k%TFsMGq3r$2Tcws|EC2R z-uoX3zn#N~-2WD Date: Tue, 14 May 2024 22:15:57 +0200 Subject: [PATCH 02/17] Moved some files and added __init__.py to mark the directories and sub-dirs as part of the package. Sanity for the weak scaling is already added. Next step is to extract the performance timing and log it. 
--- .../testsuite/tests/apps/espresso/__init__.py | 0 .../tests/apps/espresso/benchmarks.csv | 7 +++ .../testsuite/tests/apps/espresso/espresso.py | 19 +++++++- .../tests/apps/espresso/src/__init__.py | 0 .../tests/apps/espresso/{ => src}/job.sh | 0 .../tests/apps/espresso/{ => src}/madelung.py | 44 +++++++++++++----- .../tests/apps/espresso/{ => src}/plot.py | 0 .../{ => src}/scripts_Espresso.tar.gz | Bin 8 files changed, 56 insertions(+), 14 deletions(-) create mode 100644 eessi/testsuite/tests/apps/espresso/__init__.py create mode 100644 eessi/testsuite/tests/apps/espresso/src/__init__.py rename eessi/testsuite/tests/apps/espresso/{ => src}/job.sh (100%) rename eessi/testsuite/tests/apps/espresso/{ => src}/madelung.py (76%) rename eessi/testsuite/tests/apps/espresso/{ => src}/plot.py (100%) rename eessi/testsuite/tests/apps/espresso/{ => src}/scripts_Espresso.tar.gz (100%) diff --git a/eessi/testsuite/tests/apps/espresso/__init__.py b/eessi/testsuite/tests/apps/espresso/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/eessi/testsuite/tests/apps/espresso/benchmarks.csv b/eessi/testsuite/tests/apps/espresso/benchmarks.csv index 95724751..9091534b 100644 --- a/eessi/testsuite/tests/apps/espresso/benchmarks.csv +++ b/eessi/testsuite/tests/apps/espresso/benchmarks.csv @@ -25,3 +25,10 @@ "weak scaling",4,2,2,1,6912,2.627e-01,8.391e-03 "weak scaling",4,2,2,1,6912,2.617e-01,8.155e-03 "weak scaling",2,2,1,1,3456,2.028e-01,6.255e-03 +"weak scaling",2,2,1,1,3456,3.247e-01,1.026e-02 +"weak scaling",2,2,1,1,3456,3.249e-01,1.029e-02 +"weak scaling",2,2,1,1,3456,3.257e-01,1.028e-02 +"weak scaling",2,2,1,1,3456,3.375e-01,1.095e-02 +"weak scaling",2,2,1,1,3456,3.367e-01,1.086e-02 +"weak scaling",2,2,1,1,3456,3.241e-01,1.048e-02 +"weak scaling",2,2,1,1,3456,3.243e-01,1.038e-02 diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index 494abf67..37f81344 100644 --- 
a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -9,6 +9,8 @@ """ import reframe as rfm +import reframe.utility.sanity as sn + from reframe.core.builtins import parameter, run_after # added only to make the linter happy from reframe.utility import reframe @@ -36,7 +38,7 @@ class EESSI_ESPRESSO_P3M_IONIC_CRYSTALS(rfm.RunOnlyRegressionTest): default_weak_scaling_system_size = 6 benchmark_info = parameter([ - ('mpi.ionic_crystals.p3m'), + ('mpi.ionic_crystals.p3m', 'p3m'), ], fmt=lambda x: x[0], loggable=True) @@ -78,7 +80,7 @@ def set_executable_opts(self): if not self.has_custom_executable_opts: # By default we run weak scaling since the strong scaling sizes need to change based on max node size and a # corresponding min node size has to be chozen. - self.executable_opts += ['--size', self.default_weak_scaling_system_size, '--weak-scaling'] + self.executable_opts += ['--size', str(self.default_weak_scaling_system_size), '--weak-scaling'] utils.log(f'executable_opts set to {self.executable_opts}') @run_after('setup') @@ -87,6 +89,19 @@ def set_num_tasks_per_node(self): for 1 node and 2 node options where the request is for full nodes.""" hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT[CPU]) + @deferrable + def assert_completion(self): + '''Check completion''' + cao = sn.extractsingle(r'^resulting parameters:.*cao: (?P<cao>\S+),', self.stdout, 'cao', int) + return (sn.assert_found(r'^Algorithm executed.', self.stdout) and cao) + + @deferrable + def assert_convergence(self): + '''Check convergence''' + check_string = sn.assert_found(r'Final convergence met with tolerances:', self.stdout) + energy = sn.extractsingle(r'^\s+energy:\s+(?P<energy>\S+)', self.stdout, 'energy', float) + return (check_string and (energy != 0.0)) + @sanity_function def assert_sanity(self): '''Check all sanity criteria''' diff --git a/eessi/testsuite/tests/apps/espresso/src/__init__.py b/eessi/testsuite/tests/apps/espresso/src/__init__.py new 
file mode 100644 index 00000000..e69de29b diff --git a/eessi/testsuite/tests/apps/espresso/job.sh b/eessi/testsuite/tests/apps/espresso/src/job.sh similarity index 100% rename from eessi/testsuite/tests/apps/espresso/job.sh rename to eessi/testsuite/tests/apps/espresso/src/job.sh diff --git a/eessi/testsuite/tests/apps/espresso/madelung.py b/eessi/testsuite/tests/apps/espresso/src/madelung.py similarity index 76% rename from eessi/testsuite/tests/apps/espresso/madelung.py rename to eessi/testsuite/tests/apps/espresso/src/madelung.py index 4bfb1df1..628d8eab 100644 --- a/eessi/testsuite/tests/apps/espresso/madelung.py +++ b/eessi/testsuite/tests/apps/espresso/src/madelung.py @@ -93,27 +93,46 @@ def get_normalized_values_per_ion(system): else: system.electrostatics.solver = solver + +print("Algorithm executed. \n") + +atol_energy = atol_pressure = 1e-12 +atol_forces = 1e-5 +atol_abs_forces = 2e-6 + +rtol_energy = 5e-6 +rtol_pressure = 2e-5 +rtol_forces = 0. +rtol_abs_forces = 0. # run checks forces = np.copy(system.part.all().f) energy, p_scalar, p_tensor = get_normalized_values_per_ion(system) ref_energy, ref_pressure = get_reference_values_per_ion(base_vector) -np.testing.assert_allclose(energy, ref_energy, atol=1e-12, rtol=5e-6) +np.testing.assert_allclose(energy, ref_energy, atol=atol_energy, rtol=rtol_energy) np.testing.assert_allclose(p_scalar, np.trace(ref_pressure) / 3., - atol=1e-12, rtol=2e-5) -np.testing.assert_allclose(p_tensor, ref_pressure, atol=1e-12, rtol=2e-5) -np.testing.assert_allclose(forces, 0., atol=1e-5, rtol=0.) -np.testing.assert_allclose(np.median(np.abs(forces)), 0., atol=2e-6, rtol=0.) 
+ atol=atol_pressure, rtol=rtol_pressure) +np.testing.assert_allclose(p_tensor, ref_pressure, atol=atol_pressure, rtol=rtol_pressure) +np.testing.assert_allclose(forces, 0., atol=atol_forces, rtol=rtol_forces) +np.testing.assert_allclose(np.median(np.abs(forces)), 0., atol=atol_abs_forces, rtol=rtol_abs_forces) -print("Executing sanity ...\n") -print (np.all([np.allclose(energy, ref_energy, atol=1e-12, rtol=5e-6), +print("Executing sanity checks...\n") +if (np.all([np.allclose(energy, ref_energy, atol=atol_energy, rtol=rtol_energy), np.allclose(p_scalar, np.trace(ref_pressure) / 3., - atol=1e-12, rtol=2e-5), - np.allclose(p_tensor, ref_pressure, atol=1e-12, rtol=2e-5), - np.allclose(forces, 0., atol=1e-5, rtol=0.), - np.allclose(np.median(np.abs(forces)), 0., atol=2e-6, rtol=0.)])) + atol=atol_pressure, rtol=rtol_pressure), + np.allclose(p_tensor, ref_pressure, atol=atol_pressure, rtol=rtol_pressure), + np.allclose(forces, 0., atol=atol_forces, rtol=rtol_forces), + np.allclose(np.median(np.abs(forces)), 0., atol=atol_abs_forces, rtol=rtol_abs_forces)])): + print("Final convergence met with tolerances: \n\ + energy: ", atol_energy, "\n\ + p_scalar: ", atol_pressure, "\n\ + p_tensor: ", atol_pressure, "\n\ + forces: ", atol_forces, "\n\ + abs_forces: ", atol_abs_forces, "\n") +else: + print("At least one parameter did not meet the tolerance, see the log above.\n") -print("Sanity checking ...\n") +print("Sampling runtime...\n") # sample runtime n_steps = 10 timings = [] @@ -126,6 +145,7 @@ def get_normalized_values_per_ion(system): # write results to file header = '"mode","cores","mpi.x","mpi.y","mpi.z","particles","mean","std"\n' report = f'"{"weak scaling" if args.weak_scaling else "strong scaling"}",{n_cores},{node_grid[0]},{node_grid[1]},{node_grid[2]},{len(system.part)},{np.mean(timings):.3e},{np.std(timings, ddof=1):.3e}\n' +print(report) if pathlib.Path(args.output).is_file(): header = "" with open(args.output, "a") as f: diff --git 
a/eessi/testsuite/tests/apps/espresso/plot.py b/eessi/testsuite/tests/apps/espresso/src/plot.py similarity index 100% rename from eessi/testsuite/tests/apps/espresso/plot.py rename to eessi/testsuite/tests/apps/espresso/src/plot.py diff --git a/eessi/testsuite/tests/apps/espresso/scripts_Espresso.tar.gz b/eessi/testsuite/tests/apps/espresso/src/scripts_Espresso.tar.gz similarity index 100% rename from eessi/testsuite/tests/apps/espresso/scripts_Espresso.tar.gz rename to eessi/testsuite/tests/apps/espresso/src/scripts_Espresso.tar.gz From 6e429af96f2297f5f881ca178360350a99e4a9f8 Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Wed, 29 May 2024 19:05:35 +0200 Subject: [PATCH 03/17] 1. Scaled memory with the number of tasks per node. 2. Increased time limit to account for tuning that takes longer on large number of cores. --- .../testsuite/tests/apps/espresso/espresso.py | 15 ++++++------ .../tests/apps/espresso/src/madelung.py | 23 +++++++------------ 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index 37f81344..98b0017e 100644 --- a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -26,7 +26,7 @@ class EESSI_ESPRESSO_P3M_IONIC_CRYSTALS(rfm.RunOnlyRegressionTest): scale = parameter(SCALES.keys()) valid_prog_environs = ['default'] valid_systems = ['*'] - time_limit = '30m' + time_limit = '180m' # Need to check if QuantumESPRESSO also gets listed. module_name = parameter(find_modules('ESPResSo')) # device type is parameterized for an impending CUDA ESPResSo module. @@ -66,12 +66,6 @@ def set_tag_ci(self): if (self.benchmark_info[0] == 'mpi.ionic_crystals.p3m'): self.tags.add('ionic_crystals_p3m') - - @run_after('init') - def set_mem(self): - """ Setting an extra job option of memory. 
""" - self.extra_resources = {'memory': {'size': '50GB'}} - @run_after('init') def set_executable_opts(self): """Set executable opts based on device_type parameter""" @@ -89,6 +83,13 @@ def set_num_tasks_per_node(self): for 1 node and 2 node options where the request is for full nodes.""" hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT[CPU]) + @run_after('setup') + def set_mem(self): + """ Setting an extra job option of memory. Here the assumption made is that HPC systems will contain at + least 1 GB per core of memory.""" + mem_required_per_node = str(self.num_tasks_per_node * 1) + 'GB' + self.extra_resources = {'memory': {'size': mem_required_per_node}} + @deferrable def assert_completion(self): '''Check completion''' diff --git a/eessi/testsuite/tests/apps/espresso/src/madelung.py b/eessi/testsuite/tests/apps/espresso/src/madelung.py index 628d8eab..2cf6fea0 100644 --- a/eessi/testsuite/tests/apps/espresso/src/madelung.py +++ b/eessi/testsuite/tests/apps/espresso/src/madelung.py @@ -105,6 +105,7 @@ def get_normalized_values_per_ion(system): rtol_forces = 0. rtol_abs_forces = 0. 
# run checks +print("Executing sanity checks...\n") forces = np.copy(system.part.all().f) energy, p_scalar, p_tensor = get_normalized_values_per_ion(system) ref_energy, ref_pressure = get_reference_values_per_ion(base_vector) @@ -115,22 +116,12 @@ def get_normalized_values_per_ion(system): np.testing.assert_allclose(forces, 0., atol=atol_forces, rtol=rtol_forces) np.testing.assert_allclose(np.median(np.abs(forces)), 0., atol=atol_abs_forces, rtol=rtol_abs_forces) - -print("Executing sanity checks...\n") -if (np.all([np.allclose(energy, ref_energy, atol=atol_energy, rtol=rtol_energy), - np.allclose(p_scalar, np.trace(ref_pressure) / 3., - atol=atol_pressure, rtol=rtol_pressure), - np.allclose(p_tensor, ref_pressure, atol=atol_pressure, rtol=rtol_pressure), - np.allclose(forces, 0., atol=atol_forces, rtol=rtol_forces), - np.allclose(np.median(np.abs(forces)), 0., atol=atol_abs_forces, rtol=rtol_abs_forces)])): - print("Final convergence met with tolerances: \n\ +print("Final convergence met with tolerances: \n\ energy: ", atol_energy, "\n\ p_scalar: ", atol_pressure, "\n\ p_tensor: ", atol_pressure, "\n\ forces: ", atol_forces, "\n\ abs_forces: ", atol_abs_forces, "\n") -else: - print("At least one parameter did not meet the tolerance, see the log above.\n") print("Sampling runtime...\n") # sample runtime @@ -142,11 +133,13 @@ def get_normalized_values_per_ion(system): tock = time.time() timings.append((tock - tick) / n_steps) +print("10 steps executed...\n") # write results to file header = '"mode","cores","mpi.x","mpi.y","mpi.z","particles","mean","std"\n' report = f'"{"weak scaling" if args.weak_scaling else "strong scaling"}",{n_cores},{node_grid[0]},{node_grid[1]},{node_grid[2]},{len(system.part)},{np.mean(timings):.3e},{np.std(timings, ddof=1):.3e}\n' print(report) -if pathlib.Path(args.output).is_file(): - header = "" -with open(args.output, "a") as f: - f.write(header + report) + +# if pathlib.Path(args.output).is_file(): +# header = "" +# with 
open(args.output, "a") as f: +# f.write(header + report) From e4321a8cc4088634983a853539fd91d978f9f44c Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Wed, 29 May 2024 19:08:43 +0200 Subject: [PATCH 04/17] 1. Increased time limit again to 5 hours for 16 node tests. This is a temporary fix until the mesh size can be fixed based on extrapolation. --- eessi/testsuite/tests/apps/espresso/espresso.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index 98b0017e..d39c4aaa 100644 --- a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -26,7 +26,7 @@ class EESSI_ESPRESSO_P3M_IONIC_CRYSTALS(rfm.RunOnlyRegressionTest): scale = parameter(SCALES.keys()) valid_prog_environs = ['default'] valid_systems = ['*'] - time_limit = '180m' + time_limit = '300m' # Need to check if QuantumESPRESSO also gets listed. module_name = parameter(find_modules('ESPResSo')) # device type is parameterized for an impending CUDA ESPResSo module. From c06e32f2d0a692144954b7338b3183253c484b23 Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Wed, 29 May 2024 19:18:31 +0200 Subject: [PATCH 05/17] Deleting the tar file. 
--- .../apps/espresso/src/scripts_Espresso.tar.gz | Bin 3089 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 eessi/testsuite/tests/apps/espresso/src/scripts_Espresso.tar.gz diff --git a/eessi/testsuite/tests/apps/espresso/src/scripts_Espresso.tar.gz b/eessi/testsuite/tests/apps/espresso/src/scripts_Espresso.tar.gz deleted file mode 100644 index 24e2621fec80e082c1830617209e16fa63c8df4e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3089 zcmV+s4DRzEiwFP!000001MM1XZ`(MspYRr0(9qv~LhNs*Q44Y*X-3GCt9eGlNARK` z{T}LkZ+NObC9il|WYK&fiSf!LuI+ez*LHn!zF_3_*~uvrXPl%NzhZ&Zkl=g~2{Maf zM&O##OcFk8FfE1B&>?2V7!iCXZ)wK%Nx^dx(1c_xjD*ahX)b}0Bs2*JJR?gUMzaD~ zz)+rqEF%*1kStpY6oU_Mjz5w&EMXaq$w@wqqkz1N0+t9yC@6#@V!=W(MK(xqh#H(# z8jwQ{e5s6h(jzQ__ZhiinSf{F)gmf>J;F2KVNi)`XN0FnW`eE-iK$#sZq9l&xsXJO zV!Gfd%wz$~U~acj9Fr*{xnQ$A?g2I6k{^%G-+uUbP7aPgksl9EPY;gIKm8qW<$}Xo zcEd_e(K3xAU<~ugXd(+x8yLKQefsJxP#nBGdUtgG33MliN9V_{&(6rh_82K0~Eb*4N6#tWIGQ7p>&egant&@2wgg5EH2X~3cz z(2J4)g6hlu0v`nkTu%v-uz{ zd5FP3Q5y|x!XXryNHZn`Uxu3_R^_cNZ&(b`opHzpG73bEK(jf-H|SYCr&KQDXj(tY zXxY3<@+Hg|;x9=xO%Y{e+%T{XvU$E_i9AtnjF5>Si&Qj?^_MIO7E794lN0Yf5z#V_ zRd@XSQLGj?3JXn+gjcx`DOkvXCbj%4+5HmrJ zaVh9q87k002Z##EiQzs!Gym(B!A|=9TSl+@BA{^u;h`d<;(z=oKfnX?$*5c()8d|H z0a!--@UU6uX}&UKvb#AIvE3HrK^&+f5+&sXiRQ-cerz}=R#bBSTGcaK`^Ni;y ztk>48mbrw`jEf1o3*uZrUYjT^qr%>pdOKq8Pj(`+{DB#liv9@1qz?B+KVrPW24yCA z@=QR*x!s2dPGBT~0>a^a3So^g-OOP#Q9xEp=D%C7Gey2lvx8+w7x9K#l1o_7s2u8q zLAf%kx}bIfjHrP-$ehVZ#%3%7zHD+s21ON% z9E#u;B;eVWH$Vw8qX9Eo!?iS-XGu$FZ;kuPFbU6=VD_I`c;7Oj5Rxq`qcobAqZWk} zRftGfdchYPNNv*^-qQ;aD5mgwQw8uacvs!Ho16i8lUn_1m zJcgJx)=cDDVAg7S19IFe2O9OC1ATkAQm`7mb&1(xgDKJYiQMC)OAu}5gUC=0nMuTRWS~SCULs+$lGCCgP4b0m2Yt*QI8bbgR z>_iAO$I#T}Q|R5GIf3fVwDyV`W4vQCS{N4&8czKK(^be5Y#0E`1Mgh8@S|km;ANvX z=Fdr$XiDAh5rc@TwiibGRc|$m##hRjkJ7ZWi-!H}753@SPS4pI=7oJ(Ei^>BegMg- 
zk!r;F0YtZrh%i|N04@wn;6}>DxUKocRgYZvNPM}j_V&uxhR&T)*9M1vrQTw07B)*q@sH#VDkYe}y#`Si9l{MIr`@SRDzhR@Q`k?OBgT^M*{9^>@A?voz(pUAAcdMNXwNdju;&O}$eQ+t#C`xQJN@i68Ii zR8+i7v&9YM4_n-DF;d(ZU_7ZEW}=D?Mw4S}Fam<`;o?%It;s6UICd4*5TVzAfz+yM zRb{mbLMR{%_SoQh3q%+d`w~U+t(28ii>4_{Lc>sKeI*B-*RTNPZ(#-FQGkHtC2(hl z&9}B-G*rvwnZAT%t@m_Q&fvOCBkQiZ7S;7=^6J4zJvih%j@0Y5o{t9<>34wgS_UNoXy7+j%nL!NnH2Ar)l(}zt ztQ-cd>rvRmgnTx3%C|U`XjUI8SSN5blpP)ubH@1i`LFFbzIX5Z*Y#Y#JOBM2@P95wZGG|e z{GJPcPH#AZ?CM&c51G<-dV7|=hj*TKe+@PiJ9{2d_@kks7(shIDk;__IKzs>-5vUS zNa5_V7q$%)JG)5hZpvtAln~F_+wl;w=UAhWvrPzK`HEy`#~lH&{_0U;{1)7)PK9~9$*c3b`-v6`JTT`snSkP&+~VNpa}dbOZnDtceLHM zHAOY;kylbUmOa{gz*5zpq8M79=T#KtY@AJsq2DY26bV?=vDGw&mgBi<+JAJ2ckl?L zF_+)++aJOg^1thO8{hwI*Xi>A_kb-QFPcvsNeBb@L9O|cvZN9N2Ah?%z&rIimRsIq zX9yh!ykFw2riC*+2MnzYT9FC#8pi!&T(d+8XRx(`7P{Q87}H{WQ57jtS1D4LDN@%d zQWYxP%hpG^!02<%vci}tdffRvThCt4K&Cc}=BQXKEx5yzFSy`FeQe+cVQ88fy~9p3 znp+~D;`9V;-sl|@+FeK`#f=t9yr{Ihv?QM>dqV46tYfIkxrRXRfEDP%3`22)=CeoY zJJ=+M7<#<116zN<5weKcSnu~?!U2zAbF9xZhSQJfl*MEHe8E5mus4mt7kAMzU+PK* zs$B}`k)q-bZF^CU=)Aw;@t#rKC*kq2D2WB^BV{C^550F9yn217hzb2+{bdxR*N z^@!6r-2SsTy!~0?cewlYY!(F(tn=b49A3Z#ghEj#Ig=S4en`nOL!}Vwgu&K1k%!H& zV?G!Pe{^2`tH2eXg2}$Ej4#xGd+6M+|2DMpUH$(a@brhlG)e}r3K!bbvzG_wuilb= z9}j-Uw!QD$`>@qnjhCkk3G$bOW!S$f}{7U9Q zFmPaVXxSvlLt5YCVFpz3fp0n1ko>fE7&x2B+kppYj!mYV3psGCJtRsCx!?)B0w#_o zcn&FoSk|CE0`B{kH@IQRjhZHY`n;i%X#$UG`~4uz`!F7k%TFsMGq3r$2Tcws|EC2R z-uoX3zn#N~-2WD Date: Tue, 4 Jun 2024 12:50:55 +0200 Subject: [PATCH 06/17] Increasing the pressure tolerance for higher node counts as instructed by Jean-Noel. 
--- eessi/testsuite/tests/apps/espresso/src/madelung.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eessi/testsuite/tests/apps/espresso/src/madelung.py b/eessi/testsuite/tests/apps/espresso/src/madelung.py index 2cf6fea0..0c848dfc 100644 --- a/eessi/testsuite/tests/apps/espresso/src/madelung.py +++ b/eessi/testsuite/tests/apps/espresso/src/madelung.py @@ -101,7 +101,8 @@ def get_normalized_values_per_ion(system): atol_abs_forces = 2e-6 rtol_energy = 5e-6 -rtol_pressure = 2e-5 +#rtol_pressure = 2e-5 +rtol_pressure = 1e-4 rtol_forces = 0. rtol_abs_forces = 0. # run checks From eea35380407b6b8dfce398831881ab79b7387483 Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Tue, 4 Jun 2024 14:29:30 +0200 Subject: [PATCH 07/17] Introduced a performance function for weak scaling which is the mean time per step. --- eessi/testsuite/tests/apps/espresso/espresso.py | 4 ++++ eessi/testsuite/tests/apps/espresso/src/madelung.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index d39c4aaa..aee07353 100644 --- a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -111,3 +111,7 @@ def assert_sanity(self): self.assert_convergence(), ]) + @performance_function('s/step') + def perf(self): + return sn.extractsingle(r'^Performance:\s+(?P<perf>\S+)', self.stdout, 'perf', float) + diff --git a/eessi/testsuite/tests/apps/espresso/src/madelung.py b/eessi/testsuite/tests/apps/espresso/src/madelung.py index 0c848dfc..7d55bd0d 100644 --- a/eessi/testsuite/tests/apps/espresso/src/madelung.py +++ b/eessi/testsuite/tests/apps/espresso/src/madelung.py @@ -138,8 +138,11 @@ def get_normalized_values_per_ion(system): # write results to file header = '"mode","cores","mpi.x","mpi.y","mpi.z","particles","mean","std"\n' report = f'"{"weak scaling" if args.weak_scaling else "strong 
scaling"}",{n_cores},{node_grid[0]},{node_grid[1]},{node_grid[2]},{len(system.part)},{np.mean(timings):.3e},{np.std(timings, ddof=1):.3e}\n' +print(header) print(report) +print(f"Performance: {np.mean(timings):.3e} \n") + # if pathlib.Path(args.output).is_file(): # header = "" # with open(args.output, "a") as f: From a2c660faa87d0689360d815b2c9a92eb8ed10d46 Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Tue, 4 Jun 2024 14:57:32 +0200 Subject: [PATCH 08/17] Trying to make the linter happy. --- eessi/testsuite/tests/apps/espresso/espresso.py | 15 ++++++--------- .../testsuite/tests/apps/espresso/src/madelung.py | 14 +++++++------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index aee07353..9a3a8ecb 100644 --- a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -1,11 +1,10 @@ """ -This module tests Espresso in available modules containing substring 'ESPResSo' which is different from Quantum Espresso. -Tests included: +This module tests Espresso in available modules containing substring 'ESPResSo' which is different from Quantum +Espresso. Tests included: - P3M benchmark - Ionic crystals - Weak scaling - - Strong scaling -Weak and strong scaling are options that are needed to be provided tothe script and the system is either scaled based on -number of cores or kept constant. + - Strong scaling Weak and strong scaling are options that are needed to be provided to the script and the system is + either scaled based on number of cores or kept constant. 
""" import reframe as rfm @@ -14,15 +13,14 @@ from reframe.core.builtins import parameter, run_after # added only to make the linter happy from reframe.utility import reframe -from hpctestlib.microbenchmarks.mpi.osu import osu_benchmark - from eessi.testsuite import hooks, utils from eessi.testsuite.constants import * from eessi.testsuite.utils import find_modules, log @rfm.simple_test class EESSI_ESPRESSO_P3M_IONIC_CRYSTALS(rfm.RunOnlyRegressionTest): - '''''' + + scale = parameter(SCALES.keys()) valid_prog_environs = ['default'] valid_systems = ['*'] @@ -45,7 +43,6 @@ class EESSI_ESPRESSO_P3M_IONIC_CRYSTALS(rfm.RunOnlyRegressionTest): @run_after('init') def run_after_init(self): """hooks to run after init phase""" - # Filter on which scales are supported by the partitions defined in the ReFrame configuration hooks.filter_supported_scales(self) diff --git a/eessi/testsuite/tests/apps/espresso/src/madelung.py b/eessi/testsuite/tests/apps/espresso/src/madelung.py index 7d55bd0d..ce41d61a 100644 --- a/eessi/testsuite/tests/apps/espresso/src/madelung.py +++ b/eessi/testsuite/tests/apps/espresso/src/madelung.py @@ -21,7 +21,6 @@ import espressomd.version import espressomd.electrostatics import argparse -import pathlib import time import numpy as np @@ -45,6 +44,7 @@ help="Strong scaling benchmark (Amdahl's law: constant total work)") args = parser.parse_args() + def get_reference_values_per_ion(base_vector): madelung_constant = -1.74756459463318219 base_tensor = base_vector * np.eye(3) @@ -52,6 +52,7 @@ def get_reference_values_per_ion(base_vector): ref_pressure = madelung_constant * base_tensor / np.trace(base_tensor) return ref_energy, ref_pressure + def get_normalized_values_per_ion(system): energy = system.analysis.energy()["coulomb"] p_scalar = system.analysis.pressure()["coulomb"] @@ -60,6 +61,7 @@ def get_normalized_values_per_ion(system): V = system.volume() return 2. * energy / N, 2. * p_scalar * V / N, 2. 
* p_tensor * V / N + # initialize system system = espressomd.System(box_l=[100., 100., 100.]) system.time_step = 0.01 @@ -96,12 +98,15 @@ def get_normalized_values_per_ion(system): print("Algorithm executed. \n") +# Old rtol_pressure = 2e-5 +# This resulted in failures especially at high number of nodes therefore increased +# to a larger value. + atol_energy = atol_pressure = 1e-12 atol_forces = 1e-5 atol_abs_forces = 2e-6 rtol_energy = 5e-6 -#rtol_pressure = 2e-5 rtol_pressure = 1e-4 rtol_forces = 0. rtol_abs_forces = 0. @@ -142,8 +147,3 @@ def get_normalized_values_per_ion(system): print(report) print(f"Performance: {np.mean(timings):.3e} \n") - -# if pathlib.Path(args.output).is_file(): -# header = "" -# with open(args.output, "a") as f: -# f.write(header + report) From cbe7fe9865bdeaa142a7e4cc37d38d4dacf3ab45 Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Tue, 4 Jun 2024 15:00:39 +0200 Subject: [PATCH 09/17] Linter changes. --- eessi/testsuite/tests/apps/espresso/espresso.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index 9a3a8ecb..9fbccf58 100644 --- a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -17,10 +17,10 @@ from eessi.testsuite.constants import * from eessi.testsuite.utils import find_modules, log + @rfm.simple_test class EESSI_ESPRESSO_P3M_IONIC_CRYSTALS(rfm.RunOnlyRegressionTest): - scale = parameter(SCALES.keys()) valid_prog_environs = ['default'] valid_systems = ['*'] @@ -39,7 +39,6 @@ class EESSI_ESPRESSO_P3M_IONIC_CRYSTALS(rfm.RunOnlyRegressionTest): ('mpi.ionic_crystals.p3m', 'p3m'), ], fmt=lambda x: x[0], loggable=True) - @run_after('init') def run_after_init(self): """hooks to run after init phase""" @@ -111,4 +110,3 @@ def assert_sanity(self): @performance_function('s/step') def perf(self): return sn.extractsingle(r'^Performance:\s+(?P<perf>\S+)', self.stdout, 
'perf', float) - From f434c48a68512d9a8f3fcc5ff786d21f76957c83 Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Wed, 5 Jun 2024 00:55:03 +0200 Subject: [PATCH 10/17] Removed plot.py as it served no purpose and improved formatting in madelung.py for the linter. --- .../tests/apps/espresso/src/madelung.py | 4 +- .../testsuite/tests/apps/espresso/src/plot.py | 39 ------------------- 2 files changed, 3 insertions(+), 40 deletions(-) delete mode 100644 eessi/testsuite/tests/apps/espresso/src/plot.py diff --git a/eessi/testsuite/tests/apps/espresso/src/madelung.py b/eessi/testsuite/tests/apps/espresso/src/madelung.py index ce41d61a..1c019e29 100644 --- a/eessi/testsuite/tests/apps/espresso/src/madelung.py +++ b/eessi/testsuite/tests/apps/espresso/src/madelung.py @@ -142,7 +142,9 @@ def get_normalized_values_per_ion(system): print("10 steps executed...\n") # write results to file header = '"mode","cores","mpi.x","mpi.y","mpi.z","particles","mean","std"\n' -report = f'"{"weak scaling" if args.weak_scaling else "strong scaling"}",{n_cores},{node_grid[0]},{node_grid[1]},{node_grid[2]},{len(system.part)},{np.mean(timings):.3e},{np.std(timings, ddof=1):.3e}\n' +report = f'''"{"weak scaling" if args.weak_scaling else "strong scaling"}",\ +{n_cores},{node_grid[0]},{node_grid[1]},{node_grid[2]},{len(system.part)},\ +{np.mean(timings):.3e},{np.std(timings,ddof=1):.3e}\n''' print(header) print(report) diff --git a/eessi/testsuite/tests/apps/espresso/src/plot.py b/eessi/testsuite/tests/apps/espresso/src/plot.py deleted file mode 100644 index c9a023c4..00000000 --- a/eessi/testsuite/tests/apps/espresso/src/plot.py +++ /dev/null @@ -1,39 +0,0 @@ -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -import matplotlib.ticker as mtick - -df = pd.read_csv("benchmarks.csv") -df = df.sort_values(by=["mode", "cores", "mpi.x", "mpi.y", "mpi.z"]) - -group = df.query(f"mode == 'strong scaling'") - -fig = plt.figure(figsize=(12, 6)) -ax = fig.subplots().axes -xdata = 
group["cores"].to_numpy() -ydata = group["mean"].to_numpy() -ax.axline((xdata[0], xdata[0]), slope=1, linestyle="--", color="grey", label="Theoretical maximum") -ax.plot(xdata, ydata[0] / ydata, "o-", label="Measurements") -ax.set_title("Strong scaling") -ax.set_xlabel("Number of cores") -ax.set_ylabel("Speed-up") -ax.set_xscale("log", base=2) -ax.set_yscale("log", base=10) -ax.legend() -plt.show() - -group = df.query(f"mode == 'weak scaling'") - -fig = plt.figure(figsize=(12, 6)) -ax = fig.subplots().axes -xdata = group["cores"].to_numpy() -ydata = group["mean"].to_numpy() -ax.axline((-np.inf, 1), slope=0, linestyle="--", color="grey", label="Theoretical maximum") -ax.plot(xdata, ydata[0] / ydata, "o-", label="Measurements") -ax.set_title("Weak scaling") -ax.set_xlabel("Number of cores") -ax.set_ylabel("Efficiency") -ax.set_xscale("log", base=2) -ax.yaxis.set_major_formatter(mtick.PercentFormatter(1)) -ax.legend() -plt.show() From bd04f8340bacb434cf2db908d17091e1c74f93d8 Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Wed, 5 Jun 2024 01:02:14 +0200 Subject: [PATCH 11/17] Making linter happy again. 
--- eessi/testsuite/tests/apps/espresso/src/madelung.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/eessi/testsuite/tests/apps/espresso/src/madelung.py b/eessi/testsuite/tests/apps/espresso/src/madelung.py index 1c019e29..37d0b44a 100644 --- a/eessi/testsuite/tests/apps/espresso/src/madelung.py +++ b/eessi/testsuite/tests/apps/espresso/src/madelung.py @@ -79,11 +79,11 @@ def get_normalized_values_per_ion(system): if args.weak_scaling: lattice_size = np.multiply(lattice_size, node_grid) system.box_l = np.multiply(lattice_size, base_vector) -for j in range(lattice_size[0]): - for k in range(lattice_size[1]): - for l in range(lattice_size[2]): - _ = system.part.add(pos=np.multiply([j, k, l], base_vector), - q=(-1.)**(j + k + l), fix=3 * [True]) +for var_j in range(lattice_size[0]): + for var_k in range(lattice_size[1]): + for var_l in range(lattice_size[2]): + _ = system.part.add(pos=np.multiply([var_j, var_k, var_l], base_vector), + q=(-1.)**(var_j + var_k + var_l), fix=3 * [True]) # setup P3M algorithm algorithm = espressomd.electrostatics.P3M From ef21ed5259c787b3993732df78e9d4c81b042f56 Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Thu, 6 Jun 2024 23:59:19 +0200 Subject: [PATCH 12/17] Using mem_required_per_node from the hooks. Tested on Snellius and it works properly. --- eessi/testsuite/tests/apps/espresso/espresso.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index 9fbccf58..2fbb5ce3 100644 --- a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -83,8 +83,9 @@ def set_num_tasks_per_node(self): def set_mem(self): """ Setting an extra job option of memory. 
Here the assumption made is that HPC systems will contain at least 1 GB per core of memory.""" - mem_required_per_node = str(self.num_tasks_per_node * 1) + 'GB' - self.extra_resources = {'memory': {'size': mem_required_per_node}} + mem_required_per_node = self.num_tasks_per_node * 0.9 + hooks.req_memory_per_node(test=self, app_mem_req=mem_required_per_node) + @deferrable def assert_completion(self): From 140b9e424e81694f22207303eda7e54467d932b3 Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Fri, 7 Jun 2024 00:03:31 +0200 Subject: [PATCH 13/17] Making the linter happy. --- eessi/testsuite/tests/apps/espresso/espresso.py | 1 - 1 file changed, 1 deletion(-) diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index 2fbb5ce3..7db09ff9 100644 --- a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -86,7 +86,6 @@ def set_mem(self): mem_required_per_node = self.num_tasks_per_node * 0.9 hooks.req_memory_per_node(test=self, app_mem_req=mem_required_per_node) - @deferrable def assert_completion(self): '''Check completion''' From e5234583ec01cda67927471554753a9b5043f20d Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Mon, 10 Jun 2024 16:25:55 +0200 Subject: [PATCH 14/17] Removing 16 node test case for now since it takes way too long and dialing down the scales within the CI tests since they should not take too much time. 
--- eessi/testsuite/tests/apps/espresso/espresso.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index 7db09ff9..7213ee6c 100644 --- a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -17,11 +17,23 @@ from eessi.testsuite.constants import * from eessi.testsuite.utils import find_modules, log +def filter_scales_P3M(): + """ + Filtering function for filtering scales for P3M test. + This is currently required because the 16 node test takes way too long and always fails due to time limit. + Once a solution to mesh tuning algorithm is found, where we can specify the mesh sizes for a particular scale, + this function can be removed. + """ + return [ + k for (k, v) in SCALES.items() + if v['num_nodes'] != 16 + ] + @rfm.simple_test class EESSI_ESPRESSO_P3M_IONIC_CRYSTALS(rfm.RunOnlyRegressionTest): - scale = parameter(SCALES.keys()) + scale = parameter(filter_scales_P3M()) valid_prog_environs = ['default'] valid_systems = ['*'] time_limit = '300m' @@ -55,7 +67,8 @@ def run_after_init(self): @run_after('init') def set_tag_ci(self): """ Setting tests under CI tag. """ - if (self.benchmark_info[0] in ['mpi.ionic_crystals.p3m']): + if (self.benchmark_info[0] in ['mpi.ionic_crystals.p3m'] + and SCALES[self.scale]['num_nodes'] < 2): self.tags.add('CI') log(f'tags set to {self.tags}') From c5e02458a45e39047dd5a81c7ad7b1a20304a139 Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Mon, 10 Jun 2024 16:32:29 +0200 Subject: [PATCH 15/17] Trying to make the linter happy. 
--- eessi/testsuite/tests/apps/espresso/espresso.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index 7213ee6c..5366fe5b 100644 --- a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -17,6 +17,7 @@ from eessi.testsuite.constants import * from eessi.testsuite.utils import find_modules, log + def filter_scales_P3M(): """ Filtering function for filtering scales for P3M test. @@ -67,8 +68,8 @@ def run_after_init(self): @run_after('init') def set_tag_ci(self): """ Setting tests under CI tag. """ - if (self.benchmark_info[0] in ['mpi.ionic_crystals.p3m'] - and SCALES[self.scale]['num_nodes'] < 2): + if (self.benchmark_info[0] in ['mpi.ionic_crystals.p3m'] and + SCALES[self.scale]['num_nodes'] < 2): self.tags.add('CI') log(f'tags set to {self.tags}') From df8873c69fcc4f7ff47b486b8fd8ebfc2fa9e9f0 Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Mon, 10 Jun 2024 16:36:04 +0200 Subject: [PATCH 16/17] Making the linter happy. --- eessi/testsuite/tests/apps/espresso/espresso.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index 5366fe5b..a1675afd 100644 --- a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -68,8 +68,7 @@ def run_after_init(self): @run_after('init') def set_tag_ci(self): """ Setting tests under CI tag. 
""" - if (self.benchmark_info[0] in ['mpi.ionic_crystals.p3m'] and - SCALES[self.scale]['num_nodes'] < 2): + if (self.benchmark_info[0] in ['mpi.ionic_crystals.p3m'] and SCALES[self.scale]['num_nodes'] < 2): self.tags.add('CI') log(f'tags set to {self.tags}') From aebfdc189c5fa6104bc6a0e588d2914a32b9eab0 Mon Sep 17 00:00:00 2001 From: Satish Kamath Date: Tue, 11 Jun 2024 16:55:28 +0200 Subject: [PATCH 17/17] Removing files that are not relevant: job.sh and benchmark.csv and removing the statement from madelung that puts benchmark.csv as path within the output parameter. --- .../tests/apps/espresso/benchmarks.csv | 34 ------------------- .../testsuite/tests/apps/espresso/src/job.sh | 10 ------ .../tests/apps/espresso/src/madelung.py | 3 -- 3 files changed, 47 deletions(-) delete mode 100644 eessi/testsuite/tests/apps/espresso/benchmarks.csv delete mode 100644 eessi/testsuite/tests/apps/espresso/src/job.sh diff --git a/eessi/testsuite/tests/apps/espresso/benchmarks.csv b/eessi/testsuite/tests/apps/espresso/benchmarks.csv deleted file mode 100644 index 9091534b..00000000 --- a/eessi/testsuite/tests/apps/espresso/benchmarks.csv +++ /dev/null @@ -1,34 +0,0 @@ -"mode","cores","mpi.x","mpi.y","mpi.z","particles","mean","std" -"weak scaling",4,2,2,1,6912,2.341e-01,8.081e-03 -"strong scaling",4,2,2,1,5832,2.496e-01,9.019e-03 -"weak scaling",16,4,2,2,27648,2.417e+00,9.576e-02 -"strong scaling",16,4,2,2,5832,3.853e-02,1.991e-03 -"weak scaling",32,4,4,2,55296,4.263e+00,1.161e+00 -"strong scaling",32,4,4,2,5832,2.194e-02,7.303e-04 -"weak scaling",1,1,1,1,1728,7.655e-02,3.434e-03 -"weak scaling",2,2,1,1,3456,1.456e-01,4.679e-03 -"strong scaling",2,2,1,1,5832,3.936e-01,1.098e-02 -"strong scaling",1,1,1,1,5832,6.333e-01,1.194e-01 -"strong scaling",64,4,4,4,5832,1.910e-02,6.132e-04 -"weak scaling",1,1,1,1,1728,9.482e-02,2.956e-03 -"weak scaling",2,2,1,1,3456,2.111e-01,6.614e-03 -"strong scaling",1,1,1,1,5832,9.133e-01,2.868e-02 -"strong scaling",16,4,2,2,5832,4.285e-02,1.327e-03 
-"strong scaling",64,4,4,4,5832,1.715e-02,5.776e-04 -"strong scaling",128,8,4,4,5832,1.980e-02,7.013e-04 -"weak scaling",64,4,4,4,110592,4.375e-01,1.414e-02 -"weak scaling",100,5,5,4,172800,4.450e-01,1.437e-02 -"weak scaling",128,8,4,4,221184,8.720e+00,2.753e-01 -"weak scaling",128,8,4,4,221184,8.760e+00,3.110e-01 -"weak scaling",4,2,2,1,6912,2.626e-01,8.142e-03 -"weak scaling",4,2,2,1,6912,2.780e-01,8.683e-03 -"weak scaling",4,2,2,1,6912,2.627e-01,8.391e-03 -"weak scaling",4,2,2,1,6912,2.617e-01,8.155e-03 -"weak scaling",2,2,1,1,3456,2.028e-01,6.255e-03 -"weak scaling",2,2,1,1,3456,3.247e-01,1.026e-02 -"weak scaling",2,2,1,1,3456,3.249e-01,1.029e-02 -"weak scaling",2,2,1,1,3456,3.257e-01,1.028e-02 -"weak scaling",2,2,1,1,3456,3.375e-01,1.095e-02 -"weak scaling",2,2,1,1,3456,3.367e-01,1.086e-02 -"weak scaling",2,2,1,1,3456,3.241e-01,1.048e-02 -"weak scaling",2,2,1,1,3456,3.243e-01,1.038e-02 diff --git a/eessi/testsuite/tests/apps/espresso/src/job.sh b/eessi/testsuite/tests/apps/espresso/src/job.sh deleted file mode 100644 index 17399c52..00000000 --- a/eessi/testsuite/tests/apps/espresso/src/job.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -#SBATCH --time=00:40:00 -#SBATCH --output %j.stdout -#SBATCH --error %j.stderr -module load spack/default gcc/12.3.0 cuda/12.3.0 openmpi/4.1.6 \ - fftw/3.3.10 boost/1.83.0 python/3.12.1 -source ../espresso-4.3/venv/bin/activate -srun --cpu-bind=cores python3 madelung.py --size 6 --weak-scaling -srun --cpu-bind=cores python3 madelung.py --size 9 --strong-scaling -deactivate diff --git a/eessi/testsuite/tests/apps/espresso/src/madelung.py b/eessi/testsuite/tests/apps/espresso/src/madelung.py index 37d0b44a..3f73b5d5 100644 --- a/eessi/testsuite/tests/apps/espresso/src/madelung.py +++ b/eessi/testsuite/tests/apps/espresso/src/madelung.py @@ -34,9 +34,6 @@ default=False, required=False, help="Use GPU implementation") parser.add_argument("--topology", metavar=("X", "Y", "Z"), nargs=3, action="store", default=None, required=False, 
type=int, help="Cartesian topology") -parser.add_argument("--output", metavar="FILEPATH", action="store", - type=str, required=False, default="benchmarks.csv", - help="Output file (default: benchmarks.csv)") group = parser.add_mutually_exclusive_group() group.add_argument("--weak-scaling", action="store_true", help="Weak scaling benchmark (Gustafson's law: constant work per core)")