From 6d96af5ef9d2b9074ebba2fd32d58d5d8d5ecb22 Mon Sep 17 00:00:00 2001 From: august-knox <112430443+august-knox@users.noreply.github.com> Date: Thu, 14 Nov 2024 15:07:00 -0800 Subject: [PATCH 1/4] Adding experiment class for Laghos (#428) * initial commit for laghos experiment.py file * adding strong and weak scaling variants * changing scaling variant to experiments and adding existing exp * changed default exp to example * updating scaling * fixing error w/ application.py * lint * lint * Adding dry run * Adding dry run * Removing modifier * initial commit for progress on sandia microbenchmarks * Triplept problem * Triplept problem * Triplept problem * lint * Lint * Setting workload * lint * Removing non-existing parameters * lint * workload * workload * colon * Change laghos experiment.py * package changes * laghos package.py zlib dependency * experiment class and package fixes * new experiment class, rma version * lint * lint * more lint * Delete experiments/smb/mpi directory remove old format * lint * removing hidden tab * lint * adding space between lines * adding dryruns * Delete var/exp_repo/experiments/laghos directory removing old laghos location * lint * Delete experiments/smb directory removing smb * Delete repo/smb directory removing smb * fixing dryrun * adding zlib system packages * fixing missing indent * fixing dryruns * generic x86 fixed * Update system.py * Fix workflow file indentation * Update system.py * Move zlip to externals --------- Co-authored-by: august-knox Co-authored-by: pearce8 Co-authored-by: Riyaz Haque Co-authored-by: Riyaz Haque <5333387+rfhaque@users.noreply.github.com> --- .github/workflows/run.yml | 11 +-- experiments/laghos/experiment.py | 80 +++++++++++++++++++ repo/laghos/application.py | 26 +++++- repo/laghos/package.py | 3 +- systems/cts/externals/base/00-packages.yaml | 4 +- .../externals/base/00-packages.yaml | 8 ++ .../sierra/externals/base/00-packages.yaml | 2 + systems/tioga/externals/base/00-packages.yaml | 2 + 8 
files changed, 126 insertions(+), 10 deletions(-) create mode 100644 experiments/laghos/experiment.py create mode 100644 systems/genericx86/externals/base/00-packages.yaml diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml index 8e233c9d9..e0fa43697 100644 --- a/.github/workflows/run.yml +++ b/.github/workflows/run.yml @@ -219,22 +219,23 @@ jobs: --disable-logger \ workspace setup --dry-run - - name: Dry run laghos/mpi-only on LLNL-Magma-Penguin-icelake-OmniPath with allocation modifier + - name: Dry run dynamic laghos/mpi-only on LLNL-Magma-Penguin-icelake-OmniPath with allocation modifier run: | - ./bin/benchpark setup laghos/mpi-only LLNL-Magma-Penguin-icelake-OmniPath workspace/ + ./bin/benchpark experiment init --dest=laghos-mpi-only laghos + ./bin/benchpark setup ./laghos-mpi-only LLNL-Magma-Penguin-icelake-OmniPath workspace/ . workspace/setup.sh ramble \ - --workspace-dir workspace/laghos/mpi-only/LLNL-Magma-Penguin-icelake-OmniPath/workspace \ + --workspace-dir workspace/laghos-mpi-only/LLNL-Magma-Penguin-icelake-OmniPath/workspace \ --disable-progress-bar \ --disable-logger \ workspace setup --dry-run - name: Dry run laghos/mpi-only on LLNL-Ruby-icelake-OmniPath with allocation modifier run: | - ./bin/benchpark setup laghos/mpi-only LLNL-Ruby-icelake-OmniPath workspace/ + ./bin/benchpark setup laghos-mpi-only LLNL-Ruby-icelake-OmniPath workspace/ . workspace/setup.sh ramble \ - --workspace-dir workspace/laghos/mpi-only/LLNL-Ruby-icelake-OmniPath/workspace \ + --workspace-dir workspace/laghos-mpi-only/LLNL-Ruby-icelake-OmniPath/workspace \ --disable-progress-bar \ --disable-logger \ workspace setup --dry-run diff --git a/experiments/laghos/experiment.py b/experiments/laghos/experiment.py new file mode 100644 index 000000000..ec4b8d7e6 --- /dev/null +++ b/experiments/laghos/experiment.py @@ -0,0 +1,80 @@ +# Copyright 2023 Lawrence Livermore National Security, LLC and other +# Benchpark Project Developers. 
See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: Apache-2.0 + +from benchpark.error import BenchparkError +from benchpark.directives import variant +from benchpark.experiment import Experiment +from benchpark.scaling import StrongScaling +from benchpark.expr.builtin.caliper import Caliper + + +class Laghos( + Experiment, + StrongScaling, + Caliper, +): + + variant( + "workload", + default="triplept", + description="triplept or other problem", + ) + + variant( + "version", + default="develop", + description="app version", + ) + + def compute_applications_section(self): + # TODO: Replace with conflicts clause + scaling_modes = { + "strong": self.spec.satisfies("strong=oui"), + "single_node": self.spec.satisfies("single_node=oui"), + } + + scaling_mode_enabled = [key for key, value in scaling_modes.items() if value] + if len(scaling_mode_enabled) != 1: + raise BenchparkError( + f"Only one type of scaling per experiment is allowed for application package {self.name}" + ) + + # Number of initial nodes + num_nodes = {"n_nodes": 1} + + if self.spec.satisfies("single_node=oui"): + for pk, pv in num_nodes.items(): + self.add_experiment_variable(pk, pv, True) + elif self.spec.satisfies("strong=oui"): + scaled_variables = self.generate_strong_scaling_params( + {tuple(num_nodes.keys()): list(num_nodes.values())}, + int(self.spec.variants["scaling-factor"][0]), + int(self.spec.variants["scaling-iterations"][0]), + ) + for pk, pv in scaled_variables.items(): + self.add_experiment_variable(pk, pv, True) + + self.add_experiment_variable( + "n_ranks", "{sys_cores_per_node} * {n_nodes}", True + ) + + def compute_spack_section(self): + # get package version + app_version = self.spec.variants["version"][0] + + # get system config options + # TODO: Get compiler/mpi/package handles directly from system.py + system_specs = {} + system_specs["compiler"] = "default-compiler" + system_specs["mpi"] = "default-mpi" + + # set package spack specs + # empty 
package_specs value implies external package + self.add_spack_spec(system_specs["mpi"]) + # self.add_spack_spec(system_specs["blas"]) + + self.add_spack_spec( + self.name, [f"laghos@{app_version} +metis", system_specs["compiler"]] + ) diff --git a/repo/laghos/application.py b/repo/laghos/application.py index 33dca8351..65be3d327 100644 --- a/repo/laghos/application.py +++ b/repo/laghos/application.py @@ -19,9 +19,29 @@ class Laghos(ExecutableApplication): 'lagrangian','spatial-discretization','unstructured-grid', 'network-latency-bound','network-collectives','unstructured-grid'] - executable('p', 'laghos -p 3 -m {laghos}/data/box01_hex.mesh -rs 5 -ms 500', use_mpi=True) - - workload('problem', executables=['p']) + executable('prob', 'laghos -p {problem} -m {mesh} -rs {rs} -rp {rp} -ms {ms}', use_mpi=True) + + workload('triplept', executables=['prob']) + + workload_variable('mesh', default='{laghos}/data/box01_hex.mesh', + description='mesh file', + workloads=['triplept']) + + workload_variable('problem', default='3', + description='problem number', + workloads=['triplept']) + + workload_variable('rs', default='5', + description='number of serial refinements', + workloads=['triplept']) + + workload_variable('rp', default='0', + description='number of parallel refinements', + workloads=['triplept']) + + workload_variable('ms', default='500', + description='max number of steps', + workloads=['triplept']) figure_of_merit('Major kernels total time', log_file='{experiment_run_dir}/{experiment_name}.out', diff --git a/repo/laghos/package.py b/repo/laghos/package.py index d30819bd9..2c0c7d3b5 100644 --- a/repo/laghos/package.py +++ b/repo/laghos/package.py @@ -33,7 +33,8 @@ class Laghos(MakefilePackage): depends_on("caliper", when="+caliper") depends_on("adiak", when="+caliper") - depends_on("mfem@develop", when="@develop") + depends_on("zlib@1.3.1+optimize+pic+shared", when="@develop") + depends_on("mfem@develop^zlib@1.3.1+optimize+pic+shared", when="@develop") 
depends_on("mfem@4.2.0:", when="@3.1") depends_on("mfem@4.1.0:4.1", when="@3.0") # Recommended mfem version for laghos v2.0 is: ^mfem@3.4.1-laghos-v2.0 diff --git a/systems/cts/externals/base/00-packages.yaml b/systems/cts/externals/base/00-packages.yaml index 921d6033b..1692ba7f8 100644 --- a/systems/cts/externals/base/00-packages.yaml +++ b/systems/cts/externals/base/00-packages.yaml @@ -47,4 +47,6 @@ packages: buildable: false externals: - spec: fftw@3.3.10 - prefix: /usr/tce/packages/fftw/fftw-3.3.10 \ No newline at end of file + prefix: /usr/tce/packages/fftw/fftw-3.3.10 + zlib-api: + require: zlib diff --git a/systems/genericx86/externals/base/00-packages.yaml b/systems/genericx86/externals/base/00-packages.yaml new file mode 100644 index 000000000..2c580f63c --- /dev/null +++ b/systems/genericx86/externals/base/00-packages.yaml @@ -0,0 +1,8 @@ +# Copyright 2023 Lawrence Livermore National Security, LLC and other +# Benchpark Project Developers. See the top-level COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: Apache-2.0 + +packages: + zlib-api: + require: zlib diff --git a/systems/sierra/externals/base/00-packages.yaml b/systems/sierra/externals/base/00-packages.yaml index fe22877ef..278fa8a4b 100644 --- a/systems/sierra/externals/base/00-packages.yaml +++ b/systems/sierra/externals/base/00-packages.yaml @@ -49,3 +49,5 @@ packages: buildable: false mpi: buildable: false + zlib-api: + require: zlib diff --git a/systems/tioga/externals/base/00-packages.yaml b/systems/tioga/externals/base/00-packages.yaml index 567888887..8e58108f7 100644 --- a/systems/tioga/externals/base/00-packages.yaml +++ b/systems/tioga/externals/base/00-packages.yaml @@ -184,3 +184,5 @@ packages: buildable: false rocsolver: buildable: false + zlib-api: + require: zlib From eeda106feaf03b31aaa55fc126b6462d34dc1d16 Mon Sep 17 00:00:00 2001 From: Stephanie Brink Date: Thu, 14 Nov 2024 15:12:14 -0800 Subject: [PATCH 2/4] docs: set max height for outputs (#432) --- docs/_static/css/custom.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css index 0292011a8..e8d64b6ea 100644 --- a/docs/_static/css/custom.css +++ b/docs/_static/css/custom.css @@ -89,3 +89,7 @@ a:visited { html.writer-html4 .rst-content dl:not(.docutils) > dt, html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) > dt { color: #7fa866 } + +.rst-content div[class^="highlight"] pre { + max-height: 600px; +} From 564f24862bd3920213cab146bf73029c127f1a17 Mon Sep 17 00:00:00 2001 From: Stephanie Brink Date: Thu, 14 Nov 2024 15:12:37 -0800 Subject: [PATCH 3/4] docs: update text (#434) update text --- docs/llnl-tutorial.rst | 12 ++++++------ docs/modifiers.rst | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/llnl-tutorial.rst b/docs/llnl-tutorial.rst index c4bad60ed..ff757e587 100644 --- a/docs/llnl-tutorial.rst +++ b/docs/llnl-tutorial.rst @@ -26,9 
+26,9 @@ system specification in Benchpark:: benchpark system init --dest=ruby-system cts cluster=ruby -To run the cuda, strong scaling version of the AMG20223 benchmark, initialize it for experiments:: +To run the openmp, strong scaling version of the AMG2023 benchmark, initialize it for experiments:: - benchpark experiment init --dest=amg2023-benchmark amg2023 cuda=oui strong=oui + benchpark experiment init --dest=amg2023-benchmark amg2023 openmp=oui Then setup the workspace directory for the system and experiment together:: @@ -44,8 +44,8 @@ Then setup the Ramble experiment workspace, this builds all software and may tak cd ./workspace/amg2023-benchmark/Cts-6d48f81/workspace/ ramble --workspace-dir . --disable-progress-bar workspace setup -Next, we run the Saxpy experiments, which will launch jobs through the -scheduler on Tioga:: +Next, we run the AMG2023 experiments, which will launch jobs through the +scheduler on the CTS system:: ramble --workspace-dir . --disable-progress-bar on @@ -53,13 +53,13 @@ scheduler on Tioga:: Tioga ------ -This second tutorial will guide you through the process of using the cuda +This second tutorial will guide you through the process of using the ROCm version of the Saxpy benchmark on Tioga. The parameters for initializing the system are slightly different due to the different variants defined for the system. For example, the variant ``~gtl`` turns off gtl-enabled MPI, ``+gtl`` turns it on:: benchpark system init --dest=tioga-system tioga ~gtl - benchpark experiment init --dest=saxpy-benchmark saxpy cuda=oui + benchpark experiment init --dest=saxpy-benchmark saxpy rocm=oui benchpark setup ./saxpy-benchmark ./tioga-system workspace/ . 
workspace/setup.sh cd ./workspace/saxpy-benchmark/Tioga-975af3c/workspace/ diff --git a/docs/modifiers.rst b/docs/modifiers.rst index 81ba45e4f..a343c7ec5 100644 --- a/docs/modifiers.rst +++ b/docs/modifiers.rst @@ -99,7 +99,7 @@ setup step:: benchpark experiment init --dest= caliper= -Valid values for ```` are found in the **Caliper Variant** +Valid values for ```` are found in the **Caliper Variant** column of the table below. Benchpark will link the experiment to Caliper, and inject appropriate Caliper configuration at runtime. After the experiments in the workspace have completed running, a ``.cali`` file From c3b3aa7f94b376a331ca90993b20d5ca1641701f Mon Sep 17 00:00:00 2001 From: Alec Scott Date: Thu, 14 Nov 2024 16:49:12 -0800 Subject: [PATCH 4/4] ci: fix too many jobs spawned on LLNL resources (#436) * ci: fix too many jobs spawned on LLNL resources * Limit to only run on merge requests and the main branch --- .gitlab/ci/test.yml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.gitlab/ci/test.yml b/.gitlab/ci/test.yml index b5ed65f3b..add33ed0c 100644 --- a/.gitlab/ci/test.yml +++ b/.gitlab/ci/test.yml @@ -7,13 +7,19 @@ ARCHCONFIG: LLNL-Dane-DELL-sapphirerapids-OmniPath test_run: + resource_group: $HOST stage: test tags: - $HOST - - shell + - batch <<: *test_clusters + variables: + SCHEDULER_PARAMETERS: -N 1 -t 1h rules: - - changes: + - if: | + $CI_PIPELINE_SOURCE == "merge_request_event" || + $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH + changes: - .gitlab-ci.yml - .gitlab/ci/* - experiments/** @@ -32,4 +38,5 @@ test_run: - cd ./workspace/saxpy/openmp/$ARCHCONFIG/workspace/ - ramble --workspace-dir . --disable-progress-bar --disable-logger workspace setup # Run Saxpy Experiments - - ramble --workspace-dir . --disable-progress-bar --disable-logger on + - ramble --workspace-dir . --disable-progress-bar --disable-logger + on --executor '{execute_experiment}' --where '{n_nodes} == 1'