From 299d690de90f0f50a2887223f4f28491a4674c85 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Thu, 31 Oct 2024 17:36:21 +0100 Subject: [PATCH 01/17] Update config and CI_configs to be able to run tests for local stack and EESSI stack --- CI/hortense_EESSI_ss/ci_config.sh | 17 ++++++++++++++ CI/hortense_local_ss/ci_config.sh | 16 +++++++++++++ CI/run_reframe.sh | 26 ++++++++++++++------- config/vsc_hortense.py | 39 +++++++++++++++++++------------ 4 files changed, 75 insertions(+), 23 deletions(-) create mode 100644 CI/hortense_EESSI_ss/ci_config.sh create mode 100644 CI/hortense_local_ss/ci_config.sh diff --git a/CI/hortense_EESSI_ss/ci_config.sh b/CI/hortense_EESSI_ss/ci_config.sh new file mode 100644 index 00000000..ac35ffef --- /dev/null +++ b/CI/hortense_EESSI_ss/ci_config.sh @@ -0,0 +1,17 @@ +# Configurable items +if [ -z "${REFRAME_ARGS}" ]; then + REFRAME_ARGS="--tag CI --tag 1_node|2_nodes --system hortense:cpu_rome_256gb" +fi + +if [ -z "${UNSET_MODULEPATH}" ]; then + export UNSET_MODULEPATH=False + module --force purge +fi + +if [ -z "${USE_EESSI_SOFTWARE_STACK}" ]; then + export USE_EESSI_SOFTWARE_STACK=True +fi + +if [ -z "${RFM_CONFIG_FILES}" ]; then + export RFM_CONFIG_FILES="/dodrio/scratch/users/vsc46128/vsc_hortense.py" +fi diff --git a/CI/hortense_local_ss/ci_config.sh b/CI/hortense_local_ss/ci_config.sh new file mode 100644 index 00000000..569bc486 --- /dev/null +++ b/CI/hortense_local_ss/ci_config.sh @@ -0,0 +1,16 @@ +# Configurable items +if [ -z "${REFRAME_ARGS}" ]; then + REFRAME_ARGS="--tag CI --tag 1_node|2_nodes --system hortense:cpu_rome_256gb" +fi + +if [ -z "${UNSET_MODULEPATH}" ]; then + export UNSET_MODULEPATH=False +fi + +if [ -z "${USE_EESSI_SOFTWARE_STACK}" ]; then + export USE_EESSI_SOFTWARE_STACK=False +fi + +if [ -z "${RFM_CONFIG_FILES}" ]; then + export RFM_CONFIG_FILES="/dodrio/scratch/users/vsc46128/vsc_hortense.py" +fi diff --git a/CI/run_reframe.sh b/CI/run_reframe.sh index eda506f0..77937f4f 100755 --- a/CI/run_reframe.sh +++ b/CI/run_reframe.sh @@ -50,11 +50,14 @@ fi if [ -z "${EESSI_TESTSUITE_BRANCH}" ]; then EESSI_TESTSUITE_BRANCH='v0.4.0' fi -if [ -z "${EESSI_CVMFS_REPO}" ]; then - export EESSI_CVMFS_REPO=/cvmfs/software.eessi.io -fi -if [ -z "${EESSI_VERSION}" ]; then - export EESSI_VERSION=2023.06 +if [ -z "${USE_EESSI_SOFTWARE_STACK}" ] | [ $USE_EESSI_SOFTWARE_STACK == "True" ]; then + export USE_EESSI_SOFTWARE_STACK=True + if [ -z "${EESSI_CVMFS_REPO}" ]; then + export EESSI_CVMFS_REPO=/cvmfs/software.eessi.io + fi + if [ -z "${EESSI_VERSION}" ]; then + export EESSI_VERSION=2023.06 + fi fi if [ -z "${RFM_CONFIG_FILES}" ]; then export RFM_CONFIG_FILES="${TEMPDIR}/test-suite/config/${EESSI_CI_SYSTEM_NAME}.py" @@ -73,6 +76,9 @@ if [ -z "${REFRAME_TIMEOUT}" ]; then # This will prevent multiple ReFrame runs from piling up and exceeding the quota on our Magic Castle clusters export REFRAME_TIMEOUT=1430m fi +if [ -z "${UNSET_MODULEPATH}" ]; then + export UNSET_MODULEPATH=True +fi # Create virtualenv for ReFrame using system python python3 -m venv "${TEMPDIR}"/reframe_venv @@ -93,9 +99,13 @@ git clone ${EESSI_CLONE_ARGS} export PYTHONPATH="${PYTHONPATH}":"${TEMPDIR}"/test-suite/ # Start the EESSI environment -unset MODULEPATH -eessi_init_path="${EESSI_CVMFS_REPO}"/versions/"${EESSI_VERSION}"/init/bash -source "${eessi_init_path}" +if [ $UNSET_MODULEPATH == "True" ]; then + unset MODULEPATH +fi +if [ $USE_EESSI_SOFTWARE_STACK == "True" ]; then + eessi_init_path="${EESSI_CVMFS_REPO}"/versions/"${EESSI_VERSION}"/init/bash + source "${eessi_init_path}" +fi # Needed in order to make sure the reframe from our TEMPDIR is first on the PATH, # prior to the one shipped with the 2021.12 compat layer diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 1783e95e..7c1f6017 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -20,6 +20,15 @@ class MyMpirunLauncher(JobLauncher): def command(self, job): return ['mympirun', '--hybrid', str(job.num_tasks_per_node)] +eessi_cvmfs_repo = os.getenv('EESSI_CVMFS_REPO', None) +if eessi_cvmfs_repo is not None: + prepare_eessi_init = 'module --force purge' + launcher = 'mpirun' + mpi_module = '' +else: + prepare_eessi_init = '' + launcher = 'mympirun' + mpi_module = 'vsc-mympirun' site_configuration = { 'systems': [ @@ -32,13 +41,13 @@ def command(self, job): { 'name': 'cpu_rome_256gb', 'scheduler': 'slurm', - 'prepare_cmds': [common_eessi_init()], + 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=cpu_rome'], 'environs': ['default'], 'descr': 'CPU nodes (AMD Rome, 256GiB RAM)', 'max_jobs': 20, - 'launcher': 'mympirun', - 'modules': ['vsc-mympirun'], + 'launcher': launcher, + 'modules': [mpi_module], 'processor': { 'num_cpus': 128, 'num_sockets': 2, @@ -64,13 +73,13 @@ def command(self, job): { 'name': 'cpu_rome_512gb', 'scheduler': 'slurm', - 'prepare_cmds': [common_eessi_init()], + 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=cpu_rome_512'], 'environs': ['default'], 'descr': 'CPU nodes (AMD Rome, 512GiB RAM)', 'max_jobs': 20, - 'launcher': 'mympirun', - 'modules': ['vsc-mympirun'], + 'launcher': launcher, + 'modules': [mpi_module], 'processor': { 'num_cpus': 128, 'num_sockets': 2, @@ -96,13 +105,13 @@ def command(self, job): { 'name': 'cpu_milan', 'scheduler': 'slurm', - 'prepare_cmds': [common_eessi_init()], + 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=cpu_milan'], 'environs': ['default'], 'descr': 'CPU nodes (AMD Milan, 256GiB RAM)', 'max_jobs': 20, - 'launcher': 'mympirun', - 'modules': ['vsc-mympirun'], + 'launcher': launcher, + 'modules': [mpi_module], 'processor': { 'num_cpus': 128, 'num_sockets': 2, @@ -128,13 +137,13 @@ def command(self, job): { 'name': 'gpu_rome_a100_40gb', 'scheduler': 'slurm', - 'prepare_cmds': [common_eessi_init()], + 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=gpu_rome_a100_40'], 'environs': ['default'], 'descr': 'GPU nodes (A100 40GB)', 'max_jobs': 20, - 'launcher': 'mympirun', - 'modules': ['vsc-mympirun'], + 'launcher': launcher, + 'modules': [mpi_module], 'processor': { 'num_cpus': 48, 'num_sockets': 2, @@ -172,13 +181,13 @@ def command(self, job): { 'name': 'gpu_rome_a100_80gb', 'scheduler': 'slurm', - 'prepare_cmds': [common_eessi_init()], + 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=gpu_rome_a100_80'], 'environs': ['default'], 'descr': 'GPU nodes (A100 80GB)', 'max_jobs': 20, - 'launcher': 'mympirun', - 'modules': ['vsc-mympirun'], + 'launcher': launcher, + 'modules': [mpi_module], 'processor': { 'num_cpus': 48, 'num_sockets': 2, From 7b4946c141df4fb670a6dc86b5b3b9c7da7129a2 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Thu, 31 Oct 2024 17:46:46 +0100 Subject: [PATCH 02/17] resolve style error --- config/vsc_hortense.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 7c1f6017..3ad2c674 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -2,6 +2,7 @@ # https://docs.vscentrum.be/en/latest/gent/tier1_hortense.html # # authors: Samuel Moors (VUB-HPC), Kenneth Hoste (HPC-UGent) +import os from reframe.core.backends import register_launcher from reframe.core.launchers import JobLauncher @@ -20,6 +21,7 @@ class MyMpirunLauncher(JobLauncher): def command(self, job): return ['mympirun', '--hybrid', str(job.num_tasks_per_node)] + eessi_cvmfs_repo = os.getenv('EESSI_CVMFS_REPO', None) if eessi_cvmfs_repo is not None: prepare_eessi_init = 'module --force purge' From bca9b29e447c59431577ee9608a58e28ee3e85e8 Mon Sep 17 00:00:00 2001 From: Lara Ramona Peeters <49882639+laraPPr@users.noreply.github.com> Date: Wed, 11 Dec 2024 12:10:55 +0100 Subject: [PATCH 03/17] Update CI/run_reframe.sh Co-authored-by: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com> --- CI/run_reframe.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CI/run_reframe.sh b/CI/run_reframe.sh index 77937f4f..2a66ce3d 100755 --- a/CI/run_reframe.sh +++ b/CI/run_reframe.sh @@ -50,7 +50,7 @@ fi if [ -z "${EESSI_TESTSUITE_BRANCH}" ]; then EESSI_TESTSUITE_BRANCH='v0.4.0' fi -if [ -z "${USE_EESSI_SOFTWARE_STACK}" ] | [ $USE_EESSI_SOFTWARE_STACK == "True" ]; then +if [ -z "${USE_EESSI_SOFTWARE_STACK}" ] || [ "$USE_EESSI_SOFTWARE_STACK" == "True" ]; then export USE_EESSI_SOFTWARE_STACK=True if [ -z "${EESSI_CVMFS_REPO}" ]; then export EESSI_CVMFS_REPO=/cvmfs/software.eessi.io From 164e522f85fa813793d192e7519447150351d111 Mon Sep 17 00:00:00 2001 From: Lara Ramona Peeters <49882639+laraPPr@users.noreply.github.com> Date: Wed, 11 Dec 2024 12:11:17 +0100 Subject: [PATCH 04/17] Update CI/run_reframe.sh Co-authored-by: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com> --- CI/run_reframe.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CI/run_reframe.sh b/CI/run_reframe.sh index 2a66ce3d..6067d199 100755 --- a/CI/run_reframe.sh +++ b/CI/run_reframe.sh @@ -102,7 +102,7 @@ export PYTHONPATH="${PYTHONPATH}":"${TEMPDIR}"/test-suite/ if [ $UNSET_MODULEPATH == "True" ]; then unset MODULEPATH fi -if [ $USE_EESSI_SOFTWARE_STACK == "True" ]; then +if [ "$USE_EESSI_SOFTWARE_STACK" == "True" ]; then eessi_init_path="${EESSI_CVMFS_REPO}"/versions/"${EESSI_VERSION}"/init/bash source "${eessi_init_path}" fi From ccd832a6695c7936c09ed089eb6536804f9375db Mon Sep 17 00:00:00 2001 From: Lara Ramona Peeters <49882639+laraPPr@users.noreply.github.com> Date: Wed, 11 Dec 2024 12:11:26 +0100 Subject: [PATCH 05/17] Update CI/run_reframe.sh Co-authored-by: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com> --- CI/run_reframe.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CI/run_reframe.sh b/CI/run_reframe.sh index 6067d199..79074a05 100755 --- a/CI/run_reframe.sh +++ b/CI/run_reframe.sh @@ -99,7 +99,7 @@ git clone ${EESSI_CLONE_ARGS} export PYTHONPATH="${PYTHONPATH}":"${TEMPDIR}"/test-suite/ # Start the EESSI environment -if [ $UNSET_MODULEPATH == "True" ]; then +if [ "$UNSET_MODULEPATH" == "True" ]; then unset MODULEPATH fi if [ "$USE_EESSI_SOFTWARE_STACK" == "True" ]; then From 699358aa5cbaf2d2207707eeec7b039ade7aa646 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 11 Dec 2024 12:55:16 +0100 Subject: [PATCH 06/17] add echo for checking that the module path is correctly set --- CI/run_reframe.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CI/run_reframe.sh b/CI/run_reframe.sh index 77937f4f..127f8a17 100755 --- a/CI/run_reframe.sh +++ b/CI/run_reframe.sh @@ -98,6 +98,9 @@ echo "Cloning EESSI repo: git clone ${EESSI_CLONE_ARGS}" git clone ${EESSI_CLONE_ARGS} export PYTHONPATH="${PYTHONPATH}":"${TEMPDIR}"/test-suite/ +# Set local module environment +if [ $SET_LOCOL_MULE ] + # Start the EESSI environment if [ $UNSET_MODULEPATH == "True" ]; then unset MODULEPATH @@ -129,6 +132,8 @@ echo "ReFrame check search path: ${RFM_CHECK_SEARCH_PATH}" echo "ReFrame check search recursive: ${RFM_CHECK_SEARCH_RECURSIVE}" echo "ReFrame prefix: ${RFM_PREFIX}" echo "ReFrame args: ${REFRAME_ARGS}" +echo "Using EESSI: ${USE_EESSI_SOFTWARE_STACK}" +echo "MODULEPATH: ${MODULEPATH}" echo "" # List tests From cb3d24cef3d54c2031d93a3b1613754195805cc4 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 11 Dec 2024 13:56:11 +0100 Subject: [PATCH 07/17] add functionality to load local module environment --- CI/hortense_EESSI_ss/ci_config.sh | 30 ++++++++++++++++++++++++++++-- CI/hortense_local_ss/ci_config.sh | 30 ++++++++++++++++++++++++++++-- CI/run_reframe.sh | 24 +++++++++++++++++------- 3 files changed, 73 insertions(+), 11 deletions(-) diff --git a/CI/hortense_EESSI_ss/ci_config.sh b/CI/hortense_EESSI_ss/ci_config.sh index ac35ffef..35dab1ae 100644 --- a/CI/hortense_EESSI_ss/ci_config.sh +++ b/CI/hortense_EESSI_ss/ci_config.sh @@ -1,6 +1,14 @@ # Configurable items +if [ -z "${TEST_SUITE_PARTITION}" ]; then + echo "You have to indicate on which partition the test-suite will run on vsc-Hortense" + echo "This needs to be TEST_SUITE_PARTITION=cpu_rome_256gb" + echo "untill new functionality of `sched_options` is part of" + echo "# the ReFrame release https://github.com/reframe-hpc/reframe/issues/2970" + exit 1 +fi + if [ -z "${REFRAME_ARGS}" ]; then - REFRAME_ARGS="--tag CI --tag 1_node|2_nodes --system hortense:cpu_rome_256gb" + REFRAME_ARGS="--tag CI --tag 1_node|2_nodes --system hortense:${TEST_SUITE_PARTITION}" fi if [ -z "${UNSET_MODULEPATH}" ]; then @@ -13,5 +21,23 @@ if [ -z "${USE_EESSI_SOFTWARE_STACK}" ]; then fi if [ -z "${RFM_CONFIG_FILES}" ]; then - export RFM_CONFIG_FILES="/dodrio/scratch/users/vsc46128/vsc_hortense.py" + export RFM_CONFIG_FILES="${TEMPDIR}/test-suite/config/vsc_hortense.py" +fi + +if [ -z "${SET_LOCAL_MODULE_ENV}"]; then + export SET_LOCAL_MODULE_ENV=True +fi + +if [ -z "${LOCAL_MODULES}"]; then + if [ "$TEST_SUITE_PARTITION" == "cpu_rome_256gb" ]; then + export LOCAL_MODULES="env/vsc/dodrio/cpu_rome env/slurm/dodrio/cpu_rome" + elif [ "$TEST_SUITE_PARTITION" == "cpu_rome_512gb" ]; then + export LOCAL_MODULES="env/vsc/dodrio/cpu_rome_512 env/slurm/dodrio/cpu_rome_512" + elif [ "$TEST_SUITE_PARTITION" == "gpu_rome_a100_40gb" ]; then + export LOCAL_MODULES="env/vsc/dodrio/gpu_rome_a100_40 env/slurm/dodrio/gpu_rome_a100_40" + elif [ "$TEST_SUITE_PARTITION" == "gpu_rome_a100_80gb" ]; then + export LOCAL_MODULES="env/vsc/dodrio/gpu_rome_a100_80 env/slurm/dodrio/gpu_rome_a100_80" + else + export LOCAL_MODULES="env/vsc/dodrio/${TEST_SUITE_PARTITION} env/slurm/dodrio/${TEST_SUITE_PARTITION}" + fi fi diff --git a/CI/hortense_local_ss/ci_config.sh b/CI/hortense_local_ss/ci_config.sh index 569bc486..d6346f9f 100644 --- a/CI/hortense_local_ss/ci_config.sh +++ b/CI/hortense_local_ss/ci_config.sh @@ -1,6 +1,14 @@ # Configurable items +if [ -z "${TEST_SUITE_PARTITION}" ]; then + echo "You have to indicate on which partition the test-suite will run on vsc-Hortense" + echo "This environment variable needs to be set TEST_SUITE_PARTITION=cpu_rome_256gb" + echo "Can only set to 'cpu_rome_256gb' untill new functionality of 'sched_options' is part of" + echo "the ReFrame release https://github.com/reframe-hpc/reframe/issues/2970" + exit 1 +fi + if [ -z "${REFRAME_ARGS}" ]; then - REFRAME_ARGS="--tag CI --tag 1_node|2_nodes --system hortense:cpu_rome_256gb" + REFRAME_ARGS="--tag CI --tag 1_node|2_nodes --system hortense:${TEST_SUITE_PARTITION}" fi if [ -z "${UNSET_MODULEPATH}" ]; then @@ -12,5 +20,23 @@ if [ -z "${USE_EESSI_SOFTWARE_STACK}" ]; then fi if [ -z "${RFM_CONFIG_FILES}" ]; then - export RFM_CONFIG_FILES="/dodrio/scratch/users/vsc46128/vsc_hortense.py" + export RFM_CONFIG_FILES="${TEMPDIR}/test-suite/config/vsc_hortense.py" fi + +if [ -z "${SET_LOCAL_MODULE_ENV}"]; then + export SET_LOCAL_MODULE_ENV=True +fi + +if [ -z "${LOCAL_MODULES}"]; then + if [ "$TEST_SUITE_PARTITION" == "cpu_rome_256gb" ]; then + export LOCAL_MODULES="cluster/dodrio/cpu_rome" + elif [ "$TEST_SUITE_PARTITION" == "cpu_rome_512gb" ]; then + export LOCAL_MODULES="cluster/dodrio/cpu_rome_512" + elif [ "$TEST_SUITE_PARTITION" == "gpu_rome_a100_40gb" ]; then + export LOCAL_MODULES="cluster/dodrio/gpu_rome_a100_40" + elif [ "$TEST_SUITE_PARTITION" == "gpu_rome_a100_80gb" ]; then + export LOCAL_MODULES="cluster/dodrio/gpu_rome_a100_80" + else + export LOCAL_MODULES="cluster/dodrio/${TEST_SUITE_PARTITION}" + fi +fi diff --git a/CI/run_reframe.sh b/CI/run_reframe.sh index d7bf8eb5..0dd4cfd2 100755 --- a/CI/run_reframe.sh +++ b/CI/run_reframe.sh @@ -25,13 +25,15 @@ if [ ! -f "${CI_CONFIG}" ]; then exit 1 fi +# Create temporary directory +if [ -z "${TEMPDIR}" ]; then + TEMPDIR=$(mktemp --directory --tmpdir=/tmp -t rfm.XXXXXXXXXX) +fi + # Set the CI configuration for this system source "${CI_CONFIG}" # Set default configuration, but let anything set by CI_CONFIG take priority -if [ -z "${TEMPDIR}" ]; then - TEMPDIR=$(mktemp --directory --tmpdir=/tmp -t rfm.XXXXXXXXXX) -fi if [ -z "${REFRAME_ARGS}" ]; then REFRAME_ARGS="--tag CI --tag 1_node" fi @@ -98,13 +100,21 @@ echo "Cloning EESSI repo: git clone ${EESSI_CLONE_ARGS}" git clone ${EESSI_CLONE_ARGS} export PYTHONPATH="${PYTHONPATH}":"${TEMPDIR}"/test-suite/ -# Set local module environment -if [ $SET_LOCOL_MULE ] - -# Start the EESSI environment +# Unset the ModulePath on systems where it is required if [ "$UNSET_MODULEPATH" == "True" ]; then unset MODULEPATH fi + +# Set local module environment +if [ "$SET_LOCAL_MODULE_ENV" == "True" ]; then + if [ -z "${LOCAL_MODULES}" ]; then + echo "You have to add the name of the module in the ci_config.sh file of your system" + exit 1 + fi + module load "${LOCAL_MODULES}" +fi + +# Start the EESSI environment if [ "$USE_EESSI_SOFTWARE_STACK" == "True" ]; then eessi_init_path="${EESSI_CVMFS_REPO}"/versions/"${EESSI_VERSION}"/init/bash source "${eessi_init_path}" From 5637be55cf8d262bb6ac8329a3dc0e941f0de426 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 11 Dec 2024 15:19:58 +0100 Subject: [PATCH 08/17] load slurm environment when using EESSI in test-suite --- config/vsc_hortense.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index e7563878..e316ebcc 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -42,13 +42,15 @@ def command(self, job): eessi_cvmfs_repo = os.getenv('EESSI_CVMFS_REPO', None) if eessi_cvmfs_repo is not None: - prepare_eessi_init = 'module --force purge' - launcher = 'mpirun' - mpi_module = '' + prepare_eessi_init = "module --force purge" + launcher = "mpirun" + env_module = "env/vsc/dodrio/%(partition)s env/slurm/dodrio/%(partition)s" + mpi_module = "" else: - prepare_eessi_init = '' - launcher = 'mympirun' - mpi_module = 'vsc-mympirun' + prepare_eessi_init = "" + launcher = "mympirun" + mpi_module = "vsc-mympirun" + env_module = "" site_configuration = { 'systems': [ @@ -67,7 +69,7 @@ def command(self, job): 'descr': 'CPU nodes (AMD Rome, 256GiB RAM)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module], + 'modules': [mpi_module, env_module % {'partition': "cpu_rome"}], 'resources': [ { 'name': 'memory', @@ -92,7 +94,7 @@ def command(self, job): 'descr': 'CPU nodes (AMD Rome, 512GiB RAM)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module], + 'modules': [mpi_module, env_module % {'partition': "cpu_rome_512"}], 'resources': [ { 'name': 'memory', @@ -117,7 +119,7 @@ def command(self, job): 'descr': 'CPU nodes (AMD Milan, 256GiB RAM)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module], + 'modules': [mpi_module, env_module % {'partition': "cpu_milan"}], 'resources': [ { 'name': 'memory', @@ -142,7 +144,7 @@ def command(self, job): 'descr': 'GPU nodes (A100 40GB)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module], + 'modules': [mpi_module, env_module % {'partition': "gpu_rome_a100_40"}], 'features': [ FEATURES[GPU], ] + list(SCALES.keys()), @@ -179,7 +181,7 @@ def command(self, job): 'descr': 'GPU nodes (A100 80GB)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module], + 'modules': [mpi_module, env_module % {'partition': "gpu_rome_a100_80"}], 'features': [ FEATURES[GPU], ] + list(SCALES.keys()), From c74d4afe42c1c96b623dc12431bda529b4b74c24 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 11 Dec 2024 17:43:12 +0100 Subject: [PATCH 09/17] have different launcher depending on using EESSI or not --- CI/hortense_EESSI_ss/ci_config.sh | 8 ++++---- config/vsc_hortense.py | 15 +++++++-------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/CI/hortense_EESSI_ss/ci_config.sh b/CI/hortense_EESSI_ss/ci_config.sh index 35dab1ae..af0c78e1 100644 --- a/CI/hortense_EESSI_ss/ci_config.sh +++ b/CI/hortense_EESSI_ss/ci_config.sh @@ -1,14 +1,14 @@ # Configurable items if [ -z "${TEST_SUITE_PARTITION}" ]; then echo "You have to indicate on which partition the test-suite will run on vsc-Hortense" - echo "This needs to be TEST_SUITE_PARTITION=cpu_rome_256gb" - echo "untill new functionality of `sched_options` is part of" - echo "# the ReFrame release https://github.com/reframe-hpc/reframe/issues/2970" + echo "This environment variable needs to be set TEST_SUITE_PARTITION=cpu_rome_256gb" + echo "Can only set to 'cpu_rome_256gb' untill new functionality of 'sched_options' is part of" + echo "the ReFrame release https://github.com/reframe-hpc/reframe/issues/2970" exit 1 fi if [ -z "${REFRAME_ARGS}" ]; then - REFRAME_ARGS="--tag CI --tag 1_node|2_nodes --system hortense:${TEST_SUITE_PARTITION}" + REFRAME_ARGS="--tag CI --tag 1_core --system hortense:${TEST_SUITE_PARTITION}" fi if [ -z "${UNSET_MODULEPATH}" ]; then diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index e316ebcc..ba06d690 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -21,6 +21,7 @@ # reframe --detect-host-topology \ # ~/.reframe/topology/hortense-{partition_name}/processor.json # ``` +import os from reframe.core.backends import register_launcher from reframe.core.launchers import JobLauncher @@ -44,13 +45,11 @@ def command(self, job): if eessi_cvmfs_repo is not None: prepare_eessi_init = "module --force purge" launcher = "mpirun" - env_module = "env/vsc/dodrio/%(partition)s env/slurm/dodrio/%(partition)s" - mpi_module = "" + mpi_module = "env/vsc/dodrio/%s" else: prepare_eessi_init = "" launcher = "mympirun" mpi_module = "vsc-mympirun" - env_module = "" site_configuration = { 'systems': [ @@ -69,7 +68,7 @@ def command(self, job): 'descr': 'CPU nodes (AMD Rome, 256GiB RAM)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module, env_module % {'partition': "cpu_rome"}], + 'modules': [mpi_module % 'cpu_rome'], 'resources': [ { 'name': 'memory', @@ -94,7 +93,7 @@ def command(self, job): 'descr': 'CPU nodes (AMD Rome, 512GiB RAM)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module, env_module % {'partition': "cpu_rome_512"}], + 'modules': [mpi_module % 'cpu_rome_512'], 'resources': [ { 'name': 'memory', @@ -119,7 +118,7 @@ def command(self, job): 'descr': 'CPU nodes (AMD Milan, 256GiB RAM)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module, env_module % {'partition': "cpu_milan"}], + 'modules': [mpi_module % 'cpu_milan'], 'resources': [ { 'name': 'memory', @@ -144,7 +143,7 @@ def command(self, job): 'descr': 'GPU nodes (A100 40GB)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module, env_module % {'partition': "gpu_rome_a100_40"}], + 'modules': [mpi_module % 'gpu_rome_a100_40'], 'features': [ FEATURES[GPU], ] + list(SCALES.keys()), @@ -181,7 +180,7 @@ def command(self, job): 'descr': 'GPU nodes (A100 80GB)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module, env_module % {'partition': "gpu_rome_a100_80"}], + 'modules': [mpi_module % 'gpu_rome_a100_80'], 'features': [ FEATURES[GPU], ] + list(SCALES.keys()), From 9bdea87692e540265fce6cf1aabdf689dff362f9 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 11 Dec 2024 17:57:45 +0100 Subject: [PATCH 10/17] rename partitions in config + add extra logging --- CI/hortense_EESSI_ss/ci_config.sh | 12 +----------- CI/hortense_local_ss/ci_config.sh | 19 +++++++------------ CI/run_reframe.sh | 4 ++++ config/vsc_hortense.py | 8 ++++---- 4 files changed, 16 insertions(+), 27 deletions(-) diff --git a/CI/hortense_EESSI_ss/ci_config.sh b/CI/hortense_EESSI_ss/ci_config.sh index af0c78e1..1fd49eb6 100644 --- a/CI/hortense_EESSI_ss/ci_config.sh +++ b/CI/hortense_EESSI_ss/ci_config.sh @@ -29,15 +29,5 @@ if [ -z "${SET_LOCAL_MODULE_ENV}"]; then fi if [ -z "${LOCAL_MODULES}"]; then - if [ "$TEST_SUITE_PARTITION" == "cpu_rome_256gb" ]; then - export LOCAL_MODULES="env/vsc/dodrio/cpu_rome env/slurm/dodrio/cpu_rome" - elif [ "$TEST_SUITE_PARTITION" == "cpu_rome_512gb" ]; then - export LOCAL_MODULES="env/vsc/dodrio/cpu_rome_512 env/slurm/dodrio/cpu_rome_512" - elif [ "$TEST_SUITE_PARTITION" == "gpu_rome_a100_40gb" ]; then - export LOCAL_MODULES="env/vsc/dodrio/gpu_rome_a100_40 env/slurm/dodrio/gpu_rome_a100_40" - elif [ "$TEST_SUITE_PARTITION" == "gpu_rome_a100_80gb" ]; then - export LOCAL_MODULES="env/vsc/dodrio/gpu_rome_a100_80 env/slurm/dodrio/gpu_rome_a100_80" - else - export LOCAL_MODULES="env/vsc/dodrio/${TEST_SUITE_PARTITION} env/slurm/dodrio/${TEST_SUITE_PARTITION}" - fi + export LOCAL_MODULES="env/vsc/dodrio/${TEST_SUITE_PARTITION} env/slurm/dodrio/${TEST_SUITE_PARTITION}" fi diff --git a/CI/hortense_local_ss/ci_config.sh b/CI/hortense_local_ss/ci_config.sh index d6346f9f..4fd21271 100644 --- a/CI/hortense_local_ss/ci_config.sh +++ b/CI/hortense_local_ss/ci_config.sh @@ -23,20 +23,15 @@ if [ -z "${RFM_CONFIG_FILES}" ]; then export RFM_CONFIG_FILES="${TEMPDIR}/test-suite/config/vsc_hortense.py" fi +if [ -z "${UNSET_MODULEPATH}" ]; then + export UNSET_MODULEPATH=False + module --force purge +fi + if [ -z "${SET_LOCAL_MODULE_ENV}"]; then export SET_LOCAL_MODULE_ENV=True fi if [ -z "${LOCAL_MODULES}"]; then - if [ "$TEST_SUITE_PARTITION" == "cpu_rome_256gb" ]; then - export LOCAL_MODULES="cluster/dodrio/cpu_rome" - elif [ "$TEST_SUITE_PARTITION" == "cpu_rome_512gb" ]; then - export LOCAL_MODULES="cluster/dodrio/cpu_rome_512" - elif [ "$TEST_SUITE_PARTITION" == "gpu_rome_a100_40gb" ]; then - export LOCAL_MODULES="cluster/dodrio/gpu_rome_a100_40" - elif [ "$TEST_SUITE_PARTITION" == "gpu_rome_a100_80gb" ]; then - export LOCAL_MODULES="cluster/dodrio/gpu_rome_a100_80" - else - export LOCAL_MODULES="cluster/dodrio/${TEST_SUITE_PARTITION}" - fi -fi + export LOCAL_MODULES="cluster/dodrio/${TEST_SUITE_PARTITION}" +fi diff --git a/CI/run_reframe.sh b/CI/run_reframe.sh index 0dd4cfd2..e458ffc2 100755 --- a/CI/run_reframe.sh +++ b/CI/run_reframe.sh @@ -81,6 +81,9 @@ fi if [ -z "${UNSET_MODULEPATH}" ]; then export UNSET_MODULEPATH=True fi +if [ -z "${SET_LOCAL_MODULE_ENV}" ]; then + export SET_LOCAL_MODULE_ENV=False +fi # Create virtualenv for ReFrame using system python python3 -m venv "${TEMPDIR}"/reframe_venv @@ -143,6 +146,7 @@ echo "ReFrame check search recursive: ${RFM_CHECK_SEARCH_RECURSIVE}" echo "ReFrame prefix: ${RFM_PREFIX}" echo "ReFrame args: ${REFRAME_ARGS}" echo "Using EESSI: ${USE_EESSI_SOFTWARE_STACK}" +echo "Using local software stack ${SET_LOCAL_MODULE_ENV}" echo "MODULEPATH: ${MODULEPATH}" echo "" diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index ba06d690..99157250 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -60,7 +60,7 @@ def command(self, job): 'modules_system': 'lmod', 'partitions': [ { - 'name': 'cpu_rome_256gb', + 'name': 'cpu_rome', 'scheduler': 'slurm', 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=cpu_rome'], @@ -85,7 +85,7 @@ def command(self, job): }, }, { - 'name': 'cpu_rome_512gb', + 'name': 'cpu_rome_512', 'scheduler': 'slurm', 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=cpu_rome_512'], @@ -135,7 +135,7 @@ def command(self, job): }, }, { - 'name': 'gpu_rome_a100_40gb', + 'name': 'gpu_rome_a100_40', 'scheduler': 'slurm', 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=gpu_rome_a100_40'], @@ -172,7 +172,7 @@ def command(self, job): }, { - 'name': 'gpu_rome_a100_80gb', + 'name': 'gpu_rome_a100_80', 'scheduler': 'slurm', 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=gpu_rome_a100_80'], From a0641a52a43924866d5a3889dc005c2bc683144e Mon Sep 17 00:00:00 2001 From: Lara Ramona Peeters <49882639+laraPPr@users.noreply.github.com> Date: Mon, 16 Dec 2024 10:20:29 +0100 Subject: [PATCH 11/17] Update config/vsc_hortense.py Co-authored-by: Sam Moors --- config/vsc_hortense.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 99157250..af981cb6 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -45,7 +45,7 @@ def command(self, job): if eessi_cvmfs_repo is not None: prepare_eessi_init = "module --force purge" launcher = "mpirun" - mpi_module = "env/vsc/dodrio/%s" + mpi_module = "env/vsc/dodrio/{}" else: prepare_eessi_init = "" launcher = "mympirun" From 2ed1e816b68f2a3afa17cf88196929c0289823af Mon Sep 17 00:00:00 2001 From: Lara Ramona Peeters <49882639+laraPPr@users.noreply.github.com> Date: Mon, 16 Dec 2024 10:20:38 +0100 Subject: [PATCH 12/17] Update config/vsc_hortense.py Co-authored-by: Sam Moors --- config/vsc_hortense.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index af981cb6..3ba5ad4d 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -68,7 +68,7 @@ def command(self, job): 'descr': 'CPU nodes (AMD Rome, 256GiB RAM)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module % 'cpu_rome'], + 'modules': [mpi_module.format('cpu_rome')], 'resources': [ { 'name': 'memory', From 5cc90d28260e082545597d4bc7f2ac124b6276db Mon Sep 17 00:00:00 2001 From: Lara Ramona Peeters <49882639+laraPPr@users.noreply.github.com> Date: Mon, 16 Dec 2024 10:20:45 +0100 Subject: [PATCH 13/17] Update config/vsc_hortense.py Co-authored-by: Sam Moors --- config/vsc_hortense.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 3ba5ad4d..2036f5e4 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -93,7 +93,7 @@ def command(self, job): 'descr': 'CPU nodes (AMD Rome, 512GiB RAM)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module % 'cpu_rome_512'], + 'modules': [mpi_module.format('cpu_rome_512')], 'resources': [ { 'name': 'memory', From c9894b82f3e1ce2029ef77e10aee64982b7b3628 Mon Sep 17 00:00:00 2001 From: Lara Ramona Peeters <49882639+laraPPr@users.noreply.github.com> Date: Mon, 16 Dec 2024 10:20:52 +0100 Subject: [PATCH 14/17] Update config/vsc_hortense.py Co-authored-by: Sam Moors --- config/vsc_hortense.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 2036f5e4..4078026d 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -118,7 +118,7 @@ def command(self, job): 'descr': 'CPU nodes (AMD Milan, 256GiB RAM)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module % 'cpu_milan'], + 'modules': [mpi_module.format('cpu_milan')], 'resources': [ { 'name': 'memory', From d184fe2452182873ed47f9f087629017ddc2bc57 Mon Sep 17 00:00:00 2001 From: Lara Ramona Peeters <49882639+laraPPr@users.noreply.github.com> Date: Mon, 16 Dec 2024 10:21:00 +0100 Subject: [PATCH 15/17] Update config/vsc_hortense.py Co-authored-by: Sam Moors --- config/vsc_hortense.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 4078026d..b99ddbf1 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -143,7 +143,7 @@ def command(self, job): 'descr': 'GPU nodes (A100 40GB)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module % 'gpu_rome_a100_40'], + 'modules': [mpi_module.format('gpu_rome_a100_40')], 'features': [ FEATURES[GPU], ] + list(SCALES.keys()), From 58da284ae142161690eb0ce26b4b1dd047016063 Mon Sep 17 00:00:00 2001 From: Lara Ramona Peeters <49882639+laraPPr@users.noreply.github.com> Date: Mon, 16 Dec 2024 10:21:06 +0100 Subject: [PATCH 16/17] Update config/vsc_hortense.py Co-authored-by: Sam Moors --- config/vsc_hortense.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index b99ddbf1..8ad36b75 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -180,7 +180,7 @@ def command(self, job): 'descr': 'GPU nodes (A100 80GB)', 'max_jobs': 20, 'launcher': launcher, - 'modules': [mpi_module % 'gpu_rome_a100_80'], + 'modules': [mpi_module.format('gpu_rome_a100_80')], 'features': [ FEATURES[GPU], ] + list(SCALES.keys()), From 80c0384357f8bd224e756b357e33bfc7e8f3a71c Mon Sep 17 00:00:00 2001 From: Sam Moors Date: Wed, 18 Dec 2024 13:42:00 +0100 Subject: [PATCH 17/17] avoid exporting UNSET_MODULEPATH twice --- CI/hortense_local_ss/ci_config.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CI/hortense_local_ss/ci_config.sh b/CI/hortense_local_ss/ci_config.sh index 4fd21271..24667b2c 100644 --- a/CI/hortense_local_ss/ci_config.sh +++ b/CI/hortense_local_ss/ci_config.sh @@ -11,10 +11,6 @@ if [ -z "${REFRAME_ARGS}" ]; then REFRAME_ARGS="--tag CI --tag 1_node|2_nodes --system hortense:${TEST_SUITE_PARTITION}" fi -if [ -z "${UNSET_MODULEPATH}" ]; then - export UNSET_MODULEPATH=False -fi - if [ -z "${USE_EESSI_SOFTWARE_STACK}" ]; then export USE_EESSI_SOFTWARE_STACK=False fi