diff --git a/CI/hortense_EESSI_ss/ci_config.sh b/CI/hortense_EESSI_ss/ci_config.sh
new file mode 100644
index 00000000..1fd49eb6
--- /dev/null
+++ b/CI/hortense_EESSI_ss/ci_config.sh
@@ -0,0 +1,33 @@
+# Configurable items
+if [ -z "${TEST_SUITE_PARTITION}" ]; then
+    echo "You have to indicate on which partition the test suite will run on vsc-Hortense"
+    echo "This environment variable needs to be set, e.g. TEST_SUITE_PARTITION=cpu_rome_256gb"
+    echo "It can only be set to 'cpu_rome_256gb' until the new 'sched_options' functionality is part of"
+    echo "a ReFrame release, see https://github.com/reframe-hpc/reframe/issues/2970"
+    exit 1
+fi
+
+if [ -z "${REFRAME_ARGS}" ]; then
+    REFRAME_ARGS="--tag CI --tag 1_core --system hortense:${TEST_SUITE_PARTITION}"
+fi
+
+if [ -z "${UNSET_MODULEPATH}" ]; then
+    export UNSET_MODULEPATH=False
+    module --force purge
+fi
+
+if [ -z "${USE_EESSI_SOFTWARE_STACK}" ]; then
+    export USE_EESSI_SOFTWARE_STACK=True
+fi
+
+if [ -z "${RFM_CONFIG_FILES}" ]; then
+    export RFM_CONFIG_FILES="${TEMPDIR}/test-suite/config/vsc_hortense.py"
+fi
+
+if [ -z "${SET_LOCAL_MODULE_ENV}" ]; then
+    export SET_LOCAL_MODULE_ENV=True
+fi
+
+if [ -z "${LOCAL_MODULES}" ]; then
+    export LOCAL_MODULES="env/vsc/dodrio/${TEST_SUITE_PARTITION} env/slurm/dodrio/${TEST_SUITE_PARTITION}"
+fi
diff --git a/CI/hortense_local_ss/ci_config.sh b/CI/hortense_local_ss/ci_config.sh
new file mode 100644
index 00000000..24667b2c
--- /dev/null
+++ b/CI/hortense_local_ss/ci_config.sh
@@ -0,0 +1,33 @@
+# Configurable items
+if [ -z "${TEST_SUITE_PARTITION}" ]; then
+    echo "You have to indicate on which partition the test suite will run on vsc-Hortense"
+    echo "This environment variable needs to be set, e.g. TEST_SUITE_PARTITION=cpu_rome_256gb"
+    echo "It can only be set to 'cpu_rome_256gb' until the new 'sched_options' functionality is part of"
+    echo "a ReFrame release, see https://github.com/reframe-hpc/reframe/issues/2970"
+    exit 1
+fi
+
+if [ -z "${REFRAME_ARGS}" ]; then
+    REFRAME_ARGS="--tag CI --tag 1_node|2_nodes --system hortense:${TEST_SUITE_PARTITION}"
+fi
+
+if [ -z "${USE_EESSI_SOFTWARE_STACK}" ]; then
+    export USE_EESSI_SOFTWARE_STACK=False
+fi
+
+if [ -z "${RFM_CONFIG_FILES}" ]; then
+    export RFM_CONFIG_FILES="${TEMPDIR}/test-suite/config/vsc_hortense.py"
+fi
+
+if [ -z "${UNSET_MODULEPATH}" ]; then
+    export UNSET_MODULEPATH=False
+    module --force purge
+fi
+
+if [ -z "${SET_LOCAL_MODULE_ENV}" ]; then
+    export SET_LOCAL_MODULE_ENV=True
+fi
+
+if [ -z "${LOCAL_MODULES}" ]; then
+    export LOCAL_MODULES="cluster/dodrio/${TEST_SUITE_PARTITION}"
+fi
diff --git a/CI/run_reframe.sh b/CI/run_reframe.sh
index eda506f0..e458ffc2 100755
--- a/CI/run_reframe.sh
+++ b/CI/run_reframe.sh
@@ -25,13 +25,15 @@ if [ ! -f "${CI_CONFIG}" ]; then
     exit 1
 fi
+# Create temporary directory
+if [ -z "${TEMPDIR}" ]; then
+    TEMPDIR=$(mktemp --directory --tmpdir=/tmp -t rfm.XXXXXXXXXX)
+fi
+
 # Set the CI configuration for this system
 source "${CI_CONFIG}"
 # Set default configuration, but let anything set by CI_CONFIG take priority
-if [ -z "${TEMPDIR}" ]; then
-    TEMPDIR=$(mktemp --directory --tmpdir=/tmp -t rfm.XXXXXXXXXX)
-fi
 if [ -z "${REFRAME_ARGS}" ]; then
     REFRAME_ARGS="--tag CI --tag 1_node"
 fi
@@ -50,11 +52,14 @@ fi
 if [ -z "${EESSI_TESTSUITE_BRANCH}" ]; then
     EESSI_TESTSUITE_BRANCH='v0.4.0'
 fi
-if [ -z "${EESSI_CVMFS_REPO}" ]; then
-    export EESSI_CVMFS_REPO=/cvmfs/software.eessi.io
-fi
-if [ -z "${EESSI_VERSION}" ]; then
-    export EESSI_VERSION=2023.06
+if [ -z "${USE_EESSI_SOFTWARE_STACK}" ] || [ "$USE_EESSI_SOFTWARE_STACK" == "True" ]; then
+    export USE_EESSI_SOFTWARE_STACK=True
+    if [ -z "${EESSI_CVMFS_REPO}" ]; then
+        export EESSI_CVMFS_REPO=/cvmfs/software.eessi.io
+    fi
+    if [ -z "${EESSI_VERSION}" ]; then
+        export EESSI_VERSION=2023.06
+    fi
 fi
 if [ -z "${RFM_CONFIG_FILES}" ]; then
     export RFM_CONFIG_FILES="${TEMPDIR}/test-suite/config/${EESSI_CI_SYSTEM_NAME}.py"
 fi
@@ -73,6 +78,12 @@ if [ -z "${REFRAME_TIMEOUT}" ]; then
     # This will prevent multiple ReFrame runs from piling up and exceeding the quota on our Magic Castle clusters
     export REFRAME_TIMEOUT=1430m
 fi
+if [ -z "${UNSET_MODULEPATH}" ]; then
+    export UNSET_MODULEPATH=True
+fi
+if [ -z "${SET_LOCAL_MODULE_ENV}" ]; then
+    export SET_LOCAL_MODULE_ENV=False
+fi
 # Create virtualenv for ReFrame using system python
 python3 -m venv "${TEMPDIR}"/reframe_venv
@@ -92,10 +103,25 @@ echo "Cloning EESSI repo: git clone ${EESSI_CLONE_ARGS}"
 git clone ${EESSI_CLONE_ARGS}
 export PYTHONPATH="${PYTHONPATH}":"${TEMPDIR}"/test-suite/
+# Unset MODULEPATH on systems where this is required
+if [ "$UNSET_MODULEPATH" == "True" ]; then
+    unset MODULEPATH
+fi
+
+# Set local module environment
+if [ "$SET_LOCAL_MODULE_ENV" == "True" ]; then
+    if [ -z "${LOCAL_MODULES}" ]; then
+        echo "You have to set LOCAL_MODULES in the ci_config.sh file of your system"
+        exit 1
+    fi
+    module load ${LOCAL_MODULES}  # deliberately unquoted: may contain multiple modules
+fi
+
 # Start the EESSI environment
-unset MODULEPATH
-eessi_init_path="${EESSI_CVMFS_REPO}"/versions/"${EESSI_VERSION}"/init/bash
-source "${eessi_init_path}"
+if [ "$USE_EESSI_SOFTWARE_STACK" == "True" ]; then
+    eessi_init_path="${EESSI_CVMFS_REPO}"/versions/"${EESSI_VERSION}"/init/bash
+    source "${eessi_init_path}"
+fi
 # Needed in order to make sure the reframe from our TEMPDIR is first on the PATH,
 # prior to the one shipped with the 2021.12 compat layer
@@ -119,6 +145,9 @@ echo "ReFrame check search path: ${RFM_CHECK_SEARCH_PATH}"
 echo "ReFrame check search recursive: ${RFM_CHECK_SEARCH_RECURSIVE}"
 echo "ReFrame prefix: ${RFM_PREFIX}"
 echo "ReFrame args: ${REFRAME_ARGS}"
+echo "Using EESSI: ${USE_EESSI_SOFTWARE_STACK}"
+echo "Using local software stack: ${SET_LOCAL_MODULE_ENV}"
+echo "MODULEPATH: ${MODULEPATH}"
 echo ""
 # List tests
diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py
index ddd7c1c8..8ad36b75 100644
--- a/config/vsc_hortense.py
+++ b/config/vsc_hortense.py
@@ -21,6 +21,7 @@
 # reframe --detect-host-topology \
 # ~/.reframe/topology/hortense-{partition_name}/processor.json
 # ```
+import os
 from reframe.core.backends import register_launcher
 from reframe.core.launchers import JobLauncher
@@ -40,6 +41,16 @@ def command(self, job):
         return ['mympirun', '--hybrid', str(job.num_tasks_per_node)]
+eessi_cvmfs_repo = os.getenv('EESSI_CVMFS_REPO', None)
+if eessi_cvmfs_repo is not None:
+    prepare_eessi_init = "module --force purge"
+    launcher = "mpirun"
+    mpi_module = "env/vsc/dodrio/{}"
+else:
+    prepare_eessi_init = ""
+    launcher = "mympirun"
+    mpi_module = "vsc-mympirun"
+
 site_configuration = {
     'systems': [
         {
@@ -49,15 +60,15 @@
             'name': 'hortense',
             'descr': 'Hortense',
             'hostnames': ['login.*.dodrio.os'],
             'modules_system': 'lmod',
             'partitions': [
                 {
-                    'name': 'cpu_rome_256gb',
+                    'name': 'cpu_rome',
                     'scheduler': 'slurm',
-                    'prepare_cmds': [common_eessi_init()],
+                    'prepare_cmds': [prepare_eessi_init, common_eessi_init()],
                     'access': hortense_access + ['--partition=cpu_rome'],
                     'environs': ['default'],
                     'descr': 'CPU nodes (AMD Rome, 256GiB RAM)',
                     'max_jobs': 20,
-                    'launcher': 'mympirun',
-                    'modules': ['vsc-mympirun'],
+                    'launcher': launcher,
+                    'modules': [mpi_module.format('cpu_rome')],
                     'resources': [
                         {
                             'name': 'memory',
@@ -74,15 +85,15 @@
                     },
                 },
                 {
-                    'name': 'cpu_rome_512gb',
+                    'name': 'cpu_rome_512',
                     'scheduler': 'slurm',
-                    'prepare_cmds': [common_eessi_init()],
+                    'prepare_cmds': [prepare_eessi_init, common_eessi_init()],
                     'access': hortense_access + ['--partition=cpu_rome_512'],
                     'environs': ['default'],
                     'descr': 'CPU nodes (AMD Rome, 512GiB RAM)',
                     'max_jobs': 20,
-                    'launcher': 'mympirun',
-                    'modules': ['vsc-mympirun'],
+                    'launcher': launcher,
+                    'modules': [mpi_module.format('cpu_rome_512')],
                     'resources': [
                         {
                             'name': 'memory',
@@ -101,13 +112,13 @@
                 {
                     'name': 'cpu_milan',
                     'scheduler': 'slurm',
-                    'prepare_cmds': [common_eessi_init()],
+                    'prepare_cmds': [prepare_eessi_init, common_eessi_init()],
                     'access': hortense_access + ['--partition=cpu_milan'],
                     'environs': ['default'],
                     'descr': 'CPU nodes (AMD Milan, 256GiB RAM)',
                     'max_jobs': 20,
-                    'launcher': 'mympirun',
-                    'modules': ['vsc-mympirun'],
+                    'launcher': launcher,
+                    'modules': [mpi_module.format('cpu_milan')],
                     'resources': [
                         {
                             'name': 'memory',
@@ -124,15 +135,15 @@
                     },
                 },
                 {
-                    'name': 'gpu_rome_a100_40gb',
+                    'name': 'gpu_rome_a100_40',
                     'scheduler': 'slurm',
-                    'prepare_cmds': [common_eessi_init()],
+                    'prepare_cmds': [prepare_eessi_init, common_eessi_init()],
                     'access': hortense_access + ['--partition=gpu_rome_a100_40'],
                     'environs': ['default'],
                     'descr': 'GPU nodes (A100 40GB)',
                     'max_jobs': 20,
-                    'launcher': 'mympirun',
-                    'modules': ['vsc-mympirun'],
+                    'launcher': launcher,
+                    'modules': [mpi_module.format('gpu_rome_a100_40')],
                     'features': [
                         FEATURES[GPU],
                     ] + list(SCALES.keys()),
@@ -161,15 +172,15 @@
                 },

                 {
-                    'name': 'gpu_rome_a100_80gb',
+                    'name': 'gpu_rome_a100_80',
                     'scheduler': 'slurm',
-                    'prepare_cmds': [common_eessi_init()],
+                    'prepare_cmds': [prepare_eessi_init, common_eessi_init()],
                     'access': hortense_access + ['--partition=gpu_rome_a100_80'],
                     'environs': ['default'],
                     'descr': 'GPU nodes (A100 80GB)',
                     'max_jobs': 20,
-                    'launcher': 'mympirun',
-                    'modules': ['vsc-mympirun'],
+                    'launcher': launcher,
+                    'modules': [mpi_module.format('gpu_rome_a100_80')],
                     'features': [
                         FEATURES[GPU],
                     ] + list(SCALES.keys()),
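For illustration, a minimal usage sketch of how these pieces are meant to be driven on vsc-Hortense, assuming (as in the unchanged part of CI/run_reframe.sh, which is not shown in this diff) that EESSI_CI_SYSTEM_NAME is what selects the per-system ci_config.sh directory; the partition value below is the only one currently supported:

    # hypothetical invocation: run the CI against the locally installed software stack
    export EESSI_CI_SYSTEM_NAME=hortense_local_ss    # assumed to pick up CI/hortense_local_ss/ci_config.sh
    export TEST_SUITE_PARTITION=cpu_rome_256gb       # only supported value until reframe-hpc/reframe#2970 lands
    ./CI/run_reframe.sh

Using hortense_EESSI_ss instead would leave USE_EESSI_SOFTWARE_STACK=True and initialise the EESSI CVMFS stack rather than loading the cluster modules.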