Skip to content

Commit

Permalink
Merge pull request #200 from laraPPr/update_CI_scripts
Browse files Browse the repository at this point in the history
 General config and `run_reframe.sh` for local and EESSI stack
  • Loading branch information
smoors authored Dec 18, 2024
2 parents 05d6a14 + 80c0384 commit 82bd926
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 30 deletions.
33 changes: 33 additions & 0 deletions CI/hortense_EESSI_ss/ci_config.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# CI configuration for running the test suite on vsc-Hortense with the EESSI
# software stack.
#
# Every setting below is only a default: a variable that already holds a
# non-empty value is left untouched. TEST_SUITE_PARTITION is the exception,
# it has no default and must be provided by the caller.

# Configurable items
if [ -z "${TEST_SUITE_PARTITION}" ]; then
    echo "You have to indicate on which partition the test-suite will run on vsc-Hortense"
    echo "This environment variable needs to be set TEST_SUITE_PARTITION=cpu_rome_256gb"
    echo "Can only set to 'cpu_rome_256gb' untill new functionality of 'sched_options' is part of"
    echo "the ReFrame release https://github.com/reframe-hpc/reframe/issues/2970"
    exit 1
fi

# Default ReFrame command line arguments, restricted to the selected partition
if [ -z "${REFRAME_ARGS}" ]; then
    REFRAME_ARGS="--tag CI --tag 1_core --system hortense:${TEST_SUITE_PARTITION}"
fi

# Keep MODULEPATH, but start from a clean module environment.
# The purge only happens when UNSET_MODULEPATH was not set beforehand.
if [ -z "${UNSET_MODULEPATH}" ]; then
    export UNSET_MODULEPATH=False
    module --force purge
fi

if [ -z "${USE_EESSI_SOFTWARE_STACK}" ]; then
    export USE_EESSI_SOFTWARE_STACK=True
fi

# TEMPDIR is expected to be set before this file is read
if [ -z "${RFM_CONFIG_FILES}" ]; then
    export RFM_CONFIG_FILES="${TEMPDIR}/test-suite/config/vsc_hortense.py"
fi

# Note the space before the closing bracket in the tests below: `[` needs `]`
# as a separate, final argument. Without it the test fails with "missing `]'"
# whenever the variable is already set.
if [ -z "${SET_LOCAL_MODULE_ENV}" ]; then
    export SET_LOCAL_MODULE_ENV=True
fi

# Whitespace-separated list of local modules for the selected partition
if [ -z "${LOCAL_MODULES}" ]; then
    export LOCAL_MODULES="env/vsc/dodrio/${TEST_SUITE_PARTITION} env/slurm/dodrio/${TEST_SUITE_PARTITION}"
fi
33 changes: 33 additions & 0 deletions CI/hortense_local_ss/ci_config.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# CI configuration for running the test suite on vsc-Hortense with the local
# software stack (USE_EESSI_SOFTWARE_STACK defaults to False here).
#
# Every setting below is only a default: a variable that already holds a
# non-empty value is left untouched. TEST_SUITE_PARTITION is the exception,
# it has no default and must be provided by the caller.

# Configurable items
if [ -z "${TEST_SUITE_PARTITION}" ]; then
    echo "You have to indicate on which partition the test-suite will run on vsc-Hortense"
    echo "This environment variable needs to be set TEST_SUITE_PARTITION=cpu_rome_256gb"
    echo "Can only set to 'cpu_rome_256gb' untill new functionality of 'sched_options' is part of"
    echo "the ReFrame release https://github.com/reframe-hpc/reframe/issues/2970"
    exit 1
fi

# Default ReFrame command line arguments, restricted to the selected partition
if [ -z "${REFRAME_ARGS}" ]; then
    REFRAME_ARGS="--tag CI --tag 1_node|2_nodes --system hortense:${TEST_SUITE_PARTITION}"
fi

if [ -z "${USE_EESSI_SOFTWARE_STACK}" ]; then
    export USE_EESSI_SOFTWARE_STACK=False
fi

# TEMPDIR is expected to be set before this file is read
if [ -z "${RFM_CONFIG_FILES}" ]; then
    export RFM_CONFIG_FILES="${TEMPDIR}/test-suite/config/vsc_hortense.py"
fi

# Keep MODULEPATH, but start from a clean module environment.
# The purge only happens when UNSET_MODULEPATH was not set beforehand.
if [ -z "${UNSET_MODULEPATH}" ]; then
    export UNSET_MODULEPATH=False
    module --force purge
fi

# Note the space before the closing bracket in the tests below: `[` needs `]`
# as a separate, final argument. Without it the test fails with "missing `]'"
# whenever the variable is already set.
if [ -z "${SET_LOCAL_MODULE_ENV}" ]; then
    export SET_LOCAL_MODULE_ENV=True
fi

# Local module to load for the selected partition
if [ -z "${LOCAL_MODULES}" ]; then
    export LOCAL_MODULES="cluster/dodrio/${TEST_SUITE_PARTITION}"
fi
51 changes: 40 additions & 11 deletions CI/run_reframe.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@ if [ ! -f "${CI_CONFIG}" ]; then
exit 1
fi

# Create temporary directory
if [ -z "${TEMPDIR}" ]; then
TEMPDIR=$(mktemp --directory --tmpdir=/tmp -t rfm.XXXXXXXXXX)
fi

# Set the CI configuration for this system
source "${CI_CONFIG}"

# Set default configuration, but let anything set by CI_CONFIG take priority
if [ -z "${TEMPDIR}" ]; then
TEMPDIR=$(mktemp --directory --tmpdir=/tmp -t rfm.XXXXXXXXXX)
fi
if [ -z "${REFRAME_ARGS}" ]; then
REFRAME_ARGS="--tag CI --tag 1_node"
fi
Expand All @@ -50,11 +52,14 @@ fi
if [ -z "${EESSI_TESTSUITE_BRANCH}" ]; then
EESSI_TESTSUITE_BRANCH='v0.4.0'
fi
if [ -z "${EESSI_CVMFS_REPO}" ]; then
export EESSI_CVMFS_REPO=/cvmfs/software.eessi.io
fi
if [ -z "${EESSI_VERSION}" ]; then
export EESSI_VERSION=2023.06
if [ -z "${USE_EESSI_SOFTWARE_STACK}" ] || [ "$USE_EESSI_SOFTWARE_STACK" == "True" ]; then
export USE_EESSI_SOFTWARE_STACK=True
if [ -z "${EESSI_CVMFS_REPO}" ]; then
export EESSI_CVMFS_REPO=/cvmfs/software.eessi.io
fi
if [ -z "${EESSI_VERSION}" ]; then
export EESSI_VERSION=2023.06
fi
fi
if [ -z "${RFM_CONFIG_FILES}" ]; then
export RFM_CONFIG_FILES="${TEMPDIR}/test-suite/config/${EESSI_CI_SYSTEM_NAME}.py"
Expand All @@ -73,6 +78,12 @@ if [ -z "${REFRAME_TIMEOUT}" ]; then
# This will prevent multiple ReFrame runs from piling up and exceeding the quota on our Magic Castle clusters
export REFRAME_TIMEOUT=1430m
fi
# Module handling defaults, applied only when the CI configuration left the
# variable unset or empty: drop MODULEPATH, and do not load local modules.
[ -n "${UNSET_MODULEPATH}" ] || export UNSET_MODULEPATH=True
[ -n "${SET_LOCAL_MODULE_ENV}" ] || export SET_LOCAL_MODULE_ENV=False

# Create virtualenv for ReFrame using system python
python3 -m venv "${TEMPDIR}"/reframe_venv
Expand All @@ -92,10 +103,25 @@ echo "Cloning EESSI repo: git clone ${EESSI_CLONE_ARGS}"
git clone ${EESSI_CLONE_ARGS}
export PYTHONPATH="${PYTHONPATH}":"${TEMPDIR}"/test-suite/

# Unset the ModulePath on systems where it is required
if [ "$UNSET_MODULEPATH" == "True" ]; then
    unset MODULEPATH
fi

#######################################
# Set local module environment.
# Does nothing unless SET_LOCAL_MODULE_ENV is "True". LOCAL_MODULES is a
# whitespace-separated list of module names (its first line is used); it is
# split into an array so that every name reaches `module load` as its own
# argument instead of one single string containing spaces.
# Globals:   SET_LOCAL_MODULE_ENV (read), LOCAL_MODULES (read)
# Arguments: none
# Outputs:   an error message on stdout when LOCAL_MODULES is empty
# Returns:   status of `module load`; exits the script with 1 when
#            LOCAL_MODULES is empty
#######################################
load_local_modules() {
    local -a module_names=()
    if [ "$SET_LOCAL_MODULE_ENV" != "True" ]; then
        return 0
    fi
    if [ -z "${LOCAL_MODULES}" ]; then
        echo "You have to add the name of the module in the ci_config.sh file of your system"
        exit 1
    fi
    # read -a splits on IFS without performing pathname expansion
    read -r -a module_names <<< "${LOCAL_MODULES}"
    module load "${module_names[@]}"
}
load_local_modules

# Start the EESSI environment
unset MODULEPATH
eessi_init_path="${EESSI_CVMFS_REPO}"/versions/"${EESSI_VERSION}"/init/bash
source "${eessi_init_path}"
if [ "$USE_EESSI_SOFTWARE_STACK" == "True" ]; then
eessi_init_path="${EESSI_CVMFS_REPO}"/versions/"${EESSI_VERSION}"/init/bash
source "${eessi_init_path}"
fi

# Needed in order to make sure the reframe from our TEMPDIR is first on the PATH,
# prior to the one shipped with the 2021.12 compat layer
Expand All @@ -119,6 +145,9 @@ echo "ReFrame check search path: ${RFM_CHECK_SEARCH_PATH}"
echo "ReFrame check search recursive: ${RFM_CHECK_SEARCH_RECURSIVE}"
echo "ReFrame prefix: ${RFM_PREFIX}"
echo "ReFrame args: ${REFRAME_ARGS}"
echo "Using EESSI: ${USE_EESSI_SOFTWARE_STACK}"
echo "Using local software stack ${SET_LOCAL_MODULE_ENV}"
echo "MODULEPATH: ${MODULEPATH}"
echo ""

# List tests
Expand Down
49 changes: 30 additions & 19 deletions config/vsc_hortense.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# reframe --detect-host-topology \
# ~/.reframe/topology/hortense-{partition_name}/processor.json
# ```
import os

from reframe.core.backends import register_launcher
from reframe.core.launchers import JobLauncher
Expand All @@ -40,6 +41,16 @@ def command(self, job):
return ['mympirun', '--hybrid', str(job.num_tasks_per_node)]


# Pick the job launcher, the module to load and an extra prepare command depending on
# whether the EESSI_CVMFS_REPO environment variable is set.
eessi_cvmfs_repo = os.getenv('EESSI_CVMFS_REPO', None)
if eessi_cvmfs_repo is not None:
# EESSI_CVMFS_REPO is set: purge the loaded modules before common_eessi_init() runs,
# launch with mpirun, and use a module name template whose '{}' placeholder is filled
# in per partition through mpi_module.format(<partition>).
prepare_eessi_init = "module --force purge"
launcher = "mpirun"
mpi_module = "env/vsc/dodrio/{}"
else:
# EESSI_CVMFS_REPO is not set: no extra prepare command, launch with mympirun and load
# vsc-mympirun (the name has no placeholder, so format() returns it unchanged).
prepare_eessi_init = ""
launcher = "mympirun"
mpi_module = "vsc-mympirun"

site_configuration = {
'systems': [
{
Expand All @@ -49,15 +60,15 @@ def command(self, job):
'modules_system': 'lmod',
'partitions': [
{
'name': 'cpu_rome_256gb',
'name': 'cpu_rome',
'scheduler': 'slurm',
'prepare_cmds': [common_eessi_init()],
'prepare_cmds': [prepare_eessi_init, common_eessi_init()],
'access': hortense_access + ['--partition=cpu_rome'],
'environs': ['default'],
'descr': 'CPU nodes (AMD Rome, 256GiB RAM)',
'max_jobs': 20,
'launcher': 'mympirun',
'modules': ['vsc-mympirun'],
'launcher': launcher,
'modules': [mpi_module.format('cpu_rome')],
'resources': [
{
'name': 'memory',
Expand All @@ -74,15 +85,15 @@ def command(self, job):
},
},
{
'name': 'cpu_rome_512gb',
'name': 'cpu_rome_512',
'scheduler': 'slurm',
'prepare_cmds': [common_eessi_init()],
'prepare_cmds': [prepare_eessi_init, common_eessi_init()],
'access': hortense_access + ['--partition=cpu_rome_512'],
'environs': ['default'],
'descr': 'CPU nodes (AMD Rome, 512GiB RAM)',
'max_jobs': 20,
'launcher': 'mympirun',
'modules': ['vsc-mympirun'],
'launcher': launcher,
'modules': [mpi_module.format('cpu_rome_512')],
'resources': [
{
'name': 'memory',
Expand All @@ -101,13 +112,13 @@ def command(self, job):
{
'name': 'cpu_milan',
'scheduler': 'slurm',
'prepare_cmds': [common_eessi_init()],
'prepare_cmds': [prepare_eessi_init, common_eessi_init()],
'access': hortense_access + ['--partition=cpu_milan'],
'environs': ['default'],
'descr': 'CPU nodes (AMD Milan, 256GiB RAM)',
'max_jobs': 20,
'launcher': 'mympirun',
'modules': ['vsc-mympirun'],
'launcher': launcher,
'modules': [mpi_module.format('cpu_milan')],
'resources': [
{
'name': 'memory',
Expand All @@ -124,15 +135,15 @@ def command(self, job):
},
},
{
'name': 'gpu_rome_a100_40gb',
'name': 'gpu_rome_a100_40',
'scheduler': 'slurm',
'prepare_cmds': [common_eessi_init()],
'prepare_cmds': [prepare_eessi_init, common_eessi_init()],
'access': hortense_access + ['--partition=gpu_rome_a100_40'],
'environs': ['default'],
'descr': 'GPU nodes (A100 40GB)',
'max_jobs': 20,
'launcher': 'mympirun',
'modules': ['vsc-mympirun'],
'launcher': launcher,
'modules': [mpi_module.format('gpu_rome_a100_40')],
'features': [
FEATURES[GPU],
] + list(SCALES.keys()),
Expand Down Expand Up @@ -161,15 +172,15 @@ def command(self, job):

},
{
'name': 'gpu_rome_a100_80gb',
'name': 'gpu_rome_a100_80',
'scheduler': 'slurm',
'prepare_cmds': [common_eessi_init()],
'prepare_cmds': [prepare_eessi_init, common_eessi_init()],
'access': hortense_access + ['--partition=gpu_rome_a100_80'],
'environs': ['default'],
'descr': 'GPU nodes (A100 80GB)',
'max_jobs': 20,
'launcher': 'mympirun',
'modules': ['vsc-mympirun'],
'launcher': launcher,
'modules': [mpi_module.format('gpu_rome_a100_80')],
'features': [
FEATURES[GPU],
] + list(SCALES.keys()),
Expand Down

0 comments on commit 82bd926

Please sign in to comment.