diff --git a/bot/check-build.sh b/bot/check-build.sh index e075518421..d8246c67be 100755 --- a/bot/check-build.sh +++ b/bot/check-build.sh @@ -400,6 +400,7 @@ comment_details_list=${comment_details_list}$(add_detail ${NO_MISSING} 1 "${succ success_msg="found message matching ${GP_tgz_created}" failure_msg="no message matching ${GP_tgz_created}" comment_details_list=${comment_details_list}$(add_detail ${TGZ} 1 "${success_msg}" "${failure_msg}") + # Now, do the actual replacement of __DETAILS_FMT__ comment_details_fmt="
_Details_
__DETAILS_LIST__
" comment_details="${comment_details_fmt/__DETAILS_LIST__/${comment_details_list}}" diff --git a/bot/check-test.sh b/bot/check-test.sh index 76e0df7f40..f045b9500a 100755 --- a/bot/check-test.sh +++ b/bot/check-test.sh @@ -13,8 +13,193 @@ job_dir=${PWD} job_out="slurm-${SLURM_JOB_ID}.out" job_test_result_file="_bot_job${SLURM_JOB_ID}.test" +# Check that job output file is found +[[ ${VERBOSE} -ne 0 ]] && echo ">> searching for job output file(s) matching '"${job_out}"'" +if [[ -f ${job_out} ]]; then + SLURM_OUTPUT_FOUND=1 + [[ ${VERBOSE} -ne 0 ]] && echo " found slurm output file '"${job_out}"'" +else + SLURM_OUTPUT_FOUND=0 + [[ ${VERBOSE} -ne 0 ]] && echo " Slurm output file '"${job_out}"' NOT found" +fi + + +# ReFrame prints e.g. +#[----------] start processing checks +#[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:rome+default +#[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:genoa+default +#[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=1_cpn_2_nodes %module_name=GROMACS/2021.3-foss-2021a /f4194106 @snellius:genoa+default +#[ FAIL ] (1/3) GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:genoa+default +#==> test failed during 'sanity': test staged in '/scratch-shared/casparl/reframe_output/staging/snellius/genoa/default/GROMACS_EESSI_d597cff4' +#[ OK ] (2/3) GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:rome+default +#P: perf: 8.441 ns/day (r:0, l:None, u:None) +#[ FAIL ] (3/3) GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=1_cpn_2_nodes %module_name=GROMACS/2021.3-foss-2021a /f4194106 @snellius:genoa+default +#==> test failed during 'sanity': test staged in '/scratch-shared/casparl/reframe_output/staging/snellius/genoa/default/GROMACS_EESSI_f4194106' +#[----------] all spawned checks have finished +#[ FAILED ] Ran 3/3 test case(s) from 2 check(s) (2 failure(s), 0 skipped, 0 aborted) + +# We will grep for the last and final line, since this reflects the overall result +# Specifically, we grep for FAILED, since this is also what we print if a step in the test script itself fails +FAILED=-1 +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then + GP_failed='\[\s*FAILED\s*\].*Ran .* test case' + grep_reframe_failed=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_failed}") + [[ $? -eq 0 ]] && FAILED=1 || FAILED=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_failed}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_reframe_failed}" +fi + +# Here, we grep for 'ERROR:', which is printed if a fatal_error is encountered when executing the test step +# I.e. this is an error in execution of the run_tests.sh itself, NOT in running the actual tests +ERROR=-1 +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then + GP_error='ERROR: ' + grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_error}") + [[ $? -eq 0 ]] && ERROR=1 || ERROR=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_error}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}" +fi + +SUCCESS=-1 +# Grep for the success pattern, so we can report the amount of tests run +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then + GP_success='\[\s*PASSED\s*\].*Ran .* test case' + grep_reframe_success=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_success}") + [[ $? -eq 0 ]] && SUCCESS=1 || SUCCESS=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_success}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_reframe_success}" +fi + +if [[ ! -z ${grep_reframe_failed} ]]; then + grep_reframe_result=${grep_reframe_failed} +else + grep_reframe_result=${grep_reframe_success} +fi + echo "[TEST]" > ${job_test_result_file} -echo "comment_description = (no tests yet)" >> ${job_test_result_file} -echo "status = SUCCESS" >> ${job_test_result_file} +if [[ ${SLURM_OUTPUT_FOUND} -eq 0 ]]; then + summary=":cry: FAILURE" + reason="Job output file not found, cannot check test results." + status="FAILURE" +# Should come before general errors: if SUCCESS==1, it indicates the test suite ran succesfully +# regardless of other things that might have gone wrong +elif [[ ${SUCCESS} -eq 1 ]]; then + summary=":grin: SUCCESS" + reason="" + status="SUCCESS" +# Should come before general errors: if FAILED==1, it indicates the test suite ran +# otherwise the pattern wouldn't have been there +elif [[ ${FAILED} -eq 1 ]]; then + summary=":cry: FAILURE" + reason="EESSI test suite produced failures." + status="FAILURE" +elif [[ ${ERROR} -eq 1 ]]; then + summary=":cry: FAILURE" + reason="EESSI test suite was not run, test step itself failed to execute." + status="FAILURE" +else + summary=":grin: FAILURE" + reason="Failed for unknown reason" + status="FAILURE" +fi + + +echo "[TEST]" > ${job_test_result_file} +echo -n "comment_description = " >> ${job_test_result_file} + +# Use template for writing PR comment with details +# construct and write complete PR comment details: implements third alternative +comment_template="
__SUMMARY_FMT__
__REASON_FMT____REFRAME_FMT____DETAILS_FMT__
" +comment_success_item_fmt=":white_check_mark: __ITEM__" +comment_failure_item_fmt=":x: __ITEM__" + +# Initialize comment_description +comment_description=${comment_template} + +# Now, start replacing template items one by one +comment_summary_fmt="__SUMMARY__ _(click triangle for details)_" +comment_summary="${comment_summary_fmt/__SUMMARY__/${summary}}" +comment_description=${comment_description/__SUMMARY_FMT__/${comment_summary}} + + +# Only add if there is a reason (e.g. no reason for successful runs) +if [[ ! -z ${reason} ]]; then + comment_reason_fmt="
_Reason_
__REASONS__
" + reason_details="${comment_reason_fmt/__REASONS__/${reason}}" + comment_description=${comment_description/__REASON_FMT__/${reason_details}} +else + comment_description=${comment_description/__REASON_FMT__/""} +fi + +# Only add if there is a reframe summary (e.g. no reframe summary if reframe wasn't launched succesfully) +echo "ReFrame result:" +echo "${grep_reframe_result}" +if [[ ! -z ${grep_reframe_result} ]]; then + comment_reframe_fmt="
_ReFrame Summary_
__REFRAME_SUMMARY__
" + reframe_summary=${comment_reframe_fmt/__REFRAME_SUMMARY__/${grep_reframe_result}} + comment_description=${comment_description/__REFRAME_FMT__/${reframe_summary}} +else + comment_description=${comment_description/__REFRAME_FMT__/""} +fi + +# Declare functions +function print_br_item() { + format="${1}" + item="${2}" + echo -n "${format//__ITEM__/${item}}
" +} + +function success() { + format="${comment_success_item_fmt}" + item="$1" + print_br_item "${format}" "${item}" +} + +function failure() { + format="${comment_failure_item_fmt}" + item="$1" + print_br_item "${format}" "${item}" +} + +function add_detail() { + actual=${1} + expected=${2} + success_msg="${3}" + failure_msg="${4}" + if [[ ${actual} -eq ${expected} ]]; then + success "${success_msg}" + else + failure "${failure_msg}" + fi +} + +# first construct comment_details_list, abbreviated comment_details_list +# then use it to set comment_details +comment_details_list="" + +success_msg="job output file ${job_out}" +failure_msg="no job output file ${job_out}" +comment_details_list=${comment_details_list}$(add_detail ${SLURM_OUTPUT_FOUND} 1 "${success_msg}" "${failure_msg}") + +success_msg="no message matching ${GP_error}" +failure_msg="found message matching ${GP_error}" +comment_details_list=${comment_details_list}$(add_detail ${ERROR} 0 "${success_msg}" "${failure_msg}") + +# Add an escape character to every *, for it to be printed correctly in the comment on GitHub +GP_failed="${GP_failed//\*/\\*}" +success_msg="no message matching ""${GP_failed}""" +failure_msg="found message matching ""${GP_failed}""" +comment_details_list=${comment_details_list}$(add_detail ${FAILED} 0 "${success_msg}" "${failure_msg}") + +comment_details_fmt="
_Details_
__DETAILS_LIST__
" +comment_details="${comment_details_fmt/__DETAILS_LIST__/${comment_details_list}}" +comment_description=${comment_description/__DETAILS_FMT__/${comment_details}} + +# Actually writing the comment description to the result file +echo "${comment_description}" >> ${job_test_result_file} +echo "status = ${status}" >> ${job_test_result_file} exit 0 diff --git a/bot/test.sh b/bot/test.sh index 9d978cdcd0..4984340e6e 100755 --- a/bot/test.sh +++ b/bot/test.sh @@ -1,13 +1,225 @@ -#!/bin/bash +#!/usr/bin/env bash # -# Dummy script, no tests yet +# script to run tests or the test suite for the whole EESSI software layer or +# just what has been built in a job. Intended use is that it is called +# at the end of a (batch) job running on a compute node. # # This script is part of the EESSI software layer, see # https://github.com/EESSI/software-layer.git # -# author: Kenneth Hoste (HPC-UGent) +# author: Thomas Roeblitz (@trz42) +# author: Caspar van Leeuwen (@casparvl) # # license: GPLv2 # +# ASSUMPTIONs: +# + assumption for the build step (as run through bot/build.sh which is provided +# in this repository too) +# - working directory has been prepared by the bot with a checkout of a +# pull request (OR by some other means) +# - the working directory contains a directory 'cfg' where the main config +# file 'job.cfg' has been deposited +# - the directory may contain any additional files referenced in job.cfg +# + assumptions for the test step +# - temporary storage is still available +# example +# Using /localscratch/9640860/NESSI/eessi.x765Dd8mFh as tmp directory (to resume session add '--resume /localscratch/9640860/NESSI/eessi.x765Dd8mFh'). +# - run test-suite.sh inside build container using tmp storage from build step +# plus possibly additional settings (repo, etc.) +# - needed setup steps may be similar to bot/inspect.sh (PR#317) + +# stop as soon as something fails +set -e + +# source utils.sh and cfg_files.sh +source scripts/utils.sh +source scripts/cfg_files.sh + +# defaults +export JOB_CFG_FILE="${JOB_CFG_FILE_OVERRIDE:=./cfg/job.cfg}" +HOST_ARCH=$(uname -m) + +# check if ${JOB_CFG_FILE} exists +if [[ ! -r "${JOB_CFG_FILE}" ]]; then + fatal_error "job config file (JOB_CFG_FILE=${JOB_CFG_FILE}) does not exist or not readable" +fi +echo "bot/test.sh: showing ${JOB_CFG_FILE} from software-layer side" +cat ${JOB_CFG_FILE} + +echo "bot/test.sh: obtaining configuration settings from '${JOB_CFG_FILE}'" +cfg_load ${JOB_CFG_FILE} + +# if http_proxy is defined in ${JOB_CFG_FILE} use it, if not use env var $http_proxy +HTTP_PROXY=$(cfg_get_value "site_config" "http_proxy") +HTTP_PROXY=${HTTP_PROXY:-${http_proxy}} +echo "bot/test.sh: HTTP_PROXY='${HTTP_PROXY}'" + +# if https_proxy is defined in ${JOB_CFG_FILE} use it, if not use env var $https_proxy +HTTPS_PROXY=$(cfg_get_value "site_config" "https_proxy") +HTTPS_PROXY=${HTTPS_PROXY:-${https_proxy}} +echo "bot/test.sh: HTTPS_PROXY='${HTTPS_PROXY}'" + +LOCAL_TMP=$(cfg_get_value "site_config" "local_tmp") +echo "bot/test.sh: LOCAL_TMP='${LOCAL_TMP}'" +# TODO should local_tmp be mandatory? --> then we check here and exit if it is not provided + +# check if path to copy build logs to is specified, so we can copy build logs for failing builds there +BUILD_LOGS_DIR=$(cfg_get_value "site_config" "build_logs_dir") +echo "bot/test.sh: BUILD_LOGS_DIR='${BUILD_LOGS_DIR}'" +# if $BUILD_LOGS_DIR is set, add it to $SINGULARITY_BIND so the path is available in the build container +if [[ ! -z ${BUILD_LOGS_DIR} ]]; then + mkdir -p ${BUILD_LOGS_DIR} + if [[ -z ${SINGULARITY_BIND} ]]; then + export SINGULARITY_BIND="${BUILD_LOGS_DIR}" + else + export SINGULARITY_BIND="${SINGULARITY_BIND},${BUILD_LOGS_DIR}" + fi +fi + +# check if path to directory on shared filesystem is specified, +# and use it as location for source tarballs used by EasyBuild if so +SHARED_FS_PATH=$(cfg_get_value "site_config" "shared_fs_path") +echo "bot/test.sh: SHARED_FS_PATH='${SHARED_FS_PATH}'" +# if $SHARED_FS_PATH is set, add it to $SINGULARITY_BIND so the path is available in the build container +if [[ ! -z ${SHARED_FS_PATH} ]]; then + mkdir -p ${SHARED_FS_PATH} + if [[ -z ${SINGULARITY_BIND} ]]; then + export SINGULARITY_BIND="${SHARED_FS_PATH}" + else + export SINGULARITY_BIND="${SINGULARITY_BIND},${SHARED_FS_PATH}" + fi +fi + +SINGULARITY_CACHEDIR=$(cfg_get_value "site_config" "container_cachedir") +echo "bot/test.sh: SINGULARITY_CACHEDIR='${SINGULARITY_CACHEDIR}'" +if [[ ! -z ${SINGULARITY_CACHEDIR} ]]; then + # make sure that separate directories are used for different CPU families + SINGULARITY_CACHEDIR=${SINGULARITY_CACHEDIR}/${HOST_ARCH} + export SINGULARITY_CACHEDIR +fi + +# try to determine tmp directory from build job +RESUME_DIR=$(grep 'Using .* as tmp directory' slurm-${SLURM_JOBID}.out | head -1 | awk '{print $2}') + +if [[ -z ${RESUME_DIR} ]]; then + echo -n "setting \$STORAGE by replacing any var in '${LOCAL_TMP}' -> " + # replace any env variable in ${LOCAL_TMP} with its + # current value (e.g., a value that is local to the job) + STORAGE=$(envsubst <<< ${LOCAL_TMP}) + echo "'${STORAGE}'" + + # make sure ${STORAGE} exists + mkdir -p ${STORAGE} + + # make sure the base tmp storage is unique + JOB_STORAGE=$(mktemp --directory --tmpdir=${STORAGE} bot_job_tmp_XXX) + echo "bot/test.sh: created unique base tmp storage directory at ${JOB_STORAGE}" + + RESUME_TGZ=${PWD}/previous_tmp/build_step/$(ls previous_tmp/build_step) + if [[ -z ${RESUME_TGZ} ]]; then + echo "bot/test.sh: no information about tmp directory and tarball of build step; --> giving up" + exit 2 + fi +fi + +# obtain list of modules to be loaded +LOAD_MODULES=$(cfg_get_value "site_config" "load_modules") +echo "bot/test.sh: LOAD_MODULES='${LOAD_MODULES}'" + +# singularity/apptainer settings: CONTAINER, HOME, TMPDIR, BIND +CONTAINER=$(cfg_get_value "repository" "container") +export SINGULARITY_HOME="${PWD}:/eessi_bot_job" +export SINGULARITY_TMPDIR="${PWD}/singularity_tmpdir" +mkdir -p ${SINGULARITY_TMPDIR} + +# load modules if LOAD_MODULES is not empty +if [[ ! -z ${LOAD_MODULES} ]]; then + for mod in $(echo ${LOAD_MODULES} | tr ',' '\n') + do + echo "bot/test.sh: loading module '${mod}'" + module load ${mod} + done +else + echo "bot/test.sh: no modules to be loaded" +fi + +# determine repository to be used from entry .repository in ${JOB_CFG_FILE} +REPOSITORY=$(cfg_get_value "repository" "repo_id") +EESSI_REPOS_CFG_DIR_OVERRIDE=$(cfg_get_value "repository" "repos_cfg_dir") +export EESSI_REPOS_CFG_DIR_OVERRIDE=${EESSI_REPOS_CFG_DIR_OVERRIDE:-${PWD}/cfg} +echo "bot/test.sh: EESSI_REPOS_CFG_DIR_OVERRIDE='${EESSI_REPOS_CFG_DIR_OVERRIDE}'" + +# determine pilot version to be used from .repository.repo_version in ${JOB_CFG_FILE} +# here, just set & export EESSI_PILOT_VERSION_OVERRIDE +# next script (eessi_container.sh) makes use of it via sourcing init scripts +# (e.g., init/eessi_defaults or init/minimal_eessi_env) +export EESSI_PILOT_VERSION_OVERRIDE=$(cfg_get_value "repository" "repo_version") +echo "bot/test.sh: EESSI_PILOT_VERSION_OVERRIDE='${EESSI_PILOT_VERSION_OVERRIDE}'" + +# determine CVMFS repo to be used from .repository.repo_name in ${JOB_CFG_FILE} +# here, just set EESSI_CVMFS_REPO_OVERRIDE, a bit further down +# "source init/eessi_defaults" via sourcing init/minimal_eessi_env +export EESSI_CVMFS_REPO_OVERRIDE=$(cfg_get_value "repository" "repo_name") +echo "bot/test.sh: EESSI_CVMFS_REPO_OVERRIDE='${EESSI_CVMFS_REPO_OVERRIDE}'" + +# determine architecture to be used from entry .architecture in ${JOB_CFG_FILE} +# fallbacks: +# - ${CPU_TARGET} handed over from bot +# - left empty to let downstream script(s) determine subdir to be used +EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(cfg_get_value "architecture" "software_subdir") +EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE:-${CPU_TARGET}} +export EESSI_SOFTWARE_SUBDIR_OVERRIDE +echo "bot/test.sh: EESSI_SOFTWARE_SUBDIR_OVERRIDE='${EESSI_SOFTWARE_SUBDIR_OVERRIDE}'" + +# get EESSI_OS_TYPE from .architecture.os_type in ${JOB_CFG_FILE} (default: linux) +EESSI_OS_TYPE=$(cfg_get_value "architecture" "os_type") +export EESSI_OS_TYPE=${EESSI_OS_TYPE:-linux} +echo "bot/test.sh: EESSI_OS_TYPE='${EESSI_OS_TYPE}'" + +# prepare arguments to eessi_container.sh common to build and tarball steps +declare -a COMMON_ARGS=() +COMMON_ARGS+=("--verbose") +COMMON_ARGS+=("--access" "ro") +COMMON_ARGS+=("--mode" "run") +[[ ! -z ${CONTAINER} ]] && COMMON_ARGS+=("--container" "${CONTAINER}") +[[ ! -z ${HTTP_PROXY} ]] && COMMON_ARGS+=("--http-proxy" "${HTTP_PROXY}") +[[ ! -z ${HTTPS_PROXY} ]] && COMMON_ARGS+=("--https-proxy" "${HTTPS_PROXY}") +[[ ! -z ${REPOSITORY} ]] && COMMON_ARGS+=("--repository" "${REPOSITORY}") + +# make sure to use the same parent dir for storing tarballs of tmp +PREVIOUS_TMP_DIR=${PWD}/previous_tmp + +# prepare directory to store tarball of tmp for test step +TARBALL_TMP_TEST_STEP_DIR=${PREVIOUS_TMP_DIR}/test_step +mkdir -p ${TARBALL_TMP_TEST_STEP_DIR} + +# prepare arguments to eessi_container.sh specific to test step +declare -a TEST_STEP_ARGS=() +TEST_STEP_ARGS+=("--save" "${TARBALL_TMP_TEST_STEP_DIR}") + +if [[ -z ${RESUME_DIR} ]]; then + TEST_STEP_ARGS+=("--storage" "${STORAGE}") + TEST_STEP_ARGS+=("--resume" "${RESUME_TGZ}") +else + TEST_STEP_ARGS+=("--resume" "${RESUME_DIR}") +fi + +# prepare arguments to test_suite.sh (specific to test step) +declare -a TEST_SUITE_ARGS=() +if [[ ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} =~ .*/generic$ ]]; then + TEST_SUITE_ARGS+=("--generic") +fi +# [[ ! -z ${BUILD_LOGS_DIR} ]] && TEST_SUITE_ARGS+=("--build-logs-dir" "${BUILD_LOGS_DIR}") +# [[ ! -z ${SHARED_FS_PATH} ]] && TEST_SUITE_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}") + +# create tmp file for output of build step +test_outerr=$(mktemp test.outerr.XXXX) + +echo "Executing command to test software:" +echo "./eessi_container.sh ${COMMON_ARGS[@]} ${TEST_STEP_ARGS[@]}" +echo " -- ./run_tests.sh \"${TEST_SUITE_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${test_outerr}" +./eessi_container.sh "${COMMON_ARGS[@]}" "${TEST_STEP_ARGS[@]}" \ + -- ./run_tests.sh "${TEST_SUITE_ARGS[@]}" "$@" 2>&1 | tee -a ${test_outerr} + exit 0 diff --git a/reframe_config_bot.py.tmpl b/reframe_config_bot.py.tmpl new file mode 100644 index 0000000000..0cc3e9f530 --- /dev/null +++ b/reframe_config_bot.py.tmpl @@ -0,0 +1,59 @@ +# WARNING: this file is intended as template and the __X__ template variables need to be replaced +# before it can act as a configuration file +# Once replaced, this is a config file for running tests after the build phase, by the bot + +from eessi.testsuite.common_config import common_logging_config +from eessi.testsuite.constants import * # noqa: F403 + + +site_configuration = { + 'systems': [ + { + 'name': 'BotBuildTests', + 'descr': 'Software-layer bot', + 'hostnames': ['.*'], + 'modules_system': 'lmod', + 'partitions': [ + { + 'name': 'default', + 'scheduler': 'local', + 'launcher': 'mpirun', + 'environs': ['default'], + 'features': [ + FEATURES[CPU] + ] + list(SCALES.keys()), + 'processor': { + 'num_cpus': __NUM_CPUS__, + 'num_sockets': __NUM_SOCKETS__, + 'num_cpus_per_core': __NUM_CPUS_PER_CORE__, + 'num_cpus_per_socket': __NUM_CPUS_PER_SOCKET__, + }, + 'resources': [ + { + 'name': 'memory', + 'options': ['--mem={size}'], + } + ], + 'max_jobs': 1 + } + ] + } + ], + 'environments': [ + { + 'name': 'default', + 'cc': 'cc', + 'cxx': '', + 'ftn': '' + } + ], + 'general': [ + { + 'purge_environment': True, + 'resolve_module_conflicts': False, # avoid loading the module before submitting the job + # disable automatic detection of CPU architecture (since we're using local scheduler) + 'remote_detect': False, + } + ], + 'logging': common_logging_config(), +} diff --git a/run_tests.sh b/run_tests.sh new file mode 100755 index 0000000000..1dbb47db9d --- /dev/null +++ b/run_tests.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# +# This script gets invoked by the bot/test.sh script to run within the EESSI container +# Thus, this script defines all of the steps that should run for the tests. +# Note that, unless we have good reason, we don't run test steps in the prefix environment: +# users also typically don't run in the prefix environment, and we want to check if the +# software works well in that specific setup. +# +# This script is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Caspar van Leeuwen (@casparvl) +# +# license: GPLv2 +# + +base_dir=$(dirname $(realpath $0)) +source ${base_dir}/init/eessi_defaults + +# Git clone has to be run in compat layer, to make the git command available +./run_in_compat_layer_env.sh "git clone https://github.com/EESSI/test-suite EESSI-test-suite" + +# Run the test suite +./test_suite.sh "$@" diff --git a/test_suite.sh b/test_suite.sh new file mode 100755 index 0000000000..95eb9daa2a --- /dev/null +++ b/test_suite.sh @@ -0,0 +1,200 @@ +#!/bin/bash +# +# This script creates a ReFrame config file from a template, in which CPU properties get replaced +# based on where this script is run (typically: a build node). Then, it runs the EESSI test suite. +# +# This script is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Caspar van Leeuwen (@casparvl) +# +# license: GPLv2 + +display_help() { + echo "usage: $0 [OPTIONS]" + echo " -g | --generic - instructs script to test for generic architecture target" + echo " -h | --help - display this usage information" + echo " -x | --http-proxy URL - provides URL for the environment variable http_proxy" + echo " -y | --https-proxy URL - provides URL for the environment variable https_proxy" +} + +POSITIONAL_ARGS=() + +while [[ $# -gt 0 ]]; do + case $1 in + -g|--generic) + DETECTION_PARAMETERS="--generic" + shift + ;; + -h|--help) + display_help # Call your function + # no shifting needed here, we're done. + exit 0 + ;; + -x|--http-proxy) + export http_proxy="$2" + shift 2 + ;; + -y|--https-proxy) + export https_proxy="$2" + shift 2 + ;; + --build-logs-dir) + export build_logs_dir="${2}" + shift 2 + ;; + --shared-fs-path) + export shared_fs_path="${2}" + shift 2 + ;; + -*|--*) + echo "Error: Unknown option: $1" >&2 + exit 1 + ;; + *) # No more options + POSITIONAL_ARGS+=("$1") # save positional arg + shift + ;; + esac +done + +set -- "${POSITIONAL_ARGS[@]}" + +TOPDIR=$(dirname $(realpath $0)) + +source $TOPDIR/scripts/utils.sh + +# honor $TMPDIR if it is already defined, use /tmp otherwise +if [ -z $TMPDIR ]; then + export WORKDIR=/tmp/$USER +else + export WORKDIR=$TMPDIR/$USER +fi + +TMPDIR=$(mktemp -d) + +echo ">> Setting up environment..." +module --force purge +export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS) + +source $TOPDIR/init/bash + +# Load the ReFrame module +# Currently, we load the default version. Maybe we should somehow make this configurable in the future? +module load ReFrame +if [[ $? -eq 0 ]]; then + echo_green ">> Loaded ReFrame module" +else + fatal_error "Failed to load the ReFrame module" +fi + +# Check that a system python3 is available +python3_found=$(command -v python3) +if [ -z ${python3_found} ]; then + fatal_error "No system python3 found" +else + echo_green "System python3 found:" + python3 -V +fi + +# Check ReFrame came with the hpctestlib and we can import it +reframe_import="hpctestlib.sciapps.gromacs" +python3 -c "import ${reframe_import}" +if [[ $? -eq 0 ]]; then + echo_green "Succesfully found and imported ${reframe_import}" +else + fatal_error "Failed to import ${reframe_import}" +fi + +# Cloning should already be done in run_tests.sh before test_suite.sh is invoked +# Check if that succeeded +export TESTSUITEPREFIX=$PWD/EESSI-test-suite +if [ -d $TESTSUITEPREFIX ]; then + echo_green "Clone of the test suite $TESTSUITEPREFIX available, OK!" +else + fatal_error "Clone of the test suite $TESTSUITEPREFIX is not available!" +fi +export PYTHONPATH=$TESTSUITEPREFIX:$PYTHONPATH + +# Check that we can import from the testsuite +testsuite_import="eessi.testsuite" +python3 -c "import ${testsuite_import}" +if [[ $? -eq 0 ]]; then + echo_green "Succesfully found and imported ${testsuite_import}" +else + fatal_error "Failed to import ${testsuite_import}" +fi + +# Configure ReFrame, see https://www.eessi.io/docs/test-suite/installation-configuration +export RFM_CONFIG_FILES=$TOPDIR/reframe_config_bot.py +export RFM_CONFIG_FILE_TEMPLATE=$TOPDIR/reframe_config_bot.py.tmpl +export RFM_CHECK_SEARCH_PATH=$TESTSUITEPREFIX/eessi/testsuite/tests +export RFM_CHECK_SEARCH_RECURSIVE=1 +export RFM_PREFIX=$PWD/reframe_runs + +echo "Configured reframe with the following environment variables:" +env | grep "RFM_" + +# Inject correct CPU properties into the ReFrame config file +cpuinfo=$(lscpu) +if [[ "${cpuinfo}" =~ CPU\(s\):[^0-9]*([0-9]+) ]]; then + cpu_count=${BASH_REMATCH[1]} +else + fatal_error "Failed to get the number of CPUs for the current test hardware with lscpu." +fi +if [[ "${cpuinfo}" =~ Socket\(s\):[^0-9]*([0-9]+) ]]; then + socket_count=${BASH_REMATCH[1]} +else + fatal_error "Failed to get the number of sockets for the current test hardware with lscpu." +fi +if [[ "${cpuinfo}" =~ (Thread\(s\) per core:[^0-9]*([0-9]+)) ]]; then + threads_per_core=${BASH_REMATCH[2]} +else + fatal_error "Failed to get the number of threads per core for the current test hardware with lscpu." +fi +if [[ "${cpuinfo}" =~ (Core\(s\) per socket:[^0-9]*([0-9]+)) ]]; then + cores_per_socket=${BASH_REMATCH[2]} +else + fatal_error "Failed to get the number of cores per socket for the current test hardware with lscpu." +fi +cp ${RFM_CONFIG_FILE_TEMPLATE} ${RFM_CONFIG_FILES} +sed -i "s/__NUM_CPUS__/${cpu_count}/g" $RFM_CONFIG_FILES +sed -i "s/__NUM_SOCKETS__/${socket_count}/g" $RFM_CONFIG_FILES +sed -i "s/__NUM_CPUS_PER_CORE__/${threads_per_core}/g" $RFM_CONFIG_FILES +sed -i "s/__NUM_CPUS_PER_SOCKET__/${cores_per_socket}/g" $RFM_CONFIG_FILES + +# Workaround for https://github.com/EESSI/software-layer/pull/467#issuecomment-1973341966 +export PSM3_DEVICES='self,shm' # this is enough, since we only run single node for now + +# Check we can run reframe +reframe --version +if [[ $? -eq 0 ]]; then + echo_green "Succesfully ran 'reframe --version'" +else + fatal_error "Failed to run 'reframe --version'" +fi + +# List the tests we want to run +export REFRAME_ARGS='--tag CI --tag 1_node --nocolor' +echo "Listing tests: reframe ${REFRAME_ARGS} --list" +reframe ${REFRAME_ARGS} --list +if [[ $? -eq 0 ]]; then + echo_green "Succesfully listed ReFrame tests with command: reframe ${REFRAME_ARGS} --list" +else + fatal_error "Failed to list ReFrame tests with command: reframe ${REFRAME_ARGS} --list" +fi + +# Run all tests +echo "Running tests: reframe ${REFRAME_ARGS} --run" +reframe ${REFRAME_ARGS} --run +reframe_exit_code=$? +if [[ ${reframe_exit_code} -eq 0 ]]; then + echo_green "ReFrame runtime ran succesfully with command: reframe ${REFRAME_ARGS} --run." +else + fatal_error "ReFrame runtime failed to run with command: reframe ${REFRAME_ARGS} --run." +fi + +echo ">> Cleaning up ${TMPDIR}..." +rm -r ${TMPDIR} + +exit ${reframe_exit_code}