try to run reframe tests with bot
bedroge committed Jan 7, 2025
1 parent 842ae7b commit 92b6192
Showing 2 changed files with 381 additions and 0 deletions.
238 changes: 238 additions & 0 deletions bot/check-test.sh
@@ -0,0 +1,238 @@
#!/bin/bash
#
# Dummy script that only creates a test result file for the bot, without actually checking anything
#
# This script is part of the EESSI software layer, see
# https://github.com/EESSI/software-layer.git
#
# author: Kenneth Hoste (HPC-UGent)
#
# license: GPLv2
#
job_dir=${PWD}
job_out="slurm-${SLURM_JOB_ID}.out"
job_test_result_file="_bot_job${SLURM_JOB_ID}.test"

# Check that the job output file is found
[[ ${VERBOSE} -ne 0 ]] && echo ">> searching for job output file(s) matching '"${job_out}"'"
if [[ -f ${job_out} ]]; then
SLURM_OUTPUT_FOUND=1
[[ ${VERBOSE} -ne 0 ]] && echo " found slurm output file '"${job_out}"'"
else
SLURM_OUTPUT_FOUND=0
[[ ${VERBOSE} -ne 0 ]] && echo " Slurm output file '"${job_out}"' NOT found"
fi

# ReFrame prints e.g.
#[----------] start processing checks
#[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:rome+default
#[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:genoa+default
#[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=1_cpn_2_nodes %module_name=GROMACS/2021.3-foss-2021a /f4194106 @snellius:genoa+default
#[ FAIL ] (1/3) GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:genoa+default
#==> test failed during 'sanity': test staged in '/scratch-shared/casparl/reframe_output/staging/snellius/genoa/default/GROMACS_EESSI_d597cff4'
#[ OK ] (2/3) GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:rome+default
#P: perf: 8.441 ns/day (r:0, l:None, u:None)
#[ FAIL ] (3/3) GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=1_cpn_2_nodes %module_name=GROMACS/2021.3-foss-2021a /f4194106 @snellius:genoa+default
#==> test failed during 'sanity': test staged in '/scratch-shared/casparl/reframe_output/staging/snellius/genoa/default/GROMACS_EESSI_f4194106'
#[----------] all spawned checks have finished
#[ FAILED ] Ran 3/3 test case(s) from 2 check(s) (2 failure(s), 0 skipped, 0 aborted)

# We grep for the final summary line, since it reflects the overall result
# Specifically, we grep for FAILED, since this is also what we print if a step in the test script itself fails
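# As an illustration (derived from the sample output above): a failed run ends with
#   [  FAILED  ] Ran 3/3 test case(s) from 2 check(s) (2 failure(s), 0 skipped, 0 aborted)
# which matches GP_failed below, while a fully successful run ends with something like
#   [  PASSED  ] Ran 3/3 test case(s) from 3 check(s) (0 failure(s), 0 skipped, 0 aborted)
# which matches GP_success instead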
FAILED=-1
if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then
GP_failed='\[\s*FAILED\s*\].*Ran .* test case'
grep_reframe_failed=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_failed}")
[[ $? -eq 0 ]] && FAILED=1 || FAILED=0
# be careful not to add the pattern we search for to the Slurm output file
[[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_failed}"'"
[[ ${VERBOSE} -ne 0 ]] && echo "${grep_reframe_failed}"
fi

# Here, we grep for 'ERROR:', which is printed if a fatal_error is encountered when executing the test step,
# i.e. an error in the execution of run_tests.sh itself, NOT in running the actual tests
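# A (hypothetical) example of a matching line in the job output:
#   ERROR: job config file (JOB_CFG_FILE=./cfg/job.cfg) does not exist or is not readable
# the exact message depends on which step called fatal_error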
ERROR=-1
if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then
GP_error='ERROR: '
grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_error}")
[[ $? -eq 0 ]] && ERROR=1 || ERROR=0
# be careful not to add the pattern we search for to the Slurm output file
[[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_error}"'"
[[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}"
fi

SUCCESS=-1
# Grep for the success pattern, so we can report the number of test cases run
if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then
GP_success='\[\s*PASSED\s*\].*Ran .* test case'
grep_reframe_success=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_success}")
[[ $? -eq 0 ]] && SUCCESS=1 || SUCCESS=0
# be careful not to add the pattern we search for to the Slurm output file
[[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_success}"'"
[[ ${VERBOSE} -ne 0 ]] && echo "${grep_reframe_success}"
fi

if [[ ! -z ${grep_reframe_failed} ]]; then
grep_reframe_result=${grep_reframe_failed}
else
# Grep the entire output of ReFrame, so that we can report it in the foldable section of the test report
GP_success_full='(?s)\[----------\] start processing checks.*?\[==========\] Finished on [a-zA-Z0-9 ]*'
# Grab the full ReFrame report, then cut the irrelevant parts
# Note that the character limit for comments on GitHub is around 65k, so cutting is important
grep_reframe_success_full=$( \
grep -v "^>> searching for " ${job_dir}/${job_out} | \
# Use -z so the (?s) pattern can match across newlines (input is treated as one NUL-delimited record)
grep -Pzo "${GP_success_full}" | \
# Replace null character with newline, to undo the -z option
sed 's/\x00/\n/g' | \
# Remove the [ RUN ] lines from ReFrame; they are not very informative
grep -v -P '\[\s*RUN\s*]' | \
# Remove separator lines like '[----------] all spawned checks have finished'
grep -v '\[-*\]' | \
# Remove the closing line, e.g. '[==========] Finished on Mon Oct 7 21'
grep -v '\[=*\]' | \
# Remove blank line(s) from the report
grep -v '^$' | \
# Remove warnings about the local spawner not supporting memory requests
grep -v 'WARNING\: hooks\.req_memory_per_node does not support the scheduler you configured .local.*$' | \
# Strip color coding characters
sed 's/\x1B\[[0-9;]*m//g' | \
# Replace all newline characters with <br/>
sed ':a;N;$!ba;s/\n/<br\/>/g' | \
# Replace % with %%; \%\% makes both % characters be treated as plain (non-special) characters
sed 's/\%/\%\%/g' \
)
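# A minimal, self-contained sketch of the multiline extraction above (assuming GNU grep
# with PCRE support): (?s) lets '.' match newlines, and -z makes grep treat the whole
# input as a single NUL-terminated record:
#   printf '%s\n' 'noise' '[----------] start processing checks' 'SOME RESULT' \
#       '[==========] Finished on Mon Jan 1' 'noise' \
#     | grep -Pzo '(?s)\[----------\] start processing checks.*?\[==========\] Finished on [a-zA-Z0-9 ]*' \
#     | sed 's/\x00/\n/g'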
# TODO (optional): we could impose a character limit here, and truncate if too long
# (though we should do that before inserting the <br/> statements).
# If we do, we should probably re-append the final summary, e.g.
# [ PASSED ] Ran 10/10 test case(s) from 10 check(s) (0 failure(s), 0 skipped, 0 aborted)
# so that it is always displayed
# However, that's not implemented yet - let's see if this ever becomes an issue
grep_reframe_result=${grep_reframe_success_full}
fi
echo "grep_reframe_result: ${grep_reframe_result}"

echo "[TEST]" > ${job_test_result_file}
if [[ ${SLURM_OUTPUT_FOUND} -eq 0 ]]; then
summary=":cry: FAILURE"
reason="Job output file not found, cannot check test results."
status="FAILURE"
# Should come before general errors: if SUCCESS==1, it indicates the test suite ran successfully
# regardless of other things that might have gone wrong
elif [[ ${SUCCESS} -eq 1 ]]; then
summary=":grin: SUCCESS"
reason=""
status="SUCCESS"
# Should come before general errors: if FAILED==1, it indicates the test suite ran
# otherwise the pattern wouldn't have been there
elif [[ ${FAILED} -eq 1 ]]; then
summary=":cry: FAILURE"
reason="EESSI test suite produced failures."
status="FAILURE"
elif [[ ${ERROR} -eq 1 ]]; then
summary=":cry: FAILURE"
reason="EESSI test suite was not run, test step itself failed to execute."
status="FAILURE"
else
summary=":cry: FAILURE"
reason="Failed for unknown reason"
status="FAILURE"
fi


echo "[TEST]" > ${job_test_result_file}
echo -n "comment_description = " >> ${job_test_result_file}

# Use template for writing PR comment with details
# construct and write complete PR comment details: implements third alternative
comment_template="<details>__SUMMARY_FMT__<dl>__REASON_FMT____REFRAME_FMT____DETAILS_FMT__</dl></details>"
comment_success_item_fmt=":white_check_mark: __ITEM__"
comment_failure_item_fmt=":x: __ITEM__"
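# For a fully successful run, the substituted template ends up looking roughly like
# (ReFrame summary and details elided here):
# <details><summary>:grin: SUCCESS _(click triangle for details)_</summary><dl><dt>_ReFrame Summary_</dt><dd>...</dd><dt>_Details_</dt><dd>...</dd></dl></details>
# (no _Reason_ entry, since 'reason' is empty on success)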

# Initialize comment_description
comment_description=${comment_template}

# Now, start replacing template items one by one
comment_summary_fmt="<summary>__SUMMARY__ _(click triangle for details)_</summary>"
comment_summary="${comment_summary_fmt/__SUMMARY__/${summary}}"
comment_description=${comment_description/__SUMMARY_FMT__/${comment_summary}}


# Only add if there is a reason (e.g. no reason for successful runs)
if [[ ! -z ${reason} ]]; then
comment_reason_fmt="<dt>_Reason_</dt><dd>__REASONS__</dd>"
reason_details="${comment_reason_fmt/__REASONS__/${reason}}"
comment_description=${comment_description/__REASON_FMT__/${reason_details}}
else
comment_description=${comment_description/__REASON_FMT__/""}
fi

# Only add if there is a ReFrame summary (e.g. no ReFrame summary if ReFrame wasn't launched successfully)
echo "ReFrame result:"
echo "${grep_reframe_result}"
if [[ ! -z ${grep_reframe_result} ]]; then
comment_reframe_fmt="<dt>_ReFrame Summary_</dt><dd>__REFRAME_SUMMARY__</dd>"
reframe_summary=${comment_reframe_fmt/__REFRAME_SUMMARY__/${grep_reframe_result}}
comment_description=${comment_description/__REFRAME_FMT__/${reframe_summary}}
else
comment_description=${comment_description/__REFRAME_FMT__/""}
fi

# Declare functions
function print_br_item() {
format="${1}"
item="${2}"
echo -n "${format//__ITEM__/${item}}<br/>"
}
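# Example usage: print_br_item ':x: __ITEM__' 'some item'
# prints ':x: some item<br/>'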

function success() {
format="${comment_success_item_fmt}"
item="$1"
print_br_item "${format}" "${item}"
}

function failure() {
format="${comment_failure_item_fmt}"
item="$1"
print_br_item "${format}" "${item}"
}

function add_detail() {
actual=${1}
expected=${2}
success_msg="${3}"
failure_msg="${4}"
if [[ ${actual} -eq ${expected} ]]; then
success "${success_msg}"
else
failure "${failure_msg}"
fi
}
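# Example usage: add_detail "${SLURM_OUTPUT_FOUND}" 1 "output found" "output missing"
# prints the success item if SLURM_OUTPUT_FOUND equals 1, the failure item otherwise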

# first construct the list of detail items (comment_details_list),
# then use it to set comment_details
comment_details_list=""

success_msg="job output file <code>${job_out}</code>"
failure_msg="no job output file <code>${job_out}</code>"
comment_details_list=${comment_details_list}$(add_detail ${SLURM_OUTPUT_FOUND} 1 "${success_msg}" "${failure_msg}")

success_msg="no message matching <code>${GP_error}</code>"
failure_msg="found message matching <code>${GP_error}</code>"
comment_details_list=${comment_details_list}$(add_detail ${ERROR} 0 "${success_msg}" "${failure_msg}")

# Add an escape character to every *, so it is rendered correctly in the comment on GitHub
GP_failed="${GP_failed//\*/\\*}"
success_msg="no message matching <code>""${GP_failed}""</code>"
failure_msg="found message matching <code>""${GP_failed}""</code>"
comment_details_list=${comment_details_list}$(add_detail ${FAILED} 0 "${success_msg}" "${failure_msg}")
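# Illustrative (hypothetical) value of comment_details_list for a clean run, assuming
# SLURM_JOB_ID=123456; in reality this is one long line:
#   :white_check_mark: job output file <code>slurm-123456.out</code><br/>
#   :white_check_mark: no message matching <code>ERROR: </code><br/>
#   :white_check_mark: no message matching <code>\[\s\*FAILED\s\*\].\*Ran .\* test case</code><br/>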

comment_details_fmt="<dt>_Details_</dt><dd>__DETAILS_LIST__</dd>"
comment_details="${comment_details_fmt/__DETAILS_LIST__/${comment_details_list}}"
comment_description=${comment_description/__DETAILS_FMT__/${comment_details}}

# Write the comment description to the result file
echo "${comment_description}" >> ${job_test_result_file}
echo "status = ${status}" >> ${job_test_result_file}

exit 0
143 changes: 143 additions & 0 deletions bot/test.sh
@@ -0,0 +1,143 @@
#!/usr/bin/env bash
#
# Script to run tests for the whole EESSI compatibility layer.
# Intended use is that it is called at the end of a (batch) job running on a compute node.
#
# This script is part of the EESSI compatibility layer, see
# https://github.com/EESSI/compatibility-layer.git
#
# author: Thomas Roeblitz (@trz42)
# author: Caspar van Leeuwen (@casparvl)
# author: Bob Dröge (@bedroge)
#
# license: GPLv2
#

# ASSUMPTIONs:
# + assumptions for the build step (as run through bot/build.sh, which is provided
# in this repository too)
# - working directory has been prepared by the bot with a checkout of a
# pull request (OR by some other means)
# - the working directory contains a directory 'cfg' where the main config
# file 'job.cfg' has been deposited
# - the directory may contain any additional files referenced in job.cfg
# + assumptions for the test step
# - temporary storage is still available
# example
# Using /tmp/bot/EESSI/eessi.7l3zm2x7qH as temporary storage...
# - run test/compat_layer.py with ReFrame inside build container using tmp storage from build step
# plus possibly additional settings (repo, etc.)

# stop as soon as something fails
set -e

# source utils.sh and cfg_files.sh
source scripts/utils.sh
source scripts/cfg_files.sh

# defaults
export JOB_CFG_FILE="${JOB_CFG_FILE_OVERRIDE:=./cfg/job.cfg}"
HOST_ARCH=$(uname -m)
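# e.g. to point this script at a different config file (hypothetical path):
#   JOB_CFG_FILE_OVERRIDE=/path/to/job.cfg ./bot/test.sh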

# check if ${JOB_CFG_FILE} exists
if [[ ! -r "${JOB_CFG_FILE}" ]]; then
fatal_error "job config file (JOB_CFG_FILE=${JOB_CFG_FILE}) does not exist or not readable"
fi
echo "bot/test.sh: showing ${JOB_CFG_FILE} from software-layer side"
cat ${JOB_CFG_FILE}

echo "bot/test.sh: obtaining configuration settings from '${JOB_CFG_FILE}'"
cfg_load ${JOB_CFG_FILE}

# if http_proxy is defined in ${JOB_CFG_FILE} use it, if not use env var $http_proxy
HTTP_PROXY=$(cfg_get_value "site_config" "http_proxy")
HTTP_PROXY=${HTTP_PROXY:-${http_proxy}}
echo "bot/test.sh: HTTP_PROXY='${HTTP_PROXY}'"

# if https_proxy is defined in ${JOB_CFG_FILE} use it, if not use env var $https_proxy
HTTPS_PROXY=$(cfg_get_value "site_config" "https_proxy")
HTTPS_PROXY=${HTTPS_PROXY:-${https_proxy}}
echo "bot/test.sh: HTTPS_PROXY='${HTTPS_PROXY}'"

LOCAL_TMP=$(cfg_get_value "site_config" "local_tmp")
echo "bot/test.sh: LOCAL_TMP='${LOCAL_TMP}'"

# try to determine tmp directory from build job
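# The build step logs a line like (path hypothetical, cf. the example under ASSUMPTIONs):
#   To resume work add '--resume /tmp/bot/EESSI/eessi.7l3zm2x7qH'
# \K drops everything before it from the match, so only the path is captured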
EESSI_TMPDIR=$(grep -oP "To resume work add '--resume \K.*(?=')" slurm-${SLURM_JOBID}.out)

if [[ -z ${EESSI_TMPDIR} ]]; then
echo "bot/test.sh: no information about tmp directory build step; --> giving up"
exit 2
fi

# obtain list of modules to be loaded
LOAD_MODULES=$(cfg_get_value "site_config" "load_modules")
echo "bot/test.sh: LOAD_MODULES='${LOAD_MODULES}'"

# load modules if LOAD_MODULES is not empty
if [[ ! -z ${LOAD_MODULES} ]]; then
for mod in $(echo ${LOAD_MODULES} | tr ',' '\n')
do
echo "bot/test.sh: loading module '${mod}'"
module load ${mod}
done
else
echo "bot/test.sh: no modules to be loaded"
fi

cpu_target_arch=$(cfg_get_value "architecture" "software_subdir" | cut -d/ -f1)
host_arch=$(uname -m)
eessi_arch=${cpu_target_arch:-${host_arch}}
eessi_os=linux
job_version=$(cfg_get_value "repository" "repo_version")
eessi_version=${job_version:-2023.06}
job_repo=$(cfg_get_value "repository" "repo_name")
eessi_repo=${job_repo:-software.eessi.io}
tar_topdir=/cvmfs/${eessi_repo}/versions

if [ "${eessi_arch}" != "${host_arch}" ]; then
echo "Requested architecture (${eessi_arch}) is different from this machine's architecture ($(uname -m))!"
exit 1
fi

RUNTIME=$(get_container_runtime)
exit_code=$?
[[ ${VERBOSE} == '-vvv' ]] && echo "RUNTIME='${RUNTIME}'"
check_exit_code ${exit_code} "using runtime ${RUNTIME}" "oh no, neither apptainer nor singularity available"

# Set up paths and mount points for Apptainer
if [[ -z ${APPTAINER_CACHEDIR} ]]; then
export APPTAINER_CACHEDIR=${EESSI_TMPDIR}/apptainer_cache
[[ ${VERBOSE} == '-vvv' ]] && echo "APPTAINER_CACHEDIR='${APPTAINER_CACHEDIR}'"
fi
export APPTAINER_BIND="${EESSI_TMPDIR}/cvmfs:/cvmfs,${PWD}:/compatibility-layer"
export APPTAINER_BIND="${APPTAINER_BIND},${EESSI_TMPDIR}/tmp:/tmp"
[[ ${VERBOSE} == '-vvv' ]] && echo "APPTAINER_BIND='${APPTAINER_BIND}'"
export APPTAINER_HOME="${EESSI_TMPDIR}/home:/home/${USER}"
[[ ${VERBOSE} == '-vvv' ]] && echo "APPTAINER_HOME='${APPTAINER_HOME}'"

# also define SINGULARITY_* env vars
if [[ -z ${SINGULARITY_CACHEDIR} ]]; then
export SINGULARITY_CACHEDIR=${EESSI_TMPDIR}/apptainer_cache
[[ ${VERBOSE} == '-vvv' ]] && echo "SINGULARITY_CACHEDIR='${SINGULARITY_CACHEDIR}'"
fi
export SINGULARITY_BIND="${EESSI_TMPDIR}/cvmfs:/cvmfs,${PWD}:/compatibility-layer"
export SINGULARITY_BIND="${SINGULARITY_BIND},${EESSI_TMPDIR}/tmp:/tmp"
[[ ${VERBOSE} == '-vvv' ]] && echo "SINGULARITY_BIND='${SINGULARITY_BIND}'"
export SINGULARITY_HOME="${EESSI_TMPDIR}/home:/home/${USER}"
[[ ${VERBOSE} == '-vvv' ]] && echo "SINGULARITY_HOME='${SINGULARITY_HOME}'"

CONTAINER=docker://ghcr.io/eessi/bootstrap-prefix:debian11

${RUNTIME} shell ${CONTAINER} <<EOF
pip3 install --ignore-installed --prefix=/tmp/reframe reframe-hpc
export PYTHONPATH=/tmp/reframe/lib/python3.9/site-packages
export EESSI_REPO_DIR="/cvmfs/${eessi_repo}"
export EESSI_VERSION=${eessi_version}
export EESSI_ARCH=${host_arch}
export EESSI_OS=linux
export RFM_PREFIX=/compatibility-layer/reframe_runs
/tmp/reframe/bin/reframe --nocolor -r -v -c /compatibility-layer/test/compat_layer.py
EOF

exit 0
