diff --git a/bot/check-result.sh b/bot/check-result.sh new file mode 100755 index 0000000000..182b8555a8 --- /dev/null +++ b/bot/check-result.sh @@ -0,0 +1,492 @@ +#!/bin/bash +# +# Script to check the result of building the EESSI software layer. +# Intended use is that it is called by a (batch) job running on a compute +# node. +# +# This script is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Thomas Roeblitz (@trz42) +# +# license: GPLv2 +# + +# result cases + +# - SUCCESS (all of) +# - working directory contains slurm-JOBID.out file +# - working directory contains eessi*tar.gz +# - no message ERROR +# - no message FAILED +# - no message ' required modules missing:' +# - one or more of 'No missing installations' +# - message regarding created tarball +# - FAILED (one of ... implemented as NOT SUCCESS) +# - no slurm-JOBID.out file +# - no tarball +# - message with ERROR +# - message with FAILED +# - message with ' required modules missing:' +# - no message regarding created tarball + +# stop as soon as something fails +# set -e + +TOPDIR=$(dirname $(realpath $0)) + +source ${TOPDIR}/../scripts/utils.sh +source ${TOPDIR}/../scripts/cfg_files.sh + +# defaults +export JOB_CFG_FILE="${JOB_CFG_FILE_OVERRIDE:=./cfg/job.cfg}" + +# check if ${JOB_CFG_FILE} exists +if [[ ! -r "${JOB_CFG_FILE}" ]]; then + echo_red "job config file (JOB_CFG_FILE=${JOB_CFG_FILE}) does not exist or not readable" +else + echo "bot/check-result.sh: showing ${JOB_CFG_FILE} from software-layer side" + cat ${JOB_CFG_FILE} + + echo "bot/check-result.sh: obtaining configuration settings from '${JOB_CFG_FILE}'" + cfg_load ${JOB_CFG_FILE} +fi + +display_help() { + echo "usage: $0 [OPTIONS]" + echo " OPTIONS:" + echo " -h | --help - display this usage information [default: false]" + echo " -v | --verbose - display more information [default: false]" +} + +# set defaults for command line arguments +VERBOSE=0 + +POSITIONAL_ARGS=() + +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + display_help + exit 0 + ;; + -v|--verbose) + VERBOSE=1 + shift 1 + ;; + --) + shift + POSITIONAL_ARGS+=("$@") # save positional args + break + ;; + -*|--*) + fatal_error "Unknown option: $1" "${CMDLINE_ARG_UNKNOWN_EXITCODE}" + ;; + *) # No more options + POSITIONAL_ARGS+=("$1") # save positional arg + shift + ;; + esac +done + +set -- "${POSITIONAL_ARGS[@]}" + +job_dir=${PWD} + +[[ ${VERBOSE} -ne 0 ]] && echo ">> analysing job in directory ${job_dir}" + +job_out="slurm-${SLURM_JOB_ID}.out" +[[ ${VERBOSE} -ne 0 ]] && echo ">> searching for job output file(s) matching '"${job_out}"'" +if [[ -f ${job_out} ]]; then + SLURM=1 + [[ ${VERBOSE} -ne 0 ]] && echo " found slurm output file '"${job_out}"'" +else + SLURM=0 + [[ ${VERBOSE} -ne 0 ]] && echo " Slurm output file '"${job_out}"' NOT found" +fi + +ERROR=-1 +if [[ ${SLURM} -eq 1 ]]; then + GP_error='ERROR: ' + grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_error}") + [[ $? -eq 0 ]] && ERROR=1 || ERROR=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_error}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}" +fi + +FAILED=-1 +if [[ ${SLURM} -eq 1 ]]; then + GP_failed='FAILED: ' + grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_failed}") + [[ $? -eq 0 ]] && FAILED=1 || FAILED=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_failed}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}" +fi + +MISSING=-1 +if [[ ${SLURM} -eq 1 ]]; then + GP_req_missing=' required modules missing:' + grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_req_missing}") + [[ $? -eq 0 ]] && MISSING=1 || MISSING=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_req_missing}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}" +fi + +NO_MISSING=-1 +if [[ ${SLURM} -eq 1 ]]; then + GP_no_missing='No missing installations' + grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_no_missing}") + [[ $? -eq 0 ]] && NO_MISSING=1 || NO_MISSING=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_no_missing}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}" +fi + +TGZ=-1 +TARBALL= +if [[ ${SLURM} -eq 1 ]]; then + GP_tgz_created="\.tar\.gz created!" + grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_tgz_created}" | sort -u) + if [[ $? -eq 0 ]]; then + TGZ=1 + TARBALL=$(echo ${grep_out} | sed -e 's@^.*/\(eessi[^/ ]*\) .*$@\1@') + else + TGZ=0 + fi + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_tgz_created}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}" +fi + +[[ ${VERBOSE} -ne 0 ]] && echo "SUMMARY: ${job_dir}/${job_out}" +[[ ${VERBOSE} -ne 0 ]] && echo " : ()" +[[ ${VERBOSE} -ne 0 ]] && echo " ERROR......: $([[ $ERROR -eq 1 ]] && echo 'yes' || echo 'no') (no)" +[[ ${VERBOSE} -ne 0 ]] && echo " FAILED.....: $([[ $FAILED -eq 1 ]] && echo 'yes' || echo 'no') (no)" +[[ ${VERBOSE} -ne 0 ]] && echo " REQ_MISSING: $([[ $MISSING -eq 1 ]] && echo 'yes' || echo 'no') (no)" +[[ ${VERBOSE} -ne 0 ]] && echo " NO_MISSING.: $([[ $NO_MISSING -eq 1 ]] && echo 'yes' || echo 'no') (yes)" +[[ ${VERBOSE} -ne 0 ]] && echo " TGZ_CREATED: $([[ $TGZ -eq 1 ]] && echo 'yes' || echo 'no') (yes)" + +job_result_file=_bot_job${SLURM_JOB_ID}.result + +if [[ ${SLURM} -eq 1 ]] && \ + [[ ${ERROR} -eq 0 ]] && \ + [[ ${FAILED} -eq 0 ]] && \ + [[ ${MISSING} -eq 0 ]] && \ + [[ ${NO_MISSING} -eq 1 ]] && \ + [[ ${TGZ} -eq 1 ]] && \ + [[ ! -z ${TARBALL} ]]; then + # SUCCESS + status="SUCCESS" + summary=":grin: SUCCESS" +else + # FAILURE + status="FAILURE" + summary=":cry: FAILURE" +fi + +### Example details/descriptions +# Note, final string must not contain any line breaks. Below example include +# line breaks for the sake of readability. In case of FAILURE, the structure is +# very similar (incl. information about Artefacts if any was produced), however, +# under Details some lines will be marked with :heavy_multiplication_x: +#
+# :grin: SUCCESS _(click triangle for details)_ +#
+#
_Details_
+#
+# :heavy_check_mark: job output file slurm-4682.out
+# :heavy_check_mark: no message matching ERROR:
+# :heavy_check_mark: no message matching FAILED:
+# :heavy_check_mark: no message matching required modules missing:
+# :heavy_check_mark: found message(s) matching No missing installations
+# :heavy_check_mark: found message matching tar.gz created!
+#
+#
_Artefacts_
+#
+#
+# eessi-2023.06-software-linux-x86_64-generic-1682696567.tar.gz +# size: 234 MiB (245366784 bytes)
+# entries: 1234
+# modules under _2023.06/software/linux/x86_64/generic/modules/all/_
+#
+#           GCC/9.3.0.lua
+# GCC/10.3.0.lua
+# OpenSSL/1.1.lua +#
+# software under _2023.06/software/linux/x86_64/generic/software/_ +#
+#           GCC/9.3.0/
+# CMake/3.20.1-GCCcore-10.3.0/
+# OpenMPI/4.1.1-GCC-10.3.0/ +#
+# other under _2023.06/software/linux/x86_64/generic/_ +#
+#           .lmod/cache/spiderT.lua
+# .lmod/cache/spiderT.luac_5.1
+# .lmod/cache/timestamp +#
+#
+#
+#
+#
+# +#
+# :cry: FAILURE _(click triangle for details)_ +#
+#
_Details_
+#
+# :heavy_check_mark: job output file slurm-4682.out
+# :heavy_multiplication_x: no message matching ERROR:
+# :heavy_check_mark: no message matching FAILED:
+# :heavy_multiplication_x: no message matching required modules missing:
+# :heavy_check_mark: found message(s) matching No missing installations
+# :heavy_check_mark: found message matching tar.gz created!
+#
+#
_Artefacts_
+#
+# No artefacts were created or found. +#
+#
+#
+### + +# construct and write complete PR comment details: implements third alternative +comment_template="
__SUMMARY_FMT__
__DETAILS_FMT____ARTEFACTS_FMT__
" +comment_summary_fmt="__SUMMARY__ _(click triangle for details)_" +comment_details_fmt="
_Details_
__DETAILS_LIST__
" +comment_success_item_fmt=":heavy_check_mark: __ITEM__" +comment_failure_item_fmt=":heavy_multiplication_x: __ITEM__" +comment_artefacts_fmt="
_Artefacts_
__ARTEFACTS_LIST__
" +comment_artefact_details_fmt="
__ARTEFACT_SUMMARY____ARTEFACT_DETAILS__
" + +function print_br_item() { + format="${1}" + item="${2}" + echo -n "${format//__ITEM__/${item}}
" +} + +function print_br_item2() { + format="${1}" + item="${2}" + item2="${3}" + format1="${format//__ITEM__/${item}}" + echo -n "${format1//__ITEM2__/${item2}}
" +} + +function print_code_item() { + format="${1}" + item="${2}" + echo -n "${format//__ITEM__/${item}}" +} + +function print_dd_item() { + format="${1}" + item="${2}" + echo -n "
${format//__ITEM__/${item}}
" +} + +function print_list_item() { + format="${1}" + item="${2}" + echo -n "
  • ${format//__ITEM__/${item}}
  • " +} + +function print_pre_item() { + format="${1}" + item="${2}" + echo -n "
    ${format//__ITEM__/${item}}
    " +} + +function success() { + format="${comment_success_item_fmt}" + item="$1" + print_br_item "${format}" "${item}" +} + +function failure() { + format="${comment_failure_item_fmt}" + item="$1" + print_br_item "${format}" "${item}" +} + +function add_detail() { + actual=${1} + expected=${2} + success_msg="${3}" + failure_msg="${4}" + if [[ ${actual} -eq ${expected} ]]; then + success "${success_msg}" + else + failure "${failure_msg}" + fi +} + +echo "[RESULT]" > ${job_result_file} +echo -n "comment_description = " >> ${job_result_file} + +# construct values for placeholders in comment_template: +# - __SUMMARY_FMT__ -> variable $comment_summary +# - __DETAILS_FMT__ -> variable $comment_details +# - __ARTEFACTS_FMT__ -> variable $comment_artefacts + +comment_summary="${comment_summary_fmt/__SUMMARY__/${summary}}" + +# first construct comment_details_list, abbreviated CoDeList +# then use it to set comment_details +CoDeList="" + +success_msg="job output file ${job_out}" +failure_msg="no job output file ${job_out}" +CoDeList=${CoDeList}$(add_detail ${SLURM} 1 "${success_msg}" "${failure_msg}") + +success_msg="no message matching ${GP_error}" +failure_msg="found message matching ${GP_error}" +CoDeList=${CoDeList}$(add_detail ${ERROR} 0 "${success_msg}" "${failure_msg}") + +success_msg="no message matching ${GP_failed}" +failure_msg="found message matching ${GP_failed}" +CoDeList=${CoDeList}$(add_detail ${FAILED} 0 "${success_msg}" "${failure_msg}") + +success_msg="no message matching ${GP_req_missing}" +failure_msg="found message matching ${GP_req_missing}" +CoDeList=${CoDeList}$(add_detail ${MISSING} 0 "${success_msg}" "${failure_msg}") + +success_msg="found message(s) matching ${GP_no_missing}" +failure_msg="no message matching ${GP_no_missing}" +CoDeList=${CoDeList}$(add_detail ${NO_MISSING} 1 "${success_msg}" "${failure_msg}") + +success_msg="found message matching ${GP_tgz_created}" +failure_msg="no message matching ${GP_tgz_created}" +CoDeList=${CoDeList}$(add_detail ${TGZ} 1 "${success_msg}" "${failure_msg}") + +comment_details="${comment_details_fmt/__DETAILS_LIST__/${CoDeList}}" + + +# first construct comment_artefacts_list, abbreviated CoArList +# then use it to set comment_artefacts +CoArList="" + +# TARBALL should only contain a single tarball +if [[ ! -z ${TARBALL} ]]; then + # Example of the detailed information for a tarball. The actual result MUST be a + # single line (no '\n') or it would break the structure of the markdown table + # that holds status updates of a bot job. + # + #
    + #
    + # eessi-2023.06-software-linux-x86_64-generic-1682696567.tar.gz + # size: 234 MiB (245366784 bytes)
    + # entries: 1234
    + # modules under _2023.06/software/linux/x86_64/intel/cascadelake/modules/all/_
    + #
    +    #       GCC/9.3.0.lua
    + # GCC/10.3.0.lua
    + # OpenSSL/1.1.lua + #
    + # software under _2023.06/software/linux/x86_64/intel/cascadelake/software/_ + #
    +    #       GCC/9.3.0/
    + # CMake/3.20.1-GCCcore-10.3.0/
    + # OpenMPI/4.1.1-GCC-10.3.0/ + #
    + # other under _2023.06/software/linux/x86_64/intel/cascadelake/_ + #
    +    #       .lmod/cache/spiderT.lua
    + # .lmod/cache/spiderT.luac_5.1
    + # .lmod/cache/timestamp + #
    + #
    + #
    + size="$(stat --dereference --printf=%s ${TARBALL})" + size_mib=$((${size} >> 20)) + tmpfile=$(mktemp --tmpdir=. tarfiles.XXXX) + tar tf ${TARBALL} > ${tmpfile} + entries=$(cat ${tmpfile} | wc -l) + # determine prefix from job config: VERSION/software/OS_TYPE/CPU_FAMILY/ARCHITECTURE + # e.g., 2023.06/software/linux/x86_64/intel/skylake_avx512 + # cfg/job.cfg contains (only the attributes to be used are shown below): + # [repository] + # repo_version = 2023.06 + # [architecture] + # os_type = linux + # software_subdir = x86_64/intel/skylake_avx512 + repo_version=$(cfg_get_value "repository" "repo_version") + os_type=$(cfg_get_value "architecture" "os_type") + software_subdir=$(cfg_get_value "architecture" "software_subdir") + prefix="${repo_version}/software/${os_type}/${software_subdir}" + + # extract directories/entries from tarball content + modules_entries=$(grep "${prefix}/modules" ${tmpfile}) + software_entries=$(grep "${prefix}/software" ${tmpfile}) + other_entries=$(cat ${tmpfile} | grep -v "${prefix}/modules" | grep -v "${prefix}/software") + other_shortened=$(echo "${other_entries}" | sed -e "s@^.*${prefix}/@@" | sort -u) + modules=$(echo "${modules_entries}" | grep "/all/.*/.*lua$" | sed -e 's@^.*/\([^/]*/[^/]*.lua\)$@\1@' | sort -u) + software_pkgs=$(echo "${software_entries}" | sed -e "s@${prefix}/software/@@" | awk -F/ '{if (NR >= 2) {print $1 "/" $2}}' | sort -u) + + artefact_summary="$(print_code_item '__ITEM__' ${TARBALL})" + CoArList="" + CoArList="${CoArList}$(print_br_item2 'size: __ITEM__ MiB (__ITEM2__ bytes)' ${size_mib} ${size})" + CoArList="${CoArList}$(print_br_item 'entries: __ITEM__' ${entries})" + CoArList="${CoArList}$(print_br_item 'modules under ___ITEM___' ${prefix}/modules/all)" + CoArList="${CoArList}
    "
    +    if [[ ! -z ${modules} ]]; then
    +        while IFS= read -r mod ; do
    +            CoArList="${CoArList}$(print_br_item '__ITEM__' ${mod})"
    +        done <<< "${modules}"
    +    else
    +        CoArList="${CoArList}$(print_br_item '__ITEM__' 'no module files in tarball')"
    +    fi
    +    CoArList="${CoArList}
    " + CoArList="${CoArList}$(print_br_item 'software under ___ITEM___' ${prefix}/software)" + CoArList="${CoArList}
    "
    +    if [[ ! -z ${software_pkgs} ]]; then
    +        while IFS= read -r sw_pkg ; do
    +            CoArList="${CoArList}$(print_br_item '__ITEM__' ${sw_pkg})"
    +        done <<< "${software_pkgs}"
    +    else
    +        CoArList="${CoArList}$(print_br_item '__ITEM__' 'no software packages in tarball')"
    +    fi
    +    CoArList="${CoArList}
    " + CoArList="${CoArList}$(print_br_item 'other under ___ITEM___' ${prefix})" + CoArList="${CoArList}
    "
    +    if [[ ! -z ${other_shortened} ]]; then
    +        while IFS= read -r other ; do
    +            CoArList="${CoArList}$(print_br_item '__ITEM__' ${other})"
    +        done <<< "${other_shortened}"
    +    else
    +        CoArList="${CoArList}$(print_br_item '__ITEM__' 'no other files in tarball')"
    +    fi
    +    CoArList="${CoArList}
    " +else + CoArList="${CoArList}$(print_dd_item 'No artefacts were created or found.' '')" +fi + +comment_artefacts_details="${comment_artefact_details_fmt/__ARTEFACT_SUMMARY__/${artefact_summary}}" +comment_artefacts_details="${comment_artefacts_details/__ARTEFACT_DETAILS__/${CoArList}}" +comment_artefacts="${comment_artefacts_fmt/__ARTEFACTS_LIST__/${comment_artefacts_details}}" + +# now put all pieces together creating comment_details from comment_template +comment_description=${comment_template/__SUMMARY_FMT__/${comment_summary}} +comment_description=${comment_description/__DETAILS_FMT__/${comment_details}} +comment_description=${comment_description/__ARTEFACTS_FMT__/${comment_artefacts}} + +echo "${comment_description}" >> ${job_result_file} + +# add overall result: SUCCESS, FAILURE, UNKNOWN + artefacts +# - this should make use of subsequent steps such as deploying a tarball more +# efficient +echo "status = ${status}" >> ${job_result_file} +echo "artefacts = " >> ${job_result_file} +echo "${TARBALL}" | sed -e 's/^/ /g' >> ${job_result_file} + +# remove tmpfile +if [[ -f ${tmpfile} ]]; then + rm ${tmpfile} +fi + +# exit script with value that reflects overall job result: SUCCESS (0), FAILURE (1) +test "${status}" == "SUCCESS" +exit $? diff --git a/check_missing_installations.sh b/check_missing_installations.sh index 4a5316c09f..3627d1d0b5 100755 --- a/check_missing_installations.sh +++ b/check_missing_installations.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Script to check for missing installations in EESSI pilot software stack (version 2021.12) +# Script to check for missing installations in EESSI pilot software stack (version 2023.06) # # author: Kenneth Hoste (@boegel) # author: Thomas Roeblitz (@trz42) @@ -10,10 +10,11 @@ TOPDIR=$(dirname $(realpath $0)) -if [ -z ${EESSI_PILOT_VERSION} ]; then - echo "ERROR: \${EESSI_PILOT_VERSION} must be set to run $0!" >&2 +if [ $# -ne 1 ]; then + echo "ERROR: Usage: $0 " >&2 exit 1 fi +easystack=$1 LOCAL_TMPDIR=$(mktemp -d) @@ -23,9 +24,7 @@ source $TOPDIR/configure_easybuild echo ">> Checking for missing installations in ${EASYBUILD_INSTALLPATH}..." eb_missing_out=$LOCAL_TMPDIR/eb_missing.out -# we need to use --from-pr to pull in some easyconfigs that are not available in EasyBuild version being used -# PR #16531: Nextflow-22.10.1.eb -${EB:-eb} --from-pr 16531 --easystack eessi-${EESSI_PILOT_VERSION}.yml --experimental --missing 2>&1 | tee ${eb_missing_out} +${EB:-eb} --easystack ${easystack} --missing 2>&1 | tee ${eb_missing_out} exit_code=${PIPESTATUS[0]} ok_msg="Command 'eb --missing ...' succeeded, analysing output..." diff --git a/configure_easybuild b/configure_easybuild index 245553f342..23f3920154 100644 --- a/configure_easybuild +++ b/configure_easybuild @@ -36,3 +36,6 @@ fi export EASYBUILD_FILTER_DEPS=$DEPS_TO_FILTER export EASYBUILD_MODULE_EXTENSIONS=1 + +# need to enable use of experimental features, since we're using easystack files +export EASYBUILD_EXPERIMENTAL=1