diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8e74a4e844..faa7eb82ff 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -22,7 +22,7 @@ jobs: - name: install Python packages run: | - pip install archspec + pip install archspec==0.2.2 - name: test eessi_software_subdir.py script run: | diff --git a/.github/workflows/tests_init.yml b/.github/workflows/tests_init.yml index 38ccbbad31..053acb9730 100644 --- a/.github/workflows/tests_init.yml +++ b/.github/workflows/tests_init.yml @@ -22,7 +22,7 @@ jobs: - name: install Python packages run: | - pip install archspec pytest + pip install archspec==0.2.2 pytest - name: unit tests for eessi_software_subdir_for_host.py script run: diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 69de9d1997..75b4f71178 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -237,7 +237,7 @@ else copy_build_log "${eb_last_log}" "${build_logs_dir}" fi - $TOPDIR/check_missing_installations.sh ${TOPDIR}/${easystack_file} + $TOPDIR/check_missing_installations.sh ${TOPDIR}/${easystack_file} ${TOPDIR}/${pr_diff} else fatal_error "Easystack file ${easystack_file} not found!" fi diff --git a/bot/check-build.sh b/bot/check-build.sh index ec1ca56bba..d8246c67be 100755 --- a/bot/check-build.sh +++ b/bot/check-build.sh @@ -98,15 +98,15 @@ job_dir=${PWD} job_out="slurm-${SLURM_JOB_ID}.out" [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for job output file(s) matching '"${job_out}"'" if [[ -f ${job_out} ]]; then - SLURM=1 + SLURM_OUTPUT_FOUND=1 [[ ${VERBOSE} -ne 0 ]] && echo " found slurm output file '"${job_out}"'" else - SLURM=0 + SLURM_OUTPUT_FOUND=0 [[ ${VERBOSE} -ne 0 ]] && echo " Slurm output file '"${job_out}"' NOT found" fi ERROR=-1 -if [[ ${SLURM} -eq 1 ]]; then +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then GP_error='ERROR: ' grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_error}") [[ $? -eq 0 ]] && ERROR=1 || ERROR=0 @@ -116,7 +116,7 @@ if [[ ${SLURM} -eq 1 ]]; then fi FAILED=-1 -if [[ ${SLURM} -eq 1 ]]; then +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then GP_failed='FAILED: ' grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_failed}") [[ $? -eq 0 ]] && FAILED=1 || FAILED=0 @@ -126,7 +126,7 @@ if [[ ${SLURM} -eq 1 ]]; then fi MISSING=-1 -if [[ ${SLURM} -eq 1 ]]; then +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then GP_req_missing=' required modules missing:' grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_req_missing}") [[ $? -eq 0 ]] && MISSING=1 || MISSING=0 @@ -136,7 +136,7 @@ if [[ ${SLURM} -eq 1 ]]; then fi NO_MISSING=-1 -if [[ ${SLURM} -eq 1 ]]; then +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then GP_no_missing='No missing installations' grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_no_missing}") [[ $? -eq 0 ]] && NO_MISSING=1 || NO_MISSING=0 @@ -147,7 +147,7 @@ fi TGZ=-1 TARBALL= -if [[ ${SLURM} -eq 1 ]]; then +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then GP_tgz_created="\.tar\.gz created!" grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_tgz_created}" | sort -u) if [[ $? 
-eq 0 ]]; then @@ -169,9 +169,27 @@ fi [[ ${VERBOSE} -ne 0 ]] && echo " NO_MISSING.: $([[ $NO_MISSING -eq 1 ]] && echo 'yes' || echo 'no') (yes)" [[ ${VERBOSE} -ne 0 ]] && echo " TGZ_CREATED: $([[ $TGZ -eq 1 ]] && echo 'yes' || echo 'no') (yes)" +# Here, we try to do some additional analysis on the output file +# to see if we can print a more clear 'reason' for the failure +# For now, we only analyse unmerged EasyConfigs as potential cause, but we can easily add checks for other +# specific scenarios below + +# Check for the pattern being added here by check_missing_installations.sh to the output to +# see if EasyConfigs might have been unmerged, and that's causing a failure +UNMERGED_EASYCONFIG=-1 +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then + gp_unmerged="are you sure all PRs referenced have been merged in EasyBuild" + grep_unmerged=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${gp_unmerged}") + [[ $? -eq 0 ]] && UNMERGED_EASYCONFIG=1 || UNMERGED_EASYCONFIG=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${gp_unmerged}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_unmerged}" +fi + job_result_file=_bot_job${SLURM_JOB_ID}.result -if [[ ${SLURM} -eq 1 ]] && \ +# Default reason: +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]] && \ [[ ${ERROR} -eq 0 ]] && \ [[ ${FAILED} -eq 0 ]] && \ [[ ${MISSING} -eq 0 ]] && \ @@ -180,10 +198,17 @@ if [[ ${SLURM} -eq 1 ]] && \ [[ ! -z ${TARBALL} ]]; then # SUCCESS status="SUCCESS" + reason="" summary=":grin: SUCCESS" +elif [[ ${UNMERGED_EASYCONFIG} -eq 1 ]]; then + status="FAILURE" + reason="EasyConfig not found during missing installation check. Are you sure all PRs referenced have been merged in EasyBuild?" + summary=":cry: FAILURE" else # FAILURE status="FAILURE" + # General failure, we don't know a more specific reason + reason="" summary=":cry: FAILURE" fi @@ -253,14 +278,6 @@ fi # ### -# construct and write complete PR comment details: implements third alternative -comment_template="
<details>__SUMMARY_FMT__<br/>__DETAILS_FMT____ARTEFACTS_FMT__</details>"
-comment_summary_fmt="<summary>__SUMMARY__ _(click triangle for details)_</summary>"
-comment_details_fmt="<dl><dt>_Details_</dt><dd>__DETAILS_LIST__</dd></dl>"
-comment_success_item_fmt=":white_check_mark: __ITEM__"
-comment_failure_item_fmt=":x: __ITEM__"
-comment_artefacts_fmt="<dl><dt>_Artefacts_</dt>__ARTEFACTS_LIST__</dl>"
-comment_artefact_details_fmt="<dd>__ARTEFACT_SUMMARY____ARTEFACT_DETAILS__</dd>
" function print_br_item() { format="${1}" @@ -332,42 +349,66 @@ echo -n "comment_description = " >> ${job_result_file} # - __DETAILS_FMT__ -> variable $comment_details # - __ARTEFACTS_FMT__ -> variable $comment_artefacts +# construct and write complete PR comment details: implements third alternative +comment_template="
<details>__SUMMARY_FMT__<br/>__REASON_FMT____DETAILS_FMT____ARTEFACTS_FMT__</details>
" +comment_success_item_fmt=":white_check_mark: __ITEM__" +comment_failure_item_fmt=":x: __ITEM__" + +# Initialize comment_description +comment_description=${comment_template} + +# Now, start replacing template items one by one +# Replace the summary template (__SUMMARY_FMT__) +comment_summary_fmt="__SUMMARY__ _(click triangle for details)_" comment_summary="${comment_summary_fmt/__SUMMARY__/${summary}}" +comment_description=${comment_template/__SUMMARY_FMT__/${comment_summary}} + +# Only add if there is a reason (e.g. no reason for successful runs) +if [[ ! -z ${reason} ]]; then + comment_reason_fmt="
<dl><dt>_Reason_</dt>
<dd>__REASONS__</dd></dl>
" + reason_details="${comment_reason_fmt/__REASONS__/${reason}}" + comment_description=${comment_description/__REASON_FMT__/${reason_details}} +else + comment_description=${comment_description/__REASON_FMT__/""} +fi -# first construct comment_details_list, abbreviated CoDeList +# Replace the details template (__DETAILS_FMT__) +# first construct comment_details_list, abbreviated comment_details_list # then use it to set comment_details -CoDeList="" +comment_details_list="" success_msg="job output file ${job_out}" failure_msg="no job output file ${job_out}" -CoDeList=${CoDeList}$(add_detail ${SLURM} 1 "${success_msg}" "${failure_msg}") +comment_details_list=${comment_details_list}$(add_detail ${SLURM_OUTPUT_FOUND} 1 "${success_msg}" "${failure_msg}") success_msg="no message matching ${GP_error}" failure_msg="found message matching ${GP_error}" -CoDeList=${CoDeList}$(add_detail ${ERROR} 0 "${success_msg}" "${failure_msg}") +comment_details_list=${comment_details_list}$(add_detail ${ERROR} 0 "${success_msg}" "${failure_msg}") success_msg="no message matching ${GP_failed}" failure_msg="found message matching ${GP_failed}" -CoDeList=${CoDeList}$(add_detail ${FAILED} 0 "${success_msg}" "${failure_msg}") +comment_details_list=${comment_details_list}$(add_detail ${FAILED} 0 "${success_msg}" "${failure_msg}") success_msg="no message matching ${GP_req_missing}" failure_msg="found message matching ${GP_req_missing}" -CoDeList=${CoDeList}$(add_detail ${MISSING} 0 "${success_msg}" "${failure_msg}") +comment_details_list=${comment_details_list}$(add_detail ${MISSING} 0 "${success_msg}" "${failure_msg}") success_msg="found message(s) matching ${GP_no_missing}" failure_msg="no message matching ${GP_no_missing}" -CoDeList=${CoDeList}$(add_detail ${NO_MISSING} 1 "${success_msg}" "${failure_msg}") +comment_details_list=${comment_details_list}$(add_detail ${NO_MISSING} 1 "${success_msg}" "${failure_msg}") success_msg="found message matching ${GP_tgz_created}" failure_msg="no message matching ${GP_tgz_created}" -CoDeList=${CoDeList}$(add_detail ${TGZ} 1 "${success_msg}" "${failure_msg}") - -comment_details="${comment_details_fmt/__DETAILS_LIST__/${CoDeList}}" +comment_details_list=${comment_details_list}$(add_detail ${TGZ} 1 "${success_msg}" "${failure_msg}") +# Now, do the actual replacement of __DETAILS_FMT__ +comment_details_fmt="
<dl><dt>_Details_</dt>
<dd>__DETAILS_LIST__</dd></dl>
" +comment_details="${comment_details_fmt/__DETAILS_LIST__/${comment_details_list}}" +comment_description=${comment_description/__DETAILS_FMT__/${comment_details}} -# first construct comment_artefacts_list, abbreviated CoArList +# first construct comment_artefacts_list # then use it to set comment_artefacts -CoArList="" +comment_artifacts_list="" # TARBALL should only contain a single tarball if [[ ! -z ${TARBALL} ]]; then @@ -427,50 +468,49 @@ if [[ ! -z ${TARBALL} ]]; then software_pkgs=$(echo "${software_entries}" | sed -e "s@${prefix}/software/@@" | awk -F/ '{if (NR >= 2) {print $1 "/" $2}}' | sort -u) artefact_summary="$(print_code_item '__ITEM__' ${TARBALL})" - CoArList="" - CoArList="${CoArList}$(print_br_item2 'size: __ITEM__ MiB (__ITEM2__ bytes)' ${size_mib} ${size})" - CoArList="${CoArList}$(print_br_item 'entries: __ITEM__' ${entries})" - CoArList="${CoArList}$(print_br_item 'modules under ___ITEM___' ${prefix}/modules/all)" - CoArList="${CoArList}
"
+    comment_artifacts_list=""
+    comment_artifacts_list="${comment_artifacts_list}$(print_br_item2 'size: __ITEM__ MiB (__ITEM2__ bytes)' ${size_mib} ${size})"
+    comment_artifacts_list="${comment_artifacts_list}$(print_br_item 'entries: __ITEM__' ${entries})"
+    comment_artifacts_list="${comment_artifacts_list}$(print_br_item 'modules under ___ITEM___' ${prefix}/modules/all)"
+    comment_artifacts_list="${comment_artifacts_list}
"
     if [[ ! -z ${modules} ]]; then
         while IFS= read -r mod ; do
-            CoArList="${CoArList}$(print_br_item '__ITEM__' ${mod})"
+            comment_artifacts_list="${comment_artifacts_list}$(print_br_item '__ITEM__' ${mod})"
         done <<< "${modules}"
     else
-        CoArList="${CoArList}$(print_br_item '__ITEM__' 'no module files in tarball')"
+        comment_artifacts_list="${comment_artifacts_list}$(print_br_item '__ITEM__' 'no module files in tarball')"
     fi
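# Sketch (hypothetical item value): print_br_item, defined earlier in this file,
# renders one list entry by substituting every __ITEM__ in its format string and
# appending "<br/>"; for a module line the effect is roughly:
demo_fmt='__ITEM__'
demo_item='GROMACS/2021.3-foss-2021a'
echo -n "${demo_fmt//__ITEM__/${demo_item}}<br/>"
# -> GROMACS/2021.3-foss-2021a<br/>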
-    CoArList="${CoArList}
" - CoArList="${CoArList}$(print_br_item 'software under ___ITEM___' ${prefix}/software)" - CoArList="${CoArList}
"
+    comment_artifacts_list="${comment_artifacts_list}
" + comment_artifacts_list="${comment_artifacts_list}$(print_br_item 'software under ___ITEM___' ${prefix}/software)" + comment_artifacts_list="${comment_artifacts_list}
"
     if [[ ! -z ${software_pkgs} ]]; then
         while IFS= read -r sw_pkg ; do
-            CoArList="${CoArList}$(print_br_item '__ITEM__' ${sw_pkg})"
+            comment_artifacts_list="${comment_artifacts_list}$(print_br_item '__ITEM__' ${sw_pkg})"
         done <<< "${software_pkgs}"
     else
-        CoArList="${CoArList}$(print_br_item '__ITEM__' 'no software packages in tarball')"
+        comment_artifacts_list="${comment_artifacts_list}$(print_br_item '__ITEM__' 'no software packages in tarball')"
     fi
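# Sketch (hypothetical package list): the 'while IFS= read -r ... done <<< "${var}"'
# herestring used in these loops runs in the current shell (a pipe into 'while'
# would fork a subshell), so the string built up inside the loop is still set afterwards:
demo_pkgs=$'GROMACS/2021.3\nOpenMPI/4.1.1'
demo_out=""
while IFS= read -r pkg ; do
    demo_out="${demo_out}${pkg}<br/>"
done <<< "${demo_pkgs}"
echo "${demo_out}"    # -> GROMACS/2021.3<br/>OpenMPI/4.1.1<br/>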
-    CoArList="${CoArList}
" - CoArList="${CoArList}$(print_br_item 'other under ___ITEM___' ${prefix})" - CoArList="${CoArList}
"
+    comment_artifacts_list="${comment_artifacts_list}
" + comment_artifacts_list="${comment_artifacts_list}$(print_br_item 'other under ___ITEM___' ${prefix})" + comment_artifacts_list="${comment_artifacts_list}
"
     if [[ ! -z ${other_shortened} ]]; then
         while IFS= read -r other ; do
-            CoArList="${CoArList}$(print_br_item '__ITEM__' ${other})"
+            comment_artifacts_list="${comment_artifacts_list}$(print_br_item '__ITEM__' ${other})"
         done <<< "${other_shortened}"
     else
-        CoArList="${CoArList}$(print_br_item '__ITEM__' 'no other files in tarball')"
+        comment_artifacts_list="${comment_artifacts_list}$(print_br_item '__ITEM__' 'no other files in tarball')"
     fi
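# Sketch (hypothetical placeholder names): the comment is assembled below with
# bash's ${var/PATTERN/REPLACEMENT}, which substitutes only the first occurrence,
# so each template slot can be filled independently, or removed by replacing with "":
demo_tmpl="<details>__SUMMARY__<br/>__BODY__</details>"
demo_out=${demo_tmpl/__SUMMARY__/":grin: SUCCESS"}
demo_out=${demo_out/__BODY__/"<dl><dt>_Details_</dt><dd>...</dd></dl>"}
echo "${demo_out}"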
-    CoArList="${CoArList}
" + comment_artifacts_list="${comment_artifacts_list}
" else - CoArList="${CoArList}$(print_dd_item 'No artefacts were created or found.' '')" + comment_artifacts_list="${comment_artifacts_list}$(print_dd_item 'No artefacts were created or found.' '')" fi +comment_artefact_details_fmt="
<dd>__ARTEFACT_SUMMARY____ARTEFACT_DETAILS__</dd>
" comment_artefacts_details="${comment_artefact_details_fmt/__ARTEFACT_SUMMARY__/${artefact_summary}}" -comment_artefacts_details="${comment_artefacts_details/__ARTEFACT_DETAILS__/${CoArList}}" -comment_artefacts="${comment_artefacts_fmt/__ARTEFACTS_LIST__/${comment_artefacts_details}}" +comment_artefacts_details="${comment_artefacts_details/__ARTEFACT_DETAILS__/${comment_artifacts_list}}" -# now put all pieces together creating comment_details from comment_template -comment_description=${comment_template/__SUMMARY_FMT__/${comment_summary}} -comment_description=${comment_description/__DETAILS_FMT__/${comment_details}} +comment_artefacts_fmt="
<dl><dt>_Artefacts_</dt>
__ARTEFACTS_LIST__</dl>
" +comment_artefacts="${comment_artefacts_fmt/__ARTEFACTS_LIST__/${comment_artefacts_details}}" comment_description=${comment_description/__ARTEFACTS_FMT__/${comment_artefacts}} echo "${comment_description}" >> ${job_result_file} diff --git a/bot/check-test.sh b/bot/check-test.sh index 76e0df7f40..f045b9500a 100755 --- a/bot/check-test.sh +++ b/bot/check-test.sh @@ -13,8 +13,193 @@ job_dir=${PWD} job_out="slurm-${SLURM_JOB_ID}.out" job_test_result_file="_bot_job${SLURM_JOB_ID}.test" +# Check that job output file is found +[[ ${VERBOSE} -ne 0 ]] && echo ">> searching for job output file(s) matching '"${job_out}"'" +if [[ -f ${job_out} ]]; then + SLURM_OUTPUT_FOUND=1 + [[ ${VERBOSE} -ne 0 ]] && echo " found slurm output file '"${job_out}"'" +else + SLURM_OUTPUT_FOUND=0 + [[ ${VERBOSE} -ne 0 ]] && echo " Slurm output file '"${job_out}"' NOT found" +fi + + +# ReFrame prints e.g. +#[----------] start processing checks +#[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:rome+default +#[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:genoa+default +#[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=1_cpn_2_nodes %module_name=GROMACS/2021.3-foss-2021a /f4194106 @snellius:genoa+default +#[ FAIL ] (1/3) GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:genoa+default +#==> test failed during 'sanity': test staged in '/scratch-shared/casparl/reframe_output/staging/snellius/genoa/default/GROMACS_EESSI_d597cff4' +#[ OK ] (2/3) GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:rome+default +#P: perf: 8.441 ns/day (r:0, l:None, u:None) +#[ FAIL ] (3/3) GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=1_cpn_2_nodes %module_name=GROMACS/2021.3-foss-2021a /f4194106 @snellius:genoa+default +#==> test failed during 'sanity': test staged in '/scratch-shared/casparl/reframe_output/staging/snellius/genoa/default/GROMACS_EESSI_f4194106' +#[----------] all spawned checks have finished +#[ FAILED ] Ran 3/3 test case(s) from 2 check(s) (2 failure(s), 0 skipped, 0 aborted) + +# We will grep for the last and final line, since this reflects the overall result +# Specifically, we grep for FAILED, since this is also what we print if a step in the test script itself fails +FAILED=-1 +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then + GP_failed='\[\s*FAILED\s*\].*Ran .* test case' + grep_reframe_failed=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_failed}") + [[ $? -eq 0 ]] && FAILED=1 || FAILED=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_failed}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_reframe_failed}" +fi + +# Here, we grep for 'ERROR:', which is printed if a fatal_error is encountered when executing the test step +# I.e. this is an error in execution of the run_tests.sh itself, NOT in running the actual tests +ERROR=-1 +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then + GP_error='ERROR: ' + grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_error}") + [[ $? 
-eq 0 ]] && ERROR=1 || ERROR=0
+    # have to be careful to not add searched for pattern into slurm out file
+    [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_error}"'"
+    [[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}"
+fi
+
+SUCCESS=-1
+# Grep for the success pattern, so we can report the number of tests that were run
+if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then
+    GP_success='\[\s*PASSED\s*\].*Ran .* test case'
+    grep_reframe_success=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_success}")
+    [[ $? -eq 0 ]] && SUCCESS=1 || SUCCESS=0
+    # have to be careful to not add searched for pattern into slurm out file
+    [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_success}"'"
+    [[ ${VERBOSE} -ne 0 ]] && echo "${grep_reframe_success}"
+fi
+
+if [[ ! -z ${grep_reframe_failed} ]]; then
+    grep_reframe_result=${grep_reframe_failed}
+else
+    grep_reframe_result=${grep_reframe_success}
+fi
+
 echo "[TEST]" > ${job_test_result_file}
-echo "comment_description = (no tests yet)" >> ${job_test_result_file}
-echo "status = SUCCESS" >> ${job_test_result_file}
+if [[ ${SLURM_OUTPUT_FOUND} -eq 0 ]]; then
+    summary=":cry: FAILURE"
+    reason="Job output file not found, cannot check test results."
+    status="FAILURE"
+# Should come before general errors: if SUCCESS==1, it indicates the test suite ran successfully,
+# regardless of other things that might have gone wrong
+elif [[ ${SUCCESS} -eq 1 ]]; then
+    summary=":grin: SUCCESS"
+    reason=""
+    status="SUCCESS"
+# Should come before general errors: if FAILED==1, it indicates the test suite ran,
+# otherwise the pattern wouldn't have been there
+elif [[ ${FAILED} -eq 1 ]]; then
+    summary=":cry: FAILURE"
+    reason="EESSI test suite produced failures."
+    status="FAILURE"
+elif [[ ${ERROR} -eq 1 ]]; then
+    summary=":cry: FAILURE"
+    reason="EESSI test suite was not run, test step itself failed to execute."
+    status="FAILURE"
+else
+    summary=":cry: FAILURE"
+    reason="Failed for unknown reason."
+    status="FAILURE"
+fi
+
+
+echo "[TEST]" > ${job_test_result_file}
+echo -n "comment_description = " >> ${job_test_result_file}
+
+# Use template for writing PR comment with details
+# construct and write complete PR comment details: implements third alternative
+comment_template="
<details>__SUMMARY_FMT__<br/>__REASON_FMT____REFRAME_FMT____DETAILS_FMT__</details>
" +comment_success_item_fmt=":white_check_mark: __ITEM__" +comment_failure_item_fmt=":x: __ITEM__" + +# Initialize comment_description +comment_description=${comment_template} + +# Now, start replacing template items one by one +comment_summary_fmt="__SUMMARY__ _(click triangle for details)_" +comment_summary="${comment_summary_fmt/__SUMMARY__/${summary}}" +comment_description=${comment_description/__SUMMARY_FMT__/${comment_summary}} + + +# Only add if there is a reason (e.g. no reason for successful runs) +if [[ ! -z ${reason} ]]; then + comment_reason_fmt="
<dl><dt>_Reason_</dt>
<dd>__REASONS__</dd></dl>
" + reason_details="${comment_reason_fmt/__REASONS__/${reason}}" + comment_description=${comment_description/__REASON_FMT__/${reason_details}} +else + comment_description=${comment_description/__REASON_FMT__/""} +fi + +# Only add if there is a reframe summary (e.g. no reframe summary if reframe wasn't launched succesfully) +echo "ReFrame result:" +echo "${grep_reframe_result}" +if [[ ! -z ${grep_reframe_result} ]]; then + comment_reframe_fmt="
<dl><dt>_ReFrame Summary_</dt>
<dd>__REFRAME_SUMMARY__</dd></dl>
" + reframe_summary=${comment_reframe_fmt/__REFRAME_SUMMARY__/${grep_reframe_result}} + comment_description=${comment_description/__REFRAME_FMT__/${reframe_summary}} +else + comment_description=${comment_description/__REFRAME_FMT__/""} +fi + +# Declare functions +function print_br_item() { + format="${1}" + item="${2}" + echo -n "${format//__ITEM__/${item}}
" +} + +function success() { + format="${comment_success_item_fmt}" + item="$1" + print_br_item "${format}" "${item}" +} + +function failure() { + format="${comment_failure_item_fmt}" + item="$1" + print_br_item "${format}" "${item}" +} + +function add_detail() { + actual=${1} + expected=${2} + success_msg="${3}" + failure_msg="${4}" + if [[ ${actual} -eq ${expected} ]]; then + success "${success_msg}" + else + failure "${failure_msg}" + fi +} + +# first construct comment_details_list, abbreviated comment_details_list +# then use it to set comment_details +comment_details_list="" + +success_msg="job output file ${job_out}" +failure_msg="no job output file ${job_out}" +comment_details_list=${comment_details_list}$(add_detail ${SLURM_OUTPUT_FOUND} 1 "${success_msg}" "${failure_msg}") + +success_msg="no message matching ${GP_error}" +failure_msg="found message matching ${GP_error}" +comment_details_list=${comment_details_list}$(add_detail ${ERROR} 0 "${success_msg}" "${failure_msg}") + +# Add an escape character to every *, for it to be printed correctly in the comment on GitHub +GP_failed="${GP_failed//\*/\\*}" +success_msg="no message matching ""${GP_failed}""" +failure_msg="found message matching ""${GP_failed}""" +comment_details_list=${comment_details_list}$(add_detail ${FAILED} 0 "${success_msg}" "${failure_msg}") + +comment_details_fmt="
<dl><dt>_Details_</dt>
<dd>__DETAILS_LIST__</dd></dl>
" +comment_details="${comment_details_fmt/__DETAILS_LIST__/${comment_details_list}}" +comment_description=${comment_description/__DETAILS_FMT__/${comment_details}} + +# Actually writing the comment description to the result file +echo "${comment_description}" >> ${job_test_result_file} +echo "status = ${status}" >> ${job_test_result_file} exit 0 diff --git a/bot/test.sh b/bot/test.sh index 9d978cdcd0..4984340e6e 100755 --- a/bot/test.sh +++ b/bot/test.sh @@ -1,13 +1,225 @@ -#!/bin/bash +#!/usr/bin/env bash # -# Dummy script, no tests yet +# script to run tests or the test suite for the whole EESSI software layer or +# just what has been built in a job. Intended use is that it is called +# at the end of a (batch) job running on a compute node. # # This script is part of the EESSI software layer, see # https://github.com/EESSI/software-layer.git # -# author: Kenneth Hoste (HPC-UGent) +# author: Thomas Roeblitz (@trz42) +# author: Caspar van Leeuwen (@casparvl) # # license: GPLv2 # +# ASSUMPTIONs: +# + assumption for the build step (as run through bot/build.sh which is provided +# in this repository too) +# - working directory has been prepared by the bot with a checkout of a +# pull request (OR by some other means) +# - the working directory contains a directory 'cfg' where the main config +# file 'job.cfg' has been deposited +# - the directory may contain any additional files referenced in job.cfg +# + assumptions for the test step +# - temporary storage is still available +# example +# Using /localscratch/9640860/NESSI/eessi.x765Dd8mFh as tmp directory (to resume session add '--resume /localscratch/9640860/NESSI/eessi.x765Dd8mFh'). +# - run test-suite.sh inside build container using tmp storage from build step +# plus possibly additional settings (repo, etc.) +# - needed setup steps may be similar to bot/inspect.sh (PR#317) + +# stop as soon as something fails +set -e + +# source utils.sh and cfg_files.sh +source scripts/utils.sh +source scripts/cfg_files.sh + +# defaults +export JOB_CFG_FILE="${JOB_CFG_FILE_OVERRIDE:=./cfg/job.cfg}" +HOST_ARCH=$(uname -m) + +# check if ${JOB_CFG_FILE} exists +if [[ ! -r "${JOB_CFG_FILE}" ]]; then + fatal_error "job config file (JOB_CFG_FILE=${JOB_CFG_FILE}) does not exist or not readable" +fi +echo "bot/test.sh: showing ${JOB_CFG_FILE} from software-layer side" +cat ${JOB_CFG_FILE} + +echo "bot/test.sh: obtaining configuration settings from '${JOB_CFG_FILE}'" +cfg_load ${JOB_CFG_FILE} + +# if http_proxy is defined in ${JOB_CFG_FILE} use it, if not use env var $http_proxy +HTTP_PROXY=$(cfg_get_value "site_config" "http_proxy") +HTTP_PROXY=${HTTP_PROXY:-${http_proxy}} +echo "bot/test.sh: HTTP_PROXY='${HTTP_PROXY}'" + +# if https_proxy is defined in ${JOB_CFG_FILE} use it, if not use env var $https_proxy +HTTPS_PROXY=$(cfg_get_value "site_config" "https_proxy") +HTTPS_PROXY=${HTTPS_PROXY:-${https_proxy}} +echo "bot/test.sh: HTTPS_PROXY='${HTTPS_PROXY}'" + +LOCAL_TMP=$(cfg_get_value "site_config" "local_tmp") +echo "bot/test.sh: LOCAL_TMP='${LOCAL_TMP}'" +# TODO should local_tmp be mandatory? --> then we check here and exit if it is not provided + +# check if path to copy build logs to is specified, so we can copy build logs for failing builds there +BUILD_LOGS_DIR=$(cfg_get_value "site_config" "build_logs_dir") +echo "bot/test.sh: BUILD_LOGS_DIR='${BUILD_LOGS_DIR}'" +# if $BUILD_LOGS_DIR is set, add it to $SINGULARITY_BIND so the path is available in the build container +if [[ ! 
-z ${BUILD_LOGS_DIR} ]]; then + mkdir -p ${BUILD_LOGS_DIR} + if [[ -z ${SINGULARITY_BIND} ]]; then + export SINGULARITY_BIND="${BUILD_LOGS_DIR}" + else + export SINGULARITY_BIND="${SINGULARITY_BIND},${BUILD_LOGS_DIR}" + fi +fi + +# check if path to directory on shared filesystem is specified, +# and use it as location for source tarballs used by EasyBuild if so +SHARED_FS_PATH=$(cfg_get_value "site_config" "shared_fs_path") +echo "bot/test.sh: SHARED_FS_PATH='${SHARED_FS_PATH}'" +# if $SHARED_FS_PATH is set, add it to $SINGULARITY_BIND so the path is available in the build container +if [[ ! -z ${SHARED_FS_PATH} ]]; then + mkdir -p ${SHARED_FS_PATH} + if [[ -z ${SINGULARITY_BIND} ]]; then + export SINGULARITY_BIND="${SHARED_FS_PATH}" + else + export SINGULARITY_BIND="${SINGULARITY_BIND},${SHARED_FS_PATH}" + fi +fi + +SINGULARITY_CACHEDIR=$(cfg_get_value "site_config" "container_cachedir") +echo "bot/test.sh: SINGULARITY_CACHEDIR='${SINGULARITY_CACHEDIR}'" +if [[ ! -z ${SINGULARITY_CACHEDIR} ]]; then + # make sure that separate directories are used for different CPU families + SINGULARITY_CACHEDIR=${SINGULARITY_CACHEDIR}/${HOST_ARCH} + export SINGULARITY_CACHEDIR +fi + +# try to determine tmp directory from build job +RESUME_DIR=$(grep 'Using .* as tmp directory' slurm-${SLURM_JOBID}.out | head -1 | awk '{print $2}') + +if [[ -z ${RESUME_DIR} ]]; then + echo -n "setting \$STORAGE by replacing any var in '${LOCAL_TMP}' -> " + # replace any env variable in ${LOCAL_TMP} with its + # current value (e.g., a value that is local to the job) + STORAGE=$(envsubst <<< ${LOCAL_TMP}) + echo "'${STORAGE}'" + + # make sure ${STORAGE} exists + mkdir -p ${STORAGE} + + # make sure the base tmp storage is unique + JOB_STORAGE=$(mktemp --directory --tmpdir=${STORAGE} bot_job_tmp_XXX) + echo "bot/test.sh: created unique base tmp storage directory at ${JOB_STORAGE}" + + RESUME_TGZ=${PWD}/previous_tmp/build_step/$(ls previous_tmp/build_step) + if [[ -z ${RESUME_TGZ} ]]; then + echo "bot/test.sh: no information about tmp directory and tarball of build step; --> giving up" + exit 2 + fi +fi + +# obtain list of modules to be loaded +LOAD_MODULES=$(cfg_get_value "site_config" "load_modules") +echo "bot/test.sh: LOAD_MODULES='${LOAD_MODULES}'" + +# singularity/apptainer settings: CONTAINER, HOME, TMPDIR, BIND +CONTAINER=$(cfg_get_value "repository" "container") +export SINGULARITY_HOME="${PWD}:/eessi_bot_job" +export SINGULARITY_TMPDIR="${PWD}/singularity_tmpdir" +mkdir -p ${SINGULARITY_TMPDIR} + +# load modules if LOAD_MODULES is not empty +if [[ ! 
-z ${LOAD_MODULES} ]]; then + for mod in $(echo ${LOAD_MODULES} | tr ',' '\n') + do + echo "bot/test.sh: loading module '${mod}'" + module load ${mod} + done +else + echo "bot/test.sh: no modules to be loaded" +fi + +# determine repository to be used from entry .repository in ${JOB_CFG_FILE} +REPOSITORY=$(cfg_get_value "repository" "repo_id") +EESSI_REPOS_CFG_DIR_OVERRIDE=$(cfg_get_value "repository" "repos_cfg_dir") +export EESSI_REPOS_CFG_DIR_OVERRIDE=${EESSI_REPOS_CFG_DIR_OVERRIDE:-${PWD}/cfg} +echo "bot/test.sh: EESSI_REPOS_CFG_DIR_OVERRIDE='${EESSI_REPOS_CFG_DIR_OVERRIDE}'" + +# determine pilot version to be used from .repository.repo_version in ${JOB_CFG_FILE} +# here, just set & export EESSI_PILOT_VERSION_OVERRIDE +# next script (eessi_container.sh) makes use of it via sourcing init scripts +# (e.g., init/eessi_defaults or init/minimal_eessi_env) +export EESSI_PILOT_VERSION_OVERRIDE=$(cfg_get_value "repository" "repo_version") +echo "bot/test.sh: EESSI_PILOT_VERSION_OVERRIDE='${EESSI_PILOT_VERSION_OVERRIDE}'" + +# determine CVMFS repo to be used from .repository.repo_name in ${JOB_CFG_FILE} +# here, just set EESSI_CVMFS_REPO_OVERRIDE, a bit further down +# "source init/eessi_defaults" via sourcing init/minimal_eessi_env +export EESSI_CVMFS_REPO_OVERRIDE=$(cfg_get_value "repository" "repo_name") +echo "bot/test.sh: EESSI_CVMFS_REPO_OVERRIDE='${EESSI_CVMFS_REPO_OVERRIDE}'" + +# determine architecture to be used from entry .architecture in ${JOB_CFG_FILE} +# fallbacks: +# - ${CPU_TARGET} handed over from bot +# - left empty to let downstream script(s) determine subdir to be used +EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(cfg_get_value "architecture" "software_subdir") +EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE:-${CPU_TARGET}} +export EESSI_SOFTWARE_SUBDIR_OVERRIDE +echo "bot/test.sh: EESSI_SOFTWARE_SUBDIR_OVERRIDE='${EESSI_SOFTWARE_SUBDIR_OVERRIDE}'" + +# get EESSI_OS_TYPE from .architecture.os_type in ${JOB_CFG_FILE} (default: linux) +EESSI_OS_TYPE=$(cfg_get_value "architecture" "os_type") +export EESSI_OS_TYPE=${EESSI_OS_TYPE:-linux} +echo "bot/test.sh: EESSI_OS_TYPE='${EESSI_OS_TYPE}'" + +# prepare arguments to eessi_container.sh common to build and tarball steps +declare -a COMMON_ARGS=() +COMMON_ARGS+=("--verbose") +COMMON_ARGS+=("--access" "ro") +COMMON_ARGS+=("--mode" "run") +[[ ! -z ${CONTAINER} ]] && COMMON_ARGS+=("--container" "${CONTAINER}") +[[ ! -z ${HTTP_PROXY} ]] && COMMON_ARGS+=("--http-proxy" "${HTTP_PROXY}") +[[ ! -z ${HTTPS_PROXY} ]] && COMMON_ARGS+=("--https-proxy" "${HTTPS_PROXY}") +[[ ! -z ${REPOSITORY} ]] && COMMON_ARGS+=("--repository" "${REPOSITORY}") + +# make sure to use the same parent dir for storing tarballs of tmp +PREVIOUS_TMP_DIR=${PWD}/previous_tmp + +# prepare directory to store tarball of tmp for test step +TARBALL_TMP_TEST_STEP_DIR=${PREVIOUS_TMP_DIR}/test_step +mkdir -p ${TARBALL_TMP_TEST_STEP_DIR} + +# prepare arguments to eessi_container.sh specific to test step +declare -a TEST_STEP_ARGS=() +TEST_STEP_ARGS+=("--save" "${TARBALL_TMP_TEST_STEP_DIR}") + +if [[ -z ${RESUME_DIR} ]]; then + TEST_STEP_ARGS+=("--storage" "${STORAGE}") + TEST_STEP_ARGS+=("--resume" "${RESUME_TGZ}") +else + TEST_STEP_ARGS+=("--resume" "${RESUME_DIR}") +fi + +# prepare arguments to test_suite.sh (specific to test step) +declare -a TEST_SUITE_ARGS=() +if [[ ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} =~ .*/generic$ ]]; then + TEST_SUITE_ARGS+=("--generic") +fi +# [[ ! -z ${BUILD_LOGS_DIR} ]] && TEST_SUITE_ARGS+=("--build-logs-dir" "${BUILD_LOGS_DIR}") +# [[ ! 
-z ${SHARED_FS_PATH} ]] && TEST_SUITE_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}") + +# create tmp file for output of build step +test_outerr=$(mktemp test.outerr.XXXX) + +echo "Executing command to test software:" +echo "./eessi_container.sh ${COMMON_ARGS[@]} ${TEST_STEP_ARGS[@]}" +echo " -- ./run_tests.sh \"${TEST_SUITE_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${test_outerr}" +./eessi_container.sh "${COMMON_ARGS[@]}" "${TEST_STEP_ARGS[@]}" \ + -- ./run_tests.sh "${TEST_SUITE_ARGS[@]}" "$@" 2>&1 | tee -a ${test_outerr} + exit 0 diff --git a/check_missing_installations.sh b/check_missing_installations.sh index 5ea7c5a4f5..d8135ea3cb 100755 --- a/check_missing_installations.sh +++ b/check_missing_installations.sh @@ -10,14 +10,29 @@ TOPDIR=$(dirname $(realpath $0)) -if [ $# -ne 1 ]; then - echo "ERROR: Usage: $0 " >&2 +if [ "$#" -eq 1 ]; then + true +elif [ "$#" -eq 2 ]; then + echo "Using $2 to give create exceptions for PR filtering of easystack" + # Find lines that are added and use from-pr, make them unique, grab the + # PR numbers and use them to construct something we can use within awk + pr_exceptions=$(grep ^+ $2 | grep from-pr | uniq | awk '{print $3}' | xargs -i echo " || /'{}'/") +else + echo "ERROR: Usage: $0 ()" >&2 exit 1 fi easystack=$1 LOCAL_TMPDIR=$(mktemp -d) +# Clone the develop branch of EasyBuild and use that to search for easyconfigs +git clone -b develop https://github.com/easybuilders/easybuild-easyconfigs.git $LOCAL_TMPDIR/easyconfigs +export EASYBUILD_ROBOT_PATHS=$LOCAL_TMPDIR/easyconfigs/easybuild/easyconfigs + +# All PRs used in EESSI are supposed to be merged, so we can strip out all cases of from-pr +tmp_easystack=${LOCAL_TMPDIR}/$(basename ${easystack}) +grep -v from-pr ${easystack} > ${tmp_easystack} + source $TOPDIR/scripts/utils.sh source $TOPDIR/configure_easybuild @@ -27,11 +42,35 @@ ${EB:-eb} --show-config echo ">> Checking for missing installations in ${EASYBUILD_INSTALLPATH}..." eb_missing_out=$LOCAL_TMPDIR/eb_missing.out -${EB:-eb} --easystack ${easystack} --missing 2>&1 | tee ${eb_missing_out} +${EB:-eb} --easystack ${tmp_easystack} --missing 2>&1 | tee ${eb_missing_out} exit_code=${PIPESTATUS[0]} ok_msg="Command 'eb --missing ...' succeeded, analysing output..." fail_msg="Command 'eb --missing ...' failed, check log '${eb_missing_out}'" +if [ "$exit_code" -ne 0 ] && [ ! -z "$pr_exceptions" ]; then + # We might have failed due to unmerged PRs. Try to make exceptions for --from-pr added in this PR + # to software-layer, and see if then it passes. If so, we can report a more specific fail_msg + # Note that if no --from-pr's were used in this PR, $pr_exceptions will be empty and we might as + # well skip this check - unmerged PRs can not be the reason for the non-zero exit code in that scenario + + # Let's use awk so we can allow for exceptions if we are given a PR diff file + awk_command="awk '\!/'from-pr'/ EXCEPTIONS' $easystack" + awk_command=${awk_command/\\/} # Strip out the backslash we needed for ! + eval ${awk_command/EXCEPTIONS/$pr_exceptions} > ${tmp_easystack} + + msg=">> Checking for missing installations in ${EASYBUILD_INSTALLPATH}," + msg="${msg} allowing for --from-pr's that were added in this PR..." 
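# Sketch (hypothetical PR numbers): if the PR diff added two 'from-pr' lines for
# PRs 19996 and 20086, pr_exceptions holds " || /'19996'/ || /'20086'/" and the
# eval'ed command expands to: awk '!/from-pr/ || /'19996'/ || /'20086'/' easystack.yml
# i.e. keep every line not using from-pr, plus from-pr lines added by this very PR:
printf '%s\n' '  - foo.eb:' '      from-pr: 19996' '      from-pr: 11111' \
    | awk '!/from-pr/ || /19996/ || /20086/'
# -> keeps '  - foo.eb:' and the 19996 line; the (unrelated) 11111 line is dropped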
+ echo ${msg} + eb_missing_out=$LOCAL_TMPDIR/eb_missing_with_from_pr.out + ${EB:-eb} --easystack ${tmp_easystack} --missing 2>&1 | tee ${eb_missing_out} + exit_code_with_from_pr=${PIPESTATUS[0]} + + # If now we succeeded, the reason must be that we originally stripped the --from-pr's + if [ "$exit_code_with_from_pr" -eq 0 ]; then + fail_msg="$fail_msg (are you sure all PRs referenced have been merged in EasyBuild?)" + fi +fi + check_exit_code ${exit_code} "${ok_msg}" "${fail_msg}" # the above assesses the installed software for each easyconfig provided in diff --git a/create_lmodrc.py b/create_lmodrc.py index bc69dd4396..621c8e271a 100755 --- a/create_lmodrc.py +++ b/create_lmodrc.py @@ -29,7 +29,7 @@ return content end -local function cuda_enabled_load_hook(t) +local function eessi_cuda_enabled_load_hook(t) local frameStk = require("FrameStk"):singleton() local mt = frameStk:mt() local simpleName = string.match(t.modFullName, "(.-)/") @@ -94,7 +94,7 @@ end end -local function openmpi_load_hook(t) +local function eessi_openmpi_load_hook(t) -- disable smcuda BTL when loading OpenMPI module for aarch64/neoverse_v1, -- to work around hang/crash due to bug in OpenMPI; -- see https://gitlab.com/eessi/support/-/issues/41 @@ -114,8 +114,15 @@ end end -hook.register("load", cuda_enabled_load_hook) -hook.register("load", openmpi_load_hook) +-- Combine both functions into a single one, as we can only register one function as load hook in lmod +-- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed +function eessi_load_hook(t) + eessi_cuda_enabled_load_hook(t) + eessi_openmpi_load_hook(t) +end + + +hook.register("load", eessi_load_hook) """ def error(msg): diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2022b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2022b.yml index 9e92c79062..1a19242093 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2022b.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2022b.yml @@ -1,3 +1,10 @@ easyconfigs: - SciPy-bundle-2023.02-gfbf-2022b.eb - GDAL-3.6.2-foss-2022b.eb + - waLBerla-6.1-foss-2022b.eb: + options: + from-pr: 19324 + - WRF-4.4.1-foss-2022b-dmpar.eb + - ImageMagick-7.1.0-53-GCCcore-12.2.0.eb: + options: + from-pr: 20086 \ No newline at end of file diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml index 157a47a49e..384aa04a9c 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml @@ -29,3 +29,26 @@ easyconfigs: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19554 options: from-pr: 19554 + - Pillow-SIMD-9.5.0-GCCcore-12.3.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19996 + from-pr: 19996 + - dask-2023.9.2-foss-2023a.eb + - OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb + - OSU-Micro-Benchmarks-7.2-gompi-2023b.eb + - JupyterNotebook-7.0.2-GCCcore-12.3.0.eb + - ImageMagick-7.1.1-15-GCCcore-12.3.0.eb: + options: + from-pr: 20086 + - Z3-4.12.2-GCCcore-12.3.0.eb: + options: + # The Z3 dependency of PyTorch had it's versionsuffix removed + # and we need to workaround the problem this creates, + # see https://github.com/EESSI/software-layer/pull/501 for details + from-pr: 20050 + - PyOpenGL-3.1.7-GCCcore-12.3.0.eb: + options: + from-pr: 20007 + - ipympl-0.9.3-foss-2023a.eb: + 
options: + from-pr: 20126 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023b.yml index 4dd31dbd5d..e2d35276df 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023b.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023b.yml @@ -8,3 +8,44 @@ easyconfigs: - matplotlib-3.8.2-gfbf-2023b.eb: options: from-pr: 19552 + - AOFlagger-3.4.0-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - arpack-ng-3.9.0-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - Armadillo-12.8.0-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - casacore-3.5.0-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - IDG-1.2.0-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - EveryBeam-0.5.2-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - DP3-6.0-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - WSClean-3.4-foss-2023b.eb: + options: + from-pr: 19840 + include-easyblocks-from-pr: 3088 + - CDO-2.2.2-gompi-2023b.eb: + options: + from-pr: 19792 + - python-casacore-3.5.2-foss-2023b.eb: + options: + from-pr: 20089 + - libspatialindex-1.9.3-GCCcore-13.2.0.eb: + options: + from-pr: 19922 diff --git a/eb_hooks.py b/eb_hooks.py index d29a837339..d93ee37067 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -21,7 +21,7 @@ CPU_TARGET_NEOVERSE_V1 = 'aarch64/neoverse_v1' -CPU_TARGET_AARCH64_GENERIC = 'aarch64/generic' +CPU_TARGET_AARCH64_GENERIC = 'aarch64/generic' EESSI_RPATH_OVERRIDE_ATTR = 'orig_rpath_override_dirs' @@ -160,6 +160,32 @@ def post_prepare_hook(self, *args, **kwargs): POST_PREPARE_HOOKS[self.name](self, *args, **kwargs) +def parse_hook_casacore_disable_vectorize(ec, eprefix): + """ + Disable 'vectorize' toolchain option for casacore 3.5.0 on aarch64/neoverse_v1 + Compiling casacore 3.5.0 with GCC 13.2.0 (foss-2023b) gives an error when building for aarch64/neoverse_v1. 
+ See also, https://github.com/EESSI/software-layer/pull/479 + """ + if ec.name == 'casacore': + tcname, tcversion = ec['toolchain']['name'], ec['toolchain']['version'] + if ( + LooseVersion(ec.version) == LooseVersion('3.5.0') and + tcname == 'foss' and tcversion == '2023b' + ): + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + if cpu_target == CPU_TARGET_NEOVERSE_V1: + if not hasattr(ec, 'toolchainopts'): + ec['toolchainopts'] = {} + ec['toolchainopts']['vectorize'] = False + print_msg("Changed toochainopts for %s: %s", ec.name, ec['toolchainopts']) + else: + print_msg("Not changing option vectorize for %s on non-neoverse_v1", ec.name) + else: + print_msg("Not changing option vectorize for %s %s %s", ec.name, ec.version, ec.toolchain) + else: + raise EasyBuildError("casacore-specific hook triggered for non-casacore easyconfig?!") + + def parse_hook_cgal_toolchainopts_precise(ec, eprefix): """Enable 'precise' rather than 'strict' toolchain option for CGAL on POWER.""" if ec.name == 'CGAL': @@ -327,7 +353,7 @@ def pre_configure_hook_wrf_aarch64(self, *args, **kwargs): if LooseVersion(self.version) <= LooseVersion('3.9.0'): self.cfg.update('preconfigopts', "sed -i 's/%s/%s/g' arch/configure_new.defaults && " % (pattern, repl)) print_msg("Using custom preconfigopts for %s: %s", self.name, self.cfg['preconfigopts']) - + if LooseVersion('4.0.0') <= LooseVersion(self.version) <= LooseVersion('4.2.1'): self.cfg.update('preconfigopts', "sed -i 's/%s/%s/g' arch/configure.defaults && " % (pattern, repl)) print_msg("Using custom preconfigopts for %s: %s", self.name, self.cfg['preconfigopts']) @@ -414,7 +440,7 @@ def pre_test_hook_ignore_failing_tests_netCDF(self, *args, **kwargs): """ cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') if self.name == 'netCDF' and self.version == '4.9.2' and cpu_target == CPU_TARGET_NEOVERSE_V1: - self.cfg['testopts'] = "|| echo ignoring failing tests" + self.cfg['testopts'] = "|| echo ignoring failing tests" def pre_test_hook_increase_max_failed_tests_arm_PyTorch(self, *args, **kwargs): """ @@ -579,6 +605,7 @@ def inject_gpu_property(ec): PARSE_HOOKS = { + 'casacore': parse_hook_casacore_disable_vectorize, 'CGAL': parse_hook_cgal_toolchainopts_precise, 'fontconfig': parse_hook_fontconfig_add_fonts, 'OpenBLAS': parse_hook_openblas_relax_lapack_tests_num_errors, diff --git a/eessi_container.sh b/eessi_container.sh index d6e9558202..143cbcc6c8 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -457,6 +457,12 @@ if [[ ${SETUP_NVIDIA} -eq 1 ]]; then mkdir -p ${EESSI_USR_LOCAL_CUDA} BIND_PATHS="${BIND_PATHS},${EESSI_VAR_LOG}:/var/log,${EESSI_USR_LOCAL_CUDA}:/usr/local/cuda" [[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}" + if [[ "${NVIDIA_MODE}" == "install" ]] ; then + # We need to "trick" our LMOD_RC file to allow us to load CUDA modules even without a CUDA driver + # (this works because we build within a container and the LMOD_RC recognises that) + touch ${EESSI_TMPDIR}/libcuda.so + export SINGULARITY_CONTAINLIBS="${EESSI_TMPDIR}/libcuda.so" + fi fi fi diff --git a/init/eessi_environment_variables b/init/eessi_environment_variables index af5222e7b9..e042e8575a 100644 --- a/init/eessi_environment_variables +++ b/init/eessi_environment_variables @@ -50,6 +50,15 @@ if [ -d $EESSI_PREFIX ]; then show_msg "Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory." 
export EESSI_SOFTWARE_PATH=$EESSI_PREFIX/software/$EESSI_OS_TYPE/$EESSI_SOFTWARE_SUBDIR + + # Configure our LMOD_RC file + export LMOD_RC="$EESSI_SOFTWARE_PATH/.lmod/lmodrc.lua" + if [ -f $LMOD_RC ]; then + show_msg "Found Lmod configuration file at $LMOD_RC" + else + error "Lmod configuration file not found at $LMOD_RC" + fi + if [ ! -z $EESSI_BASIC_ENV ]; then show_msg "Only setting up basic environment, so we're done" elif [ -d $EESSI_SOFTWARE_PATH ]; then @@ -76,13 +85,6 @@ if [ -d $EESSI_PREFIX ]; then false fi - export LMOD_RC="$EESSI_SOFTWARE_PATH/.lmod/lmodrc.lua" - if [ -f $LMOD_RC ]; then - show_msg "Found Lmod configuration file at $LMOD_RC" - else - error "Lmod configuration file not found at $LMOD_RC" - fi - else error "EESSI software layer at $EESSI_SOFTWARE_PATH not found!" fi diff --git a/reframe_config_bot.py.tmpl b/reframe_config_bot.py.tmpl new file mode 100644 index 0000000000..0cc3e9f530 --- /dev/null +++ b/reframe_config_bot.py.tmpl @@ -0,0 +1,59 @@ +# WARNING: this file is intended as template and the __X__ template variables need to be replaced +# before it can act as a configuration file +# Once replaced, this is a config file for running tests after the build phase, by the bot + +from eessi.testsuite.common_config import common_logging_config +from eessi.testsuite.constants import * # noqa: F403 + + +site_configuration = { + 'systems': [ + { + 'name': 'BotBuildTests', + 'descr': 'Software-layer bot', + 'hostnames': ['.*'], + 'modules_system': 'lmod', + 'partitions': [ + { + 'name': 'default', + 'scheduler': 'local', + 'launcher': 'mpirun', + 'environs': ['default'], + 'features': [ + FEATURES[CPU] + ] + list(SCALES.keys()), + 'processor': { + 'num_cpus': __NUM_CPUS__, + 'num_sockets': __NUM_SOCKETS__, + 'num_cpus_per_core': __NUM_CPUS_PER_CORE__, + 'num_cpus_per_socket': __NUM_CPUS_PER_SOCKET__, + }, + 'resources': [ + { + 'name': 'memory', + 'options': ['--mem={size}'], + } + ], + 'max_jobs': 1 + } + ] + } + ], + 'environments': [ + { + 'name': 'default', + 'cc': 'cc', + 'cxx': '', + 'ftn': '' + } + ], + 'general': [ + { + 'purge_environment': True, + 'resolve_module_conflicts': False, # avoid loading the module before submitting the job + # disable automatic detection of CPU architecture (since we're using local scheduler) + 'remote_detect': False, + } + ], + 'logging': common_logging_config(), +} diff --git a/run_tests.sh b/run_tests.sh new file mode 100755 index 0000000000..1dbb47db9d --- /dev/null +++ b/run_tests.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# +# This script gets invoked by the bot/test.sh script to run within the EESSI container +# Thus, this script defines all of the steps that should run for the tests. +# Note that, unless we have good reason, we don't run test steps in the prefix environment: +# users also typically don't run in the prefix environment, and we want to check if the +# software works well in that specific setup. 
+# +# This script is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Caspar van Leeuwen (@casparvl) +# +# license: GPLv2 +# + +base_dir=$(dirname $(realpath $0)) +source ${base_dir}/init/eessi_defaults + +# Git clone has to be run in compat layer, to make the git command available +./run_in_compat_layer_env.sh "git clone https://github.com/EESSI/test-suite EESSI-test-suite" + +# Run the test suite +./test_suite.sh "$@" diff --git a/test_suite.sh b/test_suite.sh new file mode 100755 index 0000000000..95eb9daa2a --- /dev/null +++ b/test_suite.sh @@ -0,0 +1,200 @@ +#!/bin/bash +# +# This script creates a ReFrame config file from a template, in which CPU properties get replaced +# based on where this script is run (typically: a build node). Then, it runs the EESSI test suite. +# +# This script is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Caspar van Leeuwen (@casparvl) +# +# license: GPLv2 + +display_help() { + echo "usage: $0 [OPTIONS]" + echo " -g | --generic - instructs script to test for generic architecture target" + echo " -h | --help - display this usage information" + echo " -x | --http-proxy URL - provides URL for the environment variable http_proxy" + echo " -y | --https-proxy URL - provides URL for the environment variable https_proxy" +} + +POSITIONAL_ARGS=() + +while [[ $# -gt 0 ]]; do + case $1 in + -g|--generic) + DETECTION_PARAMETERS="--generic" + shift + ;; + -h|--help) + display_help # Call your function + # no shifting needed here, we're done. + exit 0 + ;; + -x|--http-proxy) + export http_proxy="$2" + shift 2 + ;; + -y|--https-proxy) + export https_proxy="$2" + shift 2 + ;; + --build-logs-dir) + export build_logs_dir="${2}" + shift 2 + ;; + --shared-fs-path) + export shared_fs_path="${2}" + shift 2 + ;; + -*|--*) + echo "Error: Unknown option: $1" >&2 + exit 1 + ;; + *) # No more options + POSITIONAL_ARGS+=("$1") # save positional arg + shift + ;; + esac +done + +set -- "${POSITIONAL_ARGS[@]}" + +TOPDIR=$(dirname $(realpath $0)) + +source $TOPDIR/scripts/utils.sh + +# honor $TMPDIR if it is already defined, use /tmp otherwise +if [ -z $TMPDIR ]; then + export WORKDIR=/tmp/$USER +else + export WORKDIR=$TMPDIR/$USER +fi + +TMPDIR=$(mktemp -d) + +echo ">> Setting up environment..." +module --force purge +export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS) + +source $TOPDIR/init/bash + +# Load the ReFrame module +# Currently, we load the default version. Maybe we should somehow make this configurable in the future? +module load ReFrame +if [[ $? -eq 0 ]]; then + echo_green ">> Loaded ReFrame module" +else + fatal_error "Failed to load the ReFrame module" +fi + +# Check that a system python3 is available +python3_found=$(command -v python3) +if [ -z ${python3_found} ]; then + fatal_error "No system python3 found" +else + echo_green "System python3 found:" + python3 -V +fi + +# Check ReFrame came with the hpctestlib and we can import it +reframe_import="hpctestlib.sciapps.gromacs" +python3 -c "import ${reframe_import}" +if [[ $? -eq 0 ]]; then + echo_green "Succesfully found and imported ${reframe_import}" +else + fatal_error "Failed to import ${reframe_import}" +fi + +# Cloning should already be done in run_tests.sh before test_suite.sh is invoked +# Check if that succeeded +export TESTSUITEPREFIX=$PWD/EESSI-test-suite +if [ -d $TESTSUITEPREFIX ]; then + echo_green "Clone of the test suite $TESTSUITEPREFIX available, OK!" 
+else + fatal_error "Clone of the test suite $TESTSUITEPREFIX is not available!" +fi +export PYTHONPATH=$TESTSUITEPREFIX:$PYTHONPATH + +# Check that we can import from the testsuite +testsuite_import="eessi.testsuite" +python3 -c "import ${testsuite_import}" +if [[ $? -eq 0 ]]; then + echo_green "Succesfully found and imported ${testsuite_import}" +else + fatal_error "Failed to import ${testsuite_import}" +fi + +# Configure ReFrame, see https://www.eessi.io/docs/test-suite/installation-configuration +export RFM_CONFIG_FILES=$TOPDIR/reframe_config_bot.py +export RFM_CONFIG_FILE_TEMPLATE=$TOPDIR/reframe_config_bot.py.tmpl +export RFM_CHECK_SEARCH_PATH=$TESTSUITEPREFIX/eessi/testsuite/tests +export RFM_CHECK_SEARCH_RECURSIVE=1 +export RFM_PREFIX=$PWD/reframe_runs + +echo "Configured reframe with the following environment variables:" +env | grep "RFM_" + +# Inject correct CPU properties into the ReFrame config file +cpuinfo=$(lscpu) +if [[ "${cpuinfo}" =~ CPU\(s\):[^0-9]*([0-9]+) ]]; then + cpu_count=${BASH_REMATCH[1]} +else + fatal_error "Failed to get the number of CPUs for the current test hardware with lscpu." +fi +if [[ "${cpuinfo}" =~ Socket\(s\):[^0-9]*([0-9]+) ]]; then + socket_count=${BASH_REMATCH[1]} +else + fatal_error "Failed to get the number of sockets for the current test hardware with lscpu." +fi +if [[ "${cpuinfo}" =~ (Thread\(s\) per core:[^0-9]*([0-9]+)) ]]; then + threads_per_core=${BASH_REMATCH[2]} +else + fatal_error "Failed to get the number of threads per core for the current test hardware with lscpu." +fi +if [[ "${cpuinfo}" =~ (Core\(s\) per socket:[^0-9]*([0-9]+)) ]]; then + cores_per_socket=${BASH_REMATCH[2]} +else + fatal_error "Failed to get the number of cores per socket for the current test hardware with lscpu." +fi +cp ${RFM_CONFIG_FILE_TEMPLATE} ${RFM_CONFIG_FILES} +sed -i "s/__NUM_CPUS__/${cpu_count}/g" $RFM_CONFIG_FILES +sed -i "s/__NUM_SOCKETS__/${socket_count}/g" $RFM_CONFIG_FILES +sed -i "s/__NUM_CPUS_PER_CORE__/${threads_per_core}/g" $RFM_CONFIG_FILES +sed -i "s/__NUM_CPUS_PER_SOCKET__/${cores_per_socket}/g" $RFM_CONFIG_FILES + +# Workaround for https://github.com/EESSI/software-layer/pull/467#issuecomment-1973341966 +export PSM3_DEVICES='self,shm' # this is enough, since we only run single node for now + +# Check we can run reframe +reframe --version +if [[ $? -eq 0 ]]; then + echo_green "Succesfully ran 'reframe --version'" +else + fatal_error "Failed to run 'reframe --version'" +fi + +# List the tests we want to run +export REFRAME_ARGS='--tag CI --tag 1_node --nocolor' +echo "Listing tests: reframe ${REFRAME_ARGS} --list" +reframe ${REFRAME_ARGS} --list +if [[ $? -eq 0 ]]; then + echo_green "Succesfully listed ReFrame tests with command: reframe ${REFRAME_ARGS} --list" +else + fatal_error "Failed to list ReFrame tests with command: reframe ${REFRAME_ARGS} --list" +fi + +# Run all tests +echo "Running tests: reframe ${REFRAME_ARGS} --run" +reframe ${REFRAME_ARGS} --run +reframe_exit_code=$? +if [[ ${reframe_exit_code} -eq 0 ]]; then + echo_green "ReFrame runtime ran succesfully with command: reframe ${REFRAME_ARGS} --run." +else + fatal_error "ReFrame runtime failed to run with command: reframe ${REFRAME_ARGS} --run." +fi + +echo ">> Cleaning up ${TMPDIR}..." +rm -r ${TMPDIR} + +exit ${reframe_exit_code}
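# Sketch (synthetic lscpu output): [[ string =~ regex ]] stores capture groups in
# BASH_REMATCH, which is how the topology values above are extracted; for the
# parenthesised patterns the digits sit in the second (inner) group:
demo_cpuinfo=$'CPU(s):              16\nThread(s) per core:  2'
if [[ "${demo_cpuinfo}" =~ CPU\(s\):[^0-9]*([0-9]+) ]]; then
    echo "cpu_count=${BASH_REMATCH[1]}"          # -> cpu_count=16
fi
if [[ "${demo_cpuinfo}" =~ (Thread\(s\)\ per\ core:[^0-9]*([0-9]+)) ]]; then
    echo "threads_per_core=${BASH_REMATCH[2]}"   # -> threads_per_core=2
fi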