From d50d67e8878e2a2234b7a740f499bd43811ba16b Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 21 Jan 2024 20:30:32 +0100 Subject: [PATCH 01/12] add README.md for easystacks --- easystacks/pilot.nessi.no/2023.06/README.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 easystacks/pilot.nessi.no/2023.06/README.md diff --git a/easystacks/pilot.nessi.no/2023.06/README.md b/easystacks/pilot.nessi.no/2023.06/README.md new file mode 100644 index 0000000000..733ebf9475 --- /dev/null +++ b/easystacks/pilot.nessi.no/2023.06/README.md @@ -0,0 +1,7 @@ +File naming matters, since it determines the order in which easystack files are processed. + +Software installed with system toolchain should be installed first, +this includes EasyBuild itself, see `eessi-2023.06-eb-4.8.2-001-system.yml` . + +CUDA installations must be done before CUDA is required as dependency for something +built with a non-system toolchain, see `eessi-2023.06-eb-4.8.2-010-CUDA.yml` . From d6d1df132fa2b32f71a0f25f4cae49989994b0f2 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 21 Jan 2024 20:35:05 +0100 Subject: [PATCH 02/12] removed easystack files for 2021.06 and 2021.12 --- eessi-2021.06.yml | 53 ------------------------------------ eessi-2021.12.yml | 69 ----------------------------------------------- 2 files changed, 122 deletions(-) delete mode 100644 eessi-2021.06.yml delete mode 100644 eessi-2021.12.yml diff --git a/eessi-2021.06.yml b/eessi-2021.06.yml deleted file mode 100644 index 3587827746..0000000000 --- a/eessi-2021.06.yml +++ /dev/null @@ -1,53 +0,0 @@ -software: - R-bundle-Bioconductor: - toolchains: - foss-2020a: - versions: - '3.11': - versionsuffix: -R-4.0.0 - GROMACS: - toolchains: - foss-2020a: - versions: - '2020.1': - versionsuffix: -Python-3.8.2 - '2020.4': - versionsuffix: -Python-3.8.2 - Horovod: - toolchains: - foss-2020a: - versions: - '0.21.3': - versionsuffix: -TensorFlow-2.3.1-Python-3.8.2 - OpenFOAM: - toolchains: - foss-2020a: - versions: ['8', 'v2006'] - OSU-Micro-Benchmarks: - toolchains: - gompi-2020a: - versions: ['5.6.3'] - QuantumESPRESSO: - toolchains: - foss-2020a: - versions: ['6.6'] - TensorFlow: - toolchains: - foss-2020a: - versions: - '2.3.1': - versionsuffix: -Python-3.8.2 - RStudio-Server: - toolchains: - foss-2020a: - versions: - '1.3.1093': - versionsuffix: -Java-11-R-4.0.0 - ReFrame: - toolchains: - SYSTEM: - versions: '3.6.2' - code-server: - toolchains: - SYSTEM: - versions: '3.7.3' diff --git a/eessi-2021.12.yml b/eessi-2021.12.yml deleted file mode 100644 index 210bbb2845..0000000000 --- a/eessi-2021.12.yml +++ /dev/null @@ -1,69 +0,0 @@ -software: - code-server: - toolchains: - SYSTEM: - versions: '3.7.3' - GROMACS: - toolchains: - foss-2020a: - versions: - '2020.1': - versionsuffix: -Python-3.8.2 - '2020.4': - versionsuffix: -Python-3.8.2 - Horovod: - toolchains: - foss-2020a: - versions: - '0.21.3': - versionsuffix: -TensorFlow-2.3.1-Python-3.8.2 - Nextflow: - toolchains: - SYSTEM: - versions: '22.10.1' - OpenFOAM: - toolchains: - foss-2020a: - versions: ['8', 'v2006'] - OSU-Micro-Benchmarks: - toolchains: - gompi-2020a: - versions: ['5.6.3'] - gompi-2021a: - versions: ['5.7.1'] - QuantumESPRESSO: - toolchains: - foss-2020a: - versions: ['6.6'] - R: - toolchains: - foss-2021a: - versions: '4.1.0' - R-bundle-Bioconductor: - toolchains: - foss-2020a: - versions: - '3.11': - versionsuffix: -R-4.0.0 - RStudio-Server: - toolchains: - foss-2020a: - versions: - '1.3.1093': - versionsuffix: -Java-11-R-4.0.0 - SciPy-bundle: - toolchains: - foss-2021a: - versions: ['2021.05'] - TensorFlow: - toolchains: - foss-2020a: - versions: - '2.3.1': - versionsuffix: -Python-3.8.2 - WRF: - toolchains: - foss-2020a: - versions: - '3.9.1.1': - versionsuffix: -dmpar From 5f2050d5455ea2fd175b4e09acf32c8edb528b7d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 21 Jan 2024 20:56:56 +0100 Subject: [PATCH 03/12] using pilot.nessi.no in testing eessi_container.sh --- .../workflows/test_eessi_container_script.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test_eessi_container_script.yml b/.github/workflows/test_eessi_container_script.yml index 929fb22cec..3bb67b445f 100644 --- a/.github/workflows/test_eessi_container_script.yml +++ b/.github/workflows/test_eessi_container_script.yml @@ -45,7 +45,7 @@ jobs: elif [[ ${{matrix.SCRIPT_TEST}} == 'listrepos_default' ]]; then outfile=out_listrepos.txt ./eessi_container.sh --verbose --list-repos | tee ${outfile} - grep "EESSI-pilot" ${outfile} + grep "EESSI" ${outfile} # test use of --list-repos with custom repos.cfg elif [[ ${{matrix.SCRIPT_TEST}} == 'listrepos_custom' ]]; then @@ -57,7 +57,7 @@ jobs: echo "[EESSI/20HT.TP]" >> cfg/repos.cfg echo "repo_version = 20HT.TP" >> cfg/repos.cfg ./eessi_container.sh --verbose --list-repos | tee ${outfile} - grep "EESSI-pilot" ${outfile} + grep "EESSI" ${outfile} export EESSI_REPOS_CFG_DIR_OVERRIDE=${PWD}/cfg ./eessi_container.sh --verbose --list-repos | tee ${outfile2} @@ -90,15 +90,15 @@ jobs: elif [[ ${{matrix.SCRIPT_TEST}} == 'readwrite' ]]; then outfile=out_readwrite.txt fn="test_${RANDOM}.txt" - echo "touch /cvmfs/pilot.eessi-hpc.org/${fn}" > test_script.sh + echo "touch /cvmfs/pilot.nessi.no/${fn}" > test_script.sh chmod u+x test_script.sh export SINGULARITY_BIND="$PWD:/test" ./eessi_container.sh --verbose --access rw --mode run /test/test_script.sh > ${outfile} tmpdir=$(grep "\-\-resume" ${outfile} | sed "s/.*--resume \([^']*\).*/\1/g") # note: must use '--access rw' again here, since touched file is in overlay upper dir - ./eessi_container.sh --verbose --resume ${tmpdir} --access rw --mode shell <<< "ls -l /cvmfs/pilot.eessi-hpc.org/${fn}" > ${outfile} - grep "/cvmfs/pilot.eessi-hpc.org/${fn}$" $outfile + ./eessi_container.sh --verbose --resume ${tmpdir} --access rw --mode shell <<< "ls -l /cvmfs/pilot.nessi.no/${fn}" > ${outfile} + grep "/cvmfs/pilot.nessi.no/${fn}$" $outfile # test use of --resume elif [[ ${{matrix.SCRIPT_TEST}} == 'resume' ]]; then @@ -120,12 +120,12 @@ jobs: elif [[ ${{matrix.SCRIPT_TEST}} == 'save' ]]; then outfile=out_save.txt fn="test_${RANDOM}.txt" - test_cmd="touch /cvmfs/pilot.eessi-hpc.org/${fn}" + test_cmd="touch /cvmfs/pilot.nessi.no/${fn}" ./eessi_container.sh --verbose --mode shell --access rw --save test-save.tar <<< "${test_cmd}" 2>&1 | tee ${outfile} rm -f ${outfile} - ./eessi_container.sh --verbose --mode shell --access rw --resume test-save.tar <<< "ls -l /cvmfs/pilot.eessi-hpc.org/${fn}" > ${outfile} - grep "/cvmfs/pilot.eessi-hpc.org/${fn}$" $outfile + ./eessi_container.sh --verbose --mode shell --access rw --resume test-save.tar <<< "ls -l /cvmfs/pilot.nessi.no/${fn}" > ${outfile} + grep "/cvmfs/pilot.nessi.no/${fn}$" $outfile tar tfv test-save.tar | grep "overlay-upper/${fn}" From 9909e1176c11fe110ea29561fb096cf48b9d002c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 21 Jan 2024 21:13:24 +0100 Subject: [PATCH 04/12] add CI for testing licenses --- .github/workflows/test_licenses.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 .github/workflows/test_licenses.yml diff --git a/.github/workflows/test_licenses.yml b/.github/workflows/test_licenses.yml new file mode 100644 index 0000000000..00a2c90f6b --- /dev/null +++ b/.github/workflows/test_licenses.yml @@ -0,0 +1,20 @@ +# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions +name: Test software licenses +on: [push, pull_request] +permissions: + contents: read # to fetch code (actions/checkout) +jobs: + build: + runs-on: ubuntu-20.04 + steps: + - name: Check out software-layer repository + uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + + - name: set up Python + uses: actions/setup-python@13ae5bb136fac2878aff31522b9efb785519f984 # v4.3.0 + with: + python-version: '3.9' + + - name: Check software licenses + run: | + python licenses/spdx.py licenses/licenses.json From 8523da373082399c93ab0fff7cf639bce20248c7 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 21 Jan 2024 21:23:32 +0100 Subject: [PATCH 05/12] remove pilot from CI --- .github/workflows/tests_scripts.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests_scripts.yml b/.github/workflows/tests_scripts.yml index 607e5c0744..a369f4f187 100644 --- a/.github/workflows/tests_scripts.yml +++ b/.github/workflows/tests_scripts.yml @@ -5,7 +5,7 @@ on: paths: - build_container.sh - create_directory_tarballs.sh - - EESSI-pilot-install-software.sh + - EESSI-install-software.sh - install_software_layer.sh - load_easybuild_module.sh - run_in_compat_layer_env.sh @@ -16,7 +16,7 @@ on: paths: - build_container.sh - create_directory_tarballs.sh - - EESSI-pilot-install-software.sh + - EESSI-install-software.sh - install_software_layer.sh - load_easybuild_module.sh - run_in_compat_layer_env.sh @@ -40,7 +40,7 @@ jobs: # bind current directory into container as /software-layer export SINGULARITY_BIND="${PWD}:/software-layer" - # can't test with EasyBuild versions older than v4.5.2 when using EESSI pilot 2023.06, + # can't test with EasyBuild versions older than v4.5.2 when using EESSI 2023.06, # since Python in compat layer is Python 3.11.x; # testing with a single EasyBuild version takes a while in GitHub Actions, so stick to a single sensible version for EB_VERSION in '4.6.0'; do From 0403274bd3d42b16517810ba8c62f4044bf60c53 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 21 Jan 2024 21:40:06 +0100 Subject: [PATCH 06/12] updated CI for testing archdetect --- .github/workflows/tests_archdetect.yml | 45 +++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests_archdetect.yml b/.github/workflows/tests_archdetect.yml index 618f6eb142..6f5b15cce0 100644 --- a/.github/workflows/tests_archdetect.yml +++ b/.github/workflows/tests_archdetect.yml @@ -13,24 +13,59 @@ jobs: - x86_64/intel/skylake_avx512/archspec-linux-6132 - x86_64/amd/zen2/Azure-CentOS7-7V12 - x86_64/amd/zen3/Azure-CentOS7-7V73X - - ppc64le/power9le/unknown-power9le - - aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra - - aarch64/arm/neoverse-n1/AWS-awslinux-graviton2 - - aarch64/arm/neoverse-v1/AWS-awslinux-graviton3 + - aarch64/neoverse-n1/Azure-Ubuntu20-Altra + - aarch64/neoverse-n1/AWS-awslinux-graviton2 + - aarch64/neoverse-v1/AWS-awslinux-graviton3 + # commented out since these targets are currently not supported in pilot.nessi.no repo + # (and some tests assume that the corresponding subdirectory in software layer is there) + # - ppc64le/power9le/unknown-power9le fail-fast: false steps: - name: checkout uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + - name: Mount NESSI CernVM-FS repository + uses: cvmfs-contrib/github-action-cvmfs@d4641d0d591c9a5c3be23835ced2fb648b44c04b # v3.1 + with: + cvmfs_config_package: https://github.com/NorESSI/filesystem-layer/releases/download/latest/cvmfs-config-nessi_latest_all.deb + cvmfs_http_proxy: DIRECT + cvmfs_repositories: pilot.nessi.no + - name: test eessi_archdetect.sh run: | export EESSI_MACHINE_TYPE=${{matrix.proc_cpuinfo}} export EESSI_MACHINE_TYPE=${EESSI_MACHINE_TYPE%%/*} export EESSI_PROC_CPUINFO=./tests/archdetect/${{matrix.proc_cpuinfo}}.cpuinfo + # check that printing of best match works correctly CPU_ARCH=$(./init/eessi_archdetect.sh cpupath) if [[ $CPU_ARCH == "$( cat ./tests/archdetect/${{matrix.proc_cpuinfo}}.output )" ]]; then - echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCH" >&2 + echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCH" else echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCH" >&2 exit 1 fi + # check that $EESSI_SOFTWARE_SUBDIR_OVERRIDE is honored + export EESSI_SOFTWARE_SUBDIR_OVERRIDE='dummy/cpu' + CPU_ARCH=$(./init/eessi_archdetect.sh cpupath) + if [[ $CPU_ARCH == "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then + echo "Test for picking up on \$EESSI_SOFTWARE_SUBDIR_OVERRIDE PASSED" + else + echo "Test for picking up on \$EESSI_SOFTWARE_SUBDIR_OVERRIDE FAILED" >&2 + exit 1 + fi + unset EESSI_SOFTWARE_SUBDIR_OVERRIDE + # check that printing of all matches works correctly (-a option for cpupath action) + CPU_ARCHES=$(./init/eessi_archdetect.sh -a cpupath) + if [[ $CPU_ARCHES == "$( cat ./tests/archdetect/${{matrix.proc_cpuinfo}}.all.output )" ]]; then + echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCHES" + else + echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCHES" >&2 + exit 1 + fi + # Check all those architectures actually exist (if this EESSI version has been populated already) + if [ -d ${EESSI_PREFIX}/software/linux ]; then + for dir in $(echo "$CPU_ARCHES" | tr ':' '\n'); do + # Search all EESSI versions as we may drop support at some point + ls -d ${EESSI_PREFIX}/software/linux/${dir} + done + fi From b1bf69e20b7569409298be39eef450557aca9eec Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 21 Jan 2024 22:00:33 +0100 Subject: [PATCH 07/12] fix typo in archdetect --- init/eessi_archdetect.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/eessi_archdetect.sh b/init/eessi_archdetect.sh index 81846658c7..236ac5a367 100755 --- a/init/eessi_archdetect.sh +++ b/init/eessi_archdetect.sh @@ -70,7 +70,7 @@ check_allinfirst(){ cpupath(){ # If EESSI_SOFTWARE_SUBDIR_OVERRIDE is set, use it log "DEBUG" "cpupath: Override variable set as '$EESSI_SOFTWARE_SUBDIR_OVERRIDE' " - [ $EESI_SOFTWARE_SUBDIR_OVERRIDE ] && echo ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} && exit + [ $EESSI_SOFTWARE_SUBDIR_OVERRIDE ] && echo ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} && exit # Identify the best matching CPU architecture from a list of supported specifications for the host CPU # Return the path to the installation files in NESSI of the best matching architecture From eae822fcefac1b19d8127501e97fa3fd6e12bf83 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 21 Jan 2024 22:07:16 +0100 Subject: [PATCH 08/12] comment out non supported architectures in NESSI --- .github/workflows/tests_archdetect.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests_archdetect.yml b/.github/workflows/tests_archdetect.yml index 6f5b15cce0..74dbf032f0 100644 --- a/.github/workflows/tests_archdetect.yml +++ b/.github/workflows/tests_archdetect.yml @@ -13,12 +13,12 @@ jobs: - x86_64/intel/skylake_avx512/archspec-linux-6132 - x86_64/amd/zen2/Azure-CentOS7-7V12 - x86_64/amd/zen3/Azure-CentOS7-7V73X - - aarch64/neoverse-n1/Azure-Ubuntu20-Altra - - aarch64/neoverse-n1/AWS-awslinux-graviton2 - - aarch64/neoverse-v1/AWS-awslinux-graviton3 # commented out since these targets are currently not supported in pilot.nessi.no repo # (and some tests assume that the corresponding subdirectory in software layer is there) # - ppc64le/power9le/unknown-power9le + # - aarch64/neoverse-n1/Azure-Ubuntu20-Altra + # - aarch64/neoverse-n1/AWS-awslinux-graviton2 + # - aarch64/neoverse-v1/AWS-awslinux-graviton3 fail-fast: false steps: - name: checkout From a154d210e4702ca910c28d13d9b569f8d63deb73 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 21 Jan 2024 22:29:24 +0100 Subject: [PATCH 09/12] renamed CI file and ported changes from EESSI --- ...test_eessi.yml => test-pilot.nessi.no.yml} | 58 +++++++------------ 1 file changed, 22 insertions(+), 36 deletions(-) rename .github/workflows/{test_eessi.yml => test-pilot.nessi.no.yml} (62%) diff --git a/.github/workflows/test_eessi.yml b/.github/workflows/test-pilot.nessi.no.yml similarity index 62% rename from .github/workflows/test_eessi.yml rename to .github/workflows/test-pilot.nessi.no.yml index fee95f6b79..6342d5df19 100644 --- a/.github/workflows/test_eessi.yml +++ b/.github/workflows/test-pilot.nessi.no.yml @@ -1,80 +1,66 @@ # documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions -name: Tests relying on having EESSI pilot repo mounted +name: Check for missing software installations in pilot.nessi.no on: [push, pull_request, workflow_dispatch] permissions: contents: read # to fetch code (actions/checkout) jobs: - pilot: - runs-on: ubuntu-20.04 + check_missing: + runs-on: ubuntu-22.04 strategy: fail-fast: false matrix: EESSI_VERSION: - 2023.06 - EESSI_SOFTWARE_SUBDIR: + EESSI_SOFTWARE_SUBDIR_OVERRIDE: # - aarch64/generic - x86_64/amd/zen2 - x86_64/intel/broadwell -# - x86_64/intel/cascadelake - x86_64/intel/skylake_avx512 - x86_64/generic - EASYSTACK_FILE: - - eessi-2023.06-eb-4.7.2-2021a.yml - - eessi-2023.06-eb-4.7.2-2021b.yml - - eessi-2023.06-eb-4.7.2-2022a.yml - - eessi-2023.06-eb-4.7.2-2022b.yml - - eessi-2023.06-eb-4.7.2-system.yml - - eessi-2023.06-eb-4.8.0-system.yml - - eessi-2023.06-eb-4.8.1-2022a.yml - - eessi-2023.06-eb-4.8.1-system.yml - - eessi-2023.06-eb-4.8.2-2022a.yml steps: - name: Check out software-layer repository uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 - - name: Mount EESSI CernVM-FS pilot repository + - name: Mount NESSI CernVM-FS repository uses: cvmfs-contrib/github-action-cvmfs@d4641d0d591c9a5c3be23835ced2fb648b44c04b # v3.1 with: cvmfs_config_package: https://github.com/NorESSI/filesystem-layer/releases/download/latest/cvmfs-config-nessi_latest_all.deb cvmfs_http_proxy: DIRECT cvmfs_repositories: pilot.nessi.no - - name: Test check_missing_installations.sh script with EESSI_SOFTWARE_SUBDIR_OVERRIDE - if: '!cancelled()' - run: | - export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR}} - source /cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}/init/bash - module load EasyBuild - eb --version - export EESSI_PREFIX=/cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}} - export EESSI_OS_TYPE=linux - env | grep ^EESSI | sort - echo "just run check_missing_installations.sh (should use ${{matrix.EASYSTACK_FILE}})" - ./check_missing_installations.sh ${{matrix.EASYSTACK_FILE}} - - - name: Test check_missing_installations.sh script without EESSI_SOFTWARE_SUBDIR_OVERRIDE - if: '!cancelled()' + - name: Test check_missing_installations.sh script run: | + export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} source /cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}/init/bash + # set $EESSI_CPU_FAMILY to the CPU architecture that corresponds to $EESSI_SOFTWARE_SUBDIR_OVERRIDE (part before the first slash), + # to prevent issues with checks in the Easybuild configuration that use this variable + export EESSI_CPU_FAMILY=${EESSI_SOFTWARE_SUBDIR_OVERRIDE%%/*} module load EasyBuild + which eb eb --version export EESSI_PREFIX=/cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}} export EESSI_OS_TYPE=linux - export EESSI_SOFTWARE_SUBDIR=${{matrix.EESSI_SOFTWARE_SUBDIR}} env | grep ^EESSI | sort - echo "just run check_missing_installations.sh (should use ${{matrix.EASYSTACK_FILE}})" - ./check_missing_installations.sh ${{matrix.EASYSTACK_FILE}} + echo "just run check_missing_installations.sh (should use easystacks/pilot.nessi.no/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml)" + for easystack_file in $(ls easystacks/pilot.nessi.no/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do + echo "check missing installations for ${easystack_file}..." + ./check_missing_installations.sh ${easystack_file} + ec=$? + if [[ ${ec} -ne 0 ]]; then echo "missing installations found for ${easystack_file}!" >&2; exit ${ec}; fi + done - name: Test check_missing_installations.sh with missing package (GCC/8.3.0) - if: '!cancelled()' run: | + export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} source /cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}/init/bash + # set $EESSI_CPU_FAMILY to the CPU architecture that corresponds to $EESSI_SOFTWARE_SUBDIR_OVERRIDE (part before the first slash), + # to prevent issues with checks in the Easybuild configuration that use this variable + export EESSI_CPU_FAMILY=${EESSI_SOFTWARE_SUBDIR_OVERRIDE%%/*} module load EasyBuild which eb eb --version export EESSI_PREFIX=/cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}} export EESSI_OS_TYPE=linux - export EESSI_SOFTWARE_SUBDIR=${{matrix.EESSI_SOFTWARE_SUBDIR}} env | grep ^EESSI | sort # create dummy easystack file with a single entry (something that is not installed in EESSI) easystack_file="test.yml" From 50aff09f09e1e2de7de6d4ecac53ff26c4539631 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Mon, 22 Jan 2024 13:33:58 +0000 Subject: [PATCH 10/12] Took care of all noted changes --- create_tarball.sh | 2 +- eb_hooks.py | 43 +++++++++++++++++++++++--------- eessi-2023.06-known-issues.yml | 28 +++++++++++++++++++++ eessi_container.sh | 8 +++--- init/bash | 2 +- init/eessi_archdetect.sh | 2 +- init/eessi_environment_variables | 10 ++++---- 7 files changed, 71 insertions(+), 24 deletions(-) create mode 100644 eessi-2023.06-known-issues.yml diff --git a/create_tarball.sh b/create_tarball.sh index f6239cf28e..a619df9439 100755 --- a/create_tarball.sh +++ b/create_tarball.sh @@ -43,7 +43,7 @@ module_files_list=${tmpdir}/module_files.list.txt if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod ]; then # include Lmod cache and configuration file (lmodrc.lua), # skip whiteout files and backup copies of Lmod cache (spiderT.old.*) - find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' > ${files_list} + find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' >> ${files_list} fi # include scripts that were copied by install_scripts.sh, which we want to ship in EESSI repository diff --git a/eb_hooks.py b/eb_hooks.py index b3cbc4b28e..1a4c07fca0 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -185,20 +185,26 @@ def parse_hook_fontconfig_add_fonts(ec, eprefix): def parse_hook_openblas_relax_lapack_tests_num_errors(ec, eprefix): - """Relax number of failing numerical LAPACK tests for aarch64/neoverse_v1 CPU target.""" + """Relax number of failing numerical LAPACK tests for aarch64/neoverse_v1 CPU target for OpenBLAS < 0.3.23""" cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') if ec.name == 'OpenBLAS': - # relax maximum number of failed numerical LAPACK tests for aarch64/neoverse_v1 CPU target - # since the default setting of 150 that works well on other aarch64 targets and x86_64 is a bit too strict - # See https://github.com/EESSI/software-layer/issues/314 - cfg_option = 'max_failing_lapack_tests_num_errors' - if cpu_target == CPU_TARGET_NEOVERSE_V1: - orig_value = ec[cfg_option] - ec[cfg_option] = 400 - print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)", - ec.name, ec[cfg_option], orig_value) - else: - print_msg("Not changing option %s for %s on non-AARCH64", cfg_option, ec.name) + if LooseVersion(ec.version) < LooseVersion('0.3.23'): + # relax maximum number of failed numerical LAPACK tests for aarch64/neoverse_v1 CPU target + # since the default setting of 150 that works well on other aarch64 targets and x86_64 is a bit too strict + # See https://github.com/EESSI/software-layer/issues/314 + cfg_option = 'max_failing_lapack_tests_num_errors' + if cpu_target == CPU_TARGET_NEOVERSE_V1: + orig_value = ec[cfg_option] + ec[cfg_option] = 400 + print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)", + ec.name, ec[cfg_option], orig_value) + elif cpu_target == CPU_TARGET_AARCH64_GENERIC: + orig_value = ec[cfg_option] + ec[cfg_option] = 302 + print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)", + ec.name, ec[cfg_option], orig_value) ec.name, ec[cfg_option], orig_value) + else: + print_msg("Not changing option %s for %s on non-AARCH64", cfg_option, ec.name) else: raise EasyBuildError("OpenBLAS-specific hook triggered for non-OpenBLAS easyconfig?!") @@ -393,6 +399,18 @@ def pre_test_hook_ignore_failing_tests_SciPybundle(self, *args, **kwargs): if self.name == 'SciPy-bundle' and self.version in scipy_bundle_versions and cpu_target == CPU_TARGET_NEOVERSE_V1: self.cfg['testopts'] = "|| echo ignoring failing tests" +def pre_test_hook_ignore_failing_tests_netCDF(self, *args, **kwargs): + """ + Pre-test hook for netCDF: skip failing tests for selected netCDF versions on neoverse_v1 + cfr. https://github.com/EESSI/software-layer/issues/425 + The following tests are problematic: + 163 - nc_test4_run_par_test (Timeout) + 190 - h5_test_run_par_tests (Timeout) + A few other tests are skipped in the easyconfig and patches for similar issues, see above issue for details. + """ + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + if self.name == 'netCDF' and self.version == '4.9.2' and cpu_target == CPU_TARGET_NEOVERSE_V1: + self.cfg['testopts'] = "|| echo ignoring failing tests" def pre_single_extension_hook(ext, *args, **kwargs): """Main pre-extension: trigger custom functions based on software name.""" @@ -573,6 +591,7 @@ def inject_gpu_property(ec): 'ESPResSo': pre_test_hook_ignore_failing_tests_ESPResSo, 'FFTW.MPI': pre_test_hook_ignore_failing_tests_FFTWMPI, 'SciPy-bundle': pre_test_hook_ignore_failing_tests_SciPybundle, + 'netCDF': pre_test_hook_ignore_failing_tests_netCDF, } PRE_SINGLE_EXTENSION_HOOKS = { diff --git a/eessi-2023.06-known-issues.yml b/eessi-2023.06-known-issues.yml new file mode 100644 index 0000000000..475ee2c1d7 --- /dev/null +++ b/eessi-2023.06-known-issues.yml @@ -0,0 +1,28 @@ +- aarch64/neoverse_v1: + - ESPResSo-4.2.1-foss-2023a: + - issue: https://github.com/EESSI/software-layer/issues/363 + - info: "ESPResSo tests failing due to timeouts" + - FFTW.MPI-3.3.10-gompi-2023a: + - issue: https://github.com/EESSI/software-layer/issues/325 + - info: "Flaky FFTW tests, random failures" + - FFTW.MPI-3.3.10-gompi-2023b: + - issue: https://github.com/EESSI/software-layer/issues/325 + - info: "Flaky FFTW tests, random failures" + - netCDF-4.9.2-gompi-2023a.eb: + - issue: https://github.com/EESSI/software-layer/issues/425 + - info: "netCDF intermittent test failures" + - netCDF-4.9.2-gompi-2023b.eb: + - issue: https://github.com/EESSI/software-layer/issues/425 + - info: "netCDF intermittent test failures" + - OpenBLAS-0.3.21-GCC-12.2.0: + - issue: https://github.com/EESSI/software-layer/issues/314 + - info: "Increased number of numerical errors in OpenBLAS test suite (344 vs max. 150 on x86_64/*)" + - SciPy-bundle-2023.02-gfbf-2022b: + - issue: https://github.com/EESSI/software-layer/issues/318 + - info: "numpy built with -march=armv8.4-a instead of -mcpu=native (no SVE) + 2 failing tests (vs 50005 passed) in scipy test suite" + - SciPy-bundle-2023.07-gfbf-2023a: + - issue: https://github.com/EESSI/software-layer/issues/318 + - info: "2 failing tests (vs 54409 passed) in scipy test suite" + - SciPy-bundle-2023.11-gfbf-2023b: + - issue: https://github.com/EESSI/software-layer/issues/318 + - info: "2 failing tests (vs 54876 passed) in scipy test suite" diff --git a/eessi_container.sh b/eessi_container.sh index bfc5963426..e575f4a0e4 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -30,8 +30,8 @@ # -. initial settings & exit codes TOPDIR=$(dirname $(realpath $0)) -source ${TOPDIR}/scripts/utils.sh -source ${TOPDIR}/scripts/cfg_files.sh +source "${TOPDIR}"/scripts/utils.sh +source "${TOPDIR}"/scripts/cfg_files.sh # exit codes: bitwise shift codes to allow for combination of exit codes # ANY_ERROR_EXITCODE is sourced from ${TOPDIR}/scripts/utils.sh @@ -83,7 +83,7 @@ display_help() { echo " MODE==run (run a script or command) [default: shell]" echo " -n | --nvidia MODE - configure the container to work with NVIDIA GPUs," echo " MODE==install for a CUDA installation, MODE==run to" - echo " attach a GPU, MODE==all for both [default: false]" + echo " attach a GPU, MODE==all for both [default: false]" echo " -r | --repository CFG - configuration file or identifier defining the" echo " repository to use [default: EESSI via" echo " default container, see --container]" @@ -575,7 +575,7 @@ fi declare -a EESSI_FUSE_MOUNTS=() # always mount cvmfs-config repo (to get access to software.eessi.io) -# EESSI_FUSE_MOUNTS+=("--fusemount" "container:cvmfs2 cvmfs-config.cern.ch /cvmfs/cvmfs-config.cern.ch") +# Commented out intentionally EESSI_FUSE_MOUNTS+=("--fusemount" "container:cvmfs2 cvmfs-config.cern.ch /cvmfs/cvmfs-config.cern.ch") if [[ "${ACCESS}" == "ro" ]]; then export EESSI_READONLY="container:cvmfs2 ${repo_name} /cvmfs/${repo_name}" diff --git a/init/bash b/init/bash index 26598bb9dd..e985c1452e 100644 --- a/init/bash +++ b/init/bash @@ -23,7 +23,7 @@ if [ $? -eq 0 ]; then source $EESSI_EPREFIX/usr/share/Lmod/init/bash # prepend location of modules for EESSI software stack to $MODULEPATH - echo "Prepending $EESSI_MODULEPATH to \$MODULEPATH..." >> $output + show_msg "Prepending $EESSI_MODULEPATH to \$MODULEPATH..." >> $output module use $EESSI_MODULEPATH #show_msg "" diff --git a/init/eessi_archdetect.sh b/init/eessi_archdetect.sh index 236ac5a367..4dd2436cc1 100755 --- a/init/eessi_archdetect.sh +++ b/init/eessi_archdetect.sh @@ -118,7 +118,7 @@ cpupath(){ # each flag in this CPU specification must be found in the list of flags of the host check_allinfirst "${cpu_flags[*]}" ${arch_spec[2]} && best_arch_match=${arch_spec[0]} && \ all_arch_matches="$best_arch_match:$all_arch_matches" && \ - log "DEBUG" "cpupath: host CPU best match updated to $best_arch_match" + log "DEBUG" "cpupath: host CPU best match updated to $best_arch_match" fi done diff --git a/init/eessi_environment_variables b/init/eessi_environment_variables index d4a2e72b36..8d149c4042 100644 --- a/init/eessi_environment_variables +++ b/init/eessi_environment_variables @@ -2,6 +2,11 @@ # $BASH_SOURCE points to correct path, see also http://mywiki.wooledge.org/BashFAQ/028 EESSI_INIT_DIR_PATH=$(dirname $(realpath $BASH_SOURCE)) +function error() { + echo -e "\e[31mERROR: $1\e[0m" >&2 + false +} + function show_msg { # only echo msg if EESSI_SILENT is unset msg=$1 @@ -10,11 +15,6 @@ function show_msg { fi } -function error() { - echo -e "\e[31mERROR: $1\e[0m" >&2 - false -} - # set up minimal environment: $EESSI_PREFIX, $EESSI_VERSION, $EESSI_OS_TYPE, $EESSI_CPU_FAMILY, $EPREFIX source $EESSI_INIT_DIR_PATH/minimal_eessi_env From 538fe73920c6d554de3f39a5f5bbdc42ba39fd71 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Mon, 22 Jan 2024 13:43:03 +0000 Subject: [PATCH 11/12] removed additional space --- eessi_container.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi_container.sh b/eessi_container.sh index e575f4a0e4..6e68524edb 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -164,7 +164,7 @@ while [[ $# -gt 0 ]]; do SETUP_NVIDIA=1 NVIDIA_MODE="$2" shift 2 - ;; + ;; -r|--repository) REPOSITORY="$2" shift 2 From 27f47ef5789ebbd0a7d40867d5cb33f480fab489 Mon Sep 17 00:00:00 2001 From: TopRichard <121792457+TopRichard@users.noreply.github.com> Date: Mon, 22 Jan 2024 14:44:50 +0100 Subject: [PATCH 12/12] Update eb_hooks.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Thomas Röblitz --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 1a4c07fca0..7e899b502e 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -185,7 +185,7 @@ def parse_hook_fontconfig_add_fonts(ec, eprefix): def parse_hook_openblas_relax_lapack_tests_num_errors(ec, eprefix): - """Relax number of failing numerical LAPACK tests for aarch64/neoverse_v1 CPU target for OpenBLAS < 0.3.23""" + """Relax number of failing numerical LAPACK tests for aarch64/* CPU targets for OpenBLAS < 0.3.23""" cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') if ec.name == 'OpenBLAS': if LooseVersion(ec.version) < LooseVersion('0.3.23'):