From 3fc17467b9665724eeae6651c860fd77d28283d0 Mon Sep 17 00:00:00 2001 From: ocaisa Date: Fri, 16 Jun 2023 14:32:20 +0200 Subject: [PATCH 01/59] Allow `archdetect` to print all possible cpu paths Allow fix incorrect generic path --- init/eessi_archdetect.sh | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/init/eessi_archdetect.sh b/init/eessi_archdetect.sh index d2b6dacf04..97b45bf936 100755 --- a/init/eessi_archdetect.sh +++ b/init/eessi_archdetect.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -VERSION="1.0.0" +VERSION="1.1.0" # Logging LOG_LEVEL="INFO" @@ -105,7 +105,8 @@ cpupath(){ log "DEBUG" "cpupath: CPU flags of host system: '$cpu_flags'" # Default to generic CPU - local best_arch_match="generic" + local best_arch_match="$machine_type/generic" + local all_arch_matches=$best_arch_match # Iterate over the supported CPU specifications to find the best match for host CPU # Order of the specifications matters, the last one to match will be selected @@ -114,22 +115,28 @@ cpupath(){ if [ "${cpu_vendor}x" == "${arch_spec[1]}x" ]; then # each flag in this CPU specification must be found in the list of flags of the host check_allinfirst "${cpu_flags[*]}" ${arch_spec[2]} && best_arch_match=${arch_spec[0]} && \ - log "DEBUG" "cpupath: host CPU best match updated to $best_arch_match" + all_arch_matches="$best_arch_match:$all_arch_matches" && \ + log "DEBUG" "cpupath: host CPU best match updated to $best_arch_match" fi done log "INFO" "cpupath: best match for host CPU: $best_arch_match" - echo "$best_arch_match" + if [ "allx" == "${1}x" ]; then + echo "$all_arch_matches" + else + echo "$best_arch_match" + fi } # Parse command line arguments -USAGE="Usage: eessi_archdetect.sh [-h][-d] " +USAGE="Usage: eessi_archdetect.sh [-h][-d][-a] " -while getopts 'hdv' OPTION; do +while getopts 'hdva' OPTION; do case "$OPTION" in h) echo "$USAGE"; exit 0;; d) LOG_LEVEL="DEBUG";; v) echo "eessi_archdetect.sh v$VERSION"; exit 0;; + a) all="all";; ?) 
echo "$USAGE"; exit 1;; esac done @@ -138,6 +145,6 @@ shift "$(($OPTIND -1))" ARGUMENT=${1:-none} case "$ARGUMENT" in - "cpupath") cpupath; exit;; - *) echo "$USAGE"; log "ERROR" "Missing argument";; + "cpupath") cpupath $all; exit;; + *) echo "$USAGE"; log "ERROR" "Missing argument (possible actions: 'cpupath')";; esac From 551d0082bcb15fd0cb90cf958eb4280b03dc2a58 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Fri, 16 Jun 2023 15:44:08 +0200 Subject: [PATCH 02/59] Add tests --- .github/workflows/tests_archdetect.yml | 7 +++++++ .../arm/neoverse-n1/AWS-awslinux-graviton2.all.output | 1 + .../arm/neoverse-n1/Azure-Ubuntu20-Altra.all.output | 1 + .../arm/neoverse-v1/AWS-awslinux-graviton3.all.output | 1 + .../ppc64le/power9le/unknown-power9le.all.output | 1 + .../x86_64/amd/zen2/Azure-CentOS7-7V12.all.output | 1 + .../x86_64/amd/zen3/Azure-CentOS7-7V73X.all.output | 1 + .../intel/haswell/archspec-linux-E5-2680-v3.all.output | 1 + .../intel/skylake_avx512/archspec-linux-6132.all.output | 1 + 9 files changed, 15 insertions(+) create mode 100644 tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.all.output create mode 100644 tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.all.output create mode 100644 tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.all.output create mode 100644 tests/archdetect/ppc64le/power9le/unknown-power9le.all.output create mode 100644 tests/archdetect/x86_64/amd/zen2/Azure-CentOS7-7V12.all.output create mode 100644 tests/archdetect/x86_64/amd/zen3/Azure-CentOS7-7V73X.all.output create mode 100644 tests/archdetect/x86_64/intel/haswell/archspec-linux-E5-2680-v3.all.output create mode 100644 tests/archdetect/x86_64/intel/skylake_avx512/archspec-linux-6132.all.output diff --git a/.github/workflows/tests_archdetect.yml b/.github/workflows/tests_archdetect.yml index 618f6eb142..cabbaedb6d 100644 --- a/.github/workflows/tests_archdetect.yml +++ b/.github/workflows/tests_archdetect.yml @@ -34,3 +34,10 @@ jobs: 
echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCH" >&2 exit 1 fi + CPU_ARCHES=$(./init/eessi_archdetect.sh -a cpupath) + if [[ $CPU_ARCHES == "$( cat ./tests/archdetect/${{matrix.proc_cpuinfo}}.all.output )" ]]; then + echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCHES" >&2 + else + echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCHES" >&2 + exit 1 + fi diff --git a/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.all.output b/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.all.output new file mode 100644 index 0000000000..4e4f20c1de --- /dev/null +++ b/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.all.output @@ -0,0 +1 @@ +aarch64/arm/neoverse-n1:aarch64/generic \ No newline at end of file diff --git a/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.all.output b/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.all.output new file mode 100644 index 0000000000..4e4f20c1de --- /dev/null +++ b/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.all.output @@ -0,0 +1 @@ +aarch64/arm/neoverse-n1:aarch64/generic \ No newline at end of file diff --git a/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.all.output b/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.all.output new file mode 100644 index 0000000000..61f4d892f0 --- /dev/null +++ b/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.all.output @@ -0,0 +1 @@ +aarch64/arm/neoverse-v1:aarch64/arm/neoverse-n1:aarch64/generic \ No newline at end of file diff --git a/tests/archdetect/ppc64le/power9le/unknown-power9le.all.output b/tests/archdetect/ppc64le/power9le/unknown-power9le.all.output new file mode 100644 index 0000000000..7ecf79d0a7 --- /dev/null +++ b/tests/archdetect/ppc64le/power9le/unknown-power9le.all.output @@ -0,0 +1 @@ +ppc64le/power9le:ppc64le/generic \ No newline at end of file diff --git a/tests/archdetect/x86_64/amd/zen2/Azure-CentOS7-7V12.all.output 
b/tests/archdetect/x86_64/amd/zen2/Azure-CentOS7-7V12.all.output new file mode 100644 index 0000000000..180de26f0e --- /dev/null +++ b/tests/archdetect/x86_64/amd/zen2/Azure-CentOS7-7V12.all.output @@ -0,0 +1 @@ +x86_64/amd/zen2:x86_64/generic \ No newline at end of file diff --git a/tests/archdetect/x86_64/amd/zen3/Azure-CentOS7-7V73X.all.output b/tests/archdetect/x86_64/amd/zen3/Azure-CentOS7-7V73X.all.output new file mode 100644 index 0000000000..798a0aa565 --- /dev/null +++ b/tests/archdetect/x86_64/amd/zen3/Azure-CentOS7-7V73X.all.output @@ -0,0 +1 @@ +x86_64/amd/zen3:x86_64/amd/zen2:x86_64/generic \ No newline at end of file diff --git a/tests/archdetect/x86_64/intel/haswell/archspec-linux-E5-2680-v3.all.output b/tests/archdetect/x86_64/intel/haswell/archspec-linux-E5-2680-v3.all.output new file mode 100644 index 0000000000..a047dd42cc --- /dev/null +++ b/tests/archdetect/x86_64/intel/haswell/archspec-linux-E5-2680-v3.all.output @@ -0,0 +1 @@ +x86_64/intel/haswell:x86_64/generic \ No newline at end of file diff --git a/tests/archdetect/x86_64/intel/skylake_avx512/archspec-linux-6132.all.output b/tests/archdetect/x86_64/intel/skylake_avx512/archspec-linux-6132.all.output new file mode 100644 index 0000000000..c9fa524ea6 --- /dev/null +++ b/tests/archdetect/x86_64/intel/skylake_avx512/archspec-linux-6132.all.output @@ -0,0 +1 @@ +x86_64/intel/skylake_avx512:x86_64/intel/haswell:x86_64/generic \ No newline at end of file From 7db0616a0c78f40864507d77085817af6eb7028a Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Fri, 1 Sep 2023 11:58:10 +0200 Subject: [PATCH 03/59] Remove need to pass argument to function --- init/eessi_archdetect.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/init/eessi_archdetect.sh b/init/eessi_archdetect.sh index 97b45bf936..58dd99eb6b 100755 --- a/init/eessi_archdetect.sh +++ b/init/eessi_archdetect.sh @@ -3,6 +3,8 @@ VERSION="1.1.0" # Logging LOG_LEVEL="INFO" +# Default result type is a best match 
+CPUPATH_RESULT="best" timestamp () { date "+%Y-%m-%d %H:%M:%S" @@ -120,10 +122,11 @@ cpupath(){ fi done - log "INFO" "cpupath: best match for host CPU: $best_arch_match" - if [ "allx" == "${1}x" ]; then + if [ "allx" == "${CPUPATH_RESULT}x" ]; then + log "INFO" "cpupath: all matches for host CPU: $all_arch_matches" echo "$all_arch_matches" else + log "INFO" "cpupath: best match for host CPU: $best_arch_match" echo "$best_arch_match" fi } @@ -136,7 +139,7 @@ while getopts 'hdva' OPTION; do h) echo "$USAGE"; exit 0;; d) LOG_LEVEL="DEBUG";; v) echo "eessi_archdetect.sh v$VERSION"; exit 0;; - a) all="all";; + a) CPUPATH_RESULT="all";; ?) echo "$USAGE"; exit 1;; esac done @@ -145,6 +148,6 @@ shift "$(($OPTIND -1))" ARGUMENT=${1:-none} case "$ARGUMENT" in - "cpupath") cpupath $all; exit;; + "cpupath") cpupath; exit;; *) echo "$USAGE"; log "ERROR" "Missing argument (possible actions: 'cpupath')";; esac From b81b729f9350d10dee15a3d561417c688258e99d Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Sun, 3 Sep 2023 16:40:32 +0200 Subject: [PATCH 04/59] Make sure all the directories actually exist --- .github/workflows/tests_archdetect.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/tests_archdetect.yml b/.github/workflows/tests_archdetect.yml index cabbaedb6d..676a7a8c4d 100644 --- a/.github/workflows/tests_archdetect.yml +++ b/.github/workflows/tests_archdetect.yml @@ -23,6 +23,9 @@ jobs: uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 - name: test eessi_archdetect.sh + uses: eessi/github-action-eessi@main + with: + eessi_stack_version: '2023.06' run: | export EESSI_MACHINE_TYPE=${{matrix.proc_cpuinfo}} export EESSI_MACHINE_TYPE=${EESSI_MACHINE_TYPE%%/*} @@ -41,3 +44,7 @@ jobs: echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCHES" >&2 exit 1 fi + # Check all those architectures exist + for dir in $(echo "$CPU_ARCHES" | tr ':' '\n'); do + [ -d "$EESSI_PREFIX/software/linux/$dir" ] + done From 
739a92da2dfabe70da52ae433c5152337effa494 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Sun, 3 Sep 2023 16:44:16 +0200 Subject: [PATCH 05/59] Fix steps in action --- .github/workflows/tests_archdetect.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests_archdetect.yml b/.github/workflows/tests_archdetect.yml index 676a7a8c4d..92907607c4 100644 --- a/.github/workflows/tests_archdetect.yml +++ b/.github/workflows/tests_archdetect.yml @@ -21,11 +21,10 @@ jobs: steps: - name: checkout uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 - - - name: test eessi_archdetect.sh - uses: eessi/github-action-eessi@main + - uses: eessi/github-action-eessi@main with: eessi_stack_version: '2023.06' + - name: test eessi_archdetect.sh run: | export EESSI_MACHINE_TYPE=${{matrix.proc_cpuinfo}} export EESSI_MACHINE_TYPE=${EESSI_MACHINE_TYPE%%/*} @@ -44,7 +43,7 @@ jobs: echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCHES" >&2 exit 1 fi - # Check all those architectures exist + # Check all those architectures actually exist for dir in $(echo "$CPU_ARCHES" | tr ':' '\n'); do [ -d "$EESSI_PREFIX/software/linux/$dir" ] done From 45dc7dadd07ec5bdc4bb8f29b43d3eb75f5f9c34 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Sun, 3 Sep 2023 16:57:17 +0200 Subject: [PATCH 06/59] Fix ARM detection for archdetect --- .github/workflows/tests_archdetect.yml | 6 +++--- init/arch_specs/eessi_arch_arm.spec | 8 ++++---- .../arm/neoverse-n1/AWS-awslinux-graviton2.all.output | 1 - .../aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.output | 1 - .../arm/neoverse-n1/Azure-Ubuntu20-Altra.all.output | 1 - .../aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.output | 1 - .../arm/neoverse-v1/AWS-awslinux-graviton3.all.output | 1 - .../aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.output | 1 - .../arm/neoverse_n1/AWS-awslinux-graviton2.all.output | 1 + .../AWS-awslinux-graviton2.cpuinfo | 0 
.../aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.output | 1 + .../arm/neoverse_n1/Azure-Ubuntu20-Altra.all.output | 1 + .../Azure-Ubuntu20-Altra.cpuinfo | 0 .../aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.output | 1 + .../arm/neoverse_v1/AWS-awslinux-graviton3.all.output | 1 + .../AWS-awslinux-graviton3.cpuinfo | 0 .../aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.output | 1 + 17 files changed, 13 insertions(+), 13 deletions(-) delete mode 100644 tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.all.output delete mode 100644 tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.output delete mode 100644 tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.all.output delete mode 100644 tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.output delete mode 100644 tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.all.output delete mode 100644 tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.output create mode 100644 tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.all.output rename tests/archdetect/aarch64/arm/{neoverse-n1 => neoverse_n1}/AWS-awslinux-graviton2.cpuinfo (100%) create mode 100644 tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.output create mode 100644 tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.all.output rename tests/archdetect/aarch64/arm/{neoverse-n1 => neoverse_n1}/Azure-Ubuntu20-Altra.cpuinfo (100%) create mode 100644 tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.output create mode 100644 tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.all.output rename tests/archdetect/aarch64/arm/{neoverse-v1 => neoverse_v1}/AWS-awslinux-graviton3.cpuinfo (100%) create mode 100644 tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.output diff --git a/.github/workflows/tests_archdetect.yml b/.github/workflows/tests_archdetect.yml index 92907607c4..76cc7315c7 100644 --- 
a/.github/workflows/tests_archdetect.yml +++ b/.github/workflows/tests_archdetect.yml @@ -14,9 +14,9 @@ jobs: - x86_64/amd/zen2/Azure-CentOS7-7V12 - x86_64/amd/zen3/Azure-CentOS7-7V73X - ppc64le/power9le/unknown-power9le - - aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra - - aarch64/arm/neoverse-n1/AWS-awslinux-graviton2 - - aarch64/arm/neoverse-v1/AWS-awslinux-graviton3 + - aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra + - aarch64/arm/neoverse_n1/AWS-awslinux-graviton2 + - aarch64/arm/neoverse_v1/AWS-awslinux-graviton3 fail-fast: false steps: - name: checkout diff --git a/init/arch_specs/eessi_arch_arm.spec b/init/arch_specs/eessi_arch_arm.spec index 92f32a76d8..97d9cd2edc 100755 --- a/init/arch_specs/eessi_arch_arm.spec +++ b/init/arch_specs/eessi_arch_arm.spec @@ -1,6 +1,6 @@ # ARM CPU architecture specifications # Software path in EESSI | Vendor ID | List of defining CPU features -"aarch64/arm/neoverse-n1" "ARM" "asimd" # Ampere Altra -"aarch64/arm/neoverse-n1" "" "asimd" # AWS Graviton2 -"aarch64/arm/neoverse-v1" "ARM" "asimd svei8mm" -"aarch64/arm/neoverse-v1" "" "asimd svei8mm" # AWS Graviton3 +"aarch64/arm/neoverse_n1" "ARM" "asimd" # Ampere Altra +"aarch64/arm/neoverse_n1" "" "asimd" # AWS Graviton2 +"aarch64/arm/neoverse_v1" "ARM" "asimd svei8mm" +"aarch64/arm/neoverse_v1" "" "asimd svei8mm" # AWS Graviton3 diff --git a/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.all.output b/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.all.output deleted file mode 100644 index 4e4f20c1de..0000000000 --- a/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.all.output +++ /dev/null @@ -1 +0,0 @@ -aarch64/arm/neoverse-n1:aarch64/generic \ No newline at end of file diff --git a/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.output b/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.output deleted file mode 100644 index b4dc5e9f1b..0000000000 --- 
a/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.output +++ /dev/null @@ -1 +0,0 @@ -aarch64/arm/neoverse-n1 diff --git a/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.all.output b/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.all.output deleted file mode 100644 index 4e4f20c1de..0000000000 --- a/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.all.output +++ /dev/null @@ -1 +0,0 @@ -aarch64/arm/neoverse-n1:aarch64/generic \ No newline at end of file diff --git a/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.output b/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.output deleted file mode 100644 index b4dc5e9f1b..0000000000 --- a/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.output +++ /dev/null @@ -1 +0,0 @@ -aarch64/arm/neoverse-n1 diff --git a/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.all.output b/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.all.output deleted file mode 100644 index 61f4d892f0..0000000000 --- a/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.all.output +++ /dev/null @@ -1 +0,0 @@ -aarch64/arm/neoverse-v1:aarch64/arm/neoverse-n1:aarch64/generic \ No newline at end of file diff --git a/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.output b/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.output deleted file mode 100644 index 20db96d01f..0000000000 --- a/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.output +++ /dev/null @@ -1 +0,0 @@ -aarch64/arm/neoverse-v1 diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.all.output b/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.all.output new file mode 100644 index 0000000000..e6aee274ae --- /dev/null +++ b/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.all.output @@ -0,0 +1 @@ +aarch64/arm/neoverse_n1:aarch64/generic \ No newline at end 
of file diff --git a/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.cpuinfo b/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.cpuinfo similarity index 100% rename from tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.cpuinfo rename to tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.cpuinfo diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.output b/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.output new file mode 100644 index 0000000000..c841d54b50 --- /dev/null +++ b/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.output @@ -0,0 +1 @@ +aarch64/arm/neoverse_n1 diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.all.output b/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.all.output new file mode 100644 index 0000000000..e6aee274ae --- /dev/null +++ b/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.all.output @@ -0,0 +1 @@ +aarch64/arm/neoverse_n1:aarch64/generic \ No newline at end of file diff --git a/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.cpuinfo b/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.cpuinfo similarity index 100% rename from tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.cpuinfo rename to tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.cpuinfo diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.output b/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.output new file mode 100644 index 0000000000..c841d54b50 --- /dev/null +++ b/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.output @@ -0,0 +1 @@ +aarch64/arm/neoverse_n1 diff --git a/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.all.output b/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.all.output new file mode 100644 index 0000000000..bda6c2e498 --- /dev/null +++ 
b/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.all.output @@ -0,0 +1 @@ +aarch64/arm/neoverse_v1:aarch64/arm/neoverse_n1:aarch64/generic \ No newline at end of file diff --git a/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.cpuinfo b/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.cpuinfo similarity index 100% rename from tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.cpuinfo rename to tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.cpuinfo diff --git a/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.output b/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.output new file mode 100644 index 0000000000..0c80cb1efb --- /dev/null +++ b/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.output @@ -0,0 +1 @@ +aarch64/arm/neoverse_v1 From 71cf7c95021ab16b65f098b41af853b1534aa62c Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Sun, 3 Sep 2023 17:01:13 +0200 Subject: [PATCH 07/59] Fix ARM spec --- init/arch_specs/eessi_arch_arm.spec | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/init/arch_specs/eessi_arch_arm.spec b/init/arch_specs/eessi_arch_arm.spec index 97d9cd2edc..b5c9275043 100755 --- a/init/arch_specs/eessi_arch_arm.spec +++ b/init/arch_specs/eessi_arch_arm.spec @@ -1,6 +1,6 @@ # ARM CPU architecture specifications # Software path in EESSI | Vendor ID | List of defining CPU features -"aarch64/arm/neoverse_n1" "ARM" "asimd" # Ampere Altra -"aarch64/arm/neoverse_n1" "" "asimd" # AWS Graviton2 -"aarch64/arm/neoverse_v1" "ARM" "asimd svei8mm" -"aarch64/arm/neoverse_v1" "" "asimd svei8mm" # AWS Graviton3 +"aarch64/neoverse_n1" "ARM" "asimd" # Ampere Altra +"aarch64/neoverse_n1" "" "asimd" # AWS Graviton2 +"aarch64/neoverse_v1" "ARM" "asimd svei8mm" +"aarch64/neoverse_v1" "" "asimd svei8mm" # AWS Graviton3 From 98a95feaaee00046ec233ca92f37e7862d6b3035 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Sun, 3 Sep 2023 
17:05:15 +0200 Subject: [PATCH 08/59] Also fix archdetect tests --- .../aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.all.output | 2 +- .../aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.output | 2 +- .../aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.all.output | 2 +- .../aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.output | 2 +- .../aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.all.output | 2 +- .../aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.output | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.all.output b/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.all.output index e6aee274ae..340aaa5d02 100644 --- a/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.all.output +++ b/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.all.output @@ -1 +1 @@ -aarch64/arm/neoverse_n1:aarch64/generic \ No newline at end of file +aarch64/neoverse_n1:aarch64/generic diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.output b/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.output index c841d54b50..a9bd49c75c 100644 --- a/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.output +++ b/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.output @@ -1 +1 @@ -aarch64/arm/neoverse_n1 +aarch64/neoverse_n1 diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.all.output b/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.all.output index e6aee274ae..340aaa5d02 100644 --- a/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.all.output +++ b/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.all.output @@ -1 +1 @@ -aarch64/arm/neoverse_n1:aarch64/generic \ No newline at end of file +aarch64/neoverse_n1:aarch64/generic diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.output 
b/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.output index c841d54b50..a9bd49c75c 100644 --- a/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.output +++ b/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.output @@ -1 +1 @@ -aarch64/arm/neoverse_n1 +aarch64/neoverse_n1 diff --git a/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.all.output b/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.all.output index bda6c2e498..920d5f9996 100644 --- a/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.all.output +++ b/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.all.output @@ -1 +1 @@ -aarch64/arm/neoverse_v1:aarch64/arm/neoverse_n1:aarch64/generic \ No newline at end of file +aarch64/neoverse_v1:aarch64/neoverse_n1:aarch64/generic diff --git a/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.output b/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.output index 0c80cb1efb..a8e072a9c6 100644 --- a/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.output +++ b/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.output @@ -1 +1 @@ -aarch64/arm/neoverse_v1 +aarch64/neoverse_v1 From ef1a53aff3b22b120a0405aaaa0521bb6426f219 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Sun, 3 Sep 2023 17:20:00 +0200 Subject: [PATCH 09/59] Make tests flexible in terms of EESSI version --- .github/workflows/tests_archdetect.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests_archdetect.yml b/.github/workflows/tests_archdetect.yml index 76cc7315c7..b6fc6434b2 100644 --- a/.github/workflows/tests_archdetect.yml +++ b/.github/workflows/tests_archdetect.yml @@ -21,9 +21,7 @@ jobs: steps: - name: checkout uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 - - uses: eessi/github-action-eessi@main - with: - eessi_stack_version: '2023.06' + - uses: 
eessi/github-action-eessi@v2 - name: test eessi_archdetect.sh run: | export EESSI_MACHINE_TYPE=${{matrix.proc_cpuinfo}} @@ -45,5 +43,6 @@ jobs: fi # Check all those architectures actually exist for dir in $(echo "$CPU_ARCHES" | tr ':' '\n'); do - [ -d "$EESSI_PREFIX/software/linux/$dir" ] + # Search all EESSI versions as we may drop support at some point + ls -d "$EESSI_PREFIX/../*/software/linux/$dir" done From 5665fad15dea939751ad6eb17286a4670353f973 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Sun, 3 Sep 2023 17:28:38 +0200 Subject: [PATCH 10/59] Fix hash of EESSI action --- .github/workflows/tests_archdetect.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests_archdetect.yml b/.github/workflows/tests_archdetect.yml index b6fc6434b2..f5781ef2a7 100644 --- a/.github/workflows/tests_archdetect.yml +++ b/.github/workflows/tests_archdetect.yml @@ -21,7 +21,9 @@ jobs: steps: - name: checkout uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 - - uses: eessi/github-action-eessi@v2 + - uses: eessi/github-action-eessi@58b50fd2eead2162c2b9ac258d4fb60cc9f30503 # v2.0.13 + with: + eessi_stack_version: '2023.06' - name: test eessi_archdetect.sh run: | export EESSI_MACHINE_TYPE=${{matrix.proc_cpuinfo}} From 07bfbb1d9b14bdb971d82b7a6d61c7e9818d8946 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Sun, 3 Sep 2023 17:32:41 +0200 Subject: [PATCH 11/59] Careful with quotes --- .github/workflows/tests_archdetect.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests_archdetect.yml b/.github/workflows/tests_archdetect.yml index f5781ef2a7..a794e5ebc4 100644 --- a/.github/workflows/tests_archdetect.yml +++ b/.github/workflows/tests_archdetect.yml @@ -21,7 +21,8 @@ jobs: steps: - name: checkout uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 - - uses: eessi/github-action-eessi@58b50fd2eead2162c2b9ac258d4fb60cc9f30503 # v2.0.13 + - name: Enable 
EESSI + uses: eessi/github-action-eessi@58b50fd2eead2162c2b9ac258d4fb60cc9f30503 # v2.0.13 with: eessi_stack_version: '2023.06' - name: test eessi_archdetect.sh @@ -46,5 +47,5 @@ jobs: # Check all those architectures actually exist for dir in $(echo "$CPU_ARCHES" | tr ':' '\n'); do # Search all EESSI versions as we may drop support at some point - ls -d "$EESSI_PREFIX/../*/software/linux/$dir" + ls -d "$EESSI_PREFIX"/../*/software/linux/"$dir" done From b10415b16834b15f17a49ade9391d84f1fc2d469 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Sun, 3 Sep 2023 17:43:46 +0200 Subject: [PATCH 12/59] Also drop arm subdir from tests --- .github/workflows/tests_archdetect.yml | 6 +++--- .../{arm => }/neoverse_n1/AWS-awslinux-graviton2.all.output | 0 .../{arm => }/neoverse_n1/AWS-awslinux-graviton2.cpuinfo | 0 .../{arm => }/neoverse_n1/AWS-awslinux-graviton2.output | 0 .../{arm => }/neoverse_n1/Azure-Ubuntu20-Altra.all.output | 0 .../{arm => }/neoverse_n1/Azure-Ubuntu20-Altra.cpuinfo | 0 .../{arm => }/neoverse_n1/Azure-Ubuntu20-Altra.output | 0 .../{arm => }/neoverse_v1/AWS-awslinux-graviton3.all.output | 0 .../{arm => }/neoverse_v1/AWS-awslinux-graviton3.cpuinfo | 0 .../{arm => }/neoverse_v1/AWS-awslinux-graviton3.output | 0 10 files changed, 3 insertions(+), 3 deletions(-) rename tests/archdetect/aarch64/{arm => }/neoverse_n1/AWS-awslinux-graviton2.all.output (100%) rename tests/archdetect/aarch64/{arm => }/neoverse_n1/AWS-awslinux-graviton2.cpuinfo (100%) rename tests/archdetect/aarch64/{arm => }/neoverse_n1/AWS-awslinux-graviton2.output (100%) rename tests/archdetect/aarch64/{arm => }/neoverse_n1/Azure-Ubuntu20-Altra.all.output (100%) rename tests/archdetect/aarch64/{arm => }/neoverse_n1/Azure-Ubuntu20-Altra.cpuinfo (100%) rename tests/archdetect/aarch64/{arm => }/neoverse_n1/Azure-Ubuntu20-Altra.output (100%) rename tests/archdetect/aarch64/{arm => }/neoverse_v1/AWS-awslinux-graviton3.all.output (100%) rename tests/archdetect/aarch64/{arm => 
}/neoverse_v1/AWS-awslinux-graviton3.cpuinfo (100%) rename tests/archdetect/aarch64/{arm => }/neoverse_v1/AWS-awslinux-graviton3.output (100%) diff --git a/.github/workflows/tests_archdetect.yml b/.github/workflows/tests_archdetect.yml index a794e5ebc4..61bf0e73fd 100644 --- a/.github/workflows/tests_archdetect.yml +++ b/.github/workflows/tests_archdetect.yml @@ -14,9 +14,9 @@ jobs: - x86_64/amd/zen2/Azure-CentOS7-7V12 - x86_64/amd/zen3/Azure-CentOS7-7V73X - ppc64le/power9le/unknown-power9le - - aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra - - aarch64/arm/neoverse_n1/AWS-awslinux-graviton2 - - aarch64/arm/neoverse_v1/AWS-awslinux-graviton3 + - aarch64/neoverse_n1/Azure-Ubuntu20-Altra + - aarch64/neoverse_n1/AWS-awslinux-graviton2 + - aarch64/neoverse_v1/AWS-awslinux-graviton3 fail-fast: false steps: - name: checkout diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.all.output b/tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.all.output similarity index 100% rename from tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.all.output rename to tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.all.output diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.cpuinfo b/tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.cpuinfo similarity index 100% rename from tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.cpuinfo rename to tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.cpuinfo diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.output b/tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.output similarity index 100% rename from tests/archdetect/aarch64/arm/neoverse_n1/AWS-awslinux-graviton2.output rename to tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.output diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.all.output 
b/tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.all.output similarity index 100% rename from tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.all.output rename to tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.all.output diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.cpuinfo b/tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.cpuinfo similarity index 100% rename from tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.cpuinfo rename to tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.cpuinfo diff --git a/tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.output b/tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.output similarity index 100% rename from tests/archdetect/aarch64/arm/neoverse_n1/Azure-Ubuntu20-Altra.output rename to tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.output diff --git a/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.all.output b/tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.all.output similarity index 100% rename from tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.all.output rename to tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.all.output diff --git a/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.cpuinfo b/tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.cpuinfo similarity index 100% rename from tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.cpuinfo rename to tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.cpuinfo diff --git a/tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.output b/tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.output similarity index 100% rename from tests/archdetect/aarch64/arm/neoverse_v1/AWS-awslinux-graviton3.output rename to tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.output From 409ba53ba360268449cffb3de59a91700eb1f78f Mon Sep 17 
00:00:00 2001 From: Alan O'Cais Date: Sun, 3 Sep 2023 17:45:24 +0200 Subject: [PATCH 13/59] Remove EESSI version selection --- .github/workflows/tests_archdetect.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/tests_archdetect.yml b/.github/workflows/tests_archdetect.yml index 61bf0e73fd..37338693c5 100644 --- a/.github/workflows/tests_archdetect.yml +++ b/.github/workflows/tests_archdetect.yml @@ -23,8 +23,6 @@ jobs: uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 - name: Enable EESSI uses: eessi/github-action-eessi@58b50fd2eead2162c2b9ac258d4fb60cc9f30503 # v2.0.13 - with: - eessi_stack_version: '2023.06' - name: test eessi_archdetect.sh run: | export EESSI_MACHINE_TYPE=${{matrix.proc_cpuinfo}} From 18c36d51f4857466c123ac8f6a63ec98c843a078 Mon Sep 17 00:00:00 2001 From: lara Date: Mon, 25 Sep 2023 11:58:24 +0200 Subject: [PATCH 14/59] {2023.06}[foss/2021b] matplotlib v3.4.3 --- eessi-2023.06-eb-4.8.1-2021b.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/eessi-2023.06-eb-4.8.1-2021b.yml b/eessi-2023.06-eb-4.8.1-2021b.yml index 62f529563a..a1b9927e97 100644 --- a/eessi-2023.06-eb-4.8.1-2021b.yml +++ b/eessi-2023.06-eb-4.8.1-2021b.yml @@ -5,3 +5,4 @@ easyconfigs: options: from-pr: 18834 - R-4.2.0-foss-2021b.eb + - matplotlib-3.4.3-foss-2021b.eb From 73f7e42a10481b3a6279ccde227afa030726ae13 Mon Sep 17 00:00:00 2001 From: lara Date: Tue, 3 Oct 2023 11:00:43 +0200 Subject: [PATCH 15/59] add Pillow to easystack --- eessi-2023.06-eb-4.8.1-2021b.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/eessi-2023.06-eb-4.8.1-2021b.yml b/eessi-2023.06-eb-4.8.1-2021b.yml index a1b9927e97..ec019ea879 100644 --- a/eessi-2023.06-eb-4.8.1-2021b.yml +++ b/eessi-2023.06-eb-4.8.1-2021b.yml @@ -5,4 +5,8 @@ easyconfigs: options: from-pr: 18834 - R-4.2.0-foss-2021b.eb + - Pillow-8.3.2-GCCcore-11.2.0.eb: + # avoid that hardcoded paths like /usr/include are used in build commands + options: + from-pr: 18881 - 
matplotlib-3.4.3-foss-2021b.eb From 92b764998fc46b0906b20de7346d0044248281b4 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Thu, 5 Oct 2023 10:59:49 +0200 Subject: [PATCH 16/59] remove bot configuration (main) --- bot/bot-eessi-aws-citc.cfg | 151 ------------------------------------- 1 file changed, 151 deletions(-) delete mode 100644 bot/bot-eessi-aws-citc.cfg diff --git a/bot/bot-eessi-aws-citc.cfg b/bot/bot-eessi-aws-citc.cfg deleted file mode 100644 index 5b3ad34612..0000000000 --- a/bot/bot-eessi-aws-citc.cfg +++ /dev/null @@ -1,151 +0,0 @@ -# Also see documentation at https://github.com/EESSI/eessi-bot-software-layer/blob/main/README.md#step5.5 - -[github] -# replace '123456' with the ID of your GitHub App -app_id = 281041 - -# a short (!) name for your app instance that can be used for example -# when adding/updating a comment to a PR -# (!) a short yet descriptive name is preferred because it appears in -# comments to the PR -# for example, the name could include the name of the cluster the bot -# runs on and the username which runs the bot -# NOTE avoid putting an actual username here as it will be visible on -# potentially publicly accessible GitHub pages. -app_name = eessi-bot-citc-aws - -# replace '12345678' with the ID of the installation of your GitHub App -# (can be derived by creating an event and then checking for the list -# of sent events and its payload either via the Smee channel's web page -# or via the Advanced section of your GitHub App on github.com) -installation_id = 33078935 - -# path to the private key that was generated when the GitHub App was registered -private_key = /mnt/shared/home/bot/eessi-bot-software-layer/eessi-bot-citc-aws-private-key.pem - - -[buildenv] -# name of the job script used for building an EESSI stack -build_job_script = /mnt/shared/home/bot/eessi-bot-software-layer/scripts/bot-build.slurm - -# The container_cachedir may be used to reuse downloaded container image files -# across jobs. 
Thus, jobs can more quickly launch containers. -container_cachedir = /mnt/shared/home/bot/eessi-bot-software-layer/containers-cache-dir - -# it may happen that we need to customize some CVMFS configuration -# the value of cvmfs_customizations is a dictionary which maps a file -# name to an entry that needs to be added to that file -cvmfs_customizations = {} - -# if compute nodes have no internet connection, we need to set http(s)_proxy -# or commands such as pip3 cannot download software from package repositories -# for example, the temporary EasyBuild is installed via pip3 first -# http_proxy = http://PROXY_DNS:3128/ -# https_proxy = http://PROXY_DNS:3128/ - -# directory under which the bot prepares directories per job -# structure created is as follows: YYYY.MM/pr_PR_NUMBER/event_EVENT_ID/run_RUN_NUMBER/OS+SUBDIR -jobs_base_dir = /mnt/shared/home/bot/eessi-bot-software-layer/jobs - -# configure environment -# list of comma-separated modules to be loaded by build_job_script -# useful/needed if some tool is not provided as system-wide package -# (read by bot and handed over to build_job_script via parameter -# --load-modules) -load_modules = - -# PATH to temporary directory on build node ... ends up being used for -# for example, EESSI_TMPDIR --> /tmp/$USER/EESSI -# escaping variables with '\' delays expansion to the start of the -# build_job_script; this can be used for referencing environment -# variables that are only set inside a Slurm job -local_tmp = /tmp/$USER/EESSI - -# parameters to be added to all job submissions -# NOTE do not quote parameter string. Quotes are retained when reading in config and -# then the whole 'string' is recognised as a single parameter. -# NOTE 2 '--get-user-env' may be needed on systems where the job's environment needs -# to be initialised as if it is for a login shell. 
-# note: hardcoded 24h time limit until https://github.com/EESSI/eessi-bot-software-layer/issues/146 is fixed -slurm_params = --hold --time=24:0:0 - -# full path to the job submission command -submit_command = /usr/bin/sbatch - -# which GH account has the permission to trigger the build (by setting -# the label 'bot:build' (apparently this cannot be restricted on GitHub) -# if value is left/empty everyone can trigger the build -# value can be a space delimited list of GH accounts -build_permission = boegel trz42 bedroge - -[architecturetargets] -# defines both for which architectures the bot will build -# and what submission parameters shall be used -# medium instances (8 cores, 16GB RAM) -#arch_target_map = { "linux/x86_64/generic" : "--constraint shape=c4.4xlarge", "linux/x86_64/intel/haswell" : "--constraint shape=c4.4xlarge", "linux/x86_64/intel/skylake_avx512" : "--constraint shape=c5.4xlarge", "linux/x86_64/amd/zen2": "--constraint shape=c5a.4xlarge", "linux/x86_64/amd/zen3" : "--constraint shape=c6a.4xlarge", "linux/aarch64/generic" : "--constraint shape=c6g.4xlarge", "linux/aarch64/graviton2" : "--constraint shape=c6g.4xlarge", "linux/aarch64/graviton3" : "--constraint shape=c7g.4xlarge"} -# larger instances (16 cores, 32GB RAM) -arch_target_map = { "linux/x86_64/generic" : "--constraint shape=c4.4xlarge", "linux/x86_64/intel/haswell" : "--constraint shape=c4.4xlarge", "linux/x86_64/intel/skylake_avx512" : "--constraint shape=c5.4xlarge", "linux/x86_64/amd/zen2": "--constraint shape=c5a.4xlarge", "linux/x86_64/amd/zen3" : "--constraint shape=c6a.4xlarge", "linux/aarch64/generic" : "--constraint shape=c6g.4xlarge", "linux/aarch64/graviton2" : "--constraint shape=c6g.4xlarge", "linux/aarch64/graviton3" : "--constraint shape=c7g.4xlarge"} - -[repo_targets] -# defines for which repository a arch_target should be build for -# -# only building for repository EESSI-pilot -repo_target_map = { "linux/x86_64/generic" : ["EESSI-pilot"], "linux/x86_64/intel/haswell" : 
["EESSI-pilot"], "linux/x86_64/intel/skylake_avx512" : ["EESSI-pilot"], "linux/x86_64/amd/zen2": ["EESSI-pilot"], "linux/x86_64/amd/zen3" : ["EESSI-pilot"], "linux/aarch64/generic" : ["EESSI-pilot"], "linux/aarch64/graviton2" : ["EESSI-pilot"], "linux/aarch64/graviton3" : ["EESSI-pilot"]} - -# points to definition of repositories (default EESSI-pilot defined by build container) -repos_cfg_dir = /mnt/shared/home/bot/eessi-bot-software-layer/cfg-bundles - -# configuration for event handler which receives events from a GitHub repository. -[event_handler] -# path to the log file to log messages for event handler -log_path = /mnt/shared/home/bot/eessi-bot-software-layer/eessi_bot_event_handler.log - - -[job_manager] -# path to the log file to log messages for job manager -log_path = /mnt/shared/home/bot/eessi-bot-software-layer/eessi_bot_job_manager.log - -# directory where job manager stores information about jobs to be tracked -# e.g. as symbolic link JOBID -> directory to job -job_ids_dir = /mnt/shared/home/bot/eessi-bot-software-layer/jobs - -# full path to the job status checking command -poll_command = /usr/bin/squeue - -# polling interval in seconds -poll_interval = 60 - -# full path to the command for manipulating existing jobs -scontrol_command = /usr/bin/scontrol - -[deploycfg] -# script for uploading built software packages -tarball_upload_script = /mnt/shared/home/bot/eessi-bot-software-layer/scripts/eessi-upload-to-staging - -# URL to S3/minio bucket -# if attribute is set, bucket_base will be constructed as follows -# bucket_base=${endpoint_url}/${bucket_name} -# otherwise, bucket_base will be constructed as follows -# bucket_base=https://${bucket_name}.s3.amazonaws.com -# - The former variant is used for non AWS S3 services, eg, minio, or when -# the bucket name is not provided in the hostname (see latter case). -# - The latter variant is used for AWS S3 services. 
-#endpoint_url = URL_TO_S3_SERVER - -# bucket name -bucket_name = eessi-staging - -# upload policy: defines what policy is used for uploading built artefacts -# to an S3 bucket -# 'all' ..: upload all artefacts (mulitple uploads of the same artefact possible) -# 'latest': for each build target (eessi-VERSION-{software,init,compat}-OS-ARCH) -# only upload the latest built artefact -# 'once' : only once upload any built artefact for the build target -# 'none' : do not upload any built artefacts -upload_policy = once - -# which GH account has the permission to trigger the deployment (by setting -# the label 'bot:deploy' (apparently this cannot be restricted on GitHub) -# if value is left/empty everyone can trigger the deployment -# value can be a space delimited list of GH accounts -deploy_permission = boegel trz42 bedroge From 093c01d04c9279f2c0e65d130f5fce2e668c33ee Mon Sep 17 00:00:00 2001 From: lara Date: Tue, 10 Oct 2023 08:27:03 +0200 Subject: [PATCH 17/59] {2023.06}[foss/2021b] LAMMPS 23Jun2022 --- eessi-2023.06-eb-4.8.1-2021b.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/eessi-2023.06-eb-4.8.1-2021b.yml b/eessi-2023.06-eb-4.8.1-2021b.yml index 62f529563a..90a62ba2d6 100644 --- a/eessi-2023.06-eb-4.8.1-2021b.yml +++ b/eessi-2023.06-eb-4.8.1-2021b.yml @@ -5,3 +5,4 @@ easyconfigs: options: from-pr: 18834 - R-4.2.0-foss-2021b.eb + - LAMMPS-23Jun2022-foss-2021b-kokkos.eb From 89cc228e5fc0a629a422134e67693b1a60f2fe25 Mon Sep 17 00:00:00 2001 From: TopRichard Date: Wed, 11 Oct 2023 06:42:48 +0000 Subject: [PATCH 18/59] {2023.06}[foss/2022a] matplotlib v3.5.2 --- eb_hooks.py | 14 +++++++++++++- eessi-2023.06-eb-4.8.1-2022a.yml | 8 +++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index dbb8415541..2bdead4b73 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -54,13 +54,25 @@ def get_rpath_override_dirs(software_name): return rpath_injection_dirs +def set_Pillow_envvars(ec): + """Get an EESSI_CPATH environment variable 
from the environment""" + EESSI_CPATH = os.getenv('EESSI_EPREFIX') + '/usr/include' + EESSI_LIB_PATH = os.getenv('EESSI_EPREFIX') + '/usr/lib64' + if ec.name == 'Pillow': + os.environ['CPATH'] = os.pathsep + EESSI_CPATH + os.environ['LIBRARY_PATH'] = os.pathsep + EESSI_LIB_PATH + print_msg("NOTE: For Pillow which has Szip as a dependancy, CPATH has been set to %s", os.getenv('CPATH')) + print_msg("NOTE: For Pillow which has Szip as a dependancy, LIBRARY_PATH has been set to %s", os.getenv('LIBRARY_PATH')) + ec.log.info("NOTE: For Pillow which has Szip as a dependancy, CPATH has been set to %s", os.getenv('CPATH')) + ec.log.info("NOTE: For Pillow which has Szip as a dependancy, LIBRARY_PATH has been set to %s", os.getenv('LIBRARY_PATH')) + def parse_hook(ec, *args, **kwargs): """Main parse hook: trigger custom functions based on software name.""" # determine path to Prefix installation in compat layer via $EPREFIX eprefix = get_eessi_envvar('EPREFIX') - + set_Pillow_envvars(ec) if ec.name in PARSE_HOOKS: PARSE_HOOKS[ec.name](ec, eprefix) diff --git a/eessi-2023.06-eb-4.8.1-2022a.yml b/eessi-2023.06-eb-4.8.1-2022a.yml index 97704ebe30..87ca700d96 100644 --- a/eessi-2023.06-eb-4.8.1-2022a.yml +++ b/eessi-2023.06-eb-4.8.1-2022a.yml @@ -6,4 +6,10 @@ easyconfigs: from-pr: 18870 - foss-2022a - SciPy-bundle-2022.05-foss-2022a - - BAMM-2.5.0-foss-2022a.eb \ No newline at end of file + - BAMM-2.5.0-foss-2022a.eb + - Pillow-9.1.1-GCCcore-11.3.0.eb: + # avoid that hardcoded paths like /usr/include are used in build commands + # Uses a hook to modify the hardcoded LIBRARY and Header paths. 
+ options: + from-pr: 18881 + - matplotlib-3.5.2-foss-2022a.eb From 6b0bcb9c00c4768614f5b9087e7f3ce2777e44d3 Mon Sep 17 00:00:00 2001 From: lara Date: Fri, 13 Oct 2023 13:59:44 +0200 Subject: [PATCH 19/59] add TBB solution for Aarch64 --- eessi-2023.06-eb-4.8.1-2021b.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/eessi-2023.06-eb-4.8.1-2021b.yml b/eessi-2023.06-eb-4.8.1-2021b.yml index 90a62ba2d6..1414df5e44 100644 --- a/eessi-2023.06-eb-4.8.1-2021b.yml +++ b/eessi-2023.06-eb-4.8.1-2021b.yml @@ -5,4 +5,7 @@ easyconfigs: options: from-pr: 18834 - R-4.2.0-foss-2021b.eb - - LAMMPS-23Jun2022-foss-2021b-kokkos.eb + - LAMMPS-23Jun2022-foss-2021b-kokkos.eb: + # TBB is an optional dependency when building on Intel arch + options: + from-pr: 19000 From 3db7a53ace90e3b5191486cc52e3b9c808eb7b02 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Wed, 18 Oct 2023 14:25:39 +0200 Subject: [PATCH 20/59] Build CUDA under --- .../nvidia/install_cuda_host_injections.sh | 92 +++++++++++++++++++ scripts/utils.sh | 80 +++++++++++++++- 2 files changed, 167 insertions(+), 5 deletions(-) create mode 100644 gpu_support/nvidia/install_cuda_host_injections.sh diff --git a/gpu_support/nvidia/install_cuda_host_injections.sh b/gpu_support/nvidia/install_cuda_host_injections.sh new file mode 100644 index 0000000000..b33efbfedd --- /dev/null +++ b/gpu_support/nvidia/install_cuda_host_injections.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +# Initialise our bash functions +TOPDIR=$(dirname $(realpath $BASH_SOURCE)) +source "$TOPDIR"/../../scripts/utils.sh + +# Make sure EESSI is initialised +check_eessi_initialised() + +if [[ $# -eq 0 ]] ; then + fatal_error "You must provide the CUDA version as an argument, e.g.:\n $0 11.3.1" +fi +install_cuda_version=$1 +if [[ -z "${EESSI_SOFTWARE_PATH}" ]]; then + fatal_error "This script cannot be used without having first defined EESSI_SOFTWARE_PATH" +else + # As an installation location just use $EESSI_SOFTWARE_PATH but replacing 
`versions` with `host_injections` + # (CUDA is a binary installation so no need to worry too much about the EasyBuild setup) + cuda_install_parent=${EESSI_SOFTWARE_PATH/versions/host_injections} +fi + +# Only install CUDA if specified version is not found. +# This is only relevant for users, the shipped CUDA installation will +# always be in versions instead of host_injections and have symlinks pointing +# to host_injections for everything we're not allowed to ship +# (existence of easybuild subdir implies a successful install) +if [ -d "${cuda_install_parent}"/software/CUDA/"${install_cuda_version}"/easybuild ]; then + echo_green "CUDA software found! No need to install CUDA again, proceed with testing." +else + # We need to be able write to the installation space so let's make sure we can + if ! create_directory_structure "${cuda_install_parent}"/software/CUDA ; then + fatal_error "No write permissions to directory ${cuda_install_parent}/software/CUDA" + fi + + # we need a directory we can use for temporary storage + if [[ -z "${CUDA_TEMP_DIR}" ]]; then + tmpdir=$(mktemp -d) + else + tmpdir="${CUDA_TEMP_DIR}"/temp + if ! 
mkdir "$tmpdir" ; then + fatal_error "Could not create directory ${tmpdir}" + fi + fi + + required_space_in_tmpdir=50000 + # Let's see if we have sources and build locations defined if not, we use the temporary space + if [[ -z "${EASYBUILD_BUILDPATH}" ]]; then + export EASYBUILD_BUILDPATH=${tmpdir}/build + required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000)) + fi + if [[ -z "${EASYBUILD_SOURCEPATH}" ]]; then + export EASYBUILD_SOURCEPATH=${tmpdir}/sources + required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000)) + fi + + # The install is pretty fat, you need lots of space for download/unpack/install (~3*5GB), + # need to do a space check before we proceed + avail_space=$(df --output=avail "${cuda_install_parent}"/ | tail -n 1 | awk '{print $1}') + if (( avail_space < 5000000 )); then + fatal_error "Need at least 5GB disk space to install CUDA under ${cuda_install_parent}, exiting now..." + fi + avail_space=$(df --output=avail "${tmpdir}"/ | tail -n 1 | awk '{print $1}') + if (( avail_space < required_space_in_tmpdir )); then + error="Need at least ${required_space_in_tmpdir} disk space under ${tmpdir}.\n" + error="${error}Set the environment variable CUDA_TEMP_DIR to a location with adequate space to pass this check." + error="${error}You can alternatively set EASYBUILD_BUILDPATH and/or EASYBUILD_SOURCEPATH " + error="${error}to reduce this requirement. Exiting now..." 
+ fatal_error "${error}" + fi + + if [[ -z "${EBROOTEASYBUILD}" ]]; then + echo_yellow "Loading EasyBuild module to do actual install" + module load EasyBuild + fi + + # we need the --rebuild option and a (random) dir for the module since we are + # fixing the broken links of the EESSI-shipped installation + extra_args="--rebuild --installpath-modules=${tmpdir}" + + # We don't want hooks used in this install, we need a vanilla CUDA installation + touch "$tmpdir"/none.py + # shellcheck disable=SC2086 # Intended splitting of extra_args + eb --prefix="$tmpdir" ${extra_args} --hooks="$tmpdir"/none.py --installpath="${cuda_install_parent}"/ CUDA-"${install_cuda_version}".eb + ret=$? + if [ $ret -ne 0 ]; then + fatal_error "CUDA installation failed, please check EasyBuild logs..." + else + echo_green "CUDA installation at ${cuda_install_parent}/software/CUDA/${install_cuda_version} succeeded!" + fi + # clean up tmpdir + rm -rf "${tmpdir}" +fi diff --git a/scripts/utils.sh b/scripts/utils.sh index d0da95e87f..07760f0dd0 100644 --- a/scripts/utils.sh +++ b/scripts/utils.sh @@ -14,7 +14,7 @@ ANY_ERROR_EXITCODE=1 function fatal_error() { echo_red "ERROR: $1" >&2 if [[ $# -gt 1 ]]; then - exit $2 + exit "$2" else exit "${ANY_ERROR_EXITCODE}" fi @@ -32,11 +32,81 @@ function check_exit_code { fi } +function check_eessi_initialised() { + if [[ -z "${EESSI_SOFTWARE_PATH}" ]]; then + fatal_error "EESSI has not been initialised!" + else + return 0 + fi +} + +function float_greater_than() { + # Make sure we have two arguments + if [ $# -ne 2 ]; then + echo_red "greater_than_float requires two (float) numbers" >&2 + return $ANY_ERROR_EXITCODE + fi + # Make sure the arguments are numbers + if [[ ! $1 =~ ^[+-]?[0-9]+\.?[0-9]*$ ]]; then + echo_yellow "Input to float_greater_than is not a float, ignoring" + return $ANY_ERROR_EXITCODE + fi + if [[ ! 
$2 =~ ^[+-]?[0-9]+\.?[0-9]*$ ]]; then + echo_yellow "Input to float_greater_than is not a float, ignoring" + return $ANY_ERROR_EXITCODE + fi + # Now do the actual evaluation + return_code=$ANY_ERROR_EXITCODE + result=$(echo "$1" "$2" | awk '{if ($1 > $2) print "true"}') + if [ "$result" = true ] ; then + return_code=0 + fi + return $return_code +} + +function check_in_prefix_shell() { + # Make sure EPREFIX is defined + if [[ -z "${EPREFIX}" ]]; then + fatal_error "This script cannot be used without having first defined EPREFIX" + fi + if [[ ! ${SHELL} = ${EPREFIX}/bin/bash ]]; then + fatal_error "Not running in Gentoo Prefix environment, run '${EPREFIX}/startprefix' first!" + fi +} + +function create_directory_structure() { + # Ensure we are given a single path argument + if [ $# -ne 1 ]; then + echo_red "Function requires a single (relative or absolute) path argument" >&2 + return $ANY_ERROR_EXITCODE + fi + dir_structure="$1" + + # Attempt to create the directory structure + error_message=$(mkdir -p "$dir_structure" 2>&1) + return_code=$? + # If it fails be explicit about the error + if [ ${return_code} -ne 0 ]; then + real_dir=$(realpath -m "$dir_structure") + echo_red "Creating ${dir_structure} (real path ${real_dir}) failed with:\n ${error_message}" >&2 + else + # If we're creating it, our use case is that we want to be able to write there + # (this is a check in case the directory already existed) + if [ ! -w "${dir_structure}" ]; then + real_dir=$(realpath -m "$dir_structure") + echo_red "You do not have (required) write permissions to ${dir_structure} (real path ${real_dir})!" + return_code=$ANY_ERROR_EXITCODE + fi + fi + + return $return_code +} + function get_path_for_tool { tool_name=$1 tool_envvar_name=$2 - which_out=$(which ${tool_name} 2>&1) + which_out=$(which "${tool_name}" 2>&1) exit_code=$? 
if [[ ${exit_code} -eq 0 ]]; then echo "INFO: found tool ${tool_name} in PATH (${which_out})" >&2 @@ -68,7 +138,7 @@ function get_host_from_url { url=$1 re="(http|https)://([^/:]+)" if [[ $url =~ $re ]]; then - echo ${BASH_REMATCH[2]} + echo "${BASH_REMATCH[2]}" return 0 else echo "" @@ -80,7 +150,7 @@ function get_port_from_url { url=$1 re="(http|https)://[^:]+:([0-9]+)" if [[ $url =~ $re ]]; then - echo ${BASH_REMATCH[2]} + echo "${BASH_REMATCH[2]}" return 0 else echo "" @@ -90,7 +160,7 @@ function get_port_from_url { function get_ipv4_address { hname=$1 - hipv4=$(grep ${hname} /etc/hosts | grep -v '^[[:space:]]*#' | cut -d ' ' -f 1) + hipv4=$(grep "${hname}" /etc/hosts | grep -v '^[[:space:]]*#' | cut -d ' ' -f 1) # TODO try other methods if the one above does not work --> tool that verifies # what method can be used? echo "${hipv4}" From 82893a136bad5c0a3125c91aaa2bf354f70c7450 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Wed, 18 Oct 2023 14:32:22 +0200 Subject: [PATCH 21/59] Make script executable --- gpu_support/nvidia/install_cuda_host_injections.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 gpu_support/nvidia/install_cuda_host_injections.sh diff --git a/gpu_support/nvidia/install_cuda_host_injections.sh b/gpu_support/nvidia/install_cuda_host_injections.sh old mode 100644 new mode 100755 From 3797953057ff83e1e82c1be0232ee9aa61e18b1f Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Wed, 18 Oct 2023 14:42:23 +0200 Subject: [PATCH 22/59] Fix calling function --- gpu_support/nvidia/install_cuda_host_injections.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu_support/nvidia/install_cuda_host_injections.sh b/gpu_support/nvidia/install_cuda_host_injections.sh index b33efbfedd..d6278ff3d7 100755 --- a/gpu_support/nvidia/install_cuda_host_injections.sh +++ b/gpu_support/nvidia/install_cuda_host_injections.sh @@ -5,7 +5,7 @@ TOPDIR=$(dirname $(realpath $BASH_SOURCE)) source 
"$TOPDIR"/../../scripts/utils.sh # Make sure EESSI is initialised -check_eessi_initialised() +check_eessi_initialised if [[ $# -eq 0 ]] ; then fatal_error "You must provide the CUDA version as an argument, e.g.:\n $0 11.3.1" From 445048a248bf61ae82bb0456acf242fcb48428ed Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Thu, 19 Oct 2023 15:07:33 +0200 Subject: [PATCH 23/59] Make additional bind mounts for CUDA install to succeed in container --- build_container.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build_container.sh b/build_container.sh index 23a9e665c9..42f7afade9 100755 --- a/build_container.sh +++ b/build_container.sh @@ -24,12 +24,12 @@ echo "Using $EESSI_TMPDIR as parent for temporary directories..." # create temporary directories mkdir -p $EESSI_TMPDIR/{home,overlay-upper,overlay-work} -mkdir -p $EESSI_TMPDIR/{var-lib-cvmfs,var-run-cvmfs} +mkdir -p $EESSI_TMPDIR/{var-lib-cvmfs,var-run-cvmfs,var-log,opt-eessi,usr-local-cuda} # configure Singularity export SINGULARITY_CACHEDIR=$EESSI_TMPDIR/singularity_cache # take into account that $SINGULARITY_BIND may be defined already, to bind additional paths into the build container -BIND_PATHS="$EESSI_TMPDIR/var-run-cvmfs:/var/run/cvmfs,$EESSI_TMPDIR/var-lib-cvmfs:/var/lib/cvmfs,$EESSI_TMPDIR" +BIND_PATHS="$EESSI_TMPDIR/var-log:/var/log,$EESSI_TMPDIR/usr-local-cuda:/usr/local/cuda,$EESSI_TMPDIR/var-run-cvmfs:/var/run/cvmfs,$EESSI_TMPDIR/var-lib-cvmfs:/var/lib/cvmfs,$EESSI_TMPDIR/opt-eessi:/opt/eessi,$EESSI_TMPDIR" if [ -z $SINGULARITY_BIND ]; then export SINGULARITY_BIND="$BIND_PATHS" else From b26dc50b9f3a282a185466e1004071074b1ac345 Mon Sep 17 00:00:00 2001 From: lara Date: Mon, 23 Oct 2023 17:40:05 +0200 Subject: [PATCH 24/59] add PLUMED hook --- eb_hooks.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/eb_hooks.py b/eb_hooks.py index 31f2b9588d..3147fc9bf5 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -268,6 +268,22 @@ def pre_configure_hook_wrf_aarch64(self, 
*args, **kwargs): raise EasyBuildError("WRF-specific hook triggered for non-WRF easyconfig?!") +def pre_configure_hook_PLUMED_aarch64(self, *args, **kwargs): + """ + pre-configure hook for PLUMED: + - remove unsupported --enable-asmjit option on aarch64 + """ + + if self.name == 'PLUMED': + if get_cpu_architecture() == AARCH64: + configopts = self.cfg['configopts'] + regex = re.compile(r'--enable-asmjit') + if re.search(regex, configopts): + self.cfg['configopts'] = regex.sub('', configopts) + else: + raise EasyBuildError("PLUMED-specific hook triggered for non-PLUMED easyconfig?!") + + def pre_test_hook(self,*args, **kwargs): """Main pre-test hook: trigger custom functions based on software name.""" if self.name in PRE_TEST_HOOKS: @@ -346,6 +362,7 @@ def pre_single_extension_isoband(ext, *args, **kwargs): 'MetaBAT': pre_configure_hook_metabat_filtered_zlib_dep, 'OpenBLAS': pre_configure_hook_openblas_optarch_generic, 'WRF': pre_configure_hook_wrf_aarch64, + 'PLUMED': pre_configure_hook_PLUMED_aarch64, } PRE_TEST_HOOKS = { From 9d13aa874e1961029f0d73634351db07f5a19eb4 Mon Sep 17 00:00:00 2001 From: lara Date: Mon, 30 Oct 2023 16:12:31 +0100 Subject: [PATCH 25/59] replace plumed hook with updated easyconfig and add LAMMPS hook [context](https://gitlab.com/eessi/support/-/issues/10#note_1625972083) --- eb_hooks.py | 14 ++++++-------- eessi-2023.06-eb-4.8.1-2021b.yml | 4 ++++ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index 3147fc9bf5..28ee7907a4 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -268,18 +268,16 @@ def pre_configure_hook_wrf_aarch64(self, *args, **kwargs): raise EasyBuildError("WRF-specific hook triggered for non-WRF easyconfig?!") -def pre_configure_hook_PLUMED_aarch64(self, *args, **kwargs): +def pre_configure_hook_LAMMPS_aarch64(self, *args, **kwargs): """ pre-configure hook for PLUMED: - remove unsupported --enable-asmjit option on aarch64 """ - if self.name == 'PLUMED': - if get_cpu_architecture() == 
AARCH64: - configopts = self.cfg['configopts'] - regex = re.compile(r'--enable-asmjit') - if re.search(regex, configopts): - self.cfg['configopts'] = regex.sub('', configopts) + if self.name == 'LAMMPS': + if self.version == '23Jun2022': + if get_cpu_architecture() == AARCH64: + self.cfg['kokkos_arch'] = 'A64FX' else: raise EasyBuildError("PLUMED-specific hook triggered for non-PLUMED easyconfig?!") @@ -362,7 +360,7 @@ def pre_single_extension_isoband(ext, *args, **kwargs): 'MetaBAT': pre_configure_hook_metabat_filtered_zlib_dep, 'OpenBLAS': pre_configure_hook_openblas_optarch_generic, 'WRF': pre_configure_hook_wrf_aarch64, - 'PLUMED': pre_configure_hook_PLUMED_aarch64, + 'PLUMED': pre_configure_hook_LAMMPS_aarch64, } PRE_TEST_HOOKS = { diff --git a/eessi-2023.06-eb-4.8.1-2021b.yml b/eessi-2023.06-eb-4.8.1-2021b.yml index 1414df5e44..30035fa837 100644 --- a/eessi-2023.06-eb-4.8.1-2021b.yml +++ b/eessi-2023.06-eb-4.8.1-2021b.yml @@ -5,6 +5,10 @@ easyconfigs: options: from-pr: 18834 - R-4.2.0-foss-2021b.eb + - PLUMED-2.7.3-foss-2021b.eb: + # the --enable-asmjit is not supported on Aarch64 + options: + from-pr: 19110 - LAMMPS-23Jun2022-foss-2021b-kokkos.eb: # TBB is an optional dependency when building on Intel arch options: From 7537a7f17099c04ebd1520bc6ae093d5585e54b1 Mon Sep 17 00:00:00 2001 From: lara Date: Mon, 30 Oct 2023 16:17:13 +0100 Subject: [PATCH 26/59] small fix --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 28ee7907a4..546194111d 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -279,7 +279,7 @@ def pre_configure_hook_LAMMPS_aarch64(self, *args, **kwargs): if get_cpu_architecture() == AARCH64: self.cfg['kokkos_arch'] = 'A64FX' else: - raise EasyBuildError("PLUMED-specific hook triggered for non-PLUMED easyconfig?!") + raise EasyBuildError("LAMMPS-specific hook triggered for non-LAMMPS easyconfig?!") def pre_test_hook(self,*args, **kwargs): From d56e0dd90132f3585f4da2eed1803f0a9191f2c6 Mon Sep 
17 00:00:00 2001 From: lara Date: Mon, 30 Oct 2023 16:28:36 +0100 Subject: [PATCH 27/59] fix PRE_CONFIGURE_HOOKS --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 546194111d..ea9c3bfd48 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -360,7 +360,7 @@ def pre_single_extension_isoband(ext, *args, **kwargs): 'MetaBAT': pre_configure_hook_metabat_filtered_zlib_dep, 'OpenBLAS': pre_configure_hook_openblas_optarch_generic, 'WRF': pre_configure_hook_wrf_aarch64, - 'PLUMED': pre_configure_hook_LAMMPS_aarch64, + 'LAMMPS': pre_configure_hook_LAMMPS_aarch64, } PRE_TEST_HOOKS = { From 5c9316dd86a1ceb2d2b2e5d04c8627d0da31abe1 Mon Sep 17 00:00:00 2001 From: lara Date: Mon, 30 Oct 2023 16:36:53 +0100 Subject: [PATCH 28/59] fix in description --- eb_hooks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index ea9c3bfd48..a2890da75e 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -270,8 +270,8 @@ def pre_configure_hook_wrf_aarch64(self, *args, **kwargs): def pre_configure_hook_LAMMPS_aarch64(self, *args, **kwargs): """ - pre-configure hook for PLUMED: - - remove unsupported --enable-asmjit option on aarch64 + pre-configure hook for LAMMPS: + - set kokkos_arch on Aarch64 """ if self.name == 'LAMMPS': From 8b457d4dfebeb78f9b1915cb66683ec174f2d638 Mon Sep 17 00:00:00 2001 From: lara Date: Tue, 31 Oct 2023 10:50:24 +0100 Subject: [PATCH 29/59] use framework fix and Pillow fix in easybuild v4.8.2 --- eessi-2023.06-eb-4.8.1-2021b.yml | 5 ----- eessi-2023.06-eb-4.8.2-2021b.yml | 2 ++ 2 files changed, 2 insertions(+), 5 deletions(-) create mode 100644 eessi-2023.06-eb-4.8.2-2021b.yml diff --git a/eessi-2023.06-eb-4.8.1-2021b.yml b/eessi-2023.06-eb-4.8.1-2021b.yml index ec019ea879..62f529563a 100644 --- a/eessi-2023.06-eb-4.8.1-2021b.yml +++ b/eessi-2023.06-eb-4.8.1-2021b.yml @@ -5,8 +5,3 @@ easyconfigs: options: from-pr: 18834 - R-4.2.0-foss-2021b.eb - - 
Pillow-8.3.2-GCCcore-11.2.0.eb: - # avoid that hardcoded paths like /usr/include are used in build commands - options: - from-pr: 18881 - - matplotlib-3.4.3-foss-2021b.eb diff --git a/eessi-2023.06-eb-4.8.2-2021b.yml b/eessi-2023.06-eb-4.8.2-2021b.yml new file mode 100644 index 0000000000..746f8df05f --- /dev/null +++ b/eessi-2023.06-eb-4.8.2-2021b.yml @@ -0,0 +1,2 @@ +easyconfigs: + - matplotlib-3.4.3-foss-2021b.eb From 617c8ada42cf8ca3421e9a8d7f41453d7697309e Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Tue, 31 Oct 2023 16:12:45 +0100 Subject: [PATCH 30/59] Switch GPU support to eessi_container.sh --- build_container.sh | 4 +-- eessi_container.sh | 64 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/build_container.sh b/build_container.sh index 42f7afade9..23a9e665c9 100755 --- a/build_container.sh +++ b/build_container.sh @@ -24,12 +24,12 @@ echo "Using $EESSI_TMPDIR as parent for temporary directories..." # create temporary directories mkdir -p $EESSI_TMPDIR/{home,overlay-upper,overlay-work} -mkdir -p $EESSI_TMPDIR/{var-lib-cvmfs,var-run-cvmfs,var-log,opt-eessi,usr-local-cuda} +mkdir -p $EESSI_TMPDIR/{var-lib-cvmfs,var-run-cvmfs} # configure Singularity export SINGULARITY_CACHEDIR=$EESSI_TMPDIR/singularity_cache # take into account that $SINGULARITY_BIND may be defined already, to bind additional paths into the build container -BIND_PATHS="$EESSI_TMPDIR/var-log:/var/log,$EESSI_TMPDIR/usr-local-cuda:/usr/local/cuda,$EESSI_TMPDIR/var-run-cvmfs:/var/run/cvmfs,$EESSI_TMPDIR/var-lib-cvmfs:/var/lib/cvmfs,$EESSI_TMPDIR/opt-eessi:/opt/eessi,$EESSI_TMPDIR" +BIND_PATHS="$EESSI_TMPDIR/var-run-cvmfs:/var/run/cvmfs,$EESSI_TMPDIR/var-lib-cvmfs:/var/lib/cvmfs,$EESSI_TMPDIR" if [ -z $SINGULARITY_BIND ]; then export SINGULARITY_BIND="$BIND_PATHS" else diff --git a/eessi_container.sh b/eessi_container.sh index 48c4653ba9..b58bddf309 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -46,6 +46,7 @@ 
SAVE_ERROR_EXITCODE=$((${ANY_ERROR_EXITCODE} << 8)) HTTP_PROXY_ERROR_EXITCODE=$((${ANY_ERROR_EXITCODE} << 9)) HTTPS_PROXY_ERROR_EXITCODE=$((${ANY_ERROR_EXITCODE} << 10)) RUN_SCRIPT_MISSING_EXITCODE=$((${ANY_ERROR_EXITCODE} << 11)) +NVIDIA_MODE_UNKNOWN_EXITCODE=$((${ANY_ERROR_EXITCODE} << 12)) # CernVM-FS settings CVMFS_VAR_LIB="var-lib-cvmfs" @@ -72,12 +73,17 @@ display_help() { echo " -a | --access {ro,rw} - ro (read-only), rw (read & write) [default: ro]" echo " -c | --container IMG - image file or URL defining the container to use" echo " [default: docker://ghcr.io/eessi/build-node:debian11]" - echo " -h | --help - display this usage information [default: false]" echo " -g | --storage DIR - directory space on host machine (used for" echo " temporary data) [default: 1. TMPDIR, 2. /tmp]" + echo " -h | --help - display this usage information [default: false]" + echo " -i | --host-injections - directory to link to for host_injections " + echo " [default: /..storage../opt-eessi]" echo " -l | --list-repos - list available repository identifiers [default: false]" echo " -m | --mode MODE - with MODE==shell (launch interactive shell) or" echo " MODE==run (run a script or command) [default: shell]" + echo " -n | --nvidia MODE - configure the container to work with NVIDIA GPUs," + echo " MODE==install for a CUDA installation, MODE==run to" + echo " attach a GPU, MODE==all for both [default: false]" echo " -r | --repository CFG - configuration file or identifier defining the" echo " repository to use [default: EESSI-pilot via" echo " default container, see --container]" @@ -111,6 +117,8 @@ VERBOSE=0 STORAGE= LIST_REPOS=0 MODE="shell" +SETUP_NVIDIA=0 +ADDITIONAL_SINGULARITY_FLAGS= REPOSITORY="EESSI-pilot" RESUME= SAVE= @@ -141,6 +149,10 @@ while [[ $# -gt 0 ]]; do display_help exit 0 ;; + -i|--host-injections) + USER_HOST_INJECTIONS="$2" + shift 2 + ;; -l|--list-repos) LIST_REPOS=1 shift 1 @@ -149,6 +161,11 @@ while [[ $# -gt 0 ]]; do MODE="$2" shift 2 ;; + -n|--nvidia) + 
SETUP_NVIDIA=1 + NVIDIA_MODE="$2" + shift 2 + ;; -r|--repository) REPOSITORY="$2" shift 2 @@ -224,6 +241,13 @@ if [[ "${MODE}" != "shell" && "${MODE}" != "run" ]]; then fatal_error "unknown execution mode '${MODE}'" "${MODE_UNKNOWN_EXITCODE}" fi +# Also validate the NVIDIA GPU mode (if present) +if [[ ${SETUP_NVIDIA} -eq 1 ]]; then + if [[ "${NVIDIA_MODE}" != "run" && "${NVIDIA_MODE}" != "install" && "${NVIDIA_MODE}" != "all" ]]; then + fatal_error "unknown NVIDIA mode '${NVIDIA_MODE}'" "${NVIDIA_MODE_UNKNOWN_EXITCODE}" + fi +fi + # TODO (arg -r|--repository) check if repository is known # REPOSITORY_ERROR_EXITCODE if [[ ! -z "${REPOSITORY}" && "${REPOSITORY}" != "EESSI-pilot" && ! -r ${EESSI_REPOS_CFG_FILE} ]]; then @@ -294,6 +318,7 @@ else echo "Using ${EESSI_HOST_STORAGE} as tmp directory (to resume session add '--resume ${EESSI_HOST_STORAGE}')." fi + # if ${RESUME} is a file (assume a tgz), unpack it into ${EESSI_HOST_STORAGE} if [[ ! -z ${RESUME} && -f ${RESUME} ]]; then tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} @@ -310,12 +335,25 @@ fi # |-overlay-work # |-home # |-repos_cfg +# |-opt-eessi (unless otherwise specificed for host_injections) # tmp dir for EESSI EESSI_TMPDIR=${EESSI_HOST_STORAGE} mkdir -p ${EESSI_TMPDIR} [[ ${VERBOSE} -eq 1 ]] && echo "EESSI_TMPDIR=${EESSI_TMPDIR}" +# Set host_injections directory and ensure it is a writable directory (if user provided) +if [ -z ${USER_HOST_INJECTIONS+x} ]; then + # Not set, so use our default + HOST_INJECTIONS=${EESSI_TMPDIR}/opt-eessi + mkdir -p $HOST_INJECTIONS +else + # Make sure the host_injections directory specified exists and is a folder + mkdir -p ${USER_HOST_INJECTIONS} || fatal_error "host_injections directory ${USER_HOST_INJECTIONS} is either not a directory or cannot be created" + HOST_INJECTIONS=${USER_HOST_INJECTIONS} +fi +[[ ${VERBOSE} -eq 1 ]] && echo "HOST_INJECTIONS=${HOST_INJECTIONS}" + # configure Singularity: if SINGULARITY_CACHEDIR is already defined, use that # a global 
SINGULARITY_CACHEDIR would ensure that we don't consume # storage space again and again for the container & also speed-up @@ -394,12 +432,30 @@ fi [[ ${VERBOSE} -eq 1 ]] && echo "SINGULARITY_HOME=${SINGULARITY_HOME}" # define paths to add to SINGULARITY_BIND (added later when all BIND mounts are defined) -BIND_PATHS="${EESSI_CVMFS_VAR_LIB}:/var/lib/cvmfs,${EESSI_CVMFS_VAR_RUN}:/var/run/cvmfs" +BIND_PATHS="${EESSI_CVMFS_VAR_LIB}:/var/lib/cvmfs,${EESSI_CVMFS_VAR_RUN}:/var/run/cvmfs,${HOST_INJECTIONS}:/opt/eessi" # provide a '/tmp' inside the container BIND_PATHS="${BIND_PATHS},${EESSI_TMPDIR}:${TMP_IN_CONTAINER}" [[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}" +# Configure anything we need for NVIDIA GPUs and CUDA installation +if [[ ${SETUP_NVIDIA} -eq 1 ]]; then + if [[ "${NVIDIA_MODE}" == "run" || "${NVIDIA_MODE}" == "all" ]]; then + # Give singularity the appropriate flag + ADDITIONAL_SINGULARITY_FLAGS="--nv ${ADDITIONAL_SINGULARITY_FLAGS}" + [[ ${VERBOSE} -eq 1 ]] && echo "ADDITIONAL_SINGULARITY_FLAGS=${ADDITIONAL_SINGULARITY_FLAGS}" + fi + if [[ "${NVIDIA_MODE}" == "install" || "${NVIDIA_MODE}" == "all" ]]; then + # Add additional bind mounts to allow CUDA to install within a container + EESSI_VAR_LOG=${EESSI_TMPDIR}/var-log + EESSI_USR_LOCAL_CUDA=${EESSI_TMPDIR}/usr-local-cuda + mkdir -p ${EESSI_VAR_LOG} + mkdir -p ${EESSI_USR_LOCAL_CUDA} + BIND_PATHS="${BIND_PATHS},${EESSI_VAR_LOG}:/var/log,${EESSI_USR_LOCAL_CUDA}:/usr/local/cuda" + [[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}" + fi +fi + # set up repository config (always create directory repos_cfg and populate it with info when # arg -r|--repository is used) mkdir -p ${EESSI_TMPDIR}/repos_cfg @@ -558,8 +614,8 @@ if [ ! 
-z ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} ]; then fi echo "Launching container with command (next line):" -echo "singularity ${RUN_QUIET} ${MODE} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@" -singularity ${RUN_QUIET} ${MODE} "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@" +echo "singularity ${RUN_QUIET} ${MODE} ${ADDITIONAL_SINGULARITY_FLAGS} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@" +singularity ${RUN_QUIET} ${MODE} ${ADDITIONAL_SINGULARITY_FLAGS} "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@" exit_code=$? # 6. save tmp if requested (arg -s|--save) From a2ab66466d882b9724eb998bf92c136fe514150b Mon Sep 17 00:00:00 2001 From: lara Date: Tue, 7 Nov 2023 11:59:04 +0100 Subject: [PATCH 31/59] update easystack --- eessi-2023.06-eb-4.8.1-2021b.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eessi-2023.06-eb-4.8.1-2021b.yml b/eessi-2023.06-eb-4.8.1-2021b.yml index 30035fa837..478899f672 100644 --- a/eessi-2023.06-eb-4.8.1-2021b.yml +++ b/eessi-2023.06-eb-4.8.1-2021b.yml @@ -9,7 +9,11 @@ easyconfigs: # the --enable-asmjit is not supported on Aarch64 options: from-pr: 19110 + - ScaFaCoS-1.0.4-foss-2021b.eb: + # Newer version of ScaFaCoS for LAMMPS + options: + from-pr: 19163 - LAMMPS-23Jun2022-foss-2021b-kokkos.eb: # TBB is an optional dependency when building on Intel arch options: - from-pr: 19000 + from-pr: 19164 From 3f5ef9acbef5d3a8f11d15dd27d5b36fc6ff26aa Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Thu, 9 Nov 2023 15:48:54 +0100 Subject: [PATCH 32/59] don't filter Lua dependency --- configure_easybuild | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configure_easybuild b/configure_easybuild index 7dca1ce682..c67b879cf3 100644 --- a/configure_easybuild +++ b/configure_easybuild @@ -25,8 +25,7 @@ fi # note: filtering Bison may break some installations, like Qt5 (see https://github.com/EESSI/software-layer/issues/49) # filtering pkg-config breaks R-bundle-Bioconductor installation (see also 
https://github.com/easybuilders/easybuild-easyconfigs/pull/11104) -# problems occur when filtering pkg-config with gnuplot too (picks up Lua 5.1 from $EPREFIX rather than from Lua 5.3 dependency) -DEPS_TO_FILTER=Autoconf,Automake,Autotools,binutils,bzip2,DBus,flex,gettext,gperf,help2man,intltool,libreadline,libtool,Lua,M4,makeinfo,ncurses,util-linux,XZ,zlib +DEPS_TO_FILTER=Autoconf,Automake,Autotools,binutils,bzip2,DBus,flex,gettext,gperf,help2man,intltool,libreadline,libtool,M4,makeinfo,ncurses,util-linux,XZ,zlib # For aarch64 we need to also filter out Yasm. # See https://github.com/easybuilders/easybuild-easyconfigs/issues/11190 if [[ "$EESSI_CPU_FAMILY" == "aarch64" ]]; then From fd6ac5aa5cb7ad0e5538ebf70335c1c35741e090 Mon Sep 17 00:00:00 2001 From: lara Date: Fri, 10 Nov 2023 14:52:14 +0100 Subject: [PATCH 33/59] update easystack --- eessi-2023.06-eb-4.8.1-2021b.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/eessi-2023.06-eb-4.8.1-2021b.yml b/eessi-2023.06-eb-4.8.1-2021b.yml index 478899f672..d8a7caff2e 100644 --- a/eessi-2023.06-eb-4.8.1-2021b.yml +++ b/eessi-2023.06-eb-4.8.1-2021b.yml @@ -9,11 +9,7 @@ easyconfigs: # the --enable-asmjit is not supported on Aarch64 options: from-pr: 19110 - - ScaFaCoS-1.0.4-foss-2021b.eb: - # Newer version of ScaFaCoS for LAMMPS - options: - from-pr: 19163 - LAMMPS-23Jun2022-foss-2021b-kokkos.eb: - # TBB is an optional dependency when building on Intel arch + # TBB and ScaFaCos are optional dependencies when building on Intel arch options: from-pr: 19164 From b03340ac61e2e1d168efe0b56ece7122c06c16a6 Mon Sep 17 00:00:00 2001 From: TopRichard Date: Mon, 13 Nov 2023 16:57:32 +0000 Subject: [PATCH 34/59] removed Pillow hook --- eb_hooks.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index ede8401760..c592fcd6ec 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -54,25 +54,12 @@ def get_rpath_override_dirs(software_name): return rpath_injection_dirs -def 
set_Pillow_envvars(ec): - """Get an EESSI_CPATH environment variable from the environment""" - EESSI_CPATH = os.getenv('EESSI_EPREFIX') + '/usr/include' - EESSI_LIB_PATH = os.getenv('EESSI_EPREFIX') + '/usr/lib64' - if ec.name == 'Pillow': - os.environ['CPATH'] = os.pathsep + EESSI_CPATH - os.environ['LIBRARY_PATH'] = os.pathsep + EESSI_LIB_PATH - print_msg("NOTE: For Pillow which has Szip as a dependancy, CPATH has been set to %s", os.getenv('CPATH')) - print_msg("NOTE: For Pillow which has Szip as a dependancy, LIBRARY_PATH has been set to %s", os.getenv('LIBRARY_PATH')) - ec.log.info("NOTE: For Pillow which has Szip as a dependancy, CPATH has been set to %s", os.getenv('CPATH')) - ec.log.info("NOTE: For Pillow which has Szip as a dependancy, LIBRARY_PATH has been set to %s", os.getenv('LIBRARY_PATH')) - def parse_hook(ec, *args, **kwargs): """Main parse hook: trigger custom functions based on software name.""" # determine path to Prefix installation in compat layer via $EPREFIX eprefix = get_eessi_envvar('EPREFIX') - set_Pillow_envvars(ec) if ec.name in PARSE_HOOKS: PARSE_HOOKS[ec.name](ec, eprefix) From 09c97bd2b96f1628461f1784d2aaa813d22d9145 Mon Sep 17 00:00:00 2001 From: TopRichard Date: Mon, 13 Nov 2023 16:59:11 +0000 Subject: [PATCH 35/59] removed the hook comment --- eessi-2023.06-eb-4.8.1-2022a.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/eessi-2023.06-eb-4.8.1-2022a.yml b/eessi-2023.06-eb-4.8.1-2022a.yml index 0aaa66a4ab..b6504915ca 100644 --- a/eessi-2023.06-eb-4.8.1-2022a.yml +++ b/eessi-2023.06-eb-4.8.1-2022a.yml @@ -12,7 +12,6 @@ easyconfigs: from-pr: 18963 - Pillow-9.1.1-GCCcore-11.3.0.eb: # avoid that hardcoded paths like /usr/include are used in build commands - # Uses a hook to modify the hardcoded LIBRARY and Header paths. 
options: from-pr: 19266 - matplotlib-3.5.2-foss-2022a.eb From 9b150a73de58592050383adfc7c73b90c2797c69 Mon Sep 17 00:00:00 2001 From: TopRichard Date: Mon, 13 Nov 2023 18:28:17 +0000 Subject: [PATCH 36/59] added the eb file in 4.8.2 yml file --- eessi-2023.06-eb-4.8.1-2022a.yml | 5 ----- eessi-2023.06-eb-4.8.2-2022a.yml | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/eessi-2023.06-eb-4.8.1-2022a.yml b/eessi-2023.06-eb-4.8.1-2022a.yml index b6504915ca..8317a40b7a 100644 --- a/eessi-2023.06-eb-4.8.1-2022a.yml +++ b/eessi-2023.06-eb-4.8.1-2022a.yml @@ -10,8 +10,3 @@ easyconfigs: - ESPResSo-4.2.1-foss-2022a: options: from-pr: 18963 - - Pillow-9.1.1-GCCcore-11.3.0.eb: - # avoid that hardcoded paths like /usr/include are used in build commands - options: - from-pr: 19266 - - matplotlib-3.5.2-foss-2022a.eb diff --git a/eessi-2023.06-eb-4.8.2-2022a.yml b/eessi-2023.06-eb-4.8.2-2022a.yml index bab796db2b..81fa757cd7 100644 --- a/eessi-2023.06-eb-4.8.2-2022a.yml +++ b/eessi-2023.06-eb-4.8.2-2022a.yml @@ -35,3 +35,8 @@ easyconfigs: - WSClean-3.4-foss-2022a: options: from-pr: 19119 + - Pillow-9.1.1-GCCcore-11.3.0.eb: + # avoid that hardcoded paths like /usr/include are used in build commands + options: + from-pr: 19266 + - matplotlib-3.5.2-foss-2022a.eb From b396a56257d42a4a365a97f9e3bcc680404c3fec Mon Sep 17 00:00:00 2001 From: TopRichard Date: Tue, 14 Nov 2023 09:06:56 +0000 Subject: [PATCH 37/59] reference to PR#19226 instead --- eessi-2023.06-eb-4.8.2-2022a.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi-2023.06-eb-4.8.2-2022a.yml b/eessi-2023.06-eb-4.8.2-2022a.yml index 81fa757cd7..6406064513 100644 --- a/eessi-2023.06-eb-4.8.2-2022a.yml +++ b/eessi-2023.06-eb-4.8.2-2022a.yml @@ -38,5 +38,5 @@ easyconfigs: - Pillow-9.1.1-GCCcore-11.3.0.eb: # avoid that hardcoded paths like /usr/include are used in build commands options: - from-pr: 19266 + from-pr: 19226 - matplotlib-3.5.2-foss-2022a.eb From 
c2e26c159230aa7e06851cd9099368785e189075 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bob=20Dr=C3=B6ge?= Date: Wed, 15 Nov 2023 09:33:11 +0100 Subject: [PATCH 38/59] restore blank line --- eb_hooks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eb_hooks.py b/eb_hooks.py index c592fcd6ec..31f2b9588d 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -60,6 +60,7 @@ def parse_hook(ec, *args, **kwargs): # determine path to Prefix installation in compat layer via $EPREFIX eprefix = get_eessi_envvar('EPREFIX') + if ec.name in PARSE_HOOKS: PARSE_HOOKS[ec.name](ec, eprefix) From da82af74a0f5fd700842cda0f847038ed9469c5e Mon Sep 17 00:00:00 2001 From: lara Date: Wed, 15 Nov 2023 09:49:29 +0100 Subject: [PATCH 39/59] change kokkos mapping for Aarch64" --- eb_hooks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index a2890da75e..dfd91ab18b 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -19,6 +19,7 @@ CPU_TARGET_NEOVERSE_V1 = 'aarch64/neoverse_v1' +CPU_TARGET_AARCH64_GENERIC = 'aarch64/generic' EESSI_RPATH_OVERRIDE_ATTR = 'orig_rpath_override_dirs' @@ -277,7 +278,10 @@ def pre_configure_hook_LAMMPS_aarch64(self, *args, **kwargs): if self.name == 'LAMMPS': if self.version == '23Jun2022': if get_cpu_architecture() == AARCH64: - self.cfg['kokkos_arch'] = 'A64FX' + if cpu_target == CPU_TARGET_AARCH64_GENERIC: + self.cfg['kokkos_arch'] = 'ARM81' + else: + self.cfg['kokkos_arch'] = 'ARM80' else: raise EasyBuildError("LAMMPS-specific hook triggered for non-LAMMPS easyconfig?!") From 151c4924068fe3dd841e3d1380cd2379a50b5d8b Mon Sep 17 00:00:00 2001 From: lara Date: Wed, 15 Nov 2023 09:52:48 +0100 Subject: [PATCH 40/59] update kokkos mapping --- eb_hooks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index dfd91ab18b..1d01372912 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -279,9 +279,9 @@ def pre_configure_hook_LAMMPS_aarch64(self, *args, **kwargs): if self.version == '23Jun2022': if 
get_cpu_architecture() == AARCH64: if cpu_target == CPU_TARGET_AARCH64_GENERIC: - self.cfg['kokkos_arch'] = 'ARM81' - else: self.cfg['kokkos_arch'] = 'ARM80' + else: + self.cfg['kokkos_arch'] = 'ARM81' else: raise EasyBuildError("LAMMPS-specific hook triggered for non-LAMMPS easyconfig?!") From ed22feaf78ae50d98dc675f17a3597a8622fc1d1 Mon Sep 17 00:00:00 2001 From: lara Date: Wed, 15 Nov 2023 11:25:57 +0100 Subject: [PATCH 41/59] update hook --- eb_hooks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eb_hooks.py b/eb_hooks.py index 1d01372912..8d05523c2b 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -275,6 +275,7 @@ def pre_configure_hook_LAMMPS_aarch64(self, *args, **kwargs): - set kokkos_arch on Aarch64 """ + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') if self.name == 'LAMMPS': if self.version == '23Jun2022': if get_cpu_architecture() == AARCH64: From 6d138dd2390a8fc9efd843f96c3210640e1535b4 Mon Sep 17 00:00:00 2001 From: lara Date: Thu, 16 Nov 2023 17:14:10 +0100 Subject: [PATCH 42/59] add fix for sanity check --- eessi-2023.06-eb-4.8.1-2021b.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi-2023.06-eb-4.8.1-2021b.yml b/eessi-2023.06-eb-4.8.1-2021b.yml index d8a7caff2e..6ee44926ba 100644 --- a/eessi-2023.06-eb-4.8.1-2021b.yml +++ b/eessi-2023.06-eb-4.8.1-2021b.yml @@ -12,4 +12,4 @@ easyconfigs: - LAMMPS-23Jun2022-foss-2021b-kokkos.eb: # TBB and ScaFaCos are optional dependencies when building on Intel arch options: - from-pr: 19164 + from-pr: 19246 From c558a45966cec53d86e4f5a7f5331451c2a19e04 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Sat, 18 Nov 2023 10:23:21 +0100 Subject: [PATCH 43/59] add gnuplot explicitly (dep for OpenFOAM), to trigger re-install after removing Lua from filtered dependencies in EasyBuild configuration --- eessi-2023.06-eb-4.8.0-2021b.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/eessi-2023.06-eb-4.8.0-2021b.yml b/eessi-2023.06-eb-4.8.0-2021b.yml index 477ba6320c..9524dd01df 100644 
--- a/eessi-2023.06-eb-4.8.0-2021b.yml +++ b/eessi-2023.06-eb-4.8.0-2021b.yml @@ -4,4 +4,5 @@ easyconfigs: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/18746 options: from-pr: 18746 + - gnuplot-5.4.2-GCCcore-11.2.0.eb - OpenFOAM-v2112-foss-2021b.eb From 73097a32ea0956644adc209ba2097a69340f82e2 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Sat, 18 Nov 2023 11:25:21 +0100 Subject: [PATCH 44/59] remove filter-deps for AOFlagger, since Lua is no longer filtered in general EasyBuild configuration --- eessi-2023.06-eb-4.8.2-2022a.yml | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/eessi-2023.06-eb-4.8.2-2022a.yml b/eessi-2023.06-eb-4.8.2-2022a.yml index bab796db2b..9921bad045 100644 --- a/eessi-2023.06-eb-4.8.2-2022a.yml +++ b/eessi-2023.06-eb-4.8.2-2022a.yml @@ -5,27 +5,6 @@ easyconfigs: - AOFlagger-3.4.0-foss-2022a: options: from-pr: 19119 - # Exclude Lua from `filter-deps` as the compat layer version is too old for the software - filter-deps: - - Autoconf - - Automake - - Autotools - - binutils - - bzip2 - - DBus - - flex - - gettext - - gperf - - help2man - - intltool - - libreadline - - libtool - - ncurses - - M4 - - makeinfo - - util-linux - - XZ - - zlib - EveryBeam-0.5.2-foss-2022a: options: from-pr: 19119 From f1ab7f23fbc3bbfa0178c961d8b70248f60db3b3 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Sat, 18 Nov 2023 12:19:18 +0100 Subject: [PATCH 45/59] install gnuplot via easyconfig PR that fixing linking error to Lua dependency --- eessi-2023.06-eb-4.8.0-2021b.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eessi-2023.06-eb-4.8.0-2021b.yml b/eessi-2023.06-eb-4.8.0-2021b.yml index 9524dd01df..a2562a4ed1 100644 --- a/eessi-2023.06-eb-4.8.0-2021b.yml +++ b/eessi-2023.06-eb-4.8.0-2021b.yml @@ -4,5 +4,9 @@ easyconfigs: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/18746 options: from-pr: 18746 - - gnuplot-5.4.2-GCCcore-11.2.0.eb + - gnuplot-5.4.2-GCCcore-11.2.0.eb: + # 
make sure that Lua dependency is correctly picked up, + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19261 + options: + from-pr: 19261 - OpenFOAM-v2112-foss-2021b.eb From 7c679a03000d5542afde8cf1b34fa6db6335f1ec Mon Sep 17 00:00:00 2001 From: lara Date: Mon, 20 Nov 2023 11:22:24 +0100 Subject: [PATCH 46/59] Make Pillow sysroot aware --- eessi-2023.06-eb-4.8.2-2021b.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/eessi-2023.06-eb-4.8.2-2021b.yml b/eessi-2023.06-eb-4.8.2-2021b.yml index 746f8df05f..00c02adf91 100644 --- a/eessi-2023.06-eb-4.8.2-2021b.yml +++ b/eessi-2023.06-eb-4.8.2-2021b.yml @@ -1,2 +1,6 @@ easyconfigs: + - Pillow-8.3.2-GCCcore-11.2.0.eb: + # avoid that hardcoded paths like /usr/include are used in build commands + options: + from-pr: 19226 - matplotlib-3.4.3-foss-2021b.eb From 090351864cbc5b97f35498b278d16041a5b97b1e Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Thu, 30 Nov 2023 11:16:55 +0100 Subject: [PATCH 47/59] Address review and improve UI and error reporting --- eessi_container.sh | 8 +- .../nvidia/install_cuda_host_injections.sh | 132 ++++++++++++++++-- scripts/utils.sh | 24 ---- 3 files changed, 125 insertions(+), 39 deletions(-) diff --git a/eessi_container.sh b/eessi_container.sh index b58bddf309..e31808d546 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -30,8 +30,8 @@ # -. 
initial settings & exit codes TOPDIR=$(dirname $(realpath $0)) -source ${TOPDIR}/scripts/utils.sh -source ${TOPDIR}/scripts/cfg_files.sh +source "${TOPDIR}"/scripts/utils.sh +source "${TOPDIR}"/scripts/cfg_files.sh # exit codes: bitwise shift codes to allow for combination of exit codes # ANY_ERROR_EXITCODE is sourced from ${TOPDIR}/scripts/utils.sh @@ -447,6 +447,10 @@ if [[ ${SETUP_NVIDIA} -eq 1 ]]; then fi if [[ "${NVIDIA_MODE}" == "install" || "${NVIDIA_MODE}" == "all" ]]; then # Add additional bind mounts to allow CUDA to install within a container + # (Experience tells us that these are necessary, but we don't know _why_ + # as the CUDA installer is a black box. The suspicion is that the CUDA + # installer gets confused by the permissions on these directories when + # inside a container) EESSI_VAR_LOG=${EESSI_TMPDIR}/var-log EESSI_USR_LOCAL_CUDA=${EESSI_TMPDIR}/usr-local-cuda mkdir -p ${EESSI_VAR_LOG} diff --git a/gpu_support/nvidia/install_cuda_host_injections.sh b/gpu_support/nvidia/install_cuda_host_injections.sh index d6278ff3d7..2c59a891f8 100755 --- a/gpu_support/nvidia/install_cuda_host_injections.sh +++ b/gpu_support/nvidia/install_cuda_host_injections.sh @@ -1,16 +1,94 @@ #!/usr/bin/env bash +# This script can be used to install CUDA under the `.../host_injections` directory. +# This provides the parts of the CUDA installation that cannot be redistributed as +# part of EESSI due to license limitations. While GPU-based software from EESSI will +# _run_ without these, installation of additional CUDA software requires the CUDA +# installation(s) under `host_injections` to be present. +# +# The `host_injections` directory is a variant symlink that by default points to +# `/opt/eessi`, unless otherwise defined in the local CVMFS configuration (see +# https://cvmfs.readthedocs.io/en/stable/cpt-repo.html#variant-symlinks). For the +# installation to be successful, this directory needs to be writeable by the user +# executing this script. 
+ # Initialise our bash functions TOPDIR=$(dirname $(realpath $BASH_SOURCE)) source "$TOPDIR"/../../scripts/utils.sh +# Function to display help message +show_help() { + echo "Usage: $0 [OPTIONS]" + echo "Options:" + echo " --help Display this help message" + echo " -c, --cuda-version CUDA_VERSION Specify a version o CUDA to install (must" + echo " have a corresponding easyconfig in the" + echo " EasyBuild release)" + echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" + echo " storage during the CUDA install" + echo " (must have >10GB available)" +} + +# Initialize variables +install_cuda_version="" + +# Parse command-line options +while [[ $# -gt 0 ]]; do + case "$1" in + --help) + show_help + exit 0 + ;; + -c|--cuda-version) + if [ -n "$2" ]; then + install_cuda_version="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + -t|--temp-dir) + if [ -n "$2" ]; then + CUDA_TEMP_DIR="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + *) + show_help + fatal_error "Error: Unknown option: $1" + ;; + esac +done + +# Make sure the CUDA version supplied is a semantic version +is_semantic_version() { + local version=$1 + local regex='^[0-9]+\.[0-9]+\.[0-9]+$' + + if [[ $version =~ $regex ]]; then + return 0 # Return success (0) if it's a semantic version + else + return 1 # Return failure (1) if it's not a semantic version + fi +} +if ! is_semantic_version "$install_cuda_version"; then + show_help + error="\nYou must provide a semantic version for CUDA (e.g., 12.1.1) via the appropriate\n" + error="${error}command line option. 
This script is intended for use with EESSI so the 'correct'\n" + error="${error}version to provide is probably the one that is available under\n" + error="${error}$EESSI_SOFTWARE_PATH/software/CUDA\n" + fatal_error "${error}" +fi + # Make sure EESSI is initialised check_eessi_initialised -if [[ $# -eq 0 ]] ; then - fatal_error "You must provide the CUDA version as an argument, e.g.:\n $0 11.3.1" -fi -install_cuda_version=$1 if [[ -z "${EESSI_SOFTWARE_PATH}" ]]; then fatal_error "This script cannot be used without having first defined EESSI_SOFTWARE_PATH" else @@ -20,12 +98,9 @@ else fi # Only install CUDA if specified version is not found. -# This is only relevant for users, the shipped CUDA installation will -# always be in versions instead of host_injections and have symlinks pointing -# to host_injections for everything we're not allowed to ship # (existence of easybuild subdir implies a successful install) if [ -d "${cuda_install_parent}"/software/CUDA/"${install_cuda_version}"/easybuild ]; then - echo_green "CUDA software found! No need to install CUDA again, proceed with testing." + echo_green "CUDA software found! No need to install CUDA again." else # We need to be able write to the installation space so let's make sure we can if ! create_directory_structure "${cuda_install_parent}"/software/CUDA ; then @@ -68,19 +143,50 @@ else fatal_error "${error}" fi - if [[ -z "${EBROOTEASYBUILD}" ]]; then - echo_yellow "Loading EasyBuild module to do actual install" + if ! command -v "eb" &>/dev/null; then + echo_yellow "Attempting to load an EasyBuild module to do actual install" module load EasyBuild + # There are some scenarios where this may fail + if [ $? -ne 0 ]; then + error="'eb' command not found in your environment and\n" + error="${error} module load EasyBuild\n" + error="${error}failed for some reason.\n" + error="${error}Please re-run this script with the 'eb' command available." 
+ fatal_error "${error}" + fi + fi + + cuda_easyconfig="CUDA-${install_cuda_version}.eb" + + # Check the easyconfig file is available in the release + # (eb search always returns 0, so we need a grep to ensure a usable exit code) + eb --search ^${cuda_easyconfig}|grep CUDA > /dev/null 2>&1 + # Check the exit code + if [ $? -ne 0 ]; then + eb_version=$(eb --version) + available_cuda_easyconfigs=$(eb --search ^CUDA-*.eb|grep CUDA) + + error="The easyconfig ${cuda_easyconfig} was not found in EasyBuild version:\n" + error="${error} ${eb_version}\n" + error="${error}You either need to give a different version of CUDA to install _or_ \n" + error="${error}use a different version of EasyBuild for the installation.\n" + error="${error}\nThe versions of available with the current eb command are:\n" + error="${error}${available_cuda_easyconfigs}" + fatal_error "${error}" fi - # we need the --rebuild option and a (random) dir for the module since we are - # fixing the broken links of the EESSI-shipped installation + # We need the --rebuild option, as the CUDA module may or may not be on the + # `MODULEPATH` yet. Even if it is, we still want to redo this installation + # since it will provide the symlinked targets for the parts of the CUDA + # installation in the `.../versions/...` prefix + # We install the module in our `tmpdir` since we do not need the modulefile, + # we only care about providing the targets for the symlinks. extra_args="--rebuild --installpath-modules=${tmpdir}" # We don't want hooks used in this install, we need a vanilla CUDA installation touch "$tmpdir"/none.py # shellcheck disable=SC2086 # Intended splitting of extra_args - eb --prefix="$tmpdir" ${extra_args} --hooks="$tmpdir"/none.py --installpath="${cuda_install_parent}"/ CUDA-"${install_cuda_version}".eb + eb --prefix="$tmpdir" ${extra_args} --hooks="$tmpdir"/none.py --installpath="${cuda_install_parent}"/ "${cuda_easyconfig}" ret=$? 
if [ $ret -ne 0 ]; then fatal_error "CUDA installation failed, please check EasyBuild logs..." diff --git a/scripts/utils.sh b/scripts/utils.sh index 07760f0dd0..b2be3f6221 100644 --- a/scripts/utils.sh +++ b/scripts/utils.sh @@ -40,30 +40,6 @@ function check_eessi_initialised() { fi } -function float_greater_than() { - # Make sure we have two arguments - if [ $# -ne 2 ]; then - echo_red "greater_than_float requires two (float) numbers" >&2 - return $ANY_ERROR_EXITCODE - fi - # Make sure the arguments are numbers - if [[ ! $1 =~ ^[+-]?[0-9]+\.?[0-9]*$ ]]; then - echo_yellow "Input to float_greater_than is not a float, ignoring" - return $ANY_ERROR_EXITCODE - fi - if [[ ! $2 =~ ^[+-]?[0-9]+\.?[0-9]*$ ]]; then - echo_yellow "Input to float_greater_than is not a float, ignoring" - return $ANY_ERROR_EXITCODE - fi - # Now do the actual evaluation - return_code=$ANY_ERROR_EXITCODE - result=$(echo "$1" "$2" | awk '{if ($1 > $2) print "true"}') - if [ "$result" = true ] ; then - return_code=0 - fi - return $return_code -} - function check_in_prefix_shell() { # Make sure EPREFIX is defined if [[ -z "${EPREFIX}" ]]; then From 12719eccaabf57112332cc4a4958d0d03e382ef4 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Thu, 30 Nov 2023 12:56:11 +0100 Subject: [PATCH 48/59] Make sure users are forced to accept CUDA EULA --- .../nvidia/install_cuda_host_injections.sh | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/gpu_support/nvidia/install_cuda_host_injections.sh b/gpu_support/nvidia/install_cuda_host_injections.sh index 2c59a891f8..79af986a0a 100755 --- a/gpu_support/nvidia/install_cuda_host_injections.sh +++ b/gpu_support/nvidia/install_cuda_host_injections.sh @@ -21,6 +21,9 @@ show_help() { echo "Usage: $0 [OPTIONS]" echo "Options:" echo " --help Display this help message" + echo " --accept-cuda-eula You _must_ accept the CUDA EULA to install" + echo " CUDA, see the EULA at" + echo " https://docs.nvidia.com/cuda/eula/index.html" echo " -c, 
--cuda-version CUDA_VERSION Specify a version o CUDA to install (must" echo " have a corresponding easyconfig in the" echo " EasyBuild release)" @@ -31,6 +34,7 @@ show_help() { # Initialize variables install_cuda_version="" +eula_accepted=0 # Parse command-line options while [[ $# -gt 0 ]]; do @@ -49,6 +53,10 @@ while [[ $# -gt 0 ]]; do exit 1 fi ;; + --accept-cuda-eula) + eula_accepted=1 + shift 1 + ;; -t|--temp-dir) if [ -n "$2" ]; then CUDA_TEMP_DIR="$2" @@ -86,6 +94,13 @@ if ! is_semantic_version "$install_cuda_version"; then fatal_error "${error}" fi +# Make sure they have accepted the CUDA EULA +if [ "$eula_accepted" -ne 1 ]; then + show_help + error="\nYou _must_ accept the CUDA EULA via the appropriate command line option.\n" + fatal_error "${error}" +fi + # Make sure EESSI is initialised check_eessi_initialised @@ -186,7 +201,7 @@ else # We don't want hooks used in this install, we need a vanilla CUDA installation touch "$tmpdir"/none.py # shellcheck disable=SC2086 # Intended splitting of extra_args - eb --prefix="$tmpdir" ${extra_args} --hooks="$tmpdir"/none.py --installpath="${cuda_install_parent}"/ "${cuda_easyconfig}" + eb --prefix="$tmpdir" ${extra_args} --accept-eula-for=CUDA --hooks="$tmpdir"/none.py --installpath="${cuda_install_parent}"/ "${cuda_easyconfig}" ret=$? if [ $ret -ne 0 ]; then fatal_error "CUDA installation failed, please check EasyBuild logs..." 
From b005591069f055c4a5c206216279fd428878b887 Mon Sep 17 00:00:00 2001
From: Alan O'Cais
Date: Thu, 30 Nov 2023 14:50:16 +0100
Subject: [PATCH 49/59] Add script to link in host NVIDIA drivers

---
 .../nvidia/link_nvidia_host_libraries.sh      | 147 ++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100755 gpu_support/nvidia/link_nvidia_host_libraries.sh

diff --git a/gpu_support/nvidia/link_nvidia_host_libraries.sh b/gpu_support/nvidia/link_nvidia_host_libraries.sh
new file mode 100755
--- /dev/null
+++ b/gpu_support/nvidia/link_nvidia_host_libraries.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+
+# This script links host libraries related to GPU drivers to a location where
+# they can be found by the EESSI linker
+#
+# It takes no arguments. It reads EESSI_CPU_FAMILY and EESSI_EPREFIX from the
+# environment and calls ldconfig, nvidia-smi and curl. The helper functions
+# used below (fatal_error, echo_green, ...) are expected to come from the
+# sourced scripts/utils.sh.
+
+# Initialise our bash functions
+TOPDIR=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
+source "$TOPDIR"/../../scripts/utils.sh
+
+# We rely on ldconfig to give us the location of the libraries on the host
+command_name="ldconfig"
+# We cannot use a version of ldconfig that's being shipped under CVMFS
+exclude_prefix="/cvmfs"
+
+found_paths=()
+# Always attempt to use /sbin/ldconfig
+if [ -x "/sbin/$command_name" ]; then
+  found_paths+=("/sbin/$command_name")
+fi
+IFS=':' read -ra path_dirs <<< "$PATH"
+for dir in "${path_dirs[@]}"; do
+  # Skip any PATH entry that starts with the excluded prefix
+  if [[ "$dir" != "$exclude_prefix"* ]] && [ -x "$dir/$command_name" ]; then
+    found_paths+=("$dir/$command_name")
+  fi
+done
+
+if [ ${#found_paths[@]} -gt 0 ]; then
+  echo "Found $command_name in the following locations:"
+  printf -- "- %s\n" "${found_paths[@]}"
+  echo "Using first version"
+  host_ldconfig=${found_paths[0]}
+else
+  error="$command_name not found in PATH or only found in paths starting with $exclude_prefix."
+  # Quoted so that the whole message reaches fatal_error as one argument
+  fatal_error "$error"
+fi
+
+# Make sure EESSI is initialised (doesn't matter what version)
+check_eessi_initialised
+
+# Find the version of the host NVIDIA drivers
+# (making sure that this can still work inside prefix environment inside a container)
+# The command is an array and the library path is a real per-command
+# assignment: an assignment that comes out of a variable expansion would be
+# treated as the command name instead of as an environment setting.
+nvidia_smi_command=(nvidia-smi --query-gpu=driver_version --format=csv,noheader)
+nvidia_smi_libs="/.singularity.d/libs${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+if nvidia_smi_output=$(LD_LIBRARY_PATH="$nvidia_smi_libs" "${nvidia_smi_command[@]}") && [ -n "$nvidia_smi_output" ]; then
+  host_cuda_version=$(tail -n1 <<< "$nvidia_smi_output")
+else
+  error="Failed to successfully execute\n  ${nvidia_smi_command[*]}\n"
+  fatal_error "$error"
+fi
+
+# Let's make sure the driver libraries are not already in place
+link_drivers=1
+
+host_injections_nvidia_dir="/cvmfs/pilot.eessi-hpc.org/host_injections/${EESSI_CPU_FAMILY}/nvidia"
+host_injection_driver_dir="${host_injections_nvidia_dir}/host"
+host_injection_driver_version_file="$host_injection_driver_dir/version.txt"
+if [ -e "$host_injection_driver_version_file" ]; then
+  # Fixed-string, whole-line match: dots in the version are not wildcards
+  if grep -qxF -- "$host_cuda_version" "$host_injection_driver_version_file"; then
+    echo_green "The host CUDA driver libraries have already been linked!"
+    link_drivers=0
+  else
+    # There's something there but it is out of date
+    echo_yellow "Cleaning out outdated symlinks"
+    if ! rm -- "${host_injection_driver_dir:?}"/*; then
+      error="Unable to remove files under '$host_injection_driver_dir'."
+      fatal_error "$error"
+    fi
+  fi
+fi
+
+drivers_linked=0
+if [ "$link_drivers" -eq 1 ]; then
+  if ! create_directory_structure "${host_injection_driver_dir}" ; then
+    fatal_error "No write permissions to directory ${host_injection_driver_dir}"
+  fi
+  cd "${host_injection_driver_dir}" || fatal_error "Unable to change to directory ${host_injection_driver_dir}"
+  # Need a small temporary space to hold a couple of files
+  temp_dir=$(mktemp -d) || fatal_error "Unable to create a temporary directory"
+
+  # Gather libraries on the host (_must_ be host ldconfig)
+  "$host_ldconfig" -p | awk '{print $NF}' > "$temp_dir"/libs.txt
+  # Allow for the fact that we may be in a container so the CUDA libs might be in there
+  ls /.singularity.d/libs/* >> "$temp_dir"/libs.txt 2>/dev/null
+
+  # Leverage singularity to find the full list of libraries we should be linking to
+  nvliblist_url="https://raw.githubusercontent.com/apptainer/apptainer/main/etc/nvliblist.conf"
+  if ! curl -f -o "$temp_dir"/nvliblist.conf "$nvliblist_url"; then
+    rm -r "$temp_dir"
+    fatal_error "Unable to download $nvliblist_url"
+  fi
+
+  # Make symlinks to all the interesting libraries
+  # (libs.txt lives in the temporary directory, not in the current directory)
+  grep '\.so$' "$temp_dir"/nvliblist.conf | xargs -I{} grep {} "$temp_dir"/libs.txt | xargs -I{} ln -s {}
+
+  # Inject CUDA version into dir
+  echo "$host_cuda_version" > version.txt
+  drivers_linked=1
+
+  # Remove the temporary directory when done
+  rm -r "$temp_dir"
+fi
+
+# Make latest symlink for NVIDIA drivers
+cd "$host_injections_nvidia_dir" || fatal_error "Unable to change to directory $host_injections_nvidia_dir"
+symlink="latest"
+if [ -L "$symlink" ]; then
+  # Unless the drivers have been installed, leave the symlink alone
+  if [ "$drivers_linked" -eq 1 ]; then
+    # -n replaces the link itself instead of creating a link inside its target
+    ln -sfn host "$symlink"
+  fi
+else
+  # No link exists yet
+  ln -s host "$symlink"
+fi
+
+# Make sure the libraries can be found by the EESSI linker
+host_injection_linker_dir=${EESSI_EPREFIX/versions/host_injections}
+if [ -L "$host_injection_linker_dir/lib" ]; then
+  # Compare fully resolved paths on both sides ('latest' is itself a symlink)
+  target_path=$(readlink -f "$host_injection_linker_dir/lib")
+  expected_path=$(readlink -f "$host_injections_nvidia_dir/latest")
+  if [ "$target_path" != "$expected_path" ]; then
+    cd "$host_injection_linker_dir" || fatal_error "Unable to change to directory $host_injection_linker_dir"
+    ln -sfn "$host_injections_nvidia_dir"/latest lib
+  fi
+else
+  if ! create_directory_structure "$host_injection_linker_dir" ; then
+    fatal_error "No write permissions to directory ${host_injection_linker_dir}"
+  fi
+  cd "$host_injection_linker_dir" || fatal_error "Unable to change to directory $host_injection_linker_dir"
+  ln -s "$host_injections_nvidia_dir"/latest lib
+fi
+
+echo_green "Host NVIDIA gpu drivers linked successfully for EESSI"
From 9d6e91de4c0536b3552fdee9d1bd9d631a9a8be7 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Thu, 30 Nov 2023 14:52:13 +0100 Subject: [PATCH 50/59] Typo --- gpu_support/nvidia/link_nvidia_host_libraries.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu_support/nvidia/link_nvidia_host_libraries.sh b/gpu_support/nvidia/link_nvidia_host_libraries.sh index eb713fce1b..eb68e8dd69 100755 --- a/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -15,7 +15,7 @@ exclude_prefix="/cvmfs" found_paths=() # Always attempt to use /sbin/ldconfig if [ -x "/sbin/$command_name" ]; then - found_paths+=("$dir/$command_name") + found_paths+=("/sbin/$command_name") fi IFS=':' read -ra path_dirs <<< "$PATH" for dir in "${path_dirs[@]}"; do From cb126492b53e7c77ce896883dd98c47cbf0cd61d Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Thu, 30 Nov 2023 14:54:46 +0100 Subject: [PATCH 51/59] Wrap the command exection because of the envvars --- gpu_support/nvidia/link_nvidia_host_libraries.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu_support/nvidia/link_nvidia_host_libraries.sh b/gpu_support/nvidia/link_nvidia_host_libraries.sh index eb68e8dd69..77796ac9c9 100755 --- a/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -42,7 +42,7 @@ check_eessi_initialised # Find the CUDA version of the host CUDA drivers # (making sure that this can still work inside prefix environment inside a container) nvidia_smi_command="LD_LIBRARY_PATH=/.singularity/libs:$LD_LIBRARY_PATH nvidia-smi --query-gpu=driver_version --format=csv,noheader" -if $nvidia_smi_command; then +if $($nvidia_smi_command); then host_cuda_version=$($nvidia_smi_command | tail -n1) else error="Failed to successfully execute\n $nvidia_smi_command\n" From f9268e0fd80f726ba6ddf37727d821d1dc231d9c Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Thu, 30 Nov 2023 14:57:07 +0100 Subject: 
[PATCH 52/59] Move envvar setting outside of command --- gpu_support/nvidia/link_nvidia_host_libraries.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gpu_support/nvidia/link_nvidia_host_libraries.sh b/gpu_support/nvidia/link_nvidia_host_libraries.sh index 77796ac9c9..56c0b7a2ae 100755 --- a/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -41,8 +41,9 @@ check_eessi_initialised # Find the CUDA version of the host CUDA drivers # (making sure that this can still work inside prefix environment inside a container) -nvidia_smi_command="LD_LIBRARY_PATH=/.singularity/libs:$LD_LIBRARY_PATH nvidia-smi --query-gpu=driver_version --format=csv,noheader" -if $($nvidia_smi_command); then +LD_LIBRARY_PATH=/.singularity/libs:$LD_LIBRARY_PATH +nvidia_smi_command="nvidia-smi --query-gpu=driver_version --format=csv,noheader" +if $nvidia_smi_command; then host_cuda_version=$($nvidia_smi_command | tail -n1) else error="Failed to successfully execute\n $nvidia_smi_command\n" From a06f541445b5f9d5a76d659fc07c03d17003e930 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Thu, 30 Nov 2023 14:58:38 +0100 Subject: [PATCH 53/59] Forgot to add temp location --- gpu_support/nvidia/link_nvidia_host_libraries.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu_support/nvidia/link_nvidia_host_libraries.sh b/gpu_support/nvidia/link_nvidia_host_libraries.sh index 56c0b7a2ae..a15c1e98d4 100755 --- a/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -89,7 +89,7 @@ if [ "$link_drivers" -eq 1 ]; then curl -o "$temp_dir"/nvliblist.conf https://raw.githubusercontent.com/apptainer/apptainer/main/etc/nvliblist.conf # Make symlinks to all the interesting libraries - grep '.so$' "$temp_dir"/nvliblist.conf | xargs -i grep {} libs.txt | xargs -i ln -s {} + grep '.so$' "$temp_dir"/nvliblist.conf | xargs -i grep {} "$temp_dir"/libs.txt | xargs -i ln -s {} 
# Inject CUDA version into dir echo $host_cuda_version > version.txt From 600d4b46880d853e62428f94b7ce8912604b29ba Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Thu, 30 Nov 2023 15:01:12 +0100 Subject: [PATCH 54/59] Wrong location under host_injections --- gpu_support/nvidia/link_nvidia_host_libraries.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu_support/nvidia/link_nvidia_host_libraries.sh b/gpu_support/nvidia/link_nvidia_host_libraries.sh index a15c1e98d4..b4b8699628 100755 --- a/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -53,7 +53,7 @@ fi # Let's make sure the driver libraries are not already in place link_drivers=1 -host_injections_nvidia_dir="/cvmfs/pilot.eessi-hpc.org/host_injections/${EESSI_CPU_FAMILY}/nvidia" +host_injections_nvidia_dir="/cvmfs/pilot.eessi-hpc.org/host_injections/nvidia/${EESSI_CPU_FAMILY}" host_injection_driver_dir="${host_injections_nvidia_dir}/host" host_injection_driver_version_file="$host_injection_driver_dir/version.txt" if [ -e "$host_injection_driver_version_file" ]; then From b2664a3943115c75a85099936111e5ab231ebd2c Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Thu, 30 Nov 2023 15:17:13 +0100 Subject: [PATCH 55/59] Export LD_LIBRARY_PATH in script --- gpu_support/nvidia/link_nvidia_host_libraries.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu_support/nvidia/link_nvidia_host_libraries.sh b/gpu_support/nvidia/link_nvidia_host_libraries.sh index b4b8699628..c971e4e65e 100755 --- a/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -41,7 +41,7 @@ check_eessi_initialised # Find the CUDA version of the host CUDA drivers # (making sure that this can still work inside prefix environment inside a container) -LD_LIBRARY_PATH=/.singularity/libs:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=/.singularity.d/libs:$LD_LIBRARY_PATH nvidia_smi_command="nvidia-smi 
--query-gpu=driver_version --format=csv,noheader" if $nvidia_smi_command; then host_cuda_version=$($nvidia_smi_command | tail -n1) From 9854e79edbc5b7a3450aa1ee1484f8e58ad4575d Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Thu, 30 Nov 2023 15:29:19 +0100 Subject: [PATCH 56/59] Also export CUDA version to the links directory --- gpu_support/nvidia/link_nvidia_host_libraries.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/gpu_support/nvidia/link_nvidia_host_libraries.sh b/gpu_support/nvidia/link_nvidia_host_libraries.sh index c971e4e65e..ac826f9572 100755 --- a/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -43,8 +43,9 @@ check_eessi_initialised # (making sure that this can still work inside prefix environment inside a container) export LD_LIBRARY_PATH=/.singularity.d/libs:$LD_LIBRARY_PATH nvidia_smi_command="nvidia-smi --query-gpu=driver_version --format=csv,noheader" -if $nvidia_smi_command; then - host_cuda_version=$($nvidia_smi_command | tail -n1) +if $nvidia_smi_command > /dev/null; then + host_driver_version=$($nvidia_smi_command | tail -n1) + host_cuda_version=$(nvidia-smi -q --display=COMPUTE | grep CUDA | awk 'NF>1{print $NF}') else error="Failed to successfully execute\n $nvidia_smi_command\n" fatal_error $error @@ -57,7 +58,7 @@ host_injections_nvidia_dir="/cvmfs/pilot.eessi-hpc.org/host_injections/nvidia/${ host_injection_driver_dir="${host_injections_nvidia_dir}/host" host_injection_driver_version_file="$host_injection_driver_dir/version.txt" if [ -e "$host_injection_driver_version_file" ]; then - if grep -q "$host_cuda_version" "$host_injection_driver_version_file"; then + if grep -q "$host_driver_version" "$host_injection_driver_version_file"; then echo_green "The host CUDA driver libraries have already been linked!" 
link_drivers=0 else @@ -91,8 +92,9 @@ if [ "$link_drivers" -eq 1 ]; then # Make symlinks to all the interesting libraries grep '.so$' "$temp_dir"/nvliblist.conf | xargs -i grep {} "$temp_dir"/libs.txt | xargs -i ln -s {} - # Inject CUDA version into dir - echo $host_cuda_version > version.txt + # Inject driver and CUDA versions into dir + echo $host_driver_version > version.txt + echo $host_cuda_version > cuda_version.txt drivers_linked=1 # Remove the temporary directory when done From 59fee787c0b8a709c05fc8bba379f2cb5e2abcd7 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Thu, 30 Nov 2023 15:34:49 +0100 Subject: [PATCH 57/59] Also export CUDA version to the links directory --- gpu_support/nvidia/link_nvidia_host_libraries.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gpu_support/nvidia/link_nvidia_host_libraries.sh b/gpu_support/nvidia/link_nvidia_host_libraries.sh index ac826f9572..e71697c8d7 100755 --- a/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -45,6 +45,7 @@ export LD_LIBRARY_PATH=/.singularity.d/libs:$LD_LIBRARY_PATH nvidia_smi_command="nvidia-smi --query-gpu=driver_version --format=csv,noheader" if $nvidia_smi_command > /dev/null; then host_driver_version=$($nvidia_smi_command | tail -n1) + # If the first worked, this should work too host_cuda_version=$(nvidia-smi -q --display=COMPUTE | grep CUDA | awk 'NF>1{print $NF}') else error="Failed to successfully execute\n $nvidia_smi_command\n" @@ -56,7 +57,7 @@ link_drivers=1 host_injections_nvidia_dir="/cvmfs/pilot.eessi-hpc.org/host_injections/nvidia/${EESSI_CPU_FAMILY}" host_injection_driver_dir="${host_injections_nvidia_dir}/host" -host_injection_driver_version_file="$host_injection_driver_dir/version.txt" +host_injection_driver_version_file="$host_injection_driver_dir/driver_version.txt" if [ -e "$host_injection_driver_version_file" ]; then if grep -q "$host_driver_version" "$host_injection_driver_version_file"; then 
echo_green "The host CUDA driver libraries have already been linked!" @@ -93,7 +94,7 @@ if [ "$link_drivers" -eq 1 ]; then grep '.so$' "$temp_dir"/nvliblist.conf | xargs -i grep {} "$temp_dir"/libs.txt | xargs -i ln -s {} # Inject driver and CUDA versions into dir - echo $host_driver_version > version.txt + echo $host_driver_version > driver_version.txt echo $host_cuda_version > cuda_version.txt drivers_linked=1 From 3d0c3dd3eb6fd0e9a80652b477379325a040cdba Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Thu, 30 Nov 2023 15:46:46 +0100 Subject: [PATCH 58/59] Add a comment to explain the download --- gpu_support/nvidia/link_nvidia_host_libraries.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gpu_support/nvidia/link_nvidia_host_libraries.sh b/gpu_support/nvidia/link_nvidia_host_libraries.sh index e71697c8d7..d714c0ec8b 100755 --- a/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -88,6 +88,7 @@ if [ "$link_drivers" -eq 1 ]; then ls /.singularity.d/libs/* >> "$temp_dir"/libs.txt 2>/dev/null # Leverage singularity to find the full list of libraries we should be linking to + echo_yellow "Downloading latest version of nvliblist.conf from Apptainer" curl -o "$temp_dir"/nvliblist.conf https://raw.githubusercontent.com/apptainer/apptainer/main/etc/nvliblist.conf # Make symlinks to all the interesting libraries @@ -129,4 +130,4 @@ else ln -s $host_injections_nvidia_dir/latest lib fi -echo_green "Host NVIDIA gpu drivers linked successfully for EESSI" \ No newline at end of file +echo_green "Host NVIDIA gpu drivers linked successfully for EESSI" From 480d35643c7740c48f5508303e1cccadf04aae76 Mon Sep 17 00:00:00 2001 From: Alan O'Cais Date: Fri, 1 Dec 2023 15:07:34 +0100 Subject: [PATCH 59/59] Address review comments --- .../nvidia/install_cuda_host_injections.sh | 18 +++++++----------- .../nvidia/link_nvidia_host_libraries.sh | 13 ++++++++----- 2 files changed, 15 insertions(+), 16 deletions(-) 
diff --git a/gpu_support/nvidia/install_cuda_host_injections.sh b/gpu_support/nvidia/install_cuda_host_injections.sh index 79af986a0a..f02f0da02e 100755 --- a/gpu_support/nvidia/install_cuda_host_injections.sh +++ b/gpu_support/nvidia/install_cuda_host_injections.sh @@ -74,6 +74,9 @@ while [[ $# -gt 0 ]]; do esac done +# Make sure EESSI is initialised +check_eessi_initialised + # Make sure the CUDA version supplied is a semantic version is_semantic_version() { local version=$1 @@ -89,7 +92,7 @@ if ! is_semantic_version "$install_cuda_version"; then show_help error="\nYou must provide a semantic version for CUDA (e.g., 12.1.1) via the appropriate\n" error="${error}command line option. This script is intended for use with EESSI so the 'correct'\n" - error="${error}version to provide is probably the one that is available under\n" + error="${error}version to provide is probably one of those available under\n" error="${error}$EESSI_SOFTWARE_PATH/software/CUDA\n" fatal_error "${error}" fi @@ -101,16 +104,9 @@ if [ "$eula_accepted" -ne 1 ]; then fatal_error "${error}" fi -# Make sure EESSI is initialised -check_eessi_initialised - -if [[ -z "${EESSI_SOFTWARE_PATH}" ]]; then - fatal_error "This script cannot be used without having first defined EESSI_SOFTWARE_PATH" -else - # As an installation location just use $EESSI_SOFTWARE_PATH but replacing `versions` with `host_injections` - # (CUDA is a binary installation so no need to worry too much about the EasyBuild setup) - cuda_install_parent=${EESSI_SOFTWARE_PATH/versions/host_injections} -fi +# As an installation location just use $EESSI_SOFTWARE_PATH but replacing `versions` with `host_injections` +# (CUDA is a binary installation so no need to worry too much about the EasyBuild setup) +cuda_install_parent=${EESSI_SOFTWARE_PATH/versions/host_injections} # Only install CUDA if specified version is not found. 
# (existence of easybuild subdir implies a successful install) diff --git a/gpu_support/nvidia/link_nvidia_host_libraries.sh b/gpu_support/nvidia/link_nvidia_host_libraries.sh index d714c0ec8b..26760f0b82 100755 --- a/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -19,11 +19,14 @@ if [ -x "/sbin/$command_name" ]; then fi IFS=':' read -ra path_dirs <<< "$PATH" for dir in "${path_dirs[@]}"; do - if [[ ! "$dir" =~ ^$exclude_prefix ]]; then - if [ -x "$dir/$command_name" ]; then - found_paths+=("$dir/$command_name") - fi - fi + if [ "$dir" = "/sbin" ]; then + continue # we've already checked for $command_name in /sbin, don't need to do it twice + fi + if [[ ! "$dir" =~ ^$exclude_prefix ]]; then + if [ -x "$dir/$command_name" ]; then + found_paths+=("$dir/$command_name") + fi + fi done if [ ${#found_paths[@]} -gt 0 ]; then