diff --git a/.github/workflows/modules/fake_module.lua b/.github/workflows/modules/fake_module.lua new file mode 100644 index 0000000000..e45cb640d6 --- /dev/null +++ b/.github/workflows/modules/fake_module.lua @@ -0,0 +1,3 @@ +setenv("INSIDE_GITHUB_ACTIONS", "true") +-- Interfere with PATH so Lmod keeps a record +prepend_path("PATH", "/snap/bin") diff --git a/.github/workflows/scripts/only_latest_easystacks.sh b/.github/workflows/scripts/only_latest_easystacks.sh new file mode 100755 index 0000000000..acc9d3279a --- /dev/null +++ b/.github/workflows/scripts/only_latest_easystacks.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# +# This script figures out the latest version of EasyBuild being used for the installation of easystack +# files. +# +# This file is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Alan O'Cais (CECAM) +# +# license: GPLv2 +# + +EESSI_VERSION=${EESSI_VERSION:-"2023.06"} + +directory="easystacks/software.eessi.io/${EESSI_VERSION}" +# List of example filenames +files=($(find "$directory" -name "*.yml" | grep -e '-eb-')) +[ -n "$DEBUG" ] && echo "${files[@]}" + +versions=() +# Loop over each filename +for filename in "${files[@]}"; do + # Extract the semantic version using grep + version=$(echo "$filename" | grep -oP '(?<=eb-)\d+\.\d+\.\d+?(?=-)') + + # Output the result + [ -n "$DEBUG" ] && echo "Filename: $filename" + [ -n "$DEBUG" ] && echo "Extracted version: $version" + [ -n "$DEBUG" ] && echo + versions+=("$version") +done +highest_version=$(printf "%s\n" "${versions[@]}" | sort -V | tail -n 1) + +[ -n "$DEBUG" ] && echo "Highest version: $highest_version" +[ -n "$DEBUG" ] && echo +[ -n "$DEBUG" ] && echo "Matching files:" +all_latest_easystacks=($(find $directory -type f -name "*eb-$highest_version*.yml")) + +accel_latest_easystacks=() +cpu_latest_easystacks=() + +# Loop through the array and split based on partial matching of string +accel="/accel/" +for item in "${all_latest_easystacks[@]}"; do + 
if [[ "$item" == *"$accel"* ]]; then + accel_latest_easystacks+=("$item") + else + cpu_latest_easystacks+=("$item") + fi +done + +# Output the results +if [ -n "$ACCEL_EASYSTACKS" ]; then + echo "${accel_latest_easystacks[@]}" +else + echo "${cpu_latest_easystacks[@]}" +fi diff --git a/.github/workflows/scripts/test_init_scripts.sh b/.github/workflows/scripts/test_init_scripts.sh new file mode 100755 index 0000000000..048fba81f5 --- /dev/null +++ b/.github/workflows/scripts/test_init_scripts.sh @@ -0,0 +1,50 @@ +#!/bin/bash +EESSI_VERSION="2023.06" +export LMOD_PAGER=cat + +# initialize assert framework +if [ ! -d assert.sh ]; then + echo "assert.sh not cloned." + echo "" + echo "run \`git clone https://github.com/lehmannro/assert.sh.git\`" + exit 1 +fi +. assert.sh/assert.sh + +TEST_SHELLS=("bash" "zsh" "fish" "ksh") +SHELLS=$@ + +for shell in ${SHELLS[@]}; do + echo = | awk 'NF += (OFS = $_) + 100' + echo RUNNING TESTS FOR SHELL: $shell + echo = | awk 'NF += (OFS = $_) + 100' + if [[ ! 
" ${TEST_SHELLS[*]} " =~ [[:space:]]${shell}[[:space:]] ]]; then + ### EXCEPTION FOR CSH ### + echo -e "\033[33mWe don't now how to test the shell '$shell', PRs are Welcome.\033[0m" + else + # TEST 1: Source Script and check Module Output + assert "$shell -c 'source init/lmod/$shell' 2>&1 " "EESSI/$EESSI_VERSION loaded successfully" + # TEST 2: Check if module overviews first section is the loaded EESSI module + MODULE_SECTIONS=($($shell -c "source init/lmod/$shell 2>/dev/null; module ov 2>&1 | grep -e '---'")) + PATTERN="/cvmfs/software\.eessi\.io/versions/$EESSI_VERSION/software/linux/x86_64/(intel/haswell|amd/zen3)/modules/all" + assert_raises 'echo "${MODULE_SECTIONS[1]}" | grep -E "$PATTERN"' + # TEST 3: Check if module overviews second section is the EESSI init module + assert "echo ${MODULE_SECTIONS[4]}" "/cvmfs/software.eessi.io/versions/$EESSI_VERSION/init/modules" + # Test 4: Load Python module and check version + command="$shell -c 'source init/lmod/$shell 2>/dev/null; module load Python/3.10.8-GCCcore-12.2.0; python --version'" + expected="Python 3.10.8" + assert "$command" "$expected" + # Test 5: Load Python module and check path + PYTHON_PATH=$($shell -c "source init/lmod/$shell 2>/dev/null; module load Python/3.10.8-GCCcore-12.2.0; which python") + PATTERN="/cvmfs/software\.eessi\.io/versions/$EESSI_VERSION/software/linux/x86_64/(intel/haswell|amd/zen3)/software/Python/3\.10\.8-GCCcore-12\.2\.0/bin/python" + echo "$PYTHON_PATH" | grep -E "$PATTERN" + assert_raises 'echo "$PYTHON_PATH" | grep -E "$PATTERN"' + + #End Test Suite + assert_end "source_eessi_$shell" + fi +done + + +# RESET PAGER +export LMOD_PAGER= diff --git a/.github/workflows/test-software.eessi.io.yml b/.github/workflows/test-software.eessi.io.yml index d4d980901f..ca3792f6ef 100644 --- a/.github/workflows/test-software.eessi.io.yml +++ b/.github/workflows/test-software.eessi.io.yml @@ -7,6 +7,12 @@ on: workflow_dispatch: permissions: contents: read # to fetch code (actions/checkout) 
+env: + EESSI_ACCELERATOR_TARGETS: | + x86_64/amd/zen2: + - nvidia/cc80 + x86_64/amd/zen3: + - nvidia/cc80 jobs: check_missing: runs-on: ubuntu-latest @@ -21,6 +27,7 @@ jobs: - aarch64/neoverse_v1 - x86_64/amd/zen2 - x86_64/amd/zen3 + - x86_64/amd/zen4 - x86_64/intel/haswell - x86_64/intel/skylake_avx512 - x86_64/generic @@ -48,14 +55,48 @@ jobs: export EESSI_PREFIX=/cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}} export EESSI_OS_TYPE=linux env | grep ^EESSI | sort - echo "just run check_missing_installations.sh (should use easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml)" - for easystack_file in $(ls easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do + + # first check the CPU-only builds for this CPU target + echo "just run check_missing_installations.sh (should use easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml with latest EasyBuild release)" + for easystack_file in $(EESSI_VERSION=${{matrix.EESSI_VERSION}} .github/workflows/scripts/only_latest_easystacks.sh); do + if [ ${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} = "x86_64/amd/zen4" ]; then + if grep -q 2022b <<<"${easystack_file}"; then + # skip the check of installed software on zen4 for foss/2022b builds + continue + fi + if [[ $easystack_file == *"rebuilds"* ]]; then + # Also handle rebuilds, make a temporary EasyStack file where we clean out all 2022b stuff and use that + new_easystack=$(mktemp pruned_easystackXXX --suffix=.yml) + # first clean out the options then clean out the .eb name + sed '/2022b\|12\.2\.0/,/\.eb/{/\.eb/!d}' "${easystack_file}" | sed '/2022b\|12\.2\.0/d' > $new_easystack + diff --unified=0 "$easystack_file" "$new_easystack" || : + easystack_file="$new_easystack" + fi + fi echo "check missing installations for ${easystack_file}..." ./check_missing_installations.sh ${easystack_file} ec=$? 
if [[ ${ec} -ne 0 ]]; then echo "missing installations found for ${easystack_file}!" >&2; exit ${ec}; fi done + # now check the accelerator builds for this CPU target + accelerators=$(echo "${EESSI_ACCELERATOR_TARGETS}" | yq ".${EESSI_SOFTWARE_SUBDIR_OVERRIDE}[]") + if [ -z "${accelerators}" ]; then + echo "no accelerator targets defined for ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" + else + for accel in ${accelerators}; do + module use ${EESSI_SOFTWARE_PATH}/accel/${accel}/modules/all + echo "checking missing installations for accelerator ${accel} using modulepath: ${MODULEPATH}" + for easystack_file in $(EESSI_VERSION=${{matrix.EESSI_VERSION}} ACCEL_EASYSTACKS=1 .github/workflows/scripts/only_latest_easystacks.sh); do + echo "check missing installations for ${easystack_file}..." + ./check_missing_installations.sh ${easystack_file} + ec=$? + if [[ ${ec} -ne 0 ]]; then echo "missing installations found for ${easystack_file}!" >&2; exit ${ec}; fi + done + module unuse ${EESSI_SOFTWARE_PATH}/accel/${accel}/modules/all + done + fi + - name: Test check_missing_installations.sh with missing package (GCC/8.3.0) run: | export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} diff --git a/.github/workflows/tests_archdetect_nvidia_gpu.yml b/.github/workflows/tests_archdetect_nvidia_gpu.yml new file mode 100644 index 0000000000..8ad5f4fb36 --- /dev/null +++ b/.github/workflows/tests_archdetect_nvidia_gpu.yml @@ -0,0 +1,124 @@ +# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions +name: Tests for accelerator detection (NVIDIA GPU) +on: + push: + pull_request: +permissions: + contents: read # to fetch code (actions/checkout) +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + fake_nvidia_smi_script: + - none # no nvidia-smi command + - no_devices # nvidia-smi command works, but no GPUs available + - 1xa100 # cc80, supported with (atleast) zen2 CPU + - 2xa100 # cc80, supported with (atleast) zen2 CPU + - 4xa100 # 
cc80, supported with (atleast) zen2 CPU + - cc01 # non-existing GPU + fail-fast: false + steps: + - name: checkout + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + + # we deliberately do not use the eessi/github-action-eessi action, + # because we want to control when the EESSI environment is initialized + - name: Mount EESSI CernVM-FS repository + uses: cvmfs-contrib/github-action-cvmfs@55899ca74cf78ab874bdf47f5a804e47c198743c # v4.0 + with: + cvmfs_config_package: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi_latest_all.deb + cvmfs_http_proxy: DIRECT + cvmfs_repositories: software.eessi.io + + - name: test accelerator detection + run: | + export EESSI_SOFTWARE_SUBDIR_OVERRIDE='x86_64/amd/zen2' + + # put fake nvidia-smi command in place (unless we don't want to) + if [[ "${{matrix.fake_nvidia_smi_script}}" != "none" ]]; then + tmpdir=$(mktemp -d) + ln -s $PWD/tests/archdetect/nvidia-smi/${{matrix.fake_nvidia_smi_script}}.sh $tmpdir/nvidia-smi + export PATH=$tmpdir:$PATH + fi + + # first run with debugging enabled, just to show the output + ./init/eessi_archdetect.sh -d accelpath || echo "non-zero exit code: $?" + + # verify output (or exit code if non-zero) + out=$(./init/eessi_archdetect.sh accelpath || echo "non-zero exit code: $?") + + if [[ $out == "$( cat ./tests/archdetect/nvidia-smi/${{matrix.fake_nvidia_smi_script}}.output )" ]]; then + + echo "Test for '${{matrix.fake_nvidia_smi_script}}' PASSED: '$out'" + + # run full EESSI init script, which pick up on the accelerator (if available) + echo + . init/bash 2>&1 | tee init.out + echo "-----------------------------------------------------------------------------" + + if [[ "${{matrix.fake_nvidia_smi_script}}" == "none" ]] || [[ "${{matrix.fake_nvidia_smi_script}}" == "no_devices" ]]; then + + pattern="archdetect could not detect any accelerators" + echo ">>> checking for pattern '${pattern}' in init output..." 
+ grep "${pattern}" init.out || (echo "FAILED 1" || exit 1) + + pattern="archdetect found supported accelerator" + echo ">>> checking for lack of pattern '${pattern}' in init output..." + match=$(grep "${pattern}" init.out || true) + test "x${match}" = "x" || (echo "unexpected match found for '${pattern}' in init output" && exit 1) + + pattern="Prepending /cvmfs/software.eessi.io/versions/2023.06/software/linux/.*/accel/.*/modules/all to \$MODULEPATH" + echo ">>> checking for lack of pattern '${pattern}' in init output..." + match=$(grep "${pattern}" init.out || true) + test "x${match}" = "x" || (echo "unexpected match found for '${pattern}' in init output" && exit 1) + + elif [[ "${{matrix.fake_nvidia_smi_script}}" == "cc01" ]]; then + + pattern="No matching path found in x86_64/amd/zen2 for accelerator detected by archdetect (accel/nvidia/cc01)" + echo ">>> checking for pattern '${pattern}' in init output..." + grep "${pattern}" init.out || (echo "FAILED 1" || exit 1) + + pattern="Prepending /cvmfs/software.eessi.io/versions/2023.06/software/linux/.*/accel/.*/modules/all to \$MODULEPATH" + echo ">>> checking for lack of pattern '${pattern}' in init output..." + match=$(grep "${pattern}" init.out || true) + test "x${match}" = "x" || (echo "unexpected match found for '${pattern}' in init output" && exit 1) + + else + echo ">>> checking for 'accel/nvidia/cc80' in init output..." + grep "archdetect found supported accelerator for CPU target x86_64/amd/zen2: accel/nvidia/cc80" init.out || (echo "FAILED 2" && exit 1) + grep "Prepending /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen2/accel/nvidia/cc80/modules/all to \$MODULEPATH" init.out || (echo "FAILED 3" && exit 1) + fi + + echo ">>> checking last line of init output..." + tail -1 init.out | grep "Environment set up to use EESSI (2023.06), have fun!" 
|| (echo "FAILED, full init utput:" && cat init.out && exit 1) + + echo "All checks on init output PASSED" + else + echo "Test for '${{matrix.fake_nvidia_smi_script}}' FAILED: '$out'" >&2 + exit 1 + fi + + - name: test accelerator detection under $EESSI_ACCEL_SOFTWARE_SUBDIR_OVERRIDE + $EESSI_ACCELERATOR_TARGET_OVERRIDE + run: | + export EESSI_SOFTWARE_SUBDIR_OVERRIDE='x86_64/amd/zen2' + export EESSI_ACCEL_SOFTWARE_SUBDIR_OVERRIDE='x86_64/amd/zen3' + export EESSI_ACCELERATOR_TARGET_OVERRIDE='accel/nvidia/cc80' + + # first run with debugging enabled, just to show the output + ./init/eessi_archdetect.sh -d accelpath || echo "non-zero exit code: $?" + + # verify output (or exit code if non-zero) + out=$(./init/eessi_archdetect.sh accelpath || echo "non-zero exit code: $?") + + echo + . init/bash 2>&1 | tee init.out + echo "-----------------------------------------------------------------------------" + + echo ">>> checking for 'accel/nvidia/cc80' in init output..." + grep "archdetect found supported accelerator for CPU target x86_64/amd/zen3: accel/nvidia/cc80" init.out || (echo "FAILED 1" && exit 1) + grep "Using x86_64/amd/zen2 as software subdirectory" init.out || (echo "FAILED 2" && exit 1) + grep "Prepending /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen2/modules/all to \$MODULEPATH" init.out || (echo "FAILED 3" && exit 1) + grep "Prepending /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen3/accel/nvidia/cc80/modules/all to \$MODULEPATH" init.out || (echo "FAILED 4" && exit 1) + + echo "All checks on init output PASSED" diff --git a/.github/workflows/tests_eessi_module.yml b/.github/workflows/tests_eessi_module.yml new file mode 100644 index 0000000000..2bf4b39bde --- /dev/null +++ b/.github/workflows/tests_eessi_module.yml @@ -0,0 +1,207 @@ +# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions +name: Tests for eessi_module_functionality in software.eessi.io +on: + push: + branches: 
[ "*-software.eessi.io" ] + pull_request: +permissions: + contents: read # to fetch code (actions/checkout) +jobs: + basic_checks: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + EESSI_VERSION: + - 2023.06 + steps: + - name: Check out software-layer repository + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Mount EESSI CernVM-FS pilot repository + uses: cvmfs-contrib/github-action-cvmfs@55899ca74cf78ab874bdf47f5a804e47c198743c # v4.0 + with: + cvmfs_config_package: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi_latest_all.deb + cvmfs_http_proxy: DIRECT + cvmfs_repositories: software.eessi.io + + - name: Test for making sure spider cache is being used and not being rebuilt + run: | + . /cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}}/compat/linux/$(uname -m)/usr/share/Lmod/init/bash # Initialise Lmod + export MODULEPATH=init/modules + configfile="configfile.txt" + module -T load EESSI/${{matrix.EESSI_VERSION}} + module --config > "${configfile}" 2>&1 + grep cache "${configfile}" | grep software | grep -v compat + if timeout 10s bash -c "LMOD_PAGER=none module --terse avail" && grep cache "${configfile}" | grep software | grep -v compat; then + echo "EESSI spider cache is being used" + else + echo "EESSI spider cache is being rebuilt" >&2 + exit 1 + fi + env | grep LMOD + module purge + unset MODULEPATH + + - name: Test for archdetect_cpu functionality with invalid path + run: | + # Initialise Lmod + . 
/cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}}/compat/linux/$(uname -m)/usr/share/Lmod/init/bash + export MODULEPATH=init/modules + set +e # Do not exit immediately if a command exits with a non-zero status + export EESSI_ARCHDETECT_OPTIONS_OVERRIDE="dummy/cpu" + outfile="outfile.txt" + module load EESSI/${{matrix.EESSI_VERSION}} > "${outfile}" 2>&1 + cat "${outfile}" + if grep -q "Software directory check" "${outfile}"; then + echo "Test for picking up invalid path on \${archdetect_cpu} PASSED" + else + echo "Test for picking up invalid path on \${archdetect_cpu} FAILED" >&2 + exit 1 + fi + unset EESSI_ARCHDETECT_OPTIONS_OVERRIDE + set -e # Re-enable exit on non-zero status + + lmod_and_init_script_comparison: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + EESSI_VERSION: + - 2023.06 + EESSI_SOFTWARE_SUBDIR_OVERRIDE: + - x86_64/amd/zen3 + - x86_64/amd/zen4 + EESSI_ACCELERATOR_TARGET_OVERRIDE: + - accel/nvidia/cc80 + steps: + - name: Check out software-layer repository + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Mount EESSI CernVM-FS pilot repository + uses: cvmfs-contrib/github-action-cvmfs@55899ca74cf78ab874bdf47f5a804e47c198743c # v4.0 + with: + cvmfs_config_package: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi_latest_all.deb + cvmfs_http_proxy: DIRECT + cvmfs_repositories: software.eessi.io + + - name: Test for expected variables match between Lmod init script and original bash script + run: | + # Initialise Lmod + . 
/cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}}/compat/linux/$(uname -m)/usr/share/Lmod/init/bash + + # Set our path overrides according to our matrix + export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} + export EESSI_ACCELERATOR_TARGET_OVERRIDE=${{matrix.EESSI_ACCELERATOR_TARGET_OVERRIDE}} + + moduleoutfile="moduleout.txt" + sourceoutfile="sourceout.txt" + + # First do (and undo) the Lmod initialisation + export MODULEPATH=init/modules + # Turn on debug output in case we want to take a look + export EESSI_DEBUG_INIT=true + CPU_ARCH=$(./init/eessi_archdetect.sh -a cpupath) + export EESSI_ARCHDETECT_OPTIONS_OVERRIDE="dummy/cpu:${CPU_ARCH}:dummy1/cpu1" + module load EESSI/${{matrix.EESSI_VERSION}} + # EESSI_ARCHDETECT_OPTIONS_OVERRIDE/EESSI_DEBUG_INIT only relevant for Lmod init + unset EESSI_ARCHDETECT_OPTIONS_OVERRIDE + unset EESSI_DEBUG_INIT + # Store all relevant environment variables + env | grep -E '(^EESSI_|^LMOD_RC|^LMOD_PACKAGE_PATH)' | sort > "${moduleoutfile}" + module unload EESSI/${{matrix.EESSI_VERSION}} + + # Now do the init script initialisation + source ./init/bash + # source script version sets environment variables to force archdetect, ignore these + unset EESSI_USE_ARCHSPEC + unset EESSI_USE_ARCHDETECT + env | grep -E '(^EESSI_|^LMOD_RC|^LMOD_PACKAGE_PATH)' | sort > "${sourceoutfile}" + + # Now compare the two results + echo "" + echo "Lmod initialisation:" + cat "${moduleoutfile}" + echo "" + echo "Source script initialisation:" + cat "${sourceoutfile}" + echo "" + echo "" + if (diff "${moduleoutfile}" "${sourceoutfile}" > /dev/null); then + echo "Test for checking env variables PASSED" + else + echo "Test for checking env variables FAILED" >&2 + diff --unified=0 "${moduleoutfile}" "${sourceoutfile}" + exit 1 + fi + + make_sure_load_and_unload_work: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + EESSI_VERSION: + - 2023.06 + EESSI_SOFTWARE_SUBDIR_OVERRIDE: + - none + - x86_64/amd/zen2 
+ - x86_64/amd/zen4 + EESSI_ACCELERATOR_TARGET_OVERRIDE: + - none + - accel/nvidia/cc80 + steps: + - name: Check out software-layer repository + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Mount EESSI CernVM-FS pilot repository + uses: cvmfs-contrib/github-action-cvmfs@55899ca74cf78ab874bdf47f5a804e47c198743c # v4.0 + with: + cvmfs_config_package: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi_latest_all.deb + cvmfs_http_proxy: DIRECT + cvmfs_repositories: software.eessi.io + + - name: Test for identical environment after loading and unloading the EESSI module + run: | + # Initialise Lmod + . /cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}}/compat/linux/$(uname -m)/usr/share/Lmod/init/bash + + # Set our cpu path overrides according to our matrix + if [[ "${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}}" != "none" ]]; then + export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} + fi + + # Set our accelerator path overrides according to our matrix + if [[ "${{matrix.EESSI_ACCELERATOR_TARGET_OVERRIDE}}" != "none" ]]; then + export EESSI_ACCELERATOR_TARGET_OVERRIDE=${{matrix.EESSI_ACCELERATOR_TARGET_OVERRIDE}} + fi + + # Turn on debug output in case we want to take a look + export EESSI_DEBUG_INIT=true + + initial_env_file="initial_env.txt" + module_cycled_file="load_unload_cycle.txt" + + # prepare Lmod, resetting it in a roundabout given we don't want defaults set + export MODULEPATH=init/modules:.github/workflows/modules + module load fake_module + module purge + module unuse .github/workflows/modules + module avail + + # Store the initial environment (ignoring Lmod tables) + env | grep -v _ModuleTable | sort > "${initial_env_file}" + + # Do (and undo) loading the EESSI module + CPU_ARCH=$(./init/eessi_archdetect.sh -a cpupath) + module load EESSI/${{matrix.EESSI_VERSION}} + module unload EESSI/${{matrix.EESSI_VERSION}} + env | grep -v _ModuleTable | sort > 
"${module_cycled_file}" + + # Now compare the two results (do not expose the files, as they contain the full environment!) + if (diff "${initial_env_file}" "${module_cycled_file}" > /dev/null); then + echo "Test for checking env variables PASSED" + else + echo "Test for checking env variables FAILED" >&2 + diff --unified=0 "${initial_env_file}" "${module_cycled_file}" + exit 1 + fi \ No newline at end of file diff --git a/.github/workflows/tests_init_module.yml b/.github/workflows/tests_init_module.yml new file mode 100644 index 0000000000..cfc4ae7b3d --- /dev/null +++ b/.github/workflows/tests_init_module.yml @@ -0,0 +1,43 @@ +# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions +name: Check for EESSI init shell scripts to load eessi software module in software.eessi.io +on: + push: + branches: [ "*-software.eessi.io" ] + pull_request: + workflow_dispatch: +permissions: + contents: read # to fetch code (actions/checkout) +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + EESSI_VERSION: + - 2023.06 + EESSI_SOFTWARE_SUBDIR_OVERRIDE: + - x86_64/intel/haswell + steps: + - name: Check out software-layer repository + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Mount EESSI CernVM-FS pilot repository + uses: cvmfs-contrib/github-action-cvmfs@55899ca74cf78ab874bdf47f5a804e47c198743c # v4.0 + with: + cvmfs_config_package: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi_latest_all.deb + cvmfs_http_proxy: DIRECT + cvmfs_repositories: software.eessi.io + + - name: Clone assert.sh script + run: git clone https://github.com/lehmannro/assert.sh.git + + - name: Install missing shells + run: | + sudo apt update + sudo apt install zsh ksh fish + echo "# INIT ZSH" > ~/.zshrc + + - name: Run tests for available shells + run: | + .github/workflows/scripts/test_init_scripts.sh "bash" "zsh" "ksh" "fish" "csh" + diff --git 
a/EESSI-extend-2023.06-easybuild.eb b/EESSI-extend-2023.06-easybuild.eb index b525ee462d..bfe7931c8f 100644 --- a/EESSI-extend-2023.06-easybuild.eb +++ b/EESSI-extend-2023.06-easybuild.eb @@ -87,12 +87,22 @@ if (os.getenv("EESSI_CVMFS_INSTALL") ~= nil) then end eessi_cvmfs_install = true easybuild_installpath = os.getenv("EESSI_SOFTWARE_PATH") + eessi_accelerator_target = os.getenv("EESSI_ACCELERATOR_TARGET") + if (eessi_accelerator_target ~= nil) then + cuda_compute_capability = string.match(eessi_accelerator_target, "^nvidia/cc([0-9][0-9])$") + if (cuda_compute_capability ~= nil) then + easybuild_installpath = pathJoin(easybuild_installpath, 'accel', eessi_accelerator_target) + easybuild_cuda_compute_capabilities = cuda_compute_capability:sub(1, 1) .. "." .. cuda_compute_capability:sub(2, 2) + else + LmodError("Incorrect value for $EESSI_ACCELERATOR_TARGET: " .. eessi_accelerator_target) + end + end elseif (os.getenv("EESSI_SITE_INSTALL") ~= nil) then -- Make sure no other EESSI install environment variables are set if ((os.getenv("EESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("EESSI_USER_INSTALL") ~= nil)) then LmodError("You cannot use EESSI_SITE_INSTALL in combination with any other EESSI_*_INSTALL environment variables") end - easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), 'versions', 'host_injections') + easybuild_installpath = os.getenv("EESSI_SITE_SOFTWARE_PATH") else -- Deal with user and project installs project_install = os.getenv("EESSI_PROJECT_INSTALL") @@ -146,6 +156,11 @@ setenv ("EASYBUILD_UMASK", "022") -- Allow this module to be loaded when running EasyBuild setenv ("EASYBUILD_ALLOW_LOADED_MODULES", "EasyBuild,EESSI-extend") +-- Set environment variables if building for CUDA compute capabilities +if (easybuild_cuda_compute_capabilities ~= nil) then + setenv ("EASYBUILD_CUDA_COMPUTE_CAPABILITIES", easybuild_cuda_compute_capabilities) +end + -- Set all related environment variables if we have project or user installations 
(including extending MODULEPATH) if (user_modulepath ~= nil) then -- Use a more restrictive umask for this case @@ -166,7 +181,7 @@ elseif (project_modulepath ~= nil) then end -- Make sure EasyBuild itself is loaded if not ( isloaded("EasyBuild") ) then - load("EasyBuild") + load(latest("EasyBuild")) end """ diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 7d358e205a..83c06c2184 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -112,26 +112,8 @@ fi TMPDIR=$(mktemp -d) -echo ">> Setting up environment..." - -source $TOPDIR/init/minimal_eessi_env - -if [ -d $EESSI_CVMFS_REPO ]; then - echo_green "$EESSI_CVMFS_REPO available, OK!" -else - fatal_error "$EESSI_CVMFS_REPO is not available!" -fi - -# make sure we're in Prefix environment by checking $SHELL -if [[ ${SHELL} = ${EPREFIX}/bin/bash ]]; then - echo_green ">> It looks like we're in a Gentoo Prefix environment, good!" -else - fatal_error "Not running in Gentoo Prefix environment, run '${EPREFIX}/startprefix' first!" 
-fi - -# avoid that pyc files for EasyBuild are stored in EasyBuild installation directory -export PYTHONPYCACHEPREFIX=$TMPDIR/pycache +# Get override subdir DETECTION_PARAMETERS='' GENERIC=0 EB='eb' @@ -148,10 +130,76 @@ if [ -z $EESSI_SOFTWARE_SUBDIR_OVERRIDE ]; then echo ">> Determined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE via 'eessi_software_subdir.py $DETECTION_PARAMETERS' script" else echo ">> Picking up pre-defined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE: ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" - # make sure directory exists (since it's expected by init/eessi_environment_variables when using archdetect) - mkdir -p ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE} + # Run in a subshell, so that minimal_eessi_env doesn't change the shell environment for the rest of this script + ( + # Make sure EESSI_PREFIX and EESSI_OS_TYPE are set + source $TOPDIR/init/minimal_eessi_env + + # make sure directory exists (since it's expected by init/eessi_environment_variables when using archdetect) + mkdir -p ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE} + ) +fi + +echo ">> Setting up environment..." + +# If EESSI_VERSION is not set, source the defaults script to set it +if [ -z ${EESSI_VERSION} ]; then + source $TOPDIR/init/eessi_defaults +fi + +# If module command does not exist, use the one from the compat layer +command -v module +module_cmd_exists=$? +if [[ "$module_cmd_exists" -ne 0 ]]; then + echo_green "No module command found, initializing lmod from the compatibility layer" + # Minimal initalization of the lmod from the compat layer + source $TOPDIR/init/lmod/bash +else + echo_green "Module command found" +fi +ml_version_out=$TMPDIR/ml.out +ml --version &> $ml_version_out +if [[ $? -eq 0 ]]; then + echo_green ">> Found Lmod ${LMOD_VERSION}" +else + fatal_error "Failed to initialize Lmod?! (see output in ${ml_version_out}" +fi + +# Make sure we start with no modules and clean $MODULEPATH +echo ">> Setting up \$MODULEPATH..." 
+module --force purge +module unuse $MODULEPATH + +# Initialize the EESSI environment +module use $TOPDIR/init/modules +module load EESSI/$EESSI_VERSION + +# make sure we're in Prefix environment by checking $SHELL +# We can only do this after loading the EESSI module, as we need ${EPREFIX} +if [[ ${SHELL} = ${EPREFIX}/bin/bash ]]; then + echo_green ">> It looks like we're in a Gentoo Prefix environment, good!" +else + fatal_error "Not running in Gentoo Prefix environment, run '${EPREFIX}/startprefix' first!" +fi + +if [ -d $EESSI_CVMFS_REPO ]; then + echo_green "$EESSI_CVMFS_REPO available, OK!" +else + fatal_error "$EESSI_CVMFS_REPO is not available!" fi +# Check that EESSI_SOFTWARE_SUBDIR now matches EESSI_SOFTWARE_SUBDIR_OVERRIDE +if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then + fatal_error "Failed to determine software subdirectory?!" +elif [[ "${EESSI_SOFTWARE_SUBDIR}" != "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then + fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!" +else + echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!" +fi + +# avoid that pyc files for EasyBuild are stored in EasyBuild installation directory +export PYTHONPYCACHEPREFIX=$TMPDIR/pycache + # if we run the script for the first time, e.g., to start building for a new # stack, we need to ensure certain files are present in # ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE} @@ -161,40 +209,71 @@ _eessi_software_path=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_ _lmod_cfg_dir=${_eessi_software_path}/.lmod _lmod_rc_file=${_lmod_cfg_dir}/lmodrc.lua if [ ! -f ${_lmod_rc_file} ]; then + echo "Lmod file '${_lmod_rc_file}' does not exist yet; creating it..." command -V python3 python3 ${TOPDIR}/create_lmodrc.py ${_eessi_software_path} fi _lmod_sitepackage_file=${_lmod_cfg_dir}/SitePackage.lua if [ ! 
-f ${_lmod_sitepackage_file} ]; then + echo "Lmod file '${_lmod_sitepackage_file}' does not exist yet; creating it..." command -V python3 python3 ${TOPDIR}/create_lmodsitepackage.py ${_eessi_software_path} fi -# Set all the EESSI environment variables (respecting $EESSI_SOFTWARE_SUBDIR_OVERRIDE) -# $EESSI_SILENT - don't print any messages -# $EESSI_BASIC_ENV - give a basic set of environment variables -EESSI_SILENT=1 EESSI_BASIC_ENV=1 source $TOPDIR/init/eessi_environment_variables +# install any additional required scripts +# order is important: these are needed to install a full CUDA SDK in host_injections +# for now, this just reinstalls all scripts. Not the most elegant, but it works -if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then - fatal_error "Failed to determine software subdirectory?!" -elif [[ "${EESSI_SOFTWARE_SUBDIR}" != "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then - fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!" -else - echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!" +# Only run install_scripts.sh if not dev.eessi.io for security +if [[ "${EESSI_CVMFS_REPO}" != /cvmfs/dev.eessi.io ]]; then + ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} fi -echo ">> Initializing Lmod..." -source $EPREFIX/usr/share/Lmod/init/bash -ml_version_out=$TMPDIR/ml.out -ml --version &> $ml_version_out -if [[ $? -eq 0 ]]; then - echo_green ">> Found Lmod ${LMOD_VERSION}" +echo ">> Configuring EasyBuild..." + +# Make sure EESSI-extend is not loaded, and configure location variables for a +# CVMFS installation +module unload EESSI-extend +unset EESSI_USER_INSTALL +unset EESSI_PROJECT_INSTALL +unset EESSI_SITE_INSTALL +export EESSI_CVMFS_INSTALL=1 + +# We now run 'source load_eessi_extend_module.sh' to load or install and load the +# EESSI-extend module which sets up all build environment settings. 
+# The script requires the EESSI_VERSION given as argument, a couple of +# environment variables set (TMPDIR, EB and EASYBUILD_INSTALLPATH) and the +# function check_exit_code defined. +# NOTE 1, the script exits if those variables/functions are undefined. +# NOTE 2, loading the EESSI-extend module may adjust the value of EASYBUILD_INSTALLPATH, +# e.g., to point to the installation directory for accelerators. +# NOTE 3, we have to set a default for EASYBUILD_INSTALLPATH here in cases the +# EESSI-extend module itself needs to be installed. +export EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE} +source load_eessi_extend_module.sh ${EESSI_VERSION} + +# Install full CUDA SDK and cu* libraries in host_injections +# Hardcode this for now, see if it works +# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install +# Allow skipping CUDA SDK install in e.g. CI environments +echo "Going to install full CUDA SDK and cu* libraries under host_injections if necessary" +temp_install_storage=${TMPDIR}/temp_install_storage +mkdir -p ${temp_install_storage} +if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \ + -t ${temp_install_storage} \ + --accept-cuda-eula \ + --accept-cudnn-eula else - fatal_error "Failed to initialize Lmod?! (see output in ${ml_version_out}" + echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed" +fi + +# Install NVIDIA drivers in host_injections (if they exist) +if command_exists "nvidia-smi"; then + echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..." + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh fi -echo ">> Configuring EasyBuild..." -source $TOPDIR/configure_easybuild if [ ! 
-z "${shared_fs_path}" ]; then shared_eb_sourcepath=${shared_fs_path}/easybuild/sources @@ -202,12 +281,23 @@ if [ ! -z "${shared_fs_path}" ]; then export EASYBUILD_SOURCEPATH=${shared_eb_sourcepath}:${EASYBUILD_SOURCEPATH} fi -echo ">> Setting up \$MODULEPATH..." -# make sure no modules are loaded -module --force purge -# ignore current $MODULEPATH entirely -module unuse $MODULEPATH +# if an accelerator target is specified, we need to make sure that the CPU-only modules are also still available +if [ ! -z ${EESSI_ACCELERATOR_TARGET} ]; then + CPU_ONLY_MODULES_PATH=$(echo $EASYBUILD_INSTALLPATH | sed "s@/accel/${EESSI_ACCELERATOR_TARGET}@@g")/modules/all + if [ -d ${CPU_ONLY_MODULES_PATH} ]; then + module use ${CPU_ONLY_MODULES_PATH} + else + fatal_error "Derived path to CPU-only modules does not exist: ${CPU_ONLY_MODULES_PATH}" + fi +fi + +# If in dev.eessi.io, allow building on top of software.eessi.io +if [[ "${EESSI_CVMFS_REPO}" == /cvmfs/dev.eessi.io ]]; then + module use /cvmfs/software.eessi.io/versions/$EESSI_VERSION/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}/modules/all +fi + module use $EASYBUILD_INSTALLPATH/modules/all + if [[ -z ${MODULEPATH} ]]; then fatal_error "Failed to set up \$MODULEPATH?!" else @@ -217,40 +307,9 @@ fi # assume there's only one diff file that corresponds to the PR patch file pr_diff=$(ls [0-9]*.diff | head -1) -# install any additional required scripts -# order is important: these are needed to install a full CUDA SDK in host_injections -# for now, this just reinstalls all scripts. Note the most elegant, but works -${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} - -# Install full CUDA SDK in host_injections -# Hardcode this for now, see if it works -# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install -# Allow skipping CUDA SDK install in e.g. CI environments -# The install_cuda... script uses EasyBuild. 
So, we need to check if we have EB -# or skip this step. -module_avail_out=$TMPDIR/ml.out -module avail 2>&1 | grep EasyBuild &> ${module_avail_out} -if [[ $? -eq 0 ]]; then - echo_green ">> Found an EasyBuild module" -else - echo_yellow ">> No EasyBuild module found: skipping step to install CUDA (see output in ${module_avail_out})" - export skip_cuda_install=True -fi - -if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then - ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula -else - echo "Skipping installation of CUDA SDK in host_injections, since the --skip-cuda-install flag was passed OR no EasyBuild module was found" -fi - -# Install NVIDIA drivers in host_injections (if they exist) -if command_exists "nvidia-smi"; then - echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..." - ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh -fi # use PR patch file to determine in which easystack files stuff was added -changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing') +changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep 'easystacks/.*yml$' | egrep -v 'known-issues|missing') if [ -z "${changed_easystacks}" ]; then echo "No missing installations, party time!" # Ensure the bot report success, as there was nothing to be build here else @@ -276,7 +335,7 @@ else if [ -f ${easystack_file} ]; then echo_green "Feeding easystack file ${easystack_file} to EasyBuild..." - ${EB} --easystack ${TOPDIR}/${easystack_file} --robot + ${EB} --easystack ${easystack_file} --robot ec=$? 
# copy EasyBuild log file if EasyBuild exited with an error @@ -289,7 +348,7 @@ else copy_build_log "${eb_last_log}" "${build_logs_dir}" fi - $TOPDIR/check_missing_installations.sh ${TOPDIR}/${easystack_file} ${TOPDIR}/${pr_diff} + $TOPDIR/check_missing_installations.sh ${easystack_file} ${pr_diff} else fatal_error "Easystack file ${easystack_file} not found!" fi @@ -297,22 +356,30 @@ else done fi -### add packages here - -echo ">> Creating/updating Lmod RC file..." export LMOD_CONFIG_DIR="${EASYBUILD_INSTALLPATH}/.lmod" lmod_rc_file="$LMOD_CONFIG_DIR/lmodrc.lua" +if [[ ! -z ${EESSI_ACCELERATOR_TARGET} ]]; then + # EESSI_ACCELERATOR_TARGET is set, so let's remove the accelerator path from $lmod_rc_file + lmod_rc_file=$(echo ${lmod_rc_file} | sed "s@/accel/${EESSI_ACCELERATOR_TARGET}@@") + echo "Path to lmodrc.lua changed to '${lmod_rc_file}'" +fi lmodrc_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodrc.py$' > /dev/null; echo $?) if [ ! -f $lmod_rc_file ] || [ ${lmodrc_changed} == '0' ]; then + echo ">> Creating/updating Lmod RC file (${lmod_rc_file})..." python3 $TOPDIR/create_lmodrc.py ${EASYBUILD_INSTALLPATH} check_exit_code $? "$lmod_rc_file created" "Failed to create $lmod_rc_file" fi -echo ">> Creating/updating Lmod SitePackage.lua ..." export LMOD_PACKAGE_PATH="${EASYBUILD_INSTALLPATH}/.lmod" lmod_sitepackage_file="$LMOD_PACKAGE_PATH/SitePackage.lua" +if [[ ! -z ${EESSI_ACCELERATOR_TARGET} ]]; then + # EESSI_ACCELERATOR_TARGET is set, so let's remove the accelerator path from $lmod_sitepackage_file + lmod_sitepackage_file=$(echo ${lmod_sitepackage_file} | sed "s@/accel/${EESSI_ACCELERATOR_TARGET}@@") + echo "Path to SitePackage.lua changed to '${lmod_sitepackage_file}'" +fi sitepackage_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodsitepackage.py$' > /dev/null; echo $?) if [ ! 
-f "$lmod_sitepackage_file" ] || [ "${sitepackage_changed}" == '0' ]; then + echo ">> Creating/updating Lmod SitePackage.lua (${lmod_sitepackage_file})..." python3 $TOPDIR/create_lmodsitepackage.py ${EASYBUILD_INSTALLPATH} check_exit_code $? "$lmod_sitepackage_file created" "Failed to create $lmod_sitepackage_file" fi diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index 446a156cb8..1a03a7af98 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -79,6 +79,17 @@ echo ">> Setting up \$MODULEPATH..." module --force purge # ignore current $MODULEPATH entirely module unuse $MODULEPATH + +# if an accelerator target is specified, we need to make sure that the CPU-only modules are also still available +if [ ! -z ${EESSI_ACCELERATOR_TARGET} ]; then + CPU_ONLY_MODULES_PATH=$(echo $EASYBUILD_INSTALLPATH | sed "s@/accel/${EESSI_ACCELERATOR_TARGET}@@g")/modules/all + if [ -d ${CPU_ONLY_MODULES_PATH} ]; then + module use ${CPU_ONLY_MODULES_PATH} + else + fatal_error "Derived path to CPU-only modules does not exist: ${CPU_ONLY_MODULES_PATH}" + fi +fi + module use $EASYBUILD_INSTALLPATH/modules/all if [[ -z ${MODULEPATH} ]]; then fatal_error "Failed to set up \$MODULEPATH?!" @@ -91,7 +102,7 @@ pr_diff=$(ls [0-9]*.diff | head -1) # if this script is run as root, use PR patch file to determine if software needs to be removed first if [ $EUID -eq 0 ]; then - changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") + changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep 'easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") if [ -z ${changed_easystacks_rebuilds} ]; then echo "No software needs to be removed." 
else @@ -109,11 +120,18 @@ if [ $EUID -eq 0 ]; then # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') for app in ${rebuild_apps}; do - app_dir=${EASYBUILD_INSTALLPATH}/software/${app} - app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua + # Returns e.g. /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen2/modules/all: + app_modulepath=$(module --terse av ${app} 2>&1 | head -n 1 | sed 's/://') + # Two dirname invocations, so returns e.g. /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen2 + app_installprefix=$(dirname $(dirname ${app_modulepath})) + app_dir=${app_installprefix}/software/${app} + app_module=${app_installprefix}/modules/all/${app}.lua echo_yellow "Removing ${app_dir} and ${app_module}..." rm -rf ${app_dir} rm -rf ${app_module} + # recreate some directory to work around permission denied + # issues when rebuilding the package + mkdir -p ${app_dir}/easybuild done else fatal_error "Easystack file ${easystack_file} not found!" 
diff --git a/bot/build.sh b/bot/build.sh index 145be740d3..81b3ef4660 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -21,12 +21,14 @@ # stop as soon as something fails set -e +# Make sure we are referring to software-layer as working directory +software_layer_dir=$(dirname $(dirname $(realpath $0))) # source utils.sh and cfg_files.sh -source scripts/utils.sh -source scripts/cfg_files.sh +source $software_layer_dir/scripts/utils.sh +source $software_layer_dir/scripts/cfg_files.sh # defaults -export JOB_CFG_FILE="${JOB_CFG_FILE_OVERRIDE:=./cfg/job.cfg}" +export JOB_CFG_FILE="${JOB_CFG_FILE_OVERRIDE:=cfg/job.cfg}" HOST_ARCH=$(uname -m) # check if ${JOB_CFG_FILE} exists @@ -141,7 +143,7 @@ echo "bot/build.sh: EESSI_VERSION_OVERRIDE='${EESSI_VERSION_OVERRIDE}'" export EESSI_CVMFS_REPO_OVERRIDE=/cvmfs/$(cfg_get_value "repository" "repo_name") echo "bot/build.sh: EESSI_CVMFS_REPO_OVERRIDE='${EESSI_CVMFS_REPO_OVERRIDE}'" -# determine architecture to be used from entry .architecture in ${JOB_CFG_FILE} +# determine CPU architecture to be used from entry .architecture in ${JOB_CFG_FILE} # fallbacks: # - ${CPU_TARGET} handed over from bot # - left empty to let downstream script(s) determine subdir to be used @@ -150,6 +152,10 @@ EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE:-${CPU_TARGET}} export EESSI_SOFTWARE_SUBDIR_OVERRIDE echo "bot/build.sh: EESSI_SOFTWARE_SUBDIR_OVERRIDE='${EESSI_SOFTWARE_SUBDIR_OVERRIDE}'" +# determine accelerator target (if any) from .architecture in ${JOB_CFG_FILE} +export EESSI_ACCELERATOR_TARGET=$(cfg_get_value "architecture" "accelerator") +echo "bot/build.sh: EESSI_ACCELERATOR_TARGET='${EESSI_ACCELERATOR_TARGET}'" + # get EESSI_OS_TYPE from .architecture.os_type in ${JOB_CFG_FILE} (default: linux) EESSI_OS_TYPE=$(cfg_get_value "architecture" "os_type") export EESSI_OS_TYPE=${EESSI_OS_TYPE:-linux} @@ -165,6 +171,12 @@ COMMON_ARGS+=("--mode" "run") [[ ! -z ${HTTPS_PROXY} ]] && COMMON_ARGS+=("--https-proxy" "${HTTPS_PROXY}") [[ ! 
-z ${REPOSITORY} ]] && COMMON_ARGS+=("--repository" "${REPOSITORY}") +# Also expose software.eessi.io when configured for dev.eessi.io +# Need software.eessi.io for the compat layer +if [[ "${REPOSITORY}" == dev.eessi.io ]]; then + COMMON_ARGS+=("--repository" "software.eessi.io,access=ro") +fi + # make sure to use the same parent dir for storing tarballs of tmp PREVIOUS_TMP_DIR=${PWD}/previous_tmp @@ -184,7 +196,7 @@ fi pr_diff=$(ls [0-9]*.diff | head -1) # the true at the end of the next command is important: grep will expectedly return 1 if there is no easystack file being added under rebuilds, # but due to "set -e" the entire script would otherwise fail -changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | (grep "/rebuilds/" || true)) +changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep 'easystacks/.*yml$' | (grep "/rebuilds/" || true)) if [[ -z "${changed_easystacks_rebuilds}" ]]; then echo "This PR does not add any easystack files in a rebuilds subdirectory, so let's skip the removal step." 
else @@ -196,6 +208,7 @@ else declare -a REMOVAL_STEP_ARGS=() REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") + # add fakeroot option in order to be able to remove software, see: # https://github.com/EESSI/software-layer/issues/312 REMOVAL_STEP_ARGS+=("--fakeroot") @@ -204,10 +217,10 @@ else removal_outerr=$(mktemp remove.outerr.XXXX) echo "Executing command to remove software:" - echo "./eessi_container.sh ${COMMON_ARGS[@]} ${REMOVAL_STEP_ARGS[@]}" - echo " -- ./EESSI-remove-software.sh \"${REMOVAL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${removal_outerr}" - ./eessi_container.sh "${COMMON_ARGS[@]}" "${REMOVAL_STEP_ARGS[@]}" \ - -- ./EESSI-remove-software.sh "${REMOVAL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${removal_outerr} + echo "$software_layer_dir/eessi_container.sh ${COMMON_ARGS[@]} ${REMOVAL_STEP_ARGS[@]}" + echo " -- $software_layer_dir/EESSI-remove-software.sh \"${REMOVAL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${removal_outerr}" + $software_layer_dir/eessi_container.sh "${COMMON_ARGS[@]}" "${REMOVAL_STEP_ARGS[@]}" \ + -- $software_layer_dir/EESSI-remove-software.sh "${REMOVAL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${removal_outerr} # make sure that the build step resumes from the same temporary directory # this is important, as otherwise the removed software will still be there @@ -240,10 +253,10 @@ fi build_outerr=$(mktemp build.outerr.XXXX) echo "Executing command to build software:" -echo "./eessi_container.sh ${COMMON_ARGS[@]} ${BUILD_STEP_ARGS[@]}" -echo " -- ./install_software_layer.sh \"${INSTALL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${build_outerr}" -./eessi_container.sh "${COMMON_ARGS[@]}" "${BUILD_STEP_ARGS[@]}" \ - -- ./install_software_layer.sh "${INSTALL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${build_outerr} +echo "$software_layer_dir/eessi_container.sh ${COMMON_ARGS[@]} ${BUILD_STEP_ARGS[@]}" +echo " -- $software_layer_dir/install_software_layer.sh \"${INSTALL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 
| tee -a ${build_outerr}" +$software_layer_dir/eessi_container.sh "${COMMON_ARGS[@]}" "${BUILD_STEP_ARGS[@]}" \ + -- $software_layer_dir/install_software_layer.sh "${INSTALL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${build_outerr} # prepare directory to store tarball of tmp for tarball step TARBALL_TMP_TARBALL_STEP_DIR=${PREVIOUS_TMP_DIR}/tarball_step @@ -268,7 +281,7 @@ fi timestamp=$(date +%s) # to set EESSI_VERSION we need to source init/eessi_defaults now -source init/eessi_defaults +source $software_layer_dir/init/eessi_defaults export TGZ=$(printf "eessi-%s-software-%s-%s-%d.tar.gz" ${EESSI_VERSION} ${EESSI_OS_TYPE} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-} ${timestamp}) # value of first parameter to create_tarball.sh - TMP_IN_CONTAINER - needs to be @@ -277,9 +290,9 @@ export TGZ=$(printf "eessi-%s-software-%s-%s-%d.tar.gz" ${EESSI_VERSION} ${EESSI # /tmp as default? TMP_IN_CONTAINER=/tmp echo "Executing command to create tarball:" -echo "./eessi_container.sh ${COMMON_ARGS[@]} ${TARBALL_STEP_ARGS[@]}" -echo " -- ./create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} /eessi_bot_job/${TGZ} 2>&1 | tee -a ${tar_outerr}" -./eessi_container.sh "${COMMON_ARGS[@]}" "${TARBALL_STEP_ARGS[@]}" \ - -- ./create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} /eessi_bot_job/${TGZ} 2>&1 | tee -a ${tar_outerr} +echo "$software_layer_dir/eessi_container.sh ${COMMON_ARGS[@]} ${TARBALL_STEP_ARGS[@]}" +echo " -- $software_layer_dir/create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} \"${EESSI_ACCELERATOR_TARGET}\" /eessi_bot_job/${TGZ} 2>&1 | tee -a ${tar_outerr}" +$software_layer_dir/eessi_container.sh "${COMMON_ARGS[@]}" "${TARBALL_STEP_ARGS[@]}" \ + -- $software_layer_dir/create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} "${EESSI_ACCELERATOR_TARGET}" /eessi_bot_job/${TGZ} 2>&1 | tee -a ${tar_outerr} exit 0 diff --git a/bot/check-build.sh 
b/bot/check-build.sh index d8246c67be..41aeab258e 100755 --- a/bot/check-build.sh +++ b/bot/check-build.sh @@ -17,6 +17,7 @@ # - SUCCESS (all of) # - working directory contains slurm-JOBID.out file # - working directory contains eessi*tar.gz +# - no message FATAL # - no message ERROR # - no message FAILED # - no message ' required modules missing:' @@ -25,6 +26,7 @@ # - FAILED (one of ... implemented as NOT SUCCESS) # - no slurm-JOBID.out file # - no tarball +# - message with FATAL # - message with ERROR # - message with FAILED # - message with ' required modules missing:' @@ -105,6 +107,16 @@ else [[ ${VERBOSE} -ne 0 ]] && echo " Slurm output file '"${job_out}"' NOT found" fi +FATAL=-1 +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then + GP_fatal='FATAL: ' + grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_fatal}") + [[ $? -eq 0 ]] && FATAL=1 || FATAL=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_fatal}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}" +fi + ERROR=-1 if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then GP_error='ERROR: ' @@ -163,6 +175,7 @@ fi [[ ${VERBOSE} -ne 0 ]] && echo "SUMMARY: ${job_dir}/${job_out}" [[ ${VERBOSE} -ne 0 ]] && echo " : ()" +[[ ${VERBOSE} -ne 0 ]] && echo " FATAL......: $([[ $FATAL -eq 1 ]] && echo 'yes' || echo 'no') (no)" [[ ${VERBOSE} -ne 0 ]] && echo " ERROR......: $([[ $ERROR -eq 1 ]] && echo 'yes' || echo 'no') (no)" [[ ${VERBOSE} -ne 0 ]] && echo " FAILED.....: $([[ $FAILED -eq 1 ]] && echo 'yes' || echo 'no') (no)" [[ ${VERBOSE} -ne 0 ]] && echo " REQ_MISSING: $([[ $MISSING -eq 1 ]] && echo 'yes' || echo 'no') (no)" @@ -190,6 +203,7 @@ job_result_file=_bot_job${SLURM_JOB_ID}.result # Default reason: if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]] && \ + [[ ${FATAL} -eq 0 ]] && \ [[ ${ERROR} -eq 0 ]] && \ [[ ${FAILED} -eq 0 ]] && \ [[ ${MISSING} -eq 0 ]] && \ @@ -223,6 +237,7 @@ fi #
_Details_
#
# :white_check_mark: job output file slurm-4682.out
+# :white_check_mark: no message matching FATAL:
# :white_check_mark: no message matching ERROR:
# :white_check_mark: no message matching FAILED:
# :white_check_mark: no message matching required modules missing:
@@ -264,6 +279,7 @@ fi #
_Details_
#
# :white_check_mark: job output file slurm-4682.out
+# :x: no message matching FATAL:
# :x: no message matching ERROR:
# :white_check_mark: no message matching FAILED:
# :x: no message matching required modules missing:
@@ -381,6 +397,10 @@ success_msg="job output file ${job_out}" failure_msg="no job output file ${job_out}" comment_details_list=${comment_details_list}$(add_detail ${SLURM_OUTPUT_FOUND} 1 "${success_msg}" "${failure_msg}") +success_msg="no message matching ${GP_fatal}" +failure_msg="found message matching ${GP_fatal}" +comment_details_list=${comment_details_list}$(add_detail ${FATAL} 0 "${success_msg}" "${failure_msg}") + success_msg="no message matching ${GP_error}" failure_msg="found message matching ${GP_error}" comment_details_list=${comment_details_list}$(add_detail ${ERROR} 0 "${success_msg}" "${failure_msg}") @@ -457,8 +477,14 @@ if [[ ! -z ${TARBALL} ]]; then repo_version=$(cfg_get_value "repository" "repo_version") os_type=$(cfg_get_value "architecture" "os_type") software_subdir=$(cfg_get_value "architecture" "software_subdir") + accelerator=$(cfg_get_value "architecture" "accelerator") prefix="${repo_version}/software/${os_type}/${software_subdir}" + # if we build for an accelerator, the prefix is different + if [[ ! -z ${accelerator} ]]; then + prefix="${prefix}/accel/${accelerator}" + fi + # extract directories/entries from tarball content modules_entries=$(grep "${prefix}/modules" ${tmpfile}) software_entries=$(grep "${prefix}/software" ${tmpfile}) diff --git a/bot/check-test.sh b/bot/check-test.sh index 3b16e5c415..2731e75464 100755 --- a/bot/check-test.sh +++ b/bot/check-test.sh @@ -23,7 +23,6 @@ else [[ ${VERBOSE} -ne 0 ]] && echo " Slurm output file '"${job_out}"' NOT found" fi - # ReFrame prints e.g. #[----------] start processing checks #[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:rome+default @@ -76,8 +75,42 @@ fi if [[ ! 
-z ${grep_reframe_failed} ]]; then grep_reframe_result=${grep_reframe_failed} else - grep_reframe_result=${grep_reframe_success} + # Grep the entire output of ReFrame, so that we can report it in the foldable section of the test report + GP_success_full='(?s)\[----------\] start processing checks.*?\[==========\] Finished on [a-zA-Z0-9 ]*' + # Grab the full ReFrame report, then cut the irrelevant parts + # Note that the character limit for messages in github is around 65k, so cutting is important + grep_reframe_success_full=$( \ + grep -v "^>> searching for " ${job_dir}/${job_out} | \ + # Use -z + grep -Pzo "${GP_success_full}" | \ + # Replace null character with newline, to undo the -z option + sed 's/\x00/\n/g' | \ + # Remove the [ RUN ] lines from reframe, they are not very informative + grep -v -P '\[\s*RUN\s*]' | \ + # Remove the line '[----------] all spawned checks have finished' + grep -v '\[-*\]' | \ + # Remove the line '[==========] Finished on Mon Oct 7 21' + grep -v '\[=*\]' | \ + # Remove blank line(s) from the report + grep -v '^$' | \ + # Remove warnings about the local spawner not supporting memory requests + grep -v 'WARNING\: hooks\.req_memory_per_node does not support the scheduler you configured .local.*$' | \ + # Strip color coding characters + sed 's/\x1B\[[0-9;]*m//g' | \ + # Replace all newline characters with <br/>
+ sed ':a;N;$!ba;s/\n/<br\/>/g' | \ + # Replace % with %%. Use \%\% to interpret both %% as (non-special) characters + sed 's/\%/\%\%/g' \ + ) + # TODO (optional): we could impose a character limit here, and truncate if too long + # (though we should do that before inserting the <br/>
statements). + # If we do, we should probably re-append the final summary, e.g. + # [ PASSED ] Ran 10/10 test case(s) from 10 check(s) (0 failure(s), 0 skipped, 0 aborted) + # so that that is always displayed + # However, that's not implemented yet - let's see if this ever even becomes an issue + grep_reframe_result=${grep_reframe_success_full} fi +echo "grep_reframe_result: ${grep_reframe_result}" echo "[TEST]" > ${job_test_result_file} if [[ ${SLURM_OUTPUT_FOUND} -eq 0 ]]; then diff --git a/check_missing_installations.sh b/check_missing_installations.sh index d8135ea3cb..79f6acc733 100755 --- a/check_missing_installations.sh +++ b/check_missing_installations.sh @@ -25,14 +25,6 @@ easystack=$1 LOCAL_TMPDIR=$(mktemp -d) -# Clone the develop branch of EasyBuild and use that to search for easyconfigs -git clone -b develop https://github.com/easybuilders/easybuild-easyconfigs.git $LOCAL_TMPDIR/easyconfigs -export EASYBUILD_ROBOT_PATHS=$LOCAL_TMPDIR/easyconfigs/easybuild/easyconfigs - -# All PRs used in EESSI are supposed to be merged, so we can strip out all cases of from-pr -tmp_easystack=${LOCAL_TMPDIR}/$(basename ${easystack}) -grep -v from-pr ${easystack} > ${tmp_easystack} - source $TOPDIR/scripts/utils.sh source $TOPDIR/configure_easybuild @@ -42,34 +34,11 @@ ${EB:-eb} --show-config echo ">> Checking for missing installations in ${EASYBUILD_INSTALLPATH}..." eb_missing_out=$LOCAL_TMPDIR/eb_missing.out -${EB:-eb} --easystack ${tmp_easystack} --missing 2>&1 | tee ${eb_missing_out} +${EB:-eb} --easystack ${easystack} --missing 2>&1 | tee ${eb_missing_out} exit_code=${PIPESTATUS[0]} ok_msg="Command 'eb --missing ...' succeeded, analysing output..." fail_msg="Command 'eb --missing ...' failed, check log '${eb_missing_out}'" -if [ "$exit_code" -ne 0 ] && [ ! -z "$pr_exceptions" ]; then - # We might have failed due to unmerged PRs. Try to make exceptions for --from-pr added in this PR - # to software-layer, and see if then it passes. 
If so, we can report a more specific fail_msg - # Note that if no --from-pr's were used in this PR, $pr_exceptions will be empty and we might as - # well skip this check - unmerged PRs can not be the reason for the non-zero exit code in that scenario - - # Let's use awk so we can allow for exceptions if we are given a PR diff file - awk_command="awk '\!/'from-pr'/ EXCEPTIONS' $easystack" - awk_command=${awk_command/\\/} # Strip out the backslash we needed for ! - eval ${awk_command/EXCEPTIONS/$pr_exceptions} > ${tmp_easystack} - - msg=">> Checking for missing installations in ${EASYBUILD_INSTALLPATH}," - msg="${msg} allowing for --from-pr's that were added in this PR..." - echo ${msg} - eb_missing_out=$LOCAL_TMPDIR/eb_missing_with_from_pr.out - ${EB:-eb} --easystack ${tmp_easystack} --missing 2>&1 | tee ${eb_missing_out} - exit_code_with_from_pr=${PIPESTATUS[0]} - - # If now we succeeded, the reason must be that we originally stripped the --from-pr's - if [ "$exit_code_with_from_pr" -eq 0 ]; then - fail_msg="$fail_msg (are you sure all PRs referenced have been merged in EasyBuild?)" - fi -fi check_exit_code ${exit_code} "${ok_msg}" "${fail_msg}" diff --git a/configure_easybuild b/configure_easybuild index ed3e651a4c..3b6d40cd96 100644 --- a/configure_easybuild +++ b/configure_easybuild @@ -1,7 +1,26 @@ +# if $WORKDIR is not defined, use a local temporary directory +if [ -z ${WORKDIR} ]; then + WORKDIR=$(mktemp -d) +fi + export EASYBUILD_PREFIX=${WORKDIR}/easybuild export EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR} export EASYBUILD_SOURCEPATH=${WORKDIR}/easybuild/sources:${EESSI_SOURCEPATH} +# take into account accelerator target (if specified via $EESSI_ACCELERATOR_TARGET) +if [ ! 
-z ${EESSI_ACCELERATOR_TARGET} ]; then + if [[ "${EESSI_ACCELERATOR_TARGET}" =~ ^nvidia/cc[0-9][0-9]$ ]]; then + # tweak path to installation directories used by EasyBuild + export EASYBUILD_INSTALLPATH=${EASYBUILD_INSTALLPATH}/accel/${EESSI_ACCELERATOR_TARGET} + # nvidia/cc80 should result in setting $EASYBUILD_CUDA_COMPUTE_CAPABILITIES to '8.0' + export EASYBUILD_CUDA_COMPUTE_CAPABILITIES=$(echo ${EESSI_ACCELERATOR_TARGET} | cut -f2 -d/ | sed 's/^cc\([0-9]\)\([0-9]\)/\1.\2/g') + else + fatal_error "Incorrect value for \$EESSI_ACCELERATOR_TARGET: ${EESSI_ACCELERATOR_TARGET}" + fi +else + echo_yellow "(configure_easybuild) \$EESSI_ACCELERATOR_TARGET not defined" +fi + # just ignore OS dependencies for now, see https://github.com/easybuilders/easybuild-framework/issues/3430 export EASYBUILD_IGNORE_OSDEPS=1 diff --git a/create_lmodrc.py b/create_lmodrc.py index 28ad2a1915..1720b762f0 100755 --- a/create_lmodrc.py +++ b/create_lmodrc.py @@ -33,6 +33,12 @@ def error(msg): error("Prefix directory %s does not exist!" 
% prefix) lmodrc_path = os.path.join(prefix, DOT_LMOD, 'lmodrc.lua') +# Lmod itself doesn't care about the accelerator subdir so remove this duplication from +# the target path (if it exists) +accel_subdir = os.getenv("EESSI_ACCELERATOR_TARGET") +if accel_subdir: + lmodrc_path = lmodrc_path.replace("/accel/%s" % accel_subdir, '') + lmodrc_txt = TEMPLATE_LMOD_RC % { 'dot_lmod': DOT_LMOD, 'prefix': prefix, diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py index 7e55bce2a5..e959572ab1 100755 --- a/create_lmodsitepackage.py +++ b/create_lmodsitepackage.py @@ -8,7 +8,7 @@ DOT_LMOD = '.lmod' -hook_txt ="""require("strict") +hook_txt = """require("strict") local hook = require("Hook") local open = io.open @@ -28,7 +28,7 @@ -- If EESSI_PREFIX wasn't defined, we cannot check if this module was from the EESSI environment -- In that case, we assume it isn't, otherwise EESSI_PREFIX would (probably) have been set if eessi_prefix == nil then - return False + return false else -- NOTE: exact paths for site so may need to be updated later. -- See https://github.com/EESSI/software-layer/pull/371 @@ -36,7 +36,7 @@ -- eessi_prefix_host_injections is the prefix with site-extensions (i.e. additional modules) -- to the official EESSI modules, e.g. /cvmfs/software.eessi.io/host_injections/2023.06 local eessi_prefix_host_injections = string.gsub(eessi_prefix, 'versions', 'host_injections') - + -- Check if the full modulepath starts with the eessi_prefix_* return string.find(t.fn, "^" .. eessi_prefix) ~= nil or string.find(t.fn, "^" .. eessi_prefix_host_injections) ~= nil end @@ -103,7 +103,7 @@ if isFile(archSitePackage) then dofile(archSitePackage) end - + end @@ -111,10 +111,10 @@ local frameStk = require("FrameStk"):singleton() local mt = frameStk:mt() local simpleName = string.match(t.modFullName, "(.-)/") - -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections. 
+ -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections. -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse -- to load the CUDA module and print an informative message on how to set up GPU support for EESSI - local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" + local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/site_specific_config/gpu/.\\n" if simpleName == 'CUDA' then -- get the full host_injections path local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') @@ -207,6 +207,7 @@ load_site_specific_hooks() """ + def error(msg): sys.stderr.write("ERROR: %s\n" % msg) sys.exit(1) @@ -221,12 +222,18 @@ def error(msg): error("Prefix directory %s does not exist!" % prefix) sitepackage_path = os.path.join(prefix, DOT_LMOD, 'SitePackage.lua') + +# Lmod itself doesn't care about compute capability so remove this duplication from +# the install path (if it exists) +accel_subdir = os.getenv("EESSI_ACCELERATOR_TARGET") +if accel_subdir: + sitepackage_path = sitepackage_path.replace("/accel/%s" % accel_subdir, '') try: os.makedirs(os.path.dirname(sitepackage_path), exist_ok=True) with open(sitepackage_path, 'w') as fp: fp.write(hook_txt) # Make sure that the created Lmod file has "read/write" for the user/group and "read" permissions for others - os.chmod(sitepackage_path, S_IREAD|S_IWRITE|S_IRGRP|S_IWGRP|S_IROTH) + os.chmod(sitepackage_path, S_IREAD | S_IWRITE | S_IRGRP | S_IWGRP | S_IROTH) except (IOError, OSError) as err: error("Failed to create %s: %s" % (sitepackage_path, err)) diff --git a/create_tarball.sh b/create_tarball.sh index 2dee665060..01f498e1ac 100755 --- a/create_tarball.sh +++ b/create_tarball.sh @@ -4,14 +4,15 @@ set -e base_dir=$(dirname $(realpath $0)) -if [ $# -ne 4 ]; then - echo "ERROR: Usage: $0 " >&2 
+if [ $# -ne 5 ]; then + echo "ERROR: Usage: $0 " >&2 exit 1 fi eessi_tmpdir=$1 eessi_version=$2 cpu_arch_subdir=$3 -target_tgz=$4 +accel_subdir=$4 +target_tgz=$5 tmpdir=`mktemp -d` echo ">> tmpdir: $tmpdir" @@ -35,6 +36,7 @@ if [ ! -d ${software_dir_overlay} ]; then exit 3 fi +current_workdir=${PWD} cd ${overlay_upper_dir}/versions/ echo ">> Collecting list of files/directories to include in tarball via ${PWD}..." @@ -57,36 +59,47 @@ if [ -d ${eessi_version}/init ]; then find ${eessi_version}/init -type f | grep -v '/\.wh\.' >> ${files_list} fi -if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules ]; then - # module files - find ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules -type f | grep -v '/\.wh\.' >> ${files_list} - # module symlinks - find ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules -type l | grep -v '/\.wh\.' >> ${files_list} - # module files and symlinks - find ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules/all -type f -o -type l \ - | grep -v '/\.wh\.' | grep -v '/\.modulerc\.lua' | sed -e 's/.lua$//' | sed -e 's@.*/modules/all/@@g' | sort -u \ - >> ${module_files_list} -fi +# consider both CPU-only and accelerator subdirectories +for subdir in ${cpu_arch_subdir} ${cpu_arch_subdir}/accel/${accel_subdir}; do + + if [ -d ${eessi_version}/software/${os}/${subdir}/modules ]; then + # module files + find ${eessi_version}/software/${os}/${subdir}/modules -type f | grep -v '/\.wh\.' >> ${files_list} || true # Make sure we don't exit because of set -e if grep doesn't return a match + # module symlinks + find ${eessi_version}/software/${os}/${subdir}/modules -type l | grep -v '/\.wh\.' >> ${files_list} || true # Make sure we don't exit because of set -e if grep doesn't return a match + # module files and symlinks + find ${eessi_version}/software/${os}/${subdir}/modules/all -type f -o -type l \ + | grep -v '/\.wh\.' 
| grep -v '/\.modulerc\.lua' | sed -e 's/.lua$//' | sed -e 's@.*/modules/all/@@g' | sort -u \ + >> ${module_files_list} + fi + + if [ -d ${eessi_version}/software/${os}/${subdir}/software -a -r ${module_files_list} ]; then + # installation directories but only those for which module files were created + # Note, we assume that module names (as defined by 'PACKAGE_NAME/VERSION.lua' + # using EasyBuild's standard module naming scheme) match the name of the + # software installation directory (expected to be 'PACKAGE_NAME/VERSION/'). + # If either side changes (module naming scheme or naming of software + # installation directories), the procedure will likely not work. + for package_version in $(cat ${module_files_list}); do + echo "handling ${package_version}" + ls -d ${eessi_version}/software/${os}/${subdir}/software/${package_version} \ + | grep -v '/\.wh\.' >> ${files_list} || true # Make sure we don't exit because of set -e if grep doesn't return a match + done + fi +done -if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/software -a -r ${module_files_list} ]; then - # installation directories but only those for which module files were created - # Note, we assume that module names (as defined by 'PACKAGE_NAME/VERSION.lua' - # using EasyBuild's standard module naming scheme) match the name of the - # software installation directory (expected to be 'PACKAGE_NAME/VERSION/'). - # If either side changes (module naming scheme or naming of software - # installation directories), the procedure will likely not work. - for package_version in $(cat ${module_files_list}); do - echo "handling ${package_version}" - ls -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/software/${package_version} \ - | grep -v '/\.wh\.' 
>> ${files_list} - done +# add a bit of debug output +if [ -r ${files_list} ]; then + echo "wrote file list to ${files_list}" + cat ${files_list} fi +if [ -r ${module_files_list} ]; then + echo "wrote module file list to ${module_files_list}" + cat ${module_files_list} -# add a bit debug output -echo "wrote file list to ${files_list}" -[ -r ${files_list} ] && cat ${files_list} -echo "wrote module file list to ${module_files_list}" -[ -r ${module_files_list} ] && cat ${module_files_list} + # Copy the module files list to current working dir for later use in the test step + cp ${module_files_list} ${current_workdir}/module_files.list.txt +fi topdir=${cvmfs_repo}/versions/ diff --git a/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.2-2023a.yml b/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.2-2023a.yml index 5521b92398..6474d658ce 100644 --- a/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.2-2023a.yml +++ b/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.2-2023a.yml @@ -4,3 +4,23 @@ easyconfigs: - SciPy-bundle-2023.07-gfbf-2023a.eb - ESPResSo-4.2.2-foss-2023a.eb - ParaView-5.11.2-foss-2023a.eb + - OpenFOAM-10-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20958 + from-commit: dbadb2074464d816740ee0e95595c2cb31b6338f + - OpenFOAM-11-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20958 + from-commit: dbadb2074464d816740ee0e95595c2cb31b6338f + - OpenFOAM-v2312-foss-2023a.eb: + options: + # https://github.com/easybuilders/easybuild-easyblocks/pull/3388 + include-easyblocks-from-commit: c8256a36e7062bc09f5ce30552a9de9827054c9e + # https://github.com/easybuilders/easybuild-easyconfigs/pull/20841 + from-commit: f0e91e6e430ebf902f7788ebb47f0203dee60649 + - R-4.3.2-gfbf-2023a.eb + - Highway-1.0.4-GCCcore-12.3.0.eb + - Brunsli-0.1-GCCcore-12.3.0.eb: + options: + #
https://github.com/easybuilders/easybuild-easyconfigs/pull/21366 + from-commit: 1736a123b1685836452587a5c51793257570bb2d diff --git a/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.3-2023a.yml b/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.3-2023a.yml new file mode 100644 index 0000000000..df3d0dedaa --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.3-2023a.yml @@ -0,0 +1,2 @@ +easyconfigs: + - LAMMPS-2Aug2023_update2-foss-2023a-kokkos.eb diff --git a/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.4-2023a.yml b/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.4-2023a.yml new file mode 100644 index 0000000000..170a639064 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.4-2023a.yml @@ -0,0 +1,11 @@ +easyconfigs: + - ROOT-6.30.06-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21526 + from-commit: 6cbfbd7d7a55dc7243f46d0beea510278f4718df + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3467 + include-easyblocks-from-commit: c3aebe1f133d064a228c5d6c282e898b83d74601 + - waLBerla-6.1-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21600 + from-commit: 9b12318bcff1749781d9eb71c23e21bc3a79ed01 diff --git a/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.4-2023b.yml b/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.4-2023b.yml new file mode 100644 index 0000000000..a60f9bec6a --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.4-2023b.yml @@ -0,0 +1,4 @@ +easyconfigs: + - SciPy-bundle-2023.11-gfbf-2023b.eb + - ESPResSo-4.2.2-foss-2023b.eb + - pyMBE-0.8.0-foss-2023b.eb diff --git a/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.3-2023a-CUDA.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.3-2023a-CUDA.yml new file mode 
100644 index 0000000000..8935a3f3c3 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.3-2023a-CUDA.yml @@ -0,0 +1,6 @@ +easyconfigs: + - LAMMPS-2Aug2023_update2-foss-2023a-kokkos-CUDA-12.1.1.eb + - ESPResSo-4.2.2-foss-2023a-CUDA-12.1.1.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21440 + from-commit: 5525968921d7b5eae54f7d16391201e17ffae13c diff --git a/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml new file mode 100644 index 0000000000..873c19aa33 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml @@ -0,0 +1,3 @@ +easyconfigs: + - CUDA-12.1.1.eb + - cuDNN-8.9.2.26-CUDA-12.1.1.eb diff --git a/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023b-CUDA.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023b-CUDA.yml new file mode 100644 index 0000000000..5f195b3714 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023b-CUDA.yml @@ -0,0 +1,5 @@ +easyconfigs: + - CUDA-12.4.0.eb: + options: + accept-eula-for: CUDA + - UCX-CUDA-1.15.0-GCCcore-13.2.0-CUDA-12.4.0.eb diff --git a/easystacks/software.eessi.io/2023.06/accel/nvidia/rebuilds/20240925-eb-4.9.4-NCCL-2.18.3-in-accel-prefix.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/rebuilds/20240925-eb-4.9.4-NCCL-2.18.3-in-accel-prefix.yml new file mode 100644 index 0000000000..d6667af9a1 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/accel/nvidia/rebuilds/20240925-eb-4.9.4-NCCL-2.18.3-in-accel-prefix.yml @@ -0,0 +1,5 @@ +# 2024.09.25 +# We need to reinstall NCCL in the accelerator prefixes +# See https://github.com/EESSI/software-layer/pull/487 +easyconfigs: + - NCCL-2.18.3-GCCcore-12.3.0-CUDA-12.1.1.eb diff --git 
a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a-CUDA.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a-CUDA.yml new file mode 100644 index 0000000000..f8bde420de --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a-CUDA.yml @@ -0,0 +1,9 @@ +easyconfigs: + - CUDA-Samples-12.1-GCC-12.3.0-CUDA-12.1.1.eb: + # use easyconfig that only install subset of CUDA samples, + # to circumvent problem with nvcc linking to glibc of host OS, + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19189; + # and where additional samples are excluded because they fail to build on aarch64, + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19451; + options: + from-pr: 19451 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml index 7244219dc3..43b081b122 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml @@ -35,14 +35,6 @@ easyconfigs: - Boost-1.82.0-GCC-12.3.0.eb - netCDF-4.9.2-gompi-2023a.eb - FFmpeg-6.0-GCCcore-12.3.0.eb - - CUDA-Samples-12.1-GCC-12.3.0-CUDA-12.1.1.eb: - # use easyconfig that only install subset of CUDA samples, - # to circumvent problem with nvcc linking to glibc of host OS, - # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19189; - # and where additional samples are excluded because they fail to build on aarch64, - # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19451; - options: - from-pr: 19451 - ALL-0.9.2-foss-2023a.eb: options: from-pr: 19455 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a-CUDA.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a-CUDA.yml new file mode 100644 index 0000000000..cccbfa6808 --- /dev/null +++ 
b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a-CUDA.yml @@ -0,0 +1,2 @@ +easyconfigs: + - OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml index 4b58cb6106..3f6590c3cd 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml @@ -34,7 +34,6 @@ easyconfigs: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19996 from-pr: 19996 - dask-2023.9.2-foss-2023a.eb - - OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb - JupyterNotebook-7.0.2-GCCcore-12.3.0.eb - ImageMagick-7.1.1-15-GCCcore-12.3.0.eb: options: diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-001-system.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-001-system.yml new file mode 100644 index 0000000000..1b2343ec1f --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-001-system.yml @@ -0,0 +1,11 @@ +easyconfigs: + - EasyBuild-4.9.3.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21412 + from-commit: 1cdd81524c974a29825e37bcf8ef3ccc291f5227 + - ReFrame-4.6.2.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21307 + from-commit: 0c4bd5c5a80f571a8932fbc38880d72455406816 + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3431 + include-easyblocks-from-commit: efddeb02abe1a679324ac01ef19601dedbe79cc0 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2022b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2022b.yml new file mode 100644 index 0000000000..969b0d469b --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2022b.yml @@ -0,0 +1,52 @@ +easyconfigs: + - BLAST+-2.14.0-gompi-2022b.eb + - 
BioPerl-1.7.8-GCCcore-12.2.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21136 + from-commit: d8076ebaf8cb915762adebf88d385cc672b350dc + - gnuplot-5.4.6-GCCcore-12.2.0.eb + - h5py-3.8.0-foss-2022b.eb + - MDAnalysis-2.4.2-foss-2022b.eb + - ncbi-vdb-3.0.5-gompi-2022b.eb + - Bio-DB-HTS-3.01-GCC-12.2.0.eb + - MAFFT-7.505-GCC-12.2.0-with-extensions.eb + - MetaEuk-6-GCC-12.2.0.eb + - BamTools-2.5.2-GCC-12.2.0.eb + - Bio-SearchIO-hmmer-1.7.3-GCC-12.2.0.eb + - Mash-2.3-GCC-12.2.0.eb + - CapnProto-0.10.3-GCCcore-12.2.0.eb + - WhatsHap-2.1-foss-2022b.eb + - SAMtools-1.17-GCC-12.2.0.eb + - Bowtie2-2.5.1-GCC-12.2.0.eb + - CD-HIT-4.8.1-GCC-12.2.0.eb + - VCFtools-0.1.16-GCC-12.2.0.eb + - GenomeTools-1.6.2-GCC-12.2.0.eb + - Bio-SearchIO-hmmer-1.7.3-GCC-12.2.0.eb + - parallel-20230722-GCCcore-12.2.0.eb + - BCFtools-1.17-GCC-12.2.0.eb + - lpsolve-5.5.2.11-GCC-12.2.0.eb + - fastp-0.23.4-GCC-12.2.0.eb + - KronaTools-2.8.1-GCCcore-12.2.0.eb + - MultiQC-1.14-foss-2022b.eb + - CGAL-5.5.2-GCCcore-12.2.0.eb + - KaHIP-3.14-gompi-2022b.eb + - MPC-1.3.1-GCCcore-12.2.0.eb + - MUMPS-5.6.1-foss-2022b-metis.eb + - GL2PS-1.4.2-GCCcore-12.2.0.eb + - GST-plugins-base-1.22.1-GCC-12.2.0.eb + - wxWidgets-3.2.2.1-GCC-12.2.0.eb + - Archive-Zip-1.68-GCCcore-12.2.0.eb + - jemalloc-5.3.0-GCCcore-12.2.0.eb + - Judy-1.0.5-GCCcore-12.2.0.eb + - libaio-0.3.113-GCCcore-12.2.0.eb + - Z3-4.12.2-GCCcore-12.2.0.eb + - tbb-2021.10.0-GCCcore-12.2.0.eb + - dask-2023.7.1-foss-2022b.eb + - netcdf4-python-1.6.3-foss-2022b.eb + - Ruby-3.2.2-GCCcore-12.2.0.eb + - ROOT-6.26.10-foss-2022b.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21526 + from-commit: 6cbfbd7d7a55dc7243f46d0beea510278f4718df + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3467 + include-easyblocks-from-commit: c3aebe1f133d064a228c5d6c282e898b83d74601 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2023a.yml 
b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2023a.yml index 7e9f4ff05a..e9011a0664 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2023a.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2023a.yml @@ -26,3 +26,48 @@ easyconfigs: options: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20951 from-commit: a92667fe32396bbd4106243658625f7ff2adcd68 + - amdahl-0.3.1-gompi-2023a.eb + - librosa-0.10.1-foss-2023a.eb + - xarray-2023.9.0-gfbf-2023a.eb + - SciTools-Iris-3.9.0-foss-2023a.eb + - OpenFOAM-v2312-foss-2023a.eb: + options: + # https://github.com/easybuilders/easybuild-easyblocks/pull/3388 + include-easyblocks-from-commit: c8256a36e7062bc09f5ce30552a9de9827054c9e + # https://github.com/easybuilders/easybuild-easyconfigs/pull/20841 + from-commit: f0e91e6e430ebf902f7788ebb47f0203dee60649 + - BioPerl-1.7.8-GCCcore-12.3.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21136 + from-commit: d8076ebaf8cb915762adebf88d385cc672b350dc + - grpcio-1.57.0-GCCcore-12.3.0.eb + - orjson-3.9.15-GCCcore-12.3.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20880 + from-commit: bc6e08f89759b8b70166de5bfcb5056b9db8ec90 + - wradlib-2.0.3-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21094 + from-commit: 3a2e0b8e6ee45277d01fb7e2eb93027a28c9461a + - MBX-1.1.0-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21155 + from-commit: 6929a67401f2a2ec58f91fb306332a77497d73ff + - Transrate-1.0.3-GCC-12.3.0.eb: + options: + # https://github.com/easybuilders/easybuild-easyblocks/pull/3381 + include-easyblocks-from-commit: bb86f05d4917b29e022023f152efdf0ca5c14ded + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20964 + from-commit: 7d539a9e599d8bc5ac2bda6ee9587ef62351ee03 + - Critic2-1.2-foss-2023a.eb: + options: + # see 
https://github.com/easybuilders/easybuild-easyconfigs/pull/20833 + from-commit: 78426c2383fc7e4b9b9e77d7a77f336e1bee3843 + - LRBinner-0.1-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21310 + from-commit: 799d9101df2cf81aabe252f00cc82a7246363f53 + - Redland-1.0.17-GCC-12.3.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21227 + from-commit: 4c5e3455dec31e68e8383c7fd86d1f80c434676d diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2023b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2023b.yml index f118e94adb..6398f014dc 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2023b.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2023b.yml @@ -11,3 +11,20 @@ easyconfigs: from-commit: 120f4d56efebd2bc61382db4c84a664a339c66cf # see https://github.com/easybuilders/easybuild-easyblocks/pull/3393 include-easyblocks-from-commit: c4951c78d62fa5cf8e9f6fe0ead212d2a4d7cb9c + - pyMBE-0.8.0-foss-2023b.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21034 + from-commit: 76e7fc6657bab64bfbec826540a3a8f0040258f2 + - STAR-2.7.11b-GCC-13.2.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21200 + from-commit: 765ba900daf5953e306c4dad896febe52fdd6c00 + - HPL-2.3-foss-2023b.eb + - Brunsli-0.1-GCCcore-13.2.0.eb: + options: + # https://github.com/easybuilders/easybuild-easyconfigs/pull/21366 + from-commit: 1736a123b1685836452587a5c51793257570bb2d + - R-bundle-CRAN-2024.06-foss-2023b.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21285 + from-commit: 41a2cd83f9fb017b76f0693f6a264d8acb548317 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-001-system.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-001-system.yml new file mode 100644 index 0000000000..d9c6075561 --- /dev/null +++ 
b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-001-system.yml @@ -0,0 +1,5 @@ +easyconfigs: + - EasyBuild-4.9.4.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21465 + from-commit: 39cdebd7bd2cb4a9c170ee22439401316b2e7a25 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-2023a.yml new file mode 100644 index 0000000000..0c863f0025 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-2023a.yml @@ -0,0 +1,8 @@ +easyconfigs: + - ccache-4.9-GCCcore-12.3.0.eb + - GDB-13.2-GCCcore-12.3.0.eb + - tmux-3.3a-GCCcore-12.3.0.eb + - Vim-9.1.0004-GCCcore-12.3.0.eb + - gmsh-4.12.2-foss-2023a.eb + - basemap-1.3.9-foss-2023a.eb + - geopandas-0.14.2-foss-2023a.eb \ No newline at end of file diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-2023b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-2023b.yml new file mode 100644 index 0000000000..5325f2e553 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-2023b.yml @@ -0,0 +1,9 @@ +easyconfigs: + - LAMMPS-29Aug2024-foss-2023b-kokkos.eb: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21436 + options: + from-commit: 9dc24e57880a8adb06ae10557c5315e66671a533 + - GROMACS-2024.3-foss-2023b.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21430 + from-commit: 8b509882d03402e2998ff9b22c154a6957e36d6b diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023a.yml new file mode 100644 index 0000000000..ba050fe2fa --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023a.yml @@ -0,0 +1,27 @@ +easyconfigs: + - ROOT-6.30.06-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21526 + from-commit: 
6cbfbd7d7a55dc7243f46d0beea510278f4718df + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3467 + include-easyblocks-from-commit: c3aebe1f133d064a228c5d6c282e898b83d74601 + - waLBerla-6.1-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21600 + from-commit: 9b12318bcff1749781d9eb71c23e21bc3a79ed01 + - mpl-ascii-0.10.0-gfbf-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21679 + from-commit: 7106f63160b1418d605882dd02ba151d099300bd + - jedi-0.19.0-GCCcore-12.3.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21650 + from-commit: 109998f6adcda7efb4174b1e5f73b41ee82d1f13 + - Solids4foam-2.1-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21606 + from-commit: 63562c58acf1be64407192b6862c3bd80253d2e0 + - Cassiopeia-2.0.0-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21657 + from-commit: 7f1f0e60487e7e1fcb5c4e6bc4fbc4f89994e3fd diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml new file mode 100644 index 0000000000..d9b7dca5d5 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml @@ -0,0 +1,15 @@ +easyconfigs: + - SIONlib-1.7.7-GCCcore-13.2.0-tools.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21752 + from-commit: 6b8b53493a1188a5baa56a133574daac239730e7 + - Score-P-8.4-gompi-2023b.eb: + options: + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3496 + include-easyblocks-from-commit: 60633b0acfd41a0732992d9e16800dae71a056eb + - Cython-3.0.10-GCCcore-13.2.0.eb + - Mustache-1.3.3-foss-2023b.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21783 + from-commit: 5fa3db9eb36f91cba3fbf351549f8ba2849abc33 + - 
GDRCopy-2.4-GCCcore-13.2.0.eb diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/2024.05.06-eb-4.9.1-CUDA-12.1.1-ship-full-runtime.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240506-eb-4.9.1-CUDA-12.1.1-ship-full-runtime.yml similarity index 100% rename from easystacks/software.eessi.io/2023.06/rebuilds/2024.05.06-eb-4.9.1-CUDA-12.1.1-ship-full-runtime.yml rename to easystacks/software.eessi.io/2023.06/rebuilds/20240506-eb-4.9.1-CUDA-12.1.1-ship-full-runtime.yml diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240719-eb-4.9.2-GObject-Introspection-filter-envvars-zen4.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240719-eb-4.9.2-GObject-Introspection-filter-envvars-zen4.yml new file mode 100644 index 0000000000..2c9b411736 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240719-eb-4.9.2-GObject-Introspection-filter-envvars-zen4.yml @@ -0,0 +1,11 @@ +# 2024.07.19 +# GObject-Introspection sets $LD_LIBRARY_PATH (to many different paths, including $EPREFIX/lib) +# when calling gcc, and this causes a lot of issues for, especially, scripts using /bin/bash. +# +# This rebuild ensures (by using a new EasyBuild hook) that GObject-Introspection will not set +# environment variables that are configured to be filtered by EasyBuild. 
+# +# See https://github.com/EESSI/software-layer/issues/196 +easyconfigs: + - GObject-Introspection-1.76.1-GCCcore-12.3.0.eb + - GObject-Introspection-1.78.1-GCCcore-13.2.0.eb diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240719-eb-4.9.2-GObject-Introspection-filter-envvars.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240719-eb-4.9.2-GObject-Introspection-filter-envvars.yml new file mode 100644 index 0000000000..a61cd9705b --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240719-eb-4.9.2-GObject-Introspection-filter-envvars.yml @@ -0,0 +1,14 @@ +# 2024.07.19 +# GObject-Introspection sets $LD_LIBRARY_PATH (to many different paths, including $EPREFIX/lib) +# when calling gcc, and this causes a lot of issues for, especially, scripts using /bin/bash. +# +# This rebuild ensures (by using a new EasyBuild hook) that GObject-Introspection will not set +# environment variables that are configured to be filtered by EasyBuild. +# +# See https://github.com/EESSI/software-layer/issues/196 +easyconfigs: + - GObject-Introspection-1.74.0-GCCcore-12.2.0.eb + - GObject-Introspection-1.76.1-GCCcore-12.3.0.eb + - GObject-Introspection-1.78.1-GCCcore-13.2.0.eb + - at-spi2-core-2.46.0-GCCcore-12.2.0.eb + - at-spi2-core-2.49.91-GCCcore-12.3.0.eb diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240729-eb-4.9.2-Python-ctypes.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240729-eb-4.9.2-Python-ctypes.yml new file mode 100644 index 0000000000..7554289c3b --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240729-eb-4.9.2-Python-ctypes.yml @@ -0,0 +1,24 @@ +# 2024.07.29 +# Python ctypes relies on LD_LIBRARY_PATH and doesn't respect rpath linking. There is a workaround +# for the EasyBuild context in https://github.com/easybuilders/easybuild-easyblocks/pull/3352. +# +# This rebuild ensures this fix is available for all Python versions shipped with EESSI. 
+# +# See https://gitlab.com/eessi/support/-/issues/77 +easyconfigs: + - Python-3.10.8-GCCcore-12.2.0-bare: + options: + # See https://github.com/easybuilders/easybuild-easyblocks/pull/3352 + include-easyblocks-from-commit: 1ee17c0f7726c69e97442f53c65c5f041d65c94f + - Python-3.10.8-GCCcore-12.2.0: + options: + # See https://github.com/easybuilders/easybuild-easyblocks/pull/3352 + include-easyblocks-from-commit: 1ee17c0f7726c69e97442f53c65c5f041d65c94f + - Python-3.11.3-GCCcore-12.3.0: + options: + # See https://github.com/easybuilders/easybuild-easyblocks/pull/3352 + include-easyblocks-from-commit: 1ee17c0f7726c69e97442f53c65c5f041d65c94f + - Python-3.11.5-GCCcore-13.2.0: + options: + # See https://github.com/easybuilders/easybuild-easyblocks/pull/3352 + include-easyblocks-from-commit: 1ee17c0f7726c69e97442f53c65c5f041d65c94f diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240801-eb-4.9.2-Python-ctypes-zen4.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240801-eb-4.9.2-Python-ctypes-zen4.yml new file mode 100644 index 0000000000..2104b4d836 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240801-eb-4.9.2-Python-ctypes-zen4.yml @@ -0,0 +1,17 @@ +# 2024.08.01 +# Python ctypes relies on LD_LIBRARY_PATH and doesn't respect rpath linking. There is a workaround +# for the EasyBuild context in https://github.com/easybuilders/easybuild-easyblocks/pull/3352. +# +# This rebuild ensures this fix is available for all Python versions shipped for +# zen4 with EESSI. 
+# +# See https://gitlab.com/eessi/support/-/issues/77 +easyconfigs: + - Python-3.11.3-GCCcore-12.3.0: + options: + # See https://github.com/easybuilders/easybuild-easyblocks/pull/3352 + include-easyblocks-from-commit: 1ee17c0f7726c69e97442f53c65c5f041d65c94f + - Python-3.11.5-GCCcore-13.2.0: + options: + # See https://github.com/easybuilders/easybuild-easyblocks/pull/3352 + include-easyblocks-from-commit: 1ee17c0f7726c69e97442f53c65c5f041d65c94f diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240814-eb-4.9.2-hatchling-1.18.0-updated-easyconfig.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240814-eb-4.9.2-hatchling-1.18.0-updated-easyconfig.yml new file mode 100644 index 0000000000..7ab02420ca --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240814-eb-4.9.2-hatchling-1.18.0-updated-easyconfig.yml @@ -0,0 +1,9 @@ +# 2024.08.14 +# hatchling-1.18.0 rebuild to account for easyconfig changed upstream +# see https://gitlab.com/eessi/support/-/issues/85 and +# https://github.com/easybuilders/easybuild-easyconfigs/pull/20389 +easyconfigs: + - hatchling-1.18.0-GCCcore-12.3.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20389 + from-commit: 9580c0d67d6dd97b160b768a839bfcba6d5b21b9 diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240823-eb-4.9.2-GObject-Introspection-filter-envvars-a64fx.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240823-eb-4.9.2-GObject-Introspection-filter-envvars-a64fx.yml new file mode 100644 index 0000000000..782db66e78 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240823-eb-4.9.2-GObject-Introspection-filter-envvars-a64fx.yml @@ -0,0 +1,13 @@ +# 2024.08.23 +# GObject-Introspection sets $LD_LIBRARY_PATH (to many different paths, including $EPREFIX/lib) +# when calling gcc, and this causes a lot of issues for, especially, scripts using /bin/bash. 
+# +# This rebuild ensures (by using a new EasyBuild hook) that GObject-Introspection will not set +# environment variables that are configured to be filtered by EasyBuild. +# This rebuild was not done initially for A64FX. This file is meant to do the same as the +# previous rebuild of GObject-Introspection-1.76.1-GCCcore-12.3.0 in other architectures, +# but for A64FX. +# +# See https://github.com/EESSI/software-layer/issues/196 +easyconfigs: + - GObject-Introspection-1.76.1-GCCcore-12.3.0.eb diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240918-eb-4.9.3-CUDA-12.1.1-in-accel-prefix.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240918-eb-4.9.3-CUDA-12.1.1-in-accel-prefix.yml new file mode 100644 index 0000000000..755bea096e --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240918-eb-4.9.3-CUDA-12.1.1-in-accel-prefix.yml @@ -0,0 +1,7 @@ +# 2024.09.18 +# We need to reinstall CUDA in the accelerator prefixes +# See https://github.com/EESSI/software-layer/pull/720 +easyconfigs: + - CUDA-12.1.1.eb: + options: + accept-eula-for: CUDA diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240919-eb-4.9.3-Cuda-Samples-in-accel-prefix.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240919-eb-4.9.3-Cuda-Samples-in-accel-prefix.yml new file mode 100644 index 0000000000..da2c06ae1e --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240919-eb-4.9.3-Cuda-Samples-in-accel-prefix.yml @@ -0,0 +1,5 @@ +# 2024.09.19 +# We need to reinstall CUDA-Samples in the accelerator prefixes +# See https://github.com/EESSI/software-layer/pull/715 +easyconfigs: + - CUDA-Samples-12.1-GCC-12.3.0-CUDA-12.1.1.eb diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240919-eb-4.9.3-osu-microbenchmarks-in-accel-prefix.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240919-eb-4.9.3-osu-microbenchmarks-in-accel-prefix.yml new file mode 100644 index 0000000000..23801e0250 --- /dev/null +++ 
b/easystacks/software.eessi.io/2023.06/rebuilds/20240919-eb-4.9.3-osu-microbenchmarks-in-accel-prefix.yml @@ -0,0 +1,5 @@ +# 2024.09.19 +# We need to reinstall OSU-Micro-Benchmarks in the accelerator prefixes +# See https://github.com/EESSI/software-layer/pull/716 +easyconfigs: + - OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240919-eb-4.9.3-uxc-cuda-in-accel-prefix.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240919-eb-4.9.3-uxc-cuda-in-accel-prefix.yml new file mode 100644 index 0000000000..d347af335a --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240919-eb-4.9.3-uxc-cuda-in-accel-prefix.yml @@ -0,0 +1,5 @@ +# 2024.09.19 +# We need to reinstall UCX-CUDA in the accelerator prefixes +# See https://github.com/EESSI/software-layer/pull/719 +easyconfigs: + - UCX-CUDA-1.14.1-GCCcore-12.3.0-CUDA-12.1.1.eb diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240925-eb-4.9.3-ucc-cuda-in-accel-prefix.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240925-eb-4.9.3-ucc-cuda-in-accel-prefix.yml new file mode 100644 index 0000000000..a418086c44 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240925-eb-4.9.3-ucc-cuda-in-accel-prefix.yml @@ -0,0 +1,4 @@ +# 2024.09.25 +# We need to reinstall UCC-CUDA in the accelerator prefixes +easyconfigs: + - UCC-CUDA-1.2.0-GCCcore-12.3.0-CUDA-12.1.1.eb diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240925-eb-4.9.4-EESSI-extend.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240925-eb-4.9.4-EESSI-extend.yml new file mode 100644 index 0000000000..9cd1b451cd --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240925-eb-4.9.4-EESSI-extend.yml @@ -0,0 +1,6 @@ +# 2024.09.25 +# EESSI-extend did not support LMOD_EXACT_MATCH +# (see https://github.com/EESSI/software-layer/pull/747) +easyconfigs: + - EESSI-extend-2023.06-easybuild.eb + diff --git 
a/easystacks/software.eessi.io/2023.06/rebuilds/20241008-eb-4.9.4-EESSI-extend.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20241008-eb-4.9.4-EESSI-extend.yml new file mode 100644 index 0000000000..5491ef8427 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20241008-eb-4.9.4-EESSI-extend.yml @@ -0,0 +1,5 @@ +# 2024.10.08 +# EESSI-extend should use EESSI_SITE_INSTALLPATH, instead of recalculating this +easyconfigs: + - EESSI-extend-2023.06-easybuild.eb + diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20241017-eb-4.9.4-OpenBLAS-aarch64-generic.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20241017-eb-4.9.4-OpenBLAS-aarch64-generic.yml new file mode 100644 index 0000000000..d6d8f70143 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20241017-eb-4.9.4-OpenBLAS-aarch64-generic.yml @@ -0,0 +1,18 @@ +# 2024.10.17 +# TARGET=ARMV8 must be used when building OpenBLAS for aarch64/generic, +# since otherwise "Illegal instruction" errors may happen in the driver part of OpenBLAS +# on systems that only support a minimal instruction set like Arm v8 (like Raspberry Pi SBCs); +# see also https://github.com/OpenMathLib/OpenBLAS/issues/4945 +easyconfigs: + - OpenBLAS-0.3.21-GCC-12.2.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3492 + include-easyblocks-from-commit: d06d9617d9bfb63d338b6879eab9da81c8a312d8 + - OpenBLAS-0.3.23-GCC-12.3.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3492 + include-easyblocks-from-commit: d06d9617d9bfb63d338b6879eab9da81c8a312d8 + - OpenBLAS-0.3.24-GCC-13.2.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3492 + include-easyblocks-from-commit: d06d9617d9bfb63d338b6879eab9da81c8a312d8 diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml new file mode 100644 index 
0000000000..e4c658784f --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml @@ -0,0 +1,6 @@ +# 2024.11.12 +# for installations under /cvmfs, if EESSI_ACCELERATOR_TARGET is set, +# EESSI-extend should adjust EASYBUILD_INSTALLPATH and set +# EASYBUILD_CUDA_COMPUTE_CAPABILITIES +easyconfigs: + - EESSI-extend-2023.06-easybuild.eb diff --git a/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.2-001-system.yml b/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.2-001-system.yml new file mode 100644 index 0000000000..f1fde247d0 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.2-001-system.yml @@ -0,0 +1,2 @@ +easyconfigs: + - ReFrame-4.3.3.eb diff --git a/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.2-2023a.yml b/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.2-2023a.yml index 65a12695e4..316754a6d1 100644 --- a/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.2-2023a.yml +++ b/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.2-2023a.yml @@ -11,3 +11,15 @@ easyconfigs: options: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20958 from-commit: dbadb2074464d816740ee0e95595c2cb31b6338f + - Highway-1.0.4-GCCcore-12.3.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20942 + from-commit: 524da37b903585cea5a9eeb4156d1c8d57636bd8 + - librosa-0.10.1-foss-2023a.eb + - R-bundle-Bioconductor-3.18-foss-2023a-R-4.3.2.eb + - BioPerl-1.7.8-GCCcore-12.3.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21136 + from-commit: d8076ebaf8cb915762adebf88d385cc672b350dc + - MODFLOW-6.4.4-foss-2023a.eb + - ALL-0.9.2-foss-2023a.eb diff --git a/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-001-system.yml b/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-001-system.yml new file mode 100644 index 
0000000000..25337649ce --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-001-system.yml @@ -0,0 +1,4 @@ +easyconfigs: + - Nextflow-23.10.0.eb + - EasyBuild-4.8.2.eb + - EasyBuild-4.9.0.eb diff --git a/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-2023a.yml b/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-2023a.yml new file mode 100644 index 0000000000..519d7701dc --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-2023a.yml @@ -0,0 +1,19 @@ +easyconfigs: + - LAMMPS-2Aug2023_update2-foss-2023a-kokkos.eb + - JupyterNotebook-7.0.2-GCCcore-12.3.0.eb + - PyQt5-5.15.10-GCCcore-12.3.0.eb + - OrthoFinder-2.5.5-foss-2023a.eb + - snakemake-8.4.2-foss-2023a.eb + - Rivet-3.1.9-gompi-2023a-HepMC3-3.2.6.eb + - GATK-4.5.0.0-GCCcore-12.3.0-Java-17.eb + - ipympl-0.9.3-gfbf-2023a.eb + - LHAPDF-6.5.4-GCC-12.3.0.eb + - LoopTools-2.15-GCC-12.3.0.eb + - ncdu-1.18-GCC-12.3.0.eb + - WhatsHap-2.2-foss-2023a.eb + - PyOpenGL-3.1.7-GCCcore-12.3.0.eb + - SAMtools-1.18-GCC-12.3.0.eb + - CDO-2.2.2-gompi-2023a.eb + - OSU-Micro-Benchmarks-7.1-1-gompi-2023a.eb + - BWA-0.7.17-20220923-GCCcore-12.3.0.eb + - Valgrind-3.21.0-gompi-2023a.eb diff --git a/eb_hooks.py b/eb_hooks.py index b591c5ea64..8a12d63d7e 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -26,10 +26,15 @@ CPU_TARGET_AARCH64_GENERIC = 'aarch64/generic' CPU_TARGET_A64FX = 'aarch64/a64fx' +CPU_TARGET_ZEN4 = 'x86_64/amd/zen4' + EESSI_RPATH_OVERRIDE_ATTR = 'orig_rpath_override_dirs' SYSTEM = EASYCONFIG_CONSTANTS['SYSTEM'][0] +EESSI_INSTALLATION_REGEX = r"^/cvmfs/[^/]*\.eessi\.io/versions/" +HOST_INJECTIONS_LOCATION = "/cvmfs/software.eessi.io/host_injections/" + def get_eessi_envvar(eessi_envvar): """Get an EESSI environment variable from the environment""" @@ -83,10 +88,10 @@ def post_ready_hook(self, *args, **kwargs): # 'parallel' easyconfig parameter is set via EasyBlock.set_parallel in ready step based on available cores. 
# here we reduce parallellism to only use half of that for selected software, # to avoid failing builds/tests due to out-of-memory problems; - memory_hungry_build = self.name in ['libxc', 'TensorFlow'] + memory_hungry_build = self.name in ['libxc', 'MBX', 'TensorFlow'] # on A64FX systems, (HBM) memory is typically scarce, so we need to use fewer cores for some builds cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') - memory_hungry_build_a64fx = cpu_target == CPU_TARGET_A64FX and self.name in ['Qt5'] + memory_hungry_build_a64fx = cpu_target == CPU_TARGET_A64FX and self.name in ['Qt5', 'ROOT'] if memory_hungry_build or memory_hungry_build_a64fx: parallel = self.cfg['parallel'] if parallel > 1: @@ -129,7 +134,8 @@ def pre_prepare_hook(self, *args, **kwargs): def post_prepare_hook_gcc_prefixed_ld_rpath_wrapper(self, *args, **kwargs): """ Post-configure hook for GCCcore: - - copy RPATH wrapper script for linker commands to also have a wrapper in place with system type prefix like 'x86_64-pc-linux-gnu' + - copy RPATH wrapper script for linker commands to also have a wrapper in + place with system type prefix like 'x86_64-pc-linux-gnu' """ if self.name == 'GCCcore': config_guess = obtain_config_guess() @@ -184,7 +190,9 @@ def parse_hook_casacore_disable_vectorize(ec, eprefix): ): cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') if cpu_target == CPU_TARGET_NEOVERSE_V1: - if not hasattr(ec, 'toolchainopts'): + # Make sure the toolchainopts key exists, and the value is a dict, + # before we add the option to disable vectorization + if 'toolchainopts' not in ec or ec['toolchainopts'] is None: ec['toolchainopts'] = {} ec['toolchainopts']['vectorize'] = False print_msg("Changed toochainopts for %s: %s", ec.name, ec['toolchainopts']) @@ -220,6 +228,19 @@ def parse_hook_fontconfig_add_fonts(ec, eprefix): raise EasyBuildError("fontconfig-specific hook triggered for non-fontconfig easyconfig?!") +def parse_hook_grpcio_zlib(ec, ecprefix): + """Adjust preinstallopts to 
use ZLIB from compat layer.""" + if ec.name == 'grpcio' and ec.version in ['1.57.0']: + exts_list = ec['exts_list'] + original_preinstallopts = (exts_list[0][2])['preinstallopts'] + original_option = "GRPC_PYTHON_BUILD_SYSTEM_ZLIB=True" + new_option = "GRPC_PYTHON_BUILD_SYSTEM_ZLIB=False" + (exts_list[0][2])['preinstallopts'] = original_preinstallopts.replace(original_option, new_option, 1) + print_msg("Modified the easyconfig to use compat ZLIB with GRPC_PYTHON_BUILD_SYSTEM_ZLIB=False") + else: + raise EasyBuildError("grpcio-specific hook triggered for a non-grpcio easyconfig?!") + + def parse_hook_openblas_relax_lapack_tests_num_errors(ec, eprefix): """Relax number of failing numerical LAPACK tests for aarch64/neoverse_v1 CPU target for OpenBLAS < 0.3.23""" cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') @@ -264,10 +285,10 @@ def parse_hook_qt5_check_qtwebengine_disable(ec, eprefix): Disable check for QtWebEngine in Qt5 as workaround for problem with determining glibc version. """ if ec.name == 'Qt5': - # workaround for glibc version being reported as "UNKNOWN" in Gentoo Prefix environment by EasyBuild v4.7.2, - # see also https://github.com/easybuilders/easybuild-framework/pull/4290 - ec['check_qtwebengine'] = False - print_msg("Checking for QtWebEgine in Qt5 installation has been disabled") + # workaround for glibc version being reported as "UNKNOWN" in Gentoo Prefix environment by EasyBuild v4.7.2, + # see also https://github.com/easybuilders/easybuild-framework/pull/4290 + ec['check_qtwebengine'] = False + print_msg("Checking for QtWebEgine in Qt5 installation has been disabled") else: raise EasyBuildError("Qt5-specific hook triggered for non-Qt5 easyconfig?!") @@ -282,19 +303,36 @@ def parse_hook_ucx_eprefix(ec, eprefix): raise EasyBuildError("UCX-specific hook triggered for non-UCX easyconfig?!") -def parse_hook_lammps_remove_deps_for_CI_aarch64(ec, *args, **kwargs): +def parse_hook_freeimage_aarch64(ec, *args, **kwargs): """ - Remove x86_64 specific 
dependencies for the CI to pass on aarch64 + Make sure to build with -fPIC on ARM to avoid + https://github.com/EESSI/software-layer/pull/736#issuecomment-2373261889 """ - if ec.name == 'LAMMPS' and ec.version in ('2Aug2023_update2',): + if ec.name == 'FreeImage' and ec.version in ('3.18.0',): if os.getenv('EESSI_CPU_FAMILY') == 'aarch64': - # ScaFaCoS and tbb are not compatible with aarch64/* CPU targets, - # so remove them as dependencies for LAMMPS (they're optional); - # see also https://github.com/easybuilders/easybuild-easyconfigs/pull/19164 + - # https://github.com/easybuilders/easybuild-easyconfigs/pull/19000; - # we need this hook because we check for missing installations for all CPU targets - # on an x86_64 VM in GitHub Actions (so condition based on ARCH in LAMMPS easyconfig is always true) - ec['dependencies'] = [dep for dep in ec['dependencies'] if dep[0] not in ('ScaFaCoS', 'tbb')] + # Make sure the toolchainopts key exists, and the value is a dict, + # before we add the option to enable PIC and disable PNG_ARM_NEON_OPT + if 'toolchainopts' not in ec or ec['toolchainopts'] is None: + ec['toolchainopts'] = {} + ec['toolchainopts']['pic'] = True + ec['toolchainopts']['extra_cflags'] = '-DPNG_ARM_NEON_OPT=0' + print_msg("Changed toolchainopts for %s: %s", ec.name, ec['toolchainopts']) + + +def parse_hook_lammps_remove_deps_for_aarch64(ec, *args, **kwargs): + """ + Remove x86_64 specific dependencies for the CI and missing installations to pass on aarch64 + """ + if ec.name == 'LAMMPS': + if ec.version in ('2Aug2023_update2', '29Aug2024'): + if os.getenv('EESSI_CPU_FAMILY') == 'aarch64': + # ScaFaCoS and tbb are not compatible with aarch64/* CPU targets, + # so remove them as dependencies for LAMMPS (they're optional); + # see also https://github.com/easybuilders/easybuild-easyconfigs/pull/19164 + + # https://github.com/easybuilders/easybuild-easyconfigs/pull/19000; + # we need this hook because we check for missing installations for all CPU targets + # 
on an x86_64 VM in GitHub Actions (so condition based on ARCH in LAMMPS easyconfig is always true) + ec['dependencies'] = [dep for dep in ec['dependencies'] if dep[0] not in ('ScaFaCoS', 'tbb',)] else: raise EasyBuildError("LAMMPS-specific hook triggered for non-LAMMPS easyconfig?!") @@ -320,16 +358,16 @@ def pre_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwarg Solve issues with compiling or running the tests on both neoverse_n1 and neoverse_v1 with Highway 1.0.4 and GCC 12.3.0: - for neoverse_n1 we set optarch to GENERIC - - for neoverse_v1 we completely disable the tests + - for neoverse_v1 and a64fx we completely disable the tests cfr. https://github.com/EESSI/software-layer/issues/469 """ if self.name == 'Highway': tcname, tcversion = self.toolchain.name, self.toolchain.version cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') - # note: keep condition in sync with the one used in + # note: keep condition in sync with the one used in # post_prepare_hook_highway_handle_test_compilation_issues if self.version in ['1.0.4'] and tcname == 'GCCcore' and tcversion == '12.3.0': - if cpu_target == CPU_TARGET_NEOVERSE_V1: + if cpu_target in [CPU_TARGET_A64FX, CPU_TARGET_NEOVERSE_V1]: self.cfg.update('configopts', '-DHWY_ENABLE_TESTS=OFF') if cpu_target == CPU_TARGET_NEOVERSE_N1: self.orig_optarch = build_option('optarch') @@ -345,12 +383,13 @@ def post_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwar if self.name == 'Highway': tcname, tcversion = self.toolchain.name, self.toolchain.version cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') - # note: keep condition in sync with the one used in + # note: keep condition in sync with the one used in # pre_prepare_hook_highway_handle_test_compilation_issues if self.version in ['1.0.4'] and tcname == 'GCCcore' and tcversion == '12.3.0': if cpu_target == CPU_TARGET_NEOVERSE_N1: update_build_option('optarch', self.orig_optarch) + def pre_configure_hook(self, *args, 
**kwargs): """Main pre-configure hook: trigger custom functions based on software name.""" if self.name in PRE_CONFIGURE_HOOKS: @@ -374,6 +413,30 @@ def pre_configure_hook_BLIS_a64fx(self, *args, **kwargs): else: raise EasyBuildError("BLIS-specific hook triggered for non-BLIS easyconfig?!") + +def pre_configure_hook_score_p(self, *args, **kwargs): + """ + Pre-configure hook for Score-p + - specify correct path to binutils (in compat layer) + """ + if self.name == 'Score-P': + + # determine path to Prefix installation in compat layer via $EPREFIX + eprefix = get_eessi_envvar('EPREFIX') + + binutils_lib_path_glob_pattern = os.path.join(eprefix, 'usr', 'lib*', 'binutils', '*-linux-gnu', '2.*') + binutils_lib_path = glob.glob(binutils_lib_path_glob_pattern) + if len(binutils_lib_path) == 1: + self.cfg.update('configopts', '--with-libbfd-lib=' + binutils_lib_path[0]) + self.cfg.update('configopts', '--with-libbfd-include=' + os.path.join(binutils_lib_path[0], 'include')) + else: + raise EasyBuildError("Failed to isolate path for binutils libraries using %s, got %s", + binutils_lib_path_glob_pattern, binutils_lib_path) + + else: + raise EasyBuildError("Score-P-specific hook triggered for non-Score-P easyconfig?!") + + def pre_configure_hook_extrae(self, *args, **kwargs): """ Pre-configure hook for Extrae @@ -399,10 +462,31 @@ def pre_configure_hook_extrae(self, *args, **kwargs): # replace use of 'which' with 'command -v', since 'which' is broken in EESSI build container; # this must be done *after* running configure script, because initial configuration re-writes configure script, # and problem due to use of which only pops up when running make ?! 
- self.cfg.update('prebuildopts', "cp config/mpi-macros.m4 config/mpi-macros.m4.orig && sed -i 's/`which /`command -v /g' config/mpi-macros.m4 && ") + self.cfg.update( + 'prebuildopts', + "cp config/mpi-macros.m4 config/mpi-macros.m4.orig && " + "sed -i 's/`which /`command -v /g' config/mpi-macros.m4 && " + ) else: raise EasyBuildError("Extrae-specific hook triggered for non-Extrae easyconfig?!") + +def pre_configure_hook_gobject_introspection(self, *args, **kwargs): + """ + pre-configure hook for GObject-Introspection: + - prevent GObject-Introspection from setting $LD_LIBRARY_PATH if EasyBuild is configured to filter it, see: + https://github.com/EESSI/software-layer/issues/196 + """ + if self.name == 'GObject-Introspection': + # inject a line that removes all items from runtime_path_envvar that are in $EASYBUILD_FILTER_ENV_VARS + sed_cmd = r'sed -i "s@\(^\s*runtime_path_envvar = \)\(.*\)@' + sed_cmd += r'\1\2\n\1 [x for x in runtime_path_envvar if not x in os.environ.get(\'EASYBUILD_FILTER_ENV_VARS\', \'\').split(\',\')]@g"' + sed_cmd += ' %(start_dir)s/giscanner/ccompiler.py && ' + self.cfg.update('preconfigopts', sed_cmd) + else: + raise EasyBuildError("GObject-Introspection-specific hook triggered for non-GObject-Introspection easyconfig?!") + + def pre_configure_hook_gromacs(self, *args, **kwargs): """ Pre-configure hook for GROMACS: @@ -413,7 +497,10 @@ def pre_configure_hook_gromacs(self, *args, **kwargs): cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') if LooseVersion(self.version) <= LooseVersion('2024.1') and cpu_target == CPU_TARGET_NEOVERSE_V1: self.cfg.update('configopts', '-DGMX_SIMD=ARM_NEON_ASIMD') - print_msg("Avoiding use of SVE instructions for GROMACS %s by using ARM_NEON_ASIMD as GMX_SIMD value", self.version) + print_msg( + "Avoiding use of SVE instructions for GROMACS %s by using ARM_NEON_ASIMD as GMX_SIMD value", + self.version + ) else: raise EasyBuildError("GROMACS-specific hook triggered for non-GROMACS easyconfig?!") @@ -422,17 
+509,33 @@ def pre_configure_hook_openblas_optarch_generic(self, *args, **kwargs): """ Pre-configure hook for OpenBLAS: add DYNAMIC_ARCH=1 to build/test/install options when using --optarch=GENERIC """ + # note: OpenBLAS easyblock was updated in https://github.com/easybuilders/easybuild-easyblocks/pull/3492 + # to take care of this already, so at some point this hook can be removed... if self.name == 'OpenBLAS': if build_option('optarch') == OPTARCH_GENERIC: + dynamic_arch = 'DYNAMIC_ARCH=1' for step in ('build', 'test', 'install'): - self.cfg.update(f'{step}opts', "DYNAMIC_ARCH=1") + if dynamic_arch not in self.cfg[f'{step}opts']: + self.cfg.update(f'{step}opts', dynamic_arch) - # use -mtune=generic rather than -mcpu=generic in $CFLAGS on aarch64, - # because -mcpu=generic implies a particular -march=armv* which clashes with those used by OpenBLAS - # when building with DYNAMIC_ARCH=1 if get_cpu_architecture() == AARCH64: - cflags = os.getenv('CFLAGS').replace('-mcpu=generic', '-mtune=generic') - env.setvar('CFLAGS', cflags) + # when building for aarch64/generic, we also need to set TARGET=ARMV8 to make sure + # that the driver parts of OpenBLAS are compiled generically; + # see also https://github.com/OpenMathLib/OpenBLAS/issues/4945 + target_armv8 = 'TARGET=ARMV8' + for step in ('build', 'test', 'install'): + if target_armv8 not in self.cfg[f'{step}opts']: + self.cfg.update(f'{step}opts', target_armv8) + + # use -mtune=generic rather than -mcpu=generic in $CFLAGS for aarch64/generic, + # because -mcpu=generic implies a particular -march=armv* which clashes with those used by OpenBLAS + # when building with DYNAMIC_ARCH=1 + mcpu_generic = '-mcpu=generic' + cflags = os.getenv('CFLAGS') + if mcpu_generic in cflags: + cflags = cflags.replace(mcpu_generic, '-mtune=generic') + self.log.info("Replaced -mcpu=generic with -mtune=generic in $CFLAGS") + env.setvar('CFLAGS', cflags) else: raise EasyBuildError("OpenBLAS-specific hook triggered for non-OpenBLAS easyconfig?!") 
@@ -474,32 +577,34 @@ def pre_configure_hook_wrf_aarch64(self, *args, **kwargs): pattern = "Linux x86_64 ppc64le, gfortran" repl = "Linux x86_64 aarch64 ppc64le, gfortran" if LooseVersion(self.version) <= LooseVersion('3.9.0'): - self.cfg.update('preconfigopts', "sed -i 's/%s/%s/g' arch/configure_new.defaults && " % (pattern, repl)) - print_msg("Using custom preconfigopts for %s: %s", self.name, self.cfg['preconfigopts']) + self.cfg.update('preconfigopts', "sed -i 's/%s/%s/g' arch/configure_new.defaults && " % (pattern, repl)) + print_msg("Using custom preconfigopts for %s: %s", self.name, self.cfg['preconfigopts']) if LooseVersion('4.0.0') <= LooseVersion(self.version) <= LooseVersion('4.2.1'): - self.cfg.update('preconfigopts', "sed -i 's/%s/%s/g' arch/configure.defaults && " % (pattern, repl)) - print_msg("Using custom preconfigopts for %s: %s", self.name, self.cfg['preconfigopts']) + self.cfg.update('preconfigopts', "sed -i 's/%s/%s/g' arch/configure.defaults && " % (pattern, repl)) + print_msg("Using custom preconfigopts for %s: %s", self.name, self.cfg['preconfigopts']) else: raise EasyBuildError("WRF-specific hook triggered for non-WRF easyconfig?!") -def pre_configure_hook_atspi2core_filter_ld_library_path(self, *args, **kwargs): +def pre_configure_hook_LAMMPS_zen4(self, *args, **kwargs): """ - pre-configure hook for at-spi2-core: - - instruct GObject-Introspection's g-ir-scanner tool to not set $LD_LIBRARY_PATH - when EasyBuild is configured to filter it, see: - https://github.com/EESSI/software-layer/issues/196 + pre-configure hook for LAMMPS: + - set kokkos_arch on x86_64/amd/zen4 """ - if self.name == 'at-spi2-core': - if build_option('filter_env_vars') and 'LD_LIBRARY_PATH' in build_option('filter_env_vars'): - sed_cmd = 'sed -i "s/gir_extra_args = \[/gir_extra_args = \[\\n \'--lib-dirs-envvar=FILTER_LD_LIBRARY_PATH\',/g" %(start_dir)s/atspi/meson.build && ' - self.cfg.update('preconfigopts', sed_cmd) + + cpu_target = 
get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + if self.name == 'LAMMPS': + if self.version in ('2Aug2023_update2', '29Aug2024'): + if get_cpu_architecture() == X86_64: + if cpu_target == CPU_TARGET_ZEN4: + # There is no support for ZEN4 in LAMMPS yet so falling back to ZEN3 + self.cfg['kokkos_arch'] = 'ZEN3' else: - raise EasyBuildError("at-spi2-core-specific hook triggered for non-at-spi2-core easyconfig?!") + raise EasyBuildError("LAMMPS-specific hook triggered for non-LAMMPS easyconfig?!") -def pre_test_hook(self,*args, **kwargs): +def pre_test_hook(self, *args, **kwargs): """Main pre-test hook: trigger custom functions based on software name.""" if self.name in PRE_TEST_HOOKS: PRE_TEST_HOOKS[self.name](self, *args, **kwargs) @@ -562,6 +667,7 @@ def pre_test_hook_ignore_failing_tests_SciPybundle(self, *args, **kwargs): elif cpu_target == CPU_TARGET_A64FX and self.version in scipy_bundle_versions_a64fx: self.cfg['testopts'] = "|| echo ignoring failing tests" + def pre_test_hook_ignore_failing_tests_netCDF(self, *args, **kwargs): """ Pre-test hook for netCDF: skip failing tests for selected netCDF versions on neoverse_v1 @@ -575,6 +681,7 @@ def pre_test_hook_ignore_failing_tests_netCDF(self, *args, **kwargs): if self.name == 'netCDF' and self.version == '4.9.2' and cpu_target == CPU_TARGET_NEOVERSE_V1: self.cfg['testopts'] = "|| echo ignoring failing tests" + def pre_test_hook_increase_max_failed_tests_arm_PyTorch(self, *args, **kwargs): """ Pre-test hook for PyTorch: increase max failing tests for ARM for PyTorch 2.1.2 @@ -639,18 +746,22 @@ def pre_single_extension_testthat(ext, *args, **kwargs): ext.cfg['preinstallopts'] = "sed -i 's/SIGSTKSZ/32768/g' inst/include/testthat/vendor/catch.h && " -def post_sanitycheck_hook(self, *args, **kwargs): - """Main post-sanity-check hook: trigger custom functions based on software name.""" - if self.name in POST_SANITYCHECK_HOOKS: - POST_SANITYCHECK_HOOKS[self.name](self, *args, **kwargs) +def post_postproc_hook(self, *args, 
**kwargs): + """Main post-postprocessing hook: trigger custom functions based on software name.""" + if self.name in POST_POSTPROC_HOOKS: + POST_POSTPROC_HOOKS[self.name](self, *args, **kwargs) -def post_sanitycheck_cuda(self, *args, **kwargs): +def post_postproc_cuda(self, *args, **kwargs): """ Remove files from CUDA installation that we are not allowed to ship, and replace them with a symlink to a corresponding installation under host_injections. """ - if self.name == 'CUDA': + + # We need to check if we are doing an EESSI-distributed installation + eessi_installation = bool(re.search(EESSI_INSTALLATION_REGEX, self.installdir)) + + if self.name == 'CUDA' and eessi_installation: print_msg("Replacing files in CUDA installation that we can not ship with symlinks to host_injections...") # read CUDA EULA, construct allowlist based on section 2.6 that specifies list of files that can be shipped @@ -675,6 +786,11 @@ def post_sanitycheck_cuda(self, *args, **kwargs): for word in line.split(): if any(ext in word for ext in file_extensions): allowlist.append(os.path.splitext(word)[0]) + # The EULA of CUDA 12.4 introduced a typo (confirmed by NVIDIA): + # libnvrtx-builtins_static.so should be libnvrtc-builtins_static.so + if 'libnvrtx-builtins_static' in allowlist: + allowlist.remove('libnvrtx-builtins_static') + allowlist.append('libnvrtc-builtins_static') allowlist = sorted(set(allowlist)) self.log.info("Allowlist for files in CUDA installation that can be redistributed: " + ', '.join(allowlist)) @@ -684,56 +800,170 @@ def post_sanitycheck_cuda(self, *args, **kwargs): if 'libcudart' not in allowlist: raise EasyBuildError("Did not find 'libcudart' in allowlist: %s" % allowlist) - # iterate over all files in the CUDA installation directory - for dir_path, _, files in os.walk(self.installdir): - for filename in files: - full_path = os.path.join(dir_path, filename) - # we only really care about real files, i.e. 
not symlinks - if not os.path.islink(full_path): - # check if the current file name stub is part of the allowlist - basename = filename.split('.')[0] - if basename in allowlist: - self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) - else: - self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s", - basename, full_path) - # if it is not in the allowlist, delete the file and create a symlink to host_injections - host_inj_path = full_path.replace('versions', 'host_injections') - # make sure source and target of symlink are not the same - if full_path == host_inj_path: - raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " - "are using this hook for an EESSI installation?", - full_path, host_inj_path) - remove_file(full_path) - symlink(host_inj_path, full_path) + # replace files that are not distributable with symlinks into + # host_injections + replace_non_distributable_files_with_symlinks(self.log, self.installdir, self.name, allowlist) else: raise EasyBuildError("CUDA-specific hook triggered for non-CUDA easyconfig?!") +def post_postproc_cudnn(self, *args, **kwargs): + """ + Remove files from cuDNN installation that we are not allowed to ship, + and replace them with a symlink to a corresponding installation under host_injections. + """ + + # We need to check if we are doing an EESSI-distributed installation + eessi_installation = bool(re.search(EESSI_INSTALLATION_REGEX, self.installdir)) + + if self.name == 'cuDNN' and eessi_installation: + print_msg("Replacing files in cuDNN installation that we can not ship with symlinks to host_injections...") + + allowlist = ['LICENSE'] + + # read cuDNN LICENSE, construct allowlist based on section "2. Distribution" that specifies list of files that can be shipped + license_path = os.path.join(self.installdir, 'LICENSE') + search_string = "2. Distribution. 
The following portions of the SDK are distributable under the Agreement:" + found_search_string = False + with open(license_path) as infile: + for line in infile: + if line.strip().startswith(search_string): + found_search_string = True + # remove search string, split into words, remove trailing + # dots '.' and only retain words starting with a dot '.' + distributable = line[len(search_string):] + # distributable looks like ' the runtime files .so and .dll.' + # note the '.' after '.dll' + for word in distributable.split(): + if word[0] == '.': + # rstrip is used to remove the '.' after '.dll' + allowlist.append(word.rstrip('.')) + if not found_search_string: + # search string wasn't found in LICENSE file + raise EasyBuildError("search string '%s' was not found in license file '%s'; " + "hence installation may be replaced by symlinks only", + search_string, license_path) + + allowlist = sorted(set(allowlist)) + self.log.info("Allowlist for files in cuDNN installation that can be redistributed: " + ', '.join(allowlist)) + + # replace files that are not distributable with symlinks into + # host_injections + replace_non_distributable_files_with_symlinks(self.log, self.installdir, self.name, allowlist) + elif self.name != 'cuDNN': + raise EasyBuildError("cuDNN-specific hook triggered for non-cuDNN easyconfig?!") + + +def replace_non_distributable_files_with_symlinks(log, install_dir, pkg_name, allowlist): + """ + Replace files that cannot be distributed with symlinks into host_injections + """ + # Different packages use different ways to specify which files or file + # 'types' may be redistributed. For CUDA, the 'EULA.txt' lists full file + # names. For cuDNN, the 'LICENSE' lists file endings/suffixes (e.g., '.so') + # that can be redistributed. + # The map 'extension_based' defines which of these two ways are employed. If + # full file names are used it maps a package name (key) to False (value). If + # endings/suffixes are used, it maps a package name to True. 
Later we can + # easily use this data structure to employ the correct method for + # postprocessing an installation. + extension_based = { + "CUDA": False, + "cuDNN": True, + } + if pkg_name not in extension_based: + raise EasyBuildError("Don't know how to strip non-distributable files from package %s.", pkg_name) + + # iterate over all files in the package installation directory + for dir_path, _, files in os.walk(install_dir): + for filename in files: + full_path = os.path.join(dir_path, filename) + # we only really care about real files, i.e. not symlinks + if not os.path.islink(full_path): + check_by_extension = extension_based[pkg_name] and '.' in filename + if check_by_extension: + # if the allowlist only contains extensions, we have to + # determine that from filename. we assume the extension is + # the second element when splitting the filename at dots + # (e.g., for 'libcudnn_adv_infer.so.8.9.2' the extension + # would be '.so') + extension = '.' + filename.split('.')[1] + # check if the current file name stub or its extension is part of the allowlist + basename = filename.split('.')[0] + if basename in allowlist: + log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) + elif check_by_extension and extension in allowlist: + log.debug("%s is found in allowlist, so keeping it: %s", extension, full_path) + else: + print_name = filename if extension_based[pkg_name] else basename + log.debug("%s is not found in allowlist, so replacing it with symlink: %s", + print_name, full_path) + # the host_injections path is under a fixed repo/location for CUDA or cuDNN + host_inj_path = re.sub(EESSI_INSTALLATION_REGEX, HOST_INJECTIONS_LOCATION, full_path) + # CUDA and cu* libraries themselves don't care about compute capability so remove this + # duplication from under host_injections (symlink to a single CUDA or cu* library + # installation for all compute capabilities) + accel_subdir = os.getenv("EESSI_ACCELERATOR_TARGET") + if accel_subdir: + 
host_inj_path = host_inj_path.replace("/accel/%s" % accel_subdir, '') + # make sure source and target of symlink are not the same + if full_path == host_inj_path: + raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " + "are using this hook for an EESSI installation?", + full_path, host_inj_path) + remove_file(full_path) + symlink(host_inj_path, full_path) + + def inject_gpu_property(ec): """ - Add 'gpu' property, via modluafooter easyconfig parameter + Add 'gpu' property and EESSIVERSION envvars via modluafooter + easyconfig parameter, and drop dependencies to build dependencies """ ec_dict = ec.asdict() - # Check if CUDA is in the dependencies, if so add the 'gpu' Lmod property - if ('CUDA' in [dep[0] for dep in iter(ec_dict['dependencies'])]): - ec.log.info("Injecting gpu as Lmod arch property and envvar with CUDA version") - key = 'modluafooter' - value = 'add_property("arch","gpu")' - cuda_version = 0 - for dep in iter(ec_dict['dependencies']): - # Make CUDA a build dependency only (rpathing saves us from link errors) - if 'CUDA' in dep[0]: - cuda_version = dep[1] - ec_dict['dependencies'].remove(dep) - if dep not in ec_dict['builddependencies']: - ec_dict['builddependencies'].append(dep) - value = '\n'.join([value, 'setenv("EESSICUDAVERSION","%s")' % cuda_version]) - if key in ec_dict: - if not value in ec_dict[key]: - ec[key] = '\n'.join([ec_dict[key], value]) + # Check if CUDA, cuDNN, you-name-it is in the dependencies, if so + # - drop dependency to build dependency + # - add 'gpu' Lmod property + # - add envvar with package version + pkg_names = ( "CUDA", "cuDNN" ) + pkg_versions = { } + add_gpu_property = '' + + for pkg_name in pkg_names: + # Check if pkg_name is in the dependencies, if so drop dependency to build + # dependency and set variable for later adding the 'gpu' Lmod property + # to '.remove' dependencies from ec_dict['dependencies'] we make a copy, + # iterate over the copy and can then savely use 
'.remove' on the original + # ec_dict['dependencies']. + deps = ec_dict['dependencies'][:] + if (pkg_name in [dep[0] for dep in deps]): + add_gpu_property = 'add_property("arch","gpu")' + for dep in deps: + if pkg_name == dep[0]: + # make pkg_name a build dependency only (rpathing saves us from link errors) + ec.log.info("Dropping dependency on %s to build dependency" % pkg_name) + ec_dict['dependencies'].remove(dep) + if dep not in ec_dict['builddependencies']: + ec_dict['builddependencies'].append(dep) + # take note of version for creating the modluafooter + pkg_versions[pkg_name] = dep[1] + if add_gpu_property: + ec.log.info("Injecting gpu as Lmod arch property and envvars for dependencies with their version") + modluafooter = 'modluafooter' + extra_mod_footer_lines = [add_gpu_property] + for pkg_name, version in pkg_versions.items(): + envvar = "EESSI%sVERSION" % pkg_name.upper() + extra_mod_footer_lines.append('setenv("%s","%s")' % (envvar, version)) + # take into account that modluafooter may already be set + if modluafooter in ec_dict: + value = ec_dict[modluafooter] + for line in extra_mod_footer_lines: + if not line in value: + value = '\n'.join([value, line]) + ec[modluafooter] = value else: - ec[key] = value + ec[modluafooter] = '\n'.join(extra_mod_footer_lines) + return ec @@ -741,7 +971,9 @@ def inject_gpu_property(ec): 'casacore': parse_hook_casacore_disable_vectorize, 'CGAL': parse_hook_cgal_toolchainopts_precise, 'fontconfig': parse_hook_fontconfig_add_fonts, - 'LAMMPS': parse_hook_lammps_remove_deps_for_CI_aarch64, + 'FreeImage': parse_hook_freeimage_aarch64, + 'grpcio': parse_hook_grpcio_zlib, + 'LAMMPS': parse_hook_lammps_remove_deps_for_aarch64, 'CP2K': parse_hook_CP2K_remove_deps_for_aarch64, 'OpenBLAS': parse_hook_openblas_relax_lapack_tests_num_errors, 'pybind11': parse_hook_pybind11_replace_catch2, @@ -759,14 +991,16 @@ def inject_gpu_property(ec): } PRE_CONFIGURE_HOOKS = { - 'at-spi2-core': 
pre_configure_hook_atspi2core_filter_ld_library_path, 'BLIS': pre_configure_hook_BLIS_a64fx, + 'GObject-Introspection': pre_configure_hook_gobject_introspection, 'Extrae': pre_configure_hook_extrae, 'GROMACS': pre_configure_hook_gromacs, 'libfabric': pre_configure_hook_libfabric_disable_psm3_x86_64_generic, 'MetaBAT': pre_configure_hook_metabat_filtered_zlib_dep, 'OpenBLAS': pre_configure_hook_openblas_optarch_generic, 'WRF': pre_configure_hook_wrf_aarch64, + 'LAMMPS': pre_configure_hook_LAMMPS_zen4, + 'Score-P': pre_configure_hook_score_p, } PRE_TEST_HOOKS = { @@ -788,6 +1022,7 @@ def inject_gpu_property(ec): 'numpy': post_single_extension_numpy, } -POST_SANITYCHECK_HOOKS = { - 'CUDA': post_sanitycheck_cuda, +POST_POSTPROC_HOOKS = { + 'CUDA': post_postproc_cuda, + 'cuDNN': post_postproc_cudnn, } diff --git a/eessi_container.sh b/eessi_container.sh index e404b7ee18..fc97f9877c 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -254,7 +254,7 @@ if [[ ${LIST_REPOS} -eq 1 ]]; then default_label=", default" else default_label="" - fi + fi echo " ${cvmfs_repo} [CVMFS config repo${default_label}]" done for cfg_repo in "${!cfg_cvmfs_repos[@]}" @@ -323,7 +323,7 @@ do if [[ ! -n "${eessi_cvmfs_repos[${cvmfs_repo_name}]}" ]] ; then [[ ${VERBOSE} -eq 1 ]] && echo "repo '${cvmfs_repo_name}' is not an EESSI CVMFS repository..." 
# cvmfs_repo_name is actually a repository ID, use that to obtain - # the actual name from the EESSI_REPOS_CFG_FILE + # the actual name from the EESSI_REPOS_CFG_FILE cfg_repo_id=${cvmfs_repo_name} cvmfs_repo_name=$(cfg_get_value ${cfg_repo_id} "repo_name") fi @@ -595,11 +595,11 @@ do # that the necessary information for accessing a CVMFS repository is made # available inside the container if [[ -n "${cfg_cvmfs_repos[${cvmfs_repo_name}]}" ]] ; then - cfg_repo_id=${cvmfs_repo_name} + cfg_repo_id=${cvmfs_repo_name} - # obtain CVMFS repository name from section for the given ID + # obtain CVMFS repository name from section for the given ID cfg_repo_name=$(cfg_get_value ${cfg_repo_id} "repo_name") - # derive domain part from (cfg_)repo_name (everything after first '.') + # derive domain part from (cfg_)repo_name (everything after first '.') repo_name_domain=${repo_name#*.} # cfg_cvmfs_repos is populated through reading the file pointed to by @@ -609,15 +609,15 @@ do # copy repos.cfg to job directory --> makes it easier to inspect the job cp -a ${EESSI_REPOS_CFG_FILE} ${EESSI_TMPDIR}/repos_cfg/. - # cfg file should include sections (one per CVMFS repository to be mounted) - # with each section containing the settings: - # - repo_name, - # - repo_version, - # - config_bundle, and - # - a map { filepath_in_bundle -> container_filepath } + # cfg file should include sections (one per CVMFS repository to be mounted) + # with each section containing the settings: + # - repo_name, + # - repo_version, + # - config_bundle, and + # - a map { filepath_in_bundle -> container_filepath } # - # The config_bundle includes the files which are mapped ('->') to a target - # location in container: + # The config_bundle includes the files which are mapped ('->') to a target + # location in container: # - default.local -> /etc/cvmfs/default.local # contains CVMFS settings, e.g., CVMFS_HTTP_PROXY, CVMFS_QUOTA_LIMIT, ... 
# - ${repo_name_domain}.conf -> /etc/cvmfs/domain.d/${repo_name_domain}.conf @@ -641,7 +641,7 @@ do # use information to set up dir ${EESSI_TMPDIR}/repos_cfg and define # BIND mounts # check if config_bundle exists, if so, unpack it into - # ${EESSI_TMPDIR}/repos_cfg; if it doesn't, exit with an error + # ${EESSI_TMPDIR}/repos_cfg; if it doesn't, exit with an error # if config_bundle is relative path (no '/' at start) prepend it with # EESSI_REPOS_CFG_DIR config_bundle_path= @@ -726,7 +726,7 @@ do if [[ ${cfg_cvmfs_repos[${cvmfs_repo_name}]} ]]; then [[ ${VERBOSE} -eq 1 ]] && echo "repo '${cvmfs_repo_name}' is not an EESSI CVMFS repository..." # cvmfs_repo_name is actually a repository ID, use that to obtain - # the actual name from the EESSI_REPOS_CFG_FILE + # the actual name from the EESSI_REPOS_CFG_FILE cfg_repo_id=${cvmfs_repo_name} cvmfs_repo_name=$(cfg_get_value ${cfg_repo_id} "repo_name") fi @@ -736,15 +736,52 @@ do # add fusemount options depending on requested access mode ('ro' - read-only; 'rw' - read & write) if [[ ${cvmfs_repo_access} == "ro" ]] ; then - export EESSI_READONLY="container:cvmfs2 ${cvmfs_repo_name} /cvmfs/${cvmfs_repo_name}" + # need to distinguish between basic "ro" access and "ro" after a "rw" session + if [[ -d ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper ]]; then + # the overlay-upper directory is only created in a read-write-session, thus + # we are resuming from such a session here (otherwise there shouldn't be such + # directory yet as it is only created for read-write-sessions a bit further + # below); the overlay-upper directory can only exist because it is part of + # the ${RESUME} directory or tarball + # to be able to see the contents of the read-write session we have to mount + # the fuse-overlayfs (in read-only mode) on top of the CernVM-FS repository + + echo "While processing '${cvmfs_repo_name}' to be mounted 'read-only' we detected an overlay-upper" + echo " directory (${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper) 
likely from a previous" + echo " session. Will use it as left-most directory in 'lowerdir' argument for fuse-overlayfs." + + # make the target CernVM-FS repository available under /cvmfs_ro + export EESSI_READONLY="container:cvmfs2 ${cvmfs_repo_name} /cvmfs_ro/${cvmfs_repo_name}" + + EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}") + + # now, put the overlay-upper read-only on top of the repo and make it available under the usual prefix /cvmfs + EESSI_READONLY_OVERLAY="container:fuse-overlayfs" + # The contents of the previous session are available under + # ${EESSI_TMPDIR} which is bind mounted to ${TMP_IN_CONTAINER}. + # Hence, we have to use ${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper + # the left-most directory given for the lowerdir argument is put on top, + # and with no upperdir=... the whole overlayfs is made available read-only + EESSI_READONLY_OVERLAY+=" -o lowerdir=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper:/cvmfs_ro/${cvmfs_repo_name}" + EESSI_READONLY_OVERLAY+=" /cvmfs/${cvmfs_repo_name}" + export EESSI_READONLY_OVERLAY + + EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY_OVERLAY}") + export EESSI_FUSE_MOUNTS + else + # basic "ro" access that doesn't require any fuseoverlay-fs + echo "Mounting '${cvmfs_repo_name}' 'read-only' without fuse-overlayfs." 
- EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}") - export EESSI_FUSE_MOUNTS + export EESSI_READONLY="container:cvmfs2 ${cvmfs_repo_name} /cvmfs/${cvmfs_repo_name}" + + EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}") + export EESSI_FUSE_MOUNTS + fi elif [[ ${cvmfs_repo_access} == "rw" ]] ; then # use repo-specific overlay directories mkdir -p ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper mkdir -p ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-work - [[ ${VERBOSE} -eq 1 ]] && echo -e "TMP directory contents:\n$(ls -l ${EESSI_TMPDIR})" + [[ ${VERBOSE} -eq 1 ]] && echo -e "TMP directory contents:\n$(ls -l ${EESSI_TMPDIR})" # set environment variables for fuse mounts in Singularity container export EESSI_READONLY="container:cvmfs2 ${cvmfs_repo_name} /cvmfs_ro/${cvmfs_repo_name}" @@ -762,7 +799,7 @@ do export EESSI_FUSE_MOUNTS else echo -e "ERROR: access mode '${cvmfs_repo_access}' for CVMFS repository\n '${cvmfs_repo_name}' is not known" - exit ${REPOSITORY_ERROR_EXITCODE} + exit ${REPOSITORY_ERROR_EXITCODE} fi # create repo_settings.sh file in ${EESSI_TMPDIR}/${cvmfs_repo_name} to store # (intention is that the file could be just sourced to obtain the settings) @@ -809,14 +846,14 @@ if [[ ! -z ${SAVE} ]]; then # of these aspects to where the script is used if [[ -d ${SAVE} ]]; then # assume SAVE is name of a directory to which tarball shall be written to - # name format: {REPO_ID}-{TIMESTAMP}.tgz + # name format: tmp_storage-{TIMESTAMP}.tgz ts=$(date +%s) - TGZ=${SAVE}/${REPOSITORY}-${ts}.tgz + TGZ=${SAVE}/tmp_storage-${ts}.tgz else # assume SAVE is the full path to a tarball's name TGZ=${SAVE} fi - tar cf ${TGZ} -C ${EESSI_TMPDIR} . + tar czf ${TGZ} -C ${EESSI_TMPDIR} . echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TGZ}' (to resume session add '--resume ${TGZ}')" fi diff --git a/init/bash b/init/bash index 4ad09f6a1b..928ac6efdf 100644 --- a/init/bash +++ b/init/bash @@ -29,6 +29,11 @@ if [ $? 
-eq 0 ]; then show_msg "Prepending site path $EESSI_SITE_MODULEPATH to \$MODULEPATH..." module use $EESSI_SITE_MODULEPATH + if [ ! -z ${EESSI_MODULEPATH_ACCEL} ]; then + show_msg "Prepending $EESSI_MODULEPATH_ACCEL to \$MODULEPATH..." + module use $EESSI_MODULEPATH_ACCEL + fi + #show_msg "" #show_msg "*** Known problems in the ${EESSI_VERSION} software stack ***" #show_msg "" diff --git a/init/eessi_archdetect.sh b/init/eessi_archdetect.sh index ddfed1bfae..2b1534ce62 100755 --- a/init/eessi_archdetect.sh +++ b/init/eessi_archdetect.sh @@ -1,5 +1,23 @@ #!/usr/bin/env bash -VERSION="1.1.0" + +# Confirm the current shell is Bash >= 4 +# (works for sh, bash, dash, zsh, ksh, but not fish, tcsh, elvish) +if [ -n "$BASH_VERSION" ]; then + # Extract the major version numbers + bash_version=$(echo "$BASH_VERSION" | grep -oP '^\d+\.\d+') + major_version=$(echo "$bash_version" | cut -d. -f1) + + # Check if the major version is 4 or higher + if [ "$major_version" -lt 4 ]; then + echo "Error: This script must be run with Bash >= 4, you have $BASH_VERSION." >&2 + exit 1 + fi +else + echo "Error: This script must be run with Bash." >&2 + exit 1 +fi + +VERSION="1.2.0" # default log level: only emit warnings or errors LOG_LEVEL="WARN" @@ -132,8 +150,54 @@ cpupath(){ fi }
+# Print the accelerator subdirectory for this host (e.g. accel/nvidia/cc80) on stdout.
+# Globals:  EESSI_ACCELERATOR_TARGET_OVERRIDE (read) - used as-is when it matches
+#           accel/nvidia/cc[0-9][0-9], which skips the nvidia-smi probe
+# Outputs:  the overridden/detected subdirectory on stdout, diagnostics via log
+# Returns:  0 when a subdirectory was printed; exits the script with 2 if nvidia-smi
+#           is not found and with 3 if the nvidia-smi query fails
+accelpath() {
+    # If EESSI_ACCELERATOR_TARGET_OVERRIDE is set, use it
+    log "DEBUG" "accelpath: Override variable set as '$EESSI_ACCELERATOR_TARGET_OVERRIDE' "
+    if [ -n "$EESSI_ACCELERATOR_TARGET_OVERRIDE" ]; then
+        if [[ "$EESSI_ACCELERATOR_TARGET_OVERRIDE" =~ ^accel/nvidia/cc[0-9][0-9]$ ]]; then
+            echo "${EESSI_ACCELERATOR_TARGET_OVERRIDE}"
+            return 0
+        else
+            log "ERROR" "Value of \$EESSI_ACCELERATOR_TARGET_OVERRIDE should match 'accel/nvidia/cc[0-9][0-9]', but it does not: '$EESSI_ACCELERATOR_TARGET_OVERRIDE'"
+        fi
+        return 0  # NOTE(review): only reached after the ERROR log above; confirm that log aborts on ERROR
+    fi
+
+    # check for NVIDIA GPUs via nvidia-smi command
+    nvidia_smi=$(command -v nvidia-smi)
+    if [[ $? -eq 0 ]]; then
+        log "DEBUG" "accelpath: nvidia-smi command found @ ${nvidia_smi}"
+        nvidia_smi_out=$(mktemp -p /tmp nvidia_smi_out.XXXXX)
+        # stdout is redirected before stderr so that both streams end up in the output file
+        nvidia-smi --query-gpu=gpu_name,count,driver_version,compute_cap --format=csv,noheader > "$nvidia_smi_out" 2>&1
+        if [[ $? -eq 0 ]]; then
+            nvidia_smi_info=$(head -1 "$nvidia_smi_out")
+            # drop the blank after each comma, take the 4th CSV field (compute_cap), strip the dot
+            cuda_cc=$(echo $nvidia_smi_info | sed 's/, /,/g' | cut -f4 -d, | sed 's/\.//g')
+            log "DEBUG" "accelpath: CUDA compute capability '${cuda_cc}' derived from nvidia-smi output '${nvidia_smi_info}'"
+            res="accel/nvidia/cc${cuda_cc}"
+            log "DEBUG" "accelpath: result: ${res}"
+            echo $res
+            rm -f "$nvidia_smi_out"
+        else
+            # the output file is kept here so that the message below can point to it
+            log "DEBUG" "accelpath: nvidia-smi command failed, see output in $nvidia_smi_out"
+            exit 3
+        fi
+    else
+        log "DEBUG" "accelpath: nvidia-smi command not found"
+        exit 2
+    fi
+}
+
 # Parse command line arguments -USAGE="Usage: eessi_archdetect.sh [-h][-d][-a] " +USAGE="Usage: eessi_archdetect.sh [-h][-d][-a] " while getopts 'hdva' OPTION; do case "$OPTION" in @@ -150,5 +214,6 @@ ARGUMENT=${1:-none} case "$ARGUMENT" in "cpupath") cpupath; exit;; - *) echo "$USAGE"; log "ERROR" "Missing argument (possible actions: 'cpupath')";; + "accelpath") accelpath; exit;; + *) echo "$USAGE"; log "ERROR" "Missing argument (possible actions: 'cpupath', 'accelpath')";; esac diff --git a/init/eessi_environment_variables b/init/eessi_environment_variables index 8c10b1fca8..60d69cc198 100644 --- a/init/eessi_environment_variables +++ b/init/eessi_environment_variables @@ -38,6 +38,45 @@ if [ -d $EESSI_PREFIX ]; then break fi done + + # we need to make sure that errexit shell option (set -e) is not enabled, + # since archdetect will produce non-zero exit code if no accelerator was found + if [[ "$-" =~ e ]]; then + errexit_shell_option_set='yes' + set +e + else + errexit_shell_option_set='no' + fi + + # to be able to grab exit code of archdetect trying to detect accelerators, + # we can not run it via $(...), so we have to redirect the output to a temporary
file + tmpout=$(mktemp) + ${EESSI_INIT_DIR_PATH}/eessi_archdetect.sh accelpath 2>&1 > $tmpout + accelpath_exit_code=$? + + if [[ "$errexit_shell_option_set" == "yes" ]]; then + set -e + fi + + if [[ $accelpath_exit_code -eq 0 ]]; then + export EESSI_ACCEL_SUBDIR=$(tail -1 $tmpout && rm -f $tmpout) + if [ -z ${EESSI_ACCEL_SUBDIR} ]; then + error "accelerator detection with archdetect worked, but no result was returned?!" + else + # allow specifying different parent directory for accel/* subdirectory via $EESSI_ACCEL_SOFTWARE_SUBDIR_OVERRIDE + EESSI_ACCEL_SOFTWARE_SUBDIR=${EESSI_ACCEL_SOFTWARE_SUBDIR_OVERRIDE:-$EESSI_SOFTWARE_SUBDIR} + # path to where accel/* subdirectory is located + EESSI_ACCEL_SOFTWARE_PATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_ACCEL_SOFTWARE_SUBDIR} + if [ -d $EESSI_ACCEL_SOFTWARE_PATH/${EESSI_ACCEL_SUBDIR} ]; then + show_msg "archdetect found supported accelerator for CPU target ${EESSI_ACCEL_SOFTWARE_SUBDIR}: ${EESSI_ACCEL_SUBDIR}" + else + show_msg "No matching path found in ${EESSI_ACCEL_SOFTWARE_SUBDIR} for accelerator detected by archdetect (${EESSI_ACCEL_SUBDIR})" + fi + fi + else + show_msg "archdetect could not detect any accelerators" + rm -f $tmpout + fi elif [ "$EESSI_USE_ARCHSPEC" == "1" ]; then # note: eessi_software_subdir_for_host.py will pick up value from $EESSI_SOFTWARE_SUBDIR_OVERRIDE if it's defined! export EESSI_EPREFIX_PYTHON=$EESSI_EPREFIX/usr/bin/python3 @@ -81,15 +120,17 @@ if [ -d $EESSI_PREFIX ]; then if [ ! -z $EESSI_BASIC_ENV ]; then show_msg "Only setting up basic environment, so we're done" elif [ -d $EESSI_SOFTWARE_PATH ]; then + export EESSI_SITE_SOFTWARE_PATH=${EESSI_SOFTWARE_PATH/versions/host_injections} + show_msg "Using ${EESSI_SITE_SOFTWARE_PATH} as the site extension directory for installations." 
+ # Allow for use of alternative module tree shipped with EESSI + if [ -z ${EESSI_MODULE_SUBDIR+x} ]; then + # EESSI_MODULE_SUBDIR not set + EESSI_MODULE_SUBDIR="modules/all" + fi # Allow for the use of a custom MNS if [ -z ${EESSI_CUSTOM_MODULEPATH+x} ]; then # EESSI_CUSTOM_MODULEPATH not set so we use our defaults - # Allow for use of alternative module tree shipped with EESSI - if [ -z ${EESSI_MODULE_SUBDIR+x} ]; then - # EESSI_MODULE_SUBDIR not set - EESSI_MODULE_SUBDIR="modules/all" - fi EESSI_MODULEPATH=$EESSI_SOFTWARE_PATH/$EESSI_MODULE_SUBDIR else show_msg "Using defined environment variable \$EESSI_CUSTOM_MODULEPATH to set EESSI_MODULEPATH." @@ -99,18 +140,23 @@ if [ -d $EESSI_PREFIX ]; then if [ -d $EESSI_MODULEPATH ]; then export EESSI_MODULEPATH=$EESSI_MODULEPATH show_msg "Using ${EESSI_MODULEPATH} as the directory to be added to MODULEPATH." - export EESSI_SITE_MODULEPATH=${EESSI_MODULEPATH/versions/host_injections} + export EESSI_SITE_MODULEPATH=$EESSI_SITE_SOFTWARE_PATH/$EESSI_MODULE_SUBDIR show_msg "Using ${EESSI_SITE_MODULEPATH} as the site extension directory to be added to MODULEPATH." else error "EESSI module path at $EESSI_MODULEPATH not found!" false fi + if [ -d ${EESSI_ACCEL_SOFTWARE_PATH}/${EESSI_ACCEL_SUBDIR}/${EESSI_MODULE_SUBDIR} ]; then + export EESSI_MODULEPATH_ACCEL=${EESSI_ACCEL_SOFTWARE_PATH}/${EESSI_ACCEL_SUBDIR}/${EESSI_MODULE_SUBDIR} + show_msg "Using ${EESSI_MODULEPATH_ACCEL} as additional directory (for accelerators) to be added to MODULEPATH." + fi + # Fix wrong path for RHEL >=8 libcurl - # This is required here because we ship curl in our compat layer. If we only provided - # curl as a module file we could instead do this via a `modluafooter` in an EasyBuild - # hook (or via an Lmod hook) - rhel_libcurl_file="/etc/pki/tls/certs/ca-bundle.crt" + # This is required here because we ship curl in our compat layer. 
If we only provided + # curl as a module file we could instead do this via a `modluafooter` in an EasyBuild + # hook (or via an Lmod hook) + rhel_libcurl_file="/etc/pki/tls/certs/ca-bundle.crt" if [ -f $rhel_libcurl_file ]; then show_msg "Found libcurl CAs file at RHEL location, setting CURL_CA_BUNDLE" export CURL_CA_BUNDLE=$rhel_libcurl_file diff --git a/init/lmod/bash b/init/lmod/bash new file mode 100644 index 0000000000..b4941d6766 --- /dev/null +++ b/init/lmod/bash @@ -0,0 +1,16 @@ +# Choose an EESSI version +EESSI_VERSION="${EESSI_VERSION:-2023.06}" +# Path to top-level module tree +export MODULEPATH=/cvmfs/software.eessi.io/versions/"$EESSI_VERSION"/init/modules +. /cvmfs/software.eessi.io/versions/"$EESSI_VERSION"/compat/linux/$(uname -m)/usr/share/Lmod/init/bash + +if [ -z "$__Init_Default_Modules" ]; then + export __Init_Default_Modules=1; + + ## ability to predefine elsewhere the default list + LMOD_SYSTEM_DEFAULT_MODULES=${LMOD_SYSTEM_DEFAULT_MODULES:-"EESSI/$EESSI_VERSION"} + export LMOD_SYSTEM_DEFAULT_MODULES + module --initial_load --no_redirect restore +else + module refresh +fi diff --git a/init/lmod/csh b/init/lmod/csh new file mode 100644 index 0000000000..8e50d5e5c8 --- /dev/null +++ b/init/lmod/csh @@ -0,0 +1,16 @@ +# Choose an EESSI version +if (! $?EESSI_VERSION) then; set EESSI_VERSION = "2023.06"; endif +# Path to top-level module tree +setenv MODULEPATH /cvmfs/software.eessi.io/versions/"$EESSI_VERSION"/init/modules +source /cvmfs/software.eessi.io/versions/"$EESSI_VERSION"/compat/linux/`uname -m`/usr/share/Lmod/init/csh + +if (! $?__Init_Default_Modules ) then + setenv __Init_Default_Modules 1; + + ## ability to predefine elsewhere the default list + if (! 
$?LMOD_SYSTEM_DEFAULT_MODULES) then; setenv LMOD_SYSTEM_DEFAULT_MODULES "EESSI/$EESSI_VERSION"; endif + module --initial_load --no_redirect restore +else + module refresh +endif + diff --git a/init/lmod/fish b/init/lmod/fish new file mode 100644 index 0000000000..d4252ef32a --- /dev/null +++ b/init/lmod/fish @@ -0,0 +1,15 @@
+# Choose an EESSI version (an already defined $EESSI_VERSION takes precedence over the default)
+set EESSI_VERSION (set -q EESSI_VERSION; and echo "$EESSI_VERSION"; or echo "2023.06")
+# Path to top-level module tree
+set -x MODULEPATH /cvmfs/software.eessi.io/versions/"$EESSI_VERSION"/init/modules
+. /cvmfs/software.eessi.io/versions/"$EESSI_VERSION"/compat/linux/(uname -m)/usr/share/Lmod/init/fish
+
+if test -z "$__Init_Default_Modules"
+    export __Init_Default_Modules=1;
+
+    ## ability to predefine elsewhere the default list (via $LMOD_SYSTEM_DEFAULT_MODULES)
+    set -x LMOD_SYSTEM_DEFAULT_MODULES (set -q LMOD_SYSTEM_DEFAULT_MODULES; and echo "$LMOD_SYSTEM_DEFAULT_MODULES"; or echo "EESSI/$EESSI_VERSION")
+    module --initial_load --no_redirect restore
+else
+    module refresh
+end
diff --git a/init/lmod/ksh b/init/lmod/ksh new file mode 100644 index 0000000000..71dc29542f --- /dev/null +++ b/init/lmod/ksh @@ -0,0 +1,16 @@ +# Choose an EESSI version +EESSI_VERSION="${EESSI_VERSION:-2023.06}" +# Path to top-level module tree +export MODULEPATH=/cvmfs/software.eessi.io/versions/"$EESSI_VERSION"/init/modules +.
/cvmfs/software.eessi.io/versions/"$EESSI_VERSION"/compat/linux/$(uname -m)/usr/share/Lmod/init/ksh + +if [ -z "$__Init_Default_Modules" ]; then + export __Init_Default_Modules=1; + + ## ability to predefine elsewhere the default list + LMOD_SYSTEM_DEFAULT_MODULES=${LMOD_SYSTEM_DEFAULT_MODULES:-"EESSI/$EESSI_VERSION"} + export LMOD_SYSTEM_DEFAULT_MODULES + module --initial_load --no_redirect restore +else + module refresh +fi diff --git a/init/lmod/zsh b/init/lmod/zsh new file mode 100644 index 0000000000..5f605579c8 --- /dev/null +++ b/init/lmod/zsh @@ -0,0 +1,16 @@ +# Choose an EESSI version +EESSI_VERSION="${EESSI_VERSION:-2023.06}" +# Path to top-level module tree +export MODULEPATH=/cvmfs/software.eessi.io/versions/"$EESSI_VERSION"/init/modules +. /cvmfs/software.eessi.io/versions/"$EESSI_VERSION"/compat/linux/$(uname -m)/usr/share/Lmod/init/zsh + +if [ -z "$__Init_Default_Modules" ]; then + export __Init_Default_Modules=1; + + ## ability to predefine elsewhere the default list + LMOD_SYSTEM_DEFAULT_MODULES=${LMOD_SYSTEM_DEFAULT_MODULES:-"EESSI/$EESSI_VERSION"} + export LMOD_SYSTEM_DEFAULT_MODULES + module --initial_load --no_redirect restore +else + module refresh +fi diff --git a/init/lmod_eessi_archdetect_wrapper.sh b/init/lmod_eessi_archdetect_wrapper.sh new file mode 100644 index 0000000000..c12452c71d --- /dev/null +++ b/init/lmod_eessi_archdetect_wrapper.sh @@ -0,0 +1,2 @@ +# This can be leveraged by the source_sh() feature of Lmod +export EESSI_ARCHDETECT_OPTIONS=$($(dirname $(readlink -f $BASH_SOURCE))/eessi_archdetect.sh -a cpupath) diff --git a/init/lmod_eessi_archdetect_wrapper_accel.sh b/init/lmod_eessi_archdetect_wrapper_accel.sh new file mode 100644 index 0000000000..d4a0038cb5 --- /dev/null +++ b/init/lmod_eessi_archdetect_wrapper_accel.sh @@ -0,0 +1,2 @@ +# This can be leveraged by the source_sh() feature of Lmod +export EESSI_ACCEL_SUBDIR=$($(dirname $(readlink -f $BASH_SOURCE))/eessi_archdetect.sh accelpath) diff --git 
a/init/minimal_eessi_env b/init/minimal_eessi_env index 5273f27862..5e513c3c9f 100644 --- a/init/minimal_eessi_env +++ b/init/minimal_eessi_env @@ -20,4 +20,9 @@ fi export EESSI_CPU_FAMILY=$(uname -m) # set $EPREFIX since that is basically a standard in Gentoo Prefix -export EPREFIX=$EESSI_PREFIX/compat/$EESSI_OS_TYPE/$EESSI_CPU_FAMILY +# if $EESSI_COMPAT_LAYER_DIR is defined (for example by run_in_compat_layer_env.sh script), we use that value +if [ ! -z ${EESSI_COMPAT_LAYER_DIR} ]; then + export EPREFIX=$EESSI_COMPAT_LAYER_DIR +else + export EPREFIX=$EESSI_PREFIX/compat/$EESSI_OS_TYPE/$EESSI_CPU_FAMILY +fi diff --git a/init/modules/EESSI/2023.06.lua b/init/modules/EESSI/2023.06.lua new file mode 100644 index 0000000000..348699c0f1 --- /dev/null +++ b/init/modules/EESSI/2023.06.lua @@ -0,0 +1,157 @@ +help([[ +Description +=========== +The European Environment for Scientific Software Installations (EESSI, pronounced as easy) is a collaboration between different European partners in HPC community.The goal of this project is to build a common stack of scientific software installations for HPC systems and beyond, including laptops, personal workstations and cloud infrastructure. + +More information +================ + - URL: https://www.eessi.io/docs/ +]]) +whatis("Description: The European Environment for Scientific Software Installations (EESSI, pronounced as easy) is a collaboration between different European partners in HPC community. 
The goal of this project is to build a common stack of scientific software installations for HPC systems and beyond, including laptops, personal workstations and cloud infrastructure.") +whatis("URL: https://www.eessi.io/docs/") +conflict("EESSI") +local eessi_version = myModuleVersion() +local eessi_repo = "/cvmfs/software.eessi.io" +local eessi_prefix = pathJoin(eessi_repo, "versions", eessi_version) +local eessi_os_type = "linux" +setenv("EESSI_VERSION", eessi_version) +setenv("EESSI_CVMFS_REPO", eessi_repo) +setenv("EESSI_OS_TYPE", eessi_os_type) +function eessiDebug(text) + if (mode() == "load" and os.getenv("EESSI_DEBUG_INIT")) then + LmodMessage(text) + end +end
+function archdetect_cpu() -- returns the first detected CPU target that has a software directory in this EESSI version
+    local script = pathJoin(eessi_prefix, 'init', 'lmod_eessi_archdetect_wrapper.sh')
+    -- make sure that we grab the value for architecture before the module unsets the environment variable (in unload mode)
+    local archdetect_options = os.getenv("EESSI_ARCHDETECT_OPTIONS") or (os.getenv("EESSI_ARCHDETECT_OPTIONS_OVERRIDE") or "")
+    if not os.getenv("EESSI_ARCHDETECT_OPTIONS_OVERRIDE") then
+        if convertToCanonical(LmodVersion()) < convertToCanonical("8.6") then
+            LmodError("Loading this modulefile requires using Lmod version >= 8.6, but you can export EESSI_ARCHDETECT_OPTIONS_OVERRIDE to the available cpu architecture in the form of: x86_64/intel/haswell:x86_64/generic or aarch64/neoverse_v1:aarch64/generic")
+        end
+        source_sh("bash", script)
+    end
+    -- EESSI_ARCHDETECT_OPTIONS is set by the script (_if_ it was called)
+    archdetect_options = os.getenv("EESSI_ARCHDETECT_OPTIONS") or archdetect_options
+    if archdetect_options then
+        eessiDebug("Got archdetect CPU options: " .. archdetect_options)
+        -- archdetect_options is a colon-separated list of CPU architectures that are compatible with
+        -- the host CPU and ordered from most specific to least specific, e.g.,
+        -- x86_64/intel/skylake_avx512:x86_64/intel/haswell:x86_64/generic
+        -- We loop over the list, and return the highest matching arch for which a directory exists for this EESSI version
+        for archdetect_filter_cpu in string.gmatch(archdetect_options, "([^" .. ":" .. "]+)") do
+            if isDir(pathJoin(eessi_prefix, "software", eessi_os_type, archdetect_filter_cpu, "software")) then
+                -- use x86_64/amd/zen3 for now when AMD Genoa (Zen4) CPU is detected,
+                -- since optimized software installations for Zen4 are a work-in-progress (unless zen4 is requested via $EESSI_SOFTWARE_SUBDIR_OVERRIDE),
+                -- see https://gitlab.com/eessi/support/-/issues/37
+                if (archdetect_filter_cpu == "x86_64/amd/zen4" and os.getenv("EESSI_SOFTWARE_SUBDIR_OVERRIDE") ~= "x86_64/amd/zen4") then
+                    archdetect_filter_cpu = "x86_64/amd/zen3"
+                    if mode() == "load" then
+                        LmodMessage("Sticking to " .. archdetect_filter_cpu .. " for now, since optimized installations for AMD Genoa (Zen4) are a work in progress.")
+                    end
+                end
+                eessiDebug("Selected archdetect CPU: " .. archdetect_filter_cpu)
+                return archdetect_filter_cpu
+            end
+        end
+        LmodError("Software directory check for the detected architecture failed")
+    else
+        -- Still need to return something
+        return nil
+    end
+end
+function archdetect_accel() -- returns the accelerator subdirectory (override or detected), or "" when there is none
+    local script = pathJoin(eessi_prefix, 'init', 'lmod_eessi_archdetect_wrapper_accel.sh')
+    -- for unload mode, we need to grab the value before it is unset
+    local archdetect_accel = os.getenv("EESSI_ACCEL_SUBDIR") or (os.getenv("EESSI_ACCELERATOR_TARGET_OVERRIDE") or "")
+    if not os.getenv("EESSI_ACCELERATOR_TARGET_OVERRIDE") then
+        if convertToCanonical(LmodVersion()) < convertToCanonical("8.6") then
+            LmodError("Loading this modulefile requires using Lmod version >= 8.6, but you can export EESSI_ACCELERATOR_TARGET_OVERRIDE to the available accelerator architecture in the form of: accel/nvidia/cc80")
+        end
+        source_sh("bash", script)
+    end
+    archdetect_accel = os.getenv("EESSI_ACCEL_SUBDIR") or archdetect_accel
+    eessiDebug("Got archdetect accel option: " .. archdetect_accel)
+    return archdetect_accel
+end
+-- archdetect finds the best compatible architecture, e.g., x86_64/amd/zen3 +local archdetect = archdetect_cpu() +-- archdetect_accel() attempts to identify an accelerator, e.g., accel/nvidia/cc80 +local archdetect_accel = archdetect_accel() +-- eessi_cpu_family is derived from the archdetect match, e.g., x86_64 +local eessi_cpu_family = archdetect:match("([^/]+)") +local eessi_software_subdir = archdetect +-- eessi_eprefix is the base location of the compat layer, e.g., /cvmfs/software.eessi.io/versions/2023.06/compat/linux/x86_64 +local eessi_eprefix = pathJoin(eessi_prefix, "compat", eessi_os_type, eessi_cpu_family) +-- eessi_software_path is the location of the software installations, e.g., +-- /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen3 +local eessi_software_path = pathJoin(eessi_prefix, "software", eessi_os_type, eessi_software_subdir) +local eessi_modules_subdir = pathJoin("modules", "all") +-- eessi_module_path is the location of the _CPU_ module files, e.g., +-- /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen3/modules/all +local eessi_module_path = pathJoin(eessi_software_path, eessi_modules_subdir) +local eessi_site_software_path = string.gsub(eessi_software_path, "versions", "host_injections") +-- Site module path is the same as the EESSI one, but with `versions` changed to `host_injections`, e.g., +-- /cvmfs/software.eessi.io/host_injections/2023.06/software/linux/x86_64/amd/zen3/modules/all +local eessi_site_module_path = pathJoin(eessi_site_software_path, eessi_modules_subdir) +setenv("EPREFIX", eessi_eprefix) +eessiDebug("Setting EPREFIX to " .. eessi_eprefix) +setenv("EESSI_CPU_FAMILY", eessi_cpu_family) +eessiDebug("Setting EESSI_CPU_FAMILY to " .. eessi_cpu_family) +setenv("EESSI_SITE_SOFTWARE_PATH", eessi_site_software_path) +eessiDebug("Setting EESSI_SITE_SOFTWARE_PATH to " ..
eessi_site_software_path) +setenv("EESSI_SITE_MODULEPATH", eessi_site_module_path) +eessiDebug("Setting EESSI_SITE_MODULEPATH to " .. eessi_site_module_path) +setenv("EESSI_SOFTWARE_SUBDIR", eessi_software_subdir) +eessiDebug("Setting EESSI_SOFTWARE_SUBDIR to " .. eessi_software_subdir) +setenv("EESSI_PREFIX", eessi_prefix) +eessiDebug("Setting EESSI_PREFIX to " .. eessi_prefix) +setenv("EESSI_EPREFIX", eessi_eprefix) +eessiDebug("Setting EPREFIX to " .. eessi_eprefix) +prepend_path("PATH", pathJoin(eessi_eprefix, "bin")) +eessiDebug("Adding " .. pathJoin(eessi_eprefix, "bin") .. " to PATH") +prepend_path("PATH", pathJoin(eessi_eprefix, "usr", "bin")) +eessiDebug("Adding " .. pathJoin(eessi_eprefix, "usr", "bin") .. " to PATH") +setenv("EESSI_SOFTWARE_PATH", eessi_software_path) +eessiDebug("Setting EESSI_SOFTWARE_PATH to " .. eessi_software_path) +setenv("EESSI_MODULEPATH", eessi_module_path) +eessiDebug("Setting EESSI_MODULEPATH to " .. eessi_module_path) +-- We ship our spider cache, so this location does not need to be spider-ed +if ( mode() ~= "spider" ) then + prepend_path("MODULEPATH", eessi_module_path) + eessiDebug("Adding " .. eessi_module_path .. " to MODULEPATH") +end +prepend_path("LMOD_RC", pathJoin(eessi_software_path, ".lmod", "lmodrc.lua")) +eessiDebug("Adding " .. pathJoin(eessi_software_path, ".lmod", "lmodrc.lua") .. " to LMOD_RC") +-- Use pushenv for LMOD_PACKAGE_PATH as this may be set locally by the site +pushenv("LMOD_PACKAGE_PATH", pathJoin(eessi_software_path, ".lmod")) +eessiDebug("Setting LMOD_PACKAGE_PATH to " .. 
pathJoin(eessi_software_path, ".lmod")) + +-- the accelerator may have an empty value and we need to give some flexibility +-- * construct the path we expect to find +-- * then check it exists +-- * then update the modulepath +if not (archdetect_accel == nil or archdetect_accel == '') then + -- The CPU subdirectory of the accelerator installations is _usually_ the same as host CPU, but this can be overridden + eessi_accel_software_subdir = os.getenv("EESSI_ACCEL_SOFTWARE_SUBDIR_OVERRIDE") or eessi_software_subdir + -- CPU location of the accelerator installations, e.g., + -- /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen3 + eessi_accel_software_path = pathJoin(eessi_prefix, "software", eessi_os_type, eessi_accel_software_subdir) + -- location of the accelerator modules, e.g., + -- /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen3/accel/nvidia/cc80/modules/all + eessi_module_path_accel = pathJoin(eessi_accel_software_path, archdetect_accel, eessi_modules_subdir) + eessiDebug("Checking if " .. eessi_module_path_accel .. " exists") + if isDir(eessi_module_path_accel) then + setenv("EESSI_MODULEPATH_ACCEL", eessi_module_path_accel) + prepend_path("MODULEPATH", eessi_module_path_accel) + eessiDebug("Using acclerator modules at: " .. eessi_module_path_accel) + end +end + +-- prepend the site module path last so it has priority +prepend_path("MODULEPATH", eessi_site_module_path) +eessiDebug("Adding " .. eessi_site_module_path .. " to MODULEPATH") +if mode() == "load" then + LmodMessage("EESSI/" .. eessi_version .. 
" loaded successfully") +end diff --git a/install_scripts.sh b/install_scripts.sh index ab06e47997..c5a9a556c2 100755 --- a/install_scripts.sh +++ b/install_scripts.sh @@ -86,7 +86,8 @@ TOPDIR=$(dirname $(realpath $0)) # Copy for init directory init_files=( bash eessi_archdetect.sh eessi_defaults eessi_environment_variables eessi_software_subdir_for_host.py - minimal_eessi_env README.md test.py + minimal_eessi_env README.md test.py lmod_eessi_archdetect_wrapper.sh lmod_eessi_archdetect_wrapper_accel.sh + ) copy_files_by_list ${TOPDIR}/init ${INSTALL_PREFIX}/init "${init_files[@]}" @@ -102,6 +103,18 @@ mc_files=( ) copy_files_by_list ${TOPDIR}/init/Magic_Castle ${INSTALL_PREFIX}/init/Magic_Castle "${mc_files[@]}" +# Copy for init/modules/EESSI directory +mc_files=( + 2023.06.lua +) +copy_files_by_list ${TOPDIR}/init/modules/EESSI ${INSTALL_PREFIX}/init/modules/EESSI "${mc_files[@]}" + +# Copy for init/lmod directory +init_script_files=( + bash zsh ksh fish csh +) +copy_files_by_list ${TOPDIR}/init/lmod ${INSTALL_PREFIX}/init/lmod "${init_script_files[@]}" + # Copy for the scripts directory script_files=( utils.sh @@ -110,10 +123,20 @@ copy_files_by_list ${TOPDIR}/scripts ${INSTALL_PREFIX}/scripts "${script_files[@ # Copy files for the scripts/gpu_support/nvidia directory nvidia_files=( - install_cuda_host_injections.sh link_nvidia_host_libraries.sh + install_cuda_and_libraries.sh + install_cuda_host_injections.sh + link_nvidia_host_libraries.sh ) copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia ${INSTALL_PREFIX}/scripts/gpu_support/nvidia "${nvidia_files[@]}" +# Easystacks to be used to install software in host injections +host_injections_easystacks=( + eessi-2023.06-eb-4.9.4-2023a-CUDA-host-injections.yml + eessi-2023.06-eb-4.9.4-2023b-CUDA-host-injections.yml +) +copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia/easystacks \ +${INSTALL_PREFIX}/scripts/gpu_support/nvidia/easystacks "${host_injections_easystacks[@]}" + # Copy over EasyBuild hooks file 
used for installations hook_files=( eb_hooks.py diff --git a/install_software_layer.sh b/install_software_layer.sh index 82ca70b73f..8b88e75713 100755 --- a/install_software_layer.sh +++ b/install_software_layer.sh @@ -1,4 +1,4 @@ #!/bin/bash base_dir=$(dirname $(realpath $0)) source ${base_dir}/init/eessi_defaults -./run_in_compat_layer_env.sh ./EESSI-install-software.sh "$@" +$base_dir/run_in_compat_layer_env.sh $base_dir/EESSI-install-software.sh "$@" diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh new file mode 100755 index 0000000000..62b6e3f3ae --- /dev/null +++ b/load_eessi_extend_module.sh @@ -0,0 +1,116 @@ +# Script to load the environment module for EESSI-extend. +# If that module is not available yet, a specific version will be installed using the latest EasyBuild. +# +# This script must be sourced, since it makes changes in the current environment, like loading an EESSI-extend module. +# +# Assumptions (if one is not satisfied the script prints a message and exits) +# - EESSI version is given as first argument +# - TMPDIR is set +# - EB is set +# - EASYBUILD_INSTALLPATH needs to be set +# - Function check_exit_code is defined; +# scripts/utils.sh in EESSI/software-layer repository defines this function, hence +# scripts/utils.sh shall be sourced before this script is run +# +# This script is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Kenneth Hoste (@boegel, HPC-UGent) +# author: Alan O'Cais (@ocaisa, CECAM) +# author: Thomas Roeblitz (@trz42, University of Bergen) +# +# license: GPLv2 +# +# +set -o pipefail + +# this script is *sourced*, not executed, so can't rely on $0 to determine path to self or script name +# $BASH_SOURCE points to correct path or script name, see also http://mywiki.wooledge.org/BashFAQ/028 +if [ $# -ne 1 ]; then + echo "Usage: source ${BASH_SOURCE} " >&2 + exit 1 +fi + +EESSI_EXTEND_VERSION="${1}-easybuild" + +# make sure that environment variables that 
we expect to be set are indeed set +if [ -z "${TMPDIR}" ]; then + echo "\$TMPDIR is not set; exiting" >&2 + exit 2 +fi + +# ${EB} is used to specify which 'eb' command should be used; +# can potentially be more than just 'eb', for example when using 'eb --optarch=GENERIC' +if [ -z "${EB}" ]; then + echo "\$EB is not set; exiting" >&2 + exit 2 +fi + +# ${EASYBUILD_INSTALLPATH} points to the installation path and needs to be set +if [ -z "${EASYBUILD_INSTALLPATH}" ]; then + echo "\$EASYBUILD_INSTALLPATH is not set; exiting" >&2 + exit 2 +fi + +# make sure that utility functions are defined (cfr. scripts/utils.sh script in EESSI/software-layer repo) +type check_exit_code +if [ $? -ne 0 ]; then + echo "check_exit_code function is not defined; exiting" >&2 + exit 3 +fi + +echo ">> Checking for EESSI-extend module..." + +ml_av_eessi_extend_out=${TMPDIR}/ml_av_eessi_extend.out +# need to use --ignore_cache to avoid the case that the module was removed (to be +# rebuilt) but it is still in the cache +module --ignore_cache avail 2>&1 | grep -i EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out} + +if [[ $? -eq 0 ]]; then + echo_green ">> Module for EESSI-extend/${EESSI_EXTEND_VERSION} found!" +else + echo_yellow ">> No module yet for EESSI-extend/${EESSI_EXTEND_VERSION}, installing it..." + + EB_TMPDIR=${TMPDIR}/ebtmp + echo ">> Using temporary installation of EasyBuild (in ${EB_TMPDIR})..." + pip_install_out=${TMPDIR}/pip_install.out + pip3 install --prefix ${EB_TMPDIR} easybuild &> ${pip_install_out} + + # keep track of original $PATH and $PYTHONPATH values, so we can restore them + ORIG_PATH=${PATH} + ORIG_PYTHONPATH=${PYTHONPATH} + + # source configure_easybuild to use correct eb settings + ( + export EASYBUILD_PREFIX=${TMPDIR}/easybuild + export EASYBUILD_READ_ONLY_INSTALLDIR=1 + + echo ">> Final installation in ${EASYBUILD_INSTALLPATH}..." 
+ export PATH=${EB_TMPDIR}/bin:${PATH} + export PYTHONPATH=$(ls -d ${EB_TMPDIR}/lib/python*/site-packages):${PYTHONPATH} + eb_install_out=${TMPDIR}/eb_install.out + ok_msg="EESSI-extend/${EESSI_EXTEND_VERSION} installed, let's go!" + fail_msg="Installing EESSI-extend/${EESSI_EXTEND_VERSION} failed, that's not good... (output: ${eb_install_out})" + # while always adding --try-amend=keep... may do no harm, we could make + # an attempt to figure out if it is needed, e.g., when we are rebuilding + ${EB} "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" --try-amend=keeppreviousinstall=True 2>&1 | tee ${eb_install_out} + check_exit_code $? "${ok_msg}" "${fail_msg}" + ) + + # restore origin $PATH and $PYTHONPATH values, and clean up environment variables that are no longer needed + export PATH=${ORIG_PATH} + export PYTHONPATH=${ORIG_PYTHONPATH} + unset EB_TMPDIR ORIG_PATH ORIG_PYTHONPATH + + module --ignore_cache avail EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out} + if [[ $? -eq 0 ]]; then + echo_green ">> EESSI-extend/${EESSI_EXTEND_VERSION} module installed!" + else + fatal_error "EESSI-extend/${EESSI_EXTEND_VERSION} module failed to install?! (output of 'pip install' in ${pip_install_out}, output of 'eb' in ${TMPDIR}/eb_install.out, output of 'module avail EESSI-extend' in ${ml_av_eessi_extend_out})" + fi +fi + +echo ">> Loading EESSI-extend/${EESSI_EXTEND_VERSION} module..." 
+module --ignore_cache load EESSI-extend/${EESSI_EXTEND_VERSION} + +unset EESSI_EXTEND_VERSION diff --git a/reframe_config_bot.py.tmpl b/reframe_config_bot.py.tmpl index 607373767a..323aafd5ec 100644 --- a/reframe_config_bot.py.tmpl +++ b/reframe_config_bot.py.tmpl @@ -15,19 +15,13 @@ site_configuration = { 'modules_system': 'lmod', 'partitions': [ { - 'name': 'default', + 'name': '__RFM_PARTITION__', 'scheduler': 'local', 'launcher': 'mpirun', 'environs': ['default'], 'features': [ FEATURES[CPU] ] + list(SCALES.keys()), - 'processor': { - 'num_cpus': __NUM_CPUS__, - 'num_sockets': __NUM_SOCKETS__, - 'num_cpus_per_core': __NUM_CPUS_PER_CORE__, - 'num_cpus_per_socket': __NUM_CPUS_PER_SOCKET__, - }, 'resources': [ { 'name': 'memory', @@ -56,8 +50,7 @@ site_configuration = { { 'purge_environment': True, 'resolve_module_conflicts': False, # avoid loading the module before submitting the job - # disable automatic detection of CPU architecture (since we're using local scheduler) - 'remote_detect': False, + 'remote_detect': True, } ], 'logging': common_logging_config(), diff --git a/run_in_compat_layer_env.sh b/run_in_compat_layer_env.sh index b8e9cf979b..b4299c7a0d 100755 --- a/run_in_compat_layer_env.sh +++ b/run_in_compat_layer_env.sh @@ -7,7 +7,16 @@ if [ -z $EESSI_VERSION ]; then echo "ERROR: \$EESSI_VERSION must be set!" >&2 exit 1 fi -EESSI_COMPAT_LAYER_DIR="${EESSI_CVMFS_REPO}/versions/${EESSI_VERSION}/compat/linux/$(uname -m)" + +echo "EESSI_COMPAT_LAYER_DIR_OVERRIDE: ${EESSI_COMPAT_LAYER_DIR_OVERRIDE}" + +if [ ! -z ${EESSI_COMPAT_LAYER_DIR_OVERRIDE} ]; then + echo "EESSI_COMPAT_LAYER_DIR_OVERRIDE found. Setting EESSI_COMPAT_LAYER_DIR to ${EESSI_COMPAT_LAYER_DIR_OVERRIDE}" + EESSI_COMPAT_LAYER_DIR=${EESSI_COMPAT_LAYER_DIR_OVERRIDE} +else + EESSI_COMPAT_LAYER_DIR="${EESSI_CVMFS_REPO}/versions/${EESSI_VERSION}/compat/linux/$(uname -m)" +fi + if [ ! -d ${EESSI_COMPAT_LAYER_DIR} ]; then echo "ERROR: ${EESSI_COMPAT_LAYER_DIR} does not exist!" 
>&2 exit 1 @@ -20,12 +29,18 @@ fi if [ ! -z ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} ]; then INPUT="export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE}; ${INPUT}" fi +if [ ! -z ${EESSI_ACCELERATOR_TARGET} ]; then + INPUT="export EESSI_ACCELERATOR_TARGET=${EESSI_ACCELERATOR_TARGET}; ${INPUT}" +fi if [ ! -z ${EESSI_CVMFS_REPO_OVERRIDE} ]; then INPUT="export EESSI_CVMFS_REPO_OVERRIDE=${EESSI_CVMFS_REPO_OVERRIDE}; ${INPUT}" fi if [ ! -z ${EESSI_VERSION_OVERRIDE} ]; then INPUT="export EESSI_VERSION_OVERRIDE=${EESSI_VERSION_OVERRIDE}; ${INPUT}" fi +if [ ! -z ${EESSI_COMPAT_LAYER_DIR} ]; then + INPUT="export EESSI_COMPAT_LAYER_DIR=${EESSI_COMPAT_LAYER_DIR}; ${INPUT}" +fi if [ ! -z ${EESSI_OVERRIDE_GPU_CHECK} ]; then INPUT="export EESSI_OVERRIDE_GPU_CHECK=${EESSI_OVERRIDE_GPU_CHECK}; ${INPUT}" fi @@ -35,6 +50,9 @@ fi if [ ! -z ${https_proxy} ]; then INPUT="export https_proxy=${https_proxy}; ${INPUT}" fi +if [ ! -z ${EASYBUILD_ROBOT_PATHS} ]; then + INPUT="export EASYBUILD_ROBOT_PATHS=${EASYBUILD_ROBOT_PATHS}; ${INPUT}" +fi echo "Running '${INPUT}' in EESSI (${EESSI_CVMFS_REPO}) ${EESSI_VERSION} compatibility layer environment..." 
${EESSI_COMPAT_LAYER_DIR}/startprefix <<< "${INPUT}" diff --git a/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023a-CUDA-host-injections.yml b/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023a-CUDA-host-injections.yml new file mode 100644 index 0000000000..83e68077a2 --- /dev/null +++ b/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023a-CUDA-host-injections.yml @@ -0,0 +1,9 @@ +# This EasyStack provides a list of all the EasyConfigs that should be installed in host_injections +# for nvidia GPU support, because they cannot (fully) be shipped as part of EESSI due to license constraints +easyconfigs: + - CUDA-12.1.1.eb + - cuDNN-8.9.2.26-CUDA-12.1.1.eb: + options: + # needed to enforce acceptance of EULA in cuDNN easyblock, + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3473 + include-easyblocks-from-commit: 11afb88ec55e0ca431cbe823696aa43e2a9bfca8 diff --git a/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023b-CUDA-host-injections.yml b/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023b-CUDA-host-injections.yml new file mode 100644 index 0000000000..5cfec813f6 --- /dev/null +++ b/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023b-CUDA-host-injections.yml @@ -0,0 +1,4 @@ +# This EasyStack provides a list of all the EasyConfigs that should be installed in host_injections +# for nvidia GPU support, because they cannot (fully) be shipped as part of EESSI due to license constraints +easyconfigs: + - CUDA-12.4.0.eb diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh new file mode 100755 index 0000000000..741ead0559 --- /dev/null +++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh @@ -0,0 +1,265 @@ +#!/usr/bin/env bash + +# This script can be used to install CUDA and other libraries by NVIDIA under +# the `.../host_injections` directory. 
+# +# This provides the parts of the CUDA installation and other libraries that +# cannot be redistributed as part of EESSI due to license limitations. While +# GPU-based software from EESSI will _run_ without these, installation of +# additional software that builds upon CUDA or other libraries requires that +# these installations are present under `host_injections`. +# +# The `host_injections` directory is a variant symlink that by default points to +# `/opt/eessi`, unless otherwise defined in the local CVMFS configuration (see +# https://cvmfs.readthedocs.io/en/stable/cpt-repo.html#variant-symlinks). For the +# installation to be successful, this directory needs to be writeable by the user +# executing this script. + +# Initialise our bash functions +TOPDIR=$(dirname $(realpath $BASH_SOURCE)) +source "$TOPDIR"/../../utils.sh + +# Function to display help message +show_help() { + echo "Usage: $0 [OPTIONS]" + echo "Options:" + echo " --help Display this help message" + echo " --accept-cuda-eula You _must_ accept the CUDA EULA to install" + echo " CUDA, see the EULA at" + echo " https://docs.nvidia.com/cuda/eula/index.html" + echo " --accept-cudnn-eula You _must_ accept the cuDNN EULA to install" + echo " cuDNN, see the EULA at" + echo " https://docs.nvidia.com/deeplearning/cudnn/latest/reference/eula.html" + echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" + echo " storage during the installation of CUDA" + echo " and/or other libraries (must have" + echo " several GB available; depends on the number of installations)" +} + +# Initialize variables +cuda_eula_accepted=0 +cudnn_eula_accepted=0 +EASYSTACK_FILE= +TEMP_DIR= + +# Parse command-line options +while [[ $# -gt 0 ]]; do + case "$1" in + --help) + show_help + exit 0 + ;; + --accept-cuda-eula) + cuda_eula_accepted=1 + shift 1 + ;; + --accept-cudnn-eula) + cudnn_eula_accepted=1 + shift 1 + ;; + -t|--temp-dir) + if [ -n "$2" ]; then + TEMP_DIR="$2" + shift 2 + else + echo "Error: 
Argument required for $1" + show_help + exit 1 + fi + ;; + *) + show_help + fatal_error "Error: Unknown option: $1" + ;; + esac +done + +# Make sure EESSI is initialised +check_eessi_initialised + +# we need a directory we can use for temporary storage +if [[ -z "${TEMP_DIR}" ]]; then + tmpdir=$(mktemp -d) +else + mkdir -p ${TEMP_DIR} + tmpdir=$(mktemp -d --tmpdir=${TEMP_DIR} cuda_n_co.XXX) + if [[ ! -d "$tmpdir" ]] ; then + fatal_error "Could not create directory ${tmpdir}" + fi +fi +echo "Created temporary directory '${tmpdir}'" + +# Store MODULEPATH so it can be restored at the end of each loop iteration +SAVE_MODULEPATH=${MODULEPATH} + +for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do + echo -e "Processing easystack file ${EASYSTACK_FILE}...\n\n" + + # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file + eb_version=$(echo ${EASYSTACK_FILE} | sed 's/.*eb-\([0-9.]*\).*/\1/g') + + # Load EasyBuild version for this easystack file _before_ loading EESSI-extend + module_avail_out=${tmpdir}/ml.out + module avail 2>&1 | grep EasyBuild/${eb_version} &> ${module_avail_out} + if [[ $? 
-eq 0 ]]; then + echo_green ">> Found an EasyBuild/${eb_version} module" + else + echo_yellow ">> No EasyBuild/${eb_version} module found: skipping step to install easystack file ${EASYSTACK_FILE} (see output in ${module_avail_out})" + continue + fi + module load EasyBuild/${eb_version} + + # Make sure EESSI-extend does a site install here + # We need to reload it with the current environment variables set + unset EESSI_CVMFS_INSTALL + unset EESSI_PROJECT_INSTALL + unset EESSI_USER_INSTALL + export EESSI_SITE_INSTALL=1 + module unload EESSI-extend + ml_av_eessi_extend_out=${tmpdir}/ml_av_eessi_extend.out + # need to use --ignore_cache to avoid the case that the module was removed (to be + # rebuilt) but it is still in the cache and the rebuild failed + EESSI_EXTEND_VERSION=${EESSI_VERSION}-easybuild + module --ignore_cache avail 2>&1 | grep -i EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out} + if [[ $? -eq 0 ]]; then + echo_green ">> Module for EESSI-extend/${EESSI_EXTEND_VERSION} found!" 
+ else + error="\nNo module for EESSI-extend/${EESSI_EXTEND_VERSION} found\nwhile EESSI has been initialised to use software under ${EESSI_SOFTWARE_PATH}\n" + fatal_error "${error}" + fi + module --ignore_cache load EESSI-extend/${EESSI_EXTEND_VERSION} + unset EESSI_EXTEND_VERSION + + # Install modules in hidden .modules dir to keep track of what was installed before + # (this action is temporary, and we do not call Lmod again within the current shell context, but in EasyBuild + # subshells, so loaded modules are not automatically unloaded) + MODULEPATH=${EESSI_SITE_SOFTWARE_PATH}/.modules/all + echo "set MODULEPATH=${MODULEPATH}" + + # We don't want hooks used in this install, we need vanilla installations + touch "${tmpdir}"/none.py + export EASYBUILD_HOOKS="${tmpdir}/none.py" + + # show EasyBuild configuration + echo "Show EasyBuild configuration" + eb --show-config + + # do a 'eb --dry-run-short' with the EASYSTACK_FILE and determine list of packages + # to be installed + echo ">> Determining if packages specified in ${EASYSTACK_FILE} are missing under ${EESSI_SITE_SOFTWARE_PATH}" + eb_dry_run_short_out=${tmpdir}/eb_dry_run_short.out + eb --dry-run-short --easystack ${EASYSTACK_FILE} 2>&1 | tee ${eb_dry_run_short_out} + ret=$? + + # Check if CUDA shall be installed + cuda_install_needed=0 + cat ${eb_dry_run_short_out} | grep "^ \* \[[ ]\]" | grep "module: CUDA/" > /dev/null + ret=$? + if [ "${ret}" -eq 0 ]; then + cuda_install_needed=1 + fi + + # Make sure the CUDA EULA is accepted if it shall be installed + if [ "${cuda_install_needed}" -eq 1 ] && [ "${cuda_eula_accepted}" -ne 1 ]; then + show_help + error="\nCUDA shall be installed. However, the CUDA EULA has not been accepted\nYou _must_ accept the CUDA EULA via the appropriate command line option.\n" + fatal_error "${error}" + fi + + # Check if cuDNN shall be installed + cudnn_install_needed=0 + cat ${eb_dry_run_short_out} | grep "^ \* \[[ ]\]" | grep "module: cuDNN/" > /dev/null + ret=$? 
+ if [ "${ret}" -eq 0 ]; then + cudnn_install_needed=1 + fi + + # Make sure the cuDNN EULA is accepted if it shall be installed + if [ "${cudnn_install_needed}" -eq 1 ] && [ "${cudnn_eula_accepted}" -ne 1 ]; then + show_help + error="\ncuDNN shall be installed. However, the cuDNN EULA has not been accepted\nYou _must_ accept the cuDNN EULA via the appropriate command line option.\n" + fatal_error "${error}" + fi + + # determine the number of packages to be installed (assume 5 GB + num_packages * + # 3GB space needed). Both CUDA and cuDNN are about this size + number_of_packages=$(cat ${eb_dry_run_short_out} | grep "^ \* \[[ ]\]" | sed -e 's/^.*module: //' | sort -u | wc -l) + echo "number of packages to be (re-)installed: '${number_of_packages}'" + base_storage_space=$((5000000 + ${number_of_packages} * 3000000)) + + required_space_in_tmpdir=${base_storage_space} + # Let's see if we have sources and build locations defined if not, we use the temporary space + if [[ -z "${EASYBUILD_BUILDPATH}" ]]; then + export EASYBUILD_BUILDPATH=${tmpdir}/build + required_space_in_tmpdir=$((required_space_in_tmpdir + ${base_storage_space})) + fi + if [[ -z "${EASYBUILD_SOURCEPATH}" ]]; then + export EASYBUILD_SOURCEPATH=${tmpdir}/sources + required_space_in_tmpdir=$((required_space_in_tmpdir + ${base_storage_space})) + fi + + # The install is pretty fat, you need lots of space for download/unpack/install + # (~3*${base_storage_space}*1000 Bytes), + # need to do a space check before we proceed + avail_space=$(df --output=avail "${EESSI_SITE_SOFTWARE_PATH}"/ | tail -n 1 | awk '{print $1}') + min_disk_storage=$((3 * ${base_storage_space})) + if (( avail_space < ${min_disk_storage} )); then + fatal_error "Need at least $(echo "${min_disk_storage} / 1000000" | bc) GB disk space to install CUDA and other libraries under ${EESSI_SITE_SOFTWARE_PATH}, exiting now..." 
+ fi + avail_space=$(df --output=avail "${tmpdir}"/ | tail -n 1 | awk '{print $1}') + if (( avail_space < required_space_in_tmpdir )); then + error="Need at least $(echo "${required_space_in_tmpdir} / 1000000" | bc) GB of temporary disk space under ${tmpdir}.\n" + error="${error}Use the --temp-dir option to point to a location with adequate space to pass this check. " + error="${error}You can alternatively set EASYBUILD_BUILDPATH and/or EASYBUILD_SOURCEPATH " + error="${error}to reduce this requirement. Exiting now..." + fatal_error "${error}" + fi + + # Brief explanation of parameters: + # - prefix: using $tmpdir as default base directory for several EB settings + # - installpath-modules: We install the module in a hidden .modules, so that next time this script + # is run, it is not reinstalled. + # - ${accept_eula_opt}: We only set the --accept-eula-for=CUDA option if CUDA will be installed and if + # this script was called with the argument --accept-cuda-eula. + # - hooks: We don't want hooks used in this install, we need vanilla + # installations of CUDA and/or other libraries + # - easystack: Path to easystack file that defines which packages shall be + # installed + accept_eula_opt= + if [[ ${cuda_eula_accepted} -eq 1 ]]; then + accept_eula_opt="CUDA" + fi + if [[ ${cudnn_eula_accepted} -eq 1 ]]; then + if [[ -z ${accept_eula_opt} ]]; then + accept_eula_opt="cuDNN" + else + accept_eula_opt="${accept_eula_opt},cuDNN" + fi + fi + touch "$tmpdir"/none.py + eb_args="--prefix=$tmpdir" + eb_args="$eb_args --installpath-modules=${EASYBUILD_INSTALLPATH}/.modules" + eb_args="$eb_args --hooks="$tmpdir"/none.py" + eb_args="$eb_args --easystack ${EASYSTACK_FILE}" + if [[ ! -z ${accept_eula_opt} ]]; then + eb_args="$eb_args --accept-eula-for=$accept_eula_opt" + fi + echo "Running eb $eb_args" + eb $eb_args + ret=$? + if [ $ret -ne 0 ]; then + eb_last_log=$(unset EB_VERBOSE; eb --last-log) + cp -a ${eb_last_log} . 
+ fatal_error "some installation failed, please check EasyBuild logs ${PWD}/$(basename ${eb_last_log})..." + else + echo_green "all installations at ${EESSI_SITE_SOFTWARE_PATH}/software/... succeeded!" + fi + + # clean up tmpdir content + rm -rf "${tmpdir}"/* + + # Restore MODULEPATH for next loop iteration + MODULEPATH=${SAVE_MODULEPATH} +done +# Remove the temporary directory +rm -rf "${tmpdir}" diff --git a/scripts/gpu_support/nvidia/install_cuda_host_injections.sh b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh index a9310d817a..3842aff307 100755 --- a/scripts/gpu_support/nvidia/install_cuda_host_injections.sh +++ b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh @@ -123,7 +123,7 @@ else tmpdir=$(mktemp -d) else tmpdir="${CUDA_TEMP_DIR}"/temp - if ! mkdir "$tmpdir" ; then + if ! mkdir -p "$tmpdir" ; then fatal_error "Could not create directory ${tmpdir}" fi fi @@ -175,13 +175,13 @@ else # Check the exit code if [ $? -ne 0 ]; then eb_version=$(eb --version) - available_cuda_easyconfigs=$(eb --search ^CUDA-*.eb|grep CUDA) + available_cuda_easyconfigs=$(eb --search "^CUDA-.*.eb"|grep CUDA) error="The easyconfig ${cuda_easyconfig} was not found in EasyBuild version:\n" error="${error} ${eb_version}\n" error="${error}You either need to give a different version of CUDA to install _or_ \n" error="${error}use a different version of EasyBuild for the installation.\n" - error="${error}\nThe versions of available with the current eb command are:\n" + error="${error}\nThe versions of CUDA available with the current eb command are:\n" error="${error}${available_cuda_easyconfigs}" fatal_error "${error}" fi diff --git a/test_suite.sh b/test_suite.sh index 6e73fbd87c..1f0b91c477 100755 --- a/test_suite.sh +++ b/test_suite.sh @@ -74,11 +74,17 @@ fi TMPDIR=$(mktemp -d) echo ">> Setting up environment..." 
-module --force purge -export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS) +# For this call to be succesful, it needs to be able to import archspec (which is part of EESSI) +# Thus, we execute it in a subshell where EESSI is already initialized (a bit like a bootstrap) +export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(source $TOPDIR/init/bash > /dev/null 2>&1; python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS) +echo "EESSI_SOFTWARE_SUBDIR_OVERRIDE: $EESSI_SOFTWARE_SUBDIR_OVERRIDE" source $TOPDIR/init/bash +# We have to ignore the LMOD cache, otherwise the software that is built in the build step cannot be found/loaded +# Reason is that the LMOD cache is normally only updated on the Stratum 0, once everything is ingested +export LMOD_IGNORE_CACHE=1 + # Load the ReFrame module # Currently, we load the default version. Maybe we should somehow make this configurable in the future? module load ReFrame @@ -135,41 +141,48 @@ export RFM_PREFIX=$PWD/reframe_runs echo "Configured reframe with the following environment variables:" env | grep "RFM_" -# Inject correct CPU/memory properties into the ReFrame config file -cpuinfo=$(lscpu) -if [[ "${cpuinfo}" =~ CPU\(s\):[^0-9]*([0-9]+) ]]; then - cpu_count=${BASH_REMATCH[1]} -else - fatal_error "Failed to get the number of CPUs for the current test hardware with lscpu." -fi -if [[ "${cpuinfo}" =~ Socket\(s\):[^0-9]*([0-9]+) ]]; then - socket_count=${BASH_REMATCH[1]} -else - fatal_error "Failed to get the number of sockets for the current test hardware with lscpu." -fi -if [[ "${cpuinfo}" =~ (Thread\(s\) per core:[^0-9]*([0-9]+)) ]]; then - threads_per_core=${BASH_REMATCH[2]} -else - fatal_error "Failed to get the number of threads per core for the current test hardware with lscpu." 
-fi -if [[ "${cpuinfo}" =~ (Core\(s\) per socket:[^0-9]*([0-9]+)) ]]; then - cores_per_socket=${BASH_REMATCH[2]} +# The /sys inside the container is not the same as the /sys of the host +# We want to extract the memory limit from the cgroup on the host (which is typically set by SLURM). +# Thus, bot/test.sh bind-mounts the host's /sys/fs/cgroup into /hostsys/fs/cgroup +# and that's the prefix we use to extract the memory limit from +cgroup_v1_mem_limit="/hostsys/fs/cgroup/memory/$( Tests: {additional_tests}") + elif debug: + print(f"Software: {software_name} -> No tests found") + + # Always add the default set of tests, if default_tests is specified + if 'default_tests' in mappings: + additional_tests = mappings['default_tests'] + for test in additional_tests: + if test not in tests_to_run: + tests_to_run.append(test) + + if additional_tests and debug: + print(f"Adding default set of tests: {additional_tests}") + + # Create argument string out of the list of tests to run + if tests_to_run: + arg_string = " ".join([f"-n {test_name}" for test_name in tests_to_run]) + + # Print final lists & argument string + if debug: + print(f"Full list of tests to run: {tests_to_run}") + print(f"Argument string: {arg_string}") + else: + # This is the only thing this script should print, unless run with --debug + print(f"{arg_string}") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Map software names to their tests based on a YAML configuration.") + parser.add_argument('--mapping-file', type=str, help='Path to the YAML file containing the test mappings.') + parser.add_argument('--module-list', type=str, help='Path to the file containing the list of software names.') + defaults_help = "Don't consider the module-list file, only return the default tests from the mapping file" + parser.add_argument('--defaults-only', action='store_true', default=False, help=defaults_help) + parser.add_argument('--debug', action='store_true', default=False, help='Enable 
debug output.') + + args = parser.parse_args() + + main(args.mapping_file, args.module_list, args.debug, args.defaults_only) diff --git a/tests/eessi_test_mapping/software_to_tests.yml b/tests/eessi_test_mapping/software_to_tests.yml new file mode 100644 index 0000000000..626477781f --- /dev/null +++ b/tests/eessi_test_mapping/software_to_tests.yml @@ -0,0 +1,35 @@ +# This file creates a mapping between (regular expressions for) module names and test names from the EESSI test suite +# If a module name matches one of the regular expressions, the listed set of tests will be run in the test step +# For a given module name, the test list for the first matching regular expression is returned +# E.g. for +# mappings: +# foo-v1: +# - bar +# foo-* +# - bar2 +# only the bar test will be run for foo-v1 (even though it also matches the pattern (foo-*) +# If a module name does not match anything, the default_tests will be run +# Note that to list all available tests by name, one can do execute +# reframe -R -c /path/to/eessi/test-suite/ --list | grep -Po "\bEESSI_\S+?(?=[\s'])" | uniq +# Note that this regular expression is a bit sensitive to changes in the structure of ReFrame's output, +# but is confirmed to work for ReFrame version 4.6.1 +mappings: + PyTorch-Bundle/*: + - EESSI_PyTorch_torchvision + QuantumESPRESSO/*: + - EESSI_QuantumESPRESSO + CP2K/*: + - EESSI_CP2K + ESPResSo/*: + - EESSI_ESPRESSO + LAMMPS/*: + - EESSI_LAMMPS + OSU-Micro-Benchmarks/*: + - EESSI_OSU_Micro_Benchmarks + GROMACS/*: + - EESSI_GROMACS + default_tests: + # Low level tests + - EESSI_OSU_Micro_Benchmarks + # A very quick-to-run high level application test + - EESSI_LAMMPS