diff --git a/.github/workflows/scripts/only_latest_easystacks.sh b/.github/workflows/scripts/only_latest_easystacks.sh new file mode 100755 index 0000000000..acc9d3279a --- /dev/null +++ b/.github/workflows/scripts/only_latest_easystacks.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# +# This script figures out the latest version of EasyBuild being used for the installation of easystack +# files. +# +# This file is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Alan O'Cais (CECAM) +# +# license: GPLv2 +# + +EESSI_VERSION=${EESSI_VERSION:-"2023.06"} + +directory="easystacks/software.eessi.io/${EESSI_VERSION}" +# List of example filenames +files=($(find "$directory" -name "*.yml" | grep -e '-eb-')) +[ -n "$DEBUG" ] && echo "${files[@]}" + +versions=() +# Loop over each filename +for filename in "${files[@]}"; do + # Extract the semantic version using grep + version=$(echo "$filename" | grep -oP '(?<=eb-)\d+\.\d+\.\d+?(?=-)') + + # Output the result + [ -n "$DEBUG" ] && echo "Filename: $filename" + [ -n "$DEBUG" ] && echo "Extracted version: $version" + [ -n "$DEBUG" ] && echo + versions+=("$version") +done +highest_version=$(printf "%s\n" "${versions[@]}" | sort -V | tail -n 1) + +[ -n "$DEBUG" ] && echo "Highest version: $highest_version" +[ -n "$DEBUG" ] && echo +[ -n "$DEBUG" ] && echo "Matching files:" +all_latest_easystacks=($(find $directory -type f -name "*eb-$highest_version*.yml")) + +accel_latest_easystacks=() +cpu_latest_easystacks=() + +# Loop through the array and split based on partial matching of string +accel="/accel/" +for item in "${all_latest_easystacks[@]}"; do + if [[ "$item" == *"$accel"* ]]; then + accel_latest_easystacks+=("$item") + else + cpu_latest_easystacks+=("$item") + fi +done + +# Output the results +if [ -n "$ACCEL_EASYSTACKS" ]; then + echo "${accel_latest_easystacks[@]}" +else + echo "${cpu_latest_easystacks[@]}" +fi diff --git a/.github/workflows/test-software.eessi.io.yml b/.github/workflows/test-software.eessi.io.yml index 6f592cf4c4..ca3792f6ef 100644 --- a/.github/workflows/test-software.eessi.io.yml +++ b/.github/workflows/test-software.eessi.io.yml @@ -57,15 +57,20 @@ jobs: env | grep ^EESSI | sort # first check the CPU-only builds for this CPU target - echo "just run check_missing_installations.sh (should use easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml)" - for easystack_file in $(ls easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do + echo "just run check_missing_installations.sh (should use easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml with latest EasyBuild release)" + for easystack_file in $(EESSI_VERSION=${{matrix.EESSI_VERSION}} .github/workflows/scripts/only_latest_easystacks.sh); do if [ ${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} = "x86_64/amd/zen4" ]; then if grep -q 2022b <<<"${easystack_file}"; then - # skip the check of installed software on zen4 for foss/2022b builds - continue - elif grep -q CUDA <<<"${easystack_file}"; then - # skip the check of install CUDA software in the CPU path for zen4 - continue + # skip the check of installed software on zen4 for foss/2022b builds + continue + fi + if [[ $easystack_file == *"rebuilds"* ]]; then + # Also handle rebuilds, make a temporary EasyStack file where we clean out all 2022b stuff and use that + new_easystack=$(mktemp pruned_easystackXXX --suffix=.yml) + # first clean out the options then clean out the .eb name + sed '/2022b\|12\.2\.0/,/\.eb/{/\.eb/!d}' "${easystack_file}" | sed '/2022b\|12\.2\.0/d' > $new_easystack + diff --unified=0 "$easystack_file" "$new_easystack" || : + easystack_file="$new_easystack" fi fi echo "check missing installations for ${easystack_file}..." @@ -82,7 +87,7 @@ jobs: for accel in ${accelerators}; do module use ${EESSI_SOFTWARE_PATH}/accel/${accel}/modules/all echo "checking missing installations for accelerator ${accel} using modulepath: ${MODULEPATH}" - for easystack_file in $(ls easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/accel/$(dirname ${accel})/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do + for easystack_file in $(EESSI_VERSION=${{matrix.EESSI_VERSION}} ACCEL_EASYSTACKS=1 .github/workflows/scripts/only_latest_easystacks.sh); do echo "check missing installations for ${easystack_file}..." ./check_missing_installations.sh ${easystack_file} ec=$? diff --git a/EESSI-extend-2023.06-easybuild.eb b/EESSI-extend-2023.06-easybuild.eb index 338d19f624..21b62461bf 100644 --- a/EESSI-extend-2023.06-easybuild.eb +++ b/EESSI-extend-2023.06-easybuild.eb @@ -88,6 +88,16 @@ if (os.getenv("EESSI_CVMFS_INSTALL") ~= nil) then end eessi_cvmfs_install = true easybuild_installpath = os.getenv("EESSI_SOFTWARE_PATH") + eessi_accelerator_target = os.getenv("EESSI_ACCELERATOR_TARGET") + if (eessi_accelerator_target ~= nil) then + cuda_compute_capability = string.match(eessi_accelerator_target, "^nvidia/cc([0-9][0-9])$") + if (cuda_compute_capability ~= nil) then + easybuild_installpath = pathJoin(easybuild_installpath, 'accel', eessi_accelerator_target) + easybuild_cuda_compute_capabilities = cuda_compute_capability:sub(1, 1) .. "." .. cuda_compute_capability:sub(2, 2) + else + LmodError("Incorrect value for $EESSI_ACCELERATOR_TARGET: " .. eessi_accelerator_target) + end + end elseif (os.getenv("EESSI_SITE_INSTALL") ~= nil) then -- Make sure no other EESSI install environment variables are set if ((os.getenv("EESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("EESSI_USER_INSTALL") ~= nil)) then @@ -147,6 +157,11 @@ setenv ("EASYBUILD_UMASK", "022") -- Allow this module to be loaded when running EasyBuild setenv ("EASYBUILD_ALLOW_LOADED_MODULES", "EasyBuild,EESSI-extend") +-- Set environment variables if building for CUDA compute capabilities +if (easybuild_cuda_compute_capabilities ~= nil) then + setenv ("EASYBUILD_CUDA_COMPUTE_CAPABILITIES", easybuild_cuda_compute_capabilities) +end + -- Set all related environment variables if we have project or user installations (including extending MODULEPATH) if (user_modulepath ~= nil) then -- Use a more restrictive umask for this case diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 65c43d5ac5..83c06c2184 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -229,22 +229,34 @@ if [[ "${EESSI_CVMFS_REPO}" != /cvmfs/dev.eessi.io ]]; then ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} fi +echo ">> Configuring EasyBuild..." + +# Make sure EESSI-extend is not loaded, and configure location variables for a +# CVMFS installation +module unload EESSI-extend +unset EESSI_USER_INSTALL +unset EESSI_PROJECT_INSTALL +unset EESSI_SITE_INSTALL +export EESSI_CVMFS_INSTALL=1 + +# We now run 'source load_eessi_extend_module.sh' to load or install and load the +# EESSI-extend module which sets up all build environment settings. +# The script requires the EESSI_VERSION given as argument, a couple of +# environment variables set (TMPDIR, EB and EASYBUILD_INSTALLPATH) and the +# function check_exit_code defined. +# NOTE 1, the script exits if those variables/functions are undefined. +# NOTE 2, loading the EESSI-extend module may adjust the value of EASYBUILD_INSTALLPATH, +# e.g., to point to the installation directory for accelerators. +# NOTE 3, we have to set a default for EASYBUILD_INSTALLPATH here in cases the +# EESSI-extend module itself needs to be installed. +export EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE} +source load_eessi_extend_module.sh ${EESSI_VERSION} + # Install full CUDA SDK and cu* libraries in host_injections # Hardcode this for now, see if it works # TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install # Allow skipping CUDA SDK install in e.g. CI environments -# The install_cuda... script uses EasyBuild. So, we need to check if we have EB -# or skip this step. echo "Going to install full CUDA SDK and cu* libraries under host_injections if necessary" -module_avail_out=$TMPDIR/ml.out -module avail 2>&1 | grep EasyBuild &> ${module_avail_out} -if [[ $? -eq 0 ]]; then - echo_green ">> Found an EasyBuild module" -else - echo_yellow ">> No EasyBuild module found: skipping step to install CUDA (see output in ${module_avail_out})" - export skip_cuda_install=True -fi - temp_install_storage=${TMPDIR}/temp_install_storage mkdir -p ${temp_install_storage} if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then @@ -253,7 +265,7 @@ if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then --accept-cuda-eula \ --accept-cudnn-eula else - echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed OR no EasyBuild module was found" + echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed" fi # Install NVIDIA drivers in host_injections (if they exist) @@ -263,18 +275,6 @@ if command_exists "nvidia-smi"; then fi -echo ">> Configuring EasyBuild..." - -# Make sure that we use the EESSI_CVMFS_INSTALL -# Since the path is set when loading EESSI-extend, we reload it to make sure it works - even if it is already loaded -# Note we need to do this after running install_cuda_and_libraries, since that does installations in the EESSI_SITE_INSTALL -unset EESSI_USER_INSTALL -unset EESSI_PROJECT_INSTALL -unset EESSI_SITE_INSTALL -export EESSI_CVMFS_INSTALL=1 -module unload EESSI-extend -module load EESSI-extend/${EESSI_VERSION}-easybuild - if [ ! -z "${shared_fs_path}" ]; then shared_eb_sourcepath=${shared_fs_path}/easybuild/sources echo ">> Using ${shared_eb_sourcepath} as shared EasyBuild source path" diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index 98576efcb0..1a03a7af98 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -129,6 +129,9 @@ if [ $EUID -eq 0 ]; then echo_yellow "Removing ${app_dir} and ${app_module}..." rm -rf ${app_dir} rm -rf ${app_module} + # recreate some directory to work around permission denied + # issues when rebuilding the package + mkdir -p ${app_dir}/easybuild done else fatal_error "Easystack file ${easystack_file} not found!" diff --git a/bot/build.sh b/bot/build.sh index 3fd343e96f..81b3ef4660 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -208,6 +208,7 @@ else declare -a REMOVAL_STEP_ARGS=() REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") + # add fakeroot option in order to be able to remove software, see: # https://github.com/EESSI/software-layer/issues/312 REMOVAL_STEP_ARGS+=("--fakeroot") diff --git a/bot/check-build.sh b/bot/check-build.sh index f185b18dda..41aeab258e 100755 --- a/bot/check-build.sh +++ b/bot/check-build.sh @@ -17,6 +17,7 @@ # - SUCCESS (all of) # - working directory contains slurm-JOBID.out file # - working directory contains eessi*tar.gz +# - no message FATAL # - no message ERROR # - no message FAILED # - no message ' required modules missing:' @@ -25,6 +26,7 @@ # - FAILED (one of ... implemented as NOT SUCCESS) # - no slurm-JOBID.out file # - no tarball +# - message with FATAL # - message with ERROR # - message with FAILED # - message with ' required modules missing:' @@ -105,6 +107,16 @@ else [[ ${VERBOSE} -ne 0 ]] && echo " Slurm output file '"${job_out}"' NOT found" fi +FATAL=-1 +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then + GP_fatal='FATAL: ' + grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_fatal}") + [[ $? -eq 0 ]] && FATAL=1 || FATAL=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_fatal}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}" +fi + ERROR=-1 if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then GP_error='ERROR: ' @@ -163,6 +175,7 @@ fi [[ ${VERBOSE} -ne 0 ]] && echo "SUMMARY: ${job_dir}/${job_out}" [[ ${VERBOSE} -ne 0 ]] && echo " : ()" +[[ ${VERBOSE} -ne 0 ]] && echo " FATAL......: $([[ $FATAL -eq 1 ]] && echo 'yes' || echo 'no') (no)" [[ ${VERBOSE} -ne 0 ]] && echo " ERROR......: $([[ $ERROR -eq 1 ]] && echo 'yes' || echo 'no') (no)" [[ ${VERBOSE} -ne 0 ]] && echo " FAILED.....: $([[ $FAILED -eq 1 ]] && echo 'yes' || echo 'no') (no)" [[ ${VERBOSE} -ne 0 ]] && echo " REQ_MISSING: $([[ $MISSING -eq 1 ]] && echo 'yes' || echo 'no') (no)" @@ -190,6 +203,7 @@ job_result_file=_bot_job${SLURM_JOB_ID}.result # Default reason: if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]] && \ + [[ ${FATAL} -eq 0 ]] && \ [[ ${ERROR} -eq 0 ]] && \ [[ ${FAILED} -eq 0 ]] && \ [[ ${MISSING} -eq 0 ]] && \ @@ -223,6 +237,7 @@ fi #
_Details_
#
# :white_check_mark: job output file slurm-4682.out
+# :white_check_mark: no message matching FATAL:
# :white_check_mark: no message matching ERROR:
# :white_check_mark: no message matching FAILED:
# :white_check_mark: no message matching required modules missing:
@@ -264,6 +279,7 @@ fi #
_Details_
#
# :white_check_mark: job output file slurm-4682.out
+# :x: no message matching FATAL:
# :x: no message matching ERROR:
# :white_check_mark: no message matching FAILED:
# :x: no message matching required modules missing:
@@ -381,6 +397,10 @@ success_msg="job output file ${job_out}" failure_msg="no job output file ${job_out}" comment_details_list=${comment_details_list}$(add_detail ${SLURM_OUTPUT_FOUND} 1 "${success_msg}" "${failure_msg}") +success_msg="no message matching ${GP_fatal}" +failure_msg="found message matching ${GP_fatal}" +comment_details_list=${comment_details_list}$(add_detail ${FATAL} 0 "${success_msg}" "${failure_msg}") + success_msg="no message matching ${GP_error}" failure_msg="found message matching ${GP_error}" comment_details_list=${comment_details_list}$(add_detail ${ERROR} 0 "${success_msg}" "${failure_msg}") diff --git a/check_missing_installations.sh b/check_missing_installations.sh index 280de294af..79f6acc733 100755 --- a/check_missing_installations.sh +++ b/check_missing_installations.sh @@ -25,17 +25,6 @@ easystack=$1 LOCAL_TMPDIR=$(mktemp -d) -# Clone the develop branch of EasyBuild and use that to search for easyconfigs - -if [[ -z ${EASYBUILD_ROBOT_PATHS} ]]; then - git clone -b develop https://github.com/easybuilders/easybuild-easyconfigs.git $LOCAL_TMPDIR/easyconfigs - export EASYBUILD_ROBOT_PATHS=$LOCAL_TMPDIR/easyconfigs/easybuild/easyconfigs -fi - -# All PRs used in EESSI are supposed to be merged, so we can strip out all cases of from-pr -tmp_easystack=${LOCAL_TMPDIR}/$(basename ${easystack}) -grep -v from-pr ${easystack} > ${tmp_easystack} - source $TOPDIR/scripts/utils.sh source $TOPDIR/configure_easybuild @@ -45,34 +34,11 @@ ${EB:-eb} --show-config echo ">> Checking for missing installations in ${EASYBUILD_INSTALLPATH}..." eb_missing_out=$LOCAL_TMPDIR/eb_missing.out -${EB:-eb} --easystack ${tmp_easystack} --missing 2>&1 | tee ${eb_missing_out} +${EB:-eb} --easystack ${easystack} --missing 2>&1 | tee ${eb_missing_out} exit_code=${PIPESTATUS[0]} ok_msg="Command 'eb --missing ...' succeeded, analysing output..." fail_msg="Command 'eb --missing ...' failed, check log '${eb_missing_out}'" -if [ "$exit_code" -ne 0 ] && [ ! -z "$pr_exceptions" ]; then - # We might have failed due to unmerged PRs. Try to make exceptions for --from-pr added in this PR - # to software-layer, and see if then it passes. If so, we can report a more specific fail_msg - # Note that if no --from-pr's were used in this PR, $pr_exceptions will be empty and we might as - # well skip this check - unmerged PRs can not be the reason for the non-zero exit code in that scenario - - # Let's use awk so we can allow for exceptions if we are given a PR diff file - awk_command="awk '\!/'from-pr'/ EXCEPTIONS' $easystack" - awk_command=${awk_command/\\/} # Strip out the backslash we needed for ! - eval ${awk_command/EXCEPTIONS/$pr_exceptions} > ${tmp_easystack} - - msg=">> Checking for missing installations in ${EASYBUILD_INSTALLPATH}," - msg="${msg} allowing for --from-pr's that were added in this PR..." - echo ${msg} - eb_missing_out=$LOCAL_TMPDIR/eb_missing_with_from_pr.out - ${EB:-eb} --easystack ${tmp_easystack} --missing 2>&1 | tee ${eb_missing_out} - exit_code_with_from_pr=${PIPESTATUS[0]} - - # If now we succeeded, the reason must be that we originally stripped the --from-pr's - if [ "$exit_code_with_from_pr" -eq 0 ]; then - fail_msg="$fail_msg (are you sure all PRs referenced have been merged in EasyBuild?)" - fi -fi check_exit_code ${exit_code} "${ok_msg}" "${fail_msg}" diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py index 11ca614be5..e959572ab1 100755 --- a/create_lmodsitepackage.py +++ b/create_lmodsitepackage.py @@ -114,7 +114,7 @@ -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections. -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse -- to load the CUDA module and print an informative message on how to set up GPU support for EESSI - local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" + local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/site_specific_config/gpu/.\\n" if simpleName == 'CUDA' then -- get the full host_injections path local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') diff --git a/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml index 873c19aa33..7ac4ba6cca 100644 --- a/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml +++ b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml @@ -1,3 +1,7 @@ easyconfigs: - CUDA-12.1.1.eb - cuDNN-8.9.2.26-CUDA-12.1.1.eb + - LightGBM-4.5.0-foss-2023a-CUDA-12.1.1.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21699 + from-commit: e3407bd127d248c08960f6b09c973da0fdecc2c3 diff --git a/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023b-CUDA.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023b-CUDA.yml new file mode 100644 index 0000000000..e6562e68a3 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023b-CUDA.yml @@ -0,0 +1,19 @@ +easyconfigs: + - CUDA-12.4.0.eb: + options: + accept-eula-for: CUDA + - UCX-CUDA-1.15.0-GCCcore-13.2.0-CUDA-12.4.0.eb + - UCC-CUDA-1.2.0-GCCcore-13.2.0-CUDA-12.4.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21565 + from-commit: 46141a3f40e699433fac03af2d3ed81bd5a62da7 + - OSU-Micro-Benchmarks-7.5-gompi-2023b-CUDA-12.4.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21926 + from-commit: de79ec74eb076e1aceda5e21235a73c05ed6764c + - GROMACS-2024.4-foss-2023b-CUDA-12.4.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21549 + from-commit: 12b53341343967ce5a402fe8190a3c85bce7d49b + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3523 + include-easyblocks-from-commit: 90495ed23d26b3d5fd8162bf5d7b4c073a0682fe diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240925-eb-4.9.4-NCCL-2.18.3-in-accel-prefix.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/rebuilds/20240925-eb-4.9.4-NCCL-2.18.3-in-accel-prefix.yml similarity index 100% rename from easystacks/software.eessi.io/2023.06/rebuilds/20240925-eb-4.9.4-NCCL-2.18.3-in-accel-prefix.yml rename to easystacks/software.eessi.io/2023.06/accel/nvidia/rebuilds/20240925-eb-4.9.4-NCCL-2.18.3-in-accel-prefix.yml diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023a.yml index 2fac9b8330..7ea65ba94e 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023a.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023a.yml @@ -17,3 +17,23 @@ easyconfigs: options: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21650 from-commit: 109998f6adcda7efb4174b1e5f73b41ee82d1f13 + - Solids4foam-2.1-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21606 + from-commit: 63562c58acf1be64407192b6862c3bd80253d2e0 + - Cassiopeia-2.0.0-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21657 + from-commit: 7f1f0e60487e7e1fcb5c4e6bc4fbc4f89994e3fd + - LightGBM-4.5.0-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21699 + from-commit: e3407bd127d248c08960f6b09c973da0fdecc2c3 + - OpenFOAM-v2406-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3519 + include-easyblocks-from-commit: e4a3ff1932350d575dffc7597435609fad6dd691 + - Paraver-4.11.4-GCC-12.3.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20230 + from-commit: 91c8df6b4c0810061e9f325427c9c79e961bc4b0 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml index 099df62c08..69b05fb02a 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml @@ -7,3 +7,17 @@ easyconfigs: options: # see https://github.com/easybuilders/easybuild-easyblocks/pull/3496 include-easyblocks-from-commit: 60633b0acfd41a0732992d9e16800dae71a056eb + - Cython-3.0.10-GCCcore-13.2.0.eb + - Mustache-1.3.3-foss-2023b.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21783 + from-commit: 5fa3db9eb36f91cba3fbf351549f8ba2849abc33 + - GDRCopy-2.4-GCCcore-13.2.0.eb + - GROMACS-2024.4-foss-2023b.eb: + options: + # https://github.com/easybuilders/easybuild-easyconfigs/pull/21851 + from-commit: f0fa64b440deaf5fb0a6d26ff1bb3e9f36626c8a + - SlurmViewer-1.0.1-GCCcore-13.2.0.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21899 + from-commit: 0bdeb23c9ea5a3caefd353ecd936919424c1bba4 diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/2024.05.06-eb-4.9.1-CUDA-12.1.1-ship-full-runtime.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240506-eb-4.9.1-CUDA-12.1.1-ship-full-runtime.yml similarity index 100% rename from easystacks/software.eessi.io/2023.06/rebuilds/2024.05.06-eb-4.9.1-CUDA-12.1.1-ship-full-runtime.yml rename to easystacks/software.eessi.io/2023.06/rebuilds/20240506-eb-4.9.1-CUDA-12.1.1-ship-full-runtime.yml diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml new file mode 100644 index 0000000000..e4c658784f --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml @@ -0,0 +1,6 @@ +# 2024.11.12 +# for installations under /cvmfs, if EESSI_ACCELERATOR_TARGET is set, +# EESSI-extend should adjust EASYBUILD_INSTALLPATH and set +# EASYBUILD_CUDA_COMPUTE_CAPABILITIES +easyconfigs: + - EESSI-extend-2023.06-easybuild.eb diff --git a/eb_hooks.py b/eb_hooks.py index 03642656ea..5bf42b6bbf 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -595,7 +595,7 @@ def pre_configure_hook_LAMMPS_zen4(self, *args, **kwargs): cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') if self.name == 'LAMMPS': - if self.version in ('2Aug2023_update2', '29Aug2024'): + if self.version in ('2Aug2023_update2', '2Aug2023_update4', '29Aug2024'): if get_cpu_architecture() == X86_64: if cpu_target == CPU_TARGET_ZEN4: # There is no support for ZEN4 in LAMMPS yet so falling back to ZEN3 @@ -786,6 +786,11 @@ def post_postproc_cuda(self, *args, **kwargs): for word in line.split(): if any(ext in word for ext in file_extensions): allowlist.append(os.path.splitext(word)[0]) + # The EULA of CUDA 12.4 introduced a typo (confirmed by NVIDIA): + # libnvrtx-builtins_static.so should be libnvrtc-builtins_static.so + if 'libnvrtx-builtins_static' in allowlist: + allowlist.remove('libnvrtx-builtins_static') + allowlist.append('libnvrtc-builtins_static') allowlist = sorted(set(allowlist)) self.log.info("Allowlist for files in CUDA installation that can be redistributed: " + ', '.join(allowlist)) diff --git a/install_scripts.sh b/install_scripts.sh index 8787888c79..c5a9a556c2 100755 --- a/install_scripts.sh +++ b/install_scripts.sh @@ -132,6 +132,7 @@ copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia ${INSTALL_PREFIX}/script # Easystacks to be used to install software in host injections host_injections_easystacks=( eessi-2023.06-eb-4.9.4-2023a-CUDA-host-injections.yml + eessi-2023.06-eb-4.9.4-2023b-CUDA-host-injections.yml ) copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia/easystacks \ ${INSTALL_PREFIX}/scripts/gpu_support/nvidia/easystacks "${host_injections_easystacks[@]}" diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh new file mode 100755 index 0000000000..62b6e3f3ae --- /dev/null +++ b/load_eessi_extend_module.sh @@ -0,0 +1,116 @@ +# Script to load the environment module for EESSI-extend. +# If that module is not available yet, a specific version will be installed using the latest EasyBuild. +# +# This script must be sourced, since it makes changes in the current environment, like loading an EESSI-extend module. +# +# Assumptions (if one is not satisfied the script prints a message and exits) +# - EESSI version is given as first argument +# - TMPDIR is set +# - EB is set +# - EASYBUILD_INSTALLPATH needs to be set +# - Function check_exit_code is defined; +# scripts/utils.sh in EESSI/software-layer repository defines this function, hence +# scripts/utils.sh shall be sourced before this script is run +# +# This script is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Kenneth Hoste (@boegel, HPC-UGent) +# author: Alan O'Cais (@ocaisa, CECAM) +# author: Thomas Roeblitz (@trz42, University of Bergen) +# +# license: GPLv2 +# +# +set -o pipefail + +# this script is *sourced*, not executed, so can't rely on $0 to determine path to self or script name +# $BASH_SOURCE points to correct path or script name, see also http://mywiki.wooledge.org/BashFAQ/028 +if [ $# -ne 1 ]; then + echo "Usage: source ${BASH_SOURCE} " >&2 + exit 1 +fi + +EESSI_EXTEND_VERSION="${1}-easybuild" + +# make sure that environment variables that we expect to be set are indeed set +if [ -z "${TMPDIR}" ]; then + echo "\$TMPDIR is not set; exiting" >&2 + exit 2 +fi + +# ${EB} is used to specify which 'eb' command should be used; +# can potentially be more than just 'eb', for example when using 'eb --optarch=GENERIC' +if [ -z "${EB}" ]; then + echo "\$EB is not set; exiting" >&2 + exit 2 +fi + +# ${EASYBUILD_INSTALLPATH} points to the installation path and needs to be set +if [ -z "${EASYBUILD_INSTALLPATH}" ]; then + echo "\$EASYBUILD_INSTALLPATH is not set; exiting" >&2 + exit 2 +fi + +# make sure that utility functions are defined (cfr. scripts/utils.sh script in EESSI/software-layer repo) +type check_exit_code +if [ $? -ne 0 ]; then + echo "check_exit_code function is not defined; exiting" >&2 + exit 3 +fi + +echo ">> Checking for EESSI-extend module..." + +ml_av_eessi_extend_out=${TMPDIR}/ml_av_eessi_extend.out +# need to use --ignore_cache to avoid the case that the module was removed (to be +# rebuilt) but it is still in the cache +module --ignore_cache avail 2>&1 | grep -i EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out} + +if [[ $? -eq 0 ]]; then + echo_green ">> Module for EESSI-extend/${EESSI_EXTEND_VERSION} found!" +else + echo_yellow ">> No module yet for EESSI-extend/${EESSI_EXTEND_VERSION}, installing it..." + + EB_TMPDIR=${TMPDIR}/ebtmp + echo ">> Using temporary installation of EasyBuild (in ${EB_TMPDIR})..." + pip_install_out=${TMPDIR}/pip_install.out + pip3 install --prefix ${EB_TMPDIR} easybuild &> ${pip_install_out} + + # keep track of original $PATH and $PYTHONPATH values, so we can restore them + ORIG_PATH=${PATH} + ORIG_PYTHONPATH=${PYTHONPATH} + + # source configure_easybuild to use correct eb settings + ( + export EASYBUILD_PREFIX=${TMPDIR}/easybuild + export EASYBUILD_READ_ONLY_INSTALLDIR=1 + + echo ">> Final installation in ${EASYBUILD_INSTALLPATH}..." + export PATH=${EB_TMPDIR}/bin:${PATH} + export PYTHONPATH=$(ls -d ${EB_TMPDIR}/lib/python*/site-packages):${PYTHONPATH} + eb_install_out=${TMPDIR}/eb_install.out + ok_msg="EESSI-extend/${EESSI_EXTEND_VERSION} installed, let's go!" + fail_msg="Installing EESSI-extend/${EESSI_EXTEND_VERSION} failed, that's not good... (output: ${eb_install_out})" + # while always adding --try-amend=keep... may do no harm, we could make + # an attempt to figure out if it is needed, e.g., when we are rebuilding + ${EB} "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" --try-amend=keeppreviousinstall=True 2>&1 | tee ${eb_install_out} + check_exit_code $? "${ok_msg}" "${fail_msg}" + ) + + # restore origin $PATH and $PYTHONPATH values, and clean up environment variables that are no longer needed + export PATH=${ORIG_PATH} + export PYTHONPATH=${ORIG_PYTHONPATH} + unset EB_TMPDIR ORIG_PATH ORIG_PYTHONPATH + + module --ignore_cache avail EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out} + if [[ $? -eq 0 ]]; then + echo_green ">> EESSI-extend/${EESSI_EXTEND_VERSION} module installed!" + else + fatal_error "EESSI-extend/${EESSI_EXTEND_VERSION} module failed to install?! (output of 'pip install' in ${pip_install_out}, output of 'eb' in ${eb_install_out}, output of 'module avail EESSI-extend' in ${ml_av_eessi_extend_out})" + fi +fi + +echo ">> Loading EESSI-extend/${EESSI_EXTEND_VERSION} module..." +module --ignore_cache load EESSI-extend/${EESSI_EXTEND_VERSION} + +unset EESSI_EXTEND_VERSION diff --git a/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023b-CUDA-host-injections.yml b/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023b-CUDA-host-injections.yml new file mode 100644 index 0000000000..5cfec813f6 --- /dev/null +++ b/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023b-CUDA-host-injections.yml @@ -0,0 +1,4 @@ +# This EasyStack provides a list of all the EasyConfigs that should be installed in host_injections +# for nvidia GPU support, because they cannot (fully) be shipped as part of EESSI due to license constraints +easyconfigs: + - CUDA-12.4.0.eb diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh index 69b11e26da..741ead0559 100755 --- a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh +++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh @@ -77,10 +77,6 @@ done # Make sure EESSI is initialised check_eessi_initialised -# Make sure that `EESSI-extend` will install in the site installation path EESSI_SITE_SOFTWARE_PATH -export EESSI_SITE_INSTALL=1 -echo "EESSI_SITE_SOFTWARE_PATH=${EESSI_SITE_SOFTWARE_PATH}" - # we need a directory we can use for temporary storage if [[ -z "${TEMP_DIR}" ]]; then tmpdir=$(mktemp -d) @@ -93,22 +89,26 @@ else fi echo "Created temporary directory '${tmpdir}'" -# use EESSI_SITE_SOFTWARE_PATH/.modules/all as MODULEPATH +# Store MODULEPATH so it can be restored at the end of each loop iteration SAVE_MODULEPATH=${MODULEPATH} for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do echo -e "Processing easystack file ${easystack_file}...\n\n" - # We don't want hooks used in this install, we need vanilla installations - touch "${tmpdir}"/none.py - export EASYBUILD_HOOKS="${tmpdir}/none.py" - # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file eb_version=$(echo ${EASYSTACK_FILE} | sed 's/.*eb-\([0-9.]*\).*/\1/g') # Load EasyBuild version for this easystack file _before_ loading EESSI-extend - module avail EasyBuild + module_avail_out=${tmpdir}/ml.out + module avail 2>&1 | grep EasyBuild/${eb_version} &> ${module_avail_out} + if [[ $? -eq 0 ]]; then + echo_green ">> Found an EasyBuild/${eb_version} module" + else + echo_yellow ">> No EasyBuild/${eb_version} module found: skipping step to install easystack file ${easystack_file} (see output in ${module_avail_out})" + continue + fi module load EasyBuild/${eb_version} + # Make sure EESSI-extend does a site install here # We need to reload it with the current environment variables set unset EESSI_CVMFS_INSTALL @@ -116,7 +116,19 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do unset EESSI_USER_INSTALL export EESSI_SITE_INSTALL=1 module unload EESSI-extend - module load EESSI-extend/${EESSI_VERSION}-easybuild + ml_av_eessi_extend_out=${tmpdir}/ml_av_eessi_extend.out + # need to use --ignore_cache to avoid the case that the module was removed (to be + # rebuilt) but it is still in the cache and the rebuild failed + EESSI_EXTEND_VERSION=${EESSI_VERSION}-easybuild + module --ignore_cache avail 2>&1 | grep -i EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out} + if [[ $? -eq 0 ]]; then + echo_green ">> Module for EESSI-extend/${EESSI_EXTEND_VERSION} found!" + else + error="\nNo module for EESSI-extend/${EESSI_EXTEND_VERSION} found\nwhile EESSI has been initialised to use software under ${EESSI_SOFTWARE_PATH}\n" + fatal_error "${error}" + fi + module --ignore_cache load EESSI-extend/${EESSI_EXTEND_VERSION} + unset EESSI_EXTEND_VERSION # Install modules in hidden .modules dir to keep track of what was installed before # (this action is temporary, and we do not call Lmod again within the current shell context, but in EasyBuild @@ -124,6 +136,10 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do MODULEPATH=${EESSI_SITE_SOFTWARE_PATH}/.modules/all echo "set MODULEPATH=${MODULEPATH}" + # We don't want hooks used in this install, we need vanilla installations + touch "${tmpdir}"/none.py + export EASYBUILD_HOOKS="${tmpdir}/none.py" + # show EasyBuild configuration echo "Show EasyBuild configuration" eb --show-config @@ -239,9 +255,11 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do echo_green "all installations at ${EESSI_SITE_SOFTWARE_PATH}/software/... succeeded!" fi - # clean up tmpdir - rm -rf "${tmpdir}" + # clean up tmpdir content + rm -rf "${tmpdir}"/* # Restore MODULEPATH for next loop iteration MODULEPATH=${SAVE_MODULEPATH} done +# Remove the temporary directory +rm -rf "${tmpdir}"