Skip to content

Commit

Permalink
Merge branch '2023.06-software.eessi.io' of github.com:EESSI/software…
Browse files Browse the repository at this point in the history
…-layer into riscv
  • Loading branch information
bedroge committed Dec 3, 2024
2 parents 17daa4c + 9052eb2 commit 74dcb6a
Show file tree
Hide file tree
Showing 21 changed files with 357 additions and 82 deletions.
58 changes: 58 additions & 0 deletions .github/workflows/scripts/only_latest_easystacks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/bash
#
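# This script determines the latest version of EasyBuild that appears in the names of the easystack
# files for the given EESSI version ($EESSI_VERSION, default 2023.06), and prints the easystack
# files for that EasyBuild version: the CPU-only ones by default, or the ones under an accel/
# subdirectory if $ACCEL_EASYSTACKS is set. Set $DEBUG to get additional diagnostic output.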
#
# This file is part of the EESSI software layer, see
# https://github.com/EESSI/software-layer.git
#
# author: Alan O'Cais (CECAM)
#
# license: GPLv2
#

EESSI_VERSION=${EESSI_VERSION:-"2023.06"}

# Directory (relative to the current working directory) that holds the
# easystack files for the selected EESSI version.
directory="easystacks/software.eessi.io/${EESSI_VERSION}"

# Print a diagnostic message, but only if $DEBUG is set to a non-empty value.
# Diagnostics go to stderr: stdout is reserved for the list of easystack files,
# which callers capture with $(...).
debug() {
  [[ -n "${DEBUG:-}" ]] || return 0
  printf '%s\n' "$*" >&2
}

# Print (space-separated, on a single line) the easystack files under
# ${directory} that belong to the highest EasyBuild version found in the
# easystack file names.
# Globals:   directory (read), ACCEL_EASYSTACKS (read), DEBUG (read)
# Outputs:   accelerator easystacks (path contains /accel/) if
#            $ACCEL_EASYSTACKS is non-empty, CPU-only easystacks otherwise;
#            an empty line if there is nothing to report
# Returns:   0 on success, 1 if ${directory} does not exist
main() {
  local -a candidates=() versions=() all_latest_easystacks=()
  local -a accel_latest_easystacks=() cpu_latest_easystacks=()
  local filename item highest_version
  # An EasyBuild version is the X.Y.Z in '...eb-X.Y.Z-...'
  local version_regex='eb-([0-9]+\.[0-9]+\.[0-9]+)-'
  local accel="/accel/"

  if [[ ! -d "${directory}" ]]; then
    printf 'ERROR: easystack directory %s not found\n' "${directory}" >&2
    return 1
  fi

  # All easystack files that mention an EasyBuild version ('-eb-') in their path;
  # read line by line so that paths are never word-split or glob-expanded.
  mapfile -t candidates < <(
    find "${directory}" -name '*.yml' -path '*-eb-*' | LC_ALL=C sort
  )
  debug "${candidates[*]}"

  # Collect the EasyBuild version of every candidate file
  for filename in "${candidates[@]}"; do
    debug "Filename: ${filename}"
    if [[ "${filename}" =~ ${version_regex} ]]; then
      debug "Extracted version: ${BASH_REMATCH[1]}"
      versions+=("${BASH_REMATCH[1]}")
    else
      # Do not record an empty version: it would turn the file name pattern
      # used below into '*eb-*.yml', which matches every easystack file.
      debug "Extracted version: (none, file ignored)"
    fi
    debug ""
  done

  if (( ${#versions[@]} == 0 )); then
    printf 'WARNING: no EasyBuild version found in easystack file names under %s\n' \
      "${directory}" >&2
    # Same output as for an empty selection: a single empty line
    printf '\n'
    return 0
  fi

  highest_version=$(printf '%s\n' "${versions[@]}" | sort -V | tail -n 1)
  debug "Highest version: ${highest_version}"
  debug ""

  # Sorted so that the output does not depend on directory traversal order
  mapfile -t all_latest_easystacks < <(
    find "${directory}" -type f -name "*eb-${highest_version}*.yml" | LC_ALL=C sort
  )
  debug "Matching files:"
  for item in "${all_latest_easystacks[@]}"; do
    debug "${item}"
  done

  # Split the matching files into accelerator and CPU-only easystacks
  for item in "${all_latest_easystacks[@]}"; do
    if [[ "${item}" == *"${accel}"* ]]; then
      accel_latest_easystacks+=("${item}")
    else
      cpu_latest_easystacks+=("${item}")
    fi
  done

  # Output the results; "${array[*]}" joins on the first character of IFS,
  # so pin it to a space to keep the output format independent of the caller.
  local IFS=' '
  if [[ -n "${ACCEL_EASYSTACKS:-}" ]]; then
    printf '%s\n' "${accel_latest_easystacks[*]}"
  else
    printf '%s\n' "${cpu_latest_easystacks[*]}"
  fi
}

main "$@"
21 changes: 13 additions & 8 deletions .github/workflows/test-software.eessi.io.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,20 @@ jobs:
env | grep ^EESSI | sort
# first check the CPU-only builds for this CPU target
echo "just run check_missing_installations.sh (should use easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml)"
for easystack_file in $(ls easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do
echo "just run check_missing_installations.sh (should use easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml with latest EasyBuild release)"
for easystack_file in $(EESSI_VERSION=${{matrix.EESSI_VERSION}} .github/workflows/scripts/only_latest_easystacks.sh); do
if [ ${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} = "x86_64/amd/zen4" ]; then
if grep -q 2022b <<<"${easystack_file}"; then
# skip the check of installed software on zen4 for foss/2022b builds
continue
elif grep -q CUDA <<<"${easystack_file}"; then
# skip the check of install CUDA software in the CPU path for zen4
continue
# skip the check of installed software on zen4 for foss/2022b builds
continue
fi
if [[ $easystack_file == *"rebuilds"* ]]; then
# Also handle rebuilds, make a temporary EasyStack file where we clean out all 2022b stuff and use that
new_easystack=$(mktemp pruned_easystackXXX --suffix=.yml)
# first clean out the options then clean out the .eb name
sed '/2022b\|12\.2\.0/,/\.eb/{/\.eb/!d}' "${easystack_file}" | sed '/2022b\|12\.2\.0/d' > $new_easystack
diff --unified=0 "$easystack_file" "$new_easystack" || :
easystack_file="$new_easystack"
fi
fi
echo "check missing installations for ${easystack_file}..."
Expand All @@ -82,7 +87,7 @@ jobs:
for accel in ${accelerators}; do
module use ${EESSI_SOFTWARE_PATH}/accel/${accel}/modules/all
echo "checking missing installations for accelerator ${accel} using modulepath: ${MODULEPATH}"
for easystack_file in $(ls easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/accel/$(dirname ${accel})/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do
for easystack_file in $(EESSI_VERSION=${{matrix.EESSI_VERSION}} ACCEL_EASYSTACKS=1 .github/workflows/scripts/only_latest_easystacks.sh); do
echo "check missing installations for ${easystack_file}..."
./check_missing_installations.sh ${easystack_file}
ec=$?
Expand Down
15 changes: 15 additions & 0 deletions EESSI-extend-2023.06-easybuild.eb
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ if (os.getenv("EESSI_CVMFS_INSTALL") ~= nil) then
end
eessi_cvmfs_install = true
easybuild_installpath = os.getenv("EESSI_SOFTWARE_PATH")
eessi_accelerator_target = os.getenv("EESSI_ACCELERATOR_TARGET")
if (eessi_accelerator_target ~= nil) then
cuda_compute_capability = string.match(eessi_accelerator_target, "^nvidia/cc([0-9][0-9])$")
if (cuda_compute_capability ~= nil) then
easybuild_installpath = pathJoin(easybuild_installpath, 'accel', eessi_accelerator_target)
easybuild_cuda_compute_capabilities = cuda_compute_capability:sub(1, 1) .. "." .. cuda_compute_capability:sub(2, 2)
else
LmodError("Incorrect value for $EESSI_ACCELERATOR_TARGET: " .. eessi_accelerator_target)
end
end
elseif (os.getenv("EESSI_SITE_INSTALL") ~= nil) then
-- Make sure no other EESSI install environment variables are set
if ((os.getenv("EESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("EESSI_USER_INSTALL") ~= nil)) then
Expand Down Expand Up @@ -147,6 +157,11 @@ setenv ("EASYBUILD_UMASK", "022")
-- Allow this module to be loaded when running EasyBuild
setenv ("EASYBUILD_ALLOW_LOADED_MODULES", "EasyBuild,EESSI-extend")
-- Set environment variables if building for CUDA compute capabilities
if (easybuild_cuda_compute_capabilities ~= nil) then
setenv ("EASYBUILD_CUDA_COMPUTE_CAPABILITIES", easybuild_cuda_compute_capabilities)
end
-- Set all related environment variables if we have project or user installations (including extending MODULEPATH)
if (user_modulepath ~= nil) then
-- Use a more restrictive umask for this case
Expand Down
48 changes: 24 additions & 24 deletions EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -229,22 +229,34 @@ if [[ "${EESSI_CVMFS_REPO}" != /cvmfs/dev.eessi.io ]]; then
${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}
fi

echo ">> Configuring EasyBuild..."

# Make sure EESSI-extend is not loaded, and configure location variables for a
# CVMFS installation
module unload EESSI-extend
unset EESSI_USER_INSTALL
unset EESSI_PROJECT_INSTALL
unset EESSI_SITE_INSTALL
export EESSI_CVMFS_INSTALL=1

# We now run 'source load_eessi_extend_module.sh' to load or install and load the
# EESSI-extend module which sets up all build environment settings.
# The script requires the EESSI_VERSION given as argument, a couple of
# environment variables set (TMPDIR, EB and EASYBUILD_INSTALLPATH) and the
# function check_exit_code defined.
# NOTE 1, the script exits if those variables/functions are undefined.
# NOTE 2, loading the EESSI-extend module may adjust the value of EASYBUILD_INSTALLPATH,
# e.g., to point to the installation directory for accelerators.
# NOTE 3, we have to set a default for EASYBUILD_INSTALLPATH here in case the
# EESSI-extend module itself needs to be installed.
export EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
source load_eessi_extend_module.sh ${EESSI_VERSION}

# Install full CUDA SDK and cu* libraries in host_injections
# Hardcode this for now, see if it works
# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
# Allow skipping CUDA SDK install in e.g. CI environments
# The install_cuda... script uses EasyBuild. So, we need to check if we have EB
# or skip this step.
echo "Going to install full CUDA SDK and cu* libraries under host_injections if necessary"
module_avail_out=$TMPDIR/ml.out
module avail 2>&1 | grep EasyBuild &> ${module_avail_out}
if [[ $? -eq 0 ]]; then
echo_green ">> Found an EasyBuild module"
else
echo_yellow ">> No EasyBuild module found: skipping step to install CUDA (see output in ${module_avail_out})"
export skip_cuda_install=True
fi

temp_install_storage=${TMPDIR}/temp_install_storage
mkdir -p ${temp_install_storage}
if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then
Expand All @@ -253,7 +265,7 @@ if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then
--accept-cuda-eula \
--accept-cudnn-eula
else
echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed OR no EasyBuild module was found"
echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed"
fi

# Install NVIDIA drivers in host_injections (if they exist)
Expand All @@ -263,18 +275,6 @@ if command_exists "nvidia-smi"; then
fi


echo ">> Configuring EasyBuild..."

# Make sure that we use the EESSI_CVMFS_INSTALL
# Since the path is set when loading EESSI-extend, we reload it to make sure it works - even if it is already loaded
# Note we need to do this after running install_cuda_and_libraries, since that does installations in the EESSI_SITE_INSTALL
unset EESSI_USER_INSTALL
unset EESSI_PROJECT_INSTALL
unset EESSI_SITE_INSTALL
export EESSI_CVMFS_INSTALL=1
module unload EESSI-extend
module load EESSI-extend/${EESSI_VERSION}-easybuild

if [ ! -z "${shared_fs_path}" ]; then
shared_eb_sourcepath=${shared_fs_path}/easybuild/sources
echo ">> Using ${shared_eb_sourcepath} as shared EasyBuild source path"
Expand Down
3 changes: 3 additions & 0 deletions EESSI-remove-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ if [ $EUID -eq 0 ]; then
echo_yellow "Removing ${app_dir} and ${app_module}..."
rm -rf ${app_dir}
rm -rf ${app_module}
# recreate the 'easybuild' subdirectory of the removed installation directory
# to work around permission denied issues when rebuilding the package
mkdir -p ${app_dir}/easybuild
done
else
fatal_error "Easystack file ${easystack_file} not found!"
Expand Down
1 change: 1 addition & 0 deletions bot/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ else
declare -a REMOVAL_STEP_ARGS=()
REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}")
REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}")

# add fakeroot option in order to be able to remove software, see:
# https://github.com/EESSI/software-layer/issues/312
REMOVAL_STEP_ARGS+=("--fakeroot")
Expand Down
20 changes: 20 additions & 0 deletions bot/check-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# - SUCCESS (all of)
# - working directory contains slurm-JOBID.out file
# - working directory contains eessi*tar.gz
# - no message FATAL
# - no message ERROR
# - no message FAILED
# - no message ' required modules missing:'
Expand All @@ -25,6 +26,7 @@
# - FAILED (one of ... implemented as NOT SUCCESS)
# - no slurm-JOBID.out file
# - no tarball
# - message with FATAL
# - message with ERROR
# - message with FAILED
# - message with ' required modules missing:'
Expand Down Expand Up @@ -105,6 +107,16 @@ else
[[ ${VERBOSE} -ne 0 ]] && echo " Slurm output file '"${job_out}"' NOT found"
fi

FATAL=-1
if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then
GP_fatal='FATAL: '
grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_fatal}")
[[ $? -eq 0 ]] && FATAL=1 || FATAL=0
# have to be careful to not add searched for pattern into slurm out file
[[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_fatal}"'"
[[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}"
fi

ERROR=-1
if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then
GP_error='ERROR: '
Expand Down Expand Up @@ -163,6 +175,7 @@ fi

[[ ${VERBOSE} -ne 0 ]] && echo "SUMMARY: ${job_dir}/${job_out}"
[[ ${VERBOSE} -ne 0 ]] && echo " <test name>: <actual result> (<expected result>)"
[[ ${VERBOSE} -ne 0 ]] && echo " FATAL......: $([[ $FATAL -eq 1 ]] && echo 'yes' || echo 'no') (no)"
[[ ${VERBOSE} -ne 0 ]] && echo " ERROR......: $([[ $ERROR -eq 1 ]] && echo 'yes' || echo 'no') (no)"
[[ ${VERBOSE} -ne 0 ]] && echo " FAILED.....: $([[ $FAILED -eq 1 ]] && echo 'yes' || echo 'no') (no)"
[[ ${VERBOSE} -ne 0 ]] && echo " REQ_MISSING: $([[ $MISSING -eq 1 ]] && echo 'yes' || echo 'no') (no)"
Expand Down Expand Up @@ -190,6 +203,7 @@ job_result_file=_bot_job${SLURM_JOB_ID}.result

# Default reason:
if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]] && \
[[ ${FATAL} -eq 0 ]] && \
[[ ${ERROR} -eq 0 ]] && \
[[ ${FAILED} -eq 0 ]] && \
[[ ${MISSING} -eq 0 ]] && \
Expand Down Expand Up @@ -223,6 +237,7 @@ fi
# <dt>_Details_</dt>
# <dd>
# :white_check_mark: job output file <code>slurm-4682.out</code><br/>
# :white_check_mark: no message matching <code>FATAL: </code><br/>
# :white_check_mark: no message matching <code>ERROR: </code><br/>
# :white_check_mark: no message matching <code>FAILED: </code><br/>
# :white_check_mark: no message matching <code> required modules missing:</code><br/>
Expand Down Expand Up @@ -264,6 +279,7 @@ fi
# <dt>_Details_</dt>
# <dd>
# :white_check_mark: job output file <code>slurm-4682.out</code><br/>
# :x: no message matching <code>FATAL: </code><br/>
# :x: no message matching <code>ERROR: </code><br/>
# :white_check_mark: no message matching <code>FAILED: </code><br/>
# :x: no message matching <code> required modules missing:</code><br/>
Expand Down Expand Up @@ -381,6 +397,10 @@ success_msg="job output file <code>${job_out}</code>"
failure_msg="no job output file <code>${job_out}</code>"
comment_details_list=${comment_details_list}$(add_detail ${SLURM_OUTPUT_FOUND} 1 "${success_msg}" "${failure_msg}")

success_msg="no message matching <code>${GP_fatal}</code>"
failure_msg="found message matching <code>${GP_fatal}</code>"
comment_details_list=${comment_details_list}$(add_detail ${FATAL} 0 "${success_msg}" "${failure_msg}")

success_msg="no message matching <code>${GP_error}</code>"
failure_msg="found message matching <code>${GP_error}</code>"
comment_details_list=${comment_details_list}$(add_detail ${ERROR} 0 "${success_msg}" "${failure_msg}")
Expand Down
36 changes: 1 addition & 35 deletions check_missing_installations.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,6 @@ easystack=$1

LOCAL_TMPDIR=$(mktemp -d)

# Clone the develop branch of EasyBuild and use that to search for easyconfigs

if [[ -z ${EASYBUILD_ROBOT_PATHS} ]]; then
git clone -b develop https://github.com/easybuilders/easybuild-easyconfigs.git $LOCAL_TMPDIR/easyconfigs
export EASYBUILD_ROBOT_PATHS=$LOCAL_TMPDIR/easyconfigs/easybuild/easyconfigs
fi

# All PRs used in EESSI are supposed to be merged, so we can strip out all cases of from-pr
tmp_easystack=${LOCAL_TMPDIR}/$(basename ${easystack})
grep -v from-pr ${easystack} > ${tmp_easystack}

source $TOPDIR/scripts/utils.sh

source $TOPDIR/configure_easybuild
Expand All @@ -45,34 +34,11 @@ ${EB:-eb} --show-config

echo ">> Checking for missing installations in ${EASYBUILD_INSTALLPATH}..."
eb_missing_out=$LOCAL_TMPDIR/eb_missing.out
${EB:-eb} --easystack ${tmp_easystack} --missing 2>&1 | tee ${eb_missing_out}
${EB:-eb} --easystack ${easystack} --missing 2>&1 | tee ${eb_missing_out}
exit_code=${PIPESTATUS[0]}

ok_msg="Command 'eb --missing ...' succeeded, analysing output..."
fail_msg="Command 'eb --missing ...' failed, check log '${eb_missing_out}'"
if [ "$exit_code" -ne 0 ] && [ ! -z "$pr_exceptions" ]; then
# We might have failed due to unmerged PRs. Try to make exceptions for --from-pr added in this PR
# to software-layer, and see if then it passes. If so, we can report a more specific fail_msg
# Note that if no --from-pr's were used in this PR, $pr_exceptions will be empty and we might as
# well skip this check - unmerged PRs can not be the reason for the non-zero exit code in that scenario

# Let's use awk so we can allow for exceptions if we are given a PR diff file
awk_command="awk '\!/'from-pr'/ EXCEPTIONS' $easystack"
awk_command=${awk_command/\\/} # Strip out the backslash we needed for !
eval ${awk_command/EXCEPTIONS/$pr_exceptions} > ${tmp_easystack}

msg=">> Checking for missing installations in ${EASYBUILD_INSTALLPATH},"
msg="${msg} allowing for --from-pr's that were added in this PR..."
echo ${msg}
eb_missing_out=$LOCAL_TMPDIR/eb_missing_with_from_pr.out
${EB:-eb} --easystack ${tmp_easystack} --missing 2>&1 | tee ${eb_missing_out}
exit_code_with_from_pr=${PIPESTATUS[0]}

# If now we succeeded, the reason must be that we originally stripped the --from-pr's
if [ "$exit_code_with_from_pr" -eq 0 ]; then
fail_msg="$fail_msg (are you sure all PRs referenced have been merged in EasyBuild?)"
fi
fi

check_exit_code ${exit_code} "${ok_msg}" "${fail_msg}"

Expand Down
2 changes: 1 addition & 1 deletion create_lmodsitepackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
-- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections.
-- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse
-- to load the CUDA module and print an informative message on how to set up GPU support for EESSI
local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n"
local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/site_specific_config/gpu/.\\n"
if simpleName == 'CUDA' then
-- get the full host_injections path
local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
easyconfigs:
- CUDA-12.1.1.eb
- cuDNN-8.9.2.26-CUDA-12.1.1.eb
- LightGBM-4.5.0-foss-2023a-CUDA-12.1.1.eb:
options:
# see https://github.com/easybuilders/easybuild-easyconfigs/pull/21699
from-commit: e3407bd127d248c08960f6b09c973da0fdecc2c3
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
easyconfigs:
- CUDA-12.4.0.eb:
options:
accept-eula-for: CUDA
- UCX-CUDA-1.15.0-GCCcore-13.2.0-CUDA-12.4.0.eb
- UCC-CUDA-1.2.0-GCCcore-13.2.0-CUDA-12.4.0.eb:
options:
# see https://github.com/easybuilders/easybuild-easyconfigs/pull/21565
from-commit: 46141a3f40e699433fac03af2d3ed81bd5a62da7
- OSU-Micro-Benchmarks-7.5-gompi-2023b-CUDA-12.4.0.eb:
options:
# see https://github.com/easybuilders/easybuild-easyconfigs/pull/21926
from-commit: de79ec74eb076e1aceda5e21235a73c05ed6764c
- GROMACS-2024.4-foss-2023b-CUDA-12.4.0.eb:
options:
# see https://github.com/easybuilders/easybuild-easyconfigs/pull/21549
from-commit: 12b53341343967ce5a402fe8190a3c85bce7d49b
# see https://github.com/easybuilders/easybuild-easyblocks/pull/3523
include-easyblocks-from-commit: 90495ed23d26b3d5fd8162bf5d7b4c073a0682fe
Loading

0 comments on commit 74dcb6a

Please sign in to comment.