Skip to content

Commit

Permalink
Merge pull request #810 from trz42/2023.06-software.eessi.io-use-acce…
Browse files Browse the repository at this point in the history
…l-target-with-eessi-extend

{2023.06} Rebuild `EESSI-extend` module to use `$EESSI_ACCELERATOR_TARGET`
  • Loading branch information
ocaisa authored Nov 21, 2024
2 parents 2626840 + 8e87c33 commit db16c37
Show file tree
Hide file tree
Showing 7 changed files with 188 additions and 31 deletions.
15 changes: 15 additions & 0 deletions EESSI-extend-2023.06-easybuild.eb
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,16 @@ if (os.getenv("EESSI_CVMFS_INSTALL") ~= nil) then
end
eessi_cvmfs_install = true
easybuild_installpath = os.getenv("EESSI_SOFTWARE_PATH")
-- Optional accelerator target: when $EESSI_ACCELERATOR_TARGET is set, redirect the
-- installation to an accelerator-specific subdirectory and derive the CUDA compute capability
eessi_accelerator_target = os.getenv("EESSI_ACCELERATOR_TARGET")
if (eessi_accelerator_target ~= nil) then
-- only values of the form "nvidia/cc" followed by exactly two digits are accepted;
-- the two digits are captured (for example "80" from "nvidia/cc80")
-- NOTE(review): a value with three digits after "cc" does not match this pattern and is rejected below
cuda_compute_capability = string.match(eessi_accelerator_target, "^nvidia/cc([0-9][0-9])$")
if (cuda_compute_capability ~= nil) then
-- install under <installpath>/accel/<accelerator target>
easybuild_installpath = pathJoin(easybuild_installpath, 'accel', eessi_accelerator_target)
-- insert a dot between the two captured digits ("80" becomes "8.0")
easybuild_cuda_compute_capabilities = cuda_compute_capability:sub(1, 1) .. "." .. cuda_compute_capability:sub(2, 2)
else
-- any other value is reported as an error via LmodError
LmodError("Incorrect value for $EESSI_ACCELERATOR_TARGET: " .. eessi_accelerator_target)
end
end
elseif (os.getenv("EESSI_SITE_INSTALL") ~= nil) then
-- Make sure no other EESSI install environment variables are set
if ((os.getenv("EESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("EESSI_USER_INSTALL") ~= nil)) then
Expand Down Expand Up @@ -146,6 +156,11 @@ setenv ("EASYBUILD_UMASK", "022")
-- Allow this module to be loaded when running EasyBuild
setenv ("EASYBUILD_ALLOW_LOADED_MODULES", "EasyBuild,EESSI-extend")
-- Set environment variables if building for CUDA compute capabilities
if (easybuild_cuda_compute_capabilities ~= nil) then
setenv ("EASYBUILD_CUDA_COMPUTE_CAPABILITIES", easybuild_cuda_compute_capabilities)
end
-- Set all related environment variables if we have project or user installations (including extending MODULEPATH)
if (user_modulepath ~= nil) then
-- Use a more restrictive umask for this case
Expand Down
48 changes: 24 additions & 24 deletions EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -229,22 +229,34 @@ if [[ "${EESSI_CVMFS_REPO}" != /cvmfs/dev.eessi.io ]]; then
${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}
fi

echo ">> Configuring EasyBuild..."

# Make sure EESSI-extend is not loaded, and configure location variables for a
# CVMFS installation
module unload EESSI-extend
unset EESSI_USER_INSTALL
unset EESSI_PROJECT_INSTALL
unset EESSI_SITE_INSTALL
export EESSI_CVMFS_INSTALL=1

# We now run 'source load_eessi_extend_module.sh' to load or install and load the
# EESSI-extend module which sets up all build environment settings.
# The script requires the EESSI_VERSION given as argument, a couple of
# environment variables set (TMPDIR, EB and EASYBUILD_INSTALLPATH) and the
# function check_exit_code defined.
# NOTE 1, the script exits if those variables/functions are undefined.
# NOTE 2, loading the EESSI-extend module may adjust the value of EASYBUILD_INSTALLPATH,
# e.g., to point to the installation directory for accelerators.
# NOTE 3, we have to set a default for EASYBUILD_INSTALLPATH here in case the
# EESSI-extend module itself needs to be installed.
export EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
source load_eessi_extend_module.sh ${EESSI_VERSION}

# Install full CUDA SDK and cu* libraries in host_injections
# Hardcode this for now, see if it works
# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
# Allow skipping CUDA SDK install in e.g. CI environments
# The install_cuda... script uses EasyBuild. So, we need to check if we have EB
# or skip this step.
echo "Going to install full CUDA SDK and cu* libraries under host_injections if necessary"
module_avail_out=$TMPDIR/ml.out
module avail 2>&1 | grep EasyBuild &> ${module_avail_out}
if [[ $? -eq 0 ]]; then
echo_green ">> Found an EasyBuild module"
else
echo_yellow ">> No EasyBuild module found: skipping step to install CUDA (see output in ${module_avail_out})"
export skip_cuda_install=True
fi

temp_install_storage=${TMPDIR}/temp_install_storage
mkdir -p ${temp_install_storage}
if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then
Expand All @@ -253,7 +265,7 @@ if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then
--accept-cuda-eula \
--accept-cudnn-eula
else
echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed OR no EasyBuild module was found"
echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed"
fi

# Install NVIDIA drivers in host_injections (if they exist)
Expand All @@ -263,18 +275,6 @@ if command_exists "nvidia-smi"; then
fi


echo ">> Configuring EasyBuild..."

# Make sure that we use the EESSI_CVMFS_INSTALL
# Since the path is set when loading EESSI-extend, we reload it to make sure it works - even if it is already loaded
# Note we need to do this after running install_cuda_and_libraries, since that does installations in the EESSI_SITE_INSTALL
unset EESSI_USER_INSTALL
unset EESSI_PROJECT_INSTALL
unset EESSI_SITE_INSTALL
export EESSI_CVMFS_INSTALL=1
module unload EESSI-extend
module load EESSI-extend/${EESSI_VERSION}-easybuild

if [ ! -z "${shared_fs_path}" ]; then
shared_eb_sourcepath=${shared_fs_path}/easybuild/sources
echo ">> Using ${shared_eb_sourcepath} as shared EasyBuild source path"
Expand Down
3 changes: 3 additions & 0 deletions EESSI-remove-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ if [ $EUID -eq 0 ]; then
echo_yellow "Removing ${app_dir} and ${app_module}..."
rm -rf ${app_dir}
rm -rf ${app_module}
# recreate some directory to work around permission denied
# issues when rebuilding the package
mkdir -p ${app_dir}/easybuild
done
else
fatal_error "Easystack file ${easystack_file} not found!"
Expand Down
1 change: 1 addition & 0 deletions bot/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ else
declare -a REMOVAL_STEP_ARGS=()
REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}")
REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}")

# add fakeroot option in order to be able to remove software, see:
# https://github.com/EESSI/software-layer/issues/312
REMOVAL_STEP_ARGS+=("--fakeroot")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# 2024.11.12
# for installations under /cvmfs, if EESSI_ACCELERATOR_TARGET is set,
# EESSI-extend should adjust EASYBUILD_INSTALLPATH and set
# EASYBUILD_CUDA_COMPUTE_CAPABILITIES
easyconfigs:
- EESSI-extend-2023.06-easybuild.eb
116 changes: 116 additions & 0 deletions load_eessi_extend_module.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Script to load the environment module for EESSI-extend.
# If that module is not available yet, a specific version will be installed using the latest EasyBuild.
#
# This script must be sourced, since it makes changes in the current environment, like loading an EESSI-extend module.
# Consequences of being sourced:
# - every 'exit' below terminates the shell that sourced this script
# - 'set -o pipefail' remains active in that shell after this script has finished
#
# Assumptions (if one is not satisfied the script prints a message and exits)
# - EESSI version is given as first argument
# - TMPDIR is set
# - EB is set
# - EASYBUILD_INSTALLPATH needs to be set
# - Function check_exit_code is defined;
#   scripts/utils.sh in EESSI/software-layer repository defines this function, hence
#   scripts/utils.sh shall be sourced before this script is run
# - Functions echo_green, echo_yellow and fatal_error are used as well, but their
#   availability is not verified (presumably they also come from scripts/utils.sh)
#
# This script is part of the EESSI software layer, see
# https://github.com/EESSI/software-layer.git
#
# author: Kenneth Hoste (@boegel, HPC-UGent)
# author: Alan O'Cais (@ocaisa, CECAM)
# author: Thomas Roeblitz (@trz42, University of Bergen)
#
# license: GPLv2
#
#
# pipefail makes the 'module avail | grep' and 'eb | tee' pipelines below fail
# when their first command fails, not only when the last one does
set -o pipefail

# this script is *sourced*, not executed, so can't rely on $0 to determine path to self or script name
# $BASH_SOURCE points to correct path or script name, see also http://mywiki.wooledge.org/BashFAQ/028
if [ $# -ne 1 ]; then
    echo "Usage: source ${BASH_SOURCE} <EESSI-extend version>" >&2
    exit 1
fi

EESSI_EXTEND_VERSION="${1}-easybuild"

# make sure that environment variables that we expect to be set are indeed set
if [ -z "${TMPDIR}" ]; then
    echo "\$TMPDIR is not set; exiting" >&2
    exit 2
fi

# ${EB} is used to specify which 'eb' command should be used;
# can potentially be more than just 'eb', for example when using 'eb --optarch=GENERIC'
if [ -z "${EB}" ]; then
    echo "\$EB is not set; exiting" >&2
    exit 2
fi

# ${EASYBUILD_INSTALLPATH} points to the installation path and needs to be set
if [ -z "${EASYBUILD_INSTALLPATH}" ]; then
    echo "\$EASYBUILD_INSTALLPATH is not set; exiting" >&2
    exit 2
fi

# make sure that utility functions are defined (cfr. scripts/utils.sh script in EESSI/software-layer repo);
# the output of 'type' is discarded so the function definition is not dumped into the log
if ! type check_exit_code &> /dev/null; then
    echo "check_exit_code function is not defined; exiting" >&2
    exit 3
fi

echo ">> Checking for EESSI-extend module..."

ml_av_eessi_extend_out="${TMPDIR}/ml_av_eessi_extend.out"
# need to use --ignore_cache to avoid the case that the module was removed (to be
# rebuilt) but it is still in the cache
if module --ignore_cache avail 2>&1 | grep -i "EESSI-extend/${EESSI_EXTEND_VERSION}" &> "${ml_av_eessi_extend_out}"; then
    echo_green ">> Module for EESSI-extend/${EESSI_EXTEND_VERSION} found!"
else
    echo_yellow ">> No module yet for EESSI-extend/${EESSI_EXTEND_VERSION}, installing it..."

    EB_TMPDIR="${TMPDIR}/ebtmp"
    echo ">> Using temporary installation of EasyBuild (in ${EB_TMPDIR})..."
    pip_install_out="${TMPDIR}/pip_install.out"
    # defined outside of the subshell below, so that the error message printed
    # after the subshell can still refer to it
    eb_install_out="${TMPDIR}/eb_install.out"
    pip3 install --prefix "${EB_TMPDIR}" easybuild &> "${pip_install_out}"

    # keep track of original $PATH and $PYTHONPATH values, so we can restore them
    ORIG_PATH=${PATH}
    ORIG_PYTHONPATH=${PYTHONPATH}

    # run the installation in a subshell, so the EasyBuild settings and the
    # PATH/PYTHONPATH changes made for it do not leak into the calling shell
    (
        export EASYBUILD_PREFIX="${TMPDIR}/easybuild"
        export EASYBUILD_READ_ONLY_INSTALLDIR=1

        echo ">> Final installation in ${EASYBUILD_INSTALLPATH}..."
        export PATH="${EB_TMPDIR}/bin:${PATH}"
        export PYTHONPATH=$(ls -d "${EB_TMPDIR}"/lib/python*/site-packages):${PYTHONPATH}
        ok_msg="EESSI-extend/${EESSI_EXTEND_VERSION} installed, let's go!"
        fail_msg="Installing EESSI-extend/${EESSI_EXTEND_VERSION} failed, that's not good... (output: ${eb_install_out})"
        # while always adding --try-amend=keep... may do no harm, we could make
        # an attempt to figure out if it is needed, e.g., when we are rebuilding
        # NOTE: ${EB} is deliberately left unquoted, it may hold a command plus options
        ${EB} "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" --try-amend=keeppreviousinstall=True 2>&1 | tee "${eb_install_out}"
        # if check_exit_code exits on failure, that only ends this subshell;
        # the availability check after the subshell detects a failed installation
        check_exit_code $? "${ok_msg}" "${fail_msg}"
    )

    # restore original $PATH and $PYTHONPATH values, and clean up environment variables that are no longer needed
    # (the changes above were made inside the subshell; restoring is kept as a safeguard)
    export PATH=${ORIG_PATH}
    export PYTHONPATH=${ORIG_PYTHONPATH}
    unset EB_TMPDIR ORIG_PATH ORIG_PYTHONPATH

    # same grep-based check as before the installation: the exit status of
    # 'module avail' on its own does not tell whether a matching module was listed
    # (NOTE(review): Lmod appears to report success even without a match - confirm)
    if module --ignore_cache avail 2>&1 | grep -i "EESSI-extend/${EESSI_EXTEND_VERSION}" &> "${ml_av_eessi_extend_out}"; then
        echo_green ">> EESSI-extend/${EESSI_EXTEND_VERSION} module installed!"
    else
        fatal_error "EESSI-extend/${EESSI_EXTEND_VERSION} module failed to install?! (output of 'pip install' in ${pip_install_out}, output of 'eb' in ${eb_install_out}, output of 'module avail EESSI-extend' in ${ml_av_eessi_extend_out})"
    fi
fi

echo ">> Loading EESSI-extend/${EESSI_EXTEND_VERSION} module..."
module --ignore_cache load "EESSI-extend/${EESSI_EXTEND_VERSION}"

unset EESSI_EXTEND_VERSION
30 changes: 23 additions & 7 deletions scripts/gpu_support/nvidia/install_cuda_and_libraries.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,6 @@ done
# Make sure EESSI is initialised
check_eessi_initialised

# Make sure that `EESSI-extend` will install in the site installation path EESSI_SITE_SOFTWARE_PATH
export EESSI_SITE_INSTALL=1
echo "EESSI_SITE_SOFTWARE_PATH=${EESSI_SITE_SOFTWARE_PATH}"

# we need a directory we can use for temporary storage
if [[ -z "${TEMP_DIR}" ]]; then
tmpdir=$(mktemp -d)
Expand All @@ -93,7 +89,7 @@ else
fi
echo "Created temporary directory '${tmpdir}'"

# use EESSI_SITE_SOFTWARE_PATH/.modules/all as MODULEPATH
# Store MODULEPATH so it can be restored at the end of each loop iteration
SAVE_MODULEPATH=${MODULEPATH}

for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do
Expand All @@ -103,16 +99,36 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do
eb_version=$(echo ${EASYSTACK_FILE} | sed 's/.*eb-\([0-9.]*\).*/\1/g')

# Load EasyBuild version for this easystack file _before_ loading EESSI-extend
module avail EasyBuild
# Check that the EasyBuild version required by the current easystack file is
# available as a module; the filtered 'module avail' output is kept in
# ${module_avail_out} for later inspection. If the module is missing, the
# easystack file is skipped and the loop continues with the next one.
module_avail_out="${tmpdir}/ml.out"
module avail 2>&1 | grep "EasyBuild/${eb_version}" &> "${module_avail_out}"
if [[ $? -eq 0 ]]; then
    echo_green ">> Found an EasyBuild/${eb_version} module"
else
    # report the loop variable EASYSTACK_FILE (the lowercase name used before is
    # not assigned in the surrounding code, so the message lacked the file name)
    echo_yellow ">> No EasyBuild/${eb_version} module found: skipping step to install easystack file ${EASYSTACK_FILE} (see output in ${module_avail_out})"
    continue
fi
module load "EasyBuild/${eb_version}"

# Make sure EESSI-extend does a site install here
# We need to reload it with the current environment variables set
unset EESSI_CVMFS_INSTALL
unset EESSI_PROJECT_INSTALL
unset EESSI_USER_INSTALL
export EESSI_SITE_INSTALL=1
module unload EESSI-extend
module load EESSI-extend/${EESSI_VERSION}-easybuild
ml_av_eessi_extend_out=${tmpdir}/ml_av_eessi_extend.out
# need to use --ignore_cache to avoid the case that the module was removed (to be
# rebuilt) but it is still in the cache and the rebuild failed
EESSI_EXTEND_VERSION=${EESSI_VERSION}-easybuild
module --ignore_cache avail 2>&1 | grep -i EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out}
if [[ $? -eq 0 ]]; then
echo_green ">> Module for EESSI-extend/${EESSI_EXTEND_VERSION} found!"
else
error="\nNo module for EESSI-extend/${EESSI_EXTEND_VERSION} found\nwhile EESSI has been initialised to use software under ${EESSI_SOFTWARE_PATH}\n"
fatal_error "${error}"
fi
module --ignore_cache load EESSI-extend/${EESSI_EXTEND_VERSION}
unset EESSI_EXTEND_VERSION

# Install modules in hidden .modules dir to keep track of what was installed before
# (this action is temporary, and we do not call Lmod again within the current shell context, but in EasyBuild
Expand Down

0 comments on commit db16c37

Please sign in to comment.