From 9c0e4f09f0c446044e026c6c3e304ec1ab336a12 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Fri, 15 Mar 2024 13:12:56 +0000 Subject: [PATCH 01/71] {2023.06}[GCCcore/12.3.0] Perl-bundle-CPAN V5.36.1 --- .../pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml index 8eeb2f8c2c..e1b46296be 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml @@ -51,4 +51,5 @@ easyconfigs: # The Z3 dependency of PyTorch had it's versionsuffix removed # and we need to workaround the problem this creates, # see https://github.com/EESSI/software-layer/pull/501 for details - from-pr: 20050 + - PAN-5.36.1-GCCcore-12.3.0.eb + From d96d84d8446b7c64cf8b6390781893719bd996b2 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Fri, 15 Mar 2024 13:30:41 +0000 Subject: [PATCH 02/71] Fixed typo --- .../pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml index e1b46296be..4489917be7 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml @@ -51,5 +51,5 @@ easyconfigs: # The Z3 dependency of PyTorch had it's versionsuffix removed # and we need to workaround the problem this creates, # see https://github.com/EESSI/software-layer/pull/501 for details - - PAN-5.36.1-GCCcore-12.3.0.eb + - PAN-5.36.1-GCCcore-12.3.0.eb From bf2e69deeb3dbdd4af63811702465136782f3870 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Fri, 15 Mar 2024 13:50:17 +0000 Subject: [PATCH 03/71] Fixed typo --- .../pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml index 4489917be7..0061657519 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml @@ -51,5 +51,5 @@ easyconfigs: # The Z3 dependency of PyTorch had it's versionsuffix removed # and we need to workaround the problem this creates, # see https://github.com/EESSI/software-layer/pull/501 for details - - PAN-5.36.1-GCCcore-12.3.0.eb - + from-pr: 20050 + - Perl-bundle-CPAN-5.36.1-GCCcore-12.3.0.eb From 3a898b07388e0a552b31bc87f350f70480f40d09 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 5 May 2024 00:04:53 +0200 Subject: [PATCH 04/71] {2023.06}[foss/2023a] CUDA samples v12.1 --- .../2023.06/eessi-2023.06-eb-4.9.1-2023a.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml index 3018901ca9..e2d3ab496e 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml @@ -26,3 +26,11 @@ easyconfigs: # from-commit: ae2fc38307b56ae7ac12dff95c9d07404e1a8530 # trying from-pr as an alternative from-pr: 20379 + - CUDA-Samples-12.1-GCC-12.3.0-CUDA-12.1.1.eb: + # use easyconfig that only install subset of CUDA samples, + # to circumvent problem with nvcc linking to glibc of host OS, + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19189; + # and where additional samples are excluded because they fail to build on aarch64, + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19451; + options: + from-pr: 19451 From 2050a89364e6bc7c45c8a7e75187180bcba09336 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 5 May 2024 14:08:51 +0200 Subject: [PATCH 05/71] {2023.06}[foss/2023a] cuDNN v8.9.2.26 --- .../pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml index 3018901ca9..e948a2e55d 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml @@ -26,3 +26,4 @@ easyconfigs: # from-commit: ae2fc38307b56ae7ac12dff95c9d07404e1a8530 # trying from-pr as an alternative from-pr: 20379 + - cuDNN-8.9.2.26-CUDA-12.1.1.eb From 1f0206f56a6ce9a3797d7c8e60402de7390289b7 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 5 May 2024 19:00:41 +0200 Subject: [PATCH 06/71] add post sanitycheck hook for cuDNN --- eb_hooks.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/eb_hooks.py b/eb_hooks.py index 199dab8e54..44877103ea 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -688,6 +688,62 @@ def post_sanitycheck_cuda(self, *args, **kwargs): raise EasyBuildError("CUDA-specific hook triggered for non-CUDA easyconfig?!") +def post_sanitycheck_cuDNN(self, *args, **kwargs): + """ + Remove files from cuDNN installation that we are not allowed to ship, + and replace them with a symlink to a corresponding installation under host_injections. + """ + if self.name == 'cuDNN': + print_msg("Replacing files in cuDNN installation that we can not ship with symlinks to host_injections...") + + allowlist = ['LICENSE'] + + # read cuDNN LICENSE, construct allowlist based on section 2.6 that specifies list of files that can be shipped + license_path = os.path.join(self.installdir, 'LICENSE') + search_string = "2. Distribution. The following portions of the SDK are distributable under the Agreement:" + with open(license_path) as infile: + for line in infile: + if line.strip().startswidth(search_string): + # remove search string, split into words, remove trailing + # dots '.' and only retain words starting with a dot '.' + distributable = line[len(search_string):] + for word in distributable.split(): + if word[0] == '.': + allowlist.append(word.rstrip('.')) + + allowlist = sorted(set(allowlist)) + self.log.info("Allowlist for files in cuDNN installation that can be redistributed: " + ', '.join(allowlist)) + + # iterate over all files in the CUDA installation directory + for dir_path, _, files in os.walk(self.installdir): + for filename in files: + full_path = os.path.join(dir_path, filename) + # we only really care about real files, i.e. not symlinks + if not os.path.islink(full_path): + # check if the current file is part of the allowlist + basename = filename.split('.')[0] + if '.' in filename: + extension = '.' + filename.split('.')[1] + if basename in allowlist: + self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) + elif '.' in filename and extension in allowlist: + self.log.debug("%s is found in allowlist, so keeping it: %s", extension, full_path) + else: + self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s", + filename, full_path) + # if it is not in the allowlist, delete the file and create a symlink to host_injections + host_inj_path = full_path.replace('versions', 'host_injections') + # make sure source and target of symlink are not the same + if full_path == host_inj_path: + raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " + "are using this hook for a NESSI installation?", + full_path, host_inj_path) + remove_file(full_path) + symlink(host_inj_path, full_path) + else: + raise EasyBuildError("cuDNN-specific hook triggered for non-cuDNN easyconfig?!") + + def inject_gpu_property(ec): """ Add 'gpu' property, via modluafooter easyconfig parameter @@ -768,4 +824,5 @@ def inject_gpu_property(ec): POST_SANITYCHECK_HOOKS = { 'CUDA': post_sanitycheck_cuda, + 'cuDNN': post_sanitycheck_cuDNN, } From 889ee40a9e4422b20249faa77638127db8d8f1e4 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 5 May 2024 20:12:45 +0200 Subject: [PATCH 07/71] fix function name typo --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 44877103ea..223b14455d 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -703,7 +703,7 @@ def post_sanitycheck_cuDNN(self, *args, **kwargs): search_string = "2. Distribution. The following portions of the SDK are distributable under the Agreement:" with open(license_path) as infile: for line in infile: - if line.strip().startswidth(search_string): + if line.strip().startswith(search_string): # remove search string, split into words, remove trailing # dots '.' and only retain words starting with a dot '.' distributable = line[len(search_string):] From 5dcd453587463f3725d1e44bf61d4d33abae48a1 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Mon, 6 May 2024 13:22:27 +0000 Subject: [PATCH 08/71] Fix archdetect: ensure we use instructions introduced with ARM v8.2 for Neoverse N1 --- init/arch_specs/eessi_arch_arm.spec | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/init/arch_specs/eessi_arch_arm.spec b/init/arch_specs/eessi_arch_arm.spec index b5c9275043..62390e4c96 100755 --- a/init/arch_specs/eessi_arch_arm.spec +++ b/init/arch_specs/eessi_arch_arm.spec @@ -1,6 +1,6 @@ # ARM CPU architecture specifications -# Software path in EESSI | Vendor ID | List of defining CPU features -"aarch64/neoverse_n1" "ARM" "asimd" # Ampere Altra -"aarch64/neoverse_n1" "" "asimd" # AWS Graviton2 -"aarch64/neoverse_v1" "ARM" "asimd svei8mm" -"aarch64/neoverse_v1" "" "asimd svei8mm" # AWS Graviton3 +# ARM CPU architecture specifications (see https://gpages.juszkiewicz.com.pl/arm-socs-table/arm-socs.html for guidance) +"aarch64/neoverse_n1" "ARM" "asimddp" # Ampere Altra +"aarch64/neoverse_n1" "" "asimddp" # AWS Graviton2 +"aarch64/neoverse_v1" "ARM" "asimddp svei8mm" +"aarch64/neoverse_v1" "" "asimddp svei8mm" # AWS Graviton3 From 794314f3c6a7941bf1133fef25aadb86c678991d Mon Sep 17 00:00:00 2001 From: Richard Top Date: Mon, 6 May 2024 13:24:30 +0000 Subject: [PATCH 09/71] Fix archdetect: ensure we use instructions introduced with ARM v8.2 for Neoverse N1 --- init/arch_specs/eessi_arch_arm.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/arch_specs/eessi_arch_arm.spec b/init/arch_specs/eessi_arch_arm.spec index 62390e4c96..8c1bc34d20 100755 --- a/init/arch_specs/eessi_arch_arm.spec +++ b/init/arch_specs/eessi_arch_arm.spec @@ -1,5 +1,5 @@ -# ARM CPU architecture specifications # ARM CPU architecture specifications (see https://gpages.juszkiewicz.com.pl/arm-socs-table/arm-socs.html for guidance) +# Software path in EESSI | Vendor ID | List of defining CPU features "aarch64/neoverse_n1" "ARM" "asimddp" # Ampere Altra "aarch64/neoverse_n1" "" "asimddp" # AWS Graviton2 "aarch64/neoverse_v1" "ARM" "asimddp svei8mm" From 31c5e800f7b3955d5fb226ca20194f889a8a0e54 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 6 May 2024 17:50:04 +0200 Subject: [PATCH 10/71] add installation of cuDNN under host_injections --- EESSI-install-software.sh | 5 +- .../nvidia/install_cuDNN_host_injections.sh | 210 ++++++++++++++++++ 2 files changed, 213 insertions(+), 2 deletions(-) create mode 100755 scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 6c680571e2..a1591958d1 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -199,14 +199,15 @@ pr_diff=$(ls [0-9]*.diff | head -1) # for now, this just reinstalls all scripts. Note the most elegant, but works ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} -# Install full CUDA SDK in host_injections +# Install full CUDA SDK and cu* libraries in host_injections # Hardcode this for now, see if it works # TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install # Allow skipping CUDA SDK install in e.g. CI environments if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cudnn_host_injections.sh -c 12.1.1 -d 8.9.2.26 else - echo "Skipping installation of CUDA SDK in host_injections, since the --skip-cuda-install flag was passed" + echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed" fi # Install drivers in host_injections diff --git a/scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh b/scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh new file mode 100755 index 0000000000..7585e51458 --- /dev/null +++ b/scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh @@ -0,0 +1,210 @@ +#!/usr/bin/env bash + +# This script can be used to install cuDNN under the `.../host_injections` directory. +# This provides the parts of the cuDNN installation that cannot be redistributed as +# part of NESSI due to license limitations. While GPU-based software from NESSI will +# _run_ without these, installation of additional software that requires the cuDNN +# installation(s) under `host_injections` to be present. +# +# The `host_injections` directory is a variant symlink that by default points to +# `/opt/eessi`, unless otherwise defined in the local CVMFS configuration (see +# https://cvmfs.readthedocs.io/en/stable/cpt-repo.html#variant-symlinks). For the +# installation to be successful, this directory needs to be writeable by the user +# executing this script. + +# Initialise our bash functions +TOPDIR=$(dirname $(realpath $BASH_SOURCE)) +source "$TOPDIR"/../../utils.sh + +# Function to display help message +show_help() { + echo "Usage: $0 [OPTIONS]" + echo "Options:" + echo " --help Display this help message" + echo " -c, --cuda-version CUDA_VERSION Specify a version of CUDA to be used" + echo " when installing cuDNN (must" + echo " have a corresponding easyconfig in the" + echo " EasyBuild release)" + echo " -d, --cudnn-version CUDNN_VERSION Specify a version of cuDNN to install (must" + echo " have a corresponding easyconfig in the" + echo " EasyBuild release)" + echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" + echo " storage during the cuDNN install" + echo " (must have >10GB available)" +} + +# Initialize variables +cuda_version="" +cudnn_version="" + +# Parse command-line options +while [[ $# -gt 0 ]]; do + case "$1" in + --help) + show_help + exit 0 + ;; + -c|--cuda-version) + if [ -n "$2" ]; then + cuda_version="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + -d|--cudnn-version) + if [ -n "$2" ]; then + cudnn_version="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + -t|--temp-dir) + if [ -n "$2" ]; then + CUDA_TEMP_DIR="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + *) + show_help + fatal_error "Error: Unknown option: $1" + ;; + esac +done + +# Make sure NESSI is initialised +check_eessi_initialised + +# Make sure the CUDA version supplied is a semantic version +is_semantic_version() { + local version=$1 + local regex='^[0-9]+\.[0-9]+\.[0-9]+$' + + if [[ $version =~ $regex ]]; then + return 0 # Return success (0) if it's a semantic version + else + return 1 # Return failure (1) if it's not a semantic version + fi +} +if ! is_semantic_version "$cuda_version"; then + show_help + error="\nYou must provide a semantic version for CUDA (e.g., 12.1.1) via the appropriate\n" + error="${error}command line option. This script is intended for use with NESSI so the 'correct'\n" + error="${error}version to provide is probably one of those available under\n" + error="${error}$EESSI_SOFTWARE_PATH/software/cuDNN\n" + fatal_error "${error}" +fi + +# As an installation location just use $EESSI_SOFTWARE_PATH but replacing `versions` with `host_injections` +cudnn_install_parent=${EESSI_SOFTWARE_PATH/versions/host_injections} + +# Only install cuDNN if specified version is not found. +# (existence of easybuild subdir implies a successful install) +if [ -d "${cudnn_install_parent}"/software/cuDNN/*-CUDA-"${cuda_version}"/easybuild ]; then + echo_green "cuDNN software found! No need to install cuDNN again." +else + # We need to be able write to the installation space so let's make sure we can + if ! create_directory_structure "${cudnn_install_parent}"/software/cuDNN ; then + fatal_error "No write permissions to directory ${cudnn_install_parent}/software/cuDNN" + fi + + # we need a directory we can use for temporary storage + if [[ -z "${CUDA_TEMP_DIR}" ]]; then + tmpdir=$(mktemp -d) + else + tmpdir="${CUDA_TEMP_DIR}"/temp + if ! mkdir "$tmpdir" ; then + fatal_error "Could not create directory ${tmpdir}" + fi + fi + + required_space_in_tmpdir=50000 + # Let's see if we have sources and build locations defined if not, we use the temporary space + if [[ -z "${EASYBUILD_BUILDPATH}" ]]; then + export EASYBUILD_BUILDPATH=${tmpdir}/build + required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000)) + fi + if [[ -z "${EASYBUILD_SOURCEPATH}" ]]; then + export EASYBUILD_SOURCEPATH=${tmpdir}/sources + required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000)) + fi + + # The install is pretty fat, you need lots of space for download/unpack/install (~3*5GB), + # need to do a space check before we proceed + avail_space=$(df --output=avail "${cudnn_install_parent}"/ | tail -n 1 | awk '{print $1}') + if (( avail_space < 5000000 )); then + fatal_error "Need at least 5GB disk space to install cuDNN under ${cudnn_install_parent}, exiting now..." + fi + avail_space=$(df --output=avail "${tmpdir}"/ | tail -n 1 | awk '{print $1}') + if (( avail_space < required_space_in_tmpdir )); then + error="Need at least ${required_space_in_tmpdir} disk space under ${tmpdir}.\n" + error="${error}Set the environment variable CUDA_TEMP_DIR to a location with adequate space to pass this check." + error="${error}You can alternatively set EASYBUILD_BUILDPATH and/or EASYBUILD_SOURCEPATH " + error="${error}to reduce this requirement. Exiting now..." + fatal_error "${error}" + fi + + if ! command -v "eb" &>/dev/null; then + echo_yellow "Attempting to load an EasyBuild module to do actual install" + module load EasyBuild + # There are some scenarios where this may fail + if [ $? -ne 0 ]; then + error="'eb' command not found in your environment and\n" + error="${error} module load EasyBuild\n" + error="${error}failed for some reason.\n" + error="${error}Please re-run this script with the 'eb' command available." + fatal_error "${error}" + fi + fi + + cudnn_easyconfig="cuDNN-${cudnn_version}-CUDA-${cuda_version}.eb" + + # Check the easyconfig file is available in the release + # (eb search always returns 0, so we need a grep to ensure a usable exit code) + eb --search ^${cudnn_easyconfig}|grep cuDNN > /dev/null 2>&1 + # Check the exit code + if [ $? -ne 0 ]; then + eb_version=$(eb --version) + available_cudnn_easyconfigs=$(eb --search ^cuDNN-*.eb|grep cuDNN) + + error="The easyconfig ${cudnn_easyconfig} was not found in EasyBuild version:\n" + error="${error} ${eb_version}\n" + error="${error}You either need to give a different version of CUDA to install _or_ \n" + error="${error}use a different version of EasyBuild for the installation.\n" + error="${error}\nThe versions of available with the current eb command are:\n" + error="${error}${available_cudnn_easyconfigs}" + fatal_error "${error}" + fi + + # We need the --rebuild option, as the cuDNN module may or may not be on the + # `MODULEPATH` yet. Even if it is, we still want to redo this installation + # since it will provide the symlinked targets for the parts of the cuDNN + # installation in the `.../versions/...` prefix + # We install the module in our `tmpdir` since we do not need the modulefile, + # we only care about providing the targets for the symlinks. + extra_args="--rebuild --installpath-modules=${tmpdir}" + + # We don't want hooks used in this install, we need a vanilla cuDNN installation + touch "$tmpdir"/none.py + # shellcheck disable=SC2086 # Intended splitting of extra_args + eb --prefix="$tmpdir" ${extra_args} --hooks="$tmpdir"/none.py --installpath="${cudnn_install_parent}"/ "${cudnn_easyconfig}" + ret=$? + if [ $ret -ne 0 ]; then + eb_last_log=$(unset EB_VERBOSE; eb --last-log) + cp -a ${eb_last_log} . + fatal_error "cuDNN installation failed, please check EasyBuild logs $(basename ${eb_last_log})..." + else + echo_green "cuDNN installation at ${cudnn_install_parent}/software/cuDNN/${cudnn_version}-CUDA-${cuda_version} succeeded!" + fi + # clean up tmpdir + rm -rf "${tmpdir}" +fi From c623117519a84ab87c4053590e741a48982bd4a3 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 6 May 2024 22:03:16 +0200 Subject: [PATCH 11/71] add option to provide additional lower directories for overlayfs --- eessi_container.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/eessi_container.sh b/eessi_container.sh index df008dd965..ad9397318a 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -86,6 +86,11 @@ display_help() { echo " -n | --nvidia MODE - configure the container to work with NVIDIA GPUs," echo " MODE==install for a CUDA installation, MODE==run to" echo " attach a GPU, MODE==all for both [default: false]" + echo " -o | --lower-dirs DIRS - list of ':' separated directories that are used" + echo " in front of the default lower dir (CVMFS repo);" + echo " fuse-overlayfs will merge all lower directories;" + echo " the option can be used to make certain directories" + echo " in the CVMFS repo writable [default: none]" echo " -r | --repository CFG - configuration file or identifier defining the" echo " repository to use [default: EESSI via" echo " default container, see --container]" @@ -119,6 +124,7 @@ FAKEROOT=0 VERBOSE=0 STORAGE= LIST_REPOS=0 +LOWER_DIRS= MODE="shell" SETUP_NVIDIA=0 REPOSITORY="EESSI" @@ -174,6 +180,10 @@ while [[ $# -gt 0 ]]; do NVIDIA_MODE="$2" shift 2 ;; + -o|--lower-dirs) + LOWER_DIRS="$2" + shift 2 + ;; -r|--repository) REPOSITORY="$2" shift 2 @@ -616,6 +626,14 @@ if [[ "${ACCESS}" == "rw" ]]; then EESSI_WRITABLE_OVERLAY="container:fuse-overlayfs" EESSI_WRITABLE_OVERLAY+=" -o lowerdir=/cvmfs_ro/${repo_name}" + if [[ ! -z ${LOWER_DIRS} ]]; then + # need to convert ':' in LOWER_DIRS to ',' because bind mounts use ',' as + # separator while the lowerdir overlayfs option uses ':' + export BIND_PATHS="${BIND_PATHS},${LOWER_DIRS/:/,}" + EESSI_WRITABLE_OVERLAY+=" -o lowerdir=${LOWER_DIRS}:/cvmfs_ro/${repo_name}" + else + EESSI_WRITABLE_OVERLAY+=" -o lowerdir=/cvmfs_ro/${repo_name}" + fi EESSI_WRITABLE_OVERLAY+=" -o upperdir=${TMP_IN_CONTAINER}/overlay-upper" EESSI_WRITABLE_OVERLAY+=" -o workdir=${TMP_IN_CONTAINER}/overlay-work" EESSI_WRITABLE_OVERLAY+=" ${EESSI_CVMFS_REPO}" From 776b5b8e0ed663b6f7dd3bd9eadd5cddeec8ceaa Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 6 May 2024 22:05:46 +0200 Subject: [PATCH 12/71] script to determine which modules have to be rebuilt --- EESSI-determine-rebuilds.sh | 122 ++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100755 EESSI-determine-rebuilds.sh diff --git a/EESSI-determine-rebuilds.sh b/EESSI-determine-rebuilds.sh new file mode 100755 index 0000000000..ec9a141978 --- /dev/null +++ b/EESSI-determine-rebuilds.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# +# Script to determine which parts of the EESSI software stack (version set through init/eessi_defaults) +# have to be rebuilt + +# see example parsing of command line arguments at +# https://wiki.bash-hackers.org/scripting/posparams#using_a_while_loop +# https://stackoverflow.com/questions/192249/how-do-i-parse-command-line-arguments-in-bash + +display_help() { + echo "usage: $0 [OPTIONS]" + echo " -g | --generic - instructs script to build for generic architecture target" + echo " -h | --help - display this usage information" +} + +POSITIONAL_ARGS=() + +while [[ $# -gt 0 ]]; do + case $1 in + -g|--generic) + DETECTION_PARAMETERS="--generic" + shift + ;; + -h|--help) + display_help # Call your function + # no shifting needed here, we're done. + exit 0 + ;; + -*|--*) + echo "Error: Unknown option: $1" >&2 + exit 1 + ;; + *) # No more options + POSITIONAL_ARGS+=("$1") # save positional arg + shift + ;; + esac +done + +set -- "${POSITIONAL_ARGS[@]}" + +TOPDIR=$(dirname $(realpath $0)) + +export TMPDIR=$(mktemp -d /tmp/eessi-remove.XXXXXXXX) + +source $TOPDIR/scripts/utils.sh + +echo ">> Determining software subdirectory to use for current build host..." +if [ -z $EESSI_SOFTWARE_SUBDIR_OVERRIDE ]; then + export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS) + echo ">> Determined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE via 'eessi_software_subdir.py $DETECTION_PARAMETERS' script" +else + echo ">> Picking up pre-defined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE: ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" +fi + +echo ">> Setting up environment..." + +source $TOPDIR/init/bash + +if [ -d $EESSI_CVMFS_REPO ]; then + echo_green "$EESSI_CVMFS_REPO available, OK!" +else + fatal_error "$EESSI_CVMFS_REPO is not available!" +fi + +if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then + fatal_error "Failed to determine software subdirectory?!" +elif [[ "${EESSI_SOFTWARE_SUBDIR}" != "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then + fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!" +else + echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!" +fi + +echo ">> Configuring EasyBuild..." +EB="eb" +source $TOPDIR/configure_easybuild + +echo ">> Setting up \$MODULEPATH..." +# make sure no modules are loaded +module --force purge +# ignore current $MODULEPATH entirely +module unuse $MODULEPATH +module use $EASYBUILD_INSTALLPATH/modules/all +if [[ -z ${MODULEPATH} ]]; then + fatal_error "Failed to set up \$MODULEPATH?!" +else + echo_green ">> MODULEPATH set up: ${MODULEPATH}" +fi + +# assume there's only one diff file that corresponds to the PR patch file +pr_diff=$(ls [0-9]*.diff | head -1) + +# if this script is run as root, use PR patch file to determine if software needs to be removed first +changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") +if [ -z ${changed_easystacks_rebuilds} ]; then + echo "No software needs to be removed." +else + for easystack_file in ${changed_easystacks_rebuilds}; do + # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file + eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g') + + # load EasyBuild module (will be installed if it's not available yet) + source ${TOPDIR}/load_easybuild_module.sh ${eb_version} + + if [ -f ${easystack_file} ]; then + echo_green "Software rebuild(s) requested in ${easystack_file}, so determining which existing installation have to be removed..." + # we need to remove existing installation directories first, + # so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R]) + # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) + rebuild_apps=$(eb --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') + for app in ${rebuild_apps}; do + app_dir=${EASYBUILD_INSTALLPATH}/software/${app} + app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua + echo_yellow "Removing ${app_dir} and ${app_module}..." + echo "REMOVE_SOFTWARE ${app_dir}" + echo "REMOVE_MODULE ${app_module}" + done + else + fatal_error "Easystack file ${easystack_file} not found!" + fi + done +fi From 646804295f38a90e1c40f2eb32b35e7f6de20b06 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 6 May 2024 22:42:09 +0200 Subject: [PATCH 13/71] use alternative approach to remove software packages - bot/build.sh - first runs EESSI-determine-rebuilds.sh to determine which software package directories have to be removed - it then processes the output and creates lower directories which are writable - finally it uses these lower directories as additional parameter when running EESSI-remove-software.sh --- EESSI-remove-software.sh | 4 ++-- bot/build.sh | 43 +++++++++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index 4465c3546b..664fd8c6fd 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -118,8 +118,8 @@ if [ $EUID -ne 0 ]; then app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua echo_yellow "Removing ${app_dir} and ${app_module}... (just reporting what would have been done)" # echo_yellow "Removing ${app_dir} and ${app_module}..." - # rm -rf ${app_dir} - # rm -rf ${app_module} + rm -rf ${app_dir} + rm -rf ${app_module} done else fatal_error "Easystack file ${easystack_file} not found!" diff --git a/bot/build.sh b/bot/build.sh index 10befe50bd..566e5e79cb 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -188,6 +188,41 @@ changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed if [[ -z "${changed_easystacks_rebuilds}" ]]; then echo "This PR does not add any easystack files in a rebuilds subdirectory, so let's skip the removal step." else + # determine which software packages (and modules) have to be removed + TARBALL_TMP_DETERMINE_STEP_DIR=${PREVIOUS_TMP_DIR}/determine_step + mkdir -p ${TARBALL_TMP_DETERMINE_STEP_DIR} + + # prepare arguments to eessi_container.sh specific to determine step + declare -a DETERMINE_STEP_ARGS=() + DETERMINE_STEP_ARGS+=("--save" "${TARBALL_TMP_DETERMINE_STEP_DIR}") + DETERMINE_STEP_ARGS+=("--storage" "${STORAGE}") + + # create tmp file for output of determine step + determine_outerr=$(mktemp determine.outerr.XXXX) + + echo "Executing command to determine software to be removed:" + echo "./eessi_container.sh ${COMMON_ARGS[@]} ${DETERMINE_STEP_ARGS[@]}" + echo " -- ./EESSI-determine-rebuilds.sh \"${DETERMINE_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${determine_outerr}" + ./eessi_container.sh "${COMMON_ARGS[@]}" "${DETERMINE_STEP_ARGS[@]}" \ + -- ./EESSI-determine-rebuilds.sh "${DETERMINE_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${determine_outerr} + + # process output file + # for each line containing 'REMOVE_SOFTWARE some_path' + # create a new directory ${STORAGE}/lower_dirs/some_path_stripped + # where the prefix /cvmfs/repo_name is removed from some_path + # set permission of the directory to u+rwx + # add directory to LOWER_DIRS (':' separated list of directories) + LOWER_DIRS= + for remove_dir in $(grep REMOVE_SOFTWARE ${determine_outerr} | cut -f4- -d'/'); do + mkdir -p ${STORAGE}/lower_dirs/${remove_dir} + chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} + if [[ ! -z ${LOWER_DIRS} ]]; then + LOWER_DIRS="${LOWER_DIRS}:${STORAGE}/lower_dirs/${remove_dir}" + else + LOWER_DIRS="${STORAGE}/lower_dirs/${remove_dir}" + fi + done + # prepare directory to store tarball of tmp for removal and build steps TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} @@ -196,11 +231,9 @@ else declare -a REMOVAL_STEP_ARGS=() REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") - # add fakeroot option in order to be able to remove software, see: - # https://github.com/EESSI/software-layer/issues/312 - # CURRENTLY NOT SUPPORTED; software packages need to be removed from - # CernVM-FS repository first - # REMOVAL_STEP_ARGS+=("--fakeroot") + if [[ ! -z ${LOWER_DIRS} ]]; then + REMOVAL_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") + fi # create tmp file for output of removal step removal_outerr=$(mktemp remove.outerr.XXXX) From aec23047a1a99ffe5d406818fdac5e0b6c7751c8 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 6 May 2024 22:50:24 +0200 Subject: [PATCH 14/71] rebuild CUDA/12.1.1 to include full runtime --- .../20240506-eb-4.9.1-CUDA-12.1.1-ship-full-runtime.yml | 9 +++++++++ eb_hooks.py | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 easystacks/pilot.nessi.no/2023.06/rebuilds/20240506-eb-4.9.1-CUDA-12.1.1-ship-full-runtime.yml diff --git a/easystacks/pilot.nessi.no/2023.06/rebuilds/20240506-eb-4.9.1-CUDA-12.1.1-ship-full-runtime.yml b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240506-eb-4.9.1-CUDA-12.1.1-ship-full-runtime.yml new file mode 100644 index 0000000000..058ab75e80 --- /dev/null +++ b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240506-eb-4.9.1-CUDA-12.1.1-ship-full-runtime.yml @@ -0,0 +1,9 @@ +# 2024.05.06 +# Original matching of files we could ship was not done correctly. We were +# matching the basename for files (e.g., libcudart.so from libcudart.so.12) +# rather than the name stub (libcudart) +# See https://github.com/EESSI/software-layer/pull/559 +easyconfigs: + - CUDA-12.1.1.eb: + options: + accept-eula-for: CUDA diff --git a/eb_hooks.py b/eb_hooks.py index 199dab8e54..69e2376ccf 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -668,8 +668,8 @@ def post_sanitycheck_cuda(self, *args, **kwargs): full_path = os.path.join(dir_path, filename) # we only really care about real files, i.e. not symlinks if not os.path.islink(full_path): - # check if the current file is part of the allowlist - basename = os.path.splitext(filename)[0] + # check if the current file name stub is part of the allowlist + basename = filename.split('.')[0] if basename in allowlist: self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) else: From 7024f8443f5c0dfa53db63f70810be294471564c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 6 May 2024 23:25:02 +0200 Subject: [PATCH 15/71] fix logic to add lower dirs for overlayfs --- bot/build.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index 566e5e79cb..c537d725b9 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -216,10 +216,8 @@ else for remove_dir in $(grep REMOVE_SOFTWARE ${determine_outerr} | cut -f4- -d'/'); do mkdir -p ${STORAGE}/lower_dirs/${remove_dir} chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} - if [[ ! -z ${LOWER_DIRS} ]]; then - LOWER_DIRS="${LOWER_DIRS}:${STORAGE}/lower_dirs/${remove_dir}" - else - LOWER_DIRS="${STORAGE}/lower_dirs/${remove_dir}" + if [[ -z ${LOWER_DIRS} ]]; then + LOWER_DIRS="${STORAGE}/lower_dirs" fi done From 2828e05644f987ac544fa7ef45bc30154b9d773c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 6 May 2024 23:45:20 +0200 Subject: [PATCH 16/71] need to create full directory tree in lower dir --- EESSI-determine-rebuilds.sh | 3 ++- bot/build.sh | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/EESSI-determine-rebuilds.sh b/EESSI-determine-rebuilds.sh index ec9a141978..fac217e7e4 100755 --- a/EESSI-determine-rebuilds.sh +++ b/EESSI-determine-rebuilds.sh @@ -112,7 +112,8 @@ else app_dir=${EASYBUILD_INSTALLPATH}/software/${app} app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua echo_yellow "Removing ${app_dir} and ${app_module}..." - echo "REMOVE_SOFTWARE ${app_dir}" + # echo "REMOVE_SOFTWARE ${app_dir}" + find ${app_dir} -type d | sed -e 's/^/REMOVE_DIRECTORY /' echo "REMOVE_MODULE ${app_module}" done else diff --git a/bot/build.sh b/bot/build.sh index c537d725b9..aa5d7f8ad3 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -207,13 +207,13 @@ else -- ./EESSI-determine-rebuilds.sh "${DETERMINE_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${determine_outerr} # process output file - # for each line containing 'REMOVE_SOFTWARE some_path' + # for each line containing 'REMOVE_DIRECTORY some_path' # create a new directory ${STORAGE}/lower_dirs/some_path_stripped # where the prefix /cvmfs/repo_name is removed from some_path # set permission of the directory to u+rwx # add directory to LOWER_DIRS (':' separated list of directories) LOWER_DIRS= - for remove_dir in $(grep REMOVE_SOFTWARE ${determine_outerr} | cut -f4- -d'/'); do + for remove_dir in $(grep REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/'); do mkdir -p ${STORAGE}/lower_dirs/${remove_dir} chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} if [[ -z ${LOWER_DIRS} ]]; then From 14b207625f2306424841b7adeb24894aa801141d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 6 May 2024 23:58:11 +0200 Subject: [PATCH 17/71] tune grep and a bit debug output --- bot/build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bot/build.sh b/bot/build.sh index aa5d7f8ad3..4f9f9d9fe8 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -213,7 +213,8 @@ else # set permission of the directory to u+rwx # add directory to LOWER_DIRS (':' separated list of directories) LOWER_DIRS= - for remove_dir in $(grep REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/'); do + for remove_dir in $(grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/'); do + echo "PROCESS directory: --${remove_dir}--" mkdir -p ${STORAGE}/lower_dirs/${remove_dir} chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} if [[ -z ${LOWER_DIRS} ]]; then From e4a013f5fcdcdeae624a5541f554dd9c7cb6454e Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 7 May 2024 00:02:54 +0200 Subject: [PATCH 18/71] add double quotes --- bot/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/build.sh b/bot/build.sh index 4f9f9d9fe8..1b77be2a8c 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -213,7 +213,7 @@ else # set permission of the directory to u+rwx # add directory to LOWER_DIRS (':' separated list of directories) LOWER_DIRS= - for remove_dir in $(grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/'); do + for remove_dir in "$(grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/')"; do echo "PROCESS directory: --${remove_dir}--" mkdir -p ${STORAGE}/lower_dirs/${remove_dir} chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} From dbe0e3c724fb5000f2257abeaf4e64479d9e5462 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 7 May 2024 00:14:50 +0200 Subject: [PATCH 19/71] more debug output --- bot/build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bot/build.sh b/bot/build.sh index 1b77be2a8c..c8ac217c4f 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -213,7 +213,8 @@ else # set permission of the directory to u+rwx # add directory to LOWER_DIRS (':' separated list of directories) LOWER_DIRS= - for remove_dir in "$(grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/')"; do + head ${determine_outerr} | cat -n + for remove_dir in $(cat ${determine_outerr} | grep ^REMOVE_DIRECTORY | cut -f4- -d'/'); do echo "PROCESS directory: --${remove_dir}--" mkdir -p ${STORAGE}/lower_dirs/${remove_dir} chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} From 620695f711d7489e3ab1cb96df3a833203ae9ac4 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 7 May 2024 00:22:46 +0200 Subject: [PATCH 20/71] don't use cut --- bot/build.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index c8ac217c4f..47d807b843 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -213,8 +213,7 @@ else # set permission of the directory to u+rwx # add directory to LOWER_DIRS (':' separated list of directories) LOWER_DIRS= - head ${determine_outerr} | cat -n - for remove_dir in $(cat ${determine_outerr} | grep ^REMOVE_DIRECTORY | cut -f4- -d'/'); do + for remove_dir in $(cat ${determine_outerr} | grep ^REMOVE_DIRECTORY | sed -e 's/^REMOVE_DIRECTORY //'); do echo "PROCESS directory: --${remove_dir}--" mkdir -p ${STORAGE}/lower_dirs/${remove_dir} chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} From 4561c74941f5090d9500c8c7c8c811eebca619bd Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 7 May 2024 00:28:27 +0200 Subject: [PATCH 21/71] create temp file with stripped dirs --- bot/build.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bot/build.sh b/bot/build.sh index 47d807b843..3478a5bbba 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -213,7 +213,9 @@ else # set permission of the directory to u+rwx # add directory to LOWER_DIRS (':' separated list of directories) LOWER_DIRS= - for remove_dir in $(cat ${determine_outerr} | grep ^REMOVE_DIRECTORY | sed -e 's/^REMOVE_DIRECTORY //'); do + grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_dirs + #for remove_dir in $(cat ${determine_outerr} | grep ^REMOVE_DIRECTORY | sed -e 's/^REMOVE_DIRECTORY //'); do + for remove_dir in $(cat ${determine_outerr}.rm_dirs); do echo "PROCESS directory: --${remove_dir}--" mkdir -p ${STORAGE}/lower_dirs/${remove_dir} chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} From 290dc08f63e15cd573fb85c99a9007fdc0946e58 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 7 May 2024 00:37:55 +0200 Subject: [PATCH 22/71] alternative loop --- bot/build.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bot/build.sh b/bot/build.sh index 3478a5bbba..797508a3b9 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -215,7 +215,9 @@ else LOWER_DIRS= grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_dirs #for remove_dir in $(cat ${determine_outerr} | grep ^REMOVE_DIRECTORY | sed -e 's/^REMOVE_DIRECTORY //'); do - for remove_dir in $(cat ${determine_outerr}.rm_dirs); do + wc -l ${determine_outerr}.rm_dirs + #for remove_dir in "$(cat ${determine_outerr}.rm_dirs)"; do + cat ${determine_outerr}.rm_dirs | while read remove_dir; do echo "PROCESS directory: --${remove_dir}--" mkdir -p ${STORAGE}/lower_dirs/${remove_dir} chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} From b1e314cc237f545ff14018ec4220cb6b5fd717de Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 7 May 2024 00:50:26 +0200 Subject: [PATCH 23/71] always create LOWER_DIRS base dir --- bot/build.sh | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index 797508a3b9..3a1294e06e 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -212,19 +212,15 @@ else # where the prefix /cvmfs/repo_name is removed from some_path # set permission of the directory to u+rwx # add directory to LOWER_DIRS (':' separated list of directories) - LOWER_DIRS= + LOWER_DIRS="${STORAGE}/lower_dirs" + mkdir -p "${LOWER_DIRS}" grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_dirs - #for remove_dir in $(cat ${determine_outerr} | grep ^REMOVE_DIRECTORY | sed -e 's/^REMOVE_DIRECTORY //'); do - wc -l ${determine_outerr}.rm_dirs - #for remove_dir in "$(cat ${determine_outerr}.rm_dirs)"; do cat ${determine_outerr}.rm_dirs | while read remove_dir; do echo "PROCESS directory: --${remove_dir}--" mkdir -p ${STORAGE}/lower_dirs/${remove_dir} chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} - if [[ -z ${LOWER_DIRS} ]]; then - LOWER_DIRS="${STORAGE}/lower_dirs" - fi done + ls -lR "${STORAGE}/lower_dirs" # prepare directory to store tarball of tmp for removal and build steps TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step From d64c21eab92d21aa6c00af5d284d5d8399adb76c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 7 May 2024 01:15:50 +0200 Subject: [PATCH 24/71] tune debug output --- EESSI-remove-software.sh | 8 ++++---- bot/build.sh | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index 664fd8c6fd..de0c312e38 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -116,10 +116,10 @@ if [ $EUID -ne 0 ]; then for app in ${rebuild_apps}; do app_dir=${EASYBUILD_INSTALLPATH}/software/${app} app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua - echo_yellow "Removing ${app_dir} and ${app_module}... (just reporting what would have been done)" - # echo_yellow "Removing ${app_dir} and ${app_module}..." - rm -rf ${app_dir} - rm -rf ${app_module} + echo_yellow "Removing ${app_dir} and ${app_module}..." + ls -lR ${app_dir} + rm -rdfv ${app_dir} + rm -rdfv ${app_module} done else fatal_error "Easystack file ${easystack_file} not found!" diff --git a/bot/build.sh b/bot/build.sh index 3a1294e06e..6ac38160bb 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -216,11 +216,11 @@ else mkdir -p "${LOWER_DIRS}" grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_dirs cat ${determine_outerr}.rm_dirs | while read remove_dir; do - echo "PROCESS directory: --${remove_dir}--" + # echo "PROCESS directory: --${remove_dir}--" mkdir -p ${STORAGE}/lower_dirs/${remove_dir} chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} done - ls -lR "${STORAGE}/lower_dirs" + # ls -lR "${STORAGE}/lower_dirs" # prepare directory to store tarball of tmp for removal and build steps TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step From 29a3933c5c4139f0edb8da63d860679223ee6fbc Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 7 May 2024 01:25:25 +0200 Subject: [PATCH 25/71] make all files writable too --- EESSI-determine-rebuilds.sh | 1 + bot/build.sh | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/EESSI-determine-rebuilds.sh b/EESSI-determine-rebuilds.sh index fac217e7e4..553c794aa6 100755 --- a/EESSI-determine-rebuilds.sh +++ b/EESSI-determine-rebuilds.sh @@ -114,6 +114,7 @@ else echo_yellow "Removing ${app_dir} and ${app_module}..." # echo "REMOVE_SOFTWARE ${app_dir}" find ${app_dir} -type d | sed -e 's/^/REMOVE_DIRECTORY /' + find ${app_dir} -type f | sed -e 's/^/REMOVE_FILE /' echo "REMOVE_MODULE ${app_module}" done else diff --git a/bot/build.sh b/bot/build.sh index 6ac38160bb..0dbab6117b 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -220,6 +220,11 @@ else mkdir -p ${STORAGE}/lower_dirs/${remove_dir} chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} done + grep ^REMOVE_FILE ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_files + cat ${determine_outerr}.rm_files | while read remove_file; do + touch ${STORAGE}/lower_dirs/${remove_file} + chmod u+rw ${STORAGE}/lower_dirs/${remove_file} + done # ls -lR "${STORAGE}/lower_dirs" # prepare directory to store tarball of tmp for removal and build steps From 62429193c0ea7ffdad5812c3bc116ea6f88f6a5b Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 7 May 2024 01:49:01 +0200 Subject: [PATCH 26/71] polish code --- EESSI-determine-rebuilds.sh | 1 - EESSI-remove-software.sh | 71 ++++++++++++++++--------------------- bot/build.sh | 10 ++++-- 3 files changed, 37 insertions(+), 45 deletions(-) diff --git a/EESSI-determine-rebuilds.sh b/EESSI-determine-rebuilds.sh index 553c794aa6..4f4d5ab713 100755 --- a/EESSI-determine-rebuilds.sh +++ b/EESSI-determine-rebuilds.sh @@ -112,7 +112,6 @@ else app_dir=${EASYBUILD_INSTALLPATH}/software/${app} app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua echo_yellow "Removing ${app_dir} and ${app_module}..." - # echo "REMOVE_SOFTWARE ${app_dir}" find ${app_dir} -type d | sed -e 's/^/REMOVE_DIRECTORY /' find ${app_dir} -type f | sed -e 's/^/REMOVE_FILE /' echo "REMOVE_MODULE ${app_module}" diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index de0c312e38..651a22f311 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -89,46 +89,35 @@ fi # assume there's only one diff file that corresponds to the PR patch file pr_diff=$(ls [0-9]*.diff | head -1) -# if this script is run as root, use PR patch file to determine if software needs to be removed first -# if [ $EUID -eq 0 ]; then -# working around lacking support for `--fakeroot` and/or user namespaces -# we only run as non-root -if [ $EUID -ne 0 ]; then - changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") - if [ -z ${changed_easystacks_rebuilds} ]; then - echo "No software needs to be removed." - else - for easystack_file in ${changed_easystacks_rebuilds}; do - # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file - eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g') - - # load EasyBuild module (will be installed if it's not available yet) - source ${TOPDIR}/load_easybuild_module.sh ${eb_version} - - if [ -f ${easystack_file} ]; then - echo_green "Software rebuild(s) requested in ${easystack_file}, so determining which existing installation have to be removed..." - # we need to remove existing installation directories first, - # so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R]) - # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) - # rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') - # we cannot run as root so we removed `--allow-use-as-root...` - rebuild_apps=$(eb --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') - for app in ${rebuild_apps}; do - app_dir=${EASYBUILD_INSTALLPATH}/software/${app} - app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua - echo_yellow "Removing ${app_dir} and ${app_module}..." - ls -lR ${app_dir} - rm -rdfv ${app_dir} - rm -rdfv ${app_module} - done - else - fatal_error "Easystack file ${easystack_file} not found!" - fi - done - fi +changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") +if [ -z ${changed_easystacks_rebuilds} ]; then + echo "No software needs to be removed." else - fatal_error "This script can NOT be run by root! (lacking support for `--fakeroot` and/or user namespaces)" + for easystack_file in ${changed_easystacks_rebuilds}; do + # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file + eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g') + + # load EasyBuild module (will be installed if it's not available yet) + source ${TOPDIR}/load_easybuild_module.sh ${eb_version} + + if [ -f ${easystack_file} ]; then + echo_green "Software rebuild(s) requested in ${easystack_file}, so" + echo_green " determining which existing installation have to be removed (assuming contents" + echo_green " have been made writable/deletable)..." + # we need to remove existing installation directories first, + # so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R]) + # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) + # rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') + rebuild_apps=$(eb --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') + for app in ${rebuild_apps}; do + app_dir=${EASYBUILD_INSTALLPATH}/software/${app} + app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua + echo_yellow "Removing ${app_dir} and ${app_module}..." + rm -rdfv ${app_dir} + rm -rdfv ${app_module} + done + else + fatal_error "Easystack file ${easystack_file} not found!" + fi + done fi -# else -# fatal_error "This script can only be run by root!" -# fi diff --git a/bot/build.sh b/bot/build.sh index 0dbab6117b..23f5fd952b 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -211,21 +211,25 @@ else # create a new directory ${STORAGE}/lower_dirs/some_path_stripped # where the prefix /cvmfs/repo_name is removed from some_path # set permission of the directory to u+rwx - # add directory to LOWER_DIRS (':' separated list of directories) + # for each line containing 'REMOVE_FILE some_file_path' + # touch a new file ${STORAGE}/lower_dirs/some_file_path_stripped + # where the prefix /cvmfs/repo_name is removed from some_file_path + # set permission of the file to u+rw + LOWER_DIRS="${STORAGE}/lower_dirs" mkdir -p "${LOWER_DIRS}" + grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_dirs cat ${determine_outerr}.rm_dirs | while read remove_dir; do - # echo "PROCESS directory: --${remove_dir}--" mkdir -p ${STORAGE}/lower_dirs/${remove_dir} chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} done + grep ^REMOVE_FILE ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_files cat ${determine_outerr}.rm_files | while read remove_file; do touch ${STORAGE}/lower_dirs/${remove_file} chmod u+rw ${STORAGE}/lower_dirs/${remove_file} done - # ls -lR "${STORAGE}/lower_dirs" # prepare directory to store tarball of tmp for removal and build steps TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step From a3a320c639df59a1a350c3c7f3a2b2e3eaec5bee Mon Sep 17 00:00:00 2001 From: Richard Top Date: Tue, 7 May 2024 11:19:15 +0000 Subject: [PATCH 27/71] {2023.06}[gompi/2023a] OSU Microbenchmarks V7.2 w/ CUDA 12.1.1 --- .../pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml index e2d3ab496e..276bfa49f7 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml @@ -34,3 +34,4 @@ easyconfigs: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19451; options: from-pr: 19451 + - OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb From 6e699eba7b46695e5c25e8f1563a6a32f7126b54 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Wed, 8 May 2024 07:50:21 +0000 Subject: [PATCH 28/71] Easyconfig that can extend EESSI --- EESSI-extend-2023.06-easybuild.eb | 167 ++++++++++++++++++ create_tarball.sh | 2 +- .../eessi-2023.06-eb-4.9.1-001-system.yml | 1 + init/Magic_Castle/bash | 2 +- init/bash | 2 + init/eessi_environment_variables | 2 + install_scripts.sh | 6 + 7 files changed, 180 insertions(+), 2 deletions(-) create mode 100644 EESSI-extend-2023.06-easybuild.eb diff --git a/EESSI-extend-2023.06-easybuild.eb b/EESSI-extend-2023.06-easybuild.eb new file mode 100644 index 0000000000..76e96766f9 --- /dev/null +++ b/EESSI-extend-2023.06-easybuild.eb @@ -0,0 +1,167 @@ +easyblock = 'Bundle' + +name = 'EESSI-extend' +version = '2023.06' +# May have different ways to extend EESSI in future (manually, other tools,...) +versionsuffix = '-easybuild' + +homepage = 'https://eessi.io/docs/' + +description = """ + The goal of the European Environment for Scientific Software Installations + (EESSI, pronounced as "easy") is to build a common stack of scientific + software installations for HPC systems and beyond, including laptops, + personal workstations and cloud infrastructure. + + This module allows you to extend EESSI using the same configuration for + EasyBuild as EESSI itself uses. A number of environment variables control the + behaviour of the module: + - EESSI_USER_INSTALL can be set to a location to install modules for use by + the user only. The location must already exist on the filesystem. + - EESSI_PROJECT_INSTALL can be set to a location to install modules for use by + a project. The location must already exist on the filesystem and you should + ensure that the location has the correct Linux group and the SGID permission + is set on that directory (`chmod g+s $EESSI_PROJECT_INSTALL`) so that all + members of the group have permission to read and write installations. + - EESSI_SITE_INSTALL is either defined or not and cannot be used with another + environment variable. A site installation is done in a defined location and + any installations there are (by default) world readable. + - EESSI_CVMFS_INSTALL is either defined or not and cannot be used with another + environment variable. A CVMFS installation targets a defined location which + will be ingested into CVMFS and is only useful for CVMFS administrators. + - If none of the environment variables above are defined, an EESSI_USER_INSTALL + is assumed with a value of $HOME/EESSI + If both EESSI_USER_INSTALL and EESSI_PROJECT_INSTALL are defined, both sets of + installations are exposed, but new installations are created as user + installations. +""" + +toolchain = SYSTEM + +# All the dependencies we filter in EESSI +local_deps_to_filter = "Autoconf,Automake,Autotools,binutils,bzip2,DBus,flex,gettext,gperf,help2man,intltool,libreadline,libtool,M4,makeinfo,ncurses,util-linux,XZ,zlib" +local_arch_specific_deps_to_filter = {'aarch64': ',yasm', 'x86_64': ''} +local_deps_to_filter += local_arch_specific_deps_to_filter[ARCH] + +# Set the universal EasyBuild variables +modextravars = { + 'EASYBUILD_FILTER_DEPS': local_deps_to_filter, + 'EASYBUILD_IGNORE_OSDEPS': '1', + 'EASYBUILD_DEBUG': '1', + 'EASYBUILD_TRACE': '1', + 'EASYBUILD_ZIP_LOGS': 'bzip2', + 'EASYBUILD_RPATH': '1', + 'EASYBUILD_FILTER_ENV_VARS': 'LD_LIBRARY_PATH', + 'EASYBUILD_READ_ONLY_INSTALLDIR': '1', + 'EASYBUILD_MODULE_EXTENSIONS': '1', + 'EASYBUILD_EXPERIMENTAL': '1', +} + +# Need a few other variables, but they are more dynamic +# EASYBUILD_SYSROOT=${EPREFIX} +# EASYBUILD_PREFIX=${WORKDIR}/easybuild +# EASYBUILD_HOOKS=${EESSI_PREFIX}/init/easybuild/eb_hooks.py +# EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR} +# EASYBUILD_SOURCEPATH=${WORKDIR}/easybuild/sources:${EESSI_SOURCEPATH} +# +# And also some optional ones based on the kind of installation +# EASYBUILD_SET_GID_BIT +# EASYBUILD_GROUP_WRITABLE_INSTALLDIR +# EASYBUILD_UMASK +# EASYBUILD_STICKY_BIT +modluafooter = """ +if (mode() == "load") then + -- Use a working directory for temporary build files + if (os.getenv("WORKING_DIR") == nil) then + LmodMessage("-- Using /tmp/$USER as a temporary working directory for installations, you can override this by setting the environment variable WORKING_DIR and reloading the module (e.g., /dev/shm is a common option)") + end +end +working_dir = os.getenv("WORKING_DIR") or pathJoin("/tmp", os.getenv("USER")) +-- Gather the EPREFIX to use as a sysroot +sysroot = os.getenv("EESSI_EPREFIX") +-- Use an installation prefix that we _should_ have write access to +if (os.getenv("EESSI_CVMFS_INSTALL") ~= nil) then + -- Make sure no other EESSI install environment variables are set + if ((os.getenv("EESSI_SITE_INSTALL") ~= nil) or (os.getenv("EESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("EESSI_USER_INSTALL") ~= nil)) then + LmodError("You cannot use EESSI_CVMFS_INSTALL in combination with any other EESSI_*_INSTALL environment variables") + end + eessi_cvmfs_install = true + easybuild_installpath = os.getenv("EESSI_SOFTWARE_PATH") +elseif (os.getenv("EESSI_SITE_INSTALL") ~= nil) then + -- Make sure no other EESSI install environment variables are set + if ((os.getenv("EESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("EESSI_USER_INSTALL") ~= nil)) then + LmodError("You cannot use EESSI_SITE_INSTALL in combination with any other EESSI_*_INSTALL environment variables") + end + easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), 'versions', 'host_injections') +else + -- Deal with user and project installs + project_install = os.getenv("EESSI_PROJECT_INSTALL") + project_modulepath = nil + if (project_install ~= nil) then + -- Check the folder exists + if not isDir(project_install) then + LmodError("The location of EESSI_PROJECT_INSTALL (" .. project_install .. ") does not exist or is not a folder") + end + if (mode() == "load") then + LmodMessage("Configuring for use of EESSI_PROJECT_INSTALL under " .. project_install) + end + easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), os.getenv("EESSI_CVMFS_REPO"), project_install) + project_modulepath = pathJoin(easybuild_installpath, 'modules', 'all') + end + user_install = os.getenv("EESSI_USER_INSTALL") + user_modulepath = nil + if (user_install ~= nil) then + -- Check the folder exists + if not isDir(user_install) then + LmodError("The location of EESSI_USER_INSTALL (" .. user_install .. ") does not exist or is not a folder") + end + elseif (user_install == nil) and (project_install == nil) then + -- No need to check for existence when we use a HOME subdir + user_install = pathJoin(os.getenv("HOME"), "eessi") + end + if (user_install ~= nil) then + if (mode() == "load") then + LmodMessage("Configuring for use of EESSI_USER_INSTALL under " .. user_install) + end + easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), os.getenv("EESSI_CVMFS_REPO"), user_install) + user_modulepath = pathJoin(easybuild_installpath, 'modules', 'all') + end +end +if (mode() == "load") then + LmodMessage("-- To create installations for EESSI, you _must_ have write permissions to " .. easybuild_installpath) + -- Advise them to reuse sources + if (os.getenv("EASYBUILD_SOURCEPATH") == nil) then + LmodMessage("-- You may wish to configure a sources directory for EasyBuild (for example, via setting the environment variable EASYBUILD_SOURCEPATH) to allow you to reuse existing sources for packages.") + end +end +-- Set the relevant universal environment variables for EasyBuild +setenv ("EASYBUILD_SYSROOT", sysroot) +setenv ("EASYBUILD_PREFIX", pathJoin(working_dir, "easybuild")) +setenv ("EASYBUILD_INSTALLPATH", easybuild_installpath) +setenv ("EASYBUILD_HOOKS", pathJoin(os.getenv("EESSI_PREFIX"), "init", "easybuild", "eb_hooks.py")) +setenv ("EASYBUILD_UMASK", "002") + +-- Set all related environment variables if we have project or user installations (including extending MODULEPATH) +if (user_modulepath ~= nil) then + -- Use a more restrictive umask for this case + setenv ("EASYBUILD_UMASK", "022") + setenv ("EASYBUILD_STICKY_BIT", "1") + -- configure MODULEPATH + if (project_modulepath ~= nil) then + prepend_path("MODULEPATH", project_modulepath) + end + prepend_path("MODULEPATH", user_modulepath) +elseif (project_modulepath ~= nil) then + setenv ("EASYBUILD_SET_GID_BIT", "1") + setenv ("EASYBUILD_GROUP_WRITABLE_INSTALLDIR", "1") + setenv ("EASYBUILD_STICKY_BIT", "0") + -- configure MODULEPATH + prepend_path("MODULEPATH", project_modulepath) +end +-- Make sure EasyBuild itself is loaded +if not ( isloaded("EasyBuild") ) then + load("EasyBuild") +end +""" + +moduleclass = 'devel' diff --git a/create_tarball.sh b/create_tarball.sh index 2d77acfc43..0a7669f73f 100755 --- a/create_tarball.sh +++ b/create_tarball.sh @@ -28,7 +28,7 @@ fi overlay_upper_dir="${eessi_tmpdir}/overlay-upper" -software_dir_overlay="${overlay_upper_dir}/versions/${eessi_version}/software/${os}/${cpu_arch_subdir}" +software_dir_overlay="${overlay_upper_dir}/versions/${eessi_version}" if [ ! -d ${software_dir_overlay} ]; then echo "Software directory overlay ${software_dir_overlay} does not exist?!" >&2 exit 3 diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml index c5a08b5209..46ac979719 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml @@ -2,3 +2,4 @@ easyconfigs: - EasyBuild-4.9.1.eb: options: from-pr: 20299 + - EESSI-extend-2023.06-easybuild.eb diff --git a/init/Magic_Castle/bash b/init/Magic_Castle/bash index 0fb2f670a3..e2fded897d 100644 --- a/init/Magic_Castle/bash +++ b/init/Magic_Castle/bash @@ -10,7 +10,7 @@ source $(dirname "$BASH_SOURCE")/../eessi_environment_variables # Provide a clean MODULEPATH export MODULEPATH_ROOT=$EESSI_MODULEPATH -export MODULEPATH=$EESSI_MODULEPATH +export MODULEPATH=$EESSI_SITE_MODULEPATH:$EESSI_MODULEPATH # Extensions are too many, let's not print them by default (requires Lmod 8.4.12) export LMOD_AVAIL_EXTENSIONS=no diff --git a/init/bash b/init/bash index 2097f03617..d72df1f346 100644 --- a/init/bash +++ b/init/bash @@ -26,6 +26,8 @@ if [ $? -eq 0 ]; then # prepend location of modules for EESSI software stack to $MODULEPATH show_msg "Prepending $EESSI_MODULEPATH to \$MODULEPATH..." module use $EESSI_MODULEPATH + show_msg "Prepending site path $EESSI_SITE_MODULEPATH to \$MODULEPATH..." + module use $EESSI_SITE_MODULEPATH #show_msg "" #show_msg "*** Known problems in the ${EESSI_VERSION} software stack ***" diff --git a/init/eessi_environment_variables b/init/eessi_environment_variables index 815b46d0e1..1a44b53c41 100644 --- a/init/eessi_environment_variables +++ b/init/eessi_environment_variables @@ -89,6 +89,8 @@ if [ -d $EESSI_PREFIX ]; then if [ -d $EESSI_MODULEPATH ]; then export EESSI_MODULEPATH=$EESSI_MODULEPATH show_msg "Using ${EESSI_MODULEPATH} as the directory to be added to MODULEPATH." + export EESSI_SITE_MODULEPATH=${EESSI_MODULEPATH/versions/host_injections} + show_msg "Using ${EESSI_SITE_MODULEPATH} as the site extension directory to be added to MODULEPATH." else error "NESSI module path at $EESSI_MODULEPATH not found!" false diff --git a/install_scripts.sh b/install_scripts.sh index 508735975c..17f0b81008 100755 --- a/install_scripts.sh +++ b/install_scripts.sh @@ -113,3 +113,9 @@ nvidia_files=( install_cuda_host_injections.sh link_nvidia_host_libraries.sh ) copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia ${INSTALL_PREFIX}/scripts/gpu_support/nvidia "${nvidia_files[@]}" + +# Copy over EasyBuild hooks file used for installations +hook_files=( + eb_hooks.py +) +copy_files_by_list ${TOPDIR} ${INSTALL_PREFIX}/init/easybuild "${hook_files[@]}" From da3f5f5d20dc5137a1d255e6bd1a734507e80ad6 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Wed, 8 May 2024 09:53:38 +0000 Subject: [PATCH 29/71] modified changes in EESSI-Extend easyconfig to meet NESSI requirements --- EESSI-extend-2023.06-easybuild.eb | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/EESSI-extend-2023.06-easybuild.eb b/EESSI-extend-2023.06-easybuild.eb index 76e96766f9..1d8362d956 100644 --- a/EESSI-extend-2023.06-easybuild.eb +++ b/EESSI-extend-2023.06-easybuild.eb @@ -1,6 +1,6 @@ easyblock = 'Bundle' -name = 'EESSI-extend' +name = 'NESSI-extend' version = '2023.06' # May have different ways to extend EESSI in future (manually, other tools,...) versionsuffix = '-easybuild' @@ -80,55 +80,55 @@ working_dir = os.getenv("WORKING_DIR") or pathJoin("/tmp", os.getenv("USER")) -- Gather the EPREFIX to use as a sysroot sysroot = os.getenv("EESSI_EPREFIX") -- Use an installation prefix that we _should_ have write access to -if (os.getenv("EESSI_CVMFS_INSTALL") ~= nil) then +if (os.getenv("NESSI_CVMFS_INSTALL") ~= nil) then -- Make sure no other EESSI install environment variables are set - if ((os.getenv("EESSI_SITE_INSTALL") ~= nil) or (os.getenv("EESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("EESSI_USER_INSTALL") ~= nil)) then - LmodError("You cannot use EESSI_CVMFS_INSTALL in combination with any other EESSI_*_INSTALL environment variables") + if ((os.getenv("NESSI_SITE_INSTALL") ~= nil) or (os.getenv("NESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("NESSI_USER_INSTALL") ~= nil)) then + LmodError("You cannot use NESSI_CVMFS_INSTALL in combination with any other NESSI_*_INSTALL environment variables") end eessi_cvmfs_install = true easybuild_installpath = os.getenv("EESSI_SOFTWARE_PATH") -elseif (os.getenv("EESSI_SITE_INSTALL") ~= nil) then +elseif (os.getenv("NESSI_SITE_INSTALL") ~= nil) then -- Make sure no other EESSI install environment variables are set - if ((os.getenv("EESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("EESSI_USER_INSTALL") ~= nil)) then - LmodError("You cannot use EESSI_SITE_INSTALL in combination with any other EESSI_*_INSTALL environment variables") + if ((os.getenv("NESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("NESSI_USER_INSTALL") ~= nil)) then + LmodError("You cannot use NESSI_SITE_INSTALL in combination with any other NESSI_*_INSTALL environment variables") end easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), 'versions', 'host_injections') else -- Deal with user and project installs - project_install = os.getenv("EESSI_PROJECT_INSTALL") + project_install = os.getenv("NESSI_PROJECT_INSTALL") project_modulepath = nil if (project_install ~= nil) then -- Check the folder exists if not isDir(project_install) then - LmodError("The location of EESSI_PROJECT_INSTALL (" .. project_install .. ") does not exist or is not a folder") + LmodError("The location of NESSI_PROJECT_INSTALL (" .. project_install .. ") does not exist or is not a folder") end if (mode() == "load") then - LmodMessage("Configuring for use of EESSI_PROJECT_INSTALL under " .. project_install) + LmodMessage("Configuring for use of NESSI_PROJECT_INSTALL under " .. project_install) end easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), os.getenv("EESSI_CVMFS_REPO"), project_install) project_modulepath = pathJoin(easybuild_installpath, 'modules', 'all') end - user_install = os.getenv("EESSI_USER_INSTALL") + user_install = os.getenv("NESSI_USER_INSTALL") user_modulepath = nil if (user_install ~= nil) then -- Check the folder exists if not isDir(user_install) then - LmodError("The location of EESSI_USER_INSTALL (" .. user_install .. ") does not exist or is not a folder") + LmodError("The location of NESSI_USER_INSTALL (" .. user_install .. ") does not exist or is not a folder") end elseif (user_install == nil) and (project_install == nil) then -- No need to check for existence when we use a HOME subdir - user_install = pathJoin(os.getenv("HOME"), "eessi") + user_install = pathJoin(os.getenv("HOME"), "nessi") end if (user_install ~= nil) then if (mode() == "load") then - LmodMessage("Configuring for use of EESSI_USER_INSTALL under " .. user_install) + LmodMessage("Configuring for use of NESSI_USER_INSTALL under " .. user_install) end easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), os.getenv("EESSI_CVMFS_REPO"), user_install) user_modulepath = pathJoin(easybuild_installpath, 'modules', 'all') end end if (mode() == "load") then - LmodMessage("-- To create installations for EESSI, you _must_ have write permissions to " .. easybuild_installpath) + LmodMessage("-- To create installations for NESSI, you _must_ have write permissions to " .. easybuild_installpath) -- Advise them to reuse sources if (os.getenv("EASYBUILD_SOURCEPATH") == nil) then LmodMessage("-- You may wish to configure a sources directory for EasyBuild (for example, via setting the environment variable EASYBUILD_SOURCEPATH) to allow you to reuse existing sources for packages.") From 4fabf148bc516d7df2cb6837b7d17ce6167a47e8 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Wed, 8 May 2024 10:42:31 +0000 Subject: [PATCH 30/71] bumping scorecard-action version to 2.3.1 --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index dc18fd584a..7eff557094 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -40,7 +40,7 @@ jobs: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@99c53751e09b9529366343771cc321ec74e9bd3d # v2.0.6 + uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1 with: results_file: results.sarif results_format: sarif From 2eeb6e0f8b703e620230251a9fadc7a89a1dae31 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Wed, 8 May 2024 13:12:45 +0000 Subject: [PATCH 31/71] reverted changes to create_tarball as it will be done in a seperate PR --- create_tarball.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/create_tarball.sh b/create_tarball.sh index 0a7669f73f..2d77acfc43 100755 --- a/create_tarball.sh +++ b/create_tarball.sh @@ -28,7 +28,7 @@ fi overlay_upper_dir="${eessi_tmpdir}/overlay-upper" -software_dir_overlay="${overlay_upper_dir}/versions/${eessi_version}" +software_dir_overlay="${overlay_upper_dir}/versions/${eessi_version}/software/${os}/${cpu_arch_subdir}" if [ ! -d ${software_dir_overlay} ]; then echo "Software directory overlay ${software_dir_overlay} does not exist?!" >&2 exit 3 From cd70e1035343dd4a1f7428ca1ea5e8c135ac0e2e Mon Sep 17 00:00:00 2001 From: Richard Top Date: Wed, 8 May 2024 13:16:55 +0000 Subject: [PATCH 32/71] Modify create_tarball.sh to address changes under init --- create_tarball.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/create_tarball.sh b/create_tarball.sh index 2d77acfc43..0a7669f73f 100755 --- a/create_tarball.sh +++ b/create_tarball.sh @@ -28,7 +28,7 @@ fi overlay_upper_dir="${eessi_tmpdir}/overlay-upper" -software_dir_overlay="${overlay_upper_dir}/versions/${eessi_version}/software/${os}/${cpu_arch_subdir}" +software_dir_overlay="${overlay_upper_dir}/versions/${eessi_version}" if [ ! -d ${software_dir_overlay} ]; then echo "Software directory overlay ${software_dir_overlay} does not exist?!" >&2 exit 3 From 46ef3724b7bfdc9ad8d4b7f46bad7252c3c07aa5 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 8 May 2024 21:57:33 +0200 Subject: [PATCH 33/71] further adjust easyconfig to NESSI --- EESSI-extend-2023.06-easybuild.eb | 41 +++++++++++++++++-------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/EESSI-extend-2023.06-easybuild.eb b/EESSI-extend-2023.06-easybuild.eb index 1d8362d956..ed71ee5b53 100644 --- a/EESSI-extend-2023.06-easybuild.eb +++ b/EESSI-extend-2023.06-easybuild.eb @@ -2,43 +2,46 @@ easyblock = 'Bundle' name = 'NESSI-extend' version = '2023.06' -# May have different ways to extend EESSI in future (manually, other tools,...) +# May have different ways to extend NESSI in the future (manually, other tools,...) versionsuffix = '-easybuild' -homepage = 'https://eessi.io/docs/' +homepage = 'https://documentation.sigma2.no/software/nessi_eessi.html' description = """ - The goal of the European Environment for Scientific Software Installations - (EESSI, pronounced as "easy") is to build a common stack of scientific - software installations for HPC systems and beyond, including laptops, - personal workstations and cloud infrastructure. + NESSI is an innovative service to make optimized scientific software + installations available on any machine anywhere in the world in near + real-time - without the need to build or install the software. NESSI + works similarly to popular streaming services for videos and music. - This module allows you to extend EESSI using the same configuration for - EasyBuild as EESSI itself uses. A number of environment variables control the + NESSI is a sibling of EESSI, the European Environment for Scientific + Software Installations (see https://eessi.io/docs). + + This module allows you to extend NESSI using the same configuration for + EasyBuild as NESSI itself uses. A number of environment variables control the behaviour of the module: - - EESSI_USER_INSTALL can be set to a location to install modules for use by + - NESSI_USER_INSTALL can be set to a location to install modules for use by the user only. The location must already exist on the filesystem. - - EESSI_PROJECT_INSTALL can be set to a location to install modules for use by + - NESSI_PROJECT_INSTALL can be set to a location to install modules for use by a project. The location must already exist on the filesystem and you should ensure that the location has the correct Linux group and the SGID permission - is set on that directory (`chmod g+s $EESSI_PROJECT_INSTALL`) so that all + is set on that directory (`chmod g+s $NESSI_PROJECT_INSTALL`) so that all members of the group have permission to read and write installations. - - EESSI_SITE_INSTALL is either defined or not and cannot be used with another + - NESSI_SITE_INSTALL is either defined or not and cannot be used with another environment variable. A site installation is done in a defined location and any installations there are (by default) world readable. - - EESSI_CVMFS_INSTALL is either defined or not and cannot be used with another + - NESSI_CVMFS_INSTALL is either defined or not and cannot be used with another environment variable. A CVMFS installation targets a defined location which will be ingested into CVMFS and is only useful for CVMFS administrators. - - If none of the environment variables above are defined, an EESSI_USER_INSTALL - is assumed with a value of $HOME/EESSI - If both EESSI_USER_INSTALL and EESSI_PROJECT_INSTALL are defined, both sets of + - If none of the environment variables above are defined, a NESSI_USER_INSTALL + is assumed with a value of $HOME/NESSI + If both NESSI_USER_INSTALL and NESSI_PROJECT_INSTALL are defined, both sets of installations are exposed, but new installations are created as user installations. """ toolchain = SYSTEM -# All the dependencies we filter in EESSI +# All the dependencies we filter in NESSI local_deps_to_filter = "Autoconf,Automake,Autotools,binutils,bzip2,DBus,flex,gettext,gperf,help2man,intltool,libreadline,libtool,M4,makeinfo,ncurses,util-linux,XZ,zlib" local_arch_specific_deps_to_filter = {'aarch64': ',yasm', 'x86_64': ''} local_deps_to_filter += local_arch_specific_deps_to_filter[ARCH] @@ -81,14 +84,14 @@ working_dir = os.getenv("WORKING_DIR") or pathJoin("/tmp", os.getenv("USER")) sysroot = os.getenv("EESSI_EPREFIX") -- Use an installation prefix that we _should_ have write access to if (os.getenv("NESSI_CVMFS_INSTALL") ~= nil) then - -- Make sure no other EESSI install environment variables are set + -- Make sure no other NESSI install environment variables are set if ((os.getenv("NESSI_SITE_INSTALL") ~= nil) or (os.getenv("NESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("NESSI_USER_INSTALL") ~= nil)) then LmodError("You cannot use NESSI_CVMFS_INSTALL in combination with any other NESSI_*_INSTALL environment variables") end eessi_cvmfs_install = true easybuild_installpath = os.getenv("EESSI_SOFTWARE_PATH") elseif (os.getenv("NESSI_SITE_INSTALL") ~= nil) then - -- Make sure no other EESSI install environment variables are set + -- Make sure no other NESSI install environment variables are set if ((os.getenv("NESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("NESSI_USER_INSTALL") ~= nil)) then LmodError("You cannot use NESSI_SITE_INSTALL in combination with any other NESSI_*_INSTALL environment variables") end From b5f3c95d872a5d7267dfc5d3725cccbe9bf3ec6e Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 8 May 2024 21:58:10 +0200 Subject: [PATCH 34/71] adjust creation of SitePackage.lua to NESSI --- create_lmodsitepackage.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py index 28605beea5..f9053cdf9e 100755 --- a/create_lmodsitepackage.py +++ b/create_lmodsitepackage.py @@ -21,19 +21,19 @@ end local function from_eessi_prefix(t) - -- eessi_prefix is the prefix with official EESSI modules - -- e.g. /cvmfs/software.eessi.io/versions/2023.06 + -- eessi_prefix is the prefix with official NESSI modules + -- e.g. /cvmfs/pilot.nessi.no/versions/2023.06 local eessi_prefix = os.getenv("EESSI_PREFIX") - -- If EESSI_PREFIX wasn't defined, we cannot check if this module was from the EESSI environment + -- If EESSI_PREFIX wasn't defined, we cannot check if this module was from the NESSI environment -- In that case, we assume it isn't, otherwise EESSI_PREFIX would (probably) have been set if eessi_prefix == nil then return False else -- NOTE: exact paths for site so may need to be updated later. - -- See https://github.com/EESSI/software-layer/pull/371 + -- See https://github.com/NorESSI/software-layer/pull/358 -- eessi_prefix_host_injections is the prefix with site-extensions (i.e. additional modules) - -- to the official EESSI modules, e.g. /cvmfs/software.eessi.io/host_injections/2023.06 + -- to the official NESSI modules, e.g. /cvmfs/pilot.nessi.no/host_injections/2023.06 local eessi_prefix_host_injections = string.gsub(eessi_prefix, 'versions', 'host_injections') -- Check if the full modulepath starts with the eessi_prefix_* @@ -42,9 +42,9 @@ end local function load_site_specific_hooks() - -- This function will be run after the EESSI hooks are registered + -- This function will be run after the NESSI hooks are registered -- It will load a local SitePackage.lua that is architecture independent (if it exists) from e.g. - -- /cvmfs/software.eessi.io/host_injections/2023.06/.lmod/SitePackage.lua + -- /cvmfs/pilot.nessi.no/host_injections/2023.06/.lmod/SitePackage.lua -- That can define a new hook -- -- function site_specific_load_hook(t) @@ -58,7 +58,7 @@ -- site_specific_load_hook(t) -- end -- - -- Over overwrite the EESSI hook entirely: + -- Or overwrite the NESSI hook entirely: -- -- hook.register("load", final_load_hook) -- @@ -66,7 +66,7 @@ -- See https://github.com/TACC/Lmod/pull/696#issuecomment-1998765722 -- -- Subsequently, this function will look for an architecture-specific SitePackage.lua, e.g. from - -- /cvmfs/software.eessi.io/host_injections/2023.06/software/linux/x86_64/amd/zen2/.lmod/SitePackage.lua + -- /cvmfs/pilot.nessi.no/host_injections/2023.06/software/linux/x86_64/amd/zen2/.lmod/SitePackage.lua -- This can then register an additional hook, e.g. -- -- function arch_specific_load_hook(t) @@ -112,7 +112,7 @@ local simpleName = string.match(t.modFullName, "(.-)/") -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections. -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse - -- to load the CUDA module and print an informative message on how to set up GPU support for EESSI + -- to load the CUDA module and print an informative message on how to set up GPU support for NESSI local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" if simpleName == 'CUDA' then -- get the full host_injections path @@ -121,26 +121,26 @@ local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild" local cudaDirExists = isDir(cudaEasyBuildDir) if not cudaDirExists then - local advice = "but while the module file exists, the actual software is not entirely shipped with EESSI " - advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where EESSI " + local advice = "but while the module file exists, the actual software is not entirely shipped with NESSI " + advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where NESSI " advice = advice .. "can find it.\\n" advice = advice .. refer_to_docs LmodError("\\nYou requested to load ", simpleName, " ", advice) end end - -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the EESSI linker, + -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the NESSI linker, -- otherwise, refuse to load the requested module and print error message local haveGpu = mt:haveProperty(simpleName,"arch","gpu") if haveGpu then local arch = os.getenv("EESSI_CPU_FAMILY") or "" - local cudaVersionFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt" - local cudaDriverFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/libcuda.so" + local cudaVersionFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt" + local cudaDriverFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/libcuda.so" local cudaDriverExists = isFile(cudaDriverFile) local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so") if not (cudaDriverExists or singularityCudaExists) then local advice = "which relies on the CUDA runtime environment and driver libraries. " advice = advice .. "In order to be able to use the module, you will need " - advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system.\\n" + advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system.\\n" advice = advice .. refer_to_docs LmodError("\\nYou requested to load ", simpleName, " ", advice) else @@ -162,7 +162,7 @@ if driver_libs_need_update == true then local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". " advice = advice .. "Please update your CUDA driver libraries and then " - advice = advice .. "let EESSI know about the update.\\n" + advice = advice .. "let NESSI know about the update.\\n" advice = advice .. refer_to_docs LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice) end @@ -174,7 +174,7 @@ -- Combine both functions into a single one, as we can only register one function as load hook in lmod -- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed function eessi_load_hook(t) - -- Only apply CUDA hooks if the loaded module is in the EESSI prefix + -- Only apply CUDA hooks if the loaded module is in the NESSI prefix -- This avoids getting an Lmod Error when trying to load a CUDA module from a local software stack if from_eessi_prefix(t) then eessi_cuda_enabled_load_hook(t) @@ -183,7 +183,7 @@ hook.register("load", eessi_load_hook) --- Note that this needs to happen at the end, so that any EESSI specific hooks can be overwritten by the site +-- Note that this needs to happen at the end, so that any NESSI specific hooks can be overwritten by the site load_site_specific_hooks() """ From f57b76dc53e0517f8e2ae405daedad3ee66590e4 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 15 May 2024 19:57:18 +0200 Subject: [PATCH 35/71] drop dependency on cuDNN to builddependency --- eb_hooks.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/eb_hooks.py b/eb_hooks.py index a778f8b7c1..cf1c911b23 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -768,6 +768,25 @@ def inject_gpu_property(ec): ec[key] = '\n'.join([ec_dict[key], value]) else: ec[key] = value + # Check if cuDNN is in the dependencies, if so add the 'gpu' Lmod property + if ('cuDNN' in [dep[0] for dep in iter(ec_dict['dependencies'])]): + ec.log.info("Injecting gpu as Lmod arch property and envvar with cuDNN version") + key = 'modluafooter' + value = 'add_property("arch","gpu")' + cudnn_version = 0 + for dep in iter(ec_dict['dependencies']): + # Make cuDNN a build dependency only (rpathing saves us from link errors) + if 'cuDNN' in dep[0]: + cudnn_version = dep[1] + ec_dict['dependencies'].remove(dep) + if dep not in ec_dict['builddependencies']: + ec_dict['builddependencies'].append(dep) + value = '\n'.join([value, 'setenv("EESSICUDNNVERSION","%s")' % cudnn_version]) + if key in ec_dict: + if not value in ec_dict[key]: + ec[key] = '\n'.join([ec_dict[key], value]) + else: + ec[key] = value return ec From e5f9fa719aad28a26d4e6da8925b910f9a3e360c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 15 May 2024 20:03:17 +0200 Subject: [PATCH 36/71] Lmod hook for cuDNN --- create_lmodsitepackage.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py index f9053cdf9e..5b32578d24 100755 --- a/create_lmodsitepackage.py +++ b/create_lmodsitepackage.py @@ -171,13 +171,38 @@ end end +local function eessi_cudnn_enabled_load_hook(t) + local frameStk = require("FrameStk"):singleton() + local mt = frameStk:mt() + local simpleName = string.match(t.modFullName, "(.-)/") + -- If we try to load cuDNN itself, check if the full cuDNN package was installed on the host in host_injections. + -- This is required for end users to build additional cuDNN dependent software. If the full SDK isn't present, refuse + -- to load the cuDNN module and print an informative message on how to set up GPU support for NESSI + local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" + if simpleName == 'cuDNN' then + -- get the full host_injections path + local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') + -- build final path where the cuDNN software should be installed + local cudnnEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild" + local cudnnDirExists = isDir(cudnnEasyBuildDir) + if not cudnnDirExists then + local advice = "but while the module file exists, the actual software is not entirely shipped with NESSI " + advice = advice .. "due to licencing. You will need to install a full copy of the cuDNN package where NESSI " + advice = advice .. "can find it.\\n" + advice = advice .. refer_to_docs + LmodError("\\nYou requested to load ", simpleName, " ", advice) + end + end +end + -- Combine both functions into a single one, as we can only register one function as load hook in lmod -- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed function eessi_load_hook(t) - -- Only apply CUDA hooks if the loaded module is in the NESSI prefix - -- This avoids getting an Lmod Error when trying to load a CUDA module from a local software stack + -- Only apply CUDA and cuDNN hooks if the loaded module is in the NESSI prefix + -- This avoids getting an Lmod Error when trying to load a CUDA or cuDNN module from a local software stack if from_eessi_prefix(t) then eessi_cuda_enabled_load_hook(t) + eessi_cudnn_enabled_load_hook(t) end end From fddacc697a94c442322e0530d44a754fb2c4320d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 15 May 2024 20:09:01 +0200 Subject: [PATCH 37/71] add psm2 to filter-deps --- configure_easybuild | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/configure_easybuild b/configure_easybuild index c1bd1d390b..16d07ab6ba 100644 --- a/configure_easybuild +++ b/configure_easybuild @@ -33,6 +33,12 @@ if [[ "$EESSI_CPU_FAMILY" == "aarch64" ]]; then DEPS_TO_FILTER="${DEPS_TO_FILTER},Yasm" fi +# Version 2023.06 of NESSI ships PSM2 in the compat layer, so we can filter this out while retaining support for OFA fabric +# (longer term this is probably not the right move as PSM2 should be configured with accelerator support, hence the restricted version) +if [[ "$EESSI_VERSION" == "2023.06" ]]; then + DEPS_TO_FILTER="${DEPS_TO_FILTER},PSM2" +fi + export EASYBUILD_FILTER_DEPS=$DEPS_TO_FILTER export EASYBUILD_MODULE_EXTENSIONS=1 From 72b9032981a53491a3c80769eafff773bc646419 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 15 May 2024 20:25:04 +0200 Subject: [PATCH 38/71] add rebuild for GROMACS and additional build dep --- ...b-4.9.1-GROMACS-correct-gmxapi-version.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 easystacks/pilot.nessi.no/2023.06/rebuilds/20240515-eb-4.9.1-GROMACS-correct-gmxapi-version.yml diff --git a/easystacks/pilot.nessi.no/2023.06/rebuilds/20240515-eb-4.9.1-GROMACS-correct-gmxapi-version.yml b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240515-eb-4.9.1-GROMACS-correct-gmxapi-version.yml new file mode 100644 index 0000000000..0a51c9ab92 --- /dev/null +++ b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240515-eb-4.9.1-GROMACS-correct-gmxapi-version.yml @@ -0,0 +1,19 @@ +# 2024.05.15 +# Originally shipped version forgot to bump the gmxapi version and source +# tarball, it was still using an older version from the 2023.3 tarball. Looking +# at https://gitlab.com/gromacs/gromacs/-/blob/v2024.1/python_packaging/gmxapi/src/gmxapi/version.py?ref_type=tags#L68, +# the 2024.1 release includes gmxapi 0.5.0. +# +# This also introduced a new build dependency on scikit-build-core for GROMACS +# +easyconfigs: + - scikit-build-core-0.9.3-GCCcore-13.2.0.eb: + options: + # from-commit: 61d07bff09afe63cfe1ae35dc58a0c8be01eed62 + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20526 + from-pr: 20526 + - GROMACS-2024.1-foss-2023b.eb: + options: + # from-commit: a0a467a88506c765a93a96b20d7a8fcb01d46b24 + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20522 + from-pr: 20522 From 6ab074cefec1d55ebe6d7d0fa05de875f32ff429 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 15 May 2024 23:19:46 +0200 Subject: [PATCH 39/71] use v0.2.0 of test suite --- run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_tests.sh b/run_tests.sh index 1dbb47db9d..ec44e22ade 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -18,7 +18,7 @@ base_dir=$(dirname $(realpath $0)) source ${base_dir}/init/eessi_defaults # Git clone has to be run in compat layer, to make the git command available -./run_in_compat_layer_env.sh "git clone https://github.com/EESSI/test-suite EESSI-test-suite" +./run_in_compat_layer_env.sh "git clone -b v0.2.0 https://github.com/EESSI/test-suite EESSI-test-suite" # Run the test suite ./test_suite.sh "$@" From 9a09643e88a59199a22273b3dbafbf85c8ae02df Mon Sep 17 00:00:00 2001 From: Richard Top Date: Thu, 16 May 2024 09:52:27 +0000 Subject: [PATCH 40/71] Allow overriding the Lmod GPU driver check --- EESSI-install-software.sh | 3 +++ create_lmodsitepackage.py | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 6c680571e2..c81e9e72c7 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -214,6 +214,9 @@ fi # if not, an error is produced, and the bot flags the whole build as failed (even when not installing GPU software) # ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh +# Don't run the Lmod GPU driver check when doing builds (may not have a GPU, and it's not relevant for vanilla builds anyway) +export EESSI_OVERRIDE_GPU_CHECK=1 + # use PR patch file to determine in which easystack files stuff was added changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing') if [ -z "${changed_easystacks}" ]; then diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py index f9053cdf9e..76ab646b9d 100755 --- a/create_lmodsitepackage.py +++ b/create_lmodsitepackage.py @@ -130,8 +130,9 @@ end -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the NESSI linker, -- otherwise, refuse to load the requested module and print error message - local haveGpu = mt:haveProperty(simpleName,"arch","gpu") - if haveGpu then + local checkGpu = mt:haveProperty(simpleName,"arch","gpu") + local overrideGpuCheck = os.getenv("EESSI_OVERRIDE_GPU_CHECK") + if checkGpu and (overrideGpuCheck == nil) then local arch = os.getenv("EESSI_CPU_FAMILY") or "" local cudaVersionFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt" local cudaDriverFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/libcuda.so" @@ -140,7 +141,9 @@ if not (cudaDriverExists or singularityCudaExists) then local advice = "which relies on the CUDA runtime environment and driver libraries. " advice = advice .. "In order to be able to use the module, you will need " - advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system.\\n" + advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system. You can " + advice = advice .. "override this check by setting the environment variable EESSI_OVERRIDE_GPU_CHECK but " + advice = advice .. "the loaded application will not be able to execute on your system.\\n" advice = advice .. refer_to_docs LmodError("\\nYou requested to load ", simpleName, " ", advice) else From 0205e890f5810ab46836372798880b99a3e57edd Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 16 May 2024 19:50:48 +0200 Subject: [PATCH 41/71] copy cuDNN install file and fix name --- EESSI-install-software.sh | 2 +- install_scripts.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 4e06abf3d0..d840910516 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -205,7 +205,7 @@ ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} # Allow skipping CUDA SDK install in e.g. CI environments if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula - ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cudnn_host_injections.sh -c 12.1.1 -d 8.9.2.26 + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh -c 12.1.1 -d 8.9.2.26 else echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed" fi diff --git a/install_scripts.sh b/install_scripts.sh index 17f0b81008..8bbcb6a7bf 100755 --- a/install_scripts.sh +++ b/install_scripts.sh @@ -110,7 +110,7 @@ copy_files_by_list ${TOPDIR}/scripts ${INSTALL_PREFIX}/scripts "${script_files[@ # Copy files for the scripts/gpu_support/nvidia directory nvidia_files=( - install_cuda_host_injections.sh link_nvidia_host_libraries.sh + install_cuda_host_injections.sh install_cuDNN_host_injections.sh link_nvidia_host_libraries.sh ) copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia ${INSTALL_PREFIX}/scripts/gpu_support/nvidia "${nvidia_files[@]}" From 020c2332f98b2c566d3a19bd783a635df586cfab Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 16:20:32 +0200 Subject: [PATCH 42/71] use NESSI_SITE_INSTALL when it is set --- EESSI-extend-2023.06-easybuild.eb | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/EESSI-extend-2023.06-easybuild.eb b/EESSI-extend-2023.06-easybuild.eb index ed71ee5b53..d514293706 100644 --- a/EESSI-extend-2023.06-easybuild.eb +++ b/EESSI-extend-2023.06-easybuild.eb @@ -95,7 +95,19 @@ elseif (os.getenv("NESSI_SITE_INSTALL") ~= nil) then if ((os.getenv("NESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("NESSI_USER_INSTALL") ~= nil)) then LmodError("You cannot use NESSI_SITE_INSTALL in combination with any other NESSI_*_INSTALL environment variables") end - easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), 'versions', 'host_injections') + site_install = os.getenv("NESSI_SITE_INSTALL") + site_modulepath = nil + if (site_install ~= nil) then + -- Check the folder exists + if not isDir(site_install) then + LmodError("The location of NESSI_SITE_INSTALL (" .. site_install .. ") does not exist or is not a folder") + end + if (mode() == "load") then + LmodMessage("Configuring for use of NESSI_SITE_INSTALL under " .. site_install) + end + easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), os.getenv("EESSI_CVMFS_REPO"), site_install) + site_modulepath = pathJoin(easybuild_installpath, 'modules', 'all') + end else -- Deal with user and project installs project_install = os.getenv("NESSI_PROJECT_INSTALL") From e250652be692576fd6e2b4e348032791f16d2b84 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 16:59:06 +0200 Subject: [PATCH 43/71] rebuild NESSI-extend module --- .../2023.06/rebuilds/20240519-update-NESSI-extend-module.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-update-NESSI-extend-module.yml diff --git a/easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-update-NESSI-extend-module.yml b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-update-NESSI-extend-module.yml new file mode 100644 index 0000000000..fbb323ff2e --- /dev/null +++ b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-update-NESSI-extend-module.yml @@ -0,0 +1,5 @@ +# 2024-05-19 +# Rebuild NESSI-extend/2023.06-easybuild +# The current version does not handle NESSI_SITE_INSTALL correctly. +easyconfigs: + - EESSI-extend-2023.06-easybuild.eb From 0bd90fa141564d0deabb3af15a0380ffa2656fac Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 18:27:42 +0200 Subject: [PATCH 44/71] rename rebuild easystack file --- ...dule.yml => 20240519-eb-4.9.1-rebuild-NESSI-extend-module.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename easystacks/pilot.nessi.no/2023.06/rebuilds/{20240519-update-NESSI-extend-module.yml => 20240519-eb-4.9.1-rebuild-NESSI-extend-module.yml} (100%) diff --git a/easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-update-NESSI-extend-module.yml b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-eb-4.9.1-rebuild-NESSI-extend-module.yml similarity index 100% rename from easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-update-NESSI-extend-module.yml rename to easystacks/pilot.nessi.no/2023.06/rebuilds/20240519-eb-4.9.1-rebuild-NESSI-extend-module.yml From a261b4c133ff4eab789a36b7cf824beae57f3dad Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 18:51:43 +0200 Subject: [PATCH 45/71] drop extra lowerdir parameter --- eessi_container.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi_container.sh b/eessi_container.sh index ad9397318a..962ce2c101 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -625,7 +625,7 @@ if [[ "${ACCESS}" == "rw" ]]; then EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}") EESSI_WRITABLE_OVERLAY="container:fuse-overlayfs" - EESSI_WRITABLE_OVERLAY+=" -o lowerdir=/cvmfs_ro/${repo_name}" + # EESSI_WRITABLE_OVERLAY+=" -o lowerdir=/cvmfs_ro/${repo_name}" if [[ ! -z ${LOWER_DIRS} ]]; then # need to convert ':' in LOWER_DIRS to ',' because bind mounts use ',' as # separator while the lowerdir overlayfs option uses ':' From 22c5cd4a98baa10a945da8cc9e492d47ceec8a39 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 19:01:08 +0200 Subject: [PATCH 46/71] show contents of extra lowerdir --- bot/build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bot/build.sh b/bot/build.sh index 23f5fd952b..0d9a314a4c 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -231,6 +231,8 @@ else chmod u+rw ${STORAGE}/lower_dirs/${remove_file} done + ls -lR ${STORAGE}/lower_dirs + # prepare directory to store tarball of tmp for removal and build steps TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} From 27ca2fafbde0f3e7e79b0c0a8bbf066c98faa213 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 20:26:09 +0200 Subject: [PATCH 47/71] use lower dirs also for build step --- bot/build.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bot/build.sh b/bot/build.sh index 0d9a314a4c..2a690ecb20 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -272,6 +272,9 @@ BUILD_STEP_ARGS+=("--nvidia" "all") if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi +if [[ ! -z ${LOWER_DIRS} ]]; then + BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") +fi # create tmp file for output of build step build_outerr=$(mktemp build.outerr.XXXX) From 51671ee5055d75d543c6202127ac2f279c60d42f Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 20:48:41 +0200 Subject: [PATCH 48/71] list directory contents --- EESSI-install-software.sh | 2 ++ EESSI-remove-software.sh | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index d840910516..4c80a2649a 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -245,6 +245,8 @@ else if [ -f ${easystack_file} ]; then echo_green "Feeding easystack file ${easystack_file} to EasyBuild..." + ls -lisaR /cvmfs/pilot.nessi.no/versions/2023.06/software/linux/x86_64/amd/zen2/software/NESSI-extend + ${EB} --easystack ${TOPDIR}/${easystack_file} --robot ec=$? diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index 651a22f311..e464a586c6 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -112,9 +112,13 @@ else for app in ${rebuild_apps}; do app_dir=${EASYBUILD_INSTALLPATH}/software/${app} app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua + ls -lisaR ${app_dir} + ls -lisaR ${app_module} echo_yellow "Removing ${app_dir} and ${app_module}..." rm -rdfv ${app_dir} rm -rdfv ${app_module} + ls -lisaR ${app_dir} + ls -lisaR ${app_module} done else fatal_error "Easystack file ${easystack_file} not found!" From a960a5fecbd76f9330665a6f0f61ab0a564b26cc Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 22:02:45 +0200 Subject: [PATCH 49/71] create copy of lower dirs (dirs only) + skip test step --- bot/{test.sh => _test.sh} | 0 bot/build.sh | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) rename bot/{test.sh => _test.sh} (100%) diff --git a/bot/test.sh b/bot/_test.sh similarity index 100% rename from bot/test.sh rename to bot/_test.sh diff --git a/bot/build.sh b/bot/build.sh index 2a690ecb20..c5fc9bb8a5 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -273,7 +273,20 @@ if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi if [[ ! -z ${LOWER_DIRS} ]]; then - BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") + # make copy of LOWER_DIRS but only retain directories + lower_parent_dir=$(dirname ${LOWER_DIRS}) + the_lower_dir=$(basename ${LOWER_DIRS}) + LOWER_DIRS_ONLY="${lower_parent_dir}/${the_lower_dir}_2" + mkdir -p ${LOWER_DIRS_ONLY} + echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" + ls -lisaR ${LOWER_DIRS_ONLY} + cp -a ${LOWER_DIRS}/ ${LOWER_DIRS_ONLY} + echo "contents of LOWER_DIRS_ONLY (after cp -a)" + ls -lisaR ${LOWER_DIRS_ONLY} + find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; + echo "contents of LOWER_DIRS_ONLY (find ... rm)" + ls -lisaR ${LOWER_DIRS_ONLY} + BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS_ONLY}") fi # create tmp file for output of build step From 57555acd4a5ba98a0d5d7e04a6b01854a2a33ac7 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 22:14:20 +0200 Subject: [PATCH 50/71] fix copy command --- bot/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/build.sh b/bot/build.sh index c5fc9bb8a5..a12d3669fc 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -280,7 +280,7 @@ if [[ ! -z ${LOWER_DIRS} ]]; then mkdir -p ${LOWER_DIRS_ONLY} echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" ls -lisaR ${LOWER_DIRS_ONLY} - cp -a ${LOWER_DIRS}/ ${LOWER_DIRS_ONLY} + cp -a ${LOWER_DIRS}/. ${LOWER_DIRS_ONLY} echo "contents of LOWER_DIRS_ONLY (after cp -a)" ls -lisaR ${LOWER_DIRS_ONLY} find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; From 9f41c8eebe90a5589dd289707d6562659b59d011 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 22:41:39 +0200 Subject: [PATCH 51/71] move removal step into installation script --- EESSI-install-software.sh | 40 +++++++++++++++++++ bot/build.sh | 81 ++++++++++++++++++++------------------- 2 files changed, 82 insertions(+), 39 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 4c80a2649a..1d0f3ed470 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -218,6 +218,46 @@ fi # Don't run the Lmod GPU driver check when doing builds (may not have a GPU, and it's not relevant for vanilla builds anyway) export EESSI_OVERRIDE_GPU_CHECK=1 +# before we actually install software, we need to remove software that is requested +# to be rebuilt (need to do this here because installations of software are read-only; +# also, it should be done in the same container run or fuse-overlayfs might get confused) +changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") +if [ -z ${changed_easystacks_rebuilds} ]; then + echo "No software needs to be removed." +else + for easystack_file in ${changed_easystacks_rebuilds}; do + # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file + eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g') + + # load EasyBuild module (will be installed if it's not available yet) + source ${TOPDIR}/load_easybuild_module.sh ${eb_version} + + if [ -f ${easystack_file} ]; then + echo_green "Software rebuild(s) requested in ${easystack_file}, so" + echo_green " determining which existing installation have to be removed (assuming contents" + echo_green " have been made writable/deletable)..." + # we need to remove existing installation directories first, + # so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R]) + # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) + # rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') + rebuild_apps=$(eb --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') + for app in ${rebuild_apps}; do + app_dir=${EASYBUILD_INSTALLPATH}/software/${app} + app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua + ls -lisaR ${app_dir} + ls -lisaR ${app_module} + echo_yellow "Removing ${app_dir} and ${app_module}..." + rm -rdfv ${app_dir} + rm -rdfv ${app_module} + ls -lisaR ${app_dir} + ls -lisaR ${app_module} + done + else + fatal_error "Easystack file ${easystack_file} not found!" + fi + done +fi + # use PR patch file to determine in which easystack files stuff was added changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing') if [ -z "${changed_easystacks}" ]; then diff --git a/bot/build.sh b/bot/build.sh index a12d3669fc..2b08a8599c 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -233,31 +233,33 @@ else ls -lR ${STORAGE}/lower_dirs - # prepare directory to store tarball of tmp for removal and build steps - TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step - mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} - - # prepare arguments to eessi_container.sh specific to remove step - declare -a REMOVAL_STEP_ARGS=() - REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") - REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") - if [[ ! -z ${LOWER_DIRS} ]]; then - REMOVAL_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") - fi - - # create tmp file for output of removal step - removal_outerr=$(mktemp remove.outerr.XXXX) - - echo "Executing command to remove software:" - echo "./eessi_container.sh ${COMMON_ARGS[@]} ${REMOVAL_STEP_ARGS[@]}" - echo " -- ./EESSI-remove-software.sh \"${REMOVAL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${removal_outerr}" - ./eessi_container.sh "${COMMON_ARGS[@]}" "${REMOVAL_STEP_ARGS[@]}" \ - -- ./EESSI-remove-software.sh "${REMOVAL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${removal_outerr} - - # make sure that the build step resumes from the same temporary directory - # this is important, as otherwise the removed software will still be there - REMOVAL_TMPDIR=$(grep ' as tmp directory ' ${removal_outerr} | cut -d ' ' -f 2) - BUILD_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") +# # prepare directory to store tarball of tmp for removal and build steps +# TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step +# mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} +# +#### +# # prepare arguments to eessi_container.sh specific to remove step +# declare -a REMOVAL_STEP_ARGS=() +# REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") +# REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") +# if [[ ! -z ${LOWER_DIRS} ]]; then +# REMOVAL_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") +# fi +# +# # create tmp file for output of removal step +# removal_outerr=$(mktemp remove.outerr.XXXX) +# +# echo "Executing command to remove software:" +# echo "./eessi_container.sh ${COMMON_ARGS[@]} ${REMOVAL_STEP_ARGS[@]}" +# echo " -- ./EESSI-remove-software.sh \"${REMOVAL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${removal_outerr}" +# ./eessi_container.sh "${COMMON_ARGS[@]}" "${REMOVAL_STEP_ARGS[@]}" \ +# -- ./EESSI-remove-software.sh "${REMOVAL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${removal_outerr} +# +# # make sure that the build step resumes from the same temporary directory +# # this is important, as otherwise the removed software will still be there +# REMOVAL_TMPDIR=$(grep ' as tmp directory ' ${removal_outerr} | cut -d ' ' -f 2) +# BUILD_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") +#### fi # prepare directory to store tarball of tmp for build step @@ -273,20 +275,21 @@ if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi if [[ ! -z ${LOWER_DIRS} ]]; then - # make copy of LOWER_DIRS but only retain directories - lower_parent_dir=$(dirname ${LOWER_DIRS}) - the_lower_dir=$(basename ${LOWER_DIRS}) - LOWER_DIRS_ONLY="${lower_parent_dir}/${the_lower_dir}_2" - mkdir -p ${LOWER_DIRS_ONLY} - echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" - ls -lisaR ${LOWER_DIRS_ONLY} - cp -a ${LOWER_DIRS}/. ${LOWER_DIRS_ONLY} - echo "contents of LOWER_DIRS_ONLY (after cp -a)" - ls -lisaR ${LOWER_DIRS_ONLY} - find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; - echo "contents of LOWER_DIRS_ONLY (find ... rm)" - ls -lisaR ${LOWER_DIRS_ONLY} - BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS_ONLY}") +# # make copy of LOWER_DIRS but only retain directories +# lower_parent_dir=$(dirname ${LOWER_DIRS}) +# the_lower_dir=$(basename ${LOWER_DIRS}) +# LOWER_DIRS_ONLY="${lower_parent_dir}/${the_lower_dir}_2" +# mkdir -p ${LOWER_DIRS_ONLY} +# echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" +# ls -lisaR ${LOWER_DIRS_ONLY} +# cp -a ${LOWER_DIRS}/. ${LOWER_DIRS_ONLY} +# echo "contents of LOWER_DIRS_ONLY (after cp -a)" +# ls -lisaR ${LOWER_DIRS_ONLY} +# find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; +# echo "contents of LOWER_DIRS_ONLY (find ... rm)" +# ls -lisaR ${LOWER_DIRS_ONLY} +# BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS_ONLY}") + BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") fi # create tmp file for output of build step From a25cc1af28633dbff91037f6c009518b49cd1292 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:13:06 +0200 Subject: [PATCH 52/71] Revert "move removal step into installation script" This reverts commit 9f41c8eebe90a5589dd289707d6562659b59d011. --- EESSI-install-software.sh | 40 ------------------- bot/build.sh | 81 +++++++++++++++++++-------------------- 2 files changed, 39 insertions(+), 82 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 1d0f3ed470..4c80a2649a 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -218,46 +218,6 @@ fi # Don't run the Lmod GPU driver check when doing builds (may not have a GPU, and it's not relevant for vanilla builds anyway) export EESSI_OVERRIDE_GPU_CHECK=1 -# before we actually install software, we need to remove software that is requested -# to be rebuilt (need to do this here because installations of software are read-only; -# also, it should be done in the same container run or fuse-overlayfs might get confused) -changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") -if [ -z ${changed_easystacks_rebuilds} ]; then - echo "No software needs to be removed." -else - for easystack_file in ${changed_easystacks_rebuilds}; do - # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file - eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g') - - # load EasyBuild module (will be installed if it's not available yet) - source ${TOPDIR}/load_easybuild_module.sh ${eb_version} - - if [ -f ${easystack_file} ]; then - echo_green "Software rebuild(s) requested in ${easystack_file}, so" - echo_green " determining which existing installation have to be removed (assuming contents" - echo_green " have been made writable/deletable)..." - # we need to remove existing installation directories first, - # so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R]) - # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) - # rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') - rebuild_apps=$(eb --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') - for app in ${rebuild_apps}; do - app_dir=${EASYBUILD_INSTALLPATH}/software/${app} - app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua - ls -lisaR ${app_dir} - ls -lisaR ${app_module} - echo_yellow "Removing ${app_dir} and ${app_module}..." - rm -rdfv ${app_dir} - rm -rdfv ${app_module} - ls -lisaR ${app_dir} - ls -lisaR ${app_module} - done - else - fatal_error "Easystack file ${easystack_file} not found!" - fi - done -fi - # use PR patch file to determine in which easystack files stuff was added changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing') if [ -z "${changed_easystacks}" ]; then diff --git a/bot/build.sh b/bot/build.sh index 2b08a8599c..a12d3669fc 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -233,33 +233,31 @@ else ls -lR ${STORAGE}/lower_dirs -# # prepare directory to store tarball of tmp for removal and build steps -# TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step -# mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} -# -#### -# # prepare arguments to eessi_container.sh specific to remove step -# declare -a REMOVAL_STEP_ARGS=() -# REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") -# REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") -# if [[ ! -z ${LOWER_DIRS} ]]; then -# REMOVAL_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") -# fi -# -# # create tmp file for output of removal step -# removal_outerr=$(mktemp remove.outerr.XXXX) -# -# echo "Executing command to remove software:" -# echo "./eessi_container.sh ${COMMON_ARGS[@]} ${REMOVAL_STEP_ARGS[@]}" -# echo " -- ./EESSI-remove-software.sh \"${REMOVAL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${removal_outerr}" -# ./eessi_container.sh "${COMMON_ARGS[@]}" "${REMOVAL_STEP_ARGS[@]}" \ -# -- ./EESSI-remove-software.sh "${REMOVAL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${removal_outerr} -# -# # make sure that the build step resumes from the same temporary directory -# # this is important, as otherwise the removed software will still be there -# REMOVAL_TMPDIR=$(grep ' as tmp directory ' ${removal_outerr} | cut -d ' ' -f 2) -# BUILD_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") -#### + # prepare directory to store tarball of tmp for removal and build steps + TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step + mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} + + # prepare arguments to eessi_container.sh specific to remove step + declare -a REMOVAL_STEP_ARGS=() + REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") + REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") + if [[ ! -z ${LOWER_DIRS} ]]; then + REMOVAL_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") + fi + + # create tmp file for output of removal step + removal_outerr=$(mktemp remove.outerr.XXXX) + + echo "Executing command to remove software:" + echo "./eessi_container.sh ${COMMON_ARGS[@]} ${REMOVAL_STEP_ARGS[@]}" + echo " -- ./EESSI-remove-software.sh \"${REMOVAL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${removal_outerr}" + ./eessi_container.sh "${COMMON_ARGS[@]}" "${REMOVAL_STEP_ARGS[@]}" \ + -- ./EESSI-remove-software.sh "${REMOVAL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${removal_outerr} + + # make sure that the build step resumes from the same temporary directory + # this is important, as otherwise the removed software will still be there + REMOVAL_TMPDIR=$(grep ' as tmp directory ' ${removal_outerr} | cut -d ' ' -f 2) + BUILD_STEP_ARGS+=("--resume" "${REMOVAL_TMPDIR}") fi # prepare directory to store tarball of tmp for build step @@ -275,21 +273,20 @@ if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi if [[ ! -z ${LOWER_DIRS} ]]; then -# # make copy of LOWER_DIRS but only retain directories -# lower_parent_dir=$(dirname ${LOWER_DIRS}) -# the_lower_dir=$(basename ${LOWER_DIRS}) -# LOWER_DIRS_ONLY="${lower_parent_dir}/${the_lower_dir}_2" -# mkdir -p ${LOWER_DIRS_ONLY} -# echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" -# ls -lisaR ${LOWER_DIRS_ONLY} -# cp -a ${LOWER_DIRS}/. ${LOWER_DIRS_ONLY} -# echo "contents of LOWER_DIRS_ONLY (after cp -a)" -# ls -lisaR ${LOWER_DIRS_ONLY} -# find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; -# echo "contents of LOWER_DIRS_ONLY (find ... rm)" -# ls -lisaR ${LOWER_DIRS_ONLY} -# BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS_ONLY}") - BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") + # make copy of LOWER_DIRS but only retain directories + lower_parent_dir=$(dirname ${LOWER_DIRS}) + the_lower_dir=$(basename ${LOWER_DIRS}) + LOWER_DIRS_ONLY="${lower_parent_dir}/${the_lower_dir}_2" + mkdir -p ${LOWER_DIRS_ONLY} + echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" + ls -lisaR ${LOWER_DIRS_ONLY} + cp -a ${LOWER_DIRS}/. ${LOWER_DIRS_ONLY} + echo "contents of LOWER_DIRS_ONLY (after cp -a)" + ls -lisaR ${LOWER_DIRS_ONLY} + find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; + echo "contents of LOWER_DIRS_ONLY (find ... rm)" + ls -lisaR ${LOWER_DIRS_ONLY} + BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS_ONLY}") fi # create tmp file for output of build step From 1305649e986e4f743ccb418206461a06fa80d0b2 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:13:21 +0200 Subject: [PATCH 53/71] Revert "fix copy command" This reverts commit 57555acd4a5ba98a0d5d7e04a6b01854a2a33ac7. --- bot/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/build.sh b/bot/build.sh index a12d3669fc..c5fc9bb8a5 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -280,7 +280,7 @@ if [[ ! -z ${LOWER_DIRS} ]]; then mkdir -p ${LOWER_DIRS_ONLY} echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" ls -lisaR ${LOWER_DIRS_ONLY} - cp -a ${LOWER_DIRS}/. ${LOWER_DIRS_ONLY} + cp -a ${LOWER_DIRS}/ ${LOWER_DIRS_ONLY} echo "contents of LOWER_DIRS_ONLY (after cp -a)" ls -lisaR ${LOWER_DIRS_ONLY} find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; From fa4b77371d2d57133fc7b7ae73a1d6696bf3af7e Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:13:23 +0200 Subject: [PATCH 54/71] Revert "create copy of lower dirs (dirs only) + skip test step" This reverts commit a960a5fecbd76f9330665a6f0f61ab0a564b26cc. --- bot/build.sh | 15 +-------------- bot/{_test.sh => test.sh} | 0 2 files changed, 1 insertion(+), 14 deletions(-) rename bot/{_test.sh => test.sh} (100%) diff --git a/bot/build.sh b/bot/build.sh index c5fc9bb8a5..2a690ecb20 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -273,20 +273,7 @@ if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi if [[ ! -z ${LOWER_DIRS} ]]; then - # make copy of LOWER_DIRS but only retain directories - lower_parent_dir=$(dirname ${LOWER_DIRS}) - the_lower_dir=$(basename ${LOWER_DIRS}) - LOWER_DIRS_ONLY="${lower_parent_dir}/${the_lower_dir}_2" - mkdir -p ${LOWER_DIRS_ONLY} - echo "contents of LOWER_DIRS_ONLY (after mkdir -p)" - ls -lisaR ${LOWER_DIRS_ONLY} - cp -a ${LOWER_DIRS}/ ${LOWER_DIRS_ONLY} - echo "contents of LOWER_DIRS_ONLY (after cp -a)" - ls -lisaR ${LOWER_DIRS_ONLY} - find ${LOWER_DIRS_ONLY} -type f -exec rm {} \; - echo "contents of LOWER_DIRS_ONLY (find ... rm)" - ls -lisaR ${LOWER_DIRS_ONLY} - BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS_ONLY}") + BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") fi # create tmp file for output of build step diff --git a/bot/_test.sh b/bot/test.sh similarity index 100% rename from bot/_test.sh rename to bot/test.sh From 2d14ffebe6d44330197fc74bd867485d3e86e9f4 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:13:25 +0200 Subject: [PATCH 55/71] Revert "list directory contents" This reverts commit 51671ee5055d75d543c6202127ac2f279c60d42f. --- EESSI-install-software.sh | 2 -- EESSI-remove-software.sh | 4 ---- 2 files changed, 6 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 4c80a2649a..d840910516 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -245,8 +245,6 @@ else if [ -f ${easystack_file} ]; then echo_green "Feeding easystack file ${easystack_file} to EasyBuild..." - ls -lisaR /cvmfs/pilot.nessi.no/versions/2023.06/software/linux/x86_64/amd/zen2/software/NESSI-extend - ${EB} --easystack ${TOPDIR}/${easystack_file} --robot ec=$? diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index e464a586c6..651a22f311 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -112,13 +112,9 @@ else for app in ${rebuild_apps}; do app_dir=${EASYBUILD_INSTALLPATH}/software/${app} app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua - ls -lisaR ${app_dir} - ls -lisaR ${app_module} echo_yellow "Removing ${app_dir} and ${app_module}..." rm -rdfv ${app_dir} rm -rdfv ${app_module} - ls -lisaR ${app_dir} - ls -lisaR ${app_module} done else fatal_error "Easystack file ${easystack_file} not found!" From d3cf7065e40d3595b3126b29f2a99f0caf33b2f4 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:13:27 +0200 Subject: [PATCH 56/71] Revert "use lower dirs also for build step" This reverts commit 27ca2fafbde0f3e7e79b0c0a8bbf066c98faa213. --- bot/build.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index 2a690ecb20..0d9a314a4c 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -272,9 +272,6 @@ BUILD_STEP_ARGS+=("--nvidia" "all") if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi -if [[ ! -z ${LOWER_DIRS} ]]; then - BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") -fi # create tmp file for output of build step build_outerr=$(mktemp build.outerr.XXXX) From 2afb50c87b31c05c48f9ebf7753855bf34270e2f Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:21:07 +0200 Subject: [PATCH 57/71] less noise --- bot/build.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index 0d9a314a4c..23f5fd952b 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -231,8 +231,6 @@ else chmod u+rw ${STORAGE}/lower_dirs/${remove_file} done - ls -lR ${STORAGE}/lower_dirs - # prepare directory to store tarball of tmp for removal and build steps TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} From 5e4e2940ab49fffa9e44037a40d8b957cd27539d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 19 May 2024 23:22:47 +0200 Subject: [PATCH 58/71] only user lowerdir arg once --- eessi_container.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/eessi_container.sh b/eessi_container.sh index 962ce2c101..c9ed97e5c6 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -625,7 +625,6 @@ if [[ "${ACCESS}" == "rw" ]]; then EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}") EESSI_WRITABLE_OVERLAY="container:fuse-overlayfs" - # EESSI_WRITABLE_OVERLAY+=" -o lowerdir=/cvmfs_ro/${repo_name}" if [[ ! -z ${LOWER_DIRS} ]]; then # need to convert ':' in LOWER_DIRS to ',' because bind mounts use ',' as # separator while the lowerdir overlayfs option uses ':' From 05773d3e5155d8d86d5175d38e9888e45909fb0d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 20 May 2024 19:53:21 +0200 Subject: [PATCH 59/71] Build NESSI-extend from scratch --- .../20240520-eb-4.9.1-rebuild-NESSI-extend-module.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 easystacks/pilot.nessi.no/2023.06/rebuilds/20240520-eb-4.9.1-rebuild-NESSI-extend-module.yml diff --git a/easystacks/pilot.nessi.no/2023.06/rebuilds/20240520-eb-4.9.1-rebuild-NESSI-extend-module.yml b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240520-eb-4.9.1-rebuild-NESSI-extend-module.yml new file mode 100644 index 0000000000..76ba2740c2 --- /dev/null +++ b/easystacks/pilot.nessi.no/2023.06/rebuilds/20240520-eb-4.9.1-rebuild-NESSI-extend-module.yml @@ -0,0 +1,5 @@ +# 2024-05-20 +# Rebuild NESSI-extend/2023.06-easybuild +# Need to revert to the original version. +easyconfigs: + - EESSI-extend-2023.06-easybuild.eb From b18ada9559af257ebd93fc55525c59be136e0555 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 21 May 2024 23:04:45 +0200 Subject: [PATCH 60/71] rebuild NESSI-extend (3rd attempt) --- EESSI-extend-2023.06-easybuild.eb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EESSI-extend-2023.06-easybuild.eb b/EESSI-extend-2023.06-easybuild.eb index d514293706..3735b3d31f 100644 --- a/EESSI-extend-2023.06-easybuild.eb +++ b/EESSI-extend-2023.06-easybuild.eb @@ -42,7 +42,7 @@ description = """ toolchain = SYSTEM # All the dependencies we filter in NESSI -local_deps_to_filter = "Autoconf,Automake,Autotools,binutils,bzip2,DBus,flex,gettext,gperf,help2man,intltool,libreadline,libtool,M4,makeinfo,ncurses,util-linux,XZ,zlib" +local_deps_to_filter = "Autoconf,Automake,Autotools,binutils,bzip2,DBus,flex,gettext,gperf,help2man,intltool,libreadline,libtool,M4,makeinfo,ncurses,util-linux,XZ,zlib,PSM2" local_arch_specific_deps_to_filter = {'aarch64': ',yasm', 'x86_64': ''} local_deps_to_filter += local_arch_specific_deps_to_filter[ARCH] From bdcb22ec27ab506c76799a4f06f74fecaf2077d5 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 21 May 2024 23:18:17 +0200 Subject: [PATCH 61/71] need addition to get it into diff file --- .../pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml index 46ac979719..63f8804820 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml @@ -3,3 +3,4 @@ easyconfigs: options: from-pr: 20299 - EESSI-extend-2023.06-easybuild.eb +# comment to trigger rebuild From 63c15c5cd11be4540e4e70f41d03f7ea772355ed Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 21 May 2024 23:25:56 +0200 Subject: [PATCH 62/71] fix ec file for NESSI-extend module --- EESSI-extend-2023.06-easybuild.eb | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/EESSI-extend-2023.06-easybuild.eb b/EESSI-extend-2023.06-easybuild.eb index 3735b3d31f..cdea2e58e4 100644 --- a/EESSI-extend-2023.06-easybuild.eb +++ b/EESSI-extend-2023.06-easybuild.eb @@ -95,19 +95,7 @@ elseif (os.getenv("NESSI_SITE_INSTALL") ~= nil) then if ((os.getenv("NESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("NESSI_USER_INSTALL") ~= nil)) then LmodError("You cannot use NESSI_SITE_INSTALL in combination with any other NESSI_*_INSTALL environment variables") end - site_install = os.getenv("NESSI_SITE_INSTALL") - site_modulepath = nil - if (site_install ~= nil) then - -- Check the folder exists - if not isDir(site_install) then - LmodError("The location of NESSI_SITE_INSTALL (" .. site_install .. ") does not exist or is not a folder") - end - if (mode() == "load") then - LmodMessage("Configuring for use of NESSI_SITE_INSTALL under " .. site_install) - end - easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), os.getenv("EESSI_CVMFS_REPO"), site_install) - site_modulepath = pathJoin(easybuild_installpath, 'modules', 'all') - end + easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), 'versions', 'host_injections') else -- Deal with user and project installs project_install = os.getenv("NESSI_PROJECT_INSTALL") From 6efb92f16faaf268ca7df8101710502dbeb79e4e Mon Sep 17 00:00:00 2001 From: Richard Top Date: Wed, 22 May 2024 15:42:46 +0000 Subject: [PATCH 63/71] sync local branch with remote --- .../pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml index 5163806807..739bb57f63 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml @@ -35,4 +35,5 @@ easyconfigs: options: from-pr: 19451 - cuDNN-8.9.2.26-CUDA-12.1.1.eb - - OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb + - OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb + - ABySS-2.3.7-foss-2023a.eb From 705ca714606597cf3ddd2d7ebecbe3f728790636 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 22 May 2024 20:18:43 +0200 Subject: [PATCH 64/71] {2023.06}[system] cuTENSOR v2.0.1.2 with CUDA/12.1.1 This PR attempts to add cuTENSOR/2.0.1.2 to NESSI and includes several improvements: - adds a generic script to install CUDA and cu* libraries under host_injections using the NESSI-extend module and an easystack file - creates a single Lmod hook for all CUDA and cu* libraries modules - uses a generic function that replaces non-distributable files with symlinks (used in post_sanitycheck_* EB hooks) - moves cuDNN from easystack file for foss/2023a to the one for system --- .../eessi-2023.06-cuda-and-libraries.yml | 3 + .../nvidia/install_cuda_and_libraries.sh | 204 ++++++++++++++++++ 2 files changed, 207 insertions(+) create mode 100644 scripts/gpu_support/nvidia/eessi-2023.06-cuda-and-libraries.yml create mode 100755 scripts/gpu_support/nvidia/install_cuda_and_libraries.sh diff --git a/scripts/gpu_support/nvidia/eessi-2023.06-cuda-and-libraries.yml b/scripts/gpu_support/nvidia/eessi-2023.06-cuda-and-libraries.yml new file mode 100644 index 0000000000..e0e47bf2d8 --- /dev/null +++ b/scripts/gpu_support/nvidia/eessi-2023.06-cuda-and-libraries.yml @@ -0,0 +1,3 @@ +easyconfigs: + - CUDA-12.1.1.eb + - cuDNN-8.9.2.26-CUDA-12.1.1.eb diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh new file mode 100755 index 0000000000..0c4a296701 --- /dev/null +++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh @@ -0,0 +1,204 @@ +#!/usr/bin/env bash + +# This script can be used to install CUDA and other libraries by NVIDIA under +# the `.../host_injections` directory. +# +# This provides the parts of the CUDA installation and other libriaries that +# cannot be redistributed as part of NESSI due to license limitations. While +# GPU-based software from NESSI will _run_ without these, installation of +# additional software that builds upon CUDA or other libraries requires that +# these installation are present under `host_injections`. +# +# The `host_injections` directory is a variant symlink that by default points to +# `/opt/eessi`, unless otherwise defined in the local CVMFS configuration (see +# https://cvmfs.readthedocs.io/en/stable/cpt-repo.html#variant-symlinks). For the +# installation to be successful, this directory needs to be writeable by the user +# executing this script. + +# Initialise our bash functions +TOPDIR=$(dirname $(realpath $BASH_SOURCE)) +source "$TOPDIR"/../../utils.sh + +# Function to display help message +show_help() { + echo "Usage: $0 [OPTIONS]" + echo "Options:" + echo " --help Display this help message" + echo " --accept-cuda-eula You _must_ accept the CUDA EULA to install" + echo " CUDA, see the EULA at" + echo " https://docs.nvidia.com/cuda/eula/index.html" + echo " -e, --easystack EASYSTACKFILE Path to easystack file that defines which" + echo " packages shall be installed" + echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" + echo " storage during the installation of CUDA" + echo " and/or other libraries (must have" + echo " several GB available; depends on the number of installations)" +} + +# Initialize variables +eula_accepted=0 +EASYSTACKFILE= +TEMP_DIR= + +# Parse command-line options +while [[ $# -gt 0 ]]; do + case "$1" in + --help) + show_help + exit 0 + ;; + --accept-cuda-eula) + eula_accepted=1 + shift 1 + ;; + -e|--easystack) + if [ -n "$2" ]; then + EASYSTACKFILE="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + -t|--temp-dir) + if [ -n "$2" ]; then + TEMP_DIR="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + *) + show_help + fatal_error "Error: Unknown option: $1" + ;; + esac +done + +if [[ -z "${EASYSTACKFILE}" ]]; then + fatal_error "Need the name/path to an easystack file. See command line options\n" +fi + +# Make sure NESSI is initialised +check_eessi_initialised + +# As an installation location just use $EESSI_SOFTWARE_PATH but replacing `versions` with `host_injections` +# (CUDA is a binary installation so no need to worry too much about the EasyBuild setup) +export NESSI_SITE_INSTALL=${EESSI_SOFTWARE_PATH/versions/host_injections} + +# we need a directory we can use for temporary storage +if [[ -z "${TEMP_DIR}" ]]; then + tmpdir=$(mktemp -d) +else + tmpdir=$(mktemp -d --tmpdir=${TEMP_DIR} cuda_n_co.XXX) + if [[ ! -d "$tmpdir" ]] ; then + fatal_error "Could not create directory ${tmpdir}" + fi +fi +echo "Created temporary directory '${tmpdir}'" + +# workaround for EasyBuild not being found when loading "extend" module +module load EasyBuild/4.9.1 + +# load NESSI-extend/2023.06-easybuild module && verify that it is loaded +NESSI_EXTEND_MODULE="NESSI-extend/2023.06-easybuild" +module load ${NESSI_EXTEND_MODULE} +ret=$? +if [ "${ret}" -ne 0 ]; then + fatal_error "An error occured while trying to load ${NESSI_EXTEND_MODULE}\n" +fi + +# do a 'eb --dry-run-short' with the EASYSTACKFILE and determine list of packages +# to be installed +echo ">> Determining if packages specified in ${EASYSTACKFILE} are missing under ${NESSI_SITE_INSTALL}" +eb_dry_run_short_out=${tmpdir}/eb_dry_run_short.out +eb --dry-run-short --rebuild --easystack ${EASYSTACKFILE} 2>&1 | tee ${eb_dry_run_short_out} +ret=$? + +# Check if CUDA shall be installed +cuda_install_needed=0 +cat ${eb_dry_run_short_out} | grep "^ \* \[[xR]\]" | grep "module: CUDA/" +ret=$? +if [ "${ret}" -eq 0 ]; then + cuda_install_needed=1 +fi + +# Make sure the CUDA EULA is accepted if it shall be installed +if [ "${cuda_install_needed}" -eq 1 ] && [ "${eula_accepted}" -ne 1 ]; then + show_help + error="\nCUDA shall be installed. However, the CUDA EULA has not been accepted\nYou _must_ accept the CUDA EULA via the appropriate command line option.\n" + fatal_error "${error}" +fi + +# determine the number of packages to be installed (assume 5 GB + num_packages * +# 3GB space needed) +number_of_packages=$(cat ${eb_dry_run_short_out} | grep "^ \* \[[xR]\]" | sed -e 's/^.*module: //' | uniq | wc -l) +echo "number of packages to be (re-)installed: '${number_of_packages}'" +base_storage_space=$((5000000 + ${number_of_packages} * 3000000)) + +required_space_in_tmpdir=${base_storage_space} +# Let's see if we have sources and build locations defined if not, we use the temporary space +if [[ -z "${EASYBUILD_BUILDPATH}" ]]; then + export EASYBUILD_BUILDPATH=${tmpdir}/build + required_space_in_tmpdir=$((required_space_in_tmpdir + ${base_storage_space})) +fi +if [[ -z "${EASYBUILD_SOURCEPATH}" ]]; then + export EASYBUILD_SOURCEPATH=${tmpdir}/sources + required_space_in_tmpdir=$((required_space_in_tmpdir + ${base_storage_space})) +fi + +# The install is pretty fat, you need lots of space for download/unpack/install +# (~3*${base_storage_space}*1000 Bytes), +# need to do a space check before we proceed +avail_space=$(df --output=avail "${NESSI_SITE_INSTALL}"/ | tail -n 1 | awk '{print $1}') +min_disk_storage=$((3 * ${base_storage_space})) +if (( avail_space < ${min_disk_storage} )); then + fatal_error "Need at least $(echo "${min_disk_storage} / 1000000" | bc) GB disk space to install CUDA and other libraries under ${NESSI_SITE_INSTALL}, exiting now..." +fi +avail_space=$(df --output=avail "${tmpdir}"/ | tail -n 1 | awk '{print $1}') +if (( avail_space < required_space_in_tmpdir )); then + error="Need at least $(echo "${required_space_in_tmpdir} / 1000000" | bc) temporary disk space under ${tmpdir}.\n" + error="${error}Set the environment variable TEMP_DIR to a location with adequate space to pass this check." + error="${error}You can alternatively set EASYBUILD_BUILDPATH and/or EASYBUILD_SOURCEPATH" + error="${error}to reduce this requirement. Exiting now..." + fatal_error "${error}" +fi + +# Brief explanation of parameters: +# - prefix: using $tmpdir as default base directory for several EB settings +# - rebuild: we need the --rebuild option, as the CUDA module may or may not be on the +# `MODULEPATH` yet. Even if it is, we still want to redo this installation +# since it will provide the symlinked targets for the parts of the CUDA +# and/or other installation in the `.../versions/...` prefix +# - installpath-modules: We install the module in our `tmpdir` since we do not need the modulefile, +# we only care about providing the targets for the symlinks. +# - ${cuda_arg}: We only set the --accept-eula-for=CUDA option if CUDA will be installed and if +# this script was called with the argument --accept-cuda-eula. +# - hooks: We don't want hooks used in this install, we need vanilla +# installations of CUDA and/or other libraries +# - easystack: Path to easystack file that defines which packages shall be +# installed +cuda_arg= +if [[ ${eula_accepted} -eq 1 ]]; then + cuda_arg="--accept-eula-for=CUDA" +fi +touch "$tmpdir"/none.py +eb --prefix="$tmpdir" \ + --rebuild \ + --installpath-modules=${tmpdir} \ + "${cuda_arg}" \ + --hooks="$tmpdir"/none.py \ + --easystack ${EASYSTACKFILE} +ret=$? +if [ $ret -ne 0 ]; then + eb_last_log=$(unset EB_VERBOSE; eb --last-log) + cp -a ${eb_last_log} . + fatal_error "some installation failed, please check EasyBuild logs $(basename ${eb_last_log})..." +else + echo_green "all installations at ${NESSI_SITE_INSTALL}/software/... succeeded!" +fi +# clean up tmpdir +rm -rf "${tmpdir}" From 433d588bbf4a150a17aa7cc4e6e9e47badc64dbf Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 22 May 2024 20:24:55 +0200 Subject: [PATCH 65/71] adding unstaged changes --- EESSI-install-software.sh | 6 +- create_lmodsitepackage.py | 72 +++----- .../eessi-2023.06-eb-4.9.1-001-system.yml | 3 +- .../2023.06/eessi-2023.06-eb-4.9.1-2023a.yml | 1 - eb_hooks.py | 174 +++++++++--------- install_scripts.sh | 6 +- 6 files changed, 122 insertions(+), 140 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index d840910516..6c59f46570 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -204,8 +204,10 @@ ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} # TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install # Allow skipping CUDA SDK install in e.g. CI environments if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then - ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula - ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh -c 12.1.1 -d 8.9.2.26 + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \ + -e ${EESSI_PREFIX}/scripts/gpu_support/nvidia/eessi-2023.06-cuda-and-libraries.yml \ + -t /tmp/temp \ + --accept-cuda-eula else echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed" fi diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py index bafbb63414..d6137eb901 100755 --- a/create_lmodsitepackage.py +++ b/create_lmodsitepackage.py @@ -105,45 +105,46 @@ end - -local function eessi_cuda_enabled_load_hook(t) +local function eessi_cuda_and_libraries_enabled_load_hook(t) local frameStk = require("FrameStk"):singleton() local mt = frameStk:mt() local simpleName = string.match(t.modFullName, "(.-)/") - -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections. - -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse - -- to load the CUDA module and print an informative message on how to set up GPU support for NESSI + local packagesList = { ["CUDA"] = true, ["cuDNN"] = true, ["cuTENSOR"] = true } + -- If we try to load any of the modules in packagesList, we check if the + -- full package was installed on the host in host_injections. + -- This is required for end users to build additional software that depends + -- on the package. If the full SDK isn't present, refuse + -- to load the module and print an informative message on how to set up GPU support for NESSI local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" - if simpleName == 'CUDA' then + if packagesList[simpleName] then + -- simpleName is a module in packagesList -- get the full host_injections path local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') - -- build final path where the CUDA software should be installed - local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild" - local cudaDirExists = isDir(cudaEasyBuildDir) - if not cudaDirExists then + -- build final path where the software should be installed + local packageEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild" + local packageDirExists = isDir(packageEasyBuildDir) + if not packageDirExists then local advice = "but while the module file exists, the actual software is not entirely shipped with NESSI " - advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where NESSI " + advice = advice .. "due to licencing. You will need to install a full copy of the " .. simpleName .. " package where NESSI " advice = advice .. "can find it.\\n" advice = advice .. refer_to_docs LmodError("\\nYou requested to load ", simpleName, " ", advice) end end - -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the NESSI linker, + -- when loading CUDA (and cu*) enabled modules check if the necessary driver libraries are accessible to the NESSI linker, -- otherwise, refuse to load the requested module and print error message - local checkGpu = mt:haveProperty(simpleName,"arch","gpu") - local overrideGpuCheck = os.getenv("EESSI_OVERRIDE_GPU_CHECK") - if checkGpu and (overrideGpuCheck == nil) then + local haveGpu = mt:haveProperty(simpleName,"arch","gpu") + if haveGpu then local arch = os.getenv("EESSI_CPU_FAMILY") or "" - local cudaVersionFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt" - local cudaDriverFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/libcuda.so" + local cvmfs_repo = os.getenv("EESSI_CVMFS_REPO") or "" + local cudaVersionFile = cvmfs_repo .. "/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt" + local cudaDriverFile = cvmfs_repo .. "/host_injections/nvidia/" .. arch .. "/latest/libcuda.so" local cudaDriverExists = isFile(cudaDriverFile) local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so") if not (cudaDriverExists or singularityCudaExists) then local advice = "which relies on the CUDA runtime environment and driver libraries. " advice = advice .. "In order to be able to use the module, you will need " - advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system. You can " - advice = advice .. "override this check by setting the environment variable EESSI_OVERRIDE_GPU_CHECK but " - advice = advice .. "the loaded application will not be able to execute on your system.\\n" + advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system.\\n" advice = advice .. refer_to_docs LmodError("\\nYou requested to load ", simpleName, " ", advice) else @@ -174,38 +175,13 @@ end end -local function eessi_cudnn_enabled_load_hook(t) - local frameStk = require("FrameStk"):singleton() - local mt = frameStk:mt() - local simpleName = string.match(t.modFullName, "(.-)/") - -- If we try to load cuDNN itself, check if the full cuDNN package was installed on the host in host_injections. - -- This is required for end users to build additional cuDNN dependent software. If the full SDK isn't present, refuse - -- to load the cuDNN module and print an informative message on how to set up GPU support for NESSI - local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" - if simpleName == 'cuDNN' then - -- get the full host_injections path - local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') - -- build final path where the cuDNN software should be installed - local cudnnEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild" - local cudnnDirExists = isDir(cudnnEasyBuildDir) - if not cudnnDirExists then - local advice = "but while the module file exists, the actual software is not entirely shipped with NESSI " - advice = advice .. "due to licencing. You will need to install a full copy of the cuDNN package where NESSI " - advice = advice .. "can find it.\\n" - advice = advice .. refer_to_docs - LmodError("\\nYou requested to load ", simpleName, " ", advice) - end - end -end - -- Combine both functions into a single one, as we can only register one function as load hook in lmod -- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed function eessi_load_hook(t) - -- Only apply CUDA and cuDNN hooks if the loaded module is in the NESSI prefix - -- This avoids getting an Lmod Error when trying to load a CUDA or cuDNN module from a local software stack + -- Only apply CUDA and libraries hook if the loaded module is in the NESSI prefix + -- This avoids getting an Lmod Error when trying to load a CUDA or library module from a local software stack if from_eessi_prefix(t) then - eessi_cuda_enabled_load_hook(t) - eessi_cudnn_enabled_load_hook(t) + eessi_cuda_and_libraries_enabled_load_hook(t) end end diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml index 63f8804820..aa5a07d02a 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml @@ -3,4 +3,5 @@ easyconfigs: options: from-pr: 20299 - EESSI-extend-2023.06-easybuild.eb -# comment to trigger rebuild + - cuDNN-8.9.2.26-CUDA-12.1.1.eb + - cuTENSOR-2.0.1.2-CUDA-12.1.1.eb diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml index 5163806807..276bfa49f7 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml @@ -34,5 +34,4 @@ easyconfigs: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19451; options: from-pr: 19451 - - cuDNN-8.9.2.26-CUDA-12.1.1.eb - OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb diff --git a/eb_hooks.py b/eb_hooks.py index cf1c911b23..7edd67b1fc 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -623,6 +623,47 @@ def post_sanitycheck_hook(self, *args, **kwargs): POST_SANITYCHECK_HOOKS[self.name](self, *args, **kwargs) +def replace_non_distributable_files_with_symlinks(self, package, allowlist): + """ + Replace files that cannot be distributed with symlinks into host_injections + """ + extension_based = { "CUDA": False, "cuDNN": True, "cuTENSOR": True } + if package in extension_based: + raise EasyBuildError("Don't know how to strip non-distributable files from package %s.", package) + + # iterate over all files in the package installation directory + for dir_path, _, files in os.walk(self.installdir): + for filename in files: + full_path = os.path.join(dir_path, filename) + # we only really care about real files, i.e. not symlinks + if not os.path.islink(full_path): + # check if the current file name stub is part of the allowlist + basename = filename.split('.')[0] + if extension_based[package]: + if '.' in filename: + extension = '.' + filename.split('.')[1] + if basename in allowlist: + self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) + elif extension_based[package] and '.' in filename and extension in allowlist: + self.log.debug("%s is found in allowlist, so keeping it: %s", extension, full_path) + else: + if extension_based[package]: + print_name = filename + else: + print_name = basename + self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s", + print_name, full_path) + # if it is not in the allowlist, delete the file and create a symlink to host_injections + host_inj_path = full_path.replace('versions', 'host_injections') + # make sure source and target of symlink are not the same + if full_path == host_inj_path: + raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " + "are using this hook for an EESSI installation?", + full_path, host_inj_path) + remove_file(full_path) + symlink(host_inj_path, full_path) + + def post_sanitycheck_cuda(self, *args, **kwargs): """ Remove files from CUDA installation that we are not allowed to ship, @@ -662,33 +703,14 @@ def post_sanitycheck_cuda(self, *args, **kwargs): if 'libcudart' not in allowlist: raise EasyBuildError("Did not find 'libcudart' in allowlist: %s" % allowlist) - # iterate over all files in the CUDA installation directory - for dir_path, _, files in os.walk(self.installdir): - for filename in files: - full_path = os.path.join(dir_path, filename) - # we only really care about real files, i.e. not symlinks - if not os.path.islink(full_path): - # check if the current file name stub is part of the allowlist - basename = filename.split('.')[0] - if basename in allowlist: - self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) - else: - self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s", - basename, full_path) - # if it is not in the allowlist, delete the file and create a symlink to host_injections - host_inj_path = full_path.replace('versions', 'host_injections') - # make sure source and target of symlink are not the same - if full_path == host_inj_path: - raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " - "are using this hook for an EESSI installation?", - full_path, host_inj_path) - remove_file(full_path) - symlink(host_inj_path, full_path) + # replace files that are not distributable with symlinks into + # host_injections + replace_non_distributable_files_with_symlinks(self.name, allowlist) else: raise EasyBuildError("CUDA-specific hook triggered for non-CUDA easyconfig?!") -def post_sanitycheck_cuDNN(self, *args, **kwargs): +def post_sanitycheck_cudnn(self, *args, **kwargs): """ Remove files from cuDNN installation that we are not allowed to ship, and replace them with a symlink to a corresponding installation under host_injections. @@ -714,79 +736,57 @@ def post_sanitycheck_cuDNN(self, *args, **kwargs): allowlist = sorted(set(allowlist)) self.log.info("Allowlist for files in cuDNN installation that can be redistributed: " + ', '.join(allowlist)) - # iterate over all files in the CUDA installation directory - for dir_path, _, files in os.walk(self.installdir): - for filename in files: - full_path = os.path.join(dir_path, filename) - # we only really care about real files, i.e. not symlinks - if not os.path.islink(full_path): - # check if the current file is part of the allowlist - basename = filename.split('.')[0] - if '.' in filename: - extension = '.' + filename.split('.')[1] - if basename in allowlist: - self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) - elif '.' in filename and extension in allowlist: - self.log.debug("%s is found in allowlist, so keeping it: %s", extension, full_path) - else: - self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s", - filename, full_path) - # if it is not in the allowlist, delete the file and create a symlink to host_injections - host_inj_path = full_path.replace('versions', 'host_injections') - # make sure source and target of symlink are not the same - if full_path == host_inj_path: - raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " - "are using this hook for a NESSI installation?", - full_path, host_inj_path) - remove_file(full_path) - symlink(host_inj_path, full_path) + # replace files that are not distributable with symlinks into + # host_injections + replace_non_distributable_files_with_symlinks(self.name, allowlist) else: raise EasyBuildError("cuDNN-specific hook triggered for non-cuDNN easyconfig?!") def inject_gpu_property(ec): """ - Add 'gpu' property, via modluafooter easyconfig parameter + Add 'gpu' property EESSIVERSION envvars and drop dependencies to + build dependencies, via modluafooter easyconfig parameter """ ec_dict = ec.asdict() - # Check if CUDA is in the dependencies, if so add the 'gpu' Lmod property - if ('CUDA' in [dep[0] for dep in iter(ec_dict['dependencies'])]): - ec.log.info("Injecting gpu as Lmod arch property and envvar with CUDA version") + # check if CUDA, cuDNN, you-name-it is in the dependencies, if so + # - drop dependency to build dependency + # - add 'gpu' Lmod property + # - add envvar with package version + packages_list = ( "CUDA", "cuDNN", "cuTENSOR" ) + packages_version = { } + add_gpu_property = '' + + for package in packages_list: + # Check if package is in the dependencies, if so drop dependency to build + # dependency and set variable for later adding the 'gpu' Lmod property + if (package in [dep[0] for dep in iter(ec_dict['dependencies'])]): + add_gpu_property = 'add_property("arch","gpu")' + for dep in iter(ec_dict['dependencies']): + if package in dep[0]: + # make package a build dependency only (rpathing saves us from link errors) + ec.log.info("Dropping dependency on %s to build dependency" % package) + ec_dict['dependencies'].remove(dep) + if dep not in ec_dict['builddependencies']: + ec_dict['builddependencies'].append(dep) + # take note of version for creating the modluafooter + packages_version[package] = dep[1] + if add_gpu_property: + ec.log.info("Injecting gpu as Lmod arch property and envvars for dependencies with their version") key = 'modluafooter' - value = 'add_property("arch","gpu")' - cuda_version = 0 - for dep in iter(ec_dict['dependencies']): - # Make CUDA a build dependency only (rpathing saves us from link errors) - if 'CUDA' in dep[0]: - cuda_version = dep[1] - ec_dict['dependencies'].remove(dep) - if dep not in ec_dict['builddependencies']: - ec_dict['builddependencies'].append(dep) - value = '\n'.join([value, 'setenv("EESSICUDAVERSION","%s")' % cuda_version]) - if key in ec_dict: - if not value in ec_dict[key]: - ec[key] = '\n'.join([ec_dict[key], value]) + values = [add_gpu_property] + for package, version in packages_version.items(): + envvar = "EESSI%sVERSION" % package.upper() + values.append('setenv("%s","%s")' % (envvar, version)) + if not key in ec_dict: + ec[key] = '\n'.join(values) else: - ec[key] = value - # Check if cuDNN is in the dependencies, if so add the 'gpu' Lmod property - if ('cuDNN' in [dep[0] for dep in iter(ec_dict['dependencies'])]): - ec.log.info("Injecting gpu as Lmod arch property and envvar with cuDNN version") - key = 'modluafooter' - value = 'add_property("arch","gpu")' - cudnn_version = 0 - for dep in iter(ec_dict['dependencies']): - # Make cuDNN a build dependency only (rpathing saves us from link errors) - if 'cuDNN' in dep[0]: - cudnn_version = dep[1] - ec_dict['dependencies'].remove(dep) - if dep not in ec_dict['builddependencies']: - ec_dict['builddependencies'].append(dep) - value = '\n'.join([value, 'setenv("EESSICUDNNVERSION","%s")' % cudnn_version]) - if key in ec_dict: - if not value in ec_dict[key]: - ec[key] = '\n'.join([ec_dict[key], value]) - else: - ec[key] = value + new_value = ec_dict[key] + for value in values: + if not value in new_value: + new_value = '\n'.join([new_value, value]) + ec[key] = new_value + return ec @@ -843,5 +843,5 @@ def inject_gpu_property(ec): POST_SANITYCHECK_HOOKS = { 'CUDA': post_sanitycheck_cuda, - 'cuDNN': post_sanitycheck_cuDNN, + 'cuDNN': post_sanitycheck_cudnn, } diff --git a/install_scripts.sh b/install_scripts.sh index 8bbcb6a7bf..17712a0ae7 100755 --- a/install_scripts.sh +++ b/install_scripts.sh @@ -110,7 +110,11 @@ copy_files_by_list ${TOPDIR}/scripts ${INSTALL_PREFIX}/scripts "${script_files[@ # Copy files for the scripts/gpu_support/nvidia directory nvidia_files=( - install_cuda_host_injections.sh install_cuDNN_host_injections.sh link_nvidia_host_libraries.sh + eessi-2023.06-cuda-and-libraries.yml + install_cuda_and_libraries.sh + install_cuda_host_injections.sh + install_cuDNN_host_injections.sh + link_nvidia_host_libraries.sh ) copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia ${INSTALL_PREFIX}/scripts/gpu_support/nvidia "${nvidia_files[@]}" From 8a9f5866b6a9987f75623c075673fbff55e431ef Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 22 May 2024 21:00:38 +0200 Subject: [PATCH 66/71] make sure that base directory for tmp dir exists --- scripts/gpu_support/nvidia/install_cuda_and_libraries.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh index 0c4a296701..ba3416c6c5 100755 --- a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh +++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh @@ -93,6 +93,7 @@ export NESSI_SITE_INSTALL=${EESSI_SOFTWARE_PATH/versions/host_injections} if [[ -z "${TEMP_DIR}" ]]; then tmpdir=$(mktemp -d) else + mkdir -p ${TEMP_DIR} tmpdir=$(mktemp -d --tmpdir=${TEMP_DIR} cuda_n_co.XXX) if [[ ! -d "$tmpdir" ]] ; then fatal_error "Could not create directory ${tmpdir}" From 8a9c9137b4455d51cc4f7715d91b8f4fc20d17bc Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 22 May 2024 21:38:30 +0200 Subject: [PATCH 67/71] add post_sanitycheck_cutensor --- eb_hooks.py | 36 ++++++++++++++++++- .../eessi-2023.06-cuda-and-libraries.yml | 5 +-- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index 7edd67b1fc..1f0adc8139 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -720,7 +720,7 @@ def post_sanitycheck_cudnn(self, *args, **kwargs): allowlist = ['LICENSE'] - # read cuDNN LICENSE, construct allowlist based on section 2.6 that specifies list of files that can be shipped + # read cuDNN LICENSE, construct allowlist based on section "2. Distribution" that specifies list of files that can be shipped license_path = os.path.join(self.installdir, 'LICENSE') search_string = "2. Distribution. The following portions of the SDK are distributable under the Agreement:" with open(license_path) as infile: @@ -743,6 +743,39 @@ def post_sanitycheck_cudnn(self, *args, **kwargs): raise EasyBuildError("cuDNN-specific hook triggered for non-cuDNN easyconfig?!") +def post_sanitycheck_cutensor(self, *args, **kwargs): + """ + Remove files from cuTENSOR installation that we are not allowed to ship, + and replace them with a symlink to a corresponding installation under host_injections. + """ + if self.name == 'cuTENSOR': + print_msg("Replacing files in cuTENSOR installation that we can not ship with symlinks to host_injections...") + + allowlist = ['LICENSE'] + + # read cuTENSOR LICENSE, construct allowlist based on section "2. Distribution" that specifies list of files that can be shipped + license_path = os.path.join(self.installdir, 'LICENSE') + search_string = "2. Distribution. The following portions of the SDK are distributable under the Agreement:" + with open(license_path) as infile: + for line in infile: + if line.strip().startswith(search_string): + # remove search string, split into words, remove trailing + # dots '.' and only retain words starting with a dot '.' + distributable = line[len(search_string):] + for word in distributable.split(): + if word[0] == '.': + allowlist.append(word.rstrip('.')) + + allowlist = sorted(set(allowlist)) + self.log.info("Allowlist for files in cuTENSOR installation that can be redistributed: " + ', '.join(allowlist)) + + # replace files that are not distributable with symlinks into + # host_injections + replace_non_distributable_files_with_symlinks(self.name, allowlist) + else: + raise EasyBuildError("cuTENSOR-specific hook triggered for non-cuTENSOR easyconfig?!") + + def inject_gpu_property(ec): """ Add 'gpu' property EESSIVERSION envvars and drop dependencies to @@ -844,4 +877,5 @@ def inject_gpu_property(ec): POST_SANITYCHECK_HOOKS = { 'CUDA': post_sanitycheck_cuda, 'cuDNN': post_sanitycheck_cudnn, + 'cuTENSOR': post_sanitycheck_cutensor, } diff --git a/scripts/gpu_support/nvidia/eessi-2023.06-cuda-and-libraries.yml b/scripts/gpu_support/nvidia/eessi-2023.06-cuda-and-libraries.yml index e0e47bf2d8..74615872d4 100644 --- a/scripts/gpu_support/nvidia/eessi-2023.06-cuda-and-libraries.yml +++ b/scripts/gpu_support/nvidia/eessi-2023.06-cuda-and-libraries.yml @@ -1,3 +1,4 @@ easyconfigs: - - CUDA-12.1.1.eb - - cuDNN-8.9.2.26-CUDA-12.1.1.eb + - CUDA-12.1.1.eb + - cuDNN-8.9.2.26-CUDA-12.1.1.eb + - cuTENSOR-2.0.1.2-CUDA-12.1.1.eb From 23406aae3c197dda739435c2b56fe19025e3bf84 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 22 May 2024 22:34:45 +0200 Subject: [PATCH 68/71] need to pass more args to replace func --- eb_hooks.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index 1f0adc8139..5fbbe192db 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -623,7 +623,7 @@ def post_sanitycheck_hook(self, *args, **kwargs): POST_SANITYCHECK_HOOKS[self.name](self, *args, **kwargs) -def replace_non_distributable_files_with_symlinks(self, package, allowlist): +def replace_non_distributable_files_with_symlinks(log, install_dir, package, allowlist): """ Replace files that cannot be distributed with symlinks into host_injections """ @@ -632,7 +632,7 @@ def replace_non_distributable_files_with_symlinks(self, package, allowlist): raise EasyBuildError("Don't know how to strip non-distributable files from package %s.", package) # iterate over all files in the package installation directory - for dir_path, _, files in os.walk(self.installdir): + for dir_path, _, files in os.walk(install_dir): for filename in files: full_path = os.path.join(dir_path, filename) # we only really care about real files, i.e. not symlinks @@ -643,16 +643,16 @@ def replace_non_distributable_files_with_symlinks(self, package, allowlist): if '.' in filename: extension = '.' + filename.split('.')[1] if basename in allowlist: - self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) + log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) elif extension_based[package] and '.' in filename and extension in allowlist: - self.log.debug("%s is found in allowlist, so keeping it: %s", extension, full_path) + log.debug("%s is found in allowlist, so keeping it: %s", extension, full_path) else: if extension_based[package]: print_name = filename else: print_name = basename - self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s", - print_name, full_path) + log.debug("%s is not found in allowlist, so replacing it with symlink: %s", + print_name, full_path) # if it is not in the allowlist, delete the file and create a symlink to host_injections host_inj_path = full_path.replace('versions', 'host_injections') # make sure source and target of symlink are not the same @@ -705,7 +705,7 @@ def post_sanitycheck_cuda(self, *args, **kwargs): # replace files that are not distributable with symlinks into # host_injections - replace_non_distributable_files_with_symlinks(self.name, allowlist) + replace_non_distributable_files_with_symlinks(self.log, self.installdir, self.name, allowlist) else: raise EasyBuildError("CUDA-specific hook triggered for non-CUDA easyconfig?!") @@ -738,7 +738,7 @@ def post_sanitycheck_cudnn(self, *args, **kwargs): # replace files that are not distributable with symlinks into # host_injections - replace_non_distributable_files_with_symlinks(self.name, allowlist) + replace_non_distributable_files_with_symlinks(self.log, self.installdir, self.name, allowlist) else: raise EasyBuildError("cuDNN-specific hook triggered for non-cuDNN easyconfig?!") @@ -771,7 +771,7 @@ def post_sanitycheck_cutensor(self, *args, **kwargs): # replace files that are not distributable with symlinks into # host_injections - replace_non_distributable_files_with_symlinks(self.name, allowlist) + replace_non_distributable_files_with_symlinks(self.log, self.installdir, self.name, allowlist) else: raise EasyBuildError("cuTENSOR-specific hook triggered for non-cuTENSOR easyconfig?!") From b3a62157f2ca3eac53559afb9d907d341669bf07 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 22 May 2024 22:52:23 +0200 Subject: [PATCH 69/71] fix silly mistake in replace function --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 5fbbe192db..c7b2a62f64 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -628,7 +628,7 @@ def replace_non_distributable_files_with_symlinks(log, install_dir, package, all Replace files that cannot be distributed with symlinks into host_injections """ extension_based = { "CUDA": False, "cuDNN": True, "cuTENSOR": True } - if package in extension_based: + if not package in extension_based: raise EasyBuildError("Don't know how to strip non-distributable files from package %s.", package) # iterate over all files in the package installation directory From b00b21e8ed57ae420288a9a47144398451b5a29b Mon Sep 17 00:00:00 2001 From: Richard Top Date: Thu, 23 May 2024 07:37:38 +0000 Subject: [PATCH 70/71] added a lately merged fix patch --- .../pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml | 1 - .../pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml index 2bc82e6f54..e402975917 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml @@ -57,4 +57,3 @@ easyconfigs: # This easyconfig is added to overcome the failing of check_missing_installations against the development branch - parallel-20230722-GCCcore-12.3.0.eb - Highway-1.0.4-GCCcore-12.3.0.eb - - Perl-bundle-CPAN-5.36.1-GCCcore-12.3.0.eb diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml index aeaf710837..eb9a16d239 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml @@ -36,3 +36,6 @@ easyconfigs: from-pr: 19451 - OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb - ABySS-2.3.7-foss-2023a.eb + - Perl-bundle-CPAN-5.36.1-GCCcore-12.3.0.eb: + options: + from-pr: 20540 From e9b55f5b3727b426c58e1b52a66a4150553950ed Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 23 May 2024 10:05:15 +0200 Subject: [PATCH 71/71] don't check for GPU driver libs when building --- create_lmodsitepackage.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py index d6137eb901..20c4098b8f 100755 --- a/create_lmodsitepackage.py +++ b/create_lmodsitepackage.py @@ -133,8 +133,9 @@ end -- when loading CUDA (and cu*) enabled modules check if the necessary driver libraries are accessible to the NESSI linker, -- otherwise, refuse to load the requested module and print error message - local haveGpu = mt:haveProperty(simpleName,"arch","gpu") - if haveGpu then + local checkGpu = mt:haveProperty(simpleName,"arch","gpu") + local overrideGpuCheck = os.getenv("EESSI_OVERRIDE_GPU_CHECK") + if checkGpu and (overrideGpuCheck == nil) then local arch = os.getenv("EESSI_CPU_FAMILY") or "" local cvmfs_repo = os.getenv("EESSI_CVMFS_REPO") or "" local cudaVersionFile = cvmfs_repo .. "/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt" @@ -144,7 +145,9 @@ if not (cudaDriverExists or singularityCudaExists) then local advice = "which relies on the CUDA runtime environment and driver libraries. " advice = advice .. "In order to be able to use the module, you will need " - advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system.\\n" + advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system. You can " + advice = advice .. "override this check by setting the environment variable EESSI_OVERRIDE_GPU_CHECK but " + advice = advice .. "the loaded application will not be able to execute on your system.\\n" advice = advice .. refer_to_docs LmodError("\\nYou requested to load ", simpleName, " ", advice) else