From a55c6c9309d96a96f2a24221d24b0659c228f408 Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Thu, 30 May 2024 13:47:50 +0200
Subject: [PATCH] missing changes provided in EESSI PR 579

---
Notes (text in this section is ignored when the patch is applied):
 * bot/build.sh: pass "--nvidia all" to the build step when 'nvidia-smi' is
   found via command_exists, otherwise pass "--nvidia install"; the
   unconditional 'export EESSI_OVERRIDE_GPU_CHECK=1' (and its two debug
   echo lines) is removed.
 * eessi_container.sh: when NVIDIA_MODE is "install", export
   SINGULARITYENV_EESSI_OVERRIDE_GPU_CHECK=1.
 * run_in_compat_layer_env.sh: move the http_proxy export block so that it
   follows the EESSI_OVERRIDE_GPU_CHECK block (pure reordering).
 * scripts/utils.sh: add the command_exists helper (wraps 'command -v').

 bot/build.sh               | 15 +++++++++------
 eessi_container.sh         |  5 +++++
 run_in_compat_layer_env.sh |  6 +++---
 scripts/utils.sh           |  5 +++++
 4 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/bot/build.sh b/bot/build.sh
index 6e835cb6aa..7bd4179939 100755
--- a/bot/build.sh
+++ b/bot/build.sh
@@ -266,16 +266,19 @@ mkdir -p ${TARBALL_TMP_BUILD_STEP_DIR}
 BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}")
 BUILD_STEP_ARGS+=("--storage" "${STORAGE}")
 # add options required to handle NVIDIA support
-BUILD_STEP_ARGS+=("--nvidia" "all")
+if command_exists "nvidia-smi"; then
+    echo "Command 'nvidia-smi' found, using available GPU"
+    BUILD_STEP_ARGS+=("--nvidia" "all")
+else
+    echo "No 'nvidia-smi' found, no available GPU but allowing overriding this check"
+    BUILD_STEP_ARGS+=("--nvidia" "install")
+fi
+# Retain location for host injections so we don't reinstall CUDA
+# (Always need to run the driver installation as available driver may change)
 if [[ ! -z ${SHARED_FS_PATH} ]]; then
     BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections")
 fi
 
-# Don't run the Lmod GPU driver check when doing builds (may not have a GPU, and it's not relevant for vanilla builds anyway)
-echo "EESSI_OVERRIDE_GPU_CHECK='${EESSI_OVERRIDE_GPU_CHECK}'"
-export EESSI_OVERRIDE_GPU_CHECK=1
-echo "EESSI_OVERRIDE_GPU_CHECK='${EESSI_OVERRIDE_GPU_CHECK}'"
-
 # create tmp file for output of build step
 build_outerr=$(mktemp build.outerr.XXXX)
 
diff --git a/eessi_container.sh b/eessi_container.sh
index a9405b6d8e..e6bb13cbe7 100755
--- a/eessi_container.sh
+++ b/eessi_container.sh
@@ -477,6 +477,11 @@ if [[ ${SETUP_NVIDIA} -eq 1 ]]; then
         mkdir -p ${EESSI_USR_LOCAL_CUDA}
         BIND_PATHS="${BIND_PATHS},${EESSI_VAR_LOG}:/var/log,${EESSI_USR_LOCAL_CUDA}:/usr/local/cuda"
         [[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}"
+        if [[ "${NVIDIA_MODE}" == "install" ]] ; then
+            # No GPU so we need to "trick" Lmod to allow us to load CUDA modules even without a CUDA driver
+            # (this variable means EESSI_OVERRIDE_GPU_CHECK=1 will be set inside the container)
+            export SINGULARITYENV_EESSI_OVERRIDE_GPU_CHECK=1
+        fi
     fi
 fi
 
diff --git a/run_in_compat_layer_env.sh b/run_in_compat_layer_env.sh
index 393956a0c1..cc2cdae034 100755
--- a/run_in_compat_layer_env.sh
+++ b/run_in_compat_layer_env.sh
@@ -26,12 +26,12 @@ fi
 if [ ! -z ${EESSI_VERSION_OVERRIDE} ]; then
     INPUT="export EESSI_VERSION_OVERRIDE=${EESSI_VERSION_OVERRIDE}; ${INPUT}"
 fi
-if [ ! -z ${http_proxy} ]; then
-    INPUT="export http_proxy=${http_proxy}; ${INPUT}"
-fi
 if [ ! -z ${EESSI_OVERRIDE_GPU_CHECK} ]; then
     INPUT="export EESSI_OVERRIDE_GPU_CHECK=${EESSI_OVERRIDE_GPU_CHECK}; ${INPUT}"
 fi
+if [ ! -z ${http_proxy} ]; then
+    INPUT="export http_proxy=${http_proxy}; ${INPUT}"
+fi
 if [ ! -z ${https_proxy} ]; then
     INPUT="export https_proxy=${https_proxy}; ${INPUT}"
 fi
diff --git a/scripts/utils.sh b/scripts/utils.sh
index b2be3f6221..962decd20e 100644
--- a/scripts/utils.sh
+++ b/scripts/utils.sh
@@ -78,6 +78,11 @@ function create_directory_structure() {
     return $return_code
 }
 
+# Function to check if a command exists
+function command_exists() {
+    command -v "$1" >/dev/null 2>&1
+}
+
 function get_path_for_tool {
     tool_name=$1
     tool_envvar_name=$2