Skip to content

Commit

Permalink
sync with state of relevant EESSI PRs (579, 581, 586)
Browse files Browse the repository at this point in the history
  • Loading branch information
truib committed May 26, 2024
1 parent 0e4e824 commit 47eeac1
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 35 deletions.
70 changes: 43 additions & 27 deletions EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ display_help() {
echo " --skip-cuda-install - disable installing a full CUDA SDK in the host_injections prefix (e.g. in CI)"
}

# Tell the caller, through the exit status alone, whether `command -v` can
# resolve the given name; anything the lookup prints is discarded.
#   $1 - name to look up
command_exists() {
  local lookup_target="$1"
  command -v "${lookup_target}" > /dev/null 2>&1
  return $?
}

function copy_build_log() {
# copy specified build log to specified directory, with some context added
build_log=${1}
Expand Down Expand Up @@ -147,6 +152,39 @@ else
mkdir -p ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
fi

# We need to ensure that certain files are present or updated before we source
# $TOPDIR/init/eessi_environment_variables
# Particularly the files we need to have present/updated in
# ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
# are:
# - .lmod/lmodrc.lua
# - .lmod/SitePackage.lua
# We run scripts to create them if they don't exist or if the scripts have been
# changed in the PR.

# Set base directory for software and for Lmod config files
_eessi_software_path="${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}"
_lmod_cfg_dir="${_eessi_software_path}/.lmod"

# We assume there's only one diff file that corresponds to the PR patch file.
# The first glob match is taken directly instead of parsing `ls` output;
# pr_diff is left empty when no matching file exists in the current directory.
pr_diff=''
for _pr_diff_candidate in [0-9]*.diff; do
  if [[ -f "${_pr_diff_candidate}" ]]; then
    pr_diff="${_pr_diff_candidate}"
    break
  fi
done
unset _pr_diff_candidate

# Create or update ${_eessi_software_path}/.lmod/lmodrc.lua
_lmodrc_file="${_lmod_cfg_dir}/lmodrc.lua"
# _lmodrc_changed is '0' when the '+++ <prefix>/<path>' header lines of the PR
# diff name create_lmodrc.py, and '1' otherwise. An empty pr_diff counts as
# "not changed" rather than letting the pipeline read from stdin.
_lmodrc_changed=1
if [[ -n "${pr_diff}" ]] \
  && grep '^+++' -- "${pr_diff}" | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodrc\.py$' > /dev/null; then
  _lmodrc_changed=0
fi
if [[ ! -f "${_lmodrc_file}" || "${_lmodrc_changed}" == '0' ]]; then
  python3 "${TOPDIR}/create_lmodrc.py" "${_eessi_software_path}"
  check_exit_code $? "${_lmodrc_file} created/updated" "Failed to create/update ${_lmodrc_file}"
fi

# Create or update ${_eessi_software_path}/.lmod/SitePackage.lua
_lmod_sitepackage_file="${_lmod_cfg_dir}/SitePackage.lua"
# Same convention as _lmodrc_changed, but for create_lmodsitepackage.py.
_sitepackage_changed=1
if [[ -n "${pr_diff}" ]] \
  && grep '^+++' -- "${pr_diff}" | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodsitepackage\.py$' > /dev/null; then
  _sitepackage_changed=0
fi
if [[ ! -f "${_lmod_sitepackage_file}" || "${_sitepackage_changed}" == '0' ]]; then
  python3 "${TOPDIR}/create_lmodsitepackage.py" "${_eessi_software_path}"
  check_exit_code $? "${_lmod_sitepackage_file} created/updated" "Failed to create/update ${_lmod_sitepackage_file}"
fi

# Set all the EESSI environment variables (respecting $EESSI_SOFTWARE_SUBDIR_OVERRIDE)
# $EESSI_SILENT - don't print any messages
# $EESSI_BASIC_ENV - give a basic set of environment variables
Expand Down Expand Up @@ -212,13 +250,11 @@ else
echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed"
fi

# Install drivers in host_injections
# TODO: this is commented out for now, because the script assumes that nvidia-smi is available and works;
# if not, an error is produced, and the bot flags the whole build as failed (even when not installing GPU software)
# ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh

# Don't run the Lmod GPU driver check when doing builds (may not have a GPU, and it's not relevant for vanilla builds anyway)
export EESSI_OVERRIDE_GPU_CHECK=1
# Install NVIDIA drivers in host_injections (if they exist)
# The linking script is only run when nvidia-smi can be found, so hosts
# without it skip this step.
if command_exists "nvidia-smi"; then
  echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..."
  # Quoted so that a prefix containing whitespace or glob characters is still
  # treated as one path.
  "${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh"
fi

# use PR patch file to determine in which easystack files stuff was added
# (paths come from the '+++ <prefix>/<path>' header lines of the diff; entries
# containing 'known-issues' or 'missing' are left out).
# grep reads the diff file directly, so an empty ${pr_diff} yields an empty
# result instead of reading from stdin; `grep -E` replaces the deprecated egrep.
changed_easystacks=$(grep '^+++' -- "${pr_diff}" | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | grep -Ev 'known-issues|missing')
Expand Down Expand Up @@ -268,25 +304,5 @@ else
done
fi

### add packages here

# Both sections below follow the same scheme: the generator script is run when
# the target file is missing or when the PR diff (${pr_diff}) lists the
# generator script itself as changed ('<name>_changed' is then '0').
# All path expansions are quoted so an install path containing whitespace does
# not break the file tests or split the script arguments.

echo ">> Creating/updating Lmod RC file..."
export LMOD_CONFIG_DIR="${EASYBUILD_INSTALLPATH}/.lmod"
lmod_rc_file="${LMOD_CONFIG_DIR}/lmodrc.lua"
lmodrc_changed=$(grep '^+++' -- "${pr_diff}" | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodrc.py$' > /dev/null; echo $?)
if [ ! -f "${lmod_rc_file}" ] || [ "${lmodrc_changed}" == '0' ]; then
  python3 "${TOPDIR}/create_lmodrc.py" "${EASYBUILD_INSTALLPATH}"
  check_exit_code $? "$lmod_rc_file created" "Failed to create $lmod_rc_file"
fi

echo ">> Creating/updating Lmod SitePackage.lua ..."
export LMOD_PACKAGE_PATH="${EASYBUILD_INSTALLPATH}/.lmod"
lmod_sitepackage_file="${LMOD_PACKAGE_PATH}/SitePackage.lua"
sitepackage_changed=$(grep '^+++' -- "${pr_diff}" | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodsitepackage.py$' > /dev/null; echo $?)
if [ ! -f "${lmod_sitepackage_file}" ] || [ "${sitepackage_changed}" == '0' ]; then
  python3 "${TOPDIR}/create_lmodsitepackage.py" "${EASYBUILD_INSTALLPATH}"
  check_exit_code $? "$lmod_sitepackage_file created" "Failed to create $lmod_sitepackage_file"
fi

echo ">> Cleaning up ${TMPDIR}..."
# Quoted and preceded by `--` so the directory is passed to rm as one operand,
# even if its name contains whitespace or starts with a dash.
rm -r -- "${TMPDIR}"
5 changes: 5 additions & 0 deletions bot/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,11 @@ if [[ ! -z ${SHARED_FS_PATH} ]]; then
BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections")
fi

# Don't run the Lmod GPU driver check when doing builds (may not have a GPU, and it's not relevant for vanilla builds anyway)
# The value is printed once before and once after the override is exported.
printf '%s\n' "EESSI_OVERRIDE_GPU_CHECK='${EESSI_OVERRIDE_GPU_CHECK}'"
EESSI_OVERRIDE_GPU_CHECK=1
export EESSI_OVERRIDE_GPU_CHECK
printf '%s\n' "EESSI_OVERRIDE_GPU_CHECK='${EESSI_OVERRIDE_GPU_CHECK}'"

# create tmp file for output of build step
build_outerr=$(mktemp build.outerr.XXXX)

Expand Down
6 changes: 0 additions & 6 deletions eessi_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -477,12 +477,6 @@ if [[ ${SETUP_NVIDIA} -eq 1 ]]; then
mkdir -p ${EESSI_USR_LOCAL_CUDA}
BIND_PATHS="${BIND_PATHS},${EESSI_VAR_LOG}:/var/log,${EESSI_USR_LOCAL_CUDA}:/usr/local/cuda"
[[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}"
if [[ "${NVIDIA_MODE}" == "install" ]] ; then
# We need to "trick" our LMOD_RC file to allow us to load CUDA modules even without a CUDA driver
# (this works because we build within a container and the LMOD_RC recognises that)
touch ${EESSI_TMPDIR}/libcuda.so
export SINGULARITY_CONTAINLIBS="${EESSI_TMPDIR}/libcuda.so"
fi
fi
fi

Expand Down
2 changes: 0 additions & 2 deletions install_scripts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,6 @@ copy_files_by_list ${TOPDIR}/scripts ${INSTALL_PREFIX}/scripts "${script_files[@
# File names handed to copy_files_by_list, together with the gpu_support/nvidia
# directory under ${TOPDIR}/scripts and its counterpart under
# ${INSTALL_PREFIX}/scripts.
nvidia_files=(
  eessi-2023.06-cuda-and-libraries.yml
  install_cuda_and_libraries.sh
  install_cuda_host_injections.sh
  install_cuDNN_host_injections.sh
  link_nvidia_host_libraries.sh
)
# Both directory arguments are quoted so each one stays a single argument even
# when TOPDIR or INSTALL_PREFIX contains whitespace.
copy_files_by_list "${TOPDIR}/scripts/gpu_support/nvidia" "${INSTALL_PREFIX}/scripts/gpu_support/nvidia" "${nvidia_files[@]}"
Expand Down
3 changes: 3 additions & 0 deletions run_in_compat_layer_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ fi
# For each of these variables that is set to a non-empty value, prepend a
# matching export statement to ${INPUT}.
# The test operands are quoted: unquoted, a value containing whitespace makes
# `[` fail with "too many arguments" and the export is silently dropped.
if [ -n "${http_proxy}" ]; then
  INPUT="export http_proxy=${http_proxy}; ${INPUT}"
fi
if [ -n "${EESSI_OVERRIDE_GPU_CHECK}" ]; then
  INPUT="export EESSI_OVERRIDE_GPU_CHECK=${EESSI_OVERRIDE_GPU_CHECK}; ${INPUT}"
fi
if [ -n "${https_proxy}" ]; then
  INPUT="export https_proxy=${https_proxy}; ${INPUT}"
fi
Expand Down

0 comments on commit 47eeac1

Please sign in to comment.