Skip to content

Commit

Permalink
added a fix to avoid using local gcc
Browse files Browse the repository at this point in the history
  • Loading branch information
Richard Top committed Jun 6, 2024
2 parents 33c4c12 + 83a18b6 commit c0ddf72
Show file tree
Hide file tree
Showing 10 changed files with 112 additions and 36 deletions.
70 changes: 43 additions & 27 deletions EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ display_help() {
echo " --skip-cuda-install - disable installing a full CUDA SDK in the host_injections prefix (e.g. in CI)"
}

# Succeeds (exit status 0) when the name given as $1 is known to the shell,
# as reported by `command -v`; all output of the lookup is discarded.
command_exists() {
    command -v "$1" 1>/dev/null 2>/dev/null
}

function copy_build_log() {
# copy specified build log to specified directory, with some context added
build_log=${1}
Expand Down Expand Up @@ -147,6 +152,39 @@ else
mkdir -p ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
fi

# We need to ensure that certain files are present or updated before we source
# $TOPDIR/init/eessi_environment_variables
# Particularly the files we need to have present/updated in
# ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
# are:
# - .lmod/lmodrc.lua
# - .lmod/SitePackage.lua
# We run scripts to create them if they don't exist or if the scripts have been
# changed in the PR.

# Set base directory for software and for Lmod config files
_eessi_software_path="${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}"
_lmod_cfg_dir="${_eessi_software_path}/.lmod"

# We assume there's only one diff file that corresponds to the PR patch file.
# A glob is used instead of parsing `ls` output: the first regular file matching
# [0-9]*.diff (in sorted glob order) wins, and pr_diff stays empty if there is none.
pr_diff=""
for _pr_diff_candidate in [0-9]*.diff; do
    if [ -f "${_pr_diff_candidate}" ]; then
        pr_diff="${_pr_diff_candidate}"
        break
    fi
done

# Report (via exit status) whether the PR diff has a '+++' header for the given file.
# Arguments: $1 - file path relative to the repository root (matched literally, whole line)
# Returns:   0 if the file is listed in ${pr_diff}, non-zero otherwise
#            (also when no PR diff file was found, rather than blocking on stdin)
_pr_diff_touches_file() {
    [ -n "${pr_diff}" ] || return 1
    grep '^+++' "${pr_diff}" | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep -qxF -- "$1"
}

# Create or update ${_eessi_software_path}/.lmod/lmodrc.lua
_lmodrc_file="${_lmod_cfg_dir}/lmodrc.lua"
# exit-status style flag: '0' means create_lmodrc.py is among the files changed in the PR
if _pr_diff_touches_file 'create_lmodrc.py'; then
    _lmodrc_changed=0
else
    _lmodrc_changed=1
fi
if [ ! -f "${_lmodrc_file}" ] || [ "${_lmodrc_changed}" = '0' ]; then
    python3 "${TOPDIR}/create_lmodrc.py" "${_eessi_software_path}"
    check_exit_code $? "${_lmodrc_file} created/updated" "Failed to create/update ${_lmodrc_file}"
fi

# Create or update ${_eessi_software_path}/.lmod/SitePackage.lua
_lmod_sitepackage_file="${_lmod_cfg_dir}/SitePackage.lua"
# exit-status style flag: '0' means create_lmodsitepackage.py is among the files changed in the PR
if _pr_diff_touches_file 'create_lmodsitepackage.py'; then
    _sitepackage_changed=0
else
    _sitepackage_changed=1
fi
if [ ! -f "${_lmod_sitepackage_file}" ] || [ "${_sitepackage_changed}" = '0' ]; then
    python3 "${TOPDIR}/create_lmodsitepackage.py" "${_eessi_software_path}"
    check_exit_code $? "${_lmod_sitepackage_file} created/updated" "Failed to create/update ${_lmod_sitepackage_file}"
fi

# Set all the EESSI environment variables (respecting $EESSI_SOFTWARE_SUBDIR_OVERRIDE)
# $EESSI_SILENT - don't print any messages
# $EESSI_BASIC_ENV - give a basic set of environment variables
Expand Down Expand Up @@ -212,13 +250,11 @@ else
echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed"
fi

# Install drivers in host_injections
# TODO: this is commented out for now, because the script assumes that nvidia-smi is available and works;
# if not, an error is produced, and the bot flags the whole build as failed (even when not installing GPU software)
# ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh

# Don't run the Lmod GPU driver check when doing builds (may not have a GPU, and it's not relevant for vanilla builds anyway)
export EESSI_OVERRIDE_GPU_CHECK=1
# Install NVIDIA drivers in host_injections (if they exist)
# link_nvidia_host_libraries.sh is only run when nvidia-smi is found; the script path is
# quoted so that a prefix containing whitespace or glob characters is passed through intact.
if command_exists "nvidia-smi"; then
    echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..."
    "${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh"
fi

# use PR patch file to determine in which easystack files stuff was added
# (paths matching 'known-issues' or 'missing' are filtered out; the diff file is read
# directly by grep, so an empty ${pr_diff} yields an empty list instead of reading stdin)
changed_easystacks=$(grep '^+++' "${pr_diff}" | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | grep -E -v 'known-issues|missing')
Expand Down Expand Up @@ -268,25 +304,5 @@ else
done
fi

### add packages here

echo ">> Creating/updating Lmod RC file..."
export LMOD_CONFIG_DIR="${EASYBUILD_INSTALLPATH}/.lmod"
lmod_rc_file="${LMOD_CONFIG_DIR}/lmodrc.lua"
# '0' when create_lmodrc.py is among the files changed in the PR diff; the diff is read by grep
# directly and quoted, so an empty ${pr_diff} gives a non-zero flag instead of reading stdin
lmodrc_changed=$(grep '^+++' "${pr_diff}" | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep -q '^create_lmodrc.py$'; echo $?)
if [ ! -f "${lmod_rc_file}" ] || [ "${lmodrc_changed}" == '0' ]; then
    python3 "${TOPDIR}/create_lmodrc.py" "${EASYBUILD_INSTALLPATH}"
    check_exit_code $? "$lmod_rc_file created" "Failed to create $lmod_rc_file"
fi

echo ">> Creating/updating Lmod SitePackage.lua ..."
export LMOD_PACKAGE_PATH="${EASYBUILD_INSTALLPATH}/.lmod"
lmod_sitepackage_file="${LMOD_PACKAGE_PATH}/SitePackage.lua"
# '0' when create_lmodsitepackage.py is among the files changed in the PR diff
sitepackage_changed=$(grep '^+++' "${pr_diff}" | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep -q '^create_lmodsitepackage.py$'; echo $?)
if [ ! -f "${lmod_sitepackage_file}" ] || [ "${sitepackage_changed}" == '0' ]; then
    python3 "${TOPDIR}/create_lmodsitepackage.py" "${EASYBUILD_INSTALLPATH}"
    check_exit_code $? "$lmod_sitepackage_file created" "Failed to create $lmod_sitepackage_file"
fi

echo ">> Cleaning up ${TMPDIR}..."
# ${TMPDIR:?} stops with an error instead of invoking rm without a target when TMPDIR is
# unset or empty; the quotes and '--' keep paths with whitespace or a leading '-' intact
rm -r -- "${TMPDIR:?}"
5 changes: 5 additions & 0 deletions bot/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,11 @@ if [[ ! -z ${SHARED_FS_PATH} ]]; then
BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections")
fi

# Don't run the Lmod GPU driver check when doing builds (may not have a GPU, and it's not relevant for vanilla builds anyway)
# The value is printed both before and after the override is applied.
printf "EESSI_OVERRIDE_GPU_CHECK='%s'\n" "${EESSI_OVERRIDE_GPU_CHECK}"
EESSI_OVERRIDE_GPU_CHECK=1
export EESSI_OVERRIDE_GPU_CHECK
printf "EESSI_OVERRIDE_GPU_CHECK='%s'\n" "${EESSI_OVERRIDE_GPU_CHECK}"

# create tmp file for output of build step
build_outerr=$(mktemp build.outerr.XXXX)

Expand Down
17 changes: 17 additions & 0 deletions create_lmodsitepackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,26 @@
end
end
local function eessi_espresso_deprecated_message(t)
    -- Print a deprecation notice when the module being loaded is ESPResSo 4.2.1.
    -- t: table handed to the load hook; only t.modFullName is read here.
    local simpleName = string.match(t.modFullName, "(.-)/")
    local version = string.match(t.modFullName, "%d.%d.%d")
    if simpleName == 'ESPResSo' and version == '4.2.1' then
        -- Print a message on loading ESPResSo v4.2.1 recommending using v 4.2.2 and above.
        -- A message and not a warning as the exit code would break CI runs otherwise,
        -- so the notice is emitted once via LmodMessage only.
        local advice = 'Prefer versions >= 4.2.2 which include important bugfixes.\\n'
        advice = advice .. 'For details see https://github.com/espressomd/espresso/releases/tag/4.2.2\\n'
        advice = advice .. 'Use version 4.2.1 at your own risk!\\n'
        LmodMessage("\\nESPResSo v4.2.1 has known issues and has been deprecated. ", advice)
    end
end
-- Combine both functions into a single one, as we can only register one function as load hook in lmod
-- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed
function eessi_load_hook(t)
eessi_espresso_deprecated_message(t)
-- Only apply CUDA and libraries hook if the loaded module is in the NESSI prefix
-- This avoids getting an Lmod Error when trying to load a CUDA or library module from a local software stack
if from_eessi_prefix(t) then
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,9 @@ easyconfigs:
- MDAnalysis-2.4.2-foss-2022b.eb
- arrow-R-11.0.0.3-foss-2022b-R-4.2.2.eb
- biom-format-2.1.15-foss-2022b.eb
- ASE-3.22.1-gfbf-2022b.eb
- R-bundle-Bioconductor-3.16-foss-2022b-R-4.2.2.eb:
options:
from-pr: 20379
- ParaView-5.11.1-foss-2022b.eb
- SEPP-4.5.1-foss-2022b.eb
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ easyconfigs:
from-pr: 20540
- WhatsHap-2.2-foss-2023a.eb
- GATK-4.5.0.0-GCCcore-12.3.0-Java-17.eb
- ESPResSo-4.2.2-foss-2023a.eb:
options:
from-pr: 20595
- PyTorch-2.1.2-foss-2023a-CUDA-12.1.1.eb:
options:
cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0
- BLAST+-2.14.1-gompi-2023a.eb:
options:
from-pr: 20674
from-pr: 20751
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ easyconfigs:
from-pr: 20439
- GDB-13.2-GCCcore-13.2.0.eb
- IPython-8.17.2-GCCcore-13.2.0.eb
- Qt5-5.15.13-GCCcore-13.2.0.eb
30 changes: 30 additions & 0 deletions eb_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,29 @@ def pre_configure_hook_openblas_optarch_generic(self, *args, **kwargs):
raise EasyBuildError("OpenBLAS-specific hook triggered for non-OpenBLAS easyconfig?!")


def pre_configure_hook_pytorch_add_cupti_libdir(self, *args, **kwargs):
    """
    Pre-configure hook for PyTorch: add directory $EESSI_SOFTWARE_PATH/software/CUDA/12.1.1/extras/CUPTI/lib64 to LIBRARY_PATH

    Only acts when the easyconfig's 'cudaver' template value is '12.1.1'; otherwise the
    environment is left untouched. The CUPTI directory is appended to an existing
    $LIBRARY_PATH, or becomes its sole entry when $LIBRARY_PATH is unset or empty.

    :raises EasyBuildError: if triggered for a non-PyTorch easyconfig, or if
                            $EESSI_SOFTWARE_PATH is not set when the CUPTI directory has to be added
    """
    if self.name != 'PyTorch':
        raise EasyBuildError("PyTorch-specific hook triggered for non-PyTorch easyconfig?!")

    if 'cudaver' in self.cfg.template_values and self.cfg.template_values['cudaver'] == '12.1.1':
        _cudaver = self.cfg.template_values['cudaver']
        print_msg("pre_configure_hook_pytorch_add_cupti_libdir: CUDA version: '%s'" % _cudaver)
        _library_path = os.getenv('LIBRARY_PATH')
        print_msg("pre_configure_hook_pytorch_add_cupti_libdir: library_path: '%s'" % _library_path)
        _eessi_software_path = os.getenv('EESSI_SOFTWARE_PATH')
        print_msg("pre_configure_hook_pytorch_add_cupti_libdir: eessi_software_path: '%s'" % _eessi_software_path)
        if not _eessi_software_path:
            # os.path.join() would otherwise fail with an unhelpful TypeError on None
            raise EasyBuildError("$EESSI_SOFTWARE_PATH is not set, cannot determine CUPTI library directory")
        _cupti_lib_dir = os.path.join(_eessi_software_path, 'software', 'CUDA', _cudaver, 'extras', 'CUPTI', 'lib64')
        print_msg("pre_configure_hook_pytorch_add_cupti_libdir: cupti_lib_dir: '%s'" % _cupti_lib_dir)
        if _library_path:
            env.setvar('LIBRARY_PATH', ':'.join([_library_path, _cupti_lib_dir]))
        else:
            env.setvar('LIBRARY_PATH', _cupti_lib_dir)
        print_msg("pre_configure_hook_pytorch_add_cupti_libdir: LIBRARY_PATH: '%s'" % os.getenv('LIBRARY_PATH'))


def pre_configure_hook_libfabric_disable_psm3_x86_64_generic(self, *args, **kwargs):
"""Add --disable-psm3 to libfabric configure options when building with --optarch=GENERIC on x86_64."""
if self.name == 'libfabric':
Expand Down Expand Up @@ -560,6 +583,12 @@ def pre_test_hook_increase_max_failed_tests_arm_PyTorch(self, *args, **kwargs):
"""
if self.name == 'PyTorch' and self.version == '2.1.2' and get_cpu_architecture() == AARCH64:
self.cfg['max_failed_tests'] = 10
if 'cudaver' in self.cfg.template_values and self.cfg.template_values['cudaver'] == '12.1.1':
_cudaver = self.cfg.template_values['cudaver']
_runtest = self.cfg['runtest']
self.cfg['runtest'] = _runtest.replace(
'PYTHONUNBUFFERED',
'PYTORCH_TEST_RUN_EVERYTHING_IN_SERIAL=1 PYTHONUNBUFFERED')


def pre_single_extension_hook(ext, *args, **kwargs):
Expand Down Expand Up @@ -851,6 +880,7 @@ def inject_gpu_property(ec):
'libfabric': pre_configure_hook_libfabric_disable_psm3_x86_64_generic,
'MetaBAT': pre_configure_hook_metabat_filtered_zlib_dep,
'OpenBLAS': pre_configure_hook_openblas_optarch_generic,
'PyTorch': pre_configure_hook_pytorch_add_cupti_libdir,
'WRF': pre_configure_hook_wrf_aarch64,
'at-spi2-core': pre_configure_hook_atspi2core_filter_ld_library_path,
}
Expand Down
6 changes: 0 additions & 6 deletions eessi_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -477,12 +477,6 @@ if [[ ${SETUP_NVIDIA} -eq 1 ]]; then
mkdir -p ${EESSI_USR_LOCAL_CUDA}
BIND_PATHS="${BIND_PATHS},${EESSI_VAR_LOG}:/var/log,${EESSI_USR_LOCAL_CUDA}:/usr/local/cuda"
[[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}"
if [[ "${NVIDIA_MODE}" == "install" ]] ; then
# We need to "trick" our LMOD_RC file to allow us to load CUDA modules even without a CUDA driver
# (this works because we build within a container and the LMOD_RC recognises that)
touch ${EESSI_TMPDIR}/libcuda.so
export SINGULARITY_CONTAINLIBS="${EESSI_TMPDIR}/libcuda.so"
fi
fi
fi

Expand Down
2 changes: 0 additions & 2 deletions install_scripts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,6 @@ copy_files_by_list ${TOPDIR}/scripts ${INSTALL_PREFIX}/scripts "${script_files[@
# Files from scripts/gpu_support/nvidia that are passed to copy_files_by_list
nvidia_files=(
    eessi-2023.06-cuda-and-libraries.yml
    install_cuda_and_libraries.sh
    install_cuda_host_injections.sh
    install_cuDNN_host_injections.sh
    link_nvidia_host_libraries.sh
)
# Directory arguments are quoted so that paths containing whitespace stay single arguments
copy_files_by_list "${TOPDIR}/scripts/gpu_support/nvidia" "${INSTALL_PREFIX}/scripts/gpu_support/nvidia" "${nvidia_files[@]}"
Expand Down
3 changes: 3 additions & 0 deletions run_in_compat_layer_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ fi
# Prepend an export statement to INPUT for each of these variables that is set to a
# non-empty value. The tests quote the variable so that a value containing whitespace
# is still detected instead of making `[` fail with a syntax error.
if [ -n "${http_proxy}" ]; then
    INPUT="export http_proxy=${http_proxy}; ${INPUT}"
fi
if [ -n "${EESSI_OVERRIDE_GPU_CHECK}" ]; then
    INPUT="export EESSI_OVERRIDE_GPU_CHECK=${EESSI_OVERRIDE_GPU_CHECK}; ${INPUT}"
fi
if [ -n "${https_proxy}" ]; then
    INPUT="export https_proxy=${https_proxy}; ${INPUT}"
fi
Expand Down

0 comments on commit c0ddf72

Please sign in to comment.