Skip to content

Commit

Permalink
Build CUDA under
Browse files Browse the repository at this point in the history
  • Loading branch information
ocaisa committed Oct 18, 2023
1 parent 3b55b9e commit 3db7a53
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 5 deletions.
92 changes: 92 additions & 0 deletions gpu_support/nvidia/install_cuda_host_injections.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/env bash
#
# Install a full CUDA toolkit with EasyBuild below the `host_injections`
# counterpart of ${EESSI_SOFTWARE_PATH}.
#
# Usage:
#   install_cuda_host_injections.sh <CUDA version>      (e.g. 11.3.1)
#
# Environment:
#   EESSI_SOFTWARE_PATH   (required) used to derive the installation location
#   CUDA_TEMP_DIR         (optional) parent of the scratch directory; when unset
#                         a directory from `mktemp -d` is used
#   EASYBUILD_BUILDPATH   (optional) used as-is when set, otherwise placed in
#   EASYBUILD_SOURCEPATH  the scratch directory (raising the space requirement)

# Initialise our bash functions
TOPDIR=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
source "$TOPDIR"/../../scripts/utils.sh

# Make sure EESSI is initialised.
# This has to be a plain call: `check_eessi_initialised()` would *define* a
# function whose body is the `if` statement that follows, so neither the
# initialisation check nor the argument check below would ever be executed.
check_eessi_initialised

if [[ $# -eq 0 ]]; then
    fatal_error "You must provide the CUDA version as an argument, e.g.:\n $0 11.3.1"
fi
install_cuda_version=$1
if [[ -z "${EESSI_SOFTWARE_PATH}" ]]; then
    fatal_error "This script cannot be used without having first defined EESSI_SOFTWARE_PATH"
else
    # As an installation location just use $EESSI_SOFTWARE_PATH but replacing `versions` with `host_injections`
    # (CUDA is a binary installation so no need to worry too much about the EasyBuild setup)
    cuda_install_parent=${EESSI_SOFTWARE_PATH/versions/host_injections}
fi

# Only install CUDA if specified version is not found.
# This is only relevant for users, the shipped CUDA installation will
# always be in versions instead of host_injections and have symlinks pointing
# to host_injections for everything we're not allowed to ship
# (existence of easybuild subdir implies a successful install)
if [ -d "${cuda_install_parent}"/software/CUDA/"${install_cuda_version}"/easybuild ]; then
    echo_green "CUDA software found! No need to install CUDA again, proceed with testing."
else
    # We need to be able to write to the installation space so let's make sure we can
    if ! create_directory_structure "${cuda_install_parent}"/software/CUDA ; then
        fatal_error "No write permissions to directory ${cuda_install_parent}/software/CUDA"
    fi

    # we need a directory we can use for temporary storage
    if [[ -z "${CUDA_TEMP_DIR}" ]]; then
        tmpdir=$(mktemp -d) || fatal_error "Could not create a temporary directory with mktemp"
    else
        tmpdir="${CUDA_TEMP_DIR}"/temp
        # Deliberately no `-p`: an already existing directory is never reused,
        # because it gets removed recursively at the end of this script
        if ! mkdir "$tmpdir" ; then
            fatal_error "Could not create directory ${tmpdir}"
        fi
    fi

    # Space figures are compared against the `avail` column of df
    required_space_in_tmpdir=50000
    # Let's see if we have sources and build locations defined if not, we use the temporary space
    if [[ -z "${EASYBUILD_BUILDPATH}" ]]; then
        export EASYBUILD_BUILDPATH=${tmpdir}/build
        required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000))
    fi
    if [[ -z "${EASYBUILD_SOURCEPATH}" ]]; then
        export EASYBUILD_SOURCEPATH=${tmpdir}/sources
        required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000))
    fi

    # The install is pretty fat, you need lots of space for download/unpack/install (~3*5GB),
    # need to do a space check before we proceed
    avail_space=$(df --output=avail "${cuda_install_parent}"/ | tail -n 1 | awk '{print $1}')
    if (( avail_space < 5000000 )); then
        # Drop the (still empty) scratch directory, otherwise a re-run with the
        # same CUDA_TEMP_DIR would fail at the `mkdir` above
        rm -rf -- "${tmpdir:?}"
        fatal_error "Need at least 5GB disk space to install CUDA under ${cuda_install_parent}, exiting now..."
    fi
    avail_space=$(df --output=avail "${tmpdir}"/ | tail -n 1 | awk '{print $1}')
    if (( avail_space < required_space_in_tmpdir )); then
        error="Need at least ${required_space_in_tmpdir} disk space under ${tmpdir}.\n"
        error="${error}Set the environment variable CUDA_TEMP_DIR to a location with adequate space to pass this check. "
        error="${error}You can alternatively set EASYBUILD_BUILDPATH and/or EASYBUILD_SOURCEPATH "
        error="${error}to reduce this requirement. Exiting now..."
        # Same reasoning as above: do not leave the empty scratch directory behind
        rm -rf -- "${tmpdir:?}"
        fatal_error "${error}"
    fi

    if [[ -z "${EBROOTEASYBUILD}" ]]; then
        echo_yellow "Loading EasyBuild module to do actual install"
        module load EasyBuild
    fi

    # we need the --rebuild option and a (random) dir for the module since we are
    # fixing the broken links of the EESSI-shipped installation
    extra_args="--rebuild --installpath-modules=${tmpdir}"

    # We don't want hooks used in this install, we need a vanilla CUDA installation
    touch "$tmpdir"/none.py
    # shellcheck disable=SC2086  # Intended splitting of extra_args
    eb --prefix="$tmpdir" ${extra_args} --hooks="$tmpdir"/none.py --installpath="${cuda_install_parent}"/ CUDA-"${install_cuda_version}".eb
    ret=$?
    if [ $ret -ne 0 ]; then
        # The scratch directory is intentionally kept in this case
        fatal_error "CUDA installation failed, please check EasyBuild logs..."
    else
        echo_green "CUDA installation at ${cuda_install_parent}/software/CUDA/${install_cuda_version} succeeded!"
    fi
    # clean up tmpdir (`:?` aborts instead of expanding to an empty path)
    rm -rf -- "${tmpdir:?}"
fi
80 changes: 75 additions & 5 deletions scripts/utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ ANY_ERROR_EXITCODE=1
function fatal_error() {
echo_red "ERROR: $1" >&2
if [[ $# -gt 1 ]]; then
exit $2
exit "$2"
else
exit "${ANY_ERROR_EXITCODE}"
fi
Expand All @@ -32,11 +32,81 @@ function check_exit_code {
fi
}

function check_eessi_initialised() {
    # Succeed when EESSI_SOFTWARE_PATH holds a non-empty value; otherwise
    # report the problem through fatal_error.
    if [[ -n "${EESSI_SOFTWARE_PATH}" ]]; then
        return 0
    fi
    fatal_error "EESSI has not been initialised!"
}

function float_greater_than() {
    # Numeric "greater than" test that also works for non-integers.
    #
    # Arguments: $1, $2 - numbers of the form [+-]digits[.digits]
    # Returns:   0 when $1 is strictly greater than $2; ANY_ERROR_EXITCODE
    #            (1 when that variable is unset) when it is not, when the
    #            argument count is wrong, or when an argument is not a number.
    #
    # All working variables are local so callers' variables of the same
    # name (e.g. return_code, result) are left untouched.
    local fail_code=${ANY_ERROR_EXITCODE:-1}
    local float_re='^[+-]?[0-9]+\.?[0-9]*$'
    local result

    # Make sure we have two arguments
    if [ $# -ne 2 ]; then
        echo_red "float_greater_than requires two (float) numbers" >&2
        return "$fail_code"
    fi
    # Make sure the arguments are numbers
    if [[ ! $1 =~ $float_re ]]; then
        echo_yellow "Input to float_greater_than is not a float, ignoring"
        return "$fail_code"
    fi
    if [[ ! $2 =~ $float_re ]]; then
        echo_yellow "Input to float_greater_than is not a float, ignoring"
        return "$fail_code"
    fi
    # Now do the actual evaluation; awk does the floating point comparison
    # that bash arithmetic cannot
    result=$(printf '%s %s\n' "$1" "$2" | awk '{if ($1 > $2) print "true"}')
    if [ "$result" = true ]; then
        return 0
    fi
    return "$fail_code"
}

function check_in_prefix_shell() {
    # Verify that the current shell is the bash of the prefix given by
    # EPREFIX, i.e. that SHELL equals ${EPREFIX}/bin/bash; any violation is
    # reported through fatal_error.

    # Make sure EPREFIX is defined
    if [[ -z "${EPREFIX}" ]]; then
        fatal_error "This script cannot be used without having first defined EPREFIX"
    fi
    # The right-hand side is quoted on purpose: unquoted it would be treated
    # as a glob pattern, so a prefix containing characters such as `[`, `*`
    # or `?` would not be compared literally
    if [[ "${SHELL}" != "${EPREFIX}/bin/bash" ]]; then
        fatal_error "Not running in Gentoo Prefix environment, run '${EPREFIX}/startprefix' first!"
    fi
}

function create_directory_structure() {
    # Create a directory (including missing parents) and make sure it is
    # writable for the current user.
    #
    # Arguments: $1 - relative or absolute path of the directory
    # Outputs:   a description of the failure on stderr
    # Returns:   0 when the directory exists and is writable; the exit code
    #            of mkdir when creation fails; ANY_ERROR_EXITCODE (1 when
    #            that variable is unset) for a wrong argument count or a
    #            directory that is not writable.
    #
    # All working variables are local so callers' variables of the same
    # name (e.g. return_code) are left untouched.
    local fail_code=${ANY_ERROR_EXITCODE:-1}
    local dir_structure error_message real_dir return_code

    # Ensure we are given a single path argument
    if [ $# -ne 1 ]; then
        echo_red "Function requires a single (relative or absolute) path argument" >&2
        return "$fail_code"
    fi
    dir_structure="$1"

    # Attempt to create the directory structure
    # (`--` keeps a path starting with `-` from being parsed as an option)
    error_message=$(mkdir -p -- "$dir_structure" 2>&1)
    return_code=$?
    # If it fails be explicit about the error
    if [ "${return_code}" -ne 0 ]; then
        real_dir=$(realpath -m "$dir_structure")
        echo_red "Creating ${dir_structure} (real path ${real_dir}) failed with:\n ${error_message}" >&2
    else
        # If we're creating it, our use case is that we want to be able to write there
        # (this is a check in case the directory already existed)
        if [ ! -w "${dir_structure}" ]; then
            real_dir=$(realpath -m "$dir_structure")
            echo_red "You do not have (required) write permissions to ${dir_structure} (real path ${real_dir})!" >&2
            return_code=$fail_code
        fi
    fi

    return "$return_code"
}

function get_path_for_tool {
tool_name=$1
tool_envvar_name=$2

which_out=$(which ${tool_name} 2>&1)
which_out=$(which "${tool_name}" 2>&1)
exit_code=$?
if [[ ${exit_code} -eq 0 ]]; then
echo "INFO: found tool ${tool_name} in PATH (${which_out})" >&2
Expand Down Expand Up @@ -68,7 +138,7 @@ function get_host_from_url {
url=$1
re="(http|https)://([^/:]+)"
if [[ $url =~ $re ]]; then
echo ${BASH_REMATCH[2]}
echo "${BASH_REMATCH[2]}"
return 0
else
echo ""
Expand All @@ -80,7 +150,7 @@ function get_port_from_url {
url=$1
re="(http|https)://[^:]+:([0-9]+)"
if [[ $url =~ $re ]]; then
echo ${BASH_REMATCH[2]}
echo "${BASH_REMATCH[2]}"
return 0
else
echo ""
Expand All @@ -90,7 +160,7 @@ function get_port_from_url {

function get_ipv4_address {
hname=$1
hipv4=$(grep ${hname} /etc/hosts | grep -v '^[[:space:]]*#' | cut -d ' ' -f 1)
hipv4=$(grep "${hname}" /etc/hosts | grep -v '^[[:space:]]*#' | cut -d ' ' -f 1)
# TODO try other methods if the one above does not work --> tool that verifies
# what method can be used?
echo "${hipv4}"
Expand Down

0 comments on commit 3db7a53

Please sign in to comment.