Skip to content

Commit

Permalink
Add support for installing CUDA and CUDA-Samples
Browse files Browse the repository at this point in the history
  • Loading branch information
ocaisa committed Nov 10, 2023
1 parent 60ccab9 commit cac7d8c
Show file tree
Hide file tree
Showing 5 changed files with 194 additions and 1 deletion.
2 changes: 1 addition & 1 deletion EESSI-pilot-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ done

echo ">> Creating/updating Lmod cache..."
export LMOD_RC="${EASYBUILD_INSTALLPATH}/.lmod/lmodrc.lua"
# (Re)generate lmodrc.lua when it does not exist yet, or when the PR diff touches create_lmodrc.py.
# Changed files are read from the '+++ <prefix>/<path>' header lines of the diff.
lmodrc_changed=""
if [ -f "${pr_diff:-}" ]; then
    # '|| true' keeps a "no match" result from aborting the script if errexit is enabled
    lmodrc_changed=$(grep '^+++' "${pr_diff}" | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep -Fx 'create_lmodrc.py' || true)
fi
if [ ! -f "$LMOD_RC" ] || [ -n "${lmodrc_changed}" ]; then
    python3 "$TOPDIR/create_lmodrc.py" "${EASYBUILD_INSTALLPATH}"
    check_exit_code $? "$LMOD_RC created" "Failed to create $LMOD_RC"
fi
Expand Down
108 changes: 108 additions & 0 deletions create_lmodrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,113 @@
}
"""

GPU_LMOD_RC ="""require("strict")
local hook = require("Hook")
local open = io.open
-- Return the complete contents of the file at `path`, or nil when it cannot be opened.
local function read_file(path)
    local handle = open(path, "rb") -- "r" = read, "b" = binary
    if not handle then
        return nil
    end
    -- "*a" reads everything up to the end of the file
    local data = handle:read("*a")
    handle:close()
    return data
end
-- from https://stackoverflow.com/a/40195356
--- Tell whether a file or directory is present at the given path.
-- The path is renamed onto itself; a failure with error code 13
-- (permission denied) still counts as "present".
-- Returns true when present, otherwise the (falsy) rename result plus its message.
function exists(file)
    local renamed, message, errno = os.rename(file, file)
    if not renamed and errno == 13 then
        -- Permission denied, but it exists
        return true
    end
    return renamed, message
end
-- Visibility callback: marks modules carrying the ("arch", "gpu") property as
-- not visible whenever the host_injections counterpart of EESSI_SOFTWARE_PATH
-- does not exist.
local function visible_hook(modT)
    local moduleTable = require("FrameStk"):singleton():mt()
    local softwarePath = os.getenv('EESSI_SOFTWARE_PATH') or ""
    -- only the first gsub result (the rewritten string) is kept
    local cudaDir = string.gsub(softwarePath, 'versions', 'host_injections')
    if exists(cudaDir) then
        return
    end
    if moduleTable:haveProperty(modT.sn,"arch","gpu") then
        modT.isVisible = false
    end
end
-- Extract the (major, minor, patch) components of the first "X.Y.Z" version
-- found in `text`, as numbers. Returns nil when `text` is not a string or
-- contains no such version.
local function parse_cuda_version(text)
    if type(text) ~= "string" then
        return nil
    end
    local major, minor, patch = string.match(text, "(%d+)%.(%d+)%.(%d+)")
    if major == nil then
        return nil
    end
    -- string.match yields strings; comparing those with "<" is lexicographic
    -- ("9" < "12" is false), so convert to numbers before comparing
    return tonumber(major), tonumber(minor), tonumber(patch)
end

-- Load callback: refuses to load CUDA, or modules carrying the ("arch", "gpu")
-- property, when the host-side pieces they need are missing or too old.
-- `t.modFullName` is the only field of `t` that is read.
-- On refusal an explanation is written to stderr and frameStk:__clear() is called.
local function cuda_enabled_load_hook(t)
    local frameStk = require("FrameStk"):singleton()
    local mt = frameStk:mt()
    -- name without the version part; fall back to the full name when there is no "/"
    local simpleName = string.match(t.modFullName, "(.-)/") or t.modFullName
    -- if we try to load CUDA itself, check if the software exists in host_injections
    -- otherwise, refuse to load CUDA and print error message
    if simpleName == 'CUDA' then
        -- get the full host_injections path
        local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
        -- build final path where the CUDA software should be installed
        local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
        local cudaDirExists = exists(cudaEasyBuildDir)
        if not cudaDirExists then
            io.stderr:write("You requested to load ",simpleName,"\\n")
            io.stderr:write("While the module file exists, the actual software is not shipped with EESSI.\\n")
            io.stderr:write("In order to be able to use the CUDA module, please follow the instructions in the\\n")
            io.stderr:write("gpu_support folder. Adding the CUDA software can be as easy as:\\n")
            io.stderr:write("export INSTALL_CUDA=true && ./add_nvidia_gpu_support.sh\\n")
            frameStk:__clear()
        end
    end
    -- when loading CUDA enabled modules check if the necessary matching compatibility libraries are installed
    -- otherwise, refuse to load the requested module and print error message
    local haveGpu = mt:haveProperty(simpleName,"arch","gpu")
    if not haveGpu then
        return
    end
    local arch = os.getenv("EESSI_CPU_FAMILY") or ""
    local cudaVersionFile = "/cvmfs/pilot.eessi-hpc.org/host_injections/nvidia/" .. arch .. "/latest/version.txt"
    local cudaDriverExists = exists(cudaVersionFile)
    local singularityCudaExists = exists("/.singularity.d/libs/libcuda.so")
    if not (cudaDriverExists or singularityCudaExists) then
        io.stderr:write("You requested to load ",simpleName,"\\n")
        io.stderr:write("which relies on the CUDA runtime environment and its compatibility libraries.\\n")
        io.stderr:write("In order to be able to use the module, please follow the instructions in the\\n")
        io.stderr:write("gpu_support folder. Installing the needed compatibility libraries can be as easy as:\\n")
        io.stderr:write("./add_nvidia_gpu_support.sh\\n")
        frameStk:__clear()
        return
    end
    if not cudaDriverExists then
        return
    end
    local cudaVersion = read_file(cudaVersionFile)
    local cudaVersion_req = os.getenv("EESSICUDAVERSION")
    local major, minor, patch = parse_cuda_version(cudaVersion)
    local major_req, minor_req, patch_req = parse_cuda_version(cudaVersion_req)
    -- without two parseable versions (unreadable version file, EESSICUDAVERSION
    -- not set, unexpected format) there is nothing to compare: skip the check
    -- rather than raising an error from inside the hook
    if major == nil or major_req == nil then
        return
    end
    local compat_libs_need_update = major < major_req
        or (major == major_req and (minor < minor_req
            or (minor == minor_req and patch < patch_req)))
    if compat_libs_need_update then
        io.stderr:write("You requested to load CUDA version ",cudaVersion)
        io.stderr:write("but the module you want to load requires CUDA version ",cudaVersion_req,".\\n")
        io.stderr:write("Please update your CUDA compatibility libraries in order to use ",simpleName,".\\n")
        frameStk:__clear()
    end
end
-- Register both callbacks with the Hook module required at the top of this snippet:
-- cuda_enabled_load_hook under the hook name "load", visible_hook under "isVisibleHook".
hook.register("load", cuda_enabled_load_hook)
hook.register("isVisibleHook", visible_hook)
"""

def error(msg):
sys.stderr.write("ERROR: %s\n" % msg)
Expand All @@ -36,6 +143,7 @@ def error(msg):
'dot_lmod': DOT_LMOD,
'prefix': prefix,
}
lmodrc_txt += '\n' + GPU_LMOD_RC
try:
os.makedirs(os.path.dirname(lmodrc_path), exist_ok=True)
with open(lmodrc_path, 'w') as fp:
Expand Down
82 changes: 82 additions & 0 deletions eb_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ def parse_hook(ec, *args, **kwargs):
if ec.name in PARSE_HOOKS:
PARSE_HOOKS[ec.name](ec, eprefix)

# inject the GPU property (if required)
ec = inject_gpu_property(ec)

def pre_prepare_hook(self, *args, **kwargs):
"""Main pre-prepare hook: trigger custom functions."""
Expand Down Expand Up @@ -209,6 +211,12 @@ def pre_configure_hook(self, *args, **kwargs):
PRE_CONFIGURE_HOOKS[self.name](self, *args, **kwargs)


def post_sanitycheck_hook(self, *args, **kwargs):
    """Main post-sanity-check hook: dispatch to the custom function registered for this software name (if any)."""
    try:
        custom_hook = POST_SANITYCHECK_HOOKS[self.name]
    except KeyError:
        # nothing registered for this software name
        return
    custom_hook(self, *args, **kwargs)


def pre_configure_hook_openblas_optarch_generic(self, *args, **kwargs):
"""
Pre-configure hook for OpenBLAS: add DYNAMIC_ARCH=1 to build/test/install options when using --optarch=GENERIC
Expand Down Expand Up @@ -328,6 +336,76 @@ def pre_single_extension_isoband(ext, *args, **kwargs):
# cfr. https://github.com/r-lib/isoband/commit/6984e6ce8d977f06e0b5ff73f5d88e5c9a44c027
ext.cfg['preinstallopts'] = "sed -i 's/SIGSTKSZ/32768/g' src/testthat/vendor/catch.h && "

def post_sanitycheck_cuda(self, *args, **kwargs):
    """
    Delete CUDA files we are not allowed to ship and replace them with a symlink
    to a possible installation under host_injections.

    The names of the files that may be kept are collected from the section of
    <installdir>/EULA.txt between the lines "2.6. Attachment A" and "2.7. Attachment B".
    Every regular file below self.installdir whose name (up to the first dot) is not in
    that list is removed and replaced by a symlink to the same path with the first
    occurrence of "versions" replaced by "host_injections".

    :param self: object providing the 'installdir' attribute (extra arguments are ignored)
    :raises EasyBuildError: if the derived whitelist looks wrong, or a symlink cannot be created
    """
    print_msg("Replacing CUDA stuff we cannot ship with symlinks...")
    # read CUDA EULA
    eula_path = os.path.join(self.installdir, "EULA.txt")
    attachment_a = []
    with open(eula_path) as infile:
        copy = False
        for line in infile:
            stripped = line.strip()
            if stripped == "2.6. Attachment A":
                copy = True
            elif stripped == "2.7. Attachment B":
                copy = False
            elif copy:
                attachment_a.append(line)
    # create whitelist without file extensions, they're not really needed and they only complicate things
    whitelist = {'EULA', 'README'}
    file_extensions = (".so", ".a", ".h", ".bc")
    for line in attachment_a:
        for word in line.split():
            if any(ext in word for ext in file_extensions):
                whitelist.add(word.split(".")[0])
    # Do some quick checks for things we should or shouldn't have in the list
    if "nvcc" in whitelist:
        raise EasyBuildError("Found 'nvcc' in whitelist: %s" % sorted(whitelist))
    if "libcudart" not in whitelist:
        raise EasyBuildError("Did not find 'libcudart' in whitelist: %s" % sorted(whitelist))
    # iterate over all files in the CUDA path
    for root, _dirs, files in os.walk(self.installdir):
        for filename in files:
            source = os.path.join(root, filename)
            # we only really care about real files, i.e. not symlinks
            if os.path.islink(source):
                continue
            # keep files that are part of the whitelist
            if filename.split(".")[0] in whitelist:
                continue
            # not in the whitelist: delete the file and create a symlink to host_injections;
            # only the first "versions" is replaced, so that file or directory names
            # further down the path which happen to contain "versions" are left alone
            target = source.replace("versions", "host_injections", 1)
            os.remove(source)
            # a symlink may point to a location that does not exist (yet), so os.symlink can be
            # used directly; this also avoids building a shell command from file paths
            try:
                os.symlink(target, source)
            except OSError as err:
                raise EasyBuildError("Failed to create symbolic link %s -> %s: %s" % (source, target, err))


def inject_gpu_property(ec):
    """
    Tag easyconfigs that depend on CUDA for Lmod and make CUDA a build-only dependency.

    When a dependency named exactly "CUDA" is present:
    - that dependency is moved from 'dependencies' to 'builddependencies';
    - 'modluafooter' gets 'add_property("arch","gpu")' plus a setenv of
      EESSICUDAVERSION to the version of the CUDA dependency (unless already there).

    :param ec: easyconfig; must provide asdict(), a 'log' attribute and item assignment
    :return: the same easyconfig object
    """
    ec_dict = ec.asdict()
    deps = ec_dict["dependencies"]
    # Only the dependency named exactly "CUDA" counts: a substring test would also catch
    # names such as "UCX-CUDA", moving them to the build dependencies and recording
    # their version as the CUDA version.
    cuda_deps = [dep for dep in deps if dep[0] == "CUDA"]
    # Check if CUDA is in the dependencies, if so add the GPU Lmod tag
    if cuda_deps:
        ec.log.info("[parse hook] Injecting gpu as Lmod arch property and envvar with CUDA version")
        key = "modluafooter"
        value = 'add_property("arch","gpu")'
        build_deps = ec_dict["builddependencies"]
        cuda_version = 0
        # Loop over the separately collected matches, so 'deps' is never modified while
        # it is being iterated (which silently skips the element after each removal).
        # NOTE(review): both lists are changed in place, as before; this presumably relies
        # on asdict() handing out the easyconfig's own list objects - confirm.
        for dep in cuda_deps:
            # Make CUDA a build dependency only (rpathing saves us from link errors)
            cuda_version = dep[1]
            deps.remove(dep)
            if dep not in build_deps:
                build_deps.append(dep)
        value = "\n".join([value, 'setenv("EESSICUDAVERSION","%s")' % cuda_version])
        if key in ec_dict:
            if value not in ec_dict[key]:
                ec[key] = "\n".join([ec_dict[key], value])
        else:
            ec[key] = value
    return ec


PARSE_HOOKS = {
'CGAL': parse_hook_cgal_toolchainopts_precise,
Expand Down Expand Up @@ -358,3 +436,7 @@ def pre_single_extension_isoband(ext, *args, **kwargs):
'isoband': pre_single_extension_isoband,
'testthat': pre_single_extension_testthat,
}

# Software name -> custom function that post_sanitycheck_hook calls for that software.
POST_SANITYCHECK_HOOKS = {
'CUDA': post_sanitycheck_cuda,
}
1 change: 1 addition & 0 deletions eessi-2023.06-eb-4.8.1-system.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ easyconfigs:
- EasyBuild-4.8.2.eb:
options:
from-pr: 19105
- CUDA-12.1.1.eb
2 changes: 2 additions & 0 deletions eessi-2023.06-eb-4.8.2-2023a.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
easyconfigs:
- CUDA-Samples-12.1-GCC-12.3.0-CUDA-12.1.1.eb

0 comments on commit cac7d8c

Please sign in to comment.