Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Print LmodError when loading GCCcore-12.2.0-based modules on zen4 #841

Draft
wants to merge 6 commits into
base: 2023.06-software.eessi.io
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions eb_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,28 @@
# Value of $EESSI_SOFTWARE_SUBDIR that identifies the AMD Zen4 CPU target (compared against in the main hooks)
CPU_TARGET_ZEN4 = 'x86_64/amd/zen4'

# Names of attributes set on 'self' to stash the original value of a build option, so a later hook can restore it
# (EESSI_RPATH_OVERRIDE_ATTR presumably serves the same purpose for rpath_override_dirs - TODO confirm)
EESSI_RPATH_OVERRIDE_ATTR = 'orig_rpath_override_dirs'
EESSI_MODULE_ONLY_ATTR = 'orig_module_only'
EESSI_FORCE_ATTR = 'orig_force'

# First element of the 'SYSTEM' entry in EASYCONFIG_CONSTANTS
# (presumably the name of EasyBuild's system toolchain - TODO confirm)
SYSTEM = EASYCONFIG_CONSTANTS['SYSTEM'][0]

# NOTE(review): the '.' characters in this pattern are unescaped, so each matches any character - confirm intended
EESSI_INSTALLATION_REGEX = r"^/cvmfs/[^/]*.eessi.io/versions/"
HOST_INJECTIONS_LOCATION = "/cvmfs/software.eessi.io/host_injections/"

# Single definition of the environment variable name that the zen4/GCCcore-12.2.0 hooks set and unset,
# and that the generated module footer checks before raising its LmodError
EESSI_IGNORE_ZEN4_GCC1220_ENVVAR = "EESSI_IGNORE_LMOD_ERROR_ZEN4_GCC1220"

def is_gcccore_1220_based(ecname=None, ecversion=None, tcname=None, tcversion=None):
    """
    Check if this easyconfig either _is_ or _uses_ a GCCcore-12.2.0 based toolchain.

    All arguments default to None so that callers can (and preferably should) pass them by keyword,
    which avoids silently mixing up the argument order. Positional calls keep working.

    :param ecname: name of the software specified in the easyconfig
    :param ecversion: version of the software specified in the easyconfig
    :param tcname: name of the toolchain used by the easyconfig
    :param tcversion: version of the toolchain used by the easyconfig
    :return: True if either the (tcname, tcversion) or the (ecname, ecversion) pair denotes
             GCCcore/GCC 12.2.0 or gfbf/gompi/foss 2022b, False otherwise
    """
    gcccore_based_names = ('GCCcore', 'GCC')
    foss_based_names = ('gfbf', 'gompi', 'foss')

    def _is_1220_toolchain(name, version):
        """Check a single (name, version) pair against the GCCcore-12.2.0 toolchain family."""
        if name in foss_based_names:
            return version == '2022b'
        if name in gcccore_based_names:
            # Guard against a missing version: LooseVersion is only meaningful for an actual version string
            return version is not None and LooseVersion(version) == LooseVersion('12.2.0')
        return False

    # Same evaluation order as before: toolchain first, then the easyconfig itself
    return _is_1220_toolchain(tcname, tcversion) or _is_1220_toolchain(ecname, ecversion)


def get_eessi_envvar(eessi_envvar):
"""Get an EESSI environment variable from the environment"""
Expand Down Expand Up @@ -77,6 +93,11 @@ def parse_hook(ec, *args, **kwargs):
if ec.name in PARSE_HOOKS:
PARSE_HOOKS[ec.name](ec, eprefix)

# Always trigger this one, regardless of ec.name
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if cpu_target == CPU_TARGET_ZEN4:
parse_hook_zen4_module_only(ec, eprefix)

# inject the GPU property (if required)
ec = inject_gpu_property(ec)

Expand Down Expand Up @@ -130,6 +151,11 @@ def pre_prepare_hook(self, *args, **kwargs):
if self.name in PRE_PREPARE_HOOKS:
PRE_PREPARE_HOOKS[self.name](self, *args, **kwargs)

# Always trigger this one, regardless of ec.name
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if cpu_target == CPU_TARGET_ZEN4:
pre_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs)


def post_prepare_hook_gcc_prefixed_ld_rpath_wrapper(self, *args, **kwargs):
"""
Expand Down Expand Up @@ -175,6 +201,11 @@ def post_prepare_hook(self, *args, **kwargs):
if self.name in POST_PREPARE_HOOKS:
POST_PREPARE_HOOKS[self.name](self, *args, **kwargs)

# Always trigger this one, regardless of ec.name
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if cpu_target == CPU_TARGET_ZEN4:
post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs)


def parse_hook_casacore_disable_vectorize(ec, eprefix):
"""
Expand Down Expand Up @@ -353,6 +384,86 @@ def parse_hook_CP2K_remove_deps_for_aarch64(ec, *args, **kwargs):
raise EasyBuildError("CP2K-specific hook triggered for non-CP2K easyconfig?!")


def parse_hook_zen4_module_only(ec, eprefix):
    """
    Make the module generated for a GCCcore-12.2.0-based easyconfig print an LmodError when loaded.

    This toolchain will not be supported on Zen4, so for such easyconfigs the 'modluafooter' parameter is
    set to a Lua snippet that raises an LmodError, unless the environment variable named by
    EESSI_IGNORE_ZEN4_GCC1220_ENVVAR is set. Any 'modluafooter' already present in the easyconfig is replaced.

    :param ec: parsed easyconfig; read via ec['name'], ec['version'], ec['toolchain'], updated in place
    :param eprefix: unused here, accepted so the signature matches how the main parse hook calls this function
    """
    if not is_gcccore_1220_based(ec['name'], ec['version'], ec['toolchain']['name'], ec['toolchain']['version']):
        return

    env_varname = EESSI_IGNORE_ZEN4_GCC1220_ENVVAR
    # TODO: create a docs page to which we can refer for more info here
    # Need to escape newline character so that the newline character actually ends up in the module file
    # (otherwise, it splits the string, and a 2-line string ends up in the modulefile, resulting in syntax error)
    errmsg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported for the Zen4 architecture.\\n"
    # Link straight to the specific known issue rather than to the general known issues page
    errmsg += ("See https://www.eessi.io/docs/known_issues/eessi-2023.06/"
               "#gcc-1220-and-foss-2022b-based-modules-cannot-be-loaded-on-zen4-architecture")
    ec['modluafooter'] = 'if (not os.getenv("%s")) then LmodError("%s") end' % (env_varname, errmsg)


def pre_fetch_hook(self, *args, **kwargs):
    """
    Main pre fetch hook: trigger custom functions based on software name.

    :param self: the object the hook is invoked on; its 'name' attribute selects the software-specific hook
    :param args: extra positional arguments, passed through unchanged to the triggered hook(s)
    :param kwargs: extra keyword arguments, passed through unchanged to the triggered hook(s)
    """
    # Look up by self.name: there is no 'ec' in scope in this hook (using it would raise a NameError)
    if self.name in PRE_FETCH_HOOKS:
        PRE_FETCH_HOOKS[self.name](self, *args, **kwargs)

    # Always trigger this one, regardless of self.name
    cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
    if cpu_target == CPU_TARGET_ZEN4:
        pre_fetch_hook_zen4_gcccore1220(self, *args, **kwargs)


def pre_fetch_hook_zen4_gcccore1220(self, *args, **kwargs):
    """
    Use --force --module-only if building a GCCcore-12.2.0-based EasyConfig for Zen4.

    This toolchain will not be supported on Zen4, so we will generate a modulefile
    and have it print an LmodError. The original values of the 'module_only' and 'force'
    build options are stashed on 'self' (under EESSI_MODULE_ONLY_ATTR and EESSI_FORCE_ATTR)
    so that post_module_hook_zen4_gcccore1220 can restore them.

    :raises EasyBuildError: if 'self' already carries one of the stash attributes
    """
    if not is_gcccore_1220_based(self.name, self.version, self.toolchain.name, self.toolchain.version):
        return

    # (attribute used to stash the original value, build option name, name as shown in the message)
    overrides = (
        (EESSI_MODULE_ONLY_ATTR, 'module_only', 'module-only'),
        (EESSI_FORCE_ATTR, 'force', 'force'),
    )

    # Check everything up front, so we never bail out with only part of the build options modified
    for attr, _, _ in overrides:
        if hasattr(self, attr):
            raise EasyBuildError("'self' already has attribute %s! Can't use pre_fetch hook.", attr)

    for attr, option, label in overrides:
        setattr(self, attr, build_option(option))
        # Use an actual boolean rather than the (truthy) string 'True'
        update_build_option(option, True)
        print_msg("Updated build option '%s' to 'True'" % label)


def post_module_hook_zen4_gcccore1220(self, *args, **kwargs):
    """
    Revert changes from pre_fetch_hook_zen4_gcccore1220.

    Restores the 'module_only' and 'force' build options to the values that were stashed on 'self'
    under EESSI_MODULE_ONLY_ATTR and EESSI_FORCE_ATTR.

    :raises EasyBuildError: if 'self' is missing one of the stash attributes
    """
    if not is_gcccore_1220_based(self.name, self.version, self.toolchain.name, self.toolchain.version):
        return

    # Restore in the same order as before: 'module_only' first, then 'force'
    for attr, option in ((EESSI_MODULE_ONLY_ATTR, 'module_only'), (EESSI_FORCE_ATTR, 'force')):
        if not hasattr(self, attr):
            raise EasyBuildError("Cannot restore %s to its original value: 'self' is missing attribute %s.",
                                 option, attr)
        original_value = getattr(self, attr)
        update_build_option(option, original_value)
        print_msg("Restored original build option '%s' to %s" % (option, original_value))


# The environment variable is set in the pre_prepare hook below, and removed again in the matching post_prepare hook
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is no longer accurate (I think)

def pre_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs):
    """Set environment variable to ignore the LmodError from parse_hook_zen4_module_only during build phase"""
    # NOTE(review): reviewers asked whether this could be folded into the pre_fetch hook, so that setting
    # and unsetting live in one place - open question whether the environment variable persists until then
    uses_gcccore_1220 = is_gcccore_1220_based(self.name, self.version, self.toolchain.name, self.toolchain.version)
    if not uses_gcccore_1220:
        return
    os.environ[EESSI_IGNORE_ZEN4_GCC1220_ENVVAR] = "1"


def post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs):
    """Unset environment variable to ignore the LmodError from parse_hook_zen4_module_only during build phase"""
    # NOTE(review): reviewers asked whether this could simply be added to the post_module hook
    if is_gcccore_1220_based(self.name, self.version, self.toolchain.name, self.toolchain.version):
        # pop() with a default instead of 'del': don't fail with a KeyError if the variable is already gone
        os.environ.pop(EESSI_IGNORE_ZEN4_GCC1220_ENVVAR, None)


def pre_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwargs):
"""
Solve issues with compiling or running the tests on both
Expand Down Expand Up @@ -967,6 +1078,17 @@ def inject_gpu_property(ec):
return ec


def post_module_hook(self, *args, **kwargs):
    """
    Main post module hook: trigger custom functions based on software name.

    :param self: the object the hook is invoked on; its 'name' attribute selects the software-specific hook
    :param args: extra positional arguments, passed through unchanged to the triggered hook(s)
    :param kwargs: extra keyword arguments, passed through unchanged to the triggered hook(s)
    """
    # Look up by self.name: there is no 'ec' in scope in this hook (using it would raise a NameError)
    if self.name in POST_MODULE_HOOKS:
        POST_MODULE_HOOKS[self.name](self, *args, **kwargs)

    # Always trigger this one, regardless of self.name
    cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
    if cpu_target == CPU_TARGET_ZEN4:
        post_module_hook_zen4_gcccore1220(self, *args, **kwargs)


PARSE_HOOKS = {
'casacore': parse_hook_casacore_disable_vectorize,
'CGAL': parse_hook_cgal_toolchainopts_precise,
Expand All @@ -981,6 +1103,8 @@ def inject_gpu_property(ec):
'UCX': parse_hook_ucx_eprefix,
}

# Software-specific pre-fetch hooks, looked up by software name in pre_fetch_hook (currently none registered)
PRE_FETCH_HOOKS = {}

PRE_PREPARE_HOOKS = {
'Highway': pre_prepare_hook_highway_handle_test_compilation_issues,
}
Expand Down Expand Up @@ -1026,3 +1150,5 @@ def inject_gpu_property(ec):
'CUDA': post_postproc_cuda,
'cuDNN': post_postproc_cudnn,
}

# Software-specific post-module hooks, looked up by software name in post_module_hook (currently none registered)
POST_MODULE_HOOKS = {}
Loading