From 3c9618b162fe08595aad5bc146172319dd767a77 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 24 Dec 2025 17:37:50 +0100 Subject: [PATCH 01/24] Add some initial changes to the hooks to make sure to install with --module-only if this is CUDA-12.6 based but targets CC100 or CC120 --- eb_hooks.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/eb_hooks.py b/eb_hooks.py index 720afb29..663cddcb 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -40,6 +40,9 @@ CPU_TARGET_SAPPHIRE_RAPIDS = 'x86_64/intel/sapphirerapids' CPU_TARGET_ZEN4 = 'x86_64/amd/zen4' +GPU_TARGET_CC100 = 'accel/nvidia/cc100' +GPU_TARGET_CC120 = 'accel/nvidia/cc120' + EESSI_RPATH_OVERRIDE_ATTR = 'orig_rpath_override_dirs' EESSI_MODULE_ONLY_ATTR = 'orig_module_only' EESSI_FORCE_ATTR = 'orig_force' @@ -51,6 +54,8 @@ # Make sure a single environment variable name is used for this throughout the hooks EESSI_IGNORE_ZEN4_GCC1220_ENVVAR="EESSI_IGNORE_LMOD_ERROR_ZEN4_GCC1220" +EESSI_IGNORE_CUDA126_CC1X0_ENVVAR="EESSI_IGNORE_LMOD_ERROR_CUDA126_CC1X0" + STACK_REPROD_SUBDIR = 'reprod' @@ -114,6 +119,25 @@ def is_gcccore_1220_based(**kwargs): ) +def is_cuda_126_or_older_based(**kwargs): +# ecname, ecversion, ecversionsuffix): + """ + Checks if this easyconfig either _is_ or _uses_ a CUDA-12.6 or older. + This function is, for example, used to generate errors in CUDA-12.6 based modules for CC100 and CC120 targets + since anything prior to CUDA 12.8 does not support that. + + :param str ecname: Name of the software specified in the EasyConfig + :param str ecversion: Version of the software specified in the EasyConfig + :param str ecversionsuffix: Versionsuffix specified in the EasyConfig + """ + + # TODO: implement proper function that returns 'true' when this is either an EasyConfig for CUDA-12.6 + # or older OR when it uses CUDA 12.6 or older as a dependency + # I can _probably_ get the dependencies directoy, instead of having to infer the CUDA version from the + # versionsuffix + return True + + def get_eessi_envvar(eessi_envvar): """Get an EESSI environment variable from the environment""" @@ -160,6 +184,11 @@ def parse_hook(ec, *args, **kwargs): if cpu_target == CPU_TARGET_ZEN4: parse_hook_zen4_module_only(ec, eprefix) + # Always trigger, regardless of ec.name + gpu_target = get_eessi_envvar('EESSI_ACCEL_SUBDIR') + if gpu_target == GPU_TARGET_CC100 or gpu_target == GPU_TARGET_CC120: + parse_hook_cuda_module_only(ec, eprefix) + # inject the GPU property (if required) ec = inject_gpu_property(ec) @@ -574,6 +603,23 @@ def parse_hook_zen4_module_only(ec, eprefix): ec['modluafooter'] = 'if (not os.getenv("%s")) then LmodError("%s") end' % (env_varname, errmsg) +def parse_hook_cuda_module_only(ec, eprefix): + """ + Use --force --module-only if building a CUDA-12.X based EasyConfig with X<=6 for CC100 or CC120. + CUDA-12.6 has no support for CC100 and CC120 targets, so we will generate a modulefile + and have it print an LmodError. + """ + if is_cuda_126_or_older_based(ecname=ec['name'], ecversion=ec['version'], ecversionsuffix=ec['versionsuffix']): + env_varname = EESSI_IGNORE_CUDA126_CC1X0_ENVVAR + # TODO: create a docs page to which we can refer for more info here + # TODO: then update the link to the known issues page to the _specific_ issue + # Need to escape the newline character so that the newline character actually ends up in the module file + # (otherwise, it splits the string, and a 2-line string ends up in the modulefile, resulting in syntax error) + errmsg = "EasyConfigs using CUDA 12.6 or older are not supported for the Compute Capabilities 100 and 120.\\n" + errmsg += "See https://gitlab.com/eessi/support/-/issues/210#note_2973460336" # TODO: should be a more user-friendly known issues page + ec['modluafooter'] = 'if (not os.getenv("%s")) then LmodError("%s") end' % (env_varname, errmsg) + + def pre_fetch_hook(self, *args, **kwargs): """Main pre fetch hook: trigger custom functions based on software name.""" if self.name in PRE_FETCH_HOOKS: @@ -625,6 +671,11 @@ def is_unsupported_module(ec): if cpu_target == CPU_TARGET_ZEN4 and is_gcccore_1220_based(ecname=ec.name, ecversion=ec.version, tcname=ec.toolchain.name, tcversion=ec.toolchain.version): return EESSI_IGNORE_ZEN4_GCC1220_ENVVAR + + # TODO: add case for CUDA 12.6 or older and (CC100 or CC120) and return the corresponding 'ignore' variable + # if gpu_target == ... and is_cuda_126_or_older_based(...) + # return ... + return False @@ -715,6 +766,8 @@ def post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs): del os.environ[EESSI_IGNORE_ZEN4_GCC1220_ENVVAR] +# TODO: create pre and post prepare hook to set/unset EESSI_IGNORE_CUDA126_CC1X0_ENVVAR + def pre_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwargs): """ Solve issues with compiling or running the tests on both From d096160273e14a28dc6821cdbdc1665178033c18 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 7 Jan 2026 18:11:42 +0100 Subject: [PATCH 02/24] Make mechanism to generate modules that print LmodErrors for unsupported configurations more generic. Then, also apply this to unsupported combinations of CUDA toolkit versions and requested CUDA compute capabilities. TODO: actually implement a function that checks this compatibility --- eb_hooks.py | 263 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 184 insertions(+), 79 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index 663cddcb..40fe59fe 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -46,6 +46,8 @@ EESSI_RPATH_OVERRIDE_ATTR = 'orig_rpath_override_dirs' EESSI_MODULE_ONLY_ATTR = 'orig_module_only' EESSI_FORCE_ATTR = 'orig_force' +EESSI_SUPPORTED_MODULE_ATTR = 'eessi_supported_module' +EESSI_UNSUPPORTED_MODULE_ATTR = 'eessi_unsupported_module' SYSTEM = EASYCONFIG_CONSTANTS['SYSTEM'][0] @@ -119,6 +121,38 @@ def is_gcccore_1220_based(**kwargs): ) + +def get_cuda_version(ec, check_deps=True, check_builddeps=True): + """ + Returns the CUDA version if this EasyConfig (ec) uses CUDA as a (build)dependency. + Otherwise, returns None + """ + cudaver = None + ec_dict = ec.asdict() + + # At this point, CUDA should be a builddependency due to inject_gpu_property + # changing any CUDA dep to a builddependency. But, for robustness, just check both + deps = [] + if check_deps: + deps = deps + ec_dict['dependencies'][:] + if check_builddeps: + deps = deps + ec_dict['builddependencies'][:] + + # Provide default + for dep in deps: + if dep['name'] == 'CUDA': + return dep['version'] + + +def is_cuda_cc_supported_by_toolkit(cuda_cc, toolkit_version): + """ + Checks if the CUDA Compute Capability passed in cuda_cc is supported by the CUDA toolkit version toolkit_version + Returns True if supported or False if not supported + """ + # TODO: implement actual lookup table + return False + + def is_cuda_126_or_older_based(**kwargs): # ecname, ecversion, ecversionsuffix): """ @@ -179,15 +213,15 @@ def parse_hook(ec, *args, **kwargs): if ec.name in PARSE_HOOKS: PARSE_HOOKS[ec.name](ec, eprefix) - # Always trigger this one, regardless of ec.name - cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') - if cpu_target == CPU_TARGET_ZEN4: - parse_hook_zen4_module_only(ec, eprefix) - - # Always trigger, regardless of ec.name - gpu_target = get_eessi_envvar('EESSI_ACCEL_SUBDIR') - if gpu_target == GPU_TARGET_CC100 or gpu_target == GPU_TARGET_CC120: - parse_hook_cuda_module_only(ec, eprefix) +# # Always trigger this one, regardless of ec.name +# cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') +# if cpu_target == CPU_TARGET_ZEN4: +# parse_hook_zen4_module_only(ec, eprefix) +# +# # Always trigger, regardless of ec.name +# gpu_target = get_eessi_envvar('EESSI_ACCEL_SUBDIR') +# if gpu_target == GPU_TARGET_CC100 or gpu_target == GPU_TARGET_CC120: +# parse_hook_cuda_module_only(ec, eprefix) # inject the GPU property (if required) ec = inject_gpu_property(ec) @@ -317,6 +351,22 @@ def post_ready_hook(self, *args, **kwargs): print_msg(msg % (new_parallel, curr_parallel, session_parallel, self.name, cpu_target), log=self.log) +def pre_prepare_hook_unsupported_modules(self, *args, **kwargs): + """Set env var to ignore specific LmodErrors from dependencies if this module is know to be unsupported""" + if is_unsupported_module(self): + unsup_mod = getattr(self, EESSI_UNSUPPORTED_MODULE_ATTR) + print_msg(f"Setting {unsup_mod.envvar} in to allow loading dependencies that otherwise throw an LmodError") + os.environ[unsup_mod.envvar] = "1" + + +def post_prepare_hook_unsupported_modules(self, *args, **kwargs): + """Unset env var to ignore specific LmodErrors from dependencies if this module is know to be unsupported""" + if is_unsupported_module(self): + unsup_mod = getattr(self, EESSI_UNSUPPORTED_MODULE_ATTR) + print_msg(f"Unsetting {unsup_mod.envvar}") + del os.environ[unsup_mod.envvar] + + def pre_prepare_hook(self, *args, **kwargs): """Main pre-prepare hook: trigger custom functions.""" @@ -347,10 +397,13 @@ def pre_prepare_hook(self, *args, **kwargs): if self.name in PRE_PREPARE_HOOKS: PRE_PREPARE_HOOKS[self.name](self, *args, **kwargs) - # Always trigger this one, regardless of ec.name - cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') - if cpu_target == CPU_TARGET_ZEN4: - pre_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs) + # Always trigger this, regardless of ec.name + pre_prepare_hook_unsupported_modules(self, *args, **kwargs) + +# # Always trigger this one, regardless of ec.name +# cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') +# if cpu_target == CPU_TARGET_ZEN4: +# pre_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs) def post_prepare_hook_gcc_prefixed_ld_rpath_wrapper(self, *args, **kwargs): @@ -416,10 +469,13 @@ def post_prepare_hook(self, *args, **kwargs): if self.name in POST_PREPARE_HOOKS: POST_PREPARE_HOOKS[self.name](self, *args, **kwargs) - # Always trigger this one, regardless of ec.name - cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') - if cpu_target == CPU_TARGET_ZEN4: - post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs) +# # Always trigger this one, regardless of ec.name +# cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') +# if cpu_target == CPU_TARGET_ZEN4: +# post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs) + + # Always trigger this, regardless of ec.name + post_prepare_hook_unsupported_modules(self, *args, **kwargs) def parse_hook_casacore_disable_vectorize(ec, eprefix): @@ -585,39 +641,39 @@ def parse_hook_freeimage_aarch64(ec, *args, **kwargs): print_msg("Changed toolchainopts for %s: %s", ec.name, ec['toolchainopts']) -def parse_hook_zen4_module_only(ec, eprefix): - """ - Use --force --module-only if building a foss-2022b-based EasyConfig for Zen4. - This toolchain will not be supported on Zen4, so we will generate a modulefile - and have it print an LmodError. - """ - if is_gcccore_1220_based(ecname=ec['name'], ecversion=ec['version'], tcname=ec['toolchain']['name'], - tcversion=ec['toolchain']['version']): - env_varname = EESSI_IGNORE_ZEN4_GCC1220_ENVVAR - # TODO: create a docs page to which we can refer for more info here - # TODO: then update the link to the known issues page to the _specific_ issue - # Need to escape the newline character so that the newline character actually ends up in the module file - # (otherwise, it splits the string, and a 2-line string ends up in the modulefile, resulting in syntax error) - errmsg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported for the Zen4 architecture.\\n" - errmsg += "See https://www.eessi.io/docs/known_issues/eessi-/#gcc-1220-and-foss-2022b-based-modules-cannot-be-loaded-on-zen4-architecture" - ec['modluafooter'] = 'if (not os.getenv("%s")) then LmodError("%s") end' % (env_varname, errmsg) - - -def parse_hook_cuda_module_only(ec, eprefix): - """ - Use --force --module-only if building a CUDA-12.X based EasyConfig with X<=6 for CC100 or CC120. - CUDA-12.6 has no support for CC100 and CC120 targets, so we will generate a modulefile - and have it print an LmodError. - """ - if is_cuda_126_or_older_based(ecname=ec['name'], ecversion=ec['version'], ecversionsuffix=ec['versionsuffix']): - env_varname = EESSI_IGNORE_CUDA126_CC1X0_ENVVAR - # TODO: create a docs page to which we can refer for more info here - # TODO: then update the link to the known issues page to the _specific_ issue - # Need to escape the newline character so that the newline character actually ends up in the module file - # (otherwise, it splits the string, and a 2-line string ends up in the modulefile, resulting in syntax error) - errmsg = "EasyConfigs using CUDA 12.6 or older are not supported for the Compute Capabilities 100 and 120.\\n" - errmsg += "See https://gitlab.com/eessi/support/-/issues/210#note_2973460336" # TODO: should be a more user-friendly known issues page - ec['modluafooter'] = 'if (not os.getenv("%s")) then LmodError("%s") end' % (env_varname, errmsg) +# def parse_hook_zen4_module_only(ec, eprefix): +# """ +# Use --force --module-only if building a foss-2022b-based EasyConfig for Zen4. +# This toolchain will not be supported on Zen4, so we will generate a modulefile +# and have it print an LmodError. +# """ +# if is_gcccore_1220_based(ecname=ec['name'], ecversion=ec['version'], tcname=ec['toolchain']['name'], +# tcversion=ec['toolchain']['version']): +# env_varname = EESSI_IGNORE_ZEN4_GCC1220_ENVVAR +# # TODO: create a docs page to which we can refer for more info here +# # TODO: then update the link to the known issues page to the _specific_ issue +# # Need to escape the newline character so that the newline character actually ends up in the module file +# # (otherwise, it splits the string, and a 2-line string ends up in the modulefile, resulting in syntax error) +# errmsg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported for the Zen4 architecture.\\n" +# errmsg += "See https://www.eessi.io/docs/known_issues/eessi-/#gcc-1220-and-foss-2022b-based-modules-cannot-be-loaded-on-zen4-architecture" +# ec['modluafooter'] = 'if (not os.getenv("%s")) then LmodError("%s") end' % (env_varname, errmsg) +# +# +# def parse_hook_cuda_module_only(ec, eprefix): +# """ +# Use --force --module-only if building a CUDA-12.X based EasyConfig with X<=6 for CC100 or CC120. +# CUDA-12.6 has no support for CC100 and CC120 targets, so we will generate a modulefile +# and have it print an LmodError. +# """ +# if is_cuda_126_or_older_based(ecname=ec['name'], ecversion=ec['version'], ecversionsuffix=ec['versionsuffix']): +# env_varname = EESSI_IGNORE_CUDA126_CC1X0_ENVVAR +# # TODO: create a docs page to which we can refer for more info here +# # TODO: then update the link to the known issues page to the _specific_ issue +# # Need to escape the newline character so that the newline character actually ends up in the module file +# # (otherwise, it splits the string, and a 2-line string ends up in the modulefile, resulting in syntax error) +# errmsg = "EasyConfigs using CUDA 12.6 or older are not supported for the Compute Capabilities 100 and 120.\\n" +# errmsg += "See https://gitlab.com/eessi/support/-/issues/210#note_2973460336" # TODO: should be a more user-friendly known issues page +# ec['modluafooter'] = 'if (not os.getenv("%s")) then LmodError("%s") end' % (env_varname, errmsg) def pre_fetch_hook(self, *args, **kwargs): @@ -660,22 +716,68 @@ def pre_fetch_hook_check_installation_path(self, *args, **kwargs): ) -def is_unsupported_module(ec): +from typing import NamedTuple + +class UnsupportedModule(NamedTuple): + """ + Environment variable and error message for an unsupported module. + envvar: the name of the environment variable that needs to be set to ignore the LmodError + that this unsupported module would otherwise generate + errmsg: the actual LmodError message that should be printed + """ + envvar: str + errmsg: str + + +def is_unsupported_module(self): """ Determine if the given module is unsupported in EESSI, and hence if a dummy module needs to be built that just prints an LmodError. If true, this function returns the name of the environment variable that can be used to ignore that particular LmodError, as this is still required to actually build the module itself (EasyBuild will load/test the module). Otherwise, it returns False. """ - cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') - - if cpu_target == CPU_TARGET_ZEN4 and is_gcccore_1220_based(ecname=ec.name, ecversion=ec.version, tcname=ec.toolchain.name, tcversion=ec.toolchain.version): - return EESSI_IGNORE_ZEN4_GCC1220_ENVVAR - # TODO: add case for CUDA 12.6 or older and (CC100 or CC120) and return the corresponding 'ignore' variable - # if gpu_target == ... and is_cuda_126_or_older_based(...) - # return ... + # If this function was already called by an earlier hook, evaluation of whether this is an unsupported module was + # already done. No need to redo it: save time and return early + if hasattr(self, EESSI_SUPPORTED_MODULE_ATTR): + return False + elif hasattr(self, EESSI_UNSUPPORTED_MODULE_ATTR): + return True + # Foss-2022b is not supported on Zen4 + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + if cpu_target == CPU_TARGET_ZEN4 and is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name, tcversion=self.toolchain.version): + errmsg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported for the Zen4 architecture.\\n" + errmsg += "See https://www.eessi.io/docs/known_issues/eessi-/#gcc-1220-and-foss-2022b-based-modules-cannot-be-loaded-on-zen4-architecture" + var=EESSI_IGNORE_ZEN4_GCC1220_ENVVAR + setattr(self, EESSI_UNSUPPORTED_MODULE_ATTR, UnsupportedModule(envvar=var, errmsg=errmsg)) + return True + + # If the CUDA toolkit is a dependency, check that it supports (all) requested CUDA Compute Capabilities + # Otherwise, mark this as unsupported + cudaver = get_cuda_version(ec=self.cfg, check_deps=True, check_builddeps=True) + print(f"[is_unsupported_module] CUDA DEP VERSION: {cudaver}") + if cudaver: + cuda_ccs_string = self.cfg.get_cuda_cc_template_value('cuda_compute_capabilities', required=False) + # cuda_ccs is empty if none are defined + if cuda_ccs_string: + # cuda_ccs is a comma-seperated string. Convert to list for easier handling + cuda_ccs = cuda_ccs_string.split(',') + print(f"[is_unsupported_module] CUDA COMPUTE CAPABILITY: {cuda_ccs}") + # Check if any of the CUDA CCs is unsupported. If so, append the error + if any( + [not is_cuda_cc_supported_by_toolkit(cuda_cc=cuda_cc, toolkit_version=cudaver) for cuda_cc in cuda_ccs] + ): + # Use a normalized variable name for the CUDA ccs, replacing commas by underscores + var=f"EESSI_IGNORE_CUDA_{cudaver}_CC_{cuda_ccs_string.replace(',', '_')}" + errmsg = f"EasyConfigs using CUDA {cudaver} or older are not supported for (all) requested Compute " + errmsg +=f"Capabilities: {cuda_ccs}.\\n" + UnsupportedModule(envvar=var,errmsg=errmsg) + setattr(self, EESSI_UNSUPPORTED_MODULE_ATTR, UnsupportedModule(envvar=var,errmsg=errmsg)) + return True + + # If all the above logic passed, this module is supported + setattr(self, EESSI_SUPPORTED_MODULE_ATTR, True) return False @@ -702,18 +804,21 @@ def pre_fetch_hook_unsupported_modules(self, *args, **kwargs): def pre_module_hook_unsupported_module(self, *args, **kwargs): """Make module load-able during module step""" - ignore_lmoderror_envvar = is_unsupported_module(self) - if ignore_lmoderror_envvar: + if is_unsupported_module(self): + unsup_mod = getattr(self, EESSI_UNSUPPORTED_MODULE_ATTR) if hasattr(self, 'initial_environ'): # Allow the module to be loaded in the module step (which uses initial environment) - print_msg(f"Setting {ignore_lmoderror_envvar} in initial environment") - self.initial_environ[ignore_lmoderror_envvar] = "1" + print_msg(f"Setting {unsup_mod.envvar} in initial environment") + self.initial_environ[unsup_mod.envvar] = "1" + extra_footer='if (not os.getenv("%s")) then LmodError("%s") end' % (unsup_mod.envvar, unsup_mod.errmsg) + # Append extra_footer if a modluafooter already exists. Otherwise, simply assign + self.cfg['modluafooter'] = self.cfg['modluafooter'] + '\n' + extra_footer if self.cfg['modluafooter'] else extra_footer def post_module_hook_unsupported_module(self, *args, **kwargs): """Revert changes from pre_fetch_hook_unsupported_modules""" - ignore_lmoderror_envvar = is_unsupported_module(self) - if ignore_lmoderror_envvar: + if is_unsupported_module(self): + unsup_mod = getattr(self, EESSI_UNSUPPORTED_MODULE_ATTR) if hasattr(self, EESSI_MODULE_ONLY_ATTR): update_build_option('module_only', getattr(self, EESSI_MODULE_ONLY_ATTR)) print_msg("Restored original build option 'module_only' to %s" % getattr(self, EESSI_MODULE_ONLY_ATTR)) @@ -730,9 +835,9 @@ def post_module_hook_unsupported_module(self, *args, **kwargs): # If the variable to allow loading is set, remove it if hasattr(self, 'initial_environ'): - if self.initial_environ.get(ignore_lmoderror_envvar, False): - print_msg(f"Removing {ignore_lmoderror_envvar} in initial environment") - del self.initial_environ[ignore_lmoderror_envvar] + if self.initial_environ.get(unsup_mod.envvar, False): + print_msg(f"Removing {unsup_mod.envvar} in initial environment") + del self.initial_environ[unsup_mod.envvar] def post_easyblock_hook_copy_easybuild_subdir(self, *args, **kwargs): @@ -752,18 +857,18 @@ def post_easyblock_hook_copy_easybuild_subdir(self, *args, **kwargs): # Modules for dependencies are loaded in the prepare step. Thus, that's where we need this variable to be set # so that the modules can be succesfully loaded without printing the error (so that we can create a module # _with_ the warning for the current software being installed) -def pre_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs): - """Set environment variable to ignore the LmodError from parse_hook_zen4_module_only during build phase""" - if is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name, - tcversion=self.toolchain.version): - os.environ[EESSI_IGNORE_ZEN4_GCC1220_ENVVAR] = "1" - - -def post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs): - """Unset environment variable to ignore the LmodError from parse_hook_zen4_module_only during build phase""" - if is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name, - tcversion=self.toolchain.version): - del os.environ[EESSI_IGNORE_ZEN4_GCC1220_ENVVAR] +# def pre_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs): +# """Set environment variable to ignore the LmodError from parse_hook_zen4_module_only during build phase""" +# if is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name, +# tcversion=self.toolchain.version): +# os.environ[EESSI_IGNORE_ZEN4_GCC1220_ENVVAR] = "1" +# +# +# def post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs): +# """Unset environment variable to ignore the LmodError from parse_hook_zen4_module_only during build phase""" +# if is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name, +# tcversion=self.toolchain.version): +# del os.environ[EESSI_IGNORE_ZEN4_GCC1220_ENVVAR] # TODO: create pre and post prepare hook to set/unset EESSI_IGNORE_CUDA126_CC1X0_ENVVAR From b5fa942d346b606b7b3d8df0430ccf32d01b3270 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 7 Jan 2026 18:14:18 +0100 Subject: [PATCH 03/24] Remove some variables that have become obsolete, and make sure get_cuda_version actually returns 'None' if CUDA was not in the deps --- eb_hooks.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index 40fe59fe..ac5fdf0d 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -40,9 +40,6 @@ CPU_TARGET_SAPPHIRE_RAPIDS = 'x86_64/intel/sapphirerapids' CPU_TARGET_ZEN4 = 'x86_64/amd/zen4' -GPU_TARGET_CC100 = 'accel/nvidia/cc100' -GPU_TARGET_CC120 = 'accel/nvidia/cc120' - EESSI_RPATH_OVERRIDE_ATTR = 'orig_rpath_override_dirs' EESSI_MODULE_ONLY_ATTR = 'orig_module_only' EESSI_FORCE_ATTR = 'orig_force' @@ -56,8 +53,6 @@ # Make sure a single environment variable name is used for this throughout the hooks EESSI_IGNORE_ZEN4_GCC1220_ENVVAR="EESSI_IGNORE_LMOD_ERROR_ZEN4_GCC1220" -EESSI_IGNORE_CUDA126_CC1X0_ENVVAR="EESSI_IGNORE_LMOD_ERROR_CUDA126_CC1X0" - STACK_REPROD_SUBDIR = 'reprod' @@ -124,8 +119,8 @@ def is_gcccore_1220_based(**kwargs): def get_cuda_version(ec, check_deps=True, check_builddeps=True): """ - Returns the CUDA version if this EasyConfig (ec) uses CUDA as a (build)dependency. - Otherwise, returns None + Returns the CUDA version that this EasyConfig (ec) uses as a (build)dependency. + If no CUDA is used as (build)dependency, this function returns None. """ cudaver = None ec_dict = ec.asdict() @@ -141,7 +136,9 @@ def get_cuda_version(ec, check_deps=True, check_builddeps=True): # Provide default for dep in deps: if dep['name'] == 'CUDA': - return dep['version'] + cudaver = dep['version'] + + return cudaver def is_cuda_cc_supported_by_toolkit(cuda_cc, toolkit_version): From 74351d423100c6ff14d7e9c8d93ea132c3218264 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 7 Jan 2026 18:16:06 +0100 Subject: [PATCH 04/24] Remove the now obsolete zen4 parse hook - we now inject the lmodfooter in the pre-module hook --- eb_hooks.py | 64 ----------------------------------------------------- 1 file changed, 64 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index ac5fdf0d..4b0acd4c 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -150,25 +150,6 @@ def is_cuda_cc_supported_by_toolkit(cuda_cc, toolkit_version): return False -def is_cuda_126_or_older_based(**kwargs): -# ecname, ecversion, ecversionsuffix): - """ - Checks if this easyconfig either _is_ or _uses_ a CUDA-12.6 or older. - This function is, for example, used to generate errors in CUDA-12.6 based modules for CC100 and CC120 targets - since anything prior to CUDA 12.8 does not support that. - - :param str ecname: Name of the software specified in the EasyConfig - :param str ecversion: Version of the software specified in the EasyConfig - :param str ecversionsuffix: Versionsuffix specified in the EasyConfig - """ - - # TODO: implement proper function that returns 'true' when this is either an EasyConfig for CUDA-12.6 - # or older OR when it uses CUDA 12.6 or older as a dependency - # I can _probably_ get the dependencies directoy, instead of having to infer the CUDA version from the - # versionsuffix - return True - - def get_eessi_envvar(eessi_envvar): """Get an EESSI environment variable from the environment""" @@ -210,16 +191,6 @@ def parse_hook(ec, *args, **kwargs): if ec.name in PARSE_HOOKS: PARSE_HOOKS[ec.name](ec, eprefix) -# # Always trigger this one, regardless of ec.name -# cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') -# if cpu_target == CPU_TARGET_ZEN4: -# parse_hook_zen4_module_only(ec, eprefix) -# -# # Always trigger, regardless of ec.name -# gpu_target = get_eessi_envvar('EESSI_ACCEL_SUBDIR') -# if gpu_target == GPU_TARGET_CC100 or gpu_target == GPU_TARGET_CC120: -# parse_hook_cuda_module_only(ec, eprefix) - # inject the GPU property (if required) ec = inject_gpu_property(ec) @@ -638,41 +609,6 @@ def parse_hook_freeimage_aarch64(ec, *args, **kwargs): print_msg("Changed toolchainopts for %s: %s", ec.name, ec['toolchainopts']) -# def parse_hook_zen4_module_only(ec, eprefix): -# """ -# Use --force --module-only if building a foss-2022b-based EasyConfig for Zen4. -# This toolchain will not be supported on Zen4, so we will generate a modulefile -# and have it print an LmodError. -# """ -# if is_gcccore_1220_based(ecname=ec['name'], ecversion=ec['version'], tcname=ec['toolchain']['name'], -# tcversion=ec['toolchain']['version']): -# env_varname = EESSI_IGNORE_ZEN4_GCC1220_ENVVAR -# # TODO: create a docs page to which we can refer for more info here -# # TODO: then update the link to the known issues page to the _specific_ issue -# # Need to escape the newline character so that the newline character actually ends up in the module file -# # (otherwise, it splits the string, and a 2-line string ends up in the modulefile, resulting in syntax error) -# errmsg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported for the Zen4 architecture.\\n" -# errmsg += "See https://www.eessi.io/docs/known_issues/eessi-/#gcc-1220-and-foss-2022b-based-modules-cannot-be-loaded-on-zen4-architecture" -# ec['modluafooter'] = 'if (not os.getenv("%s")) then LmodError("%s") end' % (env_varname, errmsg) -# -# -# def parse_hook_cuda_module_only(ec, eprefix): -# """ -# Use --force --module-only if building a CUDA-12.X based EasyConfig with X<=6 for CC100 or CC120. -# CUDA-12.6 has no support for CC100 and CC120 targets, so we will generate a modulefile -# and have it print an LmodError. -# """ -# if is_cuda_126_or_older_based(ecname=ec['name'], ecversion=ec['version'], ecversionsuffix=ec['versionsuffix']): -# env_varname = EESSI_IGNORE_CUDA126_CC1X0_ENVVAR -# # TODO: create a docs page to which we can refer for more info here -# # TODO: then update the link to the known issues page to the _specific_ issue -# # Need to escape the newline character so that the newline character actually ends up in the module file -# # (otherwise, it splits the string, and a 2-line string ends up in the modulefile, resulting in syntax error) -# errmsg = "EasyConfigs using CUDA 12.6 or older are not supported for the Compute Capabilities 100 and 120.\\n" -# errmsg += "See https://gitlab.com/eessi/support/-/issues/210#note_2973460336" # TODO: should be a more user-friendly known issues page -# ec['modluafooter'] = 'if (not os.getenv("%s")) then LmodError("%s") end' % (env_varname, errmsg) - - def pre_fetch_hook(self, *args, **kwargs): """Main pre fetch hook: trigger custom functions based on software name.""" if self.name in PRE_FETCH_HOOKS: From 2d2cdff63d869c44a07f0914eba4d45bbd482b82 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 7 Jan 2026 18:17:38 +0100 Subject: [PATCH 05/24] Remove zen4-specific pre and post prepare hooks, as these were replaced by the generic X_prepare_hook_unsupported_modules --- eb_hooks.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index 4b0acd4c..2a2a33ac 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -368,11 +368,6 @@ def pre_prepare_hook(self, *args, **kwargs): # Always trigger this, regardless of ec.name pre_prepare_hook_unsupported_modules(self, *args, **kwargs) -# # Always trigger this one, regardless of ec.name -# cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') -# if cpu_target == CPU_TARGET_ZEN4: -# pre_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs) - def post_prepare_hook_gcc_prefixed_ld_rpath_wrapper(self, *args, **kwargs): """ @@ -437,11 +432,6 @@ def post_prepare_hook(self, *args, **kwargs): if self.name in POST_PREPARE_HOOKS: POST_PREPARE_HOOKS[self.name](self, *args, **kwargs) -# # Always trigger this one, regardless of ec.name -# cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') -# if cpu_target == CPU_TARGET_ZEN4: -# post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs) - # Always trigger this, regardless of ec.name post_prepare_hook_unsupported_modules(self, *args, **kwargs) From e5f5cd226961be05843d749ec4d1416fc8d7d480 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 7 Jan 2026 18:18:47 +0100 Subject: [PATCH 06/24] Remove the prepare_hooks that were specific to zen4, as they were replaced by generic hooks --- eb_hooks.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index 2a2a33ac..6e872c74 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -777,25 +777,6 @@ def post_easyblock_hook_copy_easybuild_subdir(self, *args, **kwargs): copy_dir(app_easybuild_dir, app_reprod_dir) -# Modules for dependencies are loaded in the prepare step. Thus, that's where we need this variable to be set -# so that the modules can be succesfully loaded without printing the error (so that we can create a module -# _with_ the warning for the current software being installed) -# def pre_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs): -# """Set environment variable to ignore the LmodError from parse_hook_zen4_module_only during build phase""" -# if is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name, -# tcversion=self.toolchain.version): -# os.environ[EESSI_IGNORE_ZEN4_GCC1220_ENVVAR] = "1" -# -# -# def post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs): -# """Unset environment variable to ignore the LmodError from parse_hook_zen4_module_only during build phase""" -# if is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name, -# tcversion=self.toolchain.version): -# del os.environ[EESSI_IGNORE_ZEN4_GCC1220_ENVVAR] - - -# TODO: create pre and post prepare hook to set/unset EESSI_IGNORE_CUDA126_CC1X0_ENVVAR - def pre_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwargs): """ Solve issues with compiling or running the tests on both From 0d40193319a150b9397198e2d0f1787d1b13e742 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Thu, 8 Jan 2026 17:23:30 +0100 Subject: [PATCH 07/24] Actually implement is_cuda_cc_supported_by_toolkit. Also, make sure environment variables don't contain invalid characters like commas and periods. Add some warning messages if installing a module that's unsupported. --- eb_hooks.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 7 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index 6e872c74..9e1459d5 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -73,6 +73,40 @@ {'name': 'lfoss', 'version': '2025b'} ) +# Supported compute capabilities by CUDA toolkit version +# Obtained by installing all CUDAs from 12.0.0 to 13.1.0, then using: + +# #!/bin/bash +# +# CUDA_VERS=(12.0.0 12.1.0 12.1.1 12.2.0 12.2.2 12.3.0 12.3.2 12.4.0 12.5.0 12.6.0 12.8.0 12.9.0 12.9.1 13.0.0 13.0.1 13.0.2 13.1.0) +# +# for ver in ${CUDA_VERS[@]}; do +# module load CUDA/${ver} +# ccs=$(nvcc --list-gpu-arch) +# ccs=$(echo ${ccs} | sed "s/ /', /g" | sed "s/compute_/'/g") +# echo " '${ver}': [${ccs}']," +# module unload CUDA +# done + +CUDA_SUPPORTED_CCS = { + '12.0.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'], + '12.1.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'], + '12.1.1': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'], + '12.2.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'], + '12.2.2': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'], + '12.3.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'], + '12.3.2': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'], + '12.4.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'], + '12.5.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'], + '12.6.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'], + '12.8.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90', '100', '101', '120'], + '12.9.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90', '100', '101', '103', '120', '121'], + '12.9.1': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90', '100', '101', '103', '120', '121'], + '13.0.0': ['75', '80', '86', '87', '88', '89', '90', '100', '110', '103', '120', '121'], + '13.0.1': ['75', '80', '86', '87', '88', '89', '90', '100', '110', '103', '120', '121'], + '13.0.2': ['75', '80', '86', '87', '88', '89', '90', '100', '110', '103', '120', '121'], + '13.1.0': ['75', '80', '86', '87', '88', '89', '90', '100', '110', '103', '120', '121'], +} # Ensure that we don't print any messages in --terse mode # Note that --terse was introduced in EB 4.9.1 @@ -120,11 +154,16 @@ def is_gcccore_1220_based(**kwargs): def get_cuda_version(ec, check_deps=True, check_builddeps=True): """ Returns the CUDA version that this EasyConfig (ec) uses as a (build)dependency. + If (ec) is simply CUDA itself, it will return the version. If no CUDA is used as (build)dependency, this function returns None. """ cudaver = None ec_dict = ec.asdict() + # Is this CUDA itself? + if ec.name == 'CUDA': + cudaver = ec.version + # At this point, CUDA should be a builddependency due to inject_gpu_property # changing any CUDA dep to a builddependency. But, for robustness, just check both deps = [] @@ -146,8 +185,18 @@ def is_cuda_cc_supported_by_toolkit(cuda_cc, toolkit_version): Checks if the CUDA Compute Capability passed in cuda_cc is supported by the CUDA toolkit version toolkit_version Returns True if supported or False if not supported """ - # TODO: implement actual lookup table - return False + # Clean cuda_cc of any suffixes like the 'a' in '9.0a' + # The regex expects one or more digits, a dot, one or more digits, and then optionally any number of characters + # It will strip all characters by only return the first capture group (the digits and dot) + cuda_cc = re.sub(r'^(\d+\.\d+)[a-zA-Z]*$', r'\1', cuda_cc) + + # Strip the dot + cuda_cc = cuda_cc.replace('.', '') + + if cuda_cc in CUDA_SUPPORTED_CCS[toolkit_version]: + return True + else: + return False def get_eessi_envvar(eessi_envvar): @@ -323,7 +372,7 @@ def pre_prepare_hook_unsupported_modules(self, *args, **kwargs): """Set env var to ignore specific LmodErrors from dependencies if this module is know to be unsupported""" if is_unsupported_module(self): unsup_mod = getattr(self, EESSI_UNSUPPORTED_MODULE_ATTR) - print_msg(f"Setting {unsup_mod.envvar} in to allow loading dependencies that otherwise throw an LmodError") + print_msg(f"Setting {unsup_mod.envvar} to allow loading dependencies that otherwise throw an LmodError") os.environ[unsup_mod.envvar] = "1" @@ -670,6 +719,9 @@ def is_unsupported_module(self): # Foss-2022b is not supported on Zen4 cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') if cpu_target == CPU_TARGET_ZEN4 and is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name, tcversion=self.toolchain.version): + msg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported on Zen4 architectures. " + msg += "Building with '--module-only --force' and injecting an LmodError into the modulefile." + print_warning(msg) errmsg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported for the Zen4 architecture.\\n" errmsg += "See https://www.eessi.io/docs/known_issues/eessi-/#gcc-1220-and-foss-2022b-based-modules-cannot-be-loaded-on-zen4-architecture" var=EESSI_IGNORE_ZEN4_GCC1220_ENVVAR @@ -679,20 +731,25 @@ def is_unsupported_module(self): # If the CUDA toolkit is a dependency, check that it supports (all) requested CUDA Compute Capabilities # Otherwise, mark this as unsupported cudaver = get_cuda_version(ec=self.cfg, check_deps=True, check_builddeps=True) - print(f"[is_unsupported_module] CUDA DEP VERSION: {cudaver}") if cudaver: + # cuda_ccs_string is e.g. "8.0,9.0" cuda_ccs_string = self.cfg.get_cuda_cc_template_value('cuda_compute_capabilities', required=False) # cuda_ccs is empty if none are defined if cuda_ccs_string: # cuda_ccs is a comma-seperated string. Convert to list for easier handling cuda_ccs = cuda_ccs_string.split(',') - print(f"[is_unsupported_module] CUDA COMPUTE CAPABILITY: {cuda_ccs}") # Check if any of the CUDA CCs is unsupported. If so, append the error if any( [not is_cuda_cc_supported_by_toolkit(cuda_cc=cuda_cc, toolkit_version=cudaver) for cuda_cc in cuda_ccs] ): - # Use a normalized variable name for the CUDA ccs, replacing commas by underscores - var=f"EESSI_IGNORE_CUDA_{cudaver}_CC_{cuda_ccs_string.replace(',', '_')}" + msg = f"Requested a CUDA Compute Capability ({cuda_ccs}) that is not supported by the CUDA " + msg += f"toolkit version ({cudaver}) used by this software. Switching to '--module-only --force' " + msg += "and injectiong an LmodError into the modulefile." + print_warning(msg) + # Use a normalized variable name for the CUDA ccs: strip any suffix, and replace commas + cuda_ccs_string = re.sub(r'[a-zA-Z]', '', cuda_ccs_string).replace(',', '_') + # Also replace periods, those are not officially supported in environment variable names + var=f"EESSI_IGNORE_CUDA_{cudaver}_CC_{cuda_ccs_string}".replace('.', '_') errmsg = f"EasyConfigs using CUDA {cudaver} or older are not supported for (all) requested Compute " errmsg +=f"Capabilities: {cuda_ccs}.\\n" UnsupportedModule(envvar=var,errmsg=errmsg) From 5a2256bdaa03581a509539baeb3ae2e8af462b52 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Thu, 8 Jan 2026 18:11:35 +0100 Subject: [PATCH 08/24] Move import to the top --- eb_hooks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index 9e1459d5..6302c641 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -6,6 +6,7 @@ import json import os import re +from typing import NamedTuple import easybuild.tools.environment as env from easybuild.easyblocks.generic.configuremake import obtain_config_guess @@ -688,8 +689,6 @@ def pre_fetch_hook_check_installation_path(self, *args, **kwargs): ) -from typing import NamedTuple - class UnsupportedModule(NamedTuple): """ Environment variable and error message for an unsupported module. From 0d745e7c1f94a84dc65201dbce53093d62b38e7a Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Thu, 8 Jan 2026 18:19:40 +0100 Subject: [PATCH 09/24] Fix description for 'is_supported_module' as it no longer returns an environment name --- eb_hooks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index 6302c641..ae8a896e 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -703,9 +703,10 @@ class UnsupportedModule(NamedTuple): def is_unsupported_module(self): """ Determine if the given module is unsupported in EESSI, and hence if a dummy module needs to be built that just prints an LmodError. - If true, this function returns the name of the environment variable that can be used to ignore that particular LmodError, - as this is still required to actually build the module itself (EasyBuild will load/test the module). - Otherwise, it returns False. + If a module is unsupported, this function will set the EESSI_UNSUPPORTED_MODULE_ATTR attribute on `self`, + and assign an `UnsupportedModule` NamedTuple to it. + If a module is supported, this function will set the EESSI_SUPPORTED_MODULE_ATTR attribut on `self` + (and set it to True). """ # If this function was already called by an earlier hook, evaluation of whether this is an unsupported module was From a6d52c6a5a1159d72da4b2b11ae3e8232489b77b Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 12 Jan 2026 13:56:42 +0100 Subject: [PATCH 10/24] Add hook to handle stripping suffix from --cuda-compute-capabilities when needed for specific cuDNN versions --- eb_hooks.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/eb_hooks.py b/eb_hooks.py index ae8a896e..2465d230 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -834,6 +834,32 @@ def post_easyblock_hook_copy_easybuild_subdir(self, *args, **kwargs): copy_dir(app_easybuild_dir, app_reprod_dir) +def pre_prepare_hook_cudnn(self, *args, **kwargs): + """ + cuDNN is a binary install, that doesn't always have the device code for the suffixed CUDA + Compute Capabilities such as 9.0a, 10.0f, 12.0f etc. This hooks strips the suffices for + cuDNN versions that don't have suffix-specific device code embedded in (all) their files, + as retaining the suffixes would lead to the EasyBuild CUDA sanity check failing. + """ + + if self.name == 'cuDNN': + # cuDNN 9.5.0.50 doesn't have support for 9.0a in all binaries + if self.version == "9.5.0.50": + cuda_cc = build_option('cuda_compute_capabilities') + if cuda_cc and '9.0a' in cuda_cc: + updated_cuda_cc = [v.replace('9.0a', '9.0') for v in cuda_cc] + update_build_option('cuda_compute_capabilities', updated_cuda_cc) + # cuDNN 9.10.1.4 doesn't have support for 10.0f and 12.0f in all binaries + elif self.version == "9.10.1.4": + cuda_cc = build_option('cuda_compute_capabilities') + if cuda_cc and '10.0f' in cuda_cc: + updated_cuda_cc = [v.replace('10.0f', '10.0') for v in cuda_cc] + update_build_option('cuda_compute_capabilities', updated_cuda_cc) + elif cuda_cc and '12.0f' in cuda_cc: + updated_cuda_cc = [v.replace('12.0f', '12.0') for v in cuda_cc] + update_build_option('cuda_compute_capabilities', updated_cuda_cc) + + def pre_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwargs): """ Solve issues with compiling or running the tests on both @@ -1759,6 +1785,7 @@ def post_easyblock_hook(self, *args, **kwargs): PRE_FETCH_HOOKS = {} PRE_PREPARE_HOOKS = { + 'cuDNN': pre_prepare_hook_cudnn, 'Highway': pre_prepare_hook_highway_handle_test_compilation_issues, 'LLVM': pre_prepare_hook_llvm_a64fx, 'Rust': pre_prepare_hook_llvm_a64fx, From e69b665fd4efefd477b57393205bc758c64b42f7 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 12 Jan 2026 16:33:36 +0100 Subject: [PATCH 11/24] Use eb_hooks.py from the cloned software-layer-scripts, rather than the /cvmfs mount. This makes it easier to update the hooks and immedately test those changes from a software-layer PR --- EESSI-install-software.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index e4834545..1136e17f 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -318,6 +318,9 @@ echo "DEBUG: before loading EESSI-extend // EASYBUILD_INSTALLPATH='${EASYBUILD_I source $TOPDIR/load_eessi_extend_module.sh ${EESSI_VERSION} echo "DEBUG: after loading EESSI-extend // EASYBUILD_INSTALLPATH='${EASYBUILD_INSTALLPATH}'" +# Set EASYBUILD_HOOKS to use the hooks from the software-layer-scripts, to make it easier to use updated hooks +export EASYBUILD_HOOKS=$TOPDIR/eb_hooks.py + # Install full CUDA SDK and cu* libraries in host_injections # (This is done *before* configuring EasyBuild as it may rely on an older EB version) # Hardcode this for now, see if it works From 04a7f4e3746c1198e706803350118955b6cb5d06 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 12 Jan 2026 17:12:25 +0100 Subject: [PATCH 12/24] Add more debugging output --- EESSI-install-software.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 1136e17f..9ae9e953 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -320,6 +320,7 @@ echo "DEBUG: after loading EESSI-extend // EASYBUILD_INSTALLPATH='${EASYBUILD_I # Set EASYBUILD_HOOKS to use the hooks from the software-layer-scripts, to make it easier to use updated hooks export EASYBUILD_HOOKS=$TOPDIR/eb_hooks.py +echo "DEBUG: set EASYBUILD_HOOKS=${EASYBUILD_HOOKS}" # Install full CUDA SDK and cu* libraries in host_injections # (This is done *before* configuring EasyBuild as it may rely on an older EB version) @@ -399,6 +400,7 @@ else # load EasyBuild module (will be installed if it's not available yet) source ${TOPDIR}/load_easybuild_module.sh ${eb_version} + echo "DEBUG: right before show-config, EASYBUILD_HOOKS=${EASYBUILD_HOOKS}" ${EB} --show-config echo_green "All set, let's start installing some software with EasyBuild v${eb_version} in ${EASYBUILD_INSTALLPATH}..." From 791dcdd9919001d3a802eebb4faaa137022f7601 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 12 Jan 2026 17:17:13 +0100 Subject: [PATCH 13/24] Set later --- EESSI-install-software.sh | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 9ae9e953..b596e00c 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -318,10 +318,6 @@ echo "DEBUG: before loading EESSI-extend // EASYBUILD_INSTALLPATH='${EASYBUILD_I source $TOPDIR/load_eessi_extend_module.sh ${EESSI_VERSION} echo "DEBUG: after loading EESSI-extend // EASYBUILD_INSTALLPATH='${EASYBUILD_INSTALLPATH}'" -# Set EASYBUILD_HOOKS to use the hooks from the software-layer-scripts, to make it easier to use updated hooks -export EASYBUILD_HOOKS=$TOPDIR/eb_hooks.py -echo "DEBUG: set EASYBUILD_HOOKS=${EASYBUILD_HOOKS}" - # Install full CUDA SDK and cu* libraries in host_injections # (This is done *before* configuring EasyBuild as it may rely on an older EB version) # Hardcode this for now, see if it works @@ -400,7 +396,10 @@ else # load EasyBuild module (will be installed if it's not available yet) source ${TOPDIR}/load_easybuild_module.sh ${eb_version} - echo "DEBUG: right before show-config, EASYBUILD_HOOKS=${EASYBUILD_HOOKS}" + # Set EASYBUILD_HOOKS to use the hooks from the software-layer-scripts, to make it easier to use updated hooks + export EASYBUILD_HOOKS=$TOPDIR/eb_hooks.py + echo "DEBUG: set EASYBUILD_HOOKS=${EASYBUILD_HOOKS}" + ${EB} --show-config echo_green "All set, let's start installing some software with EasyBuild v${eb_version} in ${EASYBUILD_INSTALLPATH}..." From c68c4956db6b80f64f0c9c040821880d2acd1819 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 12 Jan 2026 17:26:24 +0100 Subject: [PATCH 14/24] Clarify messageing --- EESSI-install-software.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index b596e00c..db7d9efe 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -398,7 +398,7 @@ else # Set EASYBUILD_HOOKS to use the hooks from the software-layer-scripts, to make it easier to use updated hooks export EASYBUILD_HOOKS=$TOPDIR/eb_hooks.py - echo "DEBUG: set EASYBUILD_HOOKS=${EASYBUILD_HOOKS}" + echo "Overwrite EASYBUILD_HOOKS to use the eb_hooks from software-layer-scripts: EASYBUILD_HOOKS=${EASYBUILD_HOOKS}" ${EB} --show-config From 8292fa3fde0ba46fb1d909f40ae212c0e4296041 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 12 Jan 2026 18:02:02 +0100 Subject: [PATCH 15/24] Use EB 5.2.0 instead to install the CUDA host injections, so that there is support for the 'f' suffix in e.g. CC 10.0f --- ...ctions.yml => eessi-2025.06-eb-5.2.0-CUDA-host-injections.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/gpu_support/nvidia/easystacks/2025.06/{eessi-2025.06-eb-5.1.2-CUDA-host-injections.yml => eessi-2025.06-eb-5.2.0-CUDA-host-injections.yml} (100%) diff --git a/scripts/gpu_support/nvidia/easystacks/2025.06/eessi-2025.06-eb-5.1.2-CUDA-host-injections.yml b/scripts/gpu_support/nvidia/easystacks/2025.06/eessi-2025.06-eb-5.2.0-CUDA-host-injections.yml similarity index 100% rename from scripts/gpu_support/nvidia/easystacks/2025.06/eessi-2025.06-eb-5.1.2-CUDA-host-injections.yml rename to scripts/gpu_support/nvidia/easystacks/2025.06/eessi-2025.06-eb-5.2.0-CUDA-host-injections.yml From 5a6b43c51991ad070c2ea256aa507755d20c195e Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 13 Jan 2026 10:54:50 +0100 Subject: [PATCH 16/24] Use the GPU install scripts from the cloned software-layer-scripts so that updates are immediately taken into account --- EESSI-install-software.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index db7d9efe..a4c3af36 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -327,7 +327,7 @@ echo "Going to install full CUDA SDK and cu* libraries under host_injections if temp_install_storage=${TMPDIR}/temp_install_storage mkdir -p ${temp_install_storage} if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then - ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \ + ${TOPDIR}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \ -t ${temp_install_storage} \ --accept-cuda-eula \ --accept-cudnn-eula @@ -338,7 +338,7 @@ fi # Install NVIDIA drivers in host_injections (if they exist) if nvidia_gpu_available; then echo "Installing NVIDIA drivers for use in prefix shell..." - ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh + ${TOPDIR}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh fi From 24228048da52569255e86bb7fd0faa552f0cd4ad Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com> Date: Tue, 13 Jan 2026 12:38:15 +0100 Subject: [PATCH 17/24] Apply suggestion from @casparvl --- eb_hooks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 2465d230..ce006eea 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -752,7 +752,6 @@ def is_unsupported_module(self): var=f"EESSI_IGNORE_CUDA_{cudaver}_CC_{cuda_ccs_string}".replace('.', '_') errmsg = f"EasyConfigs using CUDA {cudaver} or older are not supported for (all) requested Compute " errmsg +=f"Capabilities: {cuda_ccs}.\\n" - UnsupportedModule(envvar=var,errmsg=errmsg) setattr(self, EESSI_UNSUPPORTED_MODULE_ATTR, UnsupportedModule(envvar=var,errmsg=errmsg)) return True From 26425be9ffdac6047dbc485ba5e02ff47d56d414 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 13 Jan 2026 13:16:02 +0100 Subject: [PATCH 18/24] Revert changes that were now split off to https://github.com/EESSI/software-layer-scripts/pull/147 --- EESSI-install-software.sh | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index a4c3af36..e4834545 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -327,7 +327,7 @@ echo "Going to install full CUDA SDK and cu* libraries under host_injections if temp_install_storage=${TMPDIR}/temp_install_storage mkdir -p ${temp_install_storage} if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then - ${TOPDIR}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \ + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \ -t ${temp_install_storage} \ --accept-cuda-eula \ --accept-cudnn-eula @@ -338,7 +338,7 @@ fi # Install NVIDIA drivers in host_injections (if they exist) if nvidia_gpu_available; then echo "Installing NVIDIA drivers for use in prefix shell..." - ${TOPDIR}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh fi @@ -396,10 +396,6 @@ else # load EasyBuild module (will be installed if it's not available yet) source ${TOPDIR}/load_easybuild_module.sh ${eb_version} - # Set EASYBUILD_HOOKS to use the hooks from the software-layer-scripts, to make it easier to use updated hooks - export EASYBUILD_HOOKS=$TOPDIR/eb_hooks.py - echo "Overwrite EASYBUILD_HOOKS to use the eb_hooks from software-layer-scripts: EASYBUILD_HOOKS=${EASYBUILD_HOOKS}" - ${EB} --show-config echo_green "All set, let's start installing some software with EasyBuild v${eb_version} in ${EASYBUILD_INSTALLPATH}..." From ffed74ab436a9fe6603acf9f90a446d9ad2e2801 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com> Date: Tue, 13 Jan 2026 14:36:16 +0100 Subject: [PATCH 19/24] Apply suggestions from code review --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index ce006eea..941fada4 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -723,7 +723,7 @@ def is_unsupported_module(self): msg += "Building with '--module-only --force' and injecting an LmodError into the modulefile." print_warning(msg) errmsg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported for the Zen4 architecture.\\n" - errmsg += "See https://www.eessi.io/docs/known_issues/eessi-/#gcc-1220-and-foss-2022b-based-modules-cannot-be-loaded-on-zen4-architecture" + errmsg += "See https://www.eessi.io/docs/known_issues/eessi-2023.06/#gcc-1220-and-foss-2022b-based-modules-cannot-be-loaded-on-zen4-architecture" var=EESSI_IGNORE_ZEN4_GCC1220_ENVVAR setattr(self, EESSI_UNSUPPORTED_MODULE_ATTR, UnsupportedModule(envvar=var, errmsg=errmsg)) return True From d1ccc89a98dea87be22ee8acd46fedf2904cb7ed Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 13 Jan 2026 14:45:32 +0100 Subject: [PATCH 20/24] Raise EB error if the toolkit version isn't defined (yet) in the lookup table --- eb_hooks.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/eb_hooks.py b/eb_hooks.py index 941fada4..daef123a 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -194,6 +194,15 @@ def is_cuda_cc_supported_by_toolkit(cuda_cc, toolkit_version): # Strip the dot cuda_cc = cuda_cc.replace('.', '') + # Raise informative error if `toolkit_version` is not yet covered in CUDA_SUPPORTED_CCS + if not toolkit_version in CUDA_SUPPORTED_CCS: + msg = f"Trying to determine compatibility between requested CUDA Compute Capability ({cuda_cc})" + msg +=f" and CUDA toolkit version {toolkit_version} failed: support for CUDA Compute Capabilities" + msg +=" not known for this toolkit version. Please install the toolkit version manually, run" + msg +=" 'nvcc --list-gpu-arch' to determine he supported CUDA Compute Capabilities, and then add these" + msg +=f" to the CUDA_SUPPORTED_CCS table in the EasyBuild hooks ({build_option('hooks')})" + raise EasyBuildError(msg) + if cuda_cc in CUDA_SUPPORTED_CCS[toolkit_version]: return True else: From e719983bff1a6d02f23dc175b156f3d202fa1d3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bob=20Dr=C3=B6ge?= Date: Tue, 13 Jan 2026 14:50:45 +0100 Subject: [PATCH 21/24] Apply suggestion from @casparvl Co-authored-by: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com> --- eb_hooks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index daef123a..a1d04ae9 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -173,7 +173,6 @@ def get_cuda_version(ec, check_deps=True, check_builddeps=True): if check_builddeps: deps = deps + ec_dict['builddependencies'][:] - # Provide default for dep in deps: if dep['name'] == 'CUDA': cudaver = dep['version'] From 15324ec17fe9e270cbdc48d953c936dc8d353bad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bob=20Dr=C3=B6ge?= Date: Tue, 13 Jan 2026 14:51:06 +0100 Subject: [PATCH 22/24] Apply suggestion from @casparvl Co-authored-by: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com> --- eb_hooks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eb_hooks.py b/eb_hooks.py index a1d04ae9..f11efc6b 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -158,6 +158,7 @@ def get_cuda_version(ec, check_deps=True, check_builddeps=True): If (ec) is simply CUDA itself, it will return the version. If no CUDA is used as (build)dependency, this function returns None. """ + # Provide default cudaver = None ec_dict = ec.asdict() From 079e04c4de7907291465def8eb38b586dd1181bb Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 13 Jan 2026 15:06:51 +0100 Subject: [PATCH 23/24] Create env var to skip CC toolkit compatibility check completely --- eb_hooks.py | 62 ++++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index daef123a..05a54433 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -196,11 +196,13 @@ def is_cuda_cc_supported_by_toolkit(cuda_cc, toolkit_version): # Raise informative error if `toolkit_version` is not yet covered in CUDA_SUPPORTED_CCS if not toolkit_version in CUDA_SUPPORTED_CCS: - msg = f"Trying to determine compatibility between requested CUDA Compute Capability ({cuda_cc})" - msg +=f" and CUDA toolkit version {toolkit_version} failed: support for CUDA Compute Capabilities" - msg +=" not known for this toolkit version. Please install the toolkit version manually, run" - msg +=" 'nvcc --list-gpu-arch' to determine he supported CUDA Compute Capabilities, and then add these" - msg +=f" to the CUDA_SUPPORTED_CCS table in the EasyBuild hooks ({build_option('hooks')})" + msg = f"Trying to determine compatibility between requested CUDA Compute Capability ({cuda_cc}) " + msg +=f"and CUDA toolkit version {toolkit_version} failed: support for CUDA Compute Capabilities " + msg +="not known for this toolkit version. Please install the toolkit version manually, run " + msg +="'nvcc --list-gpu-arch' to determine he supported CUDA Compute Capabilities, and then add these " + msg +=f"to the CUDA_SUPPORTED_CCS table in the EasyBuild hooks ({build_option('hooks')}). " + msg += "Alternatively, you can skip the compatiblity check alltogether by setting the " + msg += "EESSI_OVERRIDE_CUDA_CC_TOOLKIT_CHECK environment variable." raise EasyBuildError(msg) if cuda_cc in CUDA_SUPPORTED_CCS[toolkit_version]: @@ -739,30 +741,32 @@ def is_unsupported_module(self): # If the CUDA toolkit is a dependency, check that it supports (all) requested CUDA Compute Capabilities # Otherwise, mark this as unsupported - cudaver = get_cuda_version(ec=self.cfg, check_deps=True, check_builddeps=True) - if cudaver: - # cuda_ccs_string is e.g. "8.0,9.0" - cuda_ccs_string = self.cfg.get_cuda_cc_template_value('cuda_compute_capabilities', required=False) - # cuda_ccs is empty if none are defined - if cuda_ccs_string: - # cuda_ccs is a comma-seperated string. Convert to list for easier handling - cuda_ccs = cuda_ccs_string.split(',') - # Check if any of the CUDA CCs is unsupported. If so, append the error - if any( - [not is_cuda_cc_supported_by_toolkit(cuda_cc=cuda_cc, toolkit_version=cudaver) for cuda_cc in cuda_ccs] - ): - msg = f"Requested a CUDA Compute Capability ({cuda_ccs}) that is not supported by the CUDA " - msg += f"toolkit version ({cudaver}) used by this software. Switching to '--module-only --force' " - msg += "and injectiong an LmodError into the modulefile." - print_warning(msg) - # Use a normalized variable name for the CUDA ccs: strip any suffix, and replace commas - cuda_ccs_string = re.sub(r'[a-zA-Z]', '', cuda_ccs_string).replace(',', '_') - # Also replace periods, those are not officially supported in environment variable names - var=f"EESSI_IGNORE_CUDA_{cudaver}_CC_{cuda_ccs_string}".replace('.', '_') - errmsg = f"EasyConfigs using CUDA {cudaver} or older are not supported for (all) requested Compute " - errmsg +=f"Capabilities: {cuda_ccs}.\\n" - setattr(self, EESSI_UNSUPPORTED_MODULE_ATTR, UnsupportedModule(envvar=var,errmsg=errmsg)) - return True + if not os.getenv("EESSI_OVERRIDE_CUDA_CC_TOOLKIT_CHECK"): + cudaver = get_cuda_version(ec=self.cfg, check_deps=True, check_builddeps=True) + if cudaver: + # cuda_ccs_string is e.g. "8.0,9.0" + cuda_ccs_string = self.cfg.get_cuda_cc_template_value('cuda_compute_capabilities', required=False) + # cuda_ccs is empty if none are defined + if cuda_ccs_string: + # cuda_ccs is a comma-seperated string. Convert to list for easier handling + cuda_ccs = cuda_ccs_string.split(',') + # Check if any of the CUDA CCs is unsupported. If so, append the error + if any( + [not is_cuda_cc_supported_by_toolkit(cuda_cc=cuda_cc, toolkit_version=cudaver) for cuda_cc in cuda_ccs] + ): + msg = f"Requested a CUDA Compute Capability ({cuda_ccs}) that is not supported by the CUDA " + msg += f"toolkit version ({cudaver}) used by this software. Switching to '--module-only --force' " + msg += "and injectiong an LmodError into the modulefile. You can override this behaviour by " + msg += "setting the EESSI_OVERRIDE_CUDA_CC_TOOLKIT_CHECK environment variable." + print_warning(msg) + # Use a normalized variable name for the CUDA ccs: strip any suffix, and replace commas + cuda_ccs_string = re.sub(r'[a-zA-Z]', '', cuda_ccs_string).replace(',', '_') + # Also replace periods, those are not officially supported in environment variable names + var=f"EESSI_IGNORE_CUDA_{cudaver}_CC_{cuda_ccs_string}".replace('.', '_') + errmsg = f"EasyConfigs using CUDA {cudaver} or older are not supported for (all) requested Compute " + errmsg +=f"Capabilities: {cuda_ccs}.\\n" + setattr(self, EESSI_UNSUPPORTED_MODULE_ATTR, UnsupportedModule(envvar=var,errmsg=errmsg)) + return True # If all the above logic passed, this module is supported setattr(self, EESSI_SUPPORTED_MODULE_ATTR, True) From 451931c1b0c976de98238ba777937b978eea398f Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com> Date: Tue, 13 Jan 2026 15:13:32 +0100 Subject: [PATCH 24/24] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bob Dröge --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 57193977..54470ea4 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -201,7 +201,7 @@ def is_cuda_cc_supported_by_toolkit(cuda_cc, toolkit_version): msg +="not known for this toolkit version. Please install the toolkit version manually, run " msg +="'nvcc --list-gpu-arch' to determine he supported CUDA Compute Capabilities, and then add these " msg +=f"to the CUDA_SUPPORTED_CCS table in the EasyBuild hooks ({build_option('hooks')}). " - msg += "Alternatively, you can skip the compatiblity check alltogether by setting the " + msg += "Alternatively, you can skip the compatibility check altogether by setting the " msg += "EESSI_OVERRIDE_CUDA_CC_TOOLKIT_CHECK environment variable." raise EasyBuildError(msg)