From 13dd41882a8e58c6d1c41742068a42af4c6aba1d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 28 Oct 2024 13:11:41 +0100 Subject: [PATCH 1/6] tweak libpaths by adding directory containing libnccl.so.2 --- easybuild/easyblocks/t/tensorflow.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/easybuild/easyblocks/t/tensorflow.py b/easybuild/easyblocks/t/tensorflow.py index c72aec1ea1..b45bc8a781 100644 --- a/easybuild/easyblocks/t/tensorflow.py +++ b/easybuild/easyblocks/t/tensorflow.py @@ -562,6 +562,13 @@ def configure_step(self): tensorrt_root = get_software_root('TensorRT') nccl_root = get_software_root('NCCL') + # add path to libnccl.so.2 directory provided by NCCL when both sysroot + # and RPATH are used (such as in EESSI) + if build_option('sysroot') and self.toolchain.use_rpath: + libpaths = self.system_libs_info[2] + libpaths.append(os.path.join(nccl_root, 'lib')) + self.system_libs_info[2] = libpaths + self._with_cuda = bool(cuda_root) config_env_vars = { From 0f331a81b817ef5b448fc33ce6ef0de7e9fda0fd Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 29 Oct 2024 19:35:55 +0100 Subject: [PATCH 2/6] fix logic to add directory to NCCL libs --- easybuild/easyblocks/t/tensorflow.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/easybuild/easyblocks/t/tensorflow.py b/easybuild/easyblocks/t/tensorflow.py index b45bc8a781..260cd919cc 100644 --- a/easybuild/easyblocks/t/tensorflow.py +++ b/easybuild/easyblocks/t/tensorflow.py @@ -565,9 +565,11 @@ def configure_step(self): # add path to libnccl.so.2 directory provided by NCCL when both sysroot # and RPATH are used (such as in EESSI) if build_option('sysroot') and self.toolchain.use_rpath: - libpaths = self.system_libs_info[2] - libpaths.append(os.path.join(nccl_root, 'lib')) - self.system_libs_info[2] = libpaths + system_libs_info_as_list = list(self.system_libs_info) + new_libpaths = system_libs_info_as_list[2] + new_libpaths.append(os.path.join(nccl_root, 'lib')) + system_libs_info_as_list[2] = new_libpaths + self.system_libs_info = tuple(system_libs_info_as_list) self._with_cuda = bool(cuda_root) From f6a9afd6fc8a9794d7a2879986db5f97acf9663d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 4 Nov 2024 14:32:44 +0100 Subject: [PATCH 3/6] moved code block and simplified processing --- easybuild/easyblocks/t/tensorflow.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/easybuild/easyblocks/t/tensorflow.py b/easybuild/easyblocks/t/tensorflow.py index 260cd919cc..e047456791 100644 --- a/easybuild/easyblocks/t/tensorflow.py +++ b/easybuild/easyblocks/t/tensorflow.py @@ -562,15 +562,6 @@ def configure_step(self): tensorrt_root = get_software_root('TensorRT') nccl_root = get_software_root('NCCL') - # add path to libnccl.so.2 directory provided by NCCL when both sysroot - # and RPATH are used (such as in EESSI) - if build_option('sysroot') and self.toolchain.use_rpath: - system_libs_info_as_list = list(self.system_libs_info) - new_libpaths = system_libs_info_as_list[2] - new_libpaths.append(os.path.join(nccl_root, 'lib')) - system_libs_info_as_list[2] = new_libpaths - self.system_libs_info = tuple(system_libs_info_as_list) - self._with_cuda = bool(cuda_root) config_env_vars = { @@ -710,6 +701,7 @@ def configure_step(self): }) else: raise EasyBuildError("TensorFlow has a strict dependency on cuDNN if CUDA is enabled") + if nccl_root: nccl_version = get_software_version('NCCL') # Ignore the PKG_REVISION identifier if it exists (i.e., report 2.4.6 for 2.4.6-1 or 2.4.6-2) @@ -717,6 +709,14 @@ def configure_step(self): config_env_vars.update({ 'NCCL_INSTALL_PATH': nccl_root, }) + + # add path to libnccl.so.2 directory provided by NCCL when both sysroot + # and RPATH are used (such as in EESSI) + if build_option('sysroot') and self.toolchain.use_rpath: + system_libs_info_as_list = list(self.system_libs_info) + system_libs_info_as_list[2].append(os.path.join(nccl_root, get_software_libdir('NCCL'))) + self.system_libs_info = tuple(system_libs_info_as_list) + else: nccl_version = '1.3' # Use simple downloadable version config_env_vars.update({ From edc9bfe6456d573d6ab0a95f37ea87962965b5a1 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Fri, 8 Nov 2024 08:37:27 +0100 Subject: [PATCH 4/6] use different conditions for when to tweak LIBRARY_PATH --- easybuild/easyblocks/t/tensorflow.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/easybuild/easyblocks/t/tensorflow.py b/easybuild/easyblocks/t/tensorflow.py index e047456791..5d4c42c4a0 100644 --- a/easybuild/easyblocks/t/tensorflow.py +++ b/easybuild/easyblocks/t/tensorflow.py @@ -710,9 +710,11 @@ def configure_step(self): 'NCCL_INSTALL_PATH': nccl_root, }) - # add path to libnccl.so.2 directory provided by NCCL when both sysroot - # and RPATH are used (such as in EESSI) - if build_option('sysroot') and self.toolchain.use_rpath: + # add absolute path to libnccl.so.2 directory provided by NCCL + # when LD_LIBRARY_PATH is filtered and LIBRARY_PATH is not + # filtered, e.g., in an environment such as EESSI + filtered_env_vars = build_option('filter_env_vars') or [] + if 'LD_LIBRARY_PATH' in filtered_env_vars and 'LIBRARY_PATH' not in filtered_env_vars: system_libs_info_as_list = list(self.system_libs_info) system_libs_info_as_list[2].append(os.path.join(nccl_root, get_software_libdir('NCCL'))) self.system_libs_info = tuple(system_libs_info_as_list) From f697d9749186d731ddb005c1c8220954919072d6 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Fri, 8 Nov 2024 10:24:54 +0100 Subject: [PATCH 5/6] move and simplify code to add NCCL-lib path --- easybuild/easyblocks/t/tensorflow.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/easybuild/easyblocks/t/tensorflow.py b/easybuild/easyblocks/t/tensorflow.py index 5d4c42c4a0..d89232a160 100644 --- a/easybuild/easyblocks/t/tensorflow.py +++ b/easybuild/easyblocks/t/tensorflow.py @@ -709,16 +709,6 @@ def configure_step(self): config_env_vars.update({ 'NCCL_INSTALL_PATH': nccl_root, }) - - # add absolute path to libnccl.so.2 directory provided by NCCL - # when LD_LIBRARY_PATH is filtered and LIBRARY_PATH is not - # filtered, e.g., in an environment such as EESSI - filtered_env_vars = build_option('filter_env_vars') or [] - if 'LD_LIBRARY_PATH' in filtered_env_vars and 'LIBRARY_PATH' not in filtered_env_vars: - system_libs_info_as_list = list(self.system_libs_info) - system_libs_info_as_list[2].append(os.path.join(nccl_root, get_software_libdir('NCCL'))) - self.system_libs_info = tuple(system_libs_info_as_list) - else: nccl_version = '1.3' # Use simple downloadable version config_env_vars.update({ @@ -894,6 +884,16 @@ def build_step(self): # Make TF find our modules. LD_LIBRARY_PATH gets automatically added by configure.py cpaths, libpaths = self.system_libs_info[1:] + + # add absolute path to libnccl.so.2 directory provided by NCCL + # when LD_LIBRARY_PATH is filtered and LIBRARY_PATH is not + # filtered, e.g., in an environment such as EESSI + nccl_root = get_software_root('NCCL') + if nccl_root: + filtered_env_vars = build_option('filter_env_vars') or [] + if 'LD_LIBRARY_PATH' in filtered_env_vars and 'LIBRARY_PATH' not in filtered_env_vars: + libpaths.append(os.path.join(nccl_root, get_software_libdir('NCCL'))) + if cpaths: action_env['CPATH'] = ':'.join(cpaths) if libpaths: From 68d89b954b9a67ac157e8030f2ad670ff9374964 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20R=C3=B6blitz?= Date: Fri, 8 Nov 2024 18:49:40 +0100 Subject: [PATCH 6/6] minor wording / line-wrapping change Co-authored-by: Alexander Grund --- easybuild/easyblocks/t/tensorflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/easybuild/easyblocks/t/tensorflow.py b/easybuild/easyblocks/t/tensorflow.py index d89232a160..cd567547d6 100644 --- a/easybuild/easyblocks/t/tensorflow.py +++ b/easybuild/easyblocks/t/tensorflow.py @@ -886,8 +886,8 @@ def build_step(self): cpaths, libpaths = self.system_libs_info[1:] # add absolute path to libnccl.so.2 directory provided by NCCL - # when LD_LIBRARY_PATH is filtered and LIBRARY_PATH is not - # filtered, e.g., in an environment such as EESSI + # when LD_LIBRARY_PATH is filtered and LIBRARY_PATH is not filtered. + # E.g. in an environment such as EESSI. nccl_root = get_software_root('NCCL') if nccl_root: filtered_env_vars = build_option('filter_env_vars') or []