diff --git a/.gitignore b/.gitignore index 4f74949f..59a26d7a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ __pycache__/ *.egg-info/ build/ + +# Vim +*.sw[op] +*~ diff --git a/CI/hortense_local_ss/ci_config.sh b/CI/hortense_local_ss/ci_config.sh index 24667b2c..4e01cedc 100644 --- a/CI/hortense_local_ss/ci_config.sh +++ b/CI/hortense_local_ss/ci_config.sh @@ -1,14 +1,16 @@ # Configurable items -if [ -z "${TEST_SUITE_PARTITION}" ]; then - echo "You have to indicate on which partition the test-suite will run on vsc-Hortense" - echo "This environment variable needs to be set TEST_SUITE_PARTITION=cpu_rome_256gb" - echo "Can only set to 'cpu_rome_256gb' untill new functionality of 'sched_options' is part of" - echo "the ReFrame release https://github.com/reframe-hpc/reframe/issues/2970" - exit 1 +if [[ "$TEST_SUITE_PARTITION" == "GPU" ]]; then + module --force purge + if [ -z "${SET_LOCAL_MODULE_ENV}" ]; then + export SET_LOCAL_MODULE_ENV=True + fi + if [ -z "${LOCAL_MODULES}" ]; then + export LOCAL_MODULES="cluster/dodrio/gpu_rome_a100" + fi fi if [ -z "${REFRAME_ARGS}" ]; then - REFRAME_ARGS="--tag CI --tag 1_node|2_nodes --system hortense:${TEST_SUITE_PARTITION}" + REFRAME_ARGS="--tag CI --tag 1_node|2_nodes" fi if [ -z "${USE_EESSI_SOFTWARE_STACK}" ]; then @@ -21,13 +23,4 @@ fi if [ -z "${UNSET_MODULEPATH}" ]; then export UNSET_MODULEPATH=False - module --force purge -fi - -if [ -z "${SET_LOCAL_MODULE_ENV}"]; then - export SET_LOCAL_MODULE_ENV=True -fi - -if [ -z "${LOCAL_MODULES}"]; then - export LOCAL_MODULES="cluster/dodrio/${TEST_SUITE_PARTITION}" fi diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 8ad36b75..df09e9eb 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -4,8 +4,7 @@ # authors: Samuel Moors (VUB-HPC), Kenneth Hoste (HPC-UGent), Lara Peeters (HPC-UGent) # Use generated topology file by ReFrame for CPU partitions -# Cannot use autodetection untill new functionality of `sched_options` is part of -# the ReFrame release https://github.com/reframe-hpc/reframe/issues/2970 +# `sched_access_in_submit` does not work with setting `'remote_detect': True,` # Instructions on generating topology file # ``` @@ -64,6 +63,9 @@ def command(self, job): 'scheduler': 'slurm', 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=cpu_rome'], + 'sched_options': { + 'sched_access_in_submit': True, + }, 'environs': ['default'], 'descr': 'CPU nodes (AMD Rome, 256GiB RAM)', 'max_jobs': 20, @@ -89,6 +91,9 @@ def command(self, job): 'scheduler': 'slurm', 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=cpu_rome_512'], + 'sched_options': { + 'sched_access_in_submit': True, + }, 'environs': ['default'], 'descr': 'CPU nodes (AMD Rome, 512GiB RAM)', 'max_jobs': 20, @@ -114,6 +119,9 @@ def command(self, job): 'scheduler': 'slurm', 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=cpu_milan'], + 'sched_options': { + 'sched_access_in_submit': True, + }, 'environs': ['default'], 'descr': 'CPU nodes (AMD Milan, 256GiB RAM)', 'max_jobs': 20, @@ -139,6 +147,9 @@ def command(self, job): 'scheduler': 'slurm', 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=gpu_rome_a100_40'], + 'sched_options': { + 'sched_access_in_submit': True, + }, 'environs': ['default'], 'descr': 'GPU nodes (A100 40GB)', 'max_jobs': 20, @@ -176,6 +187,9 @@ def command(self, job): 'scheduler': 'slurm',
'prepare_cmds': [prepare_eessi_init, common_eessi_init()], 'access': hortense_access + ['--partition=gpu_rome_a100_80'], + 'sched_options': { + 'sched_access_in_submit': True, + }, 'environs': ['default'], 'descr': 'GPU nodes (A100 80GB)', 'max_jobs': 20, diff --git a/eessi/testsuite/eessi_mixin.py b/eessi/testsuite/eessi_mixin.py index cde5fbf0..3dd5b224 100644 --- a/eessi/testsuite/eessi_mixin.py +++ b/eessi/testsuite/eessi_mixin.py @@ -40,6 +40,7 @@ class EESSI_Mixin(RegressionMixin): # Set defaults for these class variables, can be overwritten by child class if desired measure_memory_usage = variable(bool, value=False) + exact_memory = variable(bool, value=False) scale = parameter(SCALES.keys()) bench_name = None bench_name_ci = None diff --git a/eessi/testsuite/hooks.py b/eessi/testsuite/hooks.py index ca09d999..caa81fa7 100644 --- a/eessi/testsuite/hooks.py +++ b/eessi/testsuite/hooks.py @@ -3,7 +3,6 @@ """ import math import shlex -import warnings import reframe as rfm import reframe.core.logging as rflog @@ -432,10 +431,10 @@ def _set_or_append_valid_systems(test: rfm.RegressionTest, valid_systems: str): elif len(test.valid_systems) == 1: test.valid_systems[0] = f'{test.valid_systems[0]} {valid_systems}' else: - warn_msg = f"valid_systems has multiple ({len(test.valid_systems)}) items," - warn_msg += " which is not supported by this hook." - warn_msg += " Make sure to handle filtering yourself." - warnings.warn(warn_msg) + msg = f"valid_systems has multiple ({len(test.valid_systems)}) items," + msg += " which is not supported by this hook." + msg += " Make sure to handle filtering yourself." + rflog.getlogger().warning(msg) return @@ -529,7 +528,6 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float): # and return from this hook (as setting test.extra_resources will be ignored in that case according to # https://reframe-hpc.readthedocs.io/en/stable/regression_test_api.html#reframe.core.pipeline.RegressionTest.extra_resources if 'memory' not in test.current_partition.resources: - logger = rflog.getlogger() msg = "Your ReFrame configuration file does not specify any resource called 'memory' for this partition " msg += f" ({test.current_partition.name})." msg += " Without this, an explicit memory request cannot be made from the scheduler. This test will run," @@ -538,7 +536,7 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float): msg += " 'memory' in your ReFrame configuration file for this partition." msg += " For a SLURM system, one would e.g.
define:" msg += " 'resources': [{'name': 'memory', 'options': ['--mem={size}']}]" - logger.warning(msg) + rflog.getlogger().warning(msg) # We return, as setting a test.extra_resources is pointless - it would be ignored anyway # This way, we also don't add any lines to the log that a specific amount of memory was requested return @@ -557,8 +555,12 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float): log(f"Memory requested by application: {app_mem_req} MiB") log(f"Memory proportional to the core count: {proportional_mem} MiB") - # Request the maximum of the proportional_mem, and app_mem_req to the scheduler - req_mem_per_node = max(proportional_mem, app_mem_req) + if test.exact_memory: + # Request the exact amount of required memory + req_mem_per_node = app_mem_req + else: + # Request the maximum of the proportional_mem, and app_mem_req to the scheduler + req_mem_per_node = max(proportional_mem, app_mem_req) test.extra_resources = {'memory': {'size': f'{req_mem_per_node}M'}} log(f"Requested {req_mem_per_node} MiB per node from the SLURM batch scheduler") @@ -580,14 +582,13 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float): log(f"Requested {req_mem_per_task} MiB per task from the torque batch scheduler") else: - logger = rflog.getlogger() msg = "hooks.req_memory_per_node does not support the scheduler you configured" msg += f" ({test.current_partition.scheduler.registered_name})." msg += " The test will run, but since it doesn't request the required amount of memory explicitely," msg += " it may result in an out-of-memory error." msg += " Please expand the functionality of hooks.req_memory_per_node for your scheduler." # Warnings will, at default loglevel, be printed on stdout when executing the ReFrame command - logger.warning(msg) + rflog.getlogger().warning(msg) def set_modules(test: rfm.RegressionTest): @@ -671,14 +672,13 @@ def set_compact_process_binding(test: rfm.RegressionTest): log(f'Set environment variable SLURM_DISTRIBUTION to {test.env_vars["SLURM_DISTRIBUTION"]}') log(f'Set environment variable SLURM_CPU_BIND to {test.env_vars["SLURM_CPU_BIND"]}') else: - logger = rflog.getlogger() msg = "hooks.set_compact_process_binding does not support the current launcher" msg += f" ({test.current_partition.launcher_type().registered_name})." msg += " The test will run, but using the default binding strategy of your parallel launcher." msg += " This may lead to suboptimal performance." msg += " Please expand the functionality of hooks.set_compact_process_binding for your parallel launcher." 
# Warnings will, at default loglevel, be printed on stdout when executing the ReFrame command - logger.warning(msg) + rflog.getlogger().warning(msg) def set_compact_thread_binding(test: rfm.RegressionTest): diff --git a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py index 13171143..f91fe11b 100644 --- a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py +++ b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py @@ -2,79 +2,40 @@ import reframe as rfm import reframe.utility.sanity as sn -# Added only to make the linter happy -from reframe.core.builtins import parameter, variable, run_after, sanity_function, performance_function +from reframe.core.builtins import parameter, run_after, sanity_function, performance_function -from eessi.testsuite import hooks -from eessi.testsuite.constants import SCALES, TAGS, DEVICE_TYPES, COMPUTE_UNIT, CPU, NUMA_NODE, GPU +from eessi.testsuite.constants import DEVICE_TYPES, COMPUTE_UNIT, CPU, NUMA_NODE, GPU +from eessi.testsuite.eessi_mixin import EESSI_Mixin from eessi.testsuite.utils import find_modules -class EESSI_PyTorch_torchvision(rfm.RunOnlyRegressionTest): +class EESSI_PyTorch_torchvision(rfm.RunOnlyRegressionTest, EESSI_Mixin): + descr = 'Benchmark that runs a selected torchvision model on synthetic data' + nn_model = parameter(['vgg16', 'resnet50', 'resnet152', 'densenet121', 'mobilenet_v3_large']) - scale = parameter(SCALES.keys()) + bench_name_ci = 'resnet50' parallel_strategy = parameter([None, 'ddp']) - compute_device = variable(str) # Both torchvision and PyTorch-bundle modules have everything needed to run this test module_name = parameter(chain(find_modules('torchvision'), find_modules('PyTorch-bundle'))) - - descr = 'Benchmark that runs a selected torchvision model on synthetic data' - executable = 'python' - - valid_prog_environs = ['default'] - valid_systems = ['*'] - time_limit = '30m' + def required_mem_per_node(self): + return self.num_tasks_per_node * 1024 + @run_after('init') def prepare_test(self): - # Set nn_model as executable option self.executable_opts = ['pytorch_synthetic_benchmark.py --model %s' % self.nn_model] + self.bench_name = self.nn_model # If not a GPU run, disable CUDA - if self.compute_device != DEVICE_TYPES[GPU]: + if self.device_type != DEVICE_TYPES[GPU]: self.executable_opts += ['--no-cuda'] - @run_after('init') - def apply_init_hooks(self): - # Filter on which scales are supported by the partitions defined in the ReFrame configuration - hooks.filter_supported_scales(self) - - # Make sure that GPU tests run in partitions that support running on a GPU, - # and that CPU-only tests run in partitions that support running CPU-only. - # Also support setting valid_systems on the cmd line. - hooks.filter_valid_systems_by_device_type(self, required_device_type=self.compute_device) - - # Support selecting modules on the cmd line. - hooks.set_modules(self) - - # Support selecting scales on the cmd line via tags. 
- hooks.set_tag_scale(self) - - @run_after('init') - def set_tag_ci(self): - if self.nn_model == 'resnet50': - self.tags.add(TAGS['CI']) - - @run_after('setup') - def apply_setup_hooks(self): - if self.compute_device == DEVICE_TYPES[GPU]: - hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[GPU]) - else: - # Hybrid code, for which launching one task per NUMA_NODE is typically the most efficient - hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[NUMA_NODE]) - - # This is a hybrid test, binding is important for performance - hooks.set_compact_process_binding(self) - - # Set OMP_NUM_THREADS based on the number of cores per task - self.env_vars["OMP_NUM_THREADS"] = self.num_cpus_per_task - @run_after('setup') def set_ddp_options(self): - # Set environment variables for PyTorch DDP + "Set environment variables for PyTorch DDP" if self.parallel_strategy == 'ddp': # Set additional options required by DDP self.executable_opts += ["--master-port $(python get_free_socket.py)"] @@ -94,7 +55,7 @@ def filter_invalid_parameter_combinations(self): @run_after('setup') def pass_parallel_strategy(self): - # Set parallelization strategy when using more than one process + "Set parallelization strategy when using more than one process" if self.num_tasks != 1: self.executable_opts += ['--use-%s' % self.parallel_strategy] @@ -110,8 +71,8 @@ def total_throughput(self): @performance_function('img/sec') def througput_per_CPU(self): - '''Training througput per CPU''' - if self.compute_device == DEVICE_TYPES[CPU]: + '''Training throughput per device type''' + if self.device_type == DEVICE_TYPES[CPU]: return sn.extractsingle(r'Img/sec per CPU:\s+(?P<perf_per_cpu>\S+)', self.stdout, 'perf_per_cpu', float) else: return sn.extractsingle(r'Img/sec per GPU:\s+(?P<perf_per_gpu>\S+)', self.stdout, 'perf_per_gpu', float) @@ -119,12 +80,14 @@ def througput_per_CPU(self): @rfm.simple_test class EESSI_PyTorch_torchvision_CPU(EESSI_PyTorch_torchvision): - compute_device = DEVICE_TYPES[CPU] + device_type = DEVICE_TYPES[CPU] + compute_unit = COMPUTE_UNIT[NUMA_NODE] @rfm.simple_test class EESSI_PyTorch_torchvision_GPU(EESSI_PyTorch_torchvision): - compute_device = DEVICE_TYPES[GPU] + device_type = DEVICE_TYPES[GPU] + compute_unit = COMPUTE_UNIT[GPU] precision = parameter(['default', 'mixed']) @run_after('init') diff --git a/eessi/testsuite/tests/apps/QuantumESPRESSO.py b/eessi/testsuite/tests/apps/QuantumESPRESSO.py index 288354b2..1f1f6270 100644 --- a/eessi/testsuite/tests/apps/QuantumESPRESSO.py +++ b/eessi/testsuite/tests/apps/QuantumESPRESSO.py @@ -30,52 +30,30 @@ import reframe as rfm from hpctestlib.sciapps.qespresso.benchmarks import QEspressoPWCheck -from reframe.core.builtins import ( # added only to make the linter happy - parameter, run_after) +from reframe.core.builtins import parameter, run_after -from eessi.testsuite import hooks -from eessi.testsuite.constants import (COMPUTE_UNIT, CPU, DEVICE_TYPES, GPU, - SCALES, TAGS) -from eessi.testsuite.utils import find_modules, log +from eessi.testsuite.constants import COMPUTE_UNIT, CPU, DEVICE_TYPES, GPU +from eessi.testsuite.eessi_mixin import EESSI_Mixin +from eessi.testsuite.utils import find_modules @rfm.simple_test -class EESSI_QuantumESPRESSO_PW(QEspressoPWCheck): - scale = parameter(SCALES.keys()) - valid_prog_environs = ['default'] - valid_systems = ['*'] +class EESSI_QuantumESPRESSO_PW(QEspressoPWCheck, EESSI_Mixin): time_limit = '30m' module_name = parameter(find_modules('QuantumESPRESSO')) - # For now, QE is being build for CPU targets only - # 
compute_device = parameter([DEVICE_TYPES[CPU], DEVICE_TYPES[GPU]]) - compute_device = parameter([DEVICE_TYPES[CPU], ]) + # For now, QE is built for CPU targets only + device_type = parameter([DEVICE_TYPES[CPU]]) - @run_after('init') - def run_after_init(self): - """Hooks to run after the init phase""" - - # Filter on which scales are supported by the partitions defined in the ReFrame configuration - hooks.filter_supported_scales(self) - - # Make sure that GPU tests run in partitions that support running on a GPU, - # and that CPU-only tests run in partitions that support running CPU-only. - # Also support setting valid_systems on the cmd line. - hooks.filter_valid_systems_by_device_type(self, required_device_type=self.compute_device) - - # Support selecting modules on the cmd line. - hooks.set_modules(self) - - # Support selecting scales on the cmd line via tags. - hooks.set_tag_scale(self) + def required_mem_per_node(self): + return (self.num_tasks_per_node * 0.9 + 4) * 1024 @run_after('init') - def set_tag_ci(self): + def set_ci(self): """Set tag CI on smallest benchmark, so it can be selected on the cmd line via --tag CI""" min_ecut = min(QEspressoPWCheck.ecut.values) min_nbnd = min(QEspressoPWCheck.nbnd.values) if self.ecut == min_ecut and self.nbnd == min_nbnd: - self.tags.add(TAGS['CI']) - log(f'tags set to {self.tags}') + self.bench_name = self.bench_name_ci = 'bench_ci' @run_after('init') def set_increased_walltime(self): @@ -85,29 +63,14 @@ def set_increased_walltime(self): if self.ecut == max_ecut and self.nbnd == max_nbnd: self.time_limit = '60m' - @run_after('setup') - def run_after_setup(self): - """Hooks to run after the setup phase""" - - # Calculate default requested resources based on the scale: - # 1 task per CPU for CPU-only tests, 1 task per GPU for GPU tests. - # Also support setting the resources on the cmd line. - if self.compute_device == DEVICE_TYPES[GPU]: - hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[GPU]) - else: - hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[CPU]) - - @run_after('setup') - def request_mem(self): - memory_required = self.num_tasks_per_node * 0.9 + 4 - hooks.req_memory_per_node(test=self, app_mem_req=memory_required * 1024) - - @run_after('setup') - def set_omp_num_threads(self): + @run_after('init') + def set_compute_unit(self): """ - Set number of OpenMP threads via OMP_NUM_THREADS. - Set default number of OpenMP threads equal to number of CPUs per task. + Set the compute unit to which tasks will be assigned: + one task per CPU core for CPU runs, and one task per GPU for GPU runs. 
""" - - self.env_vars['OMP_NUM_THREADS'] = self.num_cpus_per_task - log(f'env_vars set to {self.env_vars}') + device_to_compute_unit = { + DEVICE_TYPES[CPU]: COMPUTE_UNIT[CPU], + DEVICE_TYPES[GPU]: COMPUTE_UNIT[GPU], + } + self.compute_unit = device_to_compute_unit.get(self.device_type) diff --git a/eessi/testsuite/tests/apps/tensorflow/src/__init__.py b/eessi/testsuite/tests/apps/tensorflow/src/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/eessi/testsuite/tests/apps/tensorflow/tensorflow.py b/eessi/testsuite/tests/apps/tensorflow/tensorflow.py index f37194f8..fbacf06e 100644 --- a/eessi/testsuite/tests/apps/tensorflow/tensorflow.py +++ b/eessi/testsuite/tests/apps/tensorflow/tensorflow.py @@ -5,32 +5,34 @@ """ import reframe as rfm +from reframe.core.builtins import deferrable, parameter, run_after, sanity_function, performance_function import reframe.utility.sanity as sn from eessi.testsuite import hooks, utils -from eessi.testsuite.constants import * # noqa +from eessi.testsuite.constants import COMPUTE_UNIT, CPU, CPU_SOCKET, DEVICE_TYPES, GPU +from eessi.testsuite.eessi_mixin import EESSI_Mixin @rfm.simple_test -class EESSI_TensorFlow(rfm.RunOnlyRegressionTest): - - # This test can run at any scale, so parameterize over all known SCALES - scale = parameter(SCALES.keys()) - valid_prog_environs = ['default'] - valid_systems = ['*'] +class EESSI_TensorFlow(rfm.RunOnlyRegressionTest, EESSI_Mixin): # Parameterize over all modules that start with TensorFlow module_name = parameter(utils.find_modules('TensorFlow')) # Make CPU and GPU versions of this test - device_type = parameter(['cpu', 'gpu']) + device_type = parameter([DEVICE_TYPES[CPU], DEVICE_TYPES[GPU]]) executable = 'python tf_test.py' time_limit = '30m' # This test should be run as part of EESSI CI - tags = {TAGS['CI']} + bench_name = bench_name_ci = 'bench_ci' + + readonly_files = ['mnist_setup.py', 'tf_test.py'] + + def required_mem_per_node(self): + return self.num_tasks_per_node * 2048 @deferrable def assert_tf_config_ranks(self): @@ -44,9 +46,7 @@ def assert_completion(self): '''Assert that the test ran until completion''' n_fit_completed = sn.count(sn.extractall('^Rank [0-9]+: Keras fit completed', self.stdout)) - return sn.all([ - sn.assert_eq(n_fit_completed, self.num_tasks), - ]) + return sn.assert_eq(n_fit_completed, self.num_tasks) @deferrable def assert_convergence(self): @@ -68,16 +68,6 @@ def assert_sanity(self): def perf(self): return sn.extractsingle(r'^Performance:\s+(?P\S+)', self.stdout, 'perf', float) - @run_after('init') - def run_after_init(self): - """hooks to run after the init phase""" - # Filter on which scales are supported by the partitions defined in the ReFrame configuration - hooks.filter_supported_scales(self) - - hooks.filter_valid_systems_by_device_type(self, required_device_type=self.device_type) - hooks.set_modules(self) - hooks.set_tag_scale(self) - @run_after('init') def set_executable_opts(self): """Set executable opts based on device_type parameter""" @@ -91,33 +81,22 @@ def set_executable_opts(self): def set_test_descr(self): self.descr = f'TensorFlow benchmark on {self.device_type}' - @run_after('setup') - def run_after_setup(self): - """hooks to run after the setup phase""" - # TODO: implement - # It should bind to socket, but different MPIs may have different arguments to do that... - # We should at very least prevent that it binds to single core per process, - # as that results in many threads being scheduled to one core. 
- # binding may also differ per launcher used. It'll be hard to support a wide range and still get proper binding - if self.device_type == 'cpu': - hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT['CPU_SOCKET']) - elif self.device_type == 'gpu': - hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT['GPU']) - else: - raise NotImplementedError(f'Failed to set number of tasks and cpus per task for device {self.device_type}') + @run_after('init') + def set_compute_unit(self): + """ + Set the compute unit to which tasks will be assigned: + one task per CPU socket for CPU runs, and one task per GPU for GPU runs. + """ + device_to_compute_unit = { + DEVICE_TYPES[CPU]: COMPUTE_UNIT[CPU_SOCKET], + DEVICE_TYPES[GPU]: COMPUTE_UNIT[GPU], + } + self.compute_unit = device_to_compute_unit.get(self.device_type) @run_after('setup') def set_thread_count_args(self): - """Set exectuable opts defining the thread count""" + """Set executable opts defining the thread count""" if not self.has_custom_executable_opts: self.executable_opts += ['--intra-op-parallelism', '%s' % self.num_cpus_per_task] self.executable_opts += ['--inter-op-parallelism', '1'] utils.log(f'executable_opts set to {self.executable_opts}') - - @run_after('setup') - def set_binding_policy(self): - """ - Sets a binding policy for tasks. We don't bind threads because of - https://github.com/tensorflow/tensorflow/issues/60843 - """ - hooks.set_compact_process_binding(self)
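Notes:

The `sched_options` entries added in config/vsc_hortense.py rely on ReFrame's `sched_access_in_submit` partition option (the `sched_options` functionality referenced via https://github.com/reframe-hpc/reframe/issues/2970), which makes ReFrame pass the partition's `access` flags on the submit command line instead of emitting them as `#SBATCH` directives in the generated job script. A minimal sketch of such a partition entry; the account flag is a hypothetical example and is not taken from this patch:

    # Sketch of a Slurm partition entry in a ReFrame configuration file.
    # Only --partition and sched_access_in_submit mirror what this patch does;
    # '--account=my_project' is illustrative.
    partition = {
        'name': 'cpu_rome',
        'scheduler': 'slurm',
        'access': ['--partition=cpu_rome', '--account=my_project'],
        'sched_options': {
            # Pass the 'access' options with the sbatch command itself rather
            # than as #SBATCH directives in the job script.
            'sched_access_in_submit': True,
        },
        # As the comment in config/vsc_hortense.py notes, this does not combine
        # with 'remote_detect': True, hence the pre-generated topology file.
        'environs': ['default'],
    }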
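The new `exact_memory` variable on EESSI_Mixin changes how hooks.req_memory_per_node sizes the scheduler memory request: by default the hook requests max(memory proportional to the requested cores, required_mem_per_node()), while with `exact_memory = True` it requests exactly required_mem_per_node(). A hypothetical child test sketching the intended usage; the class name, module filter, and executable are invented for illustration:

    import reframe as rfm
    from reframe.core.builtins import parameter
    from eessi.testsuite.constants import COMPUTE_UNIT, CPU, DEVICE_TYPES
    from eessi.testsuite.eessi_mixin import EESSI_Mixin
    from eessi.testsuite.utils import find_modules

    @rfm.simple_test
    class EESSI_ExactMemoryExample(rfm.RunOnlyRegressionTest, EESSI_Mixin):
        """Hypothetical test demonstrating the exact_memory knob."""
        device_type = parameter([DEVICE_TYPES[CPU]])
        compute_unit = COMPUTE_UNIT[CPU]
        module_name = parameter(find_modules('SomeApp'))  # hypothetical module
        executable = 'some_app'  # hypothetical executable
        time_limit = '10m'
        bench_name = bench_name_ci = 'bench_ci'
        # Ask the scheduler for exactly required_mem_per_node(), not the
        # (usually larger) share proportional to the requested cores:
        exact_memory = True

        def required_mem_per_node(self):
            # 1 GiB per task, expressed in MiB as req_memory_per_node expects
            return self.num_tasks_per_node * 1024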
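To make the required_mem_per_node() formulas concrete: with the compute units chosen in this patch, QuantumESPRESSO launches one task per core and the CPU PyTorch test one task per NUMA domain, so on a hypothetical 128-core node with 8 NUMA domains the requests work out as follows (values in MiB, the unit hooks.req_memory_per_node works with):

    # QuantumESPRESSO: ~0.9 GiB per task plus a flat 4 GiB; one task per core:
    qe_mib = (128 * 0.9 + 4) * 1024  # = 122060.8 MiB, i.e. ~119.2 GiB
    # PyTorch torchvision: 1 GiB per task; one task per NUMA domain:
    pt_mib = 8 * 1024                # = 8192 MiB, i.e. 8 GiB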