From 29aab0ae4498ec7e740ff8ef4a02142e16b0a6a7 Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Sat, 30 Nov 2024 19:37:42 +0100 Subject: [PATCH 1/4] use mixin class for pytorch --- .../tests/apps/PyTorch/PyTorch_torchvision.py | 80 +++++++------------ 1 file changed, 28 insertions(+), 52 deletions(-) diff --git a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py index 13171143..391be095 100644 --- a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py +++ b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py @@ -2,79 +2,55 @@ import reframe as rfm import reframe.utility.sanity as sn -# Added only to make the linter happy from reframe.core.builtins import parameter, variable, run_after, sanity_function, performance_function -from eessi.testsuite import hooks -from eessi.testsuite.constants import SCALES, TAGS, DEVICE_TYPES, COMPUTE_UNIT, CPU, NUMA_NODE, GPU +from eessi.testsuite.constants import DEVICE_TYPES, COMPUTE_UNIT, CPU, NUMA_NODE, GPU +from eessi.testsuite.eessi_mixin import EESSI_Mixin from eessi.testsuite.utils import find_modules -class EESSI_PyTorch_torchvision(rfm.RunOnlyRegressionTest): +class EESSI_PyTorch_torchvision(rfm.RunOnlyRegressionTest, EESSI_Mixin): + descr = 'Benchmark that runs a selected torchvision model on synthetic data' + nn_model = parameter(['vgg16', 'resnet50', 'resnet152', 'densenet121', 'mobilenet_v3_large']) - scale = parameter(SCALES.keys()) + bench_name_ci = 'resnet50' parallel_strategy = parameter([None, 'ddp']) - compute_device = variable(str) + device_type = variable(str) # Both torchvision and PyTorch-bundle modules have everything needed to run this test module_name = parameter(chain(find_modules('torchvision'), find_modules('PyTorch-bundle'))) - - descr = 'Benchmark that runs a selected torchvision model on synthetic data' - executable = 'python' - - valid_prog_environs = ['default'] - valid_systems = ['*'] - time_limit = '30m' + def required_mem_per_node(self): + return self.num_tasks_per_node * 1024 + @run_after('init') def prepare_test(self): # Set nn_model as executable option self.executable_opts = ['pytorch_synthetic_benchmark.py --model %s' % self.nn_model] + self.bench_name = self.nn_model # If not a GPU run, disable CUDA - if self.compute_device != DEVICE_TYPES[GPU]: + if self.device_type != DEVICE_TYPES[GPU]: self.executable_opts += ['--no-cuda'] @run_after('init') - def apply_init_hooks(self): - # Filter on which scales are supported by the partitions defined in the ReFrame configuration - hooks.filter_supported_scales(self) - - # Make sure that GPU tests run in partitions that support running on a GPU, - # and that CPU-only tests run in partitions that support running CPU-only. - # Also support setting valid_systems on the cmd line. - hooks.filter_valid_systems_by_device_type(self, required_device_type=self.compute_device) - - # Support selecting modules on the cmd line. - hooks.set_modules(self) - - # Support selecting scales on the cmd line via tags. - hooks.set_tag_scale(self) - - @run_after('init') - def set_tag_ci(self): - if self.nn_model == 'resnet50': - self.tags.add(TAGS['CI']) - - @run_after('setup') - def apply_setup_hooks(self): - if self.compute_device == DEVICE_TYPES[GPU]: - hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[GPU]) - else: - # Hybrid code, for which launching one task per NUMA_NODE is typically the most efficient - hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[NUMA_NODE]) - - # This is a hybrid test, binding is important for performance - hooks.set_compact_process_binding(self) - - # Set OMP_NUM_THREADS based on the number of cores per task - self.env_vars["OMP_NUM_THREADS"] = self.num_cpus_per_task + def set_compute_unit(self): + """ + Set the compute unit to which tasks will be assigned: + one task per NUMA node for CPU runs, and one task per GPU for GPU runs. + """ + device_to_compute_unit = { + # Hybrid execution with one task per NUMA_NODE is typically the most efficient + DEVICE_TYPES[CPU]: COMPUTE_UNIT[NUMA_NODE], + DEVICE_TYPES[GPU]: COMPUTE_UNIT[GPU], + } + self.compute_unit = device_to_compute_unit.get(self.device_type) @run_after('setup') def set_ddp_options(self): - # Set environment variables for PyTorch DDP + "Set environment variables for PyTorch DDP" if self.parallel_strategy == 'ddp': # Set additional options required by DDP self.executable_opts += ["--master-port $(python get_free_socket.py)"] @@ -94,7 +70,7 @@ def filter_invalid_parameter_combinations(self): @run_after('setup') def pass_parallel_strategy(self): - # Set parallelization strategy when using more than one process + "Set parallelization strategy when using more than one process" if self.num_tasks != 1: self.executable_opts += ['--use-%s' % self.parallel_strategy] @@ -111,7 +87,7 @@ def total_throughput(self): @performance_function('img/sec') def througput_per_CPU(self): '''Training througput per CPU''' - if self.compute_device == DEVICE_TYPES[CPU]: + if self.device_type == DEVICE_TYPES[CPU]: return sn.extractsingle(r'Img/sec per CPU:\s+(?P\S+)', self.stdout, 'perf_per_cpu', float) else: return sn.extractsingle(r'Img/sec per GPU:\s+(?P\S+)', self.stdout, 'perf_per_gpu', float) @@ -119,12 +95,12 @@ def througput_per_CPU(self): @rfm.simple_test class EESSI_PyTorch_torchvision_CPU(EESSI_PyTorch_torchvision): - compute_device = DEVICE_TYPES[CPU] + device_type = DEVICE_TYPES[CPU] @rfm.simple_test class EESSI_PyTorch_torchvision_GPU(EESSI_PyTorch_torchvision): - compute_device = DEVICE_TYPES[GPU] + device_type = DEVICE_TYPES[GPU] precision = parameter(['default', 'mixed']) @run_after('init') From 88b6df66bd0cc13e6f0f8fc00ed8016b4af9c131 Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Sun, 1 Dec 2024 13:44:06 +0100 Subject: [PATCH 2/4] set compute_unit in child classes --- .../tests/apps/PyTorch/PyTorch_torchvision.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py index 391be095..1fd1e5c4 100644 --- a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py +++ b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py @@ -26,7 +26,6 @@ def required_mem_per_node(self): @run_after('init') def prepare_test(self): - # Set nn_model as executable option self.executable_opts = ['pytorch_synthetic_benchmark.py --model %s' % self.nn_model] self.bench_name = self.nn_model @@ -35,19 +34,6 @@ def prepare_test(self): if self.device_type != DEVICE_TYPES[GPU]: self.executable_opts += ['--no-cuda'] - @run_after('init') - def set_compute_unit(self): - """ - Set the compute unit to which tasks will be assigned: - one task per NUMA node for CPU runs, and one task per GPU for GPU runs. - """ - device_to_compute_unit = { - # Hybrid execution with one task per NUMA_NODE is typically the most efficient - DEVICE_TYPES[CPU]: COMPUTE_UNIT[NUMA_NODE], - DEVICE_TYPES[GPU]: COMPUTE_UNIT[GPU], - } - self.compute_unit = device_to_compute_unit.get(self.device_type) - @run_after('setup') def set_ddp_options(self): "Set environment variables for PyTorch DDP" @@ -86,7 +72,7 @@ def total_throughput(self): @performance_function('img/sec') def througput_per_CPU(self): - '''Training througput per CPU''' + '''Training througput per device type''' if self.device_type == DEVICE_TYPES[CPU]: return sn.extractsingle(r'Img/sec per CPU:\s+(?P\S+)', self.stdout, 'perf_per_cpu', float) else: @@ -96,11 +82,13 @@ def througput_per_CPU(self): @rfm.simple_test class EESSI_PyTorch_torchvision_CPU(EESSI_PyTorch_torchvision): device_type = DEVICE_TYPES[CPU] + compute_unit = COMPUTE_UNIT[NUMA_NODE] @rfm.simple_test class EESSI_PyTorch_torchvision_GPU(EESSI_PyTorch_torchvision): device_type = DEVICE_TYPES[GPU] + compute_unit = COMPUTE_UNIT[GPU] precision = parameter(['default', 'mixed']) @run_after('init') From 42796f30f090536124aaa398d067c9f7e2b1ac53 Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Thu, 12 Dec 2024 11:02:46 +0100 Subject: [PATCH 3/4] remove device_type from base class --- eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py | 1 - 1 file changed, 1 deletion(-) diff --git a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py index 1fd1e5c4..c9c9a044 100644 --- a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py +++ b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py @@ -15,7 +15,6 @@ class EESSI_PyTorch_torchvision(rfm.RunOnlyRegressionTest, EESSI_Mixin): nn_model = parameter(['vgg16', 'resnet50', 'resnet152', 'densenet121', 'mobilenet_v3_large']) bench_name_ci = 'resnet50' parallel_strategy = parameter([None, 'ddp']) - device_type = variable(str) # Both torchvision and PyTorch-bundle modules have everything needed to run this test module_name = parameter(chain(find_modules('torchvision'), find_modules('PyTorch-bundle'))) executable = 'python' From e9237ee800b5b50a6ee4051bbeb7c78c002b735f Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Thu, 12 Dec 2024 11:04:42 +0100 Subject: [PATCH 4/4] remove variable import --- eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py index c9c9a044..f91fe11b 100644 --- a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py +++ b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py @@ -2,7 +2,7 @@ import reframe as rfm import reframe.utility.sanity as sn -from reframe.core.builtins import parameter, variable, run_after, sanity_function, performance_function +from reframe.core.builtins import parameter, run_after, sanity_function, performance_function from eessi.testsuite.constants import DEVICE_TYPES, COMPUTE_UNIT, CPU, NUMA_NODE, GPU from eessi.testsuite.eessi_mixin import EESSI_Mixin