From 29aab0ae4498ec7e740ff8ef4a02142e16b0a6a7 Mon Sep 17 00:00:00 2001
From: Samuel Moors <samuel.moors@vub.be>
Date: Sat, 30 Nov 2024 19:37:42 +0100
Subject: [PATCH 1/4] use mixin class for pytorch

---
 .../tests/apps/PyTorch/PyTorch_torchvision.py | 80 +++++++------------
 1 file changed, 28 insertions(+), 52 deletions(-)

diff --git a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py
index 13171143..391be095 100644
--- a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py
+++ b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py
@@ -2,79 +2,55 @@
 
 import reframe as rfm
 import reframe.utility.sanity as sn
-# Added only to make the linter happy
 from reframe.core.builtins import parameter, variable, run_after, sanity_function, performance_function
 
-from eessi.testsuite import hooks
-from eessi.testsuite.constants import SCALES, TAGS, DEVICE_TYPES, COMPUTE_UNIT, CPU, NUMA_NODE, GPU
+from eessi.testsuite.constants import DEVICE_TYPES, COMPUTE_UNIT, CPU, NUMA_NODE, GPU
+from eessi.testsuite.eessi_mixin import EESSI_Mixin
 from eessi.testsuite.utils import find_modules
 
 
-class EESSI_PyTorch_torchvision(rfm.RunOnlyRegressionTest):
+class EESSI_PyTorch_torchvision(rfm.RunOnlyRegressionTest, EESSI_Mixin):
+    descr = 'Benchmark that runs a selected torchvision model on synthetic data'
+
     nn_model = parameter(['vgg16', 'resnet50', 'resnet152', 'densenet121', 'mobilenet_v3_large'])
-    scale = parameter(SCALES.keys())
+    bench_name_ci = 'resnet50'
     parallel_strategy = parameter([None, 'ddp'])
-    compute_device = variable(str)
+    device_type = variable(str)
     # Both torchvision and PyTorch-bundle modules have everything needed to run this test
     module_name = parameter(chain(find_modules('torchvision'), find_modules('PyTorch-bundle')))
-
-    descr = 'Benchmark that runs a selected torchvision model on synthetic data'
-
     executable = 'python'
-
-    valid_prog_environs = ['default']
-    valid_systems = ['*']
-
     time_limit = '30m'
 
+    def required_mem_per_node(self):
+        return self.num_tasks_per_node * 1024
+
     @run_after('init')
     def prepare_test(self):
 
         # Set nn_model as executable option
         self.executable_opts = ['pytorch_synthetic_benchmark.py --model %s' % self.nn_model]
+        self.bench_name = self.nn_model
 
         # If not a GPU run, disable CUDA
-        if self.compute_device != DEVICE_TYPES[GPU]:
+        if self.device_type != DEVICE_TYPES[GPU]:
             self.executable_opts += ['--no-cuda']
 
     @run_after('init')
-    def apply_init_hooks(self):
-        # Filter on which scales are supported by the partitions defined in the ReFrame configuration
-        hooks.filter_supported_scales(self)
-
-        # Make sure that GPU tests run in partitions that support running on a GPU,
-        # and that CPU-only tests run in partitions that support running CPU-only.
-        # Also support setting valid_systems on the cmd line.
-        hooks.filter_valid_systems_by_device_type(self, required_device_type=self.compute_device)
-
-        # Support selecting modules on the cmd line.
-        hooks.set_modules(self)
-
-        # Support selecting scales on the cmd line via tags.
-        hooks.set_tag_scale(self)
-
-    @run_after('init')
-    def set_tag_ci(self):
-        if self.nn_model == 'resnet50':
-            self.tags.add(TAGS['CI'])
-
-    @run_after('setup')
-    def apply_setup_hooks(self):
-        if self.compute_device == DEVICE_TYPES[GPU]:
-            hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[GPU])
-        else:
-            # Hybrid code, for which launching one task per NUMA_NODE is typically the most efficient
-            hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[NUMA_NODE])
-
-        # This is a hybrid test, binding is important for performance
-        hooks.set_compact_process_binding(self)
-
-        # Set OMP_NUM_THREADS based on the number of cores per task
-        self.env_vars["OMP_NUM_THREADS"] = self.num_cpus_per_task
+    def set_compute_unit(self):
+        """
+        Set the compute unit to which tasks will be assigned:
+        one task per NUMA node for CPU runs, and one task per GPU for GPU runs.
+        """
+        device_to_compute_unit = {
+            # Hybrid execution with one task per NUMA_NODE is typically the most efficient
+            DEVICE_TYPES[CPU]: COMPUTE_UNIT[NUMA_NODE],
+            DEVICE_TYPES[GPU]: COMPUTE_UNIT[GPU],
+        }
+        self.compute_unit = device_to_compute_unit.get(self.device_type)
 
     @run_after('setup')
     def set_ddp_options(self):
-        # Set environment variables for PyTorch DDP
+        "Set environment variables for PyTorch DDP"
         if self.parallel_strategy == 'ddp':
             # Set additional options required by DDP
             self.executable_opts += ["--master-port $(python get_free_socket.py)"]
@@ -94,7 +70,7 @@ def filter_invalid_parameter_combinations(self):
 
     @run_after('setup')
     def pass_parallel_strategy(self):
-        # Set parallelization strategy when using more than one process
+        "Set parallelization strategy when using more than one process"
         if self.num_tasks != 1:
             self.executable_opts += ['--use-%s' % self.parallel_strategy]
 
@@ -111,7 +87,7 @@ def total_throughput(self):
     @performance_function('img/sec')
     def througput_per_CPU(self):
         '''Training througput per CPU'''
-        if self.compute_device == DEVICE_TYPES[CPU]:
+        if self.device_type == DEVICE_TYPES[CPU]:
             return sn.extractsingle(r'Img/sec per CPU:\s+(?P<perf_per_cpu>\S+)', self.stdout, 'perf_per_cpu', float)
         else:
             return sn.extractsingle(r'Img/sec per GPU:\s+(?P<perf_per_gpu>\S+)', self.stdout, 'perf_per_gpu', float)
@@ -119,12 +95,12 @@ def througput_per_CPU(self):
 
 @rfm.simple_test
 class EESSI_PyTorch_torchvision_CPU(EESSI_PyTorch_torchvision):
-    compute_device = DEVICE_TYPES[CPU]
+    device_type = DEVICE_TYPES[CPU]
 
 
 @rfm.simple_test
 class EESSI_PyTorch_torchvision_GPU(EESSI_PyTorch_torchvision):
-    compute_device = DEVICE_TYPES[GPU]
+    device_type = DEVICE_TYPES[GPU]
     precision = parameter(['default', 'mixed'])
 
     @run_after('init')

From 88b6df66bd0cc13e6f0f8fc00ed8016b4af9c131 Mon Sep 17 00:00:00 2001
From: Samuel Moors <samuel.moors@vub.be>
Date: Sun, 1 Dec 2024 13:44:06 +0100
Subject: [PATCH 2/4] set compute_unit in child classes

---
 .../tests/apps/PyTorch/PyTorch_torchvision.py  | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py
index 391be095..1fd1e5c4 100644
--- a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py
+++ b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py
@@ -26,7 +26,6 @@ def required_mem_per_node(self):
 
     @run_after('init')
     def prepare_test(self):
-
         # Set nn_model as executable option
         self.executable_opts = ['pytorch_synthetic_benchmark.py --model %s' % self.nn_model]
         self.bench_name = self.nn_model
@@ -35,19 +34,6 @@ def prepare_test(self):
         if self.device_type != DEVICE_TYPES[GPU]:
             self.executable_opts += ['--no-cuda']
 
-    @run_after('init')
-    def set_compute_unit(self):
-        """
-        Set the compute unit to which tasks will be assigned:
-        one task per NUMA node for CPU runs, and one task per GPU for GPU runs.
-        """
-        device_to_compute_unit = {
-            # Hybrid execution with one task per NUMA_NODE is typically the most efficient
-            DEVICE_TYPES[CPU]: COMPUTE_UNIT[NUMA_NODE],
-            DEVICE_TYPES[GPU]: COMPUTE_UNIT[GPU],
-        }
-        self.compute_unit = device_to_compute_unit.get(self.device_type)
-
     @run_after('setup')
     def set_ddp_options(self):
         "Set environment variables for PyTorch DDP"
@@ -86,7 +72,7 @@ def total_throughput(self):
 
     @performance_function('img/sec')
     def througput_per_CPU(self):
-        '''Training througput per CPU'''
+        '''Training throughput per device type'''
         if self.device_type == DEVICE_TYPES[CPU]:
             return sn.extractsingle(r'Img/sec per CPU:\s+(?P<perf_per_cpu>\S+)', self.stdout, 'perf_per_cpu', float)
         else:
@@ -96,11 +82,13 @@ def througput_per_CPU(self):
 @rfm.simple_test
 class EESSI_PyTorch_torchvision_CPU(EESSI_PyTorch_torchvision):
     device_type = DEVICE_TYPES[CPU]
+    compute_unit = COMPUTE_UNIT[NUMA_NODE]
 
 
 @rfm.simple_test
 class EESSI_PyTorch_torchvision_GPU(EESSI_PyTorch_torchvision):
     device_type = DEVICE_TYPES[GPU]
+    compute_unit = COMPUTE_UNIT[GPU]
     precision = parameter(['default', 'mixed'])
 
     @run_after('init')

From 42796f30f090536124aaa398d067c9f7e2b1ac53 Mon Sep 17 00:00:00 2001
From: Samuel Moors <samuel.moors@vub.be>
Date: Thu, 12 Dec 2024 11:02:46 +0100
Subject: [PATCH 3/4] remove device_type from base class

---
 eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py
index 1fd1e5c4..c9c9a044 100644
--- a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py
+++ b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py
@@ -15,7 +15,6 @@ class EESSI_PyTorch_torchvision(rfm.RunOnlyRegressionTest, EESSI_Mixin):
     nn_model = parameter(['vgg16', 'resnet50', 'resnet152', 'densenet121', 'mobilenet_v3_large'])
     bench_name_ci = 'resnet50'
     parallel_strategy = parameter([None, 'ddp'])
-    device_type = variable(str)
     # Both torchvision and PyTorch-bundle modules have everything needed to run this test
     module_name = parameter(chain(find_modules('torchvision'), find_modules('PyTorch-bundle')))
     executable = 'python'

From e9237ee800b5b50a6ee4051bbeb7c78c002b735f Mon Sep 17 00:00:00 2001
From: Samuel Moors <samuel.moors@vub.be>
Date: Thu, 12 Dec 2024 11:04:42 +0100
Subject: [PATCH 4/4] remove variable import

---
 eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py
index c9c9a044..f91fe11b 100644
--- a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py
+++ b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py
@@ -2,7 +2,7 @@
 
 import reframe as rfm
 import reframe.utility.sanity as sn
-from reframe.core.builtins import parameter, variable, run_after, sanity_function, performance_function
+from reframe.core.builtins import parameter, run_after, sanity_function, performance_function
 
 from eessi.testsuite.constants import DEVICE_TYPES, COMPUTE_UNIT, CPU, NUMA_NODE, GPU
 from eessi.testsuite.eessi_mixin import EESSI_Mixin