Skip to content

Commit

Permalink
Neuron + GPU: report utilization % of accelerators regardless of execution time (#840)
Browse files Browse the repository at this point in the history

* Minor formatting tweak.

* Updates.

* Assertions for sanity checking.

* Moving back to pies for now.

* More util fixes.

* Function-level averages.

* Type cleanup.

* Blackened.

* Fixed.

* Removing 3.8

* Hopefully fixing a setuptools issue.

* Restored 3.8 smoketest.
  • Loading branch information
emeryberger authored Jul 19, 2024
1 parent 0aa1c7d commit 29f83f4
Show file tree
Hide file tree
Showing 11 changed files with 177 additions and 114 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ scalene = "scalene.__main__:main"
[build-system]
build-backend = "setuptools.build_meta"
requires = [
"setuptools>=65.5.1",
"setuptools>=65.5.1,<71.0", # Pin to setuptools<71.0 to avoid this bug: https://github.com/pypa/setuptools/issues/4496
"setuptools_scm>=8",
"wheel",
"cython",
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ packaging==20.9
psutil>=5.9.2
pyperf==2.0.0
rich>=10.7.0
setuptools>=65.5.1
setuptools>=65.5.1,<71.0 # Pin to setuptools<71.0 to avoid this bug: https://github.com/pypa/setuptools/issues/4496
nvidia-ml-py>=12.555.43; platform_system !='Darwin'
wheel~=0.38.1
12 changes: 6 additions & 6 deletions scalene/scalene-gui/scalene-gui.js
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,7 @@ function makeBar(python, native, system, params) {
}


function makeGPUPie(util) {
function makeGPUPie(util, gpu_device, params) {
return {
$schema: "https://vega.github.io/schema/vega-lite/v5.json",
config: {
Expand Down Expand Up @@ -940,7 +940,7 @@ function makeGPUPie(util) {
legend: false,
scale: { range: ["goldenrod", "#f4e6c2"] },
},
tooltip: [{ field: "c", type: "nominal", title: "GPU" }],
tooltip: [{ field: "c", type: "nominal", title: gpu_device }],
},
};
}
Expand Down Expand Up @@ -986,7 +986,7 @@ function makeGPUBar(util, gpu_device, params) {
legend: false,
scale: { range: ["goldenrod", "#f4e6c2"] },
},
tooltip: [{ field: "dd", type: "nominal", title: gpu_device }],
tooltip: [{ field: "dd", type: "nominal", title: gpu_device + ":" } ],
},
},
{
Expand All @@ -1004,7 +1004,7 @@ function makeGPUBar(util, gpu_device, params) {
},
text: { field: "d" },
color: { value: "white" },
tooltip: [{ field: "dd", type: "nominal", title: gpu_device }],
tooltip: [{ field: "dd", type: "nominal", title: gpu_device + ":" } ],
},
},
],
Expand Down Expand Up @@ -1560,8 +1560,8 @@ function makeProfileLine(
s += `<td style="width: 50; vertical-align: middle" align="right" data-sort="${line.n_gpu_percent}">`;
s += `<span style="height: 20; width: 30; vertical-align: middle" id="gpu_pie${gpu_pies.length}"></span>`;
s += "</td>";
// gpu_pies.push(makeGPUPie(line.n_gpu_percent));
gpu_pies.push(makeGPUBar(line.n_gpu_percent, prof.gpu_device, { height: 20, width: 100 }));
gpu_pies.push(makeGPUPie(line.n_gpu_percent, prof.gpu_device, { height: 20, width: 100 }));
// gpu_pies.push(makeGPUBar(line.n_gpu_percent, prof.gpu_device, { height: 20, width: 100 }));
}
if (true) {
if (line.n_gpu_peak_memory_mb < 1.0 || line.n_gpu_percent < 1.0) {
Expand Down
7 changes: 5 additions & 2 deletions scalene/scalene_accelerator.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from typing import Tuple
from abc import ABC, abstractmethod


# Base class for accelerators (GPUs, TPUs, etc.)
class ScaleneAccelerator(ABC):

@abstractmethod
def has_gpu(self) -> bool:
pass
Expand All @@ -20,4 +21,6 @@ def reinit(self) -> None:
def get_stats(self) -> Tuple[float, float]:
pass


@abstractmethod
def get_num_cores(self) -> int:
pass
7 changes: 6 additions & 1 deletion scalene/scalene_apple_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from scalene.scalene_accelerator import ScaleneAccelerator


class ScaleneAppleGPU(ScaleneAccelerator):
"""Wrapper class for Apple integrated GPU statistics."""

Expand All @@ -22,7 +23,7 @@ def __init__(self, sampling_frequency: int = 100) -> None:

def gpu_device(self) -> str:
return "GPU"

def has_gpu(self) -> bool:
"""True iff there is a GPU"""
# Disabling Apple GPU, since it does not collect per-process statistics.
Expand All @@ -32,6 +33,10 @@ def reinit(self) -> None:
"""A NOP, here for compatibility with the nvidia wrapper."""
return

def get_num_cores(self) -> int:
# FIXME: not yet implemented
return 1

def get_stats(self) -> Tuple[float, float]:
"""Returns a tuple of (utilization%, memory in use)"""
if not self.has_gpu():
Expand Down
48 changes: 33 additions & 15 deletions scalene/scalene_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import numpy as np


class ScaleneJSON:
@staticmethod
def memory_consumed_str(size_in_mb: float) -> str:
Expand Down Expand Up @@ -65,19 +66,25 @@ def __init__(self) -> None:
self.gpu_device = ""

def rdp(self, points, epsilon):
"""
Ramer-Douglas-Peucker algorithm implementation using NumPy
"""
Ramer-Douglas-Peucker algorithm implementation using NumPy
"""

def perpendicular_distance(point, start, end):
if np.all(start == end):
return np.linalg.norm(point - start)
return np.abs(np.cross(end - start, start - point) / np.linalg.norm(end - start))
return np.abs(
np.cross(end - start, start - point)
/ np.linalg.norm(end - start)
)

def recursive_rdp(points, start: int, end: int, epsilon: float):
dmax = 0.0
index = start
for i in range(start + 1, end):
d = perpendicular_distance(points[i], points[start], points[end])
d = perpendicular_distance(
points[i], points[start], points[end]
)
if d > dmax:
index = i
dmax = d
Expand All @@ -92,9 +99,9 @@ def recursive_rdp(points, start: int, end: int, epsilon: float):
start = 0
end = len(points) - 1
return np.array(recursive_rdp(points, start, end, epsilon))

def compress_samples(
self, samples: List[Any], max_footprint: float
self, samples: List[Any], max_footprint: float
) -> Any:
# Try to reduce the number of samples with the
# Ramer-Douglas-Peucker algorithm, which attempts to
Expand All @@ -107,19 +114,28 @@ def compress_samples(

if True:
# FIXME: bypassing RDP for now
#return samples[:self.max_sparkline_samples]
# return samples[:self.max_sparkline_samples]

new_samples = sorted(random.sample(list(map(tuple, samples)), self.max_sparkline_samples))
new_samples = sorted(
random.sample(
list(map(tuple, samples)), self.max_sparkline_samples
)
)
return new_samples

else:
epsilon = (len(samples) / (3 * self.max_sparkline_samples)) * 2

# Use NumPy for RDP algorithm
new_samples = self.rdp(np.array(samples), epsilon)

if len(new_samples) > self.max_sparkline_samples:
new_samples = sorted(random.sample(list(map(tuple, new_samples)), self.max_sparkline_samples))
new_samples = sorted(
random.sample(
list(map(tuple, new_samples)),
self.max_sparkline_samples,
)
)

return new_samples

Expand Down Expand Up @@ -180,11 +196,13 @@ def output_profile_line(
n_cpu_percent_c = 0
n_cpu_percent_python = 0

n_gpu_percent = n_gpu_samples * 100
# n_gpu_percent = n_gpu_samples * 100

if False:
if stats.total_gpu_samples:
n_gpu_percent = n_gpu_samples * 100 / stats.total_gpu_samples
if True:
if stats.n_gpu_samples[fname][line_no]:
n_gpu_percent = (
n_gpu_samples * 100 / stats.n_gpu_samples[fname][line_no]
) # total_gpu_samples
else:
n_gpu_percent = 0

Expand Down Expand Up @@ -358,7 +376,7 @@ def output_profiles(
),
"files": {},
"gpu": self.gpu,
"gpu_device" : self.gpu_device,
"gpu_device": self.gpu_device,
"memory": profile_memory,
"samples": stats.memory_footprint_samples,
"stacks": stks,
Expand Down
Loading

0 comments on commit 29f83f4

Please sign in to comment.