Skip to content

Commit

Permalink
Neuron + GPU: report utilization % of accelerators regardless of execution time (#840)
Browse files Browse the repository at this point in the history

* Minor formatting tweak.

* Updates.

* Assertions for sanity checking.

* Moving back to pies for now.

* More util fixes.

* Function-level averages.

* Type cleanup.

* Blackened.

* Fixed.

* Removing 3.8

* Hopefully fixing a setuptools issue.

* Restored 3.8 smoketest.
  • Loading branch information
emeryberger authored Jul 19, 2024
1 parent 0aa1c7d commit 29f83f4
Show file tree
Hide file tree
Showing 11 changed files with 177 additions and 114 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ scalene = "scalene.__main__:main"
[build-system]
build-backend = "setuptools.build_meta"
requires = [
"setuptools>=65.5.1",
"setuptools>=65.5.1,<71.0", # Pin to setuptools<71.0 to avoid this bug: https://github.com/pypa/setuptools/issues/4496
"setuptools_scm>=8",
"wheel",
"cython",
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ packaging==20.9
psutil>=5.9.2
pyperf==2.0.0
rich>=10.7.0
setuptools>=65.5.1
setuptools>=65.5.1,<71.0 # Pin to setuptools<71.0 to avoid this bug: https://github.com/pypa/setuptools/issues/4496
nvidia-ml-py>=12.555.43; platform_system !='Darwin'
wheel~=0.38.1
12 changes: 6 additions & 6 deletions scalene/scalene-gui/scalene-gui.js
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,7 @@ function makeBar(python, native, system, params) {
}


function makeGPUPie(util) {
function makeGPUPie(util, gpu_device, params) {
return {
$schema: "https://vega.github.io/schema/vega-lite/v5.json",
config: {
Expand Down Expand Up @@ -940,7 +940,7 @@ function makeGPUPie(util) {
legend: false,
scale: { range: ["goldenrod", "#f4e6c2"] },
},
tooltip: [{ field: "c", type: "nominal", title: "GPU" }],
tooltip: [{ field: "c", type: "nominal", title: gpu_device }],
},
};
}
Expand Down Expand Up @@ -986,7 +986,7 @@ function makeGPUBar(util, gpu_device, params) {
legend: false,
scale: { range: ["goldenrod", "#f4e6c2"] },
},
tooltip: [{ field: "dd", type: "nominal", title: gpu_device }],
tooltip: [{ field: "dd", type: "nominal", title: gpu_device + ":" } ],
},
},
{
Expand All @@ -1004,7 +1004,7 @@ function makeGPUBar(util, gpu_device, params) {
},
text: { field: "d" },
color: { value: "white" },
tooltip: [{ field: "dd", type: "nominal", title: gpu_device }],
tooltip: [{ field: "dd", type: "nominal", title: gpu_device + ":" } ],
},
},
],
Expand Down Expand Up @@ -1560,8 +1560,8 @@ function makeProfileLine(
s += `<td style="width: 50; vertical-align: middle" align="right" data-sort="${line.n_gpu_percent}">`;
s += `<span style="height: 20; width: 30; vertical-align: middle" id="gpu_pie${gpu_pies.length}"></span>`;
s += "</td>";
// gpu_pies.push(makeGPUPie(line.n_gpu_percent));
gpu_pies.push(makeGPUBar(line.n_gpu_percent, prof.gpu_device, { height: 20, width: 100 }));
gpu_pies.push(makeGPUPie(line.n_gpu_percent, prof.gpu_device, { height: 20, width: 100 }));
// gpu_pies.push(makeGPUBar(line.n_gpu_percent, prof.gpu_device, { height: 20, width: 100 }));
}
if (true) {
if (line.n_gpu_peak_memory_mb < 1.0 || line.n_gpu_percent < 1.0) {
Expand Down
7 changes: 5 additions & 2 deletions scalene/scalene_accelerator.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from typing import Tuple
from abc import ABC, abstractmethod


# Base class for accelerators (GPUs, TPUs, etc.)
class ScaleneAccelerator(ABC):

@abstractmethod
def has_gpu(self) -> bool:
pass
Expand All @@ -20,4 +21,6 @@ def reinit(self) -> None:
def get_stats(self) -> Tuple[float, float]:
pass


@abstractmethod
def get_num_cores(self) -> int:
pass
7 changes: 6 additions & 1 deletion scalene/scalene_apple_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from scalene.scalene_accelerator import ScaleneAccelerator


class ScaleneAppleGPU(ScaleneAccelerator):
"""Wrapper class for Apple integrated GPU statistics."""

Expand All @@ -22,7 +23,7 @@ def __init__(self, sampling_frequency: int = 100) -> None:

def gpu_device(self) -> str:
return "GPU"

def has_gpu(self) -> bool:
"""True iff there is a GPU"""
# Disabling Apple GPU, since it does not collect per-process statistics.
Expand All @@ -32,6 +33,10 @@ def reinit(self) -> None:
"""A NOP, here for compatibility with the nvidia wrapper."""
return

def get_num_cores(self) -> int:
# FIXME: not yet implemented
return 1

def get_stats(self) -> Tuple[float, float]:
"""Returns a tuple of (utilization%, memory in use)"""
if not self.has_gpu():
Expand Down
48 changes: 33 additions & 15 deletions scalene/scalene_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import numpy as np


class ScaleneJSON:
@staticmethod
def memory_consumed_str(size_in_mb: float) -> str:
Expand Down Expand Up @@ -65,19 +66,25 @@ def __init__(self) -> None:
self.gpu_device = ""

def rdp(self, points, epsilon):
"""
Ramer-Douglas-Peucker algorithm implementation using NumPy
"""
Ramer-Douglas-Peucker algorithm implementation using NumPy
"""

def perpendicular_distance(point, start, end):
if np.all(start == end):
return np.linalg.norm(point - start)
return np.abs(np.cross(end - start, start - point) / np.linalg.norm(end - start))
return np.abs(
np.cross(end - start, start - point)
/ np.linalg.norm(end - start)
)

def recursive_rdp(points, start: int, end: int, epsilon: float):
dmax = 0.0
index = start
for i in range(start + 1, end):
d = perpendicular_distance(points[i], points[start], points[end])
d = perpendicular_distance(
points[i], points[start], points[end]
)
if d > dmax:
index = i
dmax = d
Expand All @@ -92,9 +99,9 @@ def recursive_rdp(points, start: int, end: int, epsilon: float):
start = 0
end = len(points) - 1
return np.array(recursive_rdp(points, start, end, epsilon))

def compress_samples(
self, samples: List[Any], max_footprint: float
self, samples: List[Any], max_footprint: float
) -> Any:
# Try to reduce the number of samples with the
# Ramer-Douglas-Peucker algorithm, which attempts to
Expand All @@ -107,19 +114,28 @@ def compress_samples(

if True:
# FIXME: bypassing RDP for now
#return samples[:self.max_sparkline_samples]
# return samples[:self.max_sparkline_samples]

new_samples = sorted(random.sample(list(map(tuple, samples)), self.max_sparkline_samples))
new_samples = sorted(
random.sample(
list(map(tuple, samples)), self.max_sparkline_samples
)
)
return new_samples

else:
epsilon = (len(samples) / (3 * self.max_sparkline_samples)) * 2

# Use NumPy for RDP algorithm
new_samples = self.rdp(np.array(samples), epsilon)

if len(new_samples) > self.max_sparkline_samples:
new_samples = sorted(random.sample(list(map(tuple, new_samples)), self.max_sparkline_samples))
new_samples = sorted(
random.sample(
list(map(tuple, new_samples)),
self.max_sparkline_samples,
)
)

return new_samples

Expand Down Expand Up @@ -180,11 +196,13 @@ def output_profile_line(
n_cpu_percent_c = 0
n_cpu_percent_python = 0

n_gpu_percent = n_gpu_samples * 100
# n_gpu_percent = n_gpu_samples * 100

if False:
if stats.total_gpu_samples:
n_gpu_percent = n_gpu_samples * 100 / stats.total_gpu_samples
if True:
if stats.n_gpu_samples[fname][line_no]:
n_gpu_percent = (
n_gpu_samples * 100 / stats.n_gpu_samples[fname][line_no]
) # total_gpu_samples
else:
n_gpu_percent = 0

Expand Down Expand Up @@ -358,7 +376,7 @@ def output_profiles(
),
"files": {},
"gpu": self.gpu,
"gpu_device" : self.gpu_device,
"gpu_device": self.gpu_device,
"memory": profile_memory,
"samples": stats.memory_footprint_samples,
"stacks": stks,
Expand Down
Loading

0 comments on commit 29f83f4

Please sign in to comment.