Capture GPU metrics from PA (#519)
* Can parse GPU metrics provided by PA. Unit testing added

* Added type checking + checking for new PA cases

* Making uuid a string

* Combining find and create methods

* Fixing type checking errors

* Changed return to continue
nv-braf authored and mc-nv committed Sep 12, 2022
1 parent 24e2e78 commit 25b27d6
Showing 4 changed files with 296 additions and 33 deletions.
128 changes: 101 additions & 27 deletions model_analyzer/perf_analyzer/perf_analyzer.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Any, Dict, List, Union, Tuple
 from model_analyzer.model_analyzer_exceptions \
     import TritonModelAnalyzerException
 from model_analyzer.record.types.perf_latency_avg import PerfLatencyAvg
@@ -33,6 +34,12 @@
 from model_analyzer.record.types.perf_server_compute_output \
     import PerfServerComputeOutput
 
+from model_analyzer.record.record import Record
+from model_analyzer.record.types.gpu_utilization import GPUUtilization
+from model_analyzer.record.types.gpu_power_usage import GPUPowerUsage
+from model_analyzer.record.types.gpu_used_memory import GPUUsedMemory
+from model_analyzer.record.types.gpu_total_memory import GPUTotalMemory
+
 from model_analyzer.constants import \
     INTERVAL_SLEEP_TIME, LOGGER_NAME, MEASUREMENT_REQUEST_COUNT_STEP, \
     MEASUREMENT_WINDOW_STEP, PERF_ANALYZER_MEASUREMENT_WINDOW, \
@@ -56,23 +63,33 @@ class PerfAnalyzer:
     with perf_analyzer.
     """
 
+    GPU_METRIC_UUID = 0
+    GPU_METRIC_VALUE = 1
+
     #yapf: disable
     PA_SUCCESS, PA_FAIL, PA_RETRY = 0, 1, 2
 
     METRIC_TAG, CSV_STRING, RECORD_CLASS, REDUCTION_FACTOR = 0, 1, 2, 3
     perf_metric_table = [
-        ["perf_latency_avg", "Avg latency", PerfLatencyAvg, 1000],
-        ["perf_latency_p90", "p90 latency", PerfLatencyP90, 1000],
-        ["perf_latency_p95", "p95 latency", PerfLatencyP95, 1000],
-        ["perf_latency_p99", "p99 latency", PerfLatencyP99, 1000],
-        ["perf_throughput", "Inferences/Second", PerfThroughput, 1],
-        ["perf_client_send_recv", "request/response", PerfClientSendRecv, 1000],
-        ["perf_client_send_recv", "send/recv", PerfClientSendRecv, 1000],
-        ["perf_client_response_wait", "response wait", PerfClientResponseWait, 1000],
-        ["perf_server_queue", "Server Queue", PerfServerQueue, 1000],
-        ["perf_server_compute_infer", "Server Compute Infer", PerfServerComputeInfer, 1000],
-        ["perf_server_compute_input", "Server Compute Input", PerfServerComputeInput, 1000],
-        ["perf_server_compute_output", "Server Compute Output", PerfServerComputeOutput, 1000]
+        ["perf_latency_avg", "Avg latency", PerfLatencyAvg, "1000"],
+        ["perf_latency_p90", "p90 latency", PerfLatencyP90, "1000"],
+        ["perf_latency_p95", "p95 latency", PerfLatencyP95, "1000"],
+        ["perf_latency_p99", "p99 latency", PerfLatencyP99, "1000"],
+        ["perf_throughput", "Inferences/Second", PerfThroughput, "1"],
+        ["perf_client_send_recv", "request/response", PerfClientSendRecv, "1000"],
+        ["perf_client_send_recv", "send/recv", PerfClientSendRecv, "1000"],
+        ["perf_client_response_wait", "response wait", PerfClientResponseWait, "1000"],
+        ["perf_server_queue", "Server Queue", PerfServerQueue, "1000"],
+        ["perf_server_compute_infer", "Server Compute Infer", PerfServerComputeInfer, "1000"],
+        ["perf_server_compute_input", "Server Compute Input", PerfServerComputeInput, "1000"],
+        ["perf_server_compute_output", "Server Compute Output", PerfServerComputeOutput, "1000"]
     ]
 
+    gpu_metric_table = [
+        ["gpu_utilization", "Avg GPU Utilizations", GPUUtilization],
+        ["gpu_power_usage", "Avg GPU Power Usages", GPUPowerUsage],
+        ["gpu_used_memory", "Max GPU Memory Usages", GPUUsedMemory],
+        ["gpu_total_memory", "Total GPU Memory Usages", GPUTotalMemory]
+    ]
     #yapf: enable

@@ -84,6 +101,14 @@ def get_perf_metrics():
         ]
         return perf_metrics
 
+    @staticmethod
+    def get_gpu_metrics():
+        gpu_metrics = [
+            gpu_metric[PerfAnalyzer.RECORD_CLASS]
+            for gpu_metric in PerfAnalyzer.gpu_metric_table
+        ]
+        return gpu_metrics
+
     def __init__(self, path, config, max_retries, timeout, max_cpu_util):
         """
         Parameters
@@ -402,26 +427,75 @@ def _parse_outputs(self, metrics):
         ]:
             os.remove(perf_config['latency-report-file'])
 
-    def _extract_metrics_from_row(self, requested_metrics, row_metrics):
+    def _extract_metrics_from_row(self, requested_metrics: List[Record],
+                                  row_metrics: Dict[str, str]) -> List[Record]:
         """
         Extracts the requested metrics from the CSV's row and creates a list of Records
         """
-        perf_records = []
-        for perf_metric in PerfAnalyzer.perf_metric_table:
-            if self._is_perf_metric_requested_and_in_row(
-                    perf_metric, requested_metrics, row_metrics):
-                value = float(row_metrics[perf_metric[PerfAnalyzer.CSV_STRING]]
-                              ) / perf_metric[PerfAnalyzer.REDUCTION_FACTOR]
-
-                perf_records.append(
-                    perf_metric[PerfAnalyzer.RECORD_CLASS](value))
-
-        return perf_records
-
-    def _is_perf_metric_requested_and_in_row(self, perf_metric,
-                                             requested_metrics, row_metrics):
-        tag_match = any(
-            perf_metric[PerfAnalyzer.METRIC_TAG] in requested_metric.tag
-            for requested_metric in requested_metrics)
-
-        return tag_match and perf_metric[PerfAnalyzer.CSV_STRING] in row_metrics
+        perf_records = self._create_records_from_perf_metrics(
+            requested_metrics, row_metrics)
+
+        gpu_records = self._create_records_from_gpu_metrics(
+            requested_metrics, row_metrics)
+
+        return perf_records + gpu_records
+
+    def _create_records_from_perf_metrics(
+            self, requested_metrics: List[Record],
+            row_metrics: Dict[str, str]) -> List[Record]:
+        perf_records: List[Record] = []
+        for perf_metric in PerfAnalyzer.perf_metric_table:
+            if self._is_metric_requested_and_in_row(perf_metric,
+                                                    requested_metrics,
+                                                    row_metrics):
+                value = float(row_metrics[str(
+                    perf_metric[PerfAnalyzer.CSV_STRING])])
+                reduction_factor = float(
+                    str(perf_metric[PerfAnalyzer.REDUCTION_FACTOR]))
+                perf_value = value / reduction_factor
+
+                perf_records.append(perf_metric[PerfAnalyzer.RECORD_CLASS](
+                    perf_value))  # type: ignore
+
+        return perf_records
+
+    def _create_records_from_gpu_metrics(
+            self, requested_metrics: List[Record],
+            row_metrics: Dict[str, str]) -> List[Record]:
+        # GPU metrics have the following format: UUID0:value0;UUID1:value1;...
+        gpu_records: List[Record] = []
+        for gpu_metric in PerfAnalyzer.gpu_metric_table:
+            if self._is_metric_requested_and_in_row(gpu_metric,
+                                                    requested_metrics,
+                                                    row_metrics):
+                gpu_metric_string = row_metrics[str(
+                    gpu_metric[PerfAnalyzer.CSV_STRING])]
+
+                # Covers the case where PA didn't provide data
+                if not gpu_metric_string:
+                    continue
+
+                # Needed because PA might terminate substring with a ;
+                if gpu_metric_string and gpu_metric_string[-1] == ';':
+                    gpu_metric_string = gpu_metric_string[:-1]
+
+                gpu_metric_string_tuples = gpu_metric_string.split(';')
+
+                for gpu_metric_string_tuple in gpu_metric_string_tuples:
+                    gpu_metric_tuple = gpu_metric_string_tuple.split(':')
+
+                    gpu_records.append(gpu_metric[PerfAnalyzer.RECORD_CLASS](
+                        value=float(
+                            gpu_metric_tuple[PerfAnalyzer.GPU_METRIC_VALUE]),
+                        device_uuid=gpu_metric_tuple[
+                            PerfAnalyzer.GPU_METRIC_UUID]))  # type: ignore
+
+        return gpu_records
+
+    def _is_metric_requested_and_in_row(self, metric: List[object],
+                                        requested_metrics: List[Record],
+                                        row_metrics: Dict[str, str]) -> bool:
+        tag_match = any(metric[PerfAnalyzer.METRIC_TAG] in requested_metric.tag
+                        for requested_metric in requested_metrics)
+
+        return tag_match and metric[PerfAnalyzer.CSV_STRING] in row_metrics
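
The GPU-metric parsing above can be exercised in isolation. Below is a minimal sketch, not code from this commit: the helper name parse_gpu_metric_field and the sample UUIDs are invented, but the splitting logic mirrors _create_records_from_gpu_metrics, including the empty-field and trailing-';' cases.

# Minimal sketch of parsing a "UUID0:value0;UUID1:value1;..." CSV field,
# as _create_records_from_gpu_metrics does; names and data are hypothetical.
from typing import Dict


def parse_gpu_metric_field(field: str) -> Dict[str, float]:
    # Covers the case where PA didn't provide data
    if not field:
        return {}
    # PA might terminate the string with a ';'
    field = field.rstrip(';')
    # Each ';'-separated entry is a 'UUID:value' pair
    return {
        uuid: float(value)
        for uuid, value in (pair.split(':') for pair in field.split(';'))
    }


print(parse_gpu_metric_field("GPU-aaaa:63.5;GPU-bbbb:48.0;"))
# -> {'GPU-aaaa': 63.5, 'GPU-bbbb': 48.0}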
95 changes: 95 additions & 0 deletions model_analyzer/record/types/gpu_total_memory.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import total_ordering
+from model_analyzer.record.gpu_record import GPURecord
+
+
+@total_ordering
+class GPUTotalMemory(GPURecord):
+    """
+    The total memory in the GPU.
+    """
+
+    tag = "gpu_total_memory"
+
+    def __init__(self, value, device_uuid=None, timestamp=0):
+        """
+        Parameters
+        ----------
+        value : float
+            The value of the GPU metric
+        device_uuid : str
+            The GPU device uuid this metric is associated with.
+        timestamp : int
+            The timestamp for the record in nanoseconds
+        """
+
+        super().__init__(value, device_uuid, timestamp)
+
+    @staticmethod
+    def header(aggregation_tag=False):
+        """
+        Parameters
+        ----------
+        aggregation_tag : bool
+            An optional tag that may be displayed as part of the header,
+            indicating that this record has been aggregated using max, min,
+            average, etc.
+
+        Returns
+        -------
+        str
+            The full name of the metric.
+        """
+
+        return ("Max " if aggregation_tag else "") + "GPU Memory Available (MB)"
+
+    def __eq__(self, other):
+        """
+        Allows checking for equality between two records
+        """
+
+        return self.value() == other.value()
+
+    def __lt__(self, other):
+        """
+        Allows checking if this record is less than the other
+        """
+
+        return self.value() < other.value()
+
+    def __add__(self, other):
+        """
+        Allows adding two records together
+        to produce a brand new record.
+        """
+
+        return GPUTotalMemory(device_uuid=None,
+                              value=(self.value() + other.value()))
+
+    def __sub__(self, other):
+        """
+        Allows subtracting one record from another
+        to produce a brand new record.
+        """
+
+        return GPUTotalMemory(device_uuid=None,
+                              value=(self.value() - other.value()))
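
As a quick illustration of how these record types behave, here is a hedged usage sketch; it is not part of the commit, the UUIDs are invented, and it assumes the value() accessor inherited from the base Record class.

from model_analyzer.record.types.gpu_total_memory import GPUTotalMemory

# Two hypothetical per-GPU records
mem_a = GPUTotalMemory(value=16384.0, device_uuid="GPU-aaaa")
mem_b = GPUTotalMemory(value=8192.0, device_uuid="GPU-bbbb")

print(GPUTotalMemory.header())      # GPU Memory Available (MB)
print(GPUTotalMemory.header(True))  # Max GPU Memory Available (MB)

# @total_ordering derives >, >=, <= from __eq__ and __lt__
print(mem_a > mem_b)  # True

# Adding records produces a new record with no device_uuid
print((mem_a + mem_b).value())  # 24576.0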