Capture GPU metrics from PA (#519)
* Can parse GPU metrics provided by PA. Unit testing added

* Added type checking + checking for new PA cases

* Making uuid a string

* Combining find and create methods

* Fixing type checking errors

* Changed return to continue
nv-braf authored and mc-nv committed Sep 12, 2022
1 parent 24e2e78 commit 25b27d6
Showing 4 changed files with 296 additions and 33 deletions.
128 changes: 101 additions & 27 deletions model_analyzer/perf_analyzer/perf_analyzer.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Any, Dict, List, Union, Tuple
 from model_analyzer.model_analyzer_exceptions \
     import TritonModelAnalyzerException
 from model_analyzer.record.types.perf_latency_avg import PerfLatencyAvg
@@ -33,6 +34,12 @@
 from model_analyzer.record.types.perf_server_compute_output \
     import PerfServerComputeOutput
 
+from model_analyzer.record.record import Record
+from model_analyzer.record.types.gpu_utilization import GPUUtilization
+from model_analyzer.record.types.gpu_power_usage import GPUPowerUsage
+from model_analyzer.record.types.gpu_used_memory import GPUUsedMemory
+from model_analyzer.record.types.gpu_total_memory import GPUTotalMemory
+
 from model_analyzer.constants import \
     INTERVAL_SLEEP_TIME, LOGGER_NAME, MEASUREMENT_REQUEST_COUNT_STEP, \
     MEASUREMENT_WINDOW_STEP, PERF_ANALYZER_MEASUREMENT_WINDOW, \
@@ -56,23 +63,33 @@ class PerfAnalyzer:
     with perf_analyzer.
     """
 
+    GPU_METRIC_UUID = 0
+    GPU_METRIC_VALUE = 1
+
     #yapf: disable
     PA_SUCCESS, PA_FAIL, PA_RETRY = 0, 1, 2
 
     METRIC_TAG, CSV_STRING, RECORD_CLASS, REDUCTION_FACTOR = 0, 1, 2, 3
     perf_metric_table = [
-        ["perf_latency_avg", "Avg latency", PerfLatencyAvg, 1000],
-        ["perf_latency_p90", "p90 latency", PerfLatencyP90, 1000],
-        ["perf_latency_p95", "p95 latency", PerfLatencyP95, 1000],
-        ["perf_latency_p99", "p99 latency", PerfLatencyP99, 1000],
-        ["perf_throughput", "Inferences/Second", PerfThroughput, 1],
-        ["perf_client_send_recv", "request/response", PerfClientSendRecv, 1000],
-        ["perf_client_send_recv", "send/recv", PerfClientSendRecv, 1000],
-        ["perf_client_response_wait", "response wait", PerfClientResponseWait, 1000],
-        ["perf_server_queue", "Server Queue", PerfServerQueue, 1000],
-        ["perf_server_compute_infer", "Server Compute Infer", PerfServerComputeInfer, 1000],
-        ["perf_server_compute_input", "Server Compute Input", PerfServerComputeInput, 1000],
-        ["perf_server_compute_output", "Server Compute Output", PerfServerComputeOutput, 1000]
+        ["perf_latency_avg", "Avg latency", PerfLatencyAvg, "1000"],
+        ["perf_latency_p90", "p90 latency", PerfLatencyP90, "1000"],
+        ["perf_latency_p95", "p95 latency", PerfLatencyP95, "1000"],
+        ["perf_latency_p99", "p99 latency", PerfLatencyP99, "1000"],
+        ["perf_throughput", "Inferences/Second", PerfThroughput, "1"],
+        ["perf_client_send_recv", "request/response", PerfClientSendRecv, "1000"],
+        ["perf_client_send_recv", "send/recv", PerfClientSendRecv, "1000"],
+        ["perf_client_response_wait", "response wait", PerfClientResponseWait, "1000"],
+        ["perf_server_queue", "Server Queue", PerfServerQueue, "1000"],
+        ["perf_server_compute_infer", "Server Compute Infer", PerfServerComputeInfer, "1000"],
+        ["perf_server_compute_input", "Server Compute Input", PerfServerComputeInput, "1000"],
+        ["perf_server_compute_output", "Server Compute Output", PerfServerComputeOutput, "1000"]
     ]
 
+    gpu_metric_table = [
+        ["gpu_utilization", "Avg GPU Utilizations", GPUUtilization],
+        ["gpu_power_usage", "Avg GPU Power Usages", GPUPowerUsage],
+        ["gpu_used_memory", "Max GPU Memory Usages", GPUUsedMemory],
+        ["gpu_total_memory", "Total GPU Memory Usages", GPUTotalMemory]
+    ]
     #yapf: enable

@@ -84,6 +101,14 @@ def get_perf_metrics():
         ]
         return perf_metrics
 
+    @staticmethod
+    def get_gpu_metrics():
+        gpu_metrics = [
+            gpu_metric[PerfAnalyzer.RECORD_CLASS]
+            for gpu_metric in PerfAnalyzer.gpu_metric_table
+        ]
+        return gpu_metrics
+
     def __init__(self, path, config, max_retries, timeout, max_cpu_util):
         """
         Parameters
@@ -402,26 +427,75 @@ def _parse_outputs(self, metrics):
         ]:
             os.remove(perf_config['latency-report-file'])
 
-    def _extract_metrics_from_row(self, requested_metrics, row_metrics):
+    def _extract_metrics_from_row(self, requested_metrics: List[Record],
+                                  row_metrics: Dict[str, str]) -> List[Record]:
         """
         Extracts the requested metrics from the CSV's row and creates a list of Records
         """
-        perf_records = []
-        for perf_metric in PerfAnalyzer.perf_metric_table:
-            if self._is_perf_metric_requested_and_in_row(
-                    perf_metric, requested_metrics, row_metrics):
-                value = float(row_metrics[perf_metric[PerfAnalyzer.CSV_STRING]]
-                              ) / perf_metric[PerfAnalyzer.REDUCTION_FACTOR]
-
-                perf_records.append(
-                    perf_metric[PerfAnalyzer.RECORD_CLASS](value))
-
-        return perf_records
-
-    def _is_perf_metric_requested_and_in_row(self, perf_metric,
-                                             requested_metrics, row_metrics):
-        tag_match = any(
-            perf_metric[PerfAnalyzer.METRIC_TAG] in requested_metric.tag
-            for requested_metric in requested_metrics)
-
-        return tag_match and perf_metric[PerfAnalyzer.CSV_STRING] in row_metrics
+        perf_records = self._create_records_from_perf_metrics(
+            requested_metrics, row_metrics)
+
+        gpu_records = self._create_records_from_gpu_metrics(
+            requested_metrics, row_metrics)
+
+        return perf_records + gpu_records
+
+    def _create_records_from_perf_metrics(
+            self, requested_metrics: List[Record],
+            row_metrics: Dict[str, str]) -> List[Record]:
+        perf_records: List[Record] = []
+        for perf_metric in PerfAnalyzer.perf_metric_table:
+            if self._is_metric_requested_and_in_row(perf_metric,
+                                                    requested_metrics,
+                                                    row_metrics):
+                value = float(row_metrics[str(
+                    perf_metric[PerfAnalyzer.CSV_STRING])])
+                reduction_factor = float(
+                    str(perf_metric[PerfAnalyzer.REDUCTION_FACTOR]))
+                perf_value = value / reduction_factor
+
+                perf_records.append(perf_metric[PerfAnalyzer.RECORD_CLASS](
+                    perf_value))  # type: ignore
+
+        return perf_records
+
+    def _create_records_from_gpu_metrics(
+            self, requested_metrics: List[Record],
+            row_metrics: Dict[str, str]) -> List[Record]:
+        # GPU metrics have the following format: UUID0:value0;UUID1:value1;...
+        gpu_records: List[Record] = []
+        for gpu_metric in PerfAnalyzer.gpu_metric_table:
+            if self._is_metric_requested_and_in_row(gpu_metric,
+                                                    requested_metrics,
+                                                    row_metrics):
+                gpu_metric_string = row_metrics[str(
+                    gpu_metric[PerfAnalyzer.CSV_STRING])]
+
+                # Covers the case where PA didn't provide data
+                if not gpu_metric_string:
+                    continue
+
+                # Needed because PA might terminate substring with a ;
+                if gpu_metric_string and gpu_metric_string[-1] == ';':
+                    gpu_metric_string = gpu_metric_string[:-1]
+
+                gpu_metric_string_tuples = gpu_metric_string.split(';')
+
+                for gpu_metric_string_tuple in gpu_metric_string_tuples:
+                    gpu_metric_tuple = gpu_metric_string_tuple.split(':')
+
+                    gpu_records.append(gpu_metric[PerfAnalyzer.RECORD_CLASS](
+                        value=float(
+                            gpu_metric_tuple[PerfAnalyzer.GPU_METRIC_VALUE]),
+                        device_uuid=gpu_metric_tuple[
+                            PerfAnalyzer.GPU_METRIC_UUID]))  # type: ignore
+
+        return gpu_records
+
+    def _is_metric_requested_and_in_row(self, metric: List[object],
+                                        requested_metrics: List[Record],
+                                        row_metrics: Dict[str, str]) -> bool:
+        tag_match = any(metric[PerfAnalyzer.METRIC_TAG] in requested_metric.tag
+                        for requested_metric in requested_metrics)
+
+        return tag_match and metric[PerfAnalyzer.CSV_STRING] in row_metrics
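
The GPU-metric parsing above can be exercised in isolation. Below is a minimal sketch, not code from this commit: the helper name parse_gpu_metric_field and the sample UUIDs are invented, but the splitting logic mirrors _create_records_from_gpu_metrics, including the empty-field and trailing-';' cases.

# Minimal sketch of parsing a "UUID0:value0;UUID1:value1;..." CSV field,
# as _create_records_from_gpu_metrics does; names and data are hypothetical.
from typing import Dict


def parse_gpu_metric_field(field: str) -> Dict[str, float]:
    # Covers the case where PA didn't provide data
    if not field:
        return {}
    # PA might terminate the string with a ';'
    field = field.rstrip(';')
    # Each ';'-separated entry is a 'UUID:value' pair
    return {
        uuid: float(value)
        for uuid, value in (pair.split(':') for pair in field.split(';'))
    }


print(parse_gpu_metric_field("GPU-aaaa:63.5;GPU-bbbb:48.0;"))
# -> {'GPU-aaaa': 63.5, 'GPU-bbbb': 48.0}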
95 changes: 95 additions & 0 deletions model_analyzer/record/types/gpu_total_memory.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import total_ordering
+from model_analyzer.record.gpu_record import GPURecord
+
+
+@total_ordering
+class GPUTotalMemory(GPURecord):
+    """
+    The total memory in the GPU.
+    """
+
+    tag = "gpu_total_memory"
+
+    def __init__(self, value, device_uuid=None, timestamp=0):
+        """
+        Parameters
+        ----------
+        value : float
+            The value of the GPU metric
+        device_uuid : str
+            The GPU device uuid this metric is associated with.
+        timestamp : int
+            The timestamp for the record in nanoseconds
+        """
+
+        super().__init__(value, device_uuid, timestamp)
+
+    @staticmethod
+    def header(aggregation_tag=False):
+        """
+        Parameters
+        ----------
+        aggregation_tag : bool
+            An optional tag that may be displayed as part of the header,
+            indicating that this record has been aggregated using max, min,
+            average, etc.
+
+        Returns
+        -------
+        str
+            The full name of the metric.
+        """
+
+        return ("Max " if aggregation_tag else "") + "GPU Memory Available (MB)"
+
+    def __eq__(self, other):
+        """
+        Allows checking for equality between two records
+        """
+
+        return self.value() == other.value()
+
+    def __lt__(self, other):
+        """
+        Allows checking if this record is less than the other
+        """
+
+        return self.value() < other.value()
+
+    def __add__(self, other):
+        """
+        Allows adding two records together
+        to produce a brand new record.
+        """
+
+        return GPUTotalMemory(device_uuid=None,
+                              value=(self.value() + other.value()))
+
+    def __sub__(self, other):
+        """
+        Allows subtracting one record from another
+        to produce a brand new record.
+        """
+
+        return GPUTotalMemory(device_uuid=None,
+                              value=(self.value() - other.value()))
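
As a quick illustration of how these record types behave, here is a hedged usage sketch; it is not part of the commit, the UUIDs are invented, and it assumes the value() accessor inherited from the base Record class.

from model_analyzer.record.types.gpu_total_memory import GPUTotalMemory

# Two hypothetical per-GPU records
mem_a = GPUTotalMemory(value=16384.0, device_uuid="GPU-aaaa")
mem_b = GPUTotalMemory(value=8192.0, device_uuid="GPU-bbbb")

print(GPUTotalMemory.header())      # GPU Memory Available (MB)
print(GPUTotalMemory.header(True))  # Max GPU Memory Available (MB)

# @total_ordering derives >, >=, <= from __eq__ and __lt__
print(mem_a > mem_b)  # True

# Adding records produces a new record with no device_uuid
print((mem_a + mem_b).value())  # 24576.0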