diff --git a/model_analyzer/constants.py b/model_analyzer/constants.py
index 34858c4ea..f2bfcd315 100644
--- a/model_analyzer/constants.py
+++ b/model_analyzer/constants.py
@@ -49,6 +49,7 @@
 INTERVAL_SLEEP_TIME = 1
 PERF_ANALYZER_MEASUREMENT_WINDOW = 5000
 PERF_ANALYZER_MINIMUM_REQUEST_COUNT = 50
+SECONDS_TO_MILLISECONDS_MULTIPLIER = 1000

 # Triton Server
 SERVER_OUTPUT_TIMEOUT_SECS = 5
diff --git a/model_analyzer/perf_analyzer/perf_analyzer.py b/model_analyzer/perf_analyzer/perf_analyzer.py
index f76fc2a33..1a20945c3 100644
--- a/model_analyzer/perf_analyzer/perf_analyzer.py
+++ b/model_analyzer/perf_analyzer/perf_analyzer.py
@@ -38,7 +38,7 @@
 from model_analyzer.record.types.gpu_utilization import GPUUtilization
 from model_analyzer.record.types.gpu_power_usage import GPUPowerUsage
 from model_analyzer.record.types.gpu_used_memory import GPUUsedMemory
-from model_analyzer.record.types.gpu_total_memory import GPUTotalMemory
+from model_analyzer.record.types.gpu_free_memory import GPUFreeMemory

 from model_analyzer.constants import \
     INTERVAL_SLEEP_TIME, LOGGER_NAME, MEASUREMENT_REQUEST_COUNT_STEP, \
@@ -86,10 +86,10 @@ class PerfAnalyzer:
     ]

     gpu_metric_table = [
-        ["gpu_utilization", "Avg GPU Utilizations", GPUUtilization],
-        ["gpu_power_usage", "Avg GPU Power Usages", GPUPowerUsage],
-        ["gpu_used_memory", "Max GPU Memory Usages", GPUUsedMemory],
-        ["gpu_total_memory", "Total GPU Memory Usages", GPUTotalMemory]
+        ["gpu_utilization", "Avg GPU Utilization", GPUUtilization, "0.01"],
+        ["gpu_power_usage", "Avg GPU Power Usage", GPUPowerUsage, "1"],
+        ["gpu_used_memory", "Max GPU Memory Usage", GPUUsedMemory, "1000000"],
+        ["gpu_free_memory", "Total GPU Memory", GPUFreeMemory, "1000000"]
     ]
     #yapf: enable
@@ -133,6 +133,7 @@ def __init__(self, path, config, max_retries, timeout, max_cpu_util):
         self._timeout = timeout
         self._output = ""
         self._perf_records = {}
+        self._gpu_records = []
         self._max_cpu_util = max_cpu_util

     def run(self, metrics, env=None):
@@ -183,11 +184,11 @@ def run(self, metrics, env=None):

         return self.PA_SUCCESS

-    def get_records(self):
+    def get_perf_records(self):
         """
         Returns
         -------
-        The records from the last perf_analyzer run
+        The perf records from the last perf_analyzer run
         """

         if self._perf_records:
@@ -196,6 +197,15 @@ def get_records(self):
                 "Attempted to get perf_analyzer results"
                 "without calling run first.")

+    def get_gpu_records(self):
+        """
+        Returns
+        -------
+        The gpu records from the last perf_analyzer run
+        """
+
+        return self._gpu_records
+
     def output(self):
         """
         Returns
@@ -331,7 +341,16 @@ def _get_process_output(self):
         self._cmd_log.seek(0)
         tmp_output = self._cmd_log.read()
         self._cmd_log.close()
-        return tmp_output.decode('utf-8')
+
+        # PA has occasionally output non-UTF-8 bytes which would cause MA
+        # to assert. In that case, just ignore the result instead of asserting
+        result = ""
+        try:
+            result = tmp_output.decode('utf-8')
+        except:
+            pass
+
+        return result

     def _auto_adjust_parameters(self, process):
         """
@@ -419,28 +438,17 @@ def _parse_outputs(self, metrics):

             for row in csv_reader:
                 self._perf_records[perf_config[
-                    'model-name']] = self._extract_metrics_from_row(
+                    'model-name']] = self._extract_perf_records_from_row(
                         metrics, row)
+                self._gpu_records = self._extract_gpu_records_from_row(
+                    metrics, row)

         for perf_config in [
                 mrc.perf_config() for mrc in self._config.model_run_configs()
         ]:
             os.remove(perf_config['latency-report-file'])

-    def _extract_metrics_from_row(self, requested_metrics: List[Record],
-                                  row_metrics: Dict[str, str]) -> List[Record]:
-        """
-        Extracts the requested metrics from the CSV's row and creates a list of Records
-        """
-        perf_records = self._create_records_from_perf_metrics(
-            requested_metrics, row_metrics)
-
-        gpu_records = self._create_records_from_gpu_metrics(
-            requested_metrics, row_metrics)
-
-        return perf_records + gpu_records
-
-    def _create_records_from_perf_metrics(
+    def _extract_perf_records_from_row(
             self, requested_metrics: List[Record],
             row_metrics: Dict[str, str]) -> List[Record]:
         perf_records: List[Record] = []
@@ -459,7 +467,7 @@ def _create_records_from_perf_metrics(

         return perf_records

-    def _create_records_from_gpu_metrics(
+    def _extract_gpu_records_from_row(
             self, requested_metrics: List[Record],
             row_metrics: Dict[str, str]) -> List[Record]:
         # GPU metrics have the following format: UUID0:value0;UUID1:value1;...
@@ -484,14 +492,41 @@ def _create_records_from_gpu_metrics(

                 for gpu_metric_string_tuple in gpu_metric_string_tuples:
                     gpu_metric_tuple = gpu_metric_string_tuple.split(':')
-                    gpu_records.append(gpu_metric[PerfAnalyzer.RECORD_CLASS](
-                        value=float(
-                            gpu_metric_tuple[PerfAnalyzer.GPU_METRIC_VALUE]),
-                        device_uuid=gpu_metric_tuple[
-                            PerfAnalyzer.GPU_METRIC_UUID]))  # type: ignore
+                    uuid = gpu_metric_tuple[PerfAnalyzer.GPU_METRIC_UUID]
+                    tmp_value = float(
+                        gpu_metric_tuple[PerfAnalyzer.GPU_METRIC_VALUE])
+                    reduction_factor = float(
+                        str(gpu_metric[PerfAnalyzer.REDUCTION_FACTOR]))
+                    value = tmp_value / reduction_factor
+
+                    record = gpu_metric[PerfAnalyzer.RECORD_CLASS](
+                        value=value, device_uuid=uuid)  # type: ignore
+                    gpu_records.append(record)
+
+        self._cleanup_gpu_records(gpu_records)
         return gpu_records

+    def _cleanup_gpu_records(self, gpu_records):
+        # Recalculate GPUFreeMemory by removing the value of the associated GPUUsedMemory
+        # Remove any GPUFreeMemory records that don't have a matching GPUUsedMemory
+        indexes_to_remove = []
+        for i, record in enumerate(gpu_records):
+            if type(record) == GPUFreeMemory:
+                # Find matching UUID UsedMemory
+                found = False
+                for other_record in gpu_records:
+                    if type(other_record
+                           ) == GPUUsedMemory and record.device_uuid(
+                           ) == other_record.device_uuid():
+                        found = True
+                        record._value = record.value() - other_record.value()
+                        break
+                if not found:
+                    indexes_to_remove.append(i)
+        for i in reversed(indexes_to_remove):
+            del gpu_records[i]
+
     def _is_metric_requested_and_in_row(self, metric: List[object],
                                         requested_metrics: List[Record],
                                         row_metrics: Dict[str, str]) -> bool:
diff --git a/model_analyzer/perf_analyzer/perf_config.py b/model_analyzer/perf_analyzer/perf_config.py
index 93e43ad13..ad5c567d1 100644
--- a/model_analyzer/perf_analyzer/perf_config.py
+++ b/model_analyzer/perf_analyzer/perf_config.py
@@ -15,6 +15,7 @@
 from model_analyzer.model_analyzer_exceptions \
     import TritonModelAnalyzerException
 from model_analyzer.config.input.config_defaults import DEFAULT_MEASUREMENT_MODE
+from model_analyzer.constants import SECONDS_TO_MILLISECONDS_MULTIPLIER


 class PerfAnalyzerConfig:
@@ -39,7 +40,8 @@ class PerfAnalyzerConfig:
         'ssl-https-verify-host', 'ssl-https-ca-certificates-file',
         'ssl-https-client-certificate-type',
         'ssl-https-client-certificate-file', 'ssl-https-private-key-type',
-        'ssl-https-private-key-file'
+        'ssl-https-private-key-file', 'collect-metrics', 'metrics-url',
+        'metrics-interval'
     ]

     input_to_options = [
@@ -52,7 +54,8 @@ class PerfAnalyzerConfig:
     additive_args = ['input-data', 'shape']

     boolean_args = [
-        'streaming', 'async', 'sync', 'binary-search', 'ssl-grpc-use-ssl'
+        'streaming', 'async', 'sync', 'binary-search', 'ssl-grpc-use-ssl',
+        'collect-metrics'
     ]

     def __init__(self):
@@ -166,6 +169,14 @@ def update_config_from_profile_config(self, model_name, profile_config):
                 'protocol': profile_config.client_protocol,
                 'url': url
             })
+
+        metrics_interval = profile_config.monitoring_interval * SECONDS_TO_MILLISECONDS_MULTIPLIER
+        params.update({
+            'collect-metrics': 'True',
+            'metrics-url': profile_config.triton_metrics_url,
+            'metrics-interval': metrics_interval
+        })
+
         self.update_config(params)

     @classmethod
diff --git a/model_analyzer/record/metrics_manager.py b/model_analyzer/record/metrics_manager.py
index d222b9ceb..119d906e6 100644
--- a/model_analyzer/record/metrics_manager.py
+++ b/model_analyzer/record/metrics_manager.py
@@ -142,8 +142,7 @@ def _categorize_metrics(metric_tags, collect_cpu_metrics=False):
         gpu_metrics, perf_metrics, cpu_metrics = [], [], []
         # Separates metrics and objectives into related lists
         for metric in MetricsManager.get_metric_types(metric_tags):
-            if metric in DCGMMonitor.model_analyzer_to_dcgm_field or metric in RemoteMonitor.gpu_metrics.values(
-            ):
+            if metric in PerfAnalyzer.get_gpu_metrics():
                 gpu_metrics.append(metric)
             elif metric in PerfAnalyzer.get_perf_metrics():
                 perf_metrics.append(metric)
@@ -223,8 +222,8 @@ def profile_models(self, run_config):

         self._start_monitors(cpu_only=cpu_only)

-        perf_analyzer_metrics = self._run_perf_analyzer(run_config,
-                                                        perf_output_writer)
+        perf_analyzer_metrics, model_gpu_metrics = self._run_perf_analyzer(
+            run_config, perf_output_writer)

         if not perf_analyzer_metrics:
             self._stop_monitors(cpu_only=cpu_only)
@@ -232,8 +231,7 @@ def profile_models(self, run_config):
             return None

         # Get metrics for model inference and combine metrics that do not have GPU UUID
-        model_gpu_metrics = {}
-        if not cpu_only:
+        if not cpu_only and not model_gpu_metrics:
             model_gpu_metrics = self._get_gpu_inference_metrics()
         model_cpu_metrics = self._get_cpu_inference_metrics()
@@ -369,6 +367,7 @@ def _start_monitors(self, cpu_only=False):
         Start any metrics monitors
         """

+        self._gpu_monitor = None
         if not cpu_only:
             try:
                 if self._config.use_local_gpu_monitor:
@@ -446,7 +445,8 @@ def _run_perf_analyzer(self, run_config, perf_output_writer):
             timeout=self._config.perf_analyzer_timeout,
             max_cpu_util=self._config.perf_analyzer_cpu_util)

-        status = perf_analyzer.run(self._perf_metrics, env=perf_analyzer_env)
+        metrics_to_gather = self._perf_metrics + self._gpu_metrics
+        status = perf_analyzer.run(metrics_to_gather, env=perf_analyzer_env)

         if perf_output_writer:
             perf_output_writer.write(
@@ -459,16 +459,23 @@ def _run_perf_analyzer(self, run_config, perf_output_writer):

         # PerfAnalyzer run was not succesful
         if status == 1:
-            return None
+            return (None, None)
+
+        perf_records = perf_analyzer.get_perf_records()
+        gpu_records = perf_analyzer.get_gpu_records()

-        per_model_perf_records = perf_analyzer.get_records()
+        aggregated_perf_records = self._aggregate_perf_records(perf_records)
+        aggregated_gpu_records = self._aggregate_gpu_records(gpu_records)

-        for (model, perf_records) in per_model_perf_records.items():
+        return aggregated_perf_records, aggregated_gpu_records
+
+    def _aggregate_perf_records(self, perf_records):
+        per_model_perf_records = {}
+        for (model, records) in perf_records.items():
             perf_record_aggregator = RecordAggregator()
-            perf_record_aggregator.insert_all(perf_records)
+            perf_record_aggregator.insert_all(records)
             per_model_perf_records[model] = perf_record_aggregator.aggregate()
-
         return per_model_perf_records

     def _get_gpu_inference_metrics(self):
@@ -485,6 +492,10 @@ def _get_gpu_inference_metrics(self):

         # Stop and destroy DCGM monitor
         gpu_records = self._gpu_monitor.stop_recording_metrics()

+        gpu_metrics = self._aggregate_gpu_records(gpu_records)
+        return gpu_metrics
+
+    def _aggregate_gpu_records(self, gpu_records):
         # Insert all records into aggregator and get aggregated DCGM records
         gpu_record_aggregator = RecordAggregator()
         gpu_record_aggregator.insert_all(gpu_records)
diff --git a/tests/common/test_utils.py b/tests/common/test_utils.py
index 48bddf654..202f4820a 100644
--- a/tests/common/test_utils.py
+++ b/tests/common/test_utils.py
@@ -25,11 +25,12 @@
 from model_analyzer.record.metrics_manager import MetricsManager
 from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
 from model_analyzer.state.analyzer_state_manager import AnalyzerStateManager
-
+from model_analyzer.constants import SECONDS_TO_MILLISECONDS_MULTIPLIER
 from model_analyzer.config.input.config_defaults import \
     DEFAULT_BATCH_SIZES, DEFAULT_TRITON_LAUNCH_MODE, DEFAULT_CLIENT_PROTOCOL, \
     DEFAULT_MEASUREMENT_MODE, DEFAULT_TRITON_GRPC_ENDPOINT, DEFAULT_TRITON_HTTP_ENDPOINT, \
-    DEFAULT_TRITON_INSTALL_PATH, DEFAULT_OUTPUT_MODEL_REPOSITORY
+    DEFAULT_TRITON_INSTALL_PATH, DEFAULT_OUTPUT_MODEL_REPOSITORY, DEFAULT_TRITON_METRICS_URL, \
+    DEFAULT_MONITORING_INTERVAL

 import os
@@ -221,6 +222,10 @@ def construct_perf_analyzer_config(model_name='my-model',
         pa_config._args['triton-server-directory'] = DEFAULT_TRITON_INSTALL_PATH
         pa_config._args['model-repository'] = DEFAULT_OUTPUT_MODEL_REPOSITORY
     else:
+        pa_config._args['collect-metrics'] = 'True'
+        pa_config._args['metrics-url'] = DEFAULT_TRITON_METRICS_URL
+        pa_config._args[
+            'metrics-interval'] = SECONDS_TO_MILLISECONDS_MULTIPLIER * DEFAULT_MONITORING_INTERVAL
         pa_config._options['-i'] = client_protocol
         if client_protocol == 'http':
             pa_config._options['-u'] = DEFAULT_TRITON_HTTP_ENDPOINT
diff --git a/tests/test_perf_analyzer.py b/tests/test_perf_analyzer.py
index 05f11948d..26f07ce1e 100755
--- a/tests/test_perf_analyzer.py
+++ b/tests/test_perf_analyzer.py
@@ -54,7 +54,7 @@
 from model_analyzer.record.types.gpu_utilization import GPUUtilization
 from model_analyzer.record.types.gpu_power_usage import GPUPowerUsage
 from model_analyzer.record.types.gpu_used_memory import GPUUsedMemory
-from model_analyzer.record.types.gpu_total_memory import GPUTotalMemory
+from model_analyzer.record.types.gpu_free_memory import GPUFreeMemory

 from .common import test_result_collector as trc
 from model_analyzer.constants import PERF_ANALYZER_MEASUREMENT_WINDOW, MEASUREMENT_WINDOW_STEP, PERF_ANALYZER_MINIMUM_REQUEST_COUNT, MEASUREMENT_REQUEST_COUNT_STEP
@@ -132,6 +132,15 @@ def test_perf_analyzer_config(self):
         self.config['extra-verbose'] = True
         self.assertTrue(self.config['extra-verbose'])

+    def test_perf_analyzer_boolean_args(self):
+        """ Test that only positive boolean args get added """
+        expected_cli_str = '-m test_model --measurement-interval=1000 --binary-search --measurement-request-count=50'
+
+        self.config['async'] = "False"
+        self.config['binary-search'] = "True"
+
+        self.assertEqual(self.config.to_cli_string(), expected_cli_str)
+
     def test_perf_analyzer_additive_args(self):
         shape = ['name1:1,2,3', 'name2:4,5,6']
         expected_cli_str = '-m test_model --measurement-interval=1000 --shape=name1:1,2,3 --shape=name2:4,5,6 --measurement-request-count=50'
@@ -245,19 +254,19 @@ def test_run(self):
         pa_csv_mock = """Concurrency,Inferences/Second,Client Send,Network+Server Send/Recv,Server Queue,Server Compute Input,Server Compute Infer,Server Compute Output,"""
         pa_csv_mock += """Client Recv,p50 latency,p90 latency,p95 latency,p99 latency,Avg latency,request/response,response wait,"""
-        pa_csv_mock += """Avg GPU Utilizations,Avg GPU Power Usages,Max GPU Memory Usages,Total GPU Memory Usages\n"""
+        pa_csv_mock += """Avg GPU Utilization,Avg GPU Power Usage,Max GPU Memory Usage,Total GPU Memory\n"""
         pa_csv_mock += """1,46.8,2,187,18,34,65,16,1,4600,4700,4800,4900,5000,3,314,"""
-        pa_csv_mock += """GPU-aaf4fea0:80.9;GPU-aaf4fea1:90.1;GPU-aaf4fea2:74.5;,GPU-aaf4fea0:91.2;GPU-aaf4fea1:100;,GPU-aaf4fea0:1000;,GPU-aaf4fea0:1500"""
+        pa_csv_mock += """GPU-aaf4fea0:0.809;GPU-aaf4fea1:0.901;GPU-aaf4fea2:0.745;,GPU-aaf4fea0:91.2;GPU-aaf4fea1:100;,GPU-aaf4fea0:1000000000;GPU-aaf4fea1:2000000000,GPU-aaf4fea0:1500000000;GPU-aaf4fea2:3000000000"""

-        # Test avg latency parsing
-        perf_metrics = [PerfLatencyAvg]
+        # Test avg latency parsing. GPU metric is ignored for get_perf_records()
+        perf_metrics = [PerfLatencyAvg, GPUUtilization]

         with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                    mock_open(read_data=pa_csv_mock)), patch(
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(perf_metrics)

-        records = perf_analyzer.get_records()
+        records = perf_analyzer.get_perf_records()
         self.assertEqual(len(records[TEST_MODEL_NAME]), 1)
         self.assertEqual(records[TEST_MODEL_NAME][0].value(), 5)
@@ -269,7 +278,7 @@ def test_run(self):
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(perf_metrics)

-        records = perf_analyzer.get_records()
+        records = perf_analyzer.get_perf_records()
         self.assertEqual(len(records[TEST_MODEL_NAME]), 1)
         self.assertEqual(records[TEST_MODEL_NAME][0].value(), 4.7)
@@ -281,7 +290,7 @@ def test_run(self):
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(perf_metrics)

-        records = perf_analyzer.get_records()
+        records = perf_analyzer.get_perf_records()
         self.assertEqual(len(records[TEST_MODEL_NAME]), 1)
         self.assertEqual(records[TEST_MODEL_NAME][0].value(), 4.8)
@@ -293,7 +302,7 @@ def test_run(self):
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(perf_metrics)

-        records = perf_analyzer.get_records()
+        records = perf_analyzer.get_perf_records()
         self.assertEqual(len(records[TEST_MODEL_NAME]), 1)
         self.assertEqual(records[TEST_MODEL_NAME][0].value(), 4.9)
@@ -305,7 +314,7 @@ def test_run(self):
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(perf_metrics)

-        records = perf_analyzer.get_records()
+        records = perf_analyzer.get_perf_records()
         self.assertEqual(len(records[TEST_MODEL_NAME]), 1)
         self.assertEqual(records[TEST_MODEL_NAME][0].value(), 46.8)
@@ -317,7 +326,7 @@ def test_run(self):
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(perf_metrics)

-        records = perf_analyzer.get_records()
+        records = perf_analyzer.get_perf_records()
         self.assertEqual(len(records[TEST_MODEL_NAME]), 1)
         self.assertEqual(records[TEST_MODEL_NAME][0].value(), 0.314)
@@ -329,7 +338,7 @@ def test_run(self):
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(perf_metrics)

-        records = perf_analyzer.get_records()
+        records = perf_analyzer.get_perf_records()
         self.assertEqual(len(records[TEST_MODEL_NAME]), 1)
         self.assertEqual(records[TEST_MODEL_NAME][0].value(), 0.018)
@@ -341,7 +350,7 @@ def test_run(self):
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(perf_metrics)

-        records = perf_analyzer.get_records()
+        records = perf_analyzer.get_perf_records()
         self.assertEqual(len(records[TEST_MODEL_NAME]), 1)
         self.assertEqual(records[TEST_MODEL_NAME][0].value(), 0.065)
@@ -353,7 +362,7 @@ def test_run(self):
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(perf_metrics)

-        records = perf_analyzer.get_records()
+        records = perf_analyzer.get_perf_records()
         self.assertEqual(len(records[TEST_MODEL_NAME]), 1)
         self.assertEqual(records[TEST_MODEL_NAME][0].value(), 0.034)
@@ -365,31 +374,28 @@ def test_run(self):
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(perf_metrics)

-        records = perf_analyzer.get_records()
+        records = perf_analyzer.get_perf_records()
         self.assertEqual(len(records[TEST_MODEL_NAME]), 1)
         self.assertEqual(records[TEST_MODEL_NAME][0].value(), 0.016)

-        # Test Avg GPU Utilizations
-        gpu_metrics = [GPUUtilization]
+        # Test Avg GPU Utilizations. Perf metric is ignored for get_gpu_records()
+        gpu_metrics = [GPUUtilization, PerfLatencyAvg]

         with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                    mock_open(read_data=pa_csv_mock)), patch(
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(gpu_metrics)

-        records = perf_analyzer.get_records()
-        self.assertEqual(len(records[TEST_MODEL_NAME]), 3)
-        self.assertEqual(records[TEST_MODEL_NAME][0].device_uuid(),
-                         "GPU-aaf4fea0")
-        self.assertEqual(records[TEST_MODEL_NAME][0].value(), 80.9)
-        self.assertEqual(records[TEST_MODEL_NAME][1].device_uuid(),
-                         "GPU-aaf4fea1")
-        self.assertEqual(records[TEST_MODEL_NAME][1].value(), 90.1)
-        self.assertEqual(records[TEST_MODEL_NAME][2].device_uuid(),
-                         "GPU-aaf4fea2")
-        self.assertEqual(records[TEST_MODEL_NAME][2].value(), 74.5)
-
-        # Test GPU Power Usages
+        records = perf_analyzer.get_gpu_records()
+        self.assertEqual(len(records), 3)
+        self.assertEqual(records[0].device_uuid(), "GPU-aaf4fea0")
+        self.assertEqual(records[0].value(), 80.9)
+        self.assertEqual(records[1].device_uuid(), "GPU-aaf4fea1")
+        self.assertEqual(records[1].value(), 90.1)
+        self.assertEqual(records[2].device_uuid(), "GPU-aaf4fea2")
+        self.assertEqual(records[2].value(), 74.5)
+
+        # Test GPU Power Usage
         gpu_metrics = [GPUPowerUsage]

         with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
@@ -397,16 +403,14 @@ def test_run(self):
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(gpu_metrics)

-        records = perf_analyzer.get_records()
-        self.assertEqual(len(records[TEST_MODEL_NAME]), 2)
-        self.assertEqual(records[TEST_MODEL_NAME][0].device_uuid(),
-                         "GPU-aaf4fea0")
-        self.assertEqual(records[TEST_MODEL_NAME][0].value(), 91.2)
-        self.assertEqual(records[TEST_MODEL_NAME][1].device_uuid(),
-                         "GPU-aaf4fea1")
-        self.assertEqual(records[TEST_MODEL_NAME][1].value(), 100)
+        records = perf_analyzer.get_gpu_records()
+        self.assertEqual(len(records), 2)
+        self.assertEqual(records[0].device_uuid(), "GPU-aaf4fea0")
+        self.assertEqual(records[0].value(), 91.2)
+        self.assertEqual(records[1].device_uuid(), "GPU-aaf4fea1")
+        self.assertEqual(records[1].value(), 100)

-        # Test Max GPU Memory Usages
+        # Test GPU Memory Usage
         gpu_metrics = [GPUUsedMemory]

         with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
@@ -414,30 +418,36 @@ def test_run(self):
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(gpu_metrics)

-        records = perf_analyzer.get_records()
-        self.assertEqual(len(records[TEST_MODEL_NAME]), 1)
-        self.assertEqual(records[TEST_MODEL_NAME][0].device_uuid(),
-                         "GPU-aaf4fea0")
-        self.assertEqual(records[TEST_MODEL_NAME][0].value(), 1000)
+        records = perf_analyzer.get_gpu_records()
+        self.assertEqual(len(records), 2)
+        self.assertEqual(records[0].device_uuid(), "GPU-aaf4fea0")
+        self.assertEqual(records[0].value(), 1000)
+        self.assertEqual(records[1].device_uuid(), "GPU-aaf4fea1")
+        self.assertEqual(records[1].value(), 2000)

-        # Test Total GPU Memory
-        gpu_metrics = [GPUTotalMemory]
+        # Test Free GPU Memory (Must be measured with GPUUsedMemory)
+        # GPU a0 has 1500 total memory and 1000 used memory, so free == 500
+        # GPU a1 has no value reported for total, so it is ignored
+        # GPU a2 has no value reported for used, so it is ignored
+        gpu_metrics = [GPUFreeMemory, GPUUsedMemory]

         with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                    mock_open(read_data=pa_csv_mock)), patch(
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(gpu_metrics)

-        records = perf_analyzer.get_records()
-        self.assertEqual(len(records[TEST_MODEL_NAME]), 1)
-        self.assertEqual(records[TEST_MODEL_NAME][0].device_uuid(),
-                         "GPU-aaf4fea0")
-        self.assertEqual(records[TEST_MODEL_NAME][0].value(), 1500)
+        records = perf_analyzer.get_gpu_records()
+        self.assertEqual(len(records), 3)
+        self.assertTrue(type(records[0]) == GPUUsedMemory)
+        self.assertTrue(type(records[1]) == GPUUsedMemory)
+        self.assertTrue(type(records[2]) == GPUFreeMemory)
+        self.assertEqual(records[2].device_uuid(), "GPU-aaf4fea0")
+        self.assertEqual(records[2].value(), 1500 - 1000)

         #
         # Test parsing for subset
         perf_metrics = [
             PerfThroughput, PerfLatencyAvg, PerfLatencyP90, PerfLatencyP95,
-            PerfLatencyP99, GPUTotalMemory, GPUPowerUsage
+            PerfLatencyP99, GPUUtilization, GPUPowerUsage
         ]

         with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
@@ -445,16 +455,19 @@ def test_run(self):
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(perf_metrics)

-        records = perf_analyzer.get_records()
-        # GPUPowerUsage has 2 devices, so we have 8 (not 7) records
-        self.assertEqual(len(records[TEST_MODEL_NAME]), 8)
+        perf_records = perf_analyzer.get_perf_records()
+        gpu_records = perf_analyzer.get_gpu_records()
+
+        self.assertEqual(len(perf_records[TEST_MODEL_NAME]), 5)
+        # GPUPowerUsage has 2 devices and GPUUtilization has 3
+        self.assertEqual(len(gpu_records), 5)

         # Test no exceptions are raised when nothing can be parsed
         pa_csv_empty = ""
         perf_metrics = [
             PerfThroughput, PerfClientSendRecv, PerfClientResponseWait,
             PerfServerQueue, PerfServerComputeInfer, PerfServerComputeInput,
-            PerfServerComputeOutput, GPUTotalMemory
+            PerfServerComputeOutput, GPUFreeMemory
         ]
         with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                    mock_open(read_data=pa_csv_empty)), patch(
@@ -464,11 +477,11 @@ def test_run(self):
         # Test case where PA returns blank values for some GPU metrics
         pa_csv_mock = """Concurrency,Inferences/Second,Client Send,Network+Server Send/Recv,Server Queue,Server Compute Input,Server Compute Infer,Server Compute Output,"""
         pa_csv_mock += """Client Recv,p50 latency,p90 latency,p95 latency,p99 latency,Avg latency,request/response,response wait,"""
-        pa_csv_mock += """Avg GPU Utilizations,Avg GPU Power Usages,Max GPU Memory Usages,Total GPU Memory Usages\n"""
+        pa_csv_mock += """Avg GPU Utilization,Avg GPU Power Usage,Max GPU Memory Usage,Total GPU Memory\n"""
         pa_csv_mock += """1,46.8,2,187,18,34,65,16,1,4600,4700,4800,4900,5000,3,314,"""
         pa_csv_mock += """,,,7:1500"""

-        # Test Max GPU Memory Usages
+        # Test Max GPU Memory
         gpu_metrics = [GPUUsedMemory]

         with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
@@ -476,8 +489,8 @@ def test_run(self):
                        'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
             perf_analyzer.run(gpu_metrics)

-        records = perf_analyzer.get_records()
-        self.assertEqual(len(records[TEST_MODEL_NAME]), 0)
+        records = perf_analyzer.get_gpu_records()
+        self.assertEqual(len(records), 0)

         # Test exception handling
         self.perf_mock.set_perf_analyzer_return_code(1)