diff --git a/genai-perf/README.md b/genai-perf/README.md
index 8fc93ac2..7cd1edf4 100644
--- a/genai-perf/README.md
+++ b/genai-perf/README.md
@@ -508,6 +508,7 @@ when `--output-tokens-mean` is provided. (default: `0`)
 The seed used to generate random values. (default: `0`)
 
 ##### `--request-count <int>`
+##### `--num-requests <int>`
 
 The number of requests to use for measurement. By default, the benchmark
 does not terminate based on request count.
@@ -532,6 +533,7 @@ being concatenated, the number of tokens in the final prompt may be off by one.
 (default: `100`)
 
 ##### `--warmup-request-count <int>`
+##### `--num-warmup-requests <int>`
 
 The number of warmup requests to send before benchmarking. (default: `0`)
 
diff --git a/genai-perf/genai_perf/export_data/console_exporter.py b/genai-perf/genai_perf/export_data/console_exporter.py
index 74e1868d..cf22f045 100644
--- a/genai-perf/genai_perf/export_data/console_exporter.py
+++ b/genai-perf/genai_perf/export_data/console_exporter.py
@@ -25,11 +25,16 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from genai_perf.export_data import telemetry_data_exporter_util as telem_utils
-from genai_perf.export_data.exporter_config import ExporterConfig
+import genai_perf.logging as logging
 from rich.console import Console
 from rich.table import Table
 
+from . import exporter_utils
+from . import telemetry_data_exporter_util as telem_utils
+from .exporter_config import ExporterConfig
+
+logger = logging.getLogger(__name__)
+
 
 class ConsoleExporter:
     """
@@ -76,32 +81,35 @@ def export(self) -> None:
         )
 
     def _construct_table(self, table: Table) -> None:
+
         for metric in self._metrics.request_metrics:
             if self._should_skip(metric.name):
                 continue
 
-            metric_str = metric.name.replace("_", " ").capitalize()
-            metric_str += f" ({metric.unit})" if metric.unit != "tokens" else ""
+            metric_str = exporter_utils.format_metric_name(metric.name, metric.unit)
             row_values = [metric_str]
+
            for stat in self.STAT_COLUMN_KEYS:
-                value = self._stats[metric.name].get(stat, None)
-                row_values.append(f"{value:,.2f}" if value else "N/A")
+                row_values.append(
+                    exporter_utils.fetch_stat(self._stats, metric.name, stat)
+                )
 
             table.add_row(*row_values)
 
         for metric in self._metrics.system_metrics:
-            metric_str = metric.name.replace("_", " ").capitalize()
-            if metric.name == "request_goodput":
-                if not self._args.goodput:
-                    continue
-            metric_str += f" ({metric.unit})" if metric.unit != "tokens" else ""
+            metric_str = exporter_utils.format_metric_name(metric.name, metric.unit)
+            if metric.name == "request_goodput" and not self._args.goodput:
+                continue
+
             row_values = [metric_str]
             for stat in self.STAT_COLUMN_KEYS:
                 if stat == "avg":
-                    value = self._stats[metric.name]["avg"]
-                    row_values.append(f"{value:,.2f}")
+                    row_values.append(
+                        exporter_utils.fetch_stat(self._stats, metric.name, "avg")
+                    )
                 else:
                     row_values.append("N/A")
+
             table.add_row(*row_values)
 
     # (TMA-1976) Refactor this method as the csv exporter shares identical method.
diff --git a/genai-perf/genai_perf/export_data/csv_exporter.py b/genai-perf/genai_perf/export_data/csv_exporter.py
index 2fc98ac1..db46648d 100644
--- a/genai-perf/genai_perf/export_data/csv_exporter.py
+++ b/genai-perf/genai_perf/export_data/csv_exporter.py
@@ -28,8 +28,10 @@
 import csv
 
 import genai_perf.logging as logging
-from genai_perf.export_data import telemetry_data_exporter_util as telem_utils
-from genai_perf.export_data.exporter_config import ExporterConfig
+
+from . import exporter_utils
+from . import telemetry_data_exporter_util as telem_utils
+from .exporter_config import ExporterConfig
 
 logger = logging.getLogger(__name__)
@@ -83,25 +85,23 @@ def _write_request_metrics(self, csv_writer) -> None:
             if self._should_skip(metric.name):
                 continue
 
-            metric_str = metric.name.replace("_", " ").title()
-            metric_str += f" ({metric.unit})" if metric.unit != "tokens" else ""
+            metric_str = exporter_utils.format_metric_name(metric.name, metric.unit)
 
             row_values = [metric_str]
             for stat in self.REQUEST_METRICS_HEADER[1:]:
-                value = self._stats[metric.name].get(stat, None)
-                row_values.append(f"{value:,.2f}" if value else "N/A")
+                row_values.append(
+                    exporter_utils.fetch_stat(self._stats, metric.name, stat)
+                )
 
             csv_writer.writerow(row_values)
 
     def _write_system_metrics(self, csv_writer) -> None:
         csv_writer.writerow(self.SYSTEM_METRICS_HEADER)
         for metric in self._metrics.system_metrics:
-            metric_str = metric.name.replace("_", " ").title()
-            metric_str += f" ({metric.unit})"
-            if metric.name == "request_goodput":
-                if not self._args.goodput:
-                    continue
-            value = self._stats[metric.name]["avg"]
-            csv_writer.writerow([metric_str, f"{value:.2f}"])
+            metric_str = exporter_utils.format_metric_name(metric.name, metric.unit)
+            if metric.name == "request_goodput" and not self._args.goodput:
+                continue
+            value = exporter_utils.fetch_stat(self._stats, metric.name, "avg")
+            csv_writer.writerow([metric_str, exporter_utils.format_stat_value(value)])
 
     def _should_skip(self, metric_name: str) -> bool:
         if self._args.endpoint_type == "embeddings":
diff --git a/genai-perf/genai_perf/export_data/exporter_utils.py b/genai-perf/genai_perf/export_data/exporter_utils.py
new file mode 100644
index 00000000..c86ebfea
--- /dev/null
+++ b/genai-perf/genai_perf/export_data/exporter_utils.py
@@ -0,0 +1,96 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import logging
+from typing import Any, Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+
+def format_metric_name(name: str, unit: Optional[str]) -> str:
+    """
+    Formats a metric name into a human-readable string with an optional unit.
+
+    Args:
+        name: The raw metric name with underscores.
+        unit: The unit of the metric (e.g., 'ms').
+
+    Returns:
+        The formatted metric name with the unit if provided.
+    """
+    metric_str = name.replace("_", " ").title()
+    return f"{metric_str} ({unit})" if unit else metric_str
+
+
+def format_stat_value(value: Any) -> str:
+    """
+    Formats a statistic value for human-readable output.
+
+    Args:
+        value: The value to format. Supports int and float types.
+
+    Returns:
+        The formatted value as a string. If not a number, returns the string representation.
+    """
+    return f"{value:,.2f}" if isinstance(value, (int, float)) else str(value)
+
+
+def fetch_stat(
+    stats: Dict[str, Dict[str, float]],
+    metric_name: str,
+    stat: str,
+) -> str:
+    """
+    Fetches a statistic value for a metric.
+    Logs an error for missing metrics or stats and returns 'N/A' if the value is missing.
+
+    Args:
+        stats: Dictionary containing statistics for metrics.
+        metric_name: The name of the metric.
+        stat: The statistic to fetch (e.g., 'avg', 'min', 'max').
+
+    Returns:
+        The formatted statistic value or 'N/A' if missing.
+    """
+    if metric_name not in stats:
+        logger.error(f"Metric '{metric_name}' is missing in the provided statistics.")
+        return "N/A"
+
+    metric_stats = stats[metric_name]
+    if not isinstance(metric_stats, dict):
+        logger.error(
+            f"Expected statistics for metric '{metric_name}' to be a dictionary. Got: {type(metric_stats).__name__}."
+        )
+        return "N/A"
+
+    if stat not in metric_stats:
+        logger.error(
+            f"Statistic '{stat}' for metric '{metric_name}' is missing. "
+            f"Available stats: {list(metric_stats.keys())}."
+        )
+        return "N/A"
+
+    return format_stat_value(metric_stats[stat])
diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py
index c786e42b..58a63573 100644
--- a/genai-perf/genai_perf/parser.py
+++ b/genai-perf/genai_perf/parser.py
@@ -839,6 +839,7 @@ def _add_input_args(parser):
 
     input_group.add_argument(
         "--request-count",
+        "--num-requests",
         type=int,
         default=ic.DEFAULT_REQUEST_COUNT,
         required=False,
@@ -877,6 +878,7 @@ def _add_input_args(parser):
 
     input_group.add_argument(
         "--warmup-request-count",
+        "--num-warmup-requests",
         type=int,
         default=ic.DEFAULT_WARMUP_REQUEST_COUNT,
         required=False,
diff --git a/genai-perf/tests/test_cli.py b/genai-perf/tests/test_cli.py
index 8655435d..8adc52cf 100644
--- a/genai-perf/tests/test_cli.py
+++ b/genai-perf/tests/test_cli.py
@@ -227,7 +227,9 @@ def test_help_version_arguments_output_and_exit(
         ),
         (["--random-seed", "8"], {"random_seed": 8}),
         (["--request-count", "100"], {"request_count": 100}),
+        (["--num-requests", "100"], {"request_count": 100}),
         (["--warmup-request-count", "100"], {"warmup_request_count": 100}),
+        (["--num-warmup-requests", "100"], {"warmup_request_count": 100}),
         (["--request-rate", "9.0"], {"request_rate": 9.0}),
         (["-s", "99.5"], {"stability_percentage": 99.5}),
         (["--service-kind", "triton"], {"service_kind": "triton"}),
diff --git a/genai-perf/tests/test_exporters/test_console_exporter.py b/genai-perf/tests/test_exporters/test_console_exporter.py
index 37de151c..e1680c9d 100644
--- a/genai-perf/tests/test_exporters/test_console_exporter.py
+++ b/genai-perf/tests/test_exporters/test_console_exporter.py
@@ -44,6 +44,38 @@ class TestConsoleExporter:
 
+    @pytest.fixture
+    def exporter_config(self, monkeypatch):
+        argv = [
+            "genai-perf",
+            "profile",
+            "-m",
+            "model_name",
+            "--service-kind",
+            "openai",
+            "--endpoint-type",
+            "chat",
+        ]
+        monkeypatch.setattr("sys.argv", argv)
+        args, _ = parser.parse_args()
+
+        metrics = LLMMetrics(
+            request_throughputs=[123],
+            request_latencies=[4, 5, 6],
+            time_to_first_tokens=[7, 8, 9],
+            time_to_second_tokens=[1, 2, 3],
+            inter_token_latencies=[10, 11, 12],
+            output_token_throughputs=[456],
+            output_sequence_lengths=[1, 2, 3],
+            input_sequence_lengths=[5, 6, 7],
+        )
+        stats = Statistics(metrics=metrics)
+        assert isinstance(stats.metrics, Metrics)
+        config = create_default_exporter_config(
+            stats=stats.stats_dict, metrics=stats.metrics, args=args
+        )
+        return config
+
     def test_streaming_llm_output(self, monkeypatch, capsys) -> None:
         argv = [
             "genai-perf",
@@ -83,15 +115,15 @@ def test_streaming_llm_output(self, monkeypatch, capsys) -> None:
             "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━━┳━━━━━━┓\n"
             "┃ Statistic                         ┃  avg ┃  min ┃  max ┃  p99 ┃   p90 ┃  p75 ┃\n"
             "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━━╇━━━━━━┩\n"
-            "│ Time to first token (ms)          │ 8.00 │ 7.00 │ 9.00 │ 8.98 │  8.80 │ 8.50 │\n"
-            "│ Time to second token (ms)         │ 2.00 │ 1.00 │ 3.00 │ 2.98 │  2.80 │ 2.50 │\n"
-            "│ Request latency (ms)              │ 5.00 │ 4.00 │ 6.00 │ 5.98 │  5.80 │ 5.50 │\n"
-            "│ Inter token latency (ms)          │ 11.… │ 10.… │ 12.… │ 11.… │ 11.80 │ 11.… │\n"
-            "│ Output sequence length            │ 2.00 │ 1.00 │ 3.00 │ 2.98 │  2.80 │ 2.50 │\n"
-            "│ Input sequence length             │ 6.00 │ 5.00 │ 7.00 │ 6.98 │  6.80 │ 6.50 │\n"
-            "│ Output token throughput (per sec) │ 456… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
-            "│ Request throughput (per sec)      │ 123… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
-            "│ Request count (count)             │ 3.00 │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
+            "│ Time To First Token (ms)          │ 8.00 │ 7.00 │ 9.00 │ 8.98 │  8.80 │ 8.50 │\n"
+            "│ Time To Second Token (ms)         │ 2.00 │ 1.00 │ 3.00 │ 2.98 │  2.80 │ 2.50 │\n"
+            "│ Request Latency (ms)              │ 5.00 │ 4.00 │ 6.00 │ 5.98 │  5.80 │ 5.50 │\n"
+            "│ Inter Token Latency (ms)          │ 11.… │ 10.… │ 12.… │ 11.… │ 11.80 │ 11.… │\n"
+            "│ Output Sequence Length (tokens)   │ 2.00 │ 1.00 │ 3.00 │ 2.98 │  2.80 │ 2.50 │\n"
+            "│ Input Sequence Length (tokens)    │ 6.00 │ 5.00 │ 7.00 │ 6.98 │  6.80 │ 6.50 │\n"
+            "│ Output Token Throughput (per sec) │ 456… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
+            "│ Request Throughput (per sec)      │ 123… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
+            "│ Request Count (count)             │ 3.00 │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
             "└───────────────────────────────────┴──────┴──────┴──────┴──────┴───────┴──────┘\n"
         )
 
@@ -138,12 +170,12 @@ def test_nonstreaming_llm_output(self, monkeypatch, capsys) -> None:
             "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┓\n"
             "┃ Statistic                         ┃   avg ┃  min ┃  max ┃  p99 ┃  p90 ┃  p75 ┃\n"
             "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━┩\n"
-            "│ Request latency (ms)              │  5.00 │ 4.00 │ 6.00 │ 5.98 │ 5.80 │ 5.50 │\n"
-            "│ Output sequence length            │  2.00 │ 1.00 │ 3.00 │ 2.98 │ 2.80 │ 2.50 │\n"
-            "│ Input sequence length             │  6.00 │ 5.00 │ 7.00 │ 6.98 │ 6.80 │ 6.50 │\n"
-            "│ Output token throughput (per sec) │ 456.… │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
-            "│ Request throughput (per sec)      │ 123.… │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
-            "│ Request count (count)             │  3.00 │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
+            "│ Request Latency (ms)              │  5.00 │ 4.00 │ 6.00 │ 5.98 │ 5.80 │ 5.50 │\n"
+            "│ Output Sequence Length (tokens)   │  2.00 │ 1.00 │ 3.00 │ 2.98 │ 2.80 │ 2.50 │\n"
+            "│ Input Sequence Length (tokens)    │  6.00 │ 5.00 │ 7.00 │ 6.98 │ 6.80 │ 6.50 │\n"
+            "│ Output Token Throughput (per sec) │ 456.… │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
+            "│ Request Throughput (per sec)      │ 123.… │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
+            "│ Request Count (count)             │  3.00 │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
             "└───────────────────────────────────┴───────┴──────┴──────┴──────┴──────┴──────┘\n"
         )
 
@@ -183,9 +215,9 @@ def test_embedding_output(self, monkeypatch, capsys) -> None:
             "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┓\n"
             "┃ Statistic                    ┃    avg ┃  min ┃  max ┃  p99 ┃  p90 ┃  p75 ┃\n"
             "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━┩\n"
-            "│ Request latency (ms)         │   5.00 │ 4.00 │ 6.00 │ 5.98 │ 5.80 │ 5.50 │\n"
-            "│ Request throughput (per sec) │ 123.00 │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
-            "│ Request count (count)        │   3.00 │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
+            "│ Request Latency (ms)         │   5.00 │ 4.00 │ 6.00 │ 5.98 │ 5.80 │ 5.50 │\n"
+            "│ Request Throughput (per sec) │ 123.00 │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
+            "│ Request Count (count)        │   3.00 │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
             "└──────────────────────────────┴────────┴──────┴──────┴──────┴──────┴──────┘\n"
         )
 
@@ -235,16 +267,16 @@ def test_valid_goodput(self, monkeypatch, capsys) -> None:
             "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━━┳━━━━━━┓\n"
             "┃ Statistic                         ┃  avg ┃  min ┃  max ┃  p99 ┃   p90 ┃  p75 ┃\n"
             "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━━╇━━━━━━┩\n"
-            "│ Time to first token (ms)          │ 8.00 │ 7.00 │ 9.00 │ 8.98 │  8.80 │ 8.50 │\n"
-            "│ Time to second token (ms)         │ 2.00 │ 1.00 │ 3.00 │ 2.98 │  2.80 │ 2.50 │\n"
-            "│ Request latency (ms)              │ 5.00 │ 4.00 │ 6.00 │ 5.98 │  5.80 │ 5.50 │\n"
-            "│ Inter token latency (ms)          │ 11.… │ 10.… │ 12.… │ 11.… │ 11.80 │ 11.… │\n"
-            "│ Output sequence length            │ 2.00 │ 1.00 │ 3.00 │ 2.98 │  2.80 │ 2.50 │\n"
-            "│ Input sequence length             │ 6.00 │ 5.00 │ 7.00 │ 6.98 │  6.80 │ 6.50 │\n"
-            "│ Output token throughput (per sec) │ 456… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
-            "│ Request throughput (per sec)      │ 123… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
-            "│ Request goodput (per sec)         │ 100… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
-            "│ Request count (count)             │ 3.00 │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
+            "│ Time To First Token (ms)          │ 8.00 │ 7.00 │ 9.00 │ 8.98 │  8.80 │ 8.50 │\n"
+            "│ Time To Second Token (ms)         │ 2.00 │ 1.00 │ 3.00 │ 2.98 │  2.80 │ 2.50 │\n"
+            "│ Request Latency (ms)              │ 5.00 │ 4.00 │ 6.00 │ 5.98 │  5.80 │ 5.50 │\n"
+            "│ Inter Token Latency (ms)          │ 11.… │ 10.… │ 12.… │ 11.… │ 11.80 │ 11.… │\n"
+            "│ Output Sequence Length (tokens)   │ 2.00 │ 1.00 │ 3.00 │ 2.98 │  2.80 │ 2.50 │\n"
+            "│ Input Sequence Length (tokens)    │ 6.00 │ 5.00 │ 7.00 │ 6.98 │  6.80 │ 6.50 │\n"
+            "│ Output Token Throughput (per sec) │ 456… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
+            "│ Request Throughput (per sec)      │ 123… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
+            "│ Request Goodput (per sec)         │ 100… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
+            "│ Request Count (count)             │ 3.00 │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
             "└───────────────────────────────────┴──────┴──────┴──────┴──────┴───────┴──────┘\n"
         )
         returned_data = capsys.readouterr().out
@@ -294,16 +326,16 @@ def test_invalid_goodput_output(self, monkeypatch, capsys) -> None:
             "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━━┳━━━━━━┓\n"
             "┃ Statistic                         ┃  avg ┃  min ┃  max ┃  p99 ┃   p90 ┃  p75 ┃\n"
             "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━━╇━━━━━━┩\n"
-            "│ Time to first token (ms)          │ 8.00 │ 7.00 │ 9.00 │ 8.98 │  8.80 │ 8.50 │\n"
-            "│ Time to second token (ms)         │ 2.00 │ 1.00 │ 3.00 │ 2.98 │  2.80 │ 2.50 │\n"
-            "│ Request latency (ms)              │ 5.00 │ 4.00 │ 6.00 │ 5.98 │  5.80 │ 5.50 │\n"
-            "│ Inter token latency (ms)          │ 11.… │ 10.… │ 12.… │ 11.… │ 11.80 │ 11.… │\n"
-            "│ Output sequence length            │ 2.00 │ 1.00 │ 3.00 │ 2.98 │  2.80 │ 2.50 │\n"
-            "│ Input sequence length             │ 6.00 │ 5.00 │ 7.00 │ 6.98 │  6.80 │ 6.50 │\n"
-            "│ Output token throughput (per sec) │ 456… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
-            "│ Request throughput (per sec)      │ 123… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
-            "│ Request goodput (per sec)         │ -1.… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
-            "│ Request count (count)             │ 3.00 │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
+            "│ Time To First Token (ms)          │ 8.00 │ 7.00 │ 9.00 │ 8.98 │  8.80 │ 8.50 │\n"
+            "│ Time To Second Token (ms)         │ 2.00 │ 1.00 │ 3.00 │ 2.98 │  2.80 │ 2.50 │\n"
+            "│ Request Latency (ms)              │ 5.00 │ 4.00 │ 6.00 │ 5.98 │  5.80 │ 5.50 │\n"
+            "│ Inter Token Latency (ms)          │ 11.… │ 10.… │ 12.… │ 11.… │ 11.80 │ 11.… │\n"
+            "│ Output Sequence Length (tokens)   │ 2.00 │ 1.00 │ 3.00 │ 2.98 │  2.80 │ 2.50 │\n"
+            "│ Input Sequence Length (tokens)    │ 6.00 │ 5.00 │ 7.00 │ 6.98 │  6.80 │ 6.50 │\n"
+            "│ Output Token Throughput (per sec) │ 456… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
+            "│ Request Throughput (per sec)      │ 123… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
+            "│ Request Goodput (per sec)         │ -1.… │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
+            "│ Request Count (count)             │ 3.00 │  N/A │  N/A │  N/A │   N/A │  N/A │\n"
             "└───────────────────────────────────┴──────┴──────┴──────┴──────┴───────┴──────┘\n"
         )
         returned_data = capsys.readouterr().out
@@ -411,15 +443,15 @@ def test_valid_telemetry_verbose(self, monkeypatch, capsys) -> None:
             "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━━┳━━━━━━┓\n"
             "┃ Statistic                         ┃  avg ┃  min ┃  max ┃  p99 ┃   p90 ┃  p75 ┃\n"
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━━╇━━━━━━┩\n" - "│ Time to first token (ms) │ 8.00 │ 7.00 │ 9.00 │ 8.98 │ 8.80 │ 8.50 │\n" - "│ Time to second token (ms) │ 2.00 │ 1.00 │ 3.00 │ 2.98 │ 2.80 │ 2.50 │\n" - "│ Request latency (ms) │ 5.00 │ 4.00 │ 6.00 │ 5.98 │ 5.80 │ 5.50 │\n" - "│ Inter token latency (ms) │ 11.… │ 10.… │ 12.… │ 11.… │ 11.80 │ 11.… │\n" - "│ Output sequence length │ 2.00 │ 1.00 │ 3.00 │ 2.98 │ 2.80 │ 2.50 │\n" - "│ Input sequence length │ 6.00 │ 5.00 │ 7.00 │ 6.98 │ 6.80 │ 6.50 │\n" - "│ Output token throughput (per sec) │ 456… │ N/A │ N/A │ N/A │ N/A │ N/A │\n" - "│ Request throughput (per sec) │ 123… │ N/A │ N/A │ N/A │ N/A │ N/A │\n" - "│ Request count (count) │ 3.00 │ N/A │ N/A │ N/A │ N/A │ N/A │\n" + "│ Time To First Token (ms) │ 8.00 │ 7.00 │ 9.00 │ 8.98 │ 8.80 │ 8.50 │\n" + "│ Time To Second Token (ms) │ 2.00 │ 1.00 │ 3.00 │ 2.98 │ 2.80 │ 2.50 │\n" + "│ Request Latency (ms) │ 5.00 │ 4.00 │ 6.00 │ 5.98 │ 5.80 │ 5.50 │\n" + "│ Inter Token Latency (ms) │ 11.… │ 10.… │ 12.… │ 11.… │ 11.80 │ 11.… │\n" + "│ Output Sequence Length (tokens) │ 2.00 │ 1.00 │ 3.00 │ 2.98 │ 2.80 │ 2.50 │\n" + "│ Input Sequence Length (tokens) │ 6.00 │ 5.00 │ 7.00 │ 6.98 │ 6.80 │ 6.50 │\n" + "│ Output Token Throughput (per sec) │ 456… │ N/A │ N/A │ N/A │ N/A │ N/A │\n" + "│ Request Throughput (per sec) │ 123… │ N/A │ N/A │ N/A │ N/A │ N/A │\n" + "│ Request Count (count) │ 3.00 │ N/A │ N/A │ N/A │ N/A │ N/A │\n" "└───────────────────────────────────┴──────┴──────┴──────┴──────┴───────┴──────┘\n" " NVIDIA GenAI-Perf | Power Metrics \n" "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n" @@ -522,14 +554,89 @@ def test_missing_data(self, monkeypatch, capsys) -> None: "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┓\n" "┃ Statistic ┃ avg ┃ min ┃ max ┃ p99 ┃ p90 ┃ p75 ┃\n" "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━┩\n" - "│ Request latency (ms) │ N/A │ 4.00 │ 6.00 │ 5.98 │ 5.80 │ 5.50 │\n" - "│ Output sequence length │ 2.00 │ 1.00 │ N/A │ 2.98 │ 2.80 │ 2.50 │\n" - "│ Input sequence length │ N/A │ N/A │ N/A │ N/A │ N/A │ N/A │\n" - "│ Output token throughput (per sec) │ 456.… │ N/A │ N/A │ N/A │ N/A │ N/A │\n" - "│ Request throughput (per sec) │ 123.… │ N/A │ N/A │ N/A │ N/A │ N/A │\n" - "│ Request count (count) │ 3.00 │ N/A │ N/A │ N/A │ N/A │ N/A │\n" + "│ Request Latency (ms) │ N/A │ 4.00 │ 6.00 │ 5.98 │ 5.80 │ 5.50 │\n" + "│ Output Sequence Length (tokens) │ 2.00 │ 1.00 │ N/A │ 2.98 │ 2.80 │ 2.50 │\n" + "│ Input Sequence Length (tokens) │ N/A │ N/A │ N/A │ N/A │ N/A │ N/A │\n" + "│ Output Token Throughput (per sec) │ 456.… │ N/A │ N/A │ N/A │ N/A │ N/A │\n" + "│ Request Throughput (per sec) │ 123.… │ N/A │ N/A │ N/A │ N/A │ N/A │\n" + "│ Request Count (count) │ 3.00 │ N/A │ N/A │ N/A │ N/A │ N/A │\n" + "└───────────────────────────────────┴───────┴──────┴──────┴──────┴──────┴──────┘\n" + ) + + returned_data = capsys.readouterr().out + assert returned_data == expected_content + + @patch("genai_perf.export_data.exporter_utils.logger") + def test_missing_statistics(self, mock_logger, exporter_config, capsys): + """ + Test behavior when specific statistics are missing from the stats dictionary. 
+ """ + # Remove specific statistics to simulate missing data + del exporter_config.stats["request_latency"]["avg"] + del exporter_config.stats["output_sequence_length"]["max"] + + exporter = ConsoleExporter(exporter_config) + exporter.export() + + returned_data = capsys.readouterr().out + + mock_logger.error.assert_any_call( + "Statistic 'avg' for metric 'request_latency' is missing. " + "Available stats: ['unit', 'p25', 'p50', 'p75', 'p90', 'p95', 'p99', 'min', 'max', 'std']." + ) + mock_logger.error.assert_any_call( + "Statistic 'max' for metric 'output_sequence_length' is missing. " + "Available stats: ['unit', 'avg', 'p25', 'p50', 'p75', 'p90', 'p95', 'p99', 'min', 'std']." + ) + + # Validate output reflects missing statistics as 'N/A' + expected_output = ( + " NVIDIA GenAI-Perf | LLM Metrics \n" + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┓\n" + "┃ Statistic ┃ avg ┃ min ┃ max ┃ p99 ┃ p90 ┃ p75 ┃\n" + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━┩\n" + "│ Request Latency (ms) │ N/A │ 4.00 │ 6.00 │ 5.98 │ 5.80 │ 5.50 │\n" + "│ Output Sequence Length (tokens) │ 2.00 │ 1.00 │ N/A │ 2.98 │ 2.80 │ 2.50 │\n" + "│ Input Sequence Length (tokens) │ 6.00 │ 5.00 │ 7.00 │ 6.98 │ 6.80 │ 6.50 │\n" + "│ Output Token Throughput (per sec) │ 456.… │ N/A │ N/A │ N/A │ N/A │ N/A │\n" + "│ Request Throughput (per sec) │ 123.… │ N/A │ N/A │ N/A │ N/A │ N/A │\n" + "│ Request Count (count) │ 3.00 │ N/A │ N/A │ N/A │ N/A │ N/A │\n" "└───────────────────────────────────┴───────┴──────┴──────┴──────┴──────┴──────┘\n" ) + assert returned_data == expected_output + + @patch("genai_perf.export_data.exporter_utils.logger") + def test_invalid_stat_structure(self, mock_logger, exporter_config, capsys): + """ + Test behavior when the stats structure is invalid. + """ + # Simulate an invalid stats structure + exporter_config.stats["request_latency"] = "invalid_structure" + + exporter = ConsoleExporter(exporter_config) + exporter.export() + returned_data = capsys.readouterr().out + + # Check that the invalid structure is logged + mock_logger.error.assert_any_call( + "Expected statistics for metric 'request_latency' to be a dictionary. Got: str." 
+        )
+
+        # Validate the output reflects invalid stats as 'N/A'
+        expected_content = (
+            " NVIDIA GenAI-Perf | LLM Metrics \n"
+            "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┳━━━━━━┓\n"
+            "┃ Statistic                         ┃   avg ┃  min ┃  max ┃  p99 ┃  p90 ┃  p75 ┃\n"
+            "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━╇━━━━━━┩\n"
+            "│ Request Latency (ms)              │   N/A │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
+            "│ Output Sequence Length (tokens)   │  2.00 │ 1.00 │ 3.00 │ 2.98 │ 2.80 │ 2.50 │\n"
+            "│ Input Sequence Length (tokens)    │  6.00 │ 5.00 │ 7.00 │ 6.98 │ 6.80 │ 6.50 │\n"
+            "│ Output Token Throughput (per sec) │ 456.… │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
+            "│ Request Throughput (per sec)      │ 123.… │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
+            "│ Request Count (count)             │  3.00 │  N/A │  N/A │  N/A │  N/A │  N/A │\n"
+            "└───────────────────────────────────┴───────┴──────┴──────┴──────┴──────┴──────┘\n"
+        )
+        assert returned_data == expected_content
diff --git a/genai-perf/tests/test_exporters/test_csv_exporter.py b/genai-perf/tests/test_exporters/test_csv_exporter.py
index c9b0dd3b..752cc2d1 100644
--- a/genai-perf/tests/test_exporters/test_csv_exporter.py
+++ b/genai-perf/tests/test_exporters/test_csv_exporter.py
@@ -26,13 +26,12 @@
 
 import os
 from io import StringIO
-from pathlib import Path
 from typing import Any, List, Tuple
+from unittest.mock import patch
 
 import pytest
 from genai_perf import parser
 from genai_perf.export_data.csv_exporter import CsvExporter
-from genai_perf.export_data.exporter_config import ExporterConfig
 from genai_perf.metrics import (
     LLMMetrics,
     Metrics,
@@ -119,8 +118,8 @@ def test_streaming_llm_csv_output(
            "Time To Second Token (ms),2.00,1.00,3.00,2.98,2.90,2.80,2.50,2.00,1.50\r\n",
             "Request Latency (ms),5.00,4.00,6.00,5.98,5.90,5.80,5.50,5.00,4.50\r\n",
             "Inter Token Latency (ms),11.00,10.00,12.00,11.98,11.90,11.80,11.50,11.00,10.50\r\n",
-            "Output Sequence Length,2.00,1.00,3.00,2.98,2.90,2.80,2.50,2.00,1.50\r\n",
-            "Input Sequence Length,6.00,5.00,7.00,6.98,6.90,6.80,6.50,6.00,5.50\r\n",
+            "Output Sequence Length (tokens),2.00,1.00,3.00,2.98,2.90,2.80,2.50,2.00,1.50\r\n",
+            "Input Sequence Length (tokens),6.00,5.00,7.00,6.98,6.90,6.80,6.50,6.00,5.50\r\n",
             "\r\n",
             "Metric,Value\r\n",
             "Output Token Throughput (per sec),456.00\r\n",
@@ -172,8 +171,8 @@ def test_nonstreaming_llm_csv_output(
         expected_content = [
             "Metric,avg,min,max,p99,p95,p90,p75,p50,p25\r\n",
             "Request Latency (ms),5.00,4.00,6.00,5.98,5.90,5.80,5.50,5.00,4.50\r\n",
-            "Output Sequence Length,2.00,1.00,3.00,2.98,2.90,2.80,2.50,2.00,1.50\r\n",
-            "Input Sequence Length,6.00,5.00,7.00,6.98,6.90,6.80,6.50,6.00,5.50\r\n",
+            "Output Sequence Length (tokens),2.00,1.00,3.00,2.98,2.90,2.80,2.50,2.00,1.50\r\n",
+            "Input Sequence Length (tokens),6.00,5.00,7.00,6.98,6.90,6.80,6.50,6.00,5.50\r\n",
             "\r\n",
             "Metric,Value\r\n",
             "Output Token Throughput (per sec),456.00\r\n",
@@ -374,8 +373,8 @@ def test_triton_telemetry_output(
             "Time To Second Token (ms),2.00,1.00,3.00,2.98,2.90,2.80,2.50,2.00,1.50\r\n",
             "Request Latency (ms),5.00,4.00,6.00,5.98,5.90,5.80,5.50,5.00,4.50\r\n",
             "Inter Token Latency (ms),11.00,10.00,12.00,11.98,11.90,11.80,11.50,11.00,10.50\r\n",
-            "Output Sequence Length,2.00,1.00,3.00,2.98,2.90,2.80,2.50,2.00,1.50\r\n",
-            "Input Sequence Length,6.00,5.00,7.00,6.98,6.90,6.80,6.50,6.00,5.50\r\n",
+            "Output Sequence Length (tokens),2.00,1.00,3.00,2.98,2.90,2.80,2.50,2.00,1.50\r\n",
+            "Input Sequence Length (tokens),6.00,5.00,7.00,6.98,6.90,6.80,6.50,6.00,5.50\r\n",
             "\r\n",
             "Metric,Value\r\n",
             "Output Token Throughput (per sec),456.00\r\n",
@@ -402,11 +401,16 @@ def test_triton_telemetry_output(
 
         assert returned_data == expected_content
 
+    @patch("genai_perf.export_data.exporter_utils.logger")
     def test_missing_data(
-        self, monkeypatch, mock_read_write: pytest.MonkeyPatch, llm_metrics: LLMMetrics
+        self,
+        mock_logger,
+        monkeypatch,
+        mock_read_write: pytest.MonkeyPatch,
+        llm_metrics: LLMMetrics,
     ) -> None:
         """
-        Test if missing data does not throw an error and are marked as "N/A".
+        Test that missing data do not throw errors and are marked as "N/A".
         """
         argv = [
             "genai-perf",
@@ -438,12 +442,23 @@ def test_missing_data(
         exporter = CsvExporter(config)
         exporter.export()
 
+        mock_logger.error.assert_any_call(
+            "Statistic 'avg' for metric 'request_latency' is missing. "
+            "Available stats: ['unit', 'p25', 'p50', 'p75', 'p90', 'p95', 'p99', 'min', 'max', 'std']."
+        )
+        mock_logger.error.assert_any_call(
+            "Statistic 'max' for metric 'output_sequence_length' is missing. "
+            "Available stats: ['unit', 'avg', 'p25', 'p50', 'p75', 'p90', 'p95', 'p99', 'min', 'std']."
+        )
+        mock_logger.error.assert_any_call(
+            "Metric 'input_sequence_length' is missing in the provided statistics."
+        )
         expected_filename = f"custom_export_genai_perf.csv"
         expected_content = [
             "Metric,avg,min,max,p99,p95,p90,p75,p50,p25\r\n",
             "Request Latency (ms),N/A,4.00,6.00,5.98,5.90,5.80,5.50,5.00,4.50\r\n",
-            "Output Sequence Length,2.00,1.00,N/A,2.98,2.90,2.80,2.50,2.00,1.50\r\n",
-            "Input Sequence Length,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A\r\n",
+            "Output Sequence Length (tokens),2.00,1.00,N/A,2.98,2.90,2.80,2.50,2.00,1.50\r\n",
+            "Input Sequence Length (tokens),N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A\r\n",
             "\r\n",
             "Metric,Value\r\n",
             "Output Token Throughput (per sec),456.00\r\n",
diff --git a/genai-perf/tests/test_exporters/test_json_exporter.py b/genai-perf/tests/test_exporters/test_json_exporter.py
index 02fe0b58..62d40dcb 100644
--- a/genai-perf/tests/test_exporters/test_json_exporter.py
+++ b/genai-perf/tests/test_exporters/test_json_exporter.py
@@ -275,7 +275,7 @@ def mock_read_write(self, monkeypatch: pytest.MonkeyPatch) -> List[Tuple[str, str]]:
 
         def custom_open(filename, *args, **kwargs):
             def write(self: Any, content: str) -> int:
-                print(f"Writing to {filename}")
+                print(f"Writing to {filename}")  # To help with debugging failures
                 written_data.append((str(filename), content))
                 return len(content)
 
diff --git a/templates/genai-perf-templates/README_template b/templates/genai-perf-templates/README_template
index 610990c5..8b085b27 100644
--- a/templates/genai-perf-templates/README_template
+++ b/templates/genai-perf-templates/README_template
@@ -508,6 +508,7 @@ when `--output-tokens-mean` is provided. (default: `0`)
 The seed used to generate random values. (default: `0`)
 
 ##### `--request-count <int>`
+##### `--num-requests <int>`
 
 The number of requests to use for measurement. By default, the benchmark
 does not terminate based on request count.
@@ -532,6 +533,7 @@ being concatenated, the number of tokens in the final prompt may be off by one.
 (default: `100`)
 
 ##### `--warmup-request-count <int>`
+##### `--num-warmup-requests <int>`
 
 The number of warmup requests to send before benchmarking. (default: `0`)
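
For reference, the helpers introduced in `exporter_utils.py` compose as follows. This is an illustrative sketch, not part of the diff: the module path, signatures, and formatting behavior come from the new file above, while the sample `stats` dict is made up (its shape mirrors the `stats_dict` the exporters consume).

```python
# Illustrative sketch of the new exporter_utils helpers.
from genai_perf.export_data import exporter_utils

# Hypothetical stats dict shaped like the exporters' stats_dict.
stats = {"request_latency": {"unit": "ms", "avg": 5.0, "p99": 5.98}}

# Underscored metric names become Title Case, with the unit appended when set.
print(exporter_utils.format_metric_name("request_latency", "ms"))
# -> Request Latency (ms)

# Present stats are formatted to two decimals; missing stats or metrics log an
# error and degrade to "N/A" instead of raising a KeyError.
print(exporter_utils.fetch_stat(stats, "request_latency", "avg"))       # -> 5.00
print(exporter_utils.fetch_stat(stats, "request_latency", "max"))       # -> N/A
print(exporter_utils.fetch_stat(stats, "time_to_first_token", "avg"))   # -> N/A
```

This degrade-and-log behavior is exactly what the new `test_missing_statistics` and `test_invalid_stat_structure` cases pin down: the exporters render whatever is available and log errors rather than crash on incomplete stats.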
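The `--num-requests` / `--num-warmup-requests` flags added in `parser.py` are plain argparse aliases, so both spellings land in the existing `request_count` / `warmup_request_count` destinations and downstream code is unchanged. A sketch mirroring the added `test_cli.py` cases (the extra `profile` arguments follow the fixture above; `model_name` is a placeholder, and depending on configuration other required options may apply):

```python
# Sketch: both spellings populate the same namespace attributes.
import sys
from genai_perf import parser

sys.argv = [
    "genai-perf", "profile",
    "-m", "model_name",             # placeholder model, as in the fixture
    "--service-kind", "openai",
    "--endpoint-type", "chat",
    "--num-requests", "100",        # alias of --request-count
    "--num-warmup-requests", "10",  # alias of --warmup-request-count
]
args, _ = parser.parse_args()
assert args.request_count == 100
assert args.warmup_request_count == 10
```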