Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve error visibility when printing metrics #234

Merged
merged 6 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions genai-perf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,7 @@ when `--output-tokens-mean` is provided. (default: `0`)
The seed used to generate random values. (default: `0`)

##### `--request-count <int>`
##### `--num-requests <int>`
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added aliases. While PA uses request-count, all other GAP args that use counts start with --num (most notably, --num-prompts).

We can decide for GA what arg name to use (if only one), but having this consistency would make it less confusing to be adding args. I often would type out the wrong one, so I imagine some users might too.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the previous name right?
An alias is fine, just having deja vu here.


The number of requests to use for measurement.
By default, the benchmark does not terminate based on request count.
Expand All @@ -532,6 +533,7 @@ being concatenated, the number of tokens in the final prompt may be off by one.
(default: `100`)

##### `--warmup-request-count <int>`
##### `--num-warmup-requests <int>`

The number of warmup requests to send before benchmarking. (default: `0`)

Expand Down
34 changes: 21 additions & 13 deletions genai-perf/genai_perf/export_data/console_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,16 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


from genai_perf.export_data import telemetry_data_exporter_util as telem_utils
from genai_perf.export_data.exporter_config import ExporterConfig
import genai_perf.logging as logging
from rich.console import Console
from rich.table import Table

from . import exporter_utils
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Switching to relative imports for conciseness. This will also make any future refactors easier, so that we don't need to update the path if files are reorganized.

from . import telemetry_data_exporter_util as telem_utils
from .exporter_config import ExporterConfig

logger = logging.getLogger(__name__)


class ConsoleExporter:
"""
Expand Down Expand Up @@ -76,32 +81,35 @@ def export(self) -> None:
)

def _construct_table(self, table: Table) -> None:

for metric in self._metrics.request_metrics:
if self._should_skip(metric.name):
continue

metric_str = metric.name.replace("_", " ").capitalize()
metric_str += f" ({metric.unit})" if metric.unit != "tokens" else ""
metric_str = exporter_utils.format_metric_name(metric.name, metric.unit)
row_values = [metric_str]

for stat in self.STAT_COLUMN_KEYS:
value = self._stats[metric.name].get(stat, None)
row_values.append(f"{value:,.2f}" if value else "N/A")
row_values.append(
exporter_utils.fetch_stat(self._stats, metric.name, stat)
)

table.add_row(*row_values)

for metric in self._metrics.system_metrics:
metric_str = metric.name.replace("_", " ").capitalize()
if metric.name == "request_goodput":
if not self._args.goodput:
continue
metric_str += f" ({metric.unit})" if metric.unit != "tokens" else ""
metric_str = exporter_utils.format_metric_name(metric.name, metric.unit)
if metric.name == "request_goodput" and not self._args.goodput:
continue

row_values = [metric_str]
for stat in self.STAT_COLUMN_KEYS:
if stat == "avg":
value = self._stats[metric.name]["avg"]
row_values.append(f"{value:,.2f}")
row_values.append(
exporter_utils.fetch_stat(self._stats, metric.name, "avg")
)
else:
row_values.append("N/A")

table.add_row(*row_values)

# (TMA-1976) Refactor this method as the csv exporter shares identical method.
Expand Down
26 changes: 13 additions & 13 deletions genai-perf/genai_perf/export_data/csv_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@
import csv

import genai_perf.logging as logging
from genai_perf.export_data import telemetry_data_exporter_util as telem_utils
from genai_perf.export_data.exporter_config import ExporterConfig

from . import exporter_utils
from . import telemetry_data_exporter_util as telem_utils
from .exporter_config import ExporterConfig

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -83,25 +85,23 @@ def _write_request_metrics(self, csv_writer) -> None:
if self._should_skip(metric.name):
continue

metric_str = metric.name.replace("_", " ").title()
metric_str += f" ({metric.unit})" if metric.unit != "tokens" else ""
metric_str = exporter_utils.format_metric_name(metric.name, metric.unit)
row_values = [metric_str]
for stat in self.REQUEST_METRICS_HEADER[1:]:
value = self._stats[metric.name].get(stat, None)
row_values.append(f"{value:,.2f}" if value else "N/A")
row_values.append(
exporter_utils.fetch_stat(self._stats, metric.name, stat)
)

csv_writer.writerow(row_values)

def _write_system_metrics(self, csv_writer) -> None:
csv_writer.writerow(self.SYSTEM_METRICS_HEADER)
for metric in self._metrics.system_metrics:
metric_str = metric.name.replace("_", " ").title()
metric_str += f" ({metric.unit})"
if metric.name == "request_goodput":
if not self._args.goodput:
continue
value = self._stats[metric.name]["avg"]
csv_writer.writerow([metric_str, f"{value:.2f}"])
metric_str = exporter_utils.format_metric_name(metric.name, metric.unit)
if metric.name == "request_goodput" and not self._args.goodput:
continue
value = exporter_utils.fetch_stat(self._stats, metric.name, "avg")
csv_writer.writerow([metric_str, exporter_utils.format_stat_value(value)])

def _should_skip(self, metric_name: str) -> bool:
if self._args.endpoint_type == "embeddings":
Expand Down
96 changes: 96 additions & 0 deletions genai-perf/genai_perf/export_data/exporter_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import logging
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)


def format_metric_name(name: str, unit: Optional[str]) -> str:
    """
    Builds a human-readable metric label, optionally suffixed with its unit.

    Args:
        name: The raw metric name, with words separated by underscores.
        unit: The metric's unit (e.g., 'ms'); falsy values omit the suffix.

    Returns:
        The title-cased metric name, followed by " (unit)" when a unit is given.
    """
    pretty = name.replace("_", " ").title()
    if not unit:
        return pretty
    return f"{pretty} ({unit})"


def format_stat_value(value: Any) -> str:
    """
    Renders a statistic value as a display string.

    Args:
        value: The value to render; int and float get numeric formatting.

    Returns:
        Numbers formatted with thousands separators and two decimal places;
        anything else falls back to its plain string representation.
    """
    # NOTE(review): bool is a subclass of int, so True/False render as
    # "1.00"/"0.00" here — presumably stats never contain bools; confirm.
    if isinstance(value, (int, float)):
        return f"{value:,.2f}"
    return str(value)


def fetch_stat(
    stats: Dict[str, Dict[str, float]],
    metric_name: str,
    stat: str,
) -> str:
    """
    Fetches a statistic value for a metric.
    Logs errors for missing metrics or stats and returns 'N/A' if the value is missing.

    Args:
        stats: Dictionary containing statistics for metrics.
        metric_name: The name of the metric.
        stat: The statistic to fetch (e.g., 'avg', 'min', 'max').

    Returns:
        The formatted statistic value or 'N/A' if missing.
    """
    # Lazy %-style args throughout: the message is only built if the record
    # is actually emitted by a configured handler.
    if metric_name not in stats:
        logger.error("Metric '%s' is missing in the provided statistics.", metric_name)
        return "N/A"

    metric_stats = stats[metric_name]
    # Guard against malformed stats entries (e.g. a scalar where a dict of
    # per-stat values is expected) so exporters degrade to 'N/A' instead of
    # raising mid-report.
    if not isinstance(metric_stats, dict):
        logger.error(
            "Expected statistics for metric '%s' to be a dictionary. Got: %s.",
            metric_name,
            type(metric_stats).__name__,
        )
        return "N/A"

    if stat not in metric_stats:
        logger.error(
            "Statistic '%s' for metric '%s' is missing. Available stats: %s.",
            stat,
            metric_name,
            list(metric_stats.keys()),
        )
        return "N/A"

    return format_stat_value(metric_stats[stat])
2 changes: 2 additions & 0 deletions genai-perf/genai_perf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,7 @@ def _add_input_args(parser):

input_group.add_argument(
"--request-count",
"--num-requests",
type=int,
default=ic.DEFAULT_REQUEST_COUNT,
required=False,
Expand Down Expand Up @@ -877,6 +878,7 @@ def _add_input_args(parser):

input_group.add_argument(
"--warmup-request-count",
"--num-warmup-requests",
type=int,
default=ic.DEFAULT_WARMUP_REQUEST_COUNT,
required=False,
Expand Down
2 changes: 2 additions & 0 deletions genai-perf/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,9 @@ def test_help_version_arguments_output_and_exit(
),
(["--random-seed", "8"], {"random_seed": 8}),
(["--request-count", "100"], {"request_count": 100}),
(["--num-requests", "100"], {"request_count": 100}),
(["--warmup-request-count", "100"], {"warmup_request_count": 100}),
(["--num-warmup-requests", "100"], {"warmup_request_count": 100}),
(["--request-rate", "9.0"], {"request_rate": 9.0}),
(["-s", "99.5"], {"stability_percentage": 99.5}),
(["--service-kind", "triton"], {"service_kind": "triton"}),
Expand Down
Loading
Loading