Skip to content

Commit

Permalink
Add formatting for GenAi-PA report (#491)
Browse files Browse the repository at this point in the history
  • Loading branch information
dyastremsky authored and debermudez committed Mar 12, 2024
1 parent 9e11fc5 commit 5fcf512
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 2 deletions.
35 changes: 35 additions & 0 deletions src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@

import numpy as np
from genai_pa.utils import load_json
from rich.console import Console
from rich.table import Table

# Silence tokenizer warning on import
with contextlib.redirect_stdout(io.StringIO()) as stdout, contextlib.redirect_stderr(
Expand Down Expand Up @@ -121,6 +123,38 @@ def __repr__(self):
attr_strs = ",".join([f"{k}={v}" for k, v in self.__dict__.items()])
return f"Statistics({attr_strs})"

def _is_time_field(self, field: str):
time_fields = [
"inter_token_latency",
"time_to_first_token",
"end_to_end_latency",
]
return field in time_fields

def pretty_print(self):
    """Print the latency statistics as a formatted table on the console.

    Builds a ``rich`` table titled "PA LLM Metrics" with a leading
    "Statistic" column followed by one column per statistic name, and one
    row per reported metric. Row labels are the metric name with
    underscores replaced by spaces and capitalized; metrics recognised by
    ``_is_time_field`` get a " (ns)" suffix. Each value is rendered with
    thousands separators and no decimal places. A statistic that is not
    present in this object's ``__dict__`` is rendered from the fallback
    value -1.
    """
    stat_names = ["avg", "min", "max", "p99", "p95", "p90", "p75", "p50", "p25"]
    metric_names = ["inter_token_latency", "time_to_first_token"]

    report = Table(title="PA LLM Metrics")
    report.add_column("Statistic", justify="right", style="cyan", no_wrap=True)
    for stat_name in stat_names:
        report.add_column(stat_name, justify="right", style="green")

    for metric_name in metric_names:
        label = metric_name.replace("_", " ").capitalize()
        if self._is_time_field(metric_name):
            label += " (ns)"

        # Attributes are stored as "<stat>_<metric>", e.g. "avg_inter_token_latency".
        raw_values = (
            self.__dict__.get(f"{stat_name}_{metric_name}", -1)
            for stat_name in stat_names
        )
        report.add_row(label, *(f"{raw:,.0f}" for raw in raw_values))

    Console().print(report)


class LLMProfileData:
"""A class that calculates and aggregates all the LLM performance statistics
Expand All @@ -141,6 +175,7 @@ class LLMProfileData:
>>> stats = pd.get_statistics(infer_mode="concurrency", level=10)
>>>
>>> print(stats) # output: Statistics(avg_time_to_first_token=...)
>>> stats.pretty_print() # Output: a table titled "PA LLM Metrics" with one row per metric
"""

def __init__(self, filename: str, tokenizer: AutoTokenizer) -> None:
Expand Down
4 changes: 2 additions & 2 deletions src/c++/perf_analyzer/genai-pa/genai_pa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ def report_output(metrics: LLMProfileData, args):
raise GenAiPAException(
"Neither concurrency_range nor request_rate_range was found in args when reporting metrics"
)
# TODO: metrics reporter class that consumes Stats class for nicer formatting
print(metrics.get_statistics(infer_mode, int(load_level)))
stats = metrics.get_statistics(infer_mode, int(load_level))
stats.pretty_print()


# Separate function that can raise exceptions used for testing
Expand Down

0 comments on commit 5fcf512

Please sign in to comment.