From 5fcf5124ca112aa0a5f17ab66bd1e8f06522739a Mon Sep 17 00:00:00 2001
From: dyastremsky <58150256+dyastremsky@users.noreply.github.com>
Date: Wed, 6 Mar 2024 17:12:42 -0800
Subject: [PATCH] Add formatting for GenAi-PA report (#491)

---
 .../genai-pa/genai_pa/llm_metrics.py          | 35 +++++++++++++++++++
 .../perf_analyzer/genai-pa/genai_pa/main.py   |  4 +--
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py b/src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py
index 80230d176..29badd843 100755
--- a/src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py
+++ b/src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py
@@ -33,6 +33,8 @@
 
 import numpy as np
 from genai_pa.utils import load_json
+from rich.console import Console
+from rich.table import Table
 
 # Silence tokenizer warning on import
 with contextlib.redirect_stdout(io.StringIO()) as stdout, contextlib.redirect_stderr(
@@ -121,6 +123,38 @@ def __repr__(self):
         attr_strs = ",".join([f"{k}={v}" for k, v in self.__dict__.items()])
         return f"Statistics({attr_strs})"
 
+    def _is_time_field(self, field: str):
+        time_fields = [
+            "inter_token_latency",
+            "time_to_first_token",
+            "end_to_end_latency",
+        ]
+        return field in time_fields
+
+    def pretty_print(self):
+        table = Table(title="PA LLM Metrics")
+
+        table.add_column("Statistic", justify="right", style="cyan", no_wrap=True)
+        stats = ["avg", "min", "max", "p99", "p95", "p90", "p75", "p50", "p25"]
+        for stat in stats:
+            table.add_column(stat, justify="right", style="green")
+
+        metrics = ["inter_token_latency", "time_to_first_token"]
+        for metric in metrics:
+            formatted_metric = metric.replace("_", " ").capitalize()
+            is_time_field = self._is_time_field(metric)
+            if is_time_field:
+                formatted_metric += " (ns)"
+            row_values = [formatted_metric]
+
+            for stat in stats:
+                value = self.__dict__.get(f"{stat}_{metric}", -1)
+                row_values.append("{:,.0f}".format(value))
+            table.add_row(*row_values)
+
+        console = Console()
+        console.print(table)
+
 
 class LLMProfileData:
     """A class that calculates and aggregates all the LLM performance statistics
@@ -141,6 +175,7 @@ class LLMProfileData:
     >>> stats = pd.get_statistics(infer_mode="concurrency", level=10)
     >>>
     >>> print(stats)  # output: Statistics(avg_time_to_first_token=...)
+    >>> stats.pretty_print()  # Output: time_to_first_token_s: ...
     """
 
     def __init__(self, filename: str, tokenizer: AutoTokenizer) -> None:
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/main.py b/src/c++/perf_analyzer/genai-pa/genai_pa/main.py
index 3a9246c55..df4ca1bc0 100755
--- a/src/c++/perf_analyzer/genai-pa/genai_pa/main.py
+++ b/src/c++/perf_analyzer/genai-pa/genai_pa/main.py
@@ -76,8 +76,8 @@ def report_output(metrics: LLMProfileData, args):
         raise GenAiPAException(
             "Neither concurrency_range nor request_rate_range was found in args when reporting metrics"
         )
-    # TODO: metrics reporter class that consumes Stats class for nicer formatting
-    print(metrics.get_statistics(infer_mode, int(load_level)))
+    stats = metrics.get_statistics(infer_mode, int(load_level))
+    stats.pretty_print()
 
 
 # Separate function that can raise exceptions used for testing