diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py b/src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py
index a2c3c794b..55e4dd9a3 100755
--- a/src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py
+++ b/src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py
@@ -27,6 +27,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import contextlib
+import csv
 import io
 import json
 from dataclasses import dataclass
@@ -47,6 +48,26 @@ class Metrics:
     """A base class for all the metrics class that contains common metrics."""
 
+    metric_labels = [
+        "time_to_first_token",
+        "inter_token_latency",
+        "request_latency",
+        "output_token_throughput",
+        "request_throughput",
+        "num_output_token",
+    ]
+
+    time_fields = [
+        "inter_token_latency",
+        "time_to_first_token",
+        "request_latency",
+    ]
+
+    throughput_fields = [
+        "request_throughput",
+        "output_token_throughput",
+    ]
+
     def __init__(
         self,
         request_throughputs: list[float] = [],
@@ -152,38 +173,81 @@ def __repr__(self):
         attr_strs = ",".join([f"{k}={v}" for k, v in self.__dict__.items()])
         return f"Statistics({attr_strs})"
 
+    def _is_throughput_field(self, field: str):
+        return field in Metrics.throughput_fields
+
     def _is_time_field(self, field: str):
-        time_fields = [
-            "inter_token_latency",
-            "time_to_first_token",
-            "end_to_end_latency",
-        ]
-        return field in time_fields
+        return field in Metrics.time_fields
 
     def pretty_print(self):
         table = Table(title="PA LLM Metrics")
         table.add_column("Statistic", justify="right", style="cyan", no_wrap=True)
-        stats = ["avg", "min", "max", "p99", "p95", "p90", "p75", "p50", "p25"]
+        stats = ["avg", "min", "max", "p99", "p90", "p75"]
         for stat in stats:
             table.add_column(stat, justify="right", style="green")
 
-        metrics = ["inter_token_latency", "time_to_first_token"]
-        for metric in metrics:
+        for metric in Metrics.metric_labels:
             formatted_metric = metric.replace("_", " ").capitalize()
-            is_time_field = self._is_time_field(metric)
-            if is_time_field:
+            if self._is_time_field(metric):
                 formatted_metric += " (ns)"
+            elif self._is_throughput_field(metric):
+                formatted_metric += " (per sec)"
+
             row_values = [formatted_metric]
             for stat in stats:
                 value = self.__dict__.get(f"{stat}_{metric}", -1)
                 row_values.append("{:,.0f}".format(value))
-            table.add_row(*row_values)
+
+            # Without streaming, there is no inter-token latency available.
+            if metric == "inter_token_latency":
+                if all(value == -1 for value in row_values[1:]):
+                    continue
+            else:
+                table.add_row(*row_values)
 
         console = Console()
         console.print(table)
 
+    def export_to_csv(self, csv_filename: str):
+        header = [
+            "Statistic",
+            "avg",
+            "min",
+            "max",
+            "p99",
+            "p95",
+            "p90",
+            "p75",
+            "p50",
+            "p25",
+        ]
+
+        with open(csv_filename, mode="w", newline="") as csvfile:
+            csv_writer = csv.writer(csvfile)
+            csv_writer.writerow(header)
+
+            for metric in Metrics.metric_labels:
+                formatted_metric = metric
+                if self._is_time_field(metric):
+                    formatted_metric += "(ns)"
+                elif self._is_throughput_field(metric):
+                    formatted_metric += "(per sec)"
+
+                row_values = [formatted_metric]
+
+                for stat in header[1:]:
+                    value = self.__dict__.get(f"{stat}_{metric}", -1)
+                    row_values.append(f"{value:.0f}")
+
+                # Without streaming, there is no inter-token latency available.
+                if metric == "inter_token_latency":
+                    if all(value == -1 for value in row_values[1:]):
+                        continue
+                else:
+                    csv_writer.writerow(row_values)
+
 
 class ProfileDataParser:
     """Base profile data parser class that reads the profile data JSON file to
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/main.py b/src/c++/perf_analyzer/genai-pa/genai_pa/main.py
index 6e39d5c89..75894495b 100755
--- a/src/c++/perf_analyzer/genai-pa/genai_pa/main.py
+++ b/src/c++/perf_analyzer/genai-pa/genai_pa/main.py
@@ -85,6 +85,10 @@ def report_output(metrics: LLMProfileDataParser, args):
             "Neither concurrency_range nor request_rate_range was found in args when reporting metrics"
         )
     stats = metrics.get_statistics(infer_mode, int(load_level))
+    export_csv_name = args.profile_export_file.with_name(
+        args.profile_export_file.stem + "_genai_pa.csv"
+    )
+    stats.export_to_csv(export_csv_name)
     stats.pretty_print()
 
 
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py b/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py
index b41d91070..9e5890eef 100644
--- a/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py
+++ b/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py
@@ -153,9 +153,11 @@ def _add_profile_args(parser):
         "--profile-export-file",
         type=Path,
         default="profile_export.json",
-        help="Specifies the path where the profile export will be "
-        "generated. By default, the profile export will not be "
-        "generated.",
+        help="Specifies the path where the perf_analyzer profile export will be "
+        "generated. By default, the profile export will be written to profile_export.json. "
+        "The GenAi-PA file will be exported to the same directory with a _genai_pa.csv suffix. "
+        "For example, if the profile export file is profile_export.json, the GenAi-PA file "
+        "will be exported to profile_export_genai_pa.csv.",
     )
     load_management_group.add_argument(
         "--request-rate",
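For reference, the filename derivation used in the main.py hunk above can be reproduced in isolation. The sketch below assumes a hypothetical Path value standing in for the parsed --profile-export-file argument; it only illustrates how the GenAi-PA CSV name is derived next to the perf_analyzer export.

```python
from pathlib import Path

# Hypothetical value of the parsed --profile-export-file argument (type=Path).
profile_export_file = Path("artifacts/profile_export.json")

# Same derivation as the lines added to report_output(): keep the directory,
# swap the filename for "<stem>_genai_pa.csv".
export_csv_name = profile_export_file.with_name(
    profile_export_file.stem + "_genai_pa.csv"
)

print(export_csv_name)  # artifacts/profile_export_genai_pa.csv
```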