diff --git a/onnxruntime/python/tools/transformers/metrics.py b/onnxruntime/python/tools/transformers/metrics.py
new file mode 100644
index 0000000000000..282c75ba8f6a5
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/metrics.py
@@ -0,0 +1,164 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import datetime
+import json
+from typing import Optional
+
+import pandas as pd
+
+
+class BaseObject:
+    def __init__(self):
+        self.customized = {}
+
+    def to_dict(self):
+        default_values = self.__dict__.copy()
+        default_values.pop("customized", None)
+        default_values.update(self.customized)
+
+        for k, v in default_values.items():
+            if isinstance(v, BaseObject):
+                default_values[k] = v.to_dict()
+
+        return {k: v for k, v in default_values.items() if v}
+
+
+class ModelInfo(BaseObject):
+    def __init__(
+        self,
+        full_name: Optional[str] = None,
+        is_huggingface: Optional[bool] = False,
+        is_text_generation: Optional[bool] = False,
+        short_name: Optional[str] = None,
+    ):
+        super().__init__()
+        self.full_name = full_name
+        self.is_huggingface = is_huggingface
+        self.is_text_generation = is_text_generation
+        self.short_name = short_name
+        self.input_shape = []
+
+
+class BackendOptions(BaseObject):
+    def __init__(
+        self,
+        enable_profiling: Optional[bool] = False,
+        execution_provider: Optional[str] = None,
+        use_io_binding: Optional[bool] = False,
+    ):
+        super().__init__()
+        self.enable_profiling = enable_profiling
+        self.execution_provider = execution_provider
+        self.use_io_binding = use_io_binding
+
+
+class Config(BaseObject):
+    def __init__(
+        self,
+        backend: Optional[str] = "onnxruntime",
+        batch_size: Optional[int] = 1,
+        seq_length: Optional[int] = 0,
+        precision: Optional[str] = "fp32",
+        warmup_runs: Optional[int] = 1,
+        measured_runs: Optional[int] = 10,
+    ):
+        super().__init__()
+        self.backend = backend
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.precision = precision
+        self.warmup_runs = warmup_runs
+        self.measured_runs = measured_runs
+        self.model_info = ModelInfo()
+        self.backend_options = BackendOptions()
+
+
+class Metadata(BaseObject):
+    def __init__(
+        self,
+        device: Optional[str] = None,
+        package_name: Optional[str] = None,
+        package_version: Optional[str] = None,
+        platform: Optional[str] = None,
+        python_version: Optional[str] = None,
+    ):
+        super().__init__()
+        self.device = device
+        self.package_name = package_name
+        self.package_version = package_version
+        self.platform = platform
+        self.python_version = python_version
+
+
+class Metrics(BaseObject):
+    def __init__(
+        self,
+        latency_ms_mean: Optional[float] = 0.0,
+        throughput_qps: Optional[float] = 0.0,
+        max_memory_usage_GB: Optional[float] = 0.0,
+    ):
+        super().__init__()
+        self.latency_ms_mean = latency_ms_mean
+        self.throughput_qps = throughput_qps
+        self.max_memory_usage_GB = max_memory_usage_GB
+
+
+class BenchmarkRecord:
+    def __init__(
+        self,
+        model_name: str,
+        precision: str,
+        backend: str,
+        device: str,
+        package_name: str,
+        package_version: str,
+        batch_size: Optional[int] = 1,
+        warmup_runs: Optional[int] = 1,
+        measured_runs: Optional[int] = 10,
+        trigger_date: Optional[str] = None,
+    ):
+        self.config = Config()
+        self.metrics = Metrics()
+        self.metadata = Metadata()
+        self.trigger_date = trigger_date or datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        self.config.model_info.full_name = model_name
+        self.config.precision = precision
+        self.config.backend = backend
+        self.config.batch_size = batch_size
+        self.config.warmup_runs = warmup_runs
+        self.config.measured_runs = measured_runs
+        self.metadata.device = device
+        self.metadata.package_name = package_name
+        self.metadata.package_version = package_version
+
+    def to_dict(self) -> dict:
+        return {
+            "config": self.config.to_dict(),
+            "metadata": self.metadata.to_dict(),
+            "metrics": self.metrics.to_dict(),
+            "trigger_date": self.trigger_date,
+        }
+
+    def to_json(self) -> str:
+        return json.dumps(self.to_dict(), default=str)
+
+    @classmethod
+    def save_as_csv(cls, file_name: str, records: list) -> None:
+        if records is None or len(records) == 0:
+            return
+        rds = [record.to_dict() for record in records]
+        df = pd.json_normalize(rds)
+        df.to_csv(file_name, index=False)
+
+    @classmethod
+    def save_as_json(cls, file_name: str, records: list) -> None:
+        if records is None or len(records) == 0:
+            return
+        rds = [record.to_dict() for record in records]
+        with open(file_name, "w") as f:
+            json.dump(rds, f, indent=4, default=str)
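A minimal usage sketch of the BenchmarkRecord API introduced above (the model name, metric values, and output file names below are hypothetical placeholders, not taken from the change): construct a record, populate the standard and customized fields, then persist a list of records. Note that BaseObject.to_dict drops falsy fields (0, 0.0, False, empty strings or lists), so zero-valued metrics are omitted from the serialized output.

# Usage sketch for metrics.BenchmarkRecord; values and file names are placeholders.
from metrics import BenchmarkRecord

record = BenchmarkRecord(
    model_name="example-org/example-7b",  # hypothetical model name
    precision="fp16",
    backend="onnxruntime",
    device="cuda",
    package_name="onnxruntime-gpu",
    package_version="1.17.0",
    batch_size=1,
    warmup_runs=5,
    measured_runs=10,
)
record.metrics.latency_ms_mean = 42.0
record.metrics.throughput_qps = 23.8
record.metrics.max_memory_usage_GB = 12.5
record.config.customized["engine"] = "optimum-ort"  # ad-hoc fields go into the customized dict

print(record.to_json())
BenchmarkRecord.save_as_csv("results.csv", [record])
BenchmarkRecord.save_as_json("results.json", [record])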
diff --git a/onnxruntime/python/tools/transformers/models/llama/benchmark_all.py b/onnxruntime/python/tools/transformers/models/llama/benchmark_all.py
index b35a5e27f9ea3..d8b3c8ec2eb18 100644
--- a/onnxruntime/python/tools/transformers/models/llama/benchmark_all.py
+++ b/onnxruntime/python/tools/transformers/models/llama/benchmark_all.py
@@ -7,6 +7,7 @@
 import torch
 from benchmark_helper import setup_logger
+from metrics import BenchmarkRecord
 
 logger = logging.getLogger(__name__)
 
 
@@ -121,11 +122,19 @@ def get_args():
         help="Number of mins to attempt the benchmark before moving on",
     )
 
+    parser.add_argument(
+        "--log-folder",
+        type=str,
+        default=None,
+        help="Path to folder to save logs and results",
+    )
+
     args = parser.parse_args()
     setattr(args, "model_size", args.model_name.split("/")[-1].replace(".", "-"))  # noqa: B010
 
     log_folder_name = f"./{args.model_size}_{args.precision}"
-    setattr(args, "log_folder", log_folder_name)  # noqa: B010
+    if not args.log_folder:
+        args.log_folder = log_folder_name
     os.makedirs(args.log_folder, exist_ok=True)
 
     # Convert timeout value to secs
@@ -197,6 +206,9 @@ def save_results(results, filename):
     df = pd.DataFrame(
         results,
         columns=[
+            "Warmup Runs",
+            "Measured Runs",
+            "Model Name",
             "Engine",
             "Precision",
             "Device",
@@ -211,6 +223,8 @@ def save_results(results, filename):
     )
 
     # Set column types
+    df["Warmup Runs"] = df["Warmup Runs"].astype("int")
+    df["Measured Runs"] = df["Measured Runs"].astype("int")
     df["Batch Size"] = df["Batch Size"].astype("int")
     df["Sequence Length"] = df["Sequence Length"].astype("int")
     df["Latency (s)"] = df["Latency (s)"].astype("float")
@@ -218,7 +232,43 @@ def save_results(results, filename):
     df["Throughput (tps)"] = df["Throughput (tps)"].astype("float")
     df["Memory (GB)"] = df["Memory (GB)"].astype("float")
 
-    df.to_csv(filename, index=False)
+    # Get package name and version
+    import pkg_resources
+    installed_packages = pkg_resources.working_set
+    installed_packages_list = sorted([f"{i.key}=={i.version}" for i in installed_packages if i.key in ["ort-nightly-gpu", "ort-nightly", "onnxruntime", "onnxruntime-gpu"]])
+
+    ort_pkg_name = ""
+    ort_pkg_version = ""
+    if installed_packages_list:
+        ort_pkg_name = installed_packages_list[0].split("==")[0]
+        ort_pkg_version = installed_packages_list[0].split("==")[1]
+
+    # Save results to csv with standard format
+    records = []
+    for _, row in df.iterrows():
+        if row["Engine"] == "optimum-ort":
+            record = BenchmarkRecord(row["Model Name"], row["Precision"], "onnxruntime", row["Device"], ort_pkg_name, ort_pkg_version)
+        elif row["Engine"] in ["pytorch-eager", "pytorch-compile"]:
+            record = BenchmarkRecord(row["Model Name"], row["Precision"], "pytorch", row["Device"], torch.__name__, torch.__version__)
+        else:
+            record = BenchmarkRecord(row["Model Name"], row["Precision"], row["Engine"], row["Device"], "", "")
+
+        record.config.warmup_runs = row["Warmup Runs"]
+        record.config.measured_runs = row["Measured Runs"]
+        record.config.batch_size = row["Batch Size"]
+        record.config.seq_length = row["Sequence Length"]
+        record.config.customized["measure_step"] = row["Step"]
+        record.config.customized["engine"] = row["Engine"]
+        record.metrics.customized["latency_s_mean"] = row["Latency (s)"]
+        record.metrics.latency_ms_mean = row["Latency (ms)"]
+        record.metrics.customized["throughput_tps"] = row["Throughput (tps)"]
+        record.metrics.max_memory_usage_GB = row["Memory (GB)"]
+
+        records.append(record)
+
+    BenchmarkRecord.save_as_csv(filename, records)
+    BenchmarkRecord.save_as_json(filename.replace(".csv", ".json"), records)
+    # df.to_csv(filename, index=False)
 
     logger.info(f"Results saved in {filename}!")
 
@@ -234,7 +284,7 @@ def benchmark(args, benchmark_cmd, engine):
 
     # Create entries for csv
    logger.info("Gathering data from log files...")
-    base_results = [engine, args.precision, args.device]
+    base_results = [args.warmup_runs, args.num_runs, args.model_name, engine, args.precision, args.device]
 
     results = process_log_file(args.device_id, log_path, base_results)
     return results
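Both benchmark scripts now resolve the installed ONNX Runtime package via pkg_resources, which is deprecated in recent setuptools releases. A possible alternative based on importlib.metadata (standard library since Python 3.8) is sketched below as a suggestion only; it is not part of the change above, and the candidate ordering is an assumption rather than the sorted() selection used here.

# Sketch: look up the installed ORT package with importlib.metadata instead of pkg_resources.
# Assumptions: Python 3.8+; candidate order is arbitrary.
from importlib.metadata import PackageNotFoundError, version

ort_pkg_name, ort_pkg_version = "", ""
for candidate in ("onnxruntime", "onnxruntime-gpu", "ort-nightly", "ort-nightly-gpu"):
    try:
        ort_pkg_version = version(candidate)
        ort_pkg_name = candidate
        break
    except PackageNotFoundError:
        continue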
diff --git a/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py b/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py
index 071b539ac1899..9da9dd7ded5af 100644
--- a/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py
+++ b/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py
@@ -10,6 +10,8 @@
 from benchmark_helper import setup_logger
 from transformers import WhisperConfig, WhisperProcessor
 
+from metrics import BenchmarkRecord
+
 logger = logging.getLogger(__name__)
 
 
@@ -123,13 +125,21 @@ def get_args():
         help="Number of mins to attempt the benchmark before moving on",
     )
 
+    parser.add_argument(
+        "--log-folder",
+        type=str,
+        default=None,
+        help="Path to folder to save logs and results",
+    )
+
     parser.add_argument("--tune", default=False, action="store_true")
 
     args = parser.parse_args()
     setattr(args, "model_size", args.model_name.split("/")[-1].replace(".", "-"))  # noqa: B010
 
     log_folder_name = f"./{args.model_size}-{args.precision}"
-    setattr(args, "log_folder", log_folder_name)  # noqa: B010
+    if not args.log_folder:
+        args.log_folder = log_folder_name
     os.makedirs(args.log_folder, exist_ok=True)
 
     # Convert timeout value to secs
@@ -235,6 +245,9 @@ def save_results(results, filename):
     df = pd.DataFrame(
         results,
         columns=[
+            "Warmup Runs",
+            "Measured Runs",
+            "Model Name",
             "Engine",
             "Precision",
             "Device",
@@ -254,6 +267,8 @@ def save_results(results, filename):
     )
 
     # Set column types
+    df["Warmup Runs"] = df["Warmup Runs"].astype("int")
+    df["Measured Runs"] = df["Measured Runs"].astype("int")
     df["Duration (s)"] = df["Duration (s)"].astype("float")
     df["Token Length"] = df["Token Length"].astype("int")
     df["Load Audio Latency (s)"] = df["Load Audio Latency (s)"].astype("float")
@@ -266,7 +281,47 @@ def save_results(results, filename):
     df["Memory (GB)"] = df["Memory (GB)"].astype("float")
     df["Real Time Factor (RTF)"] = df["Real Time Factor (RTF)"].astype("float")
 
-    df.to_csv(filename, index=False)
+    # Get package name and version
+    import pkg_resources
+    installed_packages = pkg_resources.working_set
+    installed_packages_list = sorted([f"{i.key}=={i.version}" for i in installed_packages if i.key in ["ort-nightly-gpu", "ort-nightly", "onnxruntime", "onnxruntime-gpu"]])
+
+    ort_pkg_name = ""
+    ort_pkg_version = ""
+    if installed_packages_list:
+        ort_pkg_name = installed_packages_list[0].split("==")[0]
+        ort_pkg_version = installed_packages_list[0].split("==")[1]
+
+    # Save results to csv with standard format
+    records = []
+    for _, row in df.iterrows():
+        if row["Engine"] == "onnxruntime":
+            record = BenchmarkRecord(row["Model Name"], row["Precision"], row["Engine"], row["Device"], ort_pkg_name, ort_pkg_version)
+        else:
+            record = BenchmarkRecord(row["Model Name"], row["Precision"], row["Engine"], row["Device"], torch.__name__, torch.__version__)
+
+        record.config.customized["audio_file"] = row["Audio File"]
+        record.config.warmup_runs = row["Warmup Runs"]
+        record.config.measured_runs = row["Measured Runs"]
+
+        record.metrics.customized["duration"] = row["Duration (s)"]
+        record.metrics.customized["token_length"] = row["Token Length"]
+        record.metrics.customized["load_audio_latency"] = row["Load Audio Latency (s)"]
+        record.metrics.customized["load_audio_throughput"] = row["Load Audio Throughput (qps)"]
+        record.metrics.customized["feature_extractor_latency_s"] = row["Feature Extractor Latency (s)"]
+        record.metrics.customized["feature_extractor_throughput_qps"] = row["Feature Extractor Throughput (qps)"]
+        record.metrics.customized["per_token_latency_ms"] = row["Per Token Latency (ms/token)"]
+        record.metrics.customized["rtf"] = row["Real Time Factor (RTF)"]
+
+        record.metrics.latency_ms_mean = row["Latency (s)"] * 1000
+        record.metrics.throughput_qps = row["Throughput (qps)"]
+        record.metrics.max_memory_usage_GB = row["Memory (GB)"]
+
+        records.append(record)
+
+    BenchmarkRecord.save_as_csv(filename, records)
+    BenchmarkRecord.save_as_json(filename.replace(".csv", ".json"), records)
+    # df.to_csv(filename, index=False)
 
     logger.info(f"Results saved in {filename}!")
 
@@ -282,7 +337,7 @@ def benchmark(args, benchmark_cmd, engine, audio_file, duration):
 
     # Create entries for csv
     logger.info("Gathering data from log files...")
-    base_results = [engine, args.precision, args.device, audio_file, duration]
+    base_results = [args.warmup_runs, args.num_runs, args.model_name, engine, args.precision, args.device, audio_file, duration]
 
     results = process_log_file(args.device_id, log_path, base_results)
     return results
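Because BenchmarkRecord.save_as_csv passes the nested record dictionaries through pd.json_normalize, the CSV written by both scripts uses dot-separated column names (for example config.batch_size and metrics.latency_ms_mean), and any field that BaseObject.to_dict considers falsy is omitted. A small standalone illustration with made-up values:

# Illustration of the flattened CSV layout produced by save_as_csv (values are made up).
import pandas as pd

rows = [
    {
        "config": {"backend": "onnxruntime", "batch_size": 1, "precision": "fp16"},
        "metrics": {"latency_ms_mean": 42.0, "throughput_qps": 23.8},
        "trigger_date": "2024-01-01 00:00:00",
    }
]
df = pd.json_normalize(rows)
print(sorted(df.columns))
# ['config.backend', 'config.batch_size', 'config.precision',
#  'metrics.latency_ms_mean', 'metrics.throughput_qps', 'trigger_date']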