Benchmark Metrics #18927

Closed · wants to merge 15 commits
172 changes: 172 additions & 0 deletions onnxruntime/python/tools/transformers/metrics.py
@@ -0,0 +1,172 @@
Code scanning / lintrunner: BLACK-ISORT/format warning. Run lintrunner -a to apply this patch.

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

import datetime
import json
from typing import Optional

import pandas as pd


class BaseObject:
def __init__(self):
self.customized = {}

    def to_dict(self):
        # Merge the customized entries over the default attributes, convert any
        # nested BaseObject values recursively, and drop empty/zero values.
        default_values = self.__dict__.copy()
        default_values.pop("customized", None)
        default_values.update(self.customized)

        for k, v in default_values.items():
            if isinstance(v, BaseObject):
                default_values[k] = v.to_dict()

        return {k: v for k, v in default_values.items() if v}


class ModelInfo(BaseObject):
def __init__(
self,
full_name: Optional[str] = None,
is_huggingface: Optional[bool] = False,
is_text_generation: Optional[bool] = False,
short_name: Optional[str] = None,
):
super().__init__()
self.full_name = full_name
self.is_huggingface = is_huggingface
self.is_text_generation = is_text_generation
self.short_name = short_name
self.input_shape = []


class BackendOptions(BaseObject):
def __init__(
self,
enable_profiling: Optional[bool] = False,
execution_provider: Optional[str] = None,
use_io_binding: Optional[bool] = False,
):
super().__init__()
self.enable_profiling = enable_profiling
self.execution_provider = execution_provider
self.use_io_binding = use_io_binding


class Config(BaseObject):
def __init__(
self,
backend: Optional[str] = "onnxruntime",
batch_size: Optional[int] = 1,
seq_length: Optional[int] = 0,
precision: Optional[str] = "fp32",
warmup_runs: Optional[int] = 1,
measured_runs: Optional[int] = 10,
measure_step: Optional[str] = "",
        engine: Optional[str] = "",
):
super().__init__()
self.backend = backend
self.batch_size = batch_size
self.seq_length = seq_length
self.precision = precision
self.warmup_runs = warmup_runs
self.measured_runs = measured_runs
self.model_info = ModelInfo()
self.backend_options = BackendOptions()
self.measure_step = measure_step
self.engine = engine


class Metadata(BaseObject):
def __init__(
self,
device: Optional[str] = None,
package_name: Optional[str] = None,
package_version: Optional[str] = None,
platform: Optional[str] = None,
python_version: Optional[str] = None,
):
super().__init__()
self.device = device
self.package_name = package_name
self.package_version = package_version
self.platform = platform
self.python_version = python_version


class Metrics(BaseObject):
def __init__(
self,
latency_ms_mean: Optional[float] = 0.0,
latency_s_mean: Optional[float] = 0.0,
throughput_qps: Optional[float] = 0.0,
throughput_tps: Optional[float] = 0.0,
max_memory_usage_GB: Optional[float] = 0.0,
):
super().__init__()
self.latency_ms_mean = latency_ms_mean
self.latency_s_mean = latency_s_mean
self.throughput_qps = throughput_qps
self.throughput_tps = throughput_tps
self.max_memory_usage_GB = max_memory_usage_GB


class BenchmarkRecord:
def __init__(
self,
model_name: str,
precision: str,
backend: str,
device: str,
package_name: str,
package_version: str,
batch_size: Optional[int] = 1,
warmup_runs: Optional[int] = 1,
measured_runs: Optional[int] = 10,
trigger_date: Optional[str] = None,
):
self.config = Config()
self.metrics = Metrics()
self.metadata = Metadata()
self.trigger_date = trigger_date or datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

self.config.model_info.full_name = model_name
self.config.precision = precision
self.config.backend = backend
self.config.batch_size = batch_size
self.config.warmup_runs = warmup_runs
self.config.measured_runs = measured_runs
self.metadata.device = device
self.metadata.package_name = package_name
self.metadata.package_version = package_version

def to_dict(self) -> dict:
return {
"config": self.config.to_dict(),
"metadata": self.metadata.to_dict(),
"metrics": self.metrics.to_dict(),
"trigger_date": self.trigger_date,
}

def to_json(self) -> str:
return json.dumps(self.to_dict(), default=str)

@classmethod
def save_as_csv(cls, file_name: str, records: list) -> None:
if records is None or len(records) == 0:
return
rds = [record.to_dict() for record in records]
df = pd.json_normalize(rds)
df.to_csv(file_name, index=False)

@classmethod
def save_as_json(cls, file_name: str, records: list) -> None:
if records is None or len(records) == 0:
return
rds = [record.to_dict() for record in records]
with open(file_name, "w") as f:
json.dump(rds, f, indent=4, default=str)
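For reference, here is a minimal usage sketch of the BenchmarkRecord API defined above. Everything in it is illustrative (the model name, package version, metric values, and output file names are made-up placeholders), not part of the diff:

# Hypothetical example of recording a single benchmark run with the new metrics module.
from metrics import BenchmarkRecord

record = BenchmarkRecord(
    model_name="example-org/example-model",  # placeholder model name
    precision="fp16",
    backend="onnxruntime",
    device="cuda",
    package_name="onnxruntime-gpu",  # placeholder package info
    package_version="0.0.0",
    batch_size=1,
)
record.config.seq_length = 128
record.metrics.latency_ms_mean = 42.0  # placeholder measurements
record.metrics.throughput_qps = 23.8
record.metrics.max_memory_usage_GB = 10.5
record.metrics.customized["per_token_latency_ms"] = 5.2  # extra fields go into the customized dict

print(record.to_json())
BenchmarkRecord.save_as_csv("benchmark_results.csv", [record])
BenchmarkRecord.save_as_json("benchmark_results.json", [record])

Because save_as_csv runs each record through pandas.json_normalize, the CSV columns come out dot-flattened (for example config.batch_size, metadata.device, metrics.latency_ms_mean), while save_as_json preserves the nested structure.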
(The diff continues in a second file, a benchmark runner script.)

@@ -7,6 +7,7 @@

import torch
from benchmark_helper import setup_logger
from metrics import BenchmarkRecord

logger = logging.getLogger(__name__)

@@ -121,11 +122,19 @@
help="Number of mins to attempt the benchmark before moving on",
)

parser.add_argument(
"--log-folder",
type=str,
default=None,
help="Path to folder to save logs and results",
)

args = parser.parse_args()

setattr(args, "model_size", args.model_name.split("/")[-1].replace(".", "-")) # noqa: B010
log_folder_name = f"./{args.model_size}_{args.precision}"
setattr(args, "log_folder", log_folder_name) # noqa: B010
if not args.log_folder:
        args.log_folder = log_folder_name  # noqa: B010

Code scanning / lintrunner: RUFF/RUF100 warning. Unused noqa directive (unused: B010). See https://docs.astral.sh/ruff/rules/unused-noqa

os.makedirs(args.log_folder, exist_ok=True)

# Convert timeout value to secs
@@ -197,6 +206,9 @@
df = pd.DataFrame(
results,
columns=[
"Warmup Runs",
"Measured Runs",
"Model Name",
"Engine",
"Precision",
"Device",
@@ -211,14 +223,52 @@
)

# Set column types
df["Warmup Runs"] = df["Warmup Runs"].astype("int")
df["Measured Runs"] = df["Measured Runs"].astype("int")
df["Batch Size"] = df["Batch Size"].astype("int")
df["Sequence Length"] = df["Sequence Length"].astype("int")
df["Latency (s)"] = df["Latency (s)"].astype("float")
df["Latency (ms)"] = df["Latency (ms)"].astype("float")
df["Throughput (tps)"] = df["Throughput (tps)"].astype("float")
df["Memory (GB)"] = df["Memory (GB)"].astype("float")

df.to_csv(filename, index=False)
    # Get the installed ONNX Runtime package name and version.
    import pkg_resources

    installed_packages = pkg_resources.working_set
    installed_packages_list = sorted(
        f"{i.key}=={i.version}"
        for i in installed_packages
        if i.key in ["ort-nightly-gpu", "ort-nightly", "onnxruntime", "onnxruntime-gpu"]
    )

    ort_pkg_name = ""
    ort_pkg_version = ""
    if installed_packages_list:
        ort_pkg_name, ort_pkg_version = installed_packages_list[0].split("==")

# Save results to csv with standard format
records = []
for _, row in df.iterrows():
        if row["Engine"] == "optimum-ort":
            record = BenchmarkRecord(
                row["Model Name"], row["Precision"], "onnxruntime", row["Device"], ort_pkg_name, ort_pkg_version
            )
        elif row["Engine"] in ["pytorch-eager", "pytorch-compile"]:
            record = BenchmarkRecord(
                row["Model Name"], row["Precision"], "pytorch", row["Device"], torch.__name__, torch.__version__
            )
        else:
            record = BenchmarkRecord(row["Model Name"], row["Precision"], row["Engine"], row["Device"], "", "")

record.config.warmup_runs = row["Warmup Runs"]
record.config.measured_runs = row["Measured Runs"]
record.config.batch_size = row["Batch Size"]
record.config.seq_length = row["Sequence Length"]
record.config.measure_step = row["Step"]
record.config.engine = row["Engine"]
record.metrics.latency_s_mean = row["Latency (s)"]
record.metrics.latency_ms_mean = row["Latency (ms)"]
record.metrics.throughput_tps = row["Throughput (tps)"]
record.metrics.max_memory_usage_GB = row["Memory (GB)"]

records.append(record)

BenchmarkRecord.save_as_csv(filename, records)
BenchmarkRecord.save_as_json(filename.replace(".csv", ".json"), records)
# df.to_csv(filename, index=False)
logger.info(f"Results saved in {filename}!")


@@ -234,7 +284,7 @@

# Create entries for csv
logger.info("Gathering data from log files...")
base_results = [engine, args.precision, args.device]
base_results = [args.warmup_runs, args.num_runs, args.model_name, engine, args.precision, args.device]
results = process_log_file(args.device_id, log_path, base_results)

return results
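A side note on the package-detection code above: pkg_resources is deprecated in recent setuptools releases, and a roughly equivalent lookup is available in the standard library. The sketch below is only an illustration of that alternative; it is not part of this change:

# Alternative sketch: detect the installed ONNX Runtime flavor via importlib.metadata.
from importlib.metadata import PackageNotFoundError, version

ort_pkg_name = ""
ort_pkg_version = ""
for candidate in ("onnxruntime", "onnxruntime-gpu", "ort-nightly", "ort-nightly-gpu"):
    try:
        ort_pkg_version = version(candidate)  # raises PackageNotFoundError when absent
        ort_pkg_name = candidate
        break
    except PackageNotFoundError:
        continue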
(The diff continues in a third file, the Whisper benchmark runner script.)

@@ -10,6 +10,8 @@
from benchmark_helper import setup_logger
from transformers import WhisperConfig, WhisperProcessor

from metrics import BenchmarkRecord

logger = logging.getLogger(__name__)


@@ -123,13 +125,21 @@ def get_args():
help="Number of mins to attempt the benchmark before moving on",
)

parser.add_argument(
"--log-folder",
type=str,
default=None,
help="Path to folder to save logs and results",
)

parser.add_argument("--tune", default=False, action="store_true")

args = parser.parse_args()

setattr(args, "model_size", args.model_name.split("/")[-1].replace(".", "-")) # noqa: B010
log_folder_name = f"./{args.model_size}-{args.precision}"
setattr(args, "log_folder", log_folder_name) # noqa: B010
if not args.log_folder:
args.log_folder = log_folder_name
os.makedirs(args.log_folder, exist_ok=True)

# Convert timeout value to secs
@@ -235,6 +245,9 @@ def save_results(results, filename):
df = pd.DataFrame(
results,
columns=[
"Warmup Runs",
"Measured Runs",
"Model Name",
"Engine",
"Precision",
"Device",
@@ -254,6 +267,8 @@ def save_results(results, filename):
)

# Set column types
df["Warmup Runs"] = df["Warmup Runs"].astype("int")
df["Measured Runs"] = df["Measured Runs"].astype("int")
df["Duration (s)"] = df["Duration (s)"].astype("float")
df["Token Length"] = df["Token Length"].astype("int")
df["Load Audio Latency (s)"] = df["Load Audio Latency (s)"].astype("float")
@@ -266,7 +281,47 @@
df["Memory (GB)"] = df["Memory (GB)"].astype("float")
df["Real Time Factor (RTF)"] = df["Real Time Factor (RTF)"].astype("float")

df.to_csv(filename, index=False)
    # Get the installed ONNX Runtime package name and version.
    import pkg_resources

    installed_packages = pkg_resources.working_set
    installed_packages_list = sorted(
        f"{i.key}=={i.version}"
        for i in installed_packages
        if i.key in ["ort-nightly-gpu", "ort-nightly", "onnxruntime", "onnxruntime-gpu"]
    )

    ort_pkg_name = ""
    ort_pkg_version = ""
    if installed_packages_list:
        ort_pkg_name, ort_pkg_version = installed_packages_list[0].split("==")

# Save results to csv with standard format
records = []
for _, row in df.iterrows():
        if row["Engine"] == "onnxruntime":
            record = BenchmarkRecord(
                row["Model Name"], row["Precision"], row["Engine"], row["Device"], ort_pkg_name, ort_pkg_version
            )
        else:
            record = BenchmarkRecord(
                row["Model Name"], row["Precision"], row["Engine"], row["Device"], torch.__name__, torch.__version__
            )

record.config.customized["audio_file"] = row["Audio File"]
record.config.warmup_runs = row["Warmup Runs"]
record.config.measured_runs = row["Measured Runs"]

record.metrics.customized["duration"] = row["Duration (s)"]
record.metrics.customized["token_length"] = row["Token Length"]
record.metrics.customized["load_audio_latency"] = row["Load Audio Latency (s)"]
record.metrics.customized["load_audio_throughput"] = row["Load Audio Throughput (qps)"]
record.metrics.customized["feature_extractor_latency_s"] = row["Feature Extractor Latency (s)"]
record.metrics.customized["feature_extractor_throughput_qps"] = row["Feature Extractor Throughput (qps)"]
record.metrics.customized["per_token_latency_ms"] = row["Per Token Latency (ms/token)"]
record.metrics.customized["rtf"] = row["Real Time Factor (RTF)"]

record.metrics.latency_ms_mean = row["Latency (s)"] * 1000
record.metrics.throughput_qps = row["Throughput (qps)"]
record.metrics.max_memory_usage_GB = row["Memory (GB)"]

records.append(record)

BenchmarkRecord.save_as_csv(filename, records)
BenchmarkRecord.save_as_json(filename.replace(".csv", ".json"), records)
# df.to_csv(filename, index=False)
logger.info(f"Results saved in {filename}!")


@@ -282,7 +337,7 @@ def benchmark(args, benchmark_cmd, engine, audio_file, duration):

# Create entries for csv
logger.info("Gathering data from log files...")
base_results = [engine, args.precision, args.device, audio_file, duration]
base_results = [args.warmup_runs, args.num_runs, args.model_name, engine, args.precision, args.device, audio_file, duration]
results = process_log_file(args.device_id, log_path, base_results)

return results