Benchmark Metrics #18927
Closed

Changes from 14 of 15 commits:
- bae9ad3  add log-folder arg (gunandrose4u)
- 435dd82  u (gunandrose4u)
- adfc227  u (gunandrose4u)
- 4bd813a  u (gunandrose4u)
- dd6c1e3  u (gunandrose4u)
- f84febb  u (gunandrose4u)
- 105f753  u (gunandrose4u)
- d080e18  u (gunandrose4u)
- d1567bf  llama logs (idiskyle)
- a692cb5  fix metrics (idiskyle)
- 365dba6  fix log folder (idiskyle)
- d4c36f7  add measure step (idiskyle)
- 6881e69  Merge branch 'benchmark_llama' of https://github.com/ironmangithub/on… (idiskyle)
- 09150a3  add engine (idiskyle)
- f4856e0  move fields to customized fields (idiskyle)
New file: metrics.py (name inferred from the `from metrics import BenchmarkRecord` import in the benchmark script below).
@@ -0,0 +1,172 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

import datetime
import json
from typing import Optional

import pandas as pd


class BaseObject:
    def __init__(self):
        self.customized = {}

    def to_dict(self):
        default_values = self.__dict__.copy()
        default_values.pop("customized", None)
        default_values.update(self.customized)

        for k, v in default_values.items():
            if isinstance(v, BaseObject):
                default_values[k] = v.to_dict()

        # Drop falsy values (None, 0, 0.0, False, "", []) from the serialized dict.
        return {k: v for k, v in default_values.items() if v}


class ModelInfo(BaseObject):
    def __init__(
        self,
        full_name: Optional[str] = None,
        is_huggingface: Optional[bool] = False,
        is_text_generation: Optional[bool] = False,
        short_name: Optional[str] = None,
    ):
        super().__init__()
        self.full_name = full_name
        self.is_huggingface = is_huggingface
        self.is_text_generation = is_text_generation
        self.short_name = short_name
        self.input_shape = []


class BackendOptions(BaseObject):
    def __init__(
        self,
        enable_profiling: Optional[bool] = False,
        execution_provider: Optional[str] = None,
        use_io_binding: Optional[bool] = False,
    ):
        super().__init__()
        self.enable_profiling = enable_profiling
        self.execution_provider = execution_provider
        self.use_io_binding = use_io_binding


class Config(BaseObject):
    def __init__(
        self,
        backend: Optional[str] = "onnxruntime",
        batch_size: Optional[int] = 1,
        seq_length: Optional[int] = 0,
        precision: Optional[str] = "fp32",
        warmup_runs: Optional[int] = 1,
        measured_runs: Optional[int] = 10,
        measure_step: Optional[str] = "",
        engine: Optional[str] = "",
    ):
        super().__init__()
        self.backend = backend
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.precision = precision
        self.warmup_runs = warmup_runs
        self.measured_runs = measured_runs
        self.model_info = ModelInfo()
        self.backend_options = BackendOptions()
        self.measure_step = measure_step
        self.engine = engine


class Metadata(BaseObject):
    def __init__(
        self,
        device: Optional[str] = None,
        package_name: Optional[str] = None,
        package_version: Optional[str] = None,
        platform: Optional[str] = None,
        python_version: Optional[str] = None,
    ):
        super().__init__()
        self.device = device
        self.package_name = package_name
        self.package_version = package_version
        self.platform = platform
        self.python_version = python_version


class Metrics(BaseObject):
    def __init__(
        self,
        latency_ms_mean: Optional[float] = 0.0,
        latency_s_mean: Optional[float] = 0.0,
        throughput_qps: Optional[float] = 0.0,
        throughput_tps: Optional[float] = 0.0,
        max_memory_usage_GB: Optional[float] = 0.0,
    ):
        super().__init__()
        self.latency_ms_mean = latency_ms_mean
        self.latency_s_mean = latency_s_mean
        self.throughput_qps = throughput_qps
        self.throughput_tps = throughput_tps
        self.max_memory_usage_GB = max_memory_usage_GB


class BenchmarkRecord:
    def __init__(
        self,
        model_name: str,
        precision: str,
        backend: str,
        device: str,
        package_name: str,
        package_version: str,
        batch_size: Optional[int] = 1,
        warmup_runs: Optional[int] = 1,
        measured_runs: Optional[int] = 10,
        trigger_date: Optional[str] = None,
    ):
        self.config = Config()
        self.metrics = Metrics()
        self.metadata = Metadata()
        self.trigger_date = trigger_date or datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        self.config.model_info.full_name = model_name
        self.config.precision = precision
        self.config.backend = backend
        self.config.batch_size = batch_size
        self.config.warmup_runs = warmup_runs
        self.config.measured_runs = measured_runs
        self.metadata.device = device
        self.metadata.package_name = package_name
        self.metadata.package_version = package_version

    def to_dict(self) -> dict:
        return {
            "config": self.config.to_dict(),
            "metadata": self.metadata.to_dict(),
            "metrics": self.metrics.to_dict(),
            "trigger_date": self.trigger_date,
        }

    def to_json(self) -> str:
        return json.dumps(self.to_dict(), default=str)

    @classmethod
    def save_as_csv(cls, file_name: str, records: list) -> None:
        if records is None or len(records) == 0:
            return
        rds = [record.to_dict() for record in records]
        df = pd.json_normalize(rds)
        df.to_csv(file_name, index=False)

    @classmethod
    def save_as_json(cls, file_name: str, records: list) -> None:
        if records is None or len(records) == 0:
            return
        rds = [record.to_dict() for record in records]
        with open(file_name, "w") as f:
            json.dump(rds, f, indent=4, default=str)
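For orientation, here is a minimal usage sketch of the BenchmarkRecord API added above; the model name, package version, metric values, and output file names are illustrative, not taken from the PR:

    # Hypothetical usage of metrics.BenchmarkRecord (all values illustrative).
    from metrics import BenchmarkRecord

    record = BenchmarkRecord(
        model_name="meta-llama/Llama-2-7b-hf",  # assumed model name
        precision="fp16",
        backend="onnxruntime",
        device="cuda",
        package_name="onnxruntime-gpu",
        package_version="1.16.3",  # assumed version
    )
    record.config.seq_length = 128
    record.config.engine = "onnxruntime"
    record.metrics.latency_ms_mean = 42.0      # assumed measurement
    record.metrics.throughput_tps = 3800.0     # assumed measurement
    record.metrics.max_memory_usage_GB = 10.5  # assumed measurement

    print(record.to_json())
    BenchmarkRecord.save_as_csv("benchmark_results.csv", [record])
    BenchmarkRecord.save_as_json("benchmark_results.json", [record])

Note that BaseObject.to_dict drops falsy values (0, 0.0, False, empty strings), so zero-valued metrics and False flags are omitted from the serialized record.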
Changes to the llama benchmark driver script (second file in the diff):

@@ -7,6 +7,7 @@
import torch
from benchmark_helper import setup_logger
from metrics import BenchmarkRecord

logger = logging.getLogger(__name__)


@@ -121,11 +122,19 @@
        help="Number of mins to attempt the benchmark before moving on",
    )

    parser.add_argument(
        "--log-folder",
        type=str,
        default=None,
        help="Path to folder to save logs and results",
    )

    args = parser.parse_args()

    setattr(args, "model_size", args.model_name.split("/")[-1].replace(".", "-"))  # noqa: B010
    log_folder_name = f"./{args.model_size}_{args.precision}"
    setattr(args, "log_folder", log_folder_name)  # noqa: B010
    if not args.log_folder:
        args.log_folder = log_folder_name  # noqa: B010

[Code scanning / lintrunner: RUFF/RUF100 warning on the line above. Unused noqa directive (unused: B010). See https://docs.astral.sh/ruff/rules/unused-noqa]

    os.makedirs(args.log_folder, exist_ok=True)

    # Convert timeout value to secs
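As a worked illustration of the default log-folder derivation above (model name and precision are assumed, not from the PR):

    # Hypothetical values showing the model_size / log_folder derivation.
    model_name = "some-org/My-Model-v1.5"
    precision = "fp16"
    model_size = model_name.split("/")[-1].replace(".", "-")  # "My-Model-v1-5"
    log_folder = f"./{model_size}_{precision}"                # "./My-Model-v1-5_fp16"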
@@ -197,6 +206,9 @@
    df = pd.DataFrame(
        results,
        columns=[
            "Warmup Runs",
            "Measured Runs",
            "Model Name",
            "Engine",
            "Precision",
            "Device",

@@ -211,14 +223,52 @@
    )

    # Set column types
    df["Warmup Runs"] = df["Warmup Runs"].astype("int")
    df["Measured Runs"] = df["Measured Runs"].astype("int")
    df["Batch Size"] = df["Batch Size"].astype("int")
    df["Sequence Length"] = df["Sequence Length"].astype("int")
    df["Latency (s)"] = df["Latency (s)"].astype("float")
    df["Latency (ms)"] = df["Latency (ms)"].astype("float")
    df["Throughput (tps)"] = df["Throughput (tps)"].astype("float")
    df["Memory (GB)"] = df["Memory (GB)"].astype("float")

    df.to_csv(filename, index=False)
    # Get package name and version
    import pkg_resources

    installed_packages = pkg_resources.working_set
    installed_packages_list = sorted(
        [
            f"{i.key}=={i.version}"
            for i in installed_packages
            if i.key in ["ort-nightly-gpu", "ort-nightly", "onnxruntime", "onnxruntime-gpu"]
        ]
    )

    ort_pkg_name = ""
    ort_pkg_version = ""
    if installed_packages_list:
        ort_pkg_name = installed_packages_list[0].split("==")[0]
        ort_pkg_version = installed_packages_list[0].split("==")[1]

    # Save results to csv with standard format
    records = []
    for _, row in df.iterrows():
        if row["Engine"] == "optimum-ort":
            record = BenchmarkRecord(
                row["Model Name"], row["Precision"], "onnxruntime", row["Device"], ort_pkg_name, ort_pkg_version
            )
        elif row["Engine"] in ["pytorch-eager", "pytorch-compile"]:
            record = BenchmarkRecord(
                row["Model Name"], row["Precision"], "pytorch", row["Device"], torch.__name__, torch.__version__
            )
        else:
            record = BenchmarkRecord(row["Model Name"], row["Precision"], row["Engine"], row["Device"], "", "")

        record.config.warmup_runs = row["Warmup Runs"]
        record.config.measured_runs = row["Measured Runs"]
        record.config.batch_size = row["Batch Size"]
        record.config.seq_length = row["Sequence Length"]
        record.config.measure_step = row["Step"]
        record.config.engine = row["Engine"]
        record.metrics.latency_s_mean = row["Latency (s)"]
        record.metrics.latency_ms_mean = row["Latency (ms)"]
        record.metrics.throughput_tps = row["Throughput (tps)"]
        record.metrics.max_memory_usage_GB = row["Memory (GB)"]

        records.append(record)

    BenchmarkRecord.save_as_csv(filename, records)
    BenchmarkRecord.save_as_json(filename.replace(".csv", ".json"), records)
    # df.to_csv(filename, index=False)
    logger.info(f"Results saved in {filename}!")


@@ -234,7 +284,7 @@

    # Create entries for csv
    logger.info("Gathering data from log files...")
    base_results = [engine, args.precision, args.device]
    base_results = [args.warmup_runs, args.num_runs, args.model_name, engine, args.precision, args.device]
    results = process_log_file(args.device_id, log_path, base_results)

    return results
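A side note on the package lookup above: pkg_resources is deprecated in recent setuptools releases. A sketch of the same ORT-package detection using the standard library's importlib.metadata (not part of this PR):

    # Alternative to pkg_resources.working_set using importlib.metadata.
    from importlib.metadata import distributions

    ort_candidates = {"ort-nightly-gpu", "ort-nightly", "onnxruntime", "onnxruntime-gpu"}
    installed = sorted(
        f"{dist.metadata['Name']}=={dist.version}"
        for dist in distributions()
        if dist.metadata["Name"] in ort_candidates
    )

    ort_pkg_name, ort_pkg_version = "", ""
    if installed:
        ort_pkg_name, ort_pkg_version = installed[0].split("==")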
[Code scanning / lintrunner: BLACK-ISORT/format warning reported on this PR.]