From d0a3ee5066d6afec9310b54bcb44e5486cf875c5 Mon Sep 17 00:00:00 2001
From: Xavier Dupre
Date: Thu, 20 Jun 2024 17:46:06 +0200
Subject: [PATCH] add code to run multiple exports in one call

Signed-off-by: Xavier Dupre
---
 onnxscript/tools/benchmark/__init__.py           |   6 +
 .../tools/benchmark/benchmark_helpers.py         |  26 ++
 .../tools/benchmark/benchmark_helpers_test.py    |  53 ++++
 onnxscript/tools/benchmark/benchmark_run.py      | 129 +++++++++
 onnxscript/tools/benchmark/export_model.py       | 250 ++++++++++--------
 5 files changed, 352 insertions(+), 112 deletions(-)
 create mode 100644 onnxscript/tools/benchmark/benchmark_helpers_test.py
 create mode 100644 onnxscript/tools/benchmark/benchmark_run.py

diff --git a/onnxscript/tools/benchmark/__init__.py b/onnxscript/tools/benchmark/__init__.py
index ccc9d81ed..8f1b6f4d3 100644
--- a/onnxscript/tools/benchmark/__init__.py
+++ b/onnxscript/tools/benchmark/__init__.py
@@ -5,6 +5,9 @@
 from onnxscript.tools.benchmark.benchmark_helpers import (
     common_export,
     get_parsed_args,
+    make_configs,
+    make_dataframe_from_benchmark_data,
+    multi_run,
     run_inference,
     run_onnx_inference,
 )
@@ -12,6 +15,9 @@
 __all__ = [
     "get_parsed_args",
     "common_export",
+    "make_configs",
+    "multi_run",
+    "make_dataframe_from_benchmark_data",
     "run_inference",
     "run_onnx_inference",
 ]
diff --git a/onnxscript/tools/benchmark/benchmark_helpers.py b/onnxscript/tools/benchmark/benchmark_helpers.py
index 36d9084fa..191784aff 100644
--- a/onnxscript/tools/benchmark/benchmark_helpers.py
+++ b/onnxscript/tools/benchmark/benchmark_helpers.py
@@ -5,6 +5,7 @@
 from __future__ import annotations
 
 import argparse
+import itertools
 import multiprocessing
 import os
 import platform
@@ -697,3 +698,28 @@ def run_onnx_inference(
         print(f"[run_inference] measure done in {time.perf_counter() - begin}")
 
     return stats
+
+
+def multi_run(kwargs: dict[str, Any]) -> bool:
+    """Checks if multiple values were sent for one argument."""
+    return any(isinstance(v, str) and "," in v for v in kwargs.values())
+
+
+def make_configs(kwargs: dict[str, Any]) -> list[dict[str, Any]]:
+    """Creates all the configurations based on the command line arguments."""
+    print(kwargs)
+    args = []
+    for k, v in kwargs.items():
+        if isinstance(v, str):
+            args.append([(k, s) for s in v.split(",")])
+        else:
+            args.append([(k, v)])
+    configs = list(itertools.product(*args))
+    return [dict(c) for c in configs]
+
+
+def make_dataframe_from_benchmark_data(data: dict) -> Any:
+    """Creates a dataframe from the received data."""
+    import pandas
+
+    return pandas.DataFrame(data)
diff --git a/onnxscript/tools/benchmark/benchmark_helpers_test.py b/onnxscript/tools/benchmark/benchmark_helpers_test.py
new file mode 100644
index 000000000..ec88ffd9e
--- /dev/null
+++ b/onnxscript/tools/benchmark/benchmark_helpers_test.py
@@ -0,0 +1,53 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+import unittest
+
+import onnxscript.tools.benchmark.benchmark_helpers as bh
+
+
+class BenchmarkHelperTest(unittest.TestCase):
+    def test_make_configs(self):
+        value = {
+            "warmup": 5,
+            "model": "llama,phi",
+            "device": "cpu,cuda",
+            "config": "medium",
+            "dump_folder": "",
+        }
+        self.assertTrue(bh.multi_run(value))
+        configs = bh.make_configs(value)
+        expected = [
+            {
+                "warmup": 5,
+                "model": "llama",
+                "device": "cpu",
+                "config": "medium",
+                "dump_folder": "",
+            },
+            {
+                "warmup": 5,
+                "model": "llama",
+                "device": "cuda",
+                "config": "medium",
+                "dump_folder": "",
+            },
+            {
+                "warmup": 5,
+                "model": "phi",
+                "device": "cpu",
+                "config": "medium",
+                "dump_folder": "",
+            },
+            {
+                "warmup": 5,
+                "model": "phi",
+                "device": "cuda",
+                "config": "medium",
+                "dump_folder": "",
+            },
+        ]
+        self.assertEqual(expected, configs)
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/onnxscript/tools/benchmark/benchmark_run.py b/onnxscript/tools/benchmark/benchmark_run.py
new file mode 100644
index 000000000..13bf6a9d4
--- /dev/null
+++ b/onnxscript/tools/benchmark/benchmark_run.py
@@ -0,0 +1,129 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+from __future__ import annotations
+
+import multiprocessing
+import os
+import platform
+import re
+import subprocess
+import sys
+
+
+class BenchmarkError(RuntimeError):
+    pass
+
+
+def get_machine() -> dict[str, str | int | float | tuple[int, int]]:
+    """Returns the machine specification."""
+    cpu: dict[str, str | int | float | tuple[int, int]] = dict(
+        machine=str(platform.machine()),
+        processor=str(platform.processor()),
+        version=str(sys.version),
+        cpu=int(multiprocessing.cpu_count()),
+        executable=str(sys.executable),
+    )
+    try:
+        import torch.cuda
+    except ImportError:
+        return cpu
+
+    cpu["has_cuda"] = bool(torch.cuda.is_available())
+    if cpu["has_cuda"]:
+        cpu["capability"] = torch.cuda.get_device_capability(0)
+        cpu["device_name"] = str(torch.cuda.get_device_name(0))
+    return cpu
+
+
+def _cmd_line(script_name: str, **kwargs: dict[str, str | int | float]) -> list[str]:
+    args = [sys.executable, "-m", script_name]
+    for k, v in kwargs.items():
+        args.append(f"--{k}")
+        args.append(str(v))
+    return args
+
+
+def _extract_metrics(text: str) -> dict[str, str]:
+    reg = re.compile(":(.*?),(.*.?);")
+    res = reg.findall(text)
+    if len(res) == 0:
+        return {}
+    return dict(res)
+
+
+def _make_prefix(script_name: str, index: int) -> str:
+    name = os.path.splitext(script_name)[0]
+    return f"{name}_dort_c{index}_"
+
+
+def run_benchmark(
+    script_name: str,
+    configs: list[dict[str, str | int | float]],
+    verbose: int = 0,
+    stop_if_exception: bool = True,
+    dort_dump: bool = False,
+) -> list[dict[str, str | int | float | tuple[int, int]]]:
+    """
+    Runs a script multiple times and extracts information from the output
+    following the pattern ``:<metric>,<value>;``.
+
+    :param script_name: python script to run
+    :param configs: list of executions to run
+    :param stop_if_exception: stop if one experiment failed, otherwise continue
+    :param verbose: use tqdm to follow the progress
+    :param dort_dump: dump onnx file if dort is used
+    :return: a list of dictionaries, one per configuration, with the collected metrics
+    """
+    if verbose:
+        from tqdm import tqdm
+
+        loop = tqdm(configs)
+    else:
+        loop = configs
+
+    data: list[dict[str, str | int | float | tuple[int, int]]] = []
+    for i, config in enumerate(loop):
+        cmd = _cmd_line(script_name, **config)
+
+        if dort_dump:
+            os.environ["ONNXRT_DUMP_PATH"] = _make_prefix(script_name, i)
+        else:
+            os.environ["ONNXRT_DUMP_PATH"] = ""
+        if verbose > 3:
+            print(f"[run_benchmark] cmd={cmd if isinstance(cmd, str) else ' '.join(cmd)}")
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        res = p.communicate()
+        out, err = res
+        sout = out.decode("utf-8", errors="ignore")
+        serr = err.decode("utf-8", errors="ignore")
+
+        if "ONNXRuntimeError" in serr or "ONNXRuntimeError" in sout:
+            if stop_if_exception:
+                raise RuntimeError(
+                    f"Unable to continue with config {config} due to the "
+                    f"following error\n{serr}"
+                    f"\n----OUTPUT--\n{sout}"
+                )
+
+        metrics = _extract_metrics(sout)
+        if len(metrics) == 0:
+            if stop_if_exception:
+                raise BenchmarkError(
+                    f"Unable (2) to continue with config {config}, no metric was "
+                    f"collected.\n--ERROR--\n{serr}\n--OUTPUT--\n{sout}"
+                )
+            else:
+                metrics = {}
+        metrics.update(config)
+        metrics["ERROR"] = serr
+        metrics["OUTPUT"] = sout
+        metrics["CMD"] = f"[{' '.join(cmd)}]"
+        data.append(metrics)
+        if verbose > 5:
+            print("--------------- ERROR")
+            print(serr)
+        if verbose >= 10:
+            print("--------------- OUTPUT")
+            print(sout)
+
+    return data
diff --git a/onnxscript/tools/benchmark/export_model.py b/onnxscript/tools/benchmark/export_model.py
index 88d40dc27..efa09989a 100644
--- a/onnxscript/tools/benchmark/export_model.py
+++ b/onnxscript/tools/benchmark/export_model.py
@@ -19,6 +19,10 @@ def main(args=None):
     This script can be used to quickly evaluate the improvment made by a pattern
     optimization for a particular model.
 
+    If one value contains ",", the script understands that multiple commands
+    must be run. It computes all the possible configurations.
+    In that case, it produces a csv file (if output_data is not empty) with all the results.
+
     Example with a large phi model::
 
         python -m onnxscript.tools.benchmark.export_model --model phi --device cuda --config large --num_hidden_layers=6 --dtype=float32 --dynamic=0 --verbose=1 --exporter=dynamo
@@ -50,130 +54,152 @@ def main(args=None):
         ),
         implementation=("eager", "eager or sdpa"),
         memory_peak=(0, "measure the memory peak during conversion"),
+        output_data=(
+            "export_model.csv",
+            "produces a csv file with the data if multiple configurations are tested",
+        ),
         new_args=args,
     )
-
-    print("-------------------")
-    print("[export_model]")
-    pprint.pprint(kwargs)
-    print("-------------------")
-
-    # Import is delayed so that help is being display faster (without having to import heavy packages).
-    import onnxscript.tools
-    import onnxscript.tools.memory_peak
-    import onnxscript.tools.transformers_models
-
-    print(
-        f"[export_model] create the model and inputs for {kwargs['model']!r} and config {kwargs['config']!r}"
-    )
-    begin = time.perf_counter()
-    model, example_inputs, dynamic_shapes = (
-        onnxscript.tools.transformers_models.get_model_and_inputs(
-            warmup=kwargs["warmup"],
-            repeat=kwargs["repeat"],
-            model=kwargs["model"],
-            config=kwargs["config"],
-            dynamic_shapes=kwargs["dynamic"],
-            device=kwargs["device"],
-            num_hidden_layers=kwargs["num_hidden_layers"],
-            with_mask=kwargs["with_mask"],
-            implementation=kwargs["implementation"],
-            dtype=kwargs["dtype"],
+    if onnxscript.tools.benchmark.multi_run(kwargs):
+        import onnxscript.tools.benchmark.benchmark_run
+
+        configs = onnxscript.tools.benchmark.make_configs(kwargs)
+        data = onnxscript.tools.benchmark.benchmark_run.run_benchmark(
+            "onnxscript.tools.benchmark.export_model",
+            configs,
+            kwargs["verbose"],
+            stop_if_exception=False,
         )
-    )
-    print(f"[export_model] model created in {time.perf_counter() - begin}")
-    if kwargs["dynamic"]:
-        print(f"[export_model] dynamic_shapes={dynamic_shapes}")
-    msg = [tuple(i.shape for i in inp) for inp in example_inputs]
-    print(f"[export_model] input_shapes={msg}")
-    conversion: dict[str, Any] = {}
-    memory_stats: dict[str, float] = {}
-
-    if kwargs["exporter"] == "eager":
-        print("[export_model] start benchmark")
-        begin = time.perf_counter()
-        result = onnxscript.tools.benchmark.run_inference(
-            model,
-            example_inputs,
-            warmup=kwargs["warmup"],
-            repeat=kwargs["repeat"],
-            verbose=kwargs["verbose"],
-        )
-        print(f"[export_model] benchmark done in {time.perf_counter() - begin}")
+        if kwargs["verbose"] > 2:
+            pprint.pprint(data if kwargs["verbose"] > 3 else data[:2])
+        if kwargs["output_data"]:
+            df = onnxscript.tools.benchmark.make_dataframe_from_benchmark_data(data)
+            df.to_csv(kwargs["output_data"], index=False)
+            df.to_excel(kwargs["output_data"] + ".xlsx", index=False)
+            if kwargs["verbose"]:
+                print(df)
     else:
+        print("-------------------")
+        print("[export_model]")
+        pprint.pprint(kwargs)
+        print("-------------------")
+
+        # Import is delayed so that help is displayed faster (without having to import heavy packages).
+        import onnxscript.tools
+        import onnxscript.tools.memory_peak
+        import onnxscript.tools.transformers_models
+
         print(
-            f"[export_model] export to onnx with exporter={kwargs['exporter']!r} "
-            f"and optimization={kwargs['optimization']!r}"
+            f"[export_model] create the model and inputs for {kwargs['model']!r} and config {kwargs['config']!r}"
         )
         begin = time.perf_counter()
-        if kwargs["optimization"]:
-            m = hashlib.sha256()
-            m.update(kwargs["optimization"].encode())
-            so = m.hexdigest()[:5]
-        else:
-            so = ""
-        name = "_".join(
-            [
-                kwargs["model"],
-                kwargs["exporter"],
-                "dynamic" if kwargs["dynamic"] else "static",
-                kwargs["dtype"].replace("float", "fp"),
-                kwargs["device"],
-                kwargs["config"],
-                f"h{kwargs['num_hidden_layers']}",
-                so,
-            ],
-        )
-        filename = f"em_{name}.onnx"
-
-        memory_session = (
-            onnxscript.tools.memory_peak.start_spying_on(cuda=kwargs["device"] == "cuda")
-            if kwargs["memory_peak"]
-            else None
-        )
-        print(f"[export_model] start memory peak monitoring {memory_session}")
-        proto = onnxscript.tools.benchmark.common_export(
-            model=model,
-            inputs=example_inputs[0],
-            exporter=kwargs["exporter"],
-            target_opset=kwargs["target_opset"],
-            folder=kwargs["dump_folder"],
-            filename=filename,
-            dynamic_shapes=dynamic_shapes if kwargs["dynamic"] else None,
-            optimization=kwargs["optimization"],
-            verbose=kwargs["verbose"],
-            stats=conversion,
+        model, example_inputs, dynamic_shapes = (
+            onnxscript.tools.transformers_models.get_model_and_inputs(
+                warmup=kwargs["warmup"],
+                repeat=kwargs["repeat"],
+                model=kwargs["model"],
+                config=kwargs["config"],
+                dynamic_shapes=kwargs["dynamic"],
+                device=kwargs["device"],
+                num_hidden_layers=kwargs["num_hidden_layers"],
+                with_mask=kwargs["with_mask"],
+                implementation=kwargs["implementation"],
+                dtype=kwargs["dtype"],
+            )
         )
-        print(f"[export_model] export to onnx done in {time.perf_counter() - begin}")
-        if memory_session is not None:
-            memory_results = memory_session.stop()
-            print(f"[export_model] ends memory monitoring {memory_results}")
-            memory_stats = onnxscript.tools.memory_peak.flatten(
-                memory_results, prefix="memory_"
+        print(f"[export_model] model created in {time.perf_counter() - begin}")
+        if kwargs["dynamic"]:
+            print(f"[export_model] dynamic_shapes={dynamic_shapes}")
+        msg = [tuple(i.shape for i in inp) for inp in example_inputs]
+        print(f"[export_model] input_shapes={msg}")
+        conversion: dict[str, Any] = {}
+        memory_stats: dict[str, float] = {}
+
+        if kwargs["exporter"] == "eager":
+            print("[export_model] start benchmark")
+            begin = time.perf_counter()
+            result = onnxscript.tools.benchmark.run_inference(
+                model,
+                example_inputs,
+                warmup=kwargs["warmup"],
+                repeat=kwargs["repeat"],
+                verbose=kwargs["verbose"],
             )
+            print(f"[export_model] benchmark done in {time.perf_counter() - begin}")
         else:
-            memory_stats = {}
-
-        result = onnxscript.tools.benchmark.run_onnx_inference(
-            proto,
-            example_inputs,
-            warmup=kwargs["warmup"],
-            repeat=kwargs["repeat"],
-            verbose=kwargs["verbose"],
-            ort_optimize=kwargs["ort_optimize"],
-        )
+            print(
+                f"[export_model] export to onnx with exporter={kwargs['exporter']!r} "
+                f"and optimization={kwargs['optimization']!r}"
+            )
+            begin = time.perf_counter()
+            if kwargs["optimization"]:
+                m = hashlib.sha256()
+                m.update(kwargs["optimization"].encode())
+                so = m.hexdigest()[:5]
+            else:
+                so = ""
+            name = "_".join(
+                [
+                    kwargs["model"],
+                    kwargs["exporter"],
+                    "dynamic" if kwargs["dynamic"] else "static",
+                    kwargs["dtype"].replace("float", "fp"),
+                    kwargs["device"],
+                    kwargs["config"],
+                    f"h{kwargs['num_hidden_layers']}",
+                    so,
+                ],
+            )
+            filename = f"em_{name}.onnx"
 
-    print("[export_model] end")
-    print("------------------------------")
-    for k, v in sorted(kwargs.items()):
-        print(f":{k},{v};")
-    for k, v in sorted(conversion.items()):
-        print(f":{k},{v};")
-    if memory_stats:
-        for k, v in memory_stats.items():
+            memory_session = (
+                onnxscript.tools.memory_peak.start_spying_on(cuda=kwargs["device"] == "cuda")
+                if kwargs["memory_peak"]
+                else None
+            )
+            print(f"[export_model] start memory peak monitoring {memory_session}")
+            proto = onnxscript.tools.benchmark.common_export(
+                model=model,
+                inputs=example_inputs[0],
+                exporter=kwargs["exporter"],
+                target_opset=kwargs["target_opset"],
+                folder=kwargs["dump_folder"],
+                filename=filename,
+                dynamic_shapes=dynamic_shapes if kwargs["dynamic"] else None,
+                optimization=kwargs["optimization"],
+                verbose=kwargs["verbose"],
+                stats=conversion,
+            )
+            print(f"[export_model] export to onnx done in {time.perf_counter() - begin}")
+            if memory_session is not None:
+                memory_results = memory_session.stop()
+                print(f"[export_model] ends memory monitoring {memory_results}")
+                memory_stats = onnxscript.tools.memory_peak.flatten(
+                    memory_results, prefix="memory_"
+                )
+            else:
+                memory_stats = {}
+
+            result = onnxscript.tools.benchmark.run_onnx_inference(
+                proto,
+                example_inputs,
+                warmup=kwargs["warmup"],
+                repeat=kwargs["repeat"],
+                verbose=kwargs["verbose"],
+                ort_optimize=kwargs["ort_optimize"],
+            )
+
+        print("[export_model] end")
+        print("------------------------------")
+        for k, v in sorted(kwargs.items()):
+            print(f":{k},{v};")
+        for k, v in sorted(conversion.items()):
+            print(f":{k},{v};")
+        if memory_stats:
+            for k, v in memory_stats.items():
+                print(f":{k},{v};")
+        for k, v in sorted(result.items()):
             print(f":{k},{v};")
-    for k, v in sorted(result.items()):
-        print(f":{k},{v};")
 
 
 if __name__ == "__main__":
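
Usage sketch (not part of the patch; it assumes the patch above is applied and only
restates what benchmark_helpers_test.py already exercises). Arguments whose string
value contains a comma are treated as sweeps, and ``make_configs`` expands them into
one configuration per combination::

    import onnxscript.tools.benchmark as bench

    kwargs = {"model": "llama,phi", "device": "cpu", "warmup": 5}

    # multi_run is True as soon as one string value holds a comma.
    assert bench.multi_run(kwargs)

    # make_configs returns the cartesian product over the swept values;
    # non-swept entries are copied into every configuration.
    configs = bench.make_configs(kwargs)
    # [{'model': 'llama', 'device': 'cpu', 'warmup': 5},
    #  {'model': 'phi', 'device': 'cpu', 'warmup': 5}]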
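
The same convention drives export_model.py from the command line: any flag given a
comma-separated value triggers the multi-run path, which re-invokes the script once
per configuration through ``run_benchmark``, scrapes the ``:<metric>,<value>;`` lines
each run prints at the end, and writes the aggregated table to the file named by
``--output_data`` (plus an ``.xlsx`` copy). A swept call could look like the
following; the flag values are illustrative only::

    python -m onnxscript.tools.benchmark.export_model --model phi,llama --device cpu \
        --config medium --num_hidden_layers=1 --dtype=float32 --dynamic=0 --verbose=1 \
        --exporter=eager,dynamo --output_data=export_model.csv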
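
The scraping itself is a single regular-expression pass. The snippet below feeds a
hand-written output to the private helper ``_extract_metrics``; the metric names are
made up for the example::

    from onnxscript.tools.benchmark.benchmark_run import _extract_metrics

    fake_output = ":model,phi;\n:exporter,dynamo;\n:time_export,1.5;\n"
    print(_extract_metrics(fake_output))
    # {'model': 'phi', 'exporter': 'dynamo', 'time_export': '1.5'}

Values come back as strings; ``run_benchmark`` then merges the configuration and the
raw process output into the same dictionary before it is turned into a dataframe.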