diff --git a/onnxruntime/python/tools/transformers/models/llama/benchmark.py b/onnxruntime/python/tools/transformers/models/llama/benchmark.py index a53dead77dea6..f597cead40331 100644 --- a/onnxruntime/python/tools/transformers/models/llama/benchmark.py +++ b/onnxruntime/python/tools/transformers/models/llama/benchmark.py @@ -243,7 +243,7 @@ def get_model(args: argparse.Namespace): decoder_file_name=decoder_file_name, decoder_with_past_file_name=decoder_with_past_file_name, use_auth_token=args.auth, - use_io_binding=(args.device != "cpu"), + use_io_binding=True, # Large perf gain even for cpu due to avoiding output copy. use_merged=(True if decoder_file_name == "model.onnx" else None), provider=provider, provider_options=provider_options, diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py index 6c337af78e0a9..3879e25386d53 100755 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py @@ -315,13 +315,13 @@ def get_optimum_ort_pipeline( directory, provider=provider, session_options=None, - use_io_binding=False, + use_io_binding=False, # Not supported by Optimum version 1.17.1 at the time of verification. ) else: pipeline = ORTStableDiffusionPipeline.from_pretrained( directory, provider=provider, - use_io_binding=False, + use_io_binding=False, # Not supported by Optimum version 1.17.1 at the time of verification. ) elif "xl" in model_name: pipeline = ORTStableDiffusionXLPipeline.from_pretrained( @@ -329,7 +329,7 @@ def get_optimum_ort_pipeline( export=True, provider=provider, session_options=None, - use_io_binding=False, + use_io_binding=False, # Not supported by Optimum version 1.17.1 at the time of verification. ) pipeline.save_pretrained(directory) else: @@ -337,7 +337,7 @@ def get_optimum_ort_pipeline( model_name, export=True, provider=provider, - use_io_binding=False, + use_io_binding=False, # Not supported by Optimum version 1.17.1 at the time of verification. ) pipeline.save_pretrained(directory) diff --git a/onnxruntime/python/tools/transformers/models/whisper/benchmark.py b/onnxruntime/python/tools/transformers/models/whisper/benchmark.py index 11e596cadc2cb..3f7a292a02748 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/benchmark.py +++ b/onnxruntime/python/tools/transformers/models/whisper/benchmark.py @@ -145,10 +145,10 @@ def get_model(args: argparse.Namespace): start_time = time.time() model = ORTModelForSpeechSeq2Seq.from_pretrained( args.hf_ort_dir_path, - use_io_binding=(args.device != "cpu"), provider=provider, provider_options=provider_options, session_options=sess_options, + use_io_binding=True, # Avoid memory copy overhead ) end_time = time.time()