diff --git a/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.cc b/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.cc index ea80e01a7adda..927d3a58e5a6f 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.cc +++ b/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.cc @@ -1095,8 +1095,7 @@ Status UpdateDecoderCrossQK( [[maybe_unused]] float* cross_qk_buffer_data, [[maybe_unused]] int max_length, [[maybe_unused]] AllocatorPtr allocator) { - throw std::runtime_error("CPU beam search current not support output cross QK."); - return Status::OK(); + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "CPU beam search current not support output cross QK."); } Status FinalizeDecoderCrossQK( @@ -1114,8 +1113,7 @@ Status FinalizeDecoderCrossQK( [[maybe_unused]] int num_return_sequences, [[maybe_unused]] const int* cache_indir_data, [[maybe_unused]] gsl::span beam_indices) { - throw std::runtime_error("CPU beam search current not support output cross QK."); - return Status::OK(); + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "CPU beam search current not support output cross QK."); } } // namespace GenerationCpuDeviceHelper diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py index 193039f71537a..4609aeadcbedf 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py @@ -2,15 +2,14 @@ import os import onnx -from onnx import TensorProto, helper -from transformers import WhisperConfig - from benchmark_helper import Precision from convert_generation import ( get_shared_initializers, update_decoder_subgraph_output_cross_attention, update_decoder_subgraph_share_buffer_and_use_decoder_masked_mha, ) +from onnx import TensorProto, helper +from transformers import WhisperConfig logger = logging.getLogger(__name__)