diff --git a/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.cc b/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.cc
index ea80e01a7adda..927d3a58e5a6f 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.cc
+++ b/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.cc
@@ -1095,8 +1095,7 @@ Status UpdateDecoderCrossQK(
     [[maybe_unused]] float* cross_qk_buffer_data,
     [[maybe_unused]] int max_length,
     [[maybe_unused]] AllocatorPtr allocator) {
-  throw std::runtime_error("CPU beam search current not support output cross QK.");
-  return Status::OK();
+  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "CPU beam search current not support output cross QK.");
 }
 
 Status FinalizeDecoderCrossQK(
@@ -1114,8 +1113,7 @@ Status FinalizeDecoderCrossQK(
     [[maybe_unused]] int num_return_sequences,
     [[maybe_unused]] const int* cache_indir_data,
     [[maybe_unused]] gsl::span<const int32_t> beam_indices) {
-  throw std::runtime_error("CPU beam search current not support output cross QK.");
-  return Status::OK();
+  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "CPU beam search current not support output cross QK.");
 }
 
 }  // namespace GenerationCpuDeviceHelper
diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py
index 193039f71537a..4609aeadcbedf 100644
--- a/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py
+++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py
@@ -2,15 +2,14 @@
 import os
 
 import onnx
-from onnx import TensorProto, helper
-from transformers import WhisperConfig
-
 from benchmark_helper import Precision
 from convert_generation import (
     get_shared_initializers,
     update_decoder_subgraph_output_cross_attention,
     update_decoder_subgraph_share_buffer_and_use_decoder_masked_mha,
 )
+from onnx import TensorProto, helper
+from transformers import WhisperConfig
 
 logger = logging.getLogger(__name__)