From 2bf03ae12983460fa95da896213302811b05da8e Mon Sep 17 00:00:00 2001 From: Hector Li Date: Tue, 10 Dec 2024 14:42:44 -0800 Subject: [PATCH] change the default value for session option ep.context_embed_mode to 0 since it avoids the model loading memory overhead --- .../core/session/onnxruntime_session_options_config_keys.h | 4 ++-- onnxruntime/core/providers/openvino/contexts.h | 2 +- .../core/providers/openvino/openvino_execution_provider.h | 2 +- .../core/providers/openvino/openvino_provider_factory.cc | 2 +- onnxruntime/core/providers/qnn/qnn_execution_provider.cc | 2 +- .../core/providers/vitisai/vitisai_execution_provider.h | 2 +- onnxruntime/core/session/provider_bridge_ort.cc | 2 +- onnxruntime/test/onnx/main.cc | 5 ++++- 8 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h index 6a01602e634f8..8f1bc98ce7b49 100644 --- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h +++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h @@ -261,8 +261,8 @@ static const char* const kOrtSessionOptionEpContextEnable = "ep.context_enable"; static const char* const kOrtSessionOptionEpContextFilePath = "ep.context_file_path"; // Flag to specify whether to dump the EP context into the Onnx model. -// "0": dump the EP context into separate file, keep the file name in the Onnx model. -// "1": dump the EP context into the Onnx model. (default). +// "0": dump the EP context into separate file, keep the file name in the Onnx model. (default). +// "1": dump the EP context into the Onnx model. 
static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode"; // Specify the EPContext node name prefix to make it unique diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index a2f4b236213cc..4f970bc7bc287 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -18,7 +18,7 @@ struct GlobalContext { bool is_wholly_supported_graph = false; bool enable_opencl_throttling = false; bool disable_dynamic_shapes = false; - bool ep_context_embed_mode = true; + bool ep_context_embed_mode = false; bool export_ep_ctx_blob = false; bool enable_qdq_optimizer = false; bool disable_cpu_fallback = false; diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h index bea9badea475a..59dbd141f4782 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h @@ -90,7 +90,7 @@ struct OpenVINOExecutionProviderInfo { bool export_ep_ctx_blob_{false}; bool enable_qdq_optimizer_{false}; bool disable_cpu_fallback_{false}; - bool so_epctx_embed_mode_{true}; + bool so_epctx_embed_mode_{false}; OpenVINOExecutionProviderInfo() = delete; diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 57c4e92685c96..66f9bcb7b2a5e 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -53,7 +53,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory { std::unique_ptr OpenVINOProviderFactory::CreateProvider() { bool so_disable_cpu_fallback = config_options_.GetConfigOrDefault(kOrtSessionOptionsDisableCPUEPFallback, "0") == "1"; bool so_export_ep_ctx_blob = 
config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") == "1"; - bool so_epctx_embed_mode = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "1") == "1"; + bool so_epctx_embed_mode = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "0") == "1"; std::string so_cache_path = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "").c_str(); if (so_export_ep_ctx_blob && !so_cache_path.empty()) { diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index 060bbd4f79bf2..27e195dea73d2 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -204,7 +204,7 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio LOGS_DEFAULT(VERBOSE) << "Context cache enable: " << context_cache_enabled_; std::string embed_mode = session_options->config_options.GetConfigOrDefault( - kOrtSessionOptionEpContextEmbedMode, "1"); + kOrtSessionOptionEpContextEmbedMode, "0"); if ("1" == embed_mode) { qnn_context_embed_mode_ = true; } else if ("0" == embed_mode) { diff --git a/onnxruntime/core/providers/vitisai/vitisai_execution_provider.h b/onnxruntime/core/providers/vitisai/vitisai_execution_provider.h index 9864a40bd1d3b..77dede6035b4c 100644 --- a/onnxruntime/core/providers/vitisai/vitisai_execution_provider.h +++ b/onnxruntime/core/providers/vitisai/vitisai_execution_provider.h @@ -52,7 +52,7 @@ class VitisAIExecutionProvider : public IExecutionProvider { std::shared_ptr registry_; // EP context related. 
bool ep_ctx_enabled_ = false; - bool ep_ctx_embed_mode_ = true; + bool ep_ctx_embed_mode_ = false; std::string ep_ctx_model_path_cfg_{""}; mutable PathString ep_ctx_model_file_loc_{}; // It might need to be called before loading diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index c3832498af584..e0c479dbc7637 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -2242,7 +2242,7 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, new_tensorrt_options.trt_ep_context_file_path = (context_cache_path.size() == 0) ? nullptr : context_cache_path.c_str(); LOGS_DEFAULT(VERBOSE) << "User specified context cache path: " << context_cache_path; - embed_mode = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "1"); + embed_mode = (options->value).config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "0"); if ("1" == embed_mode) { new_tensorrt_options.trt_ep_context_embed_mode = 1; } else if ("0" == embed_mode) { diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index ddc453f84feb6..99c3e44e13013 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -454,8 +454,11 @@ int real_main(int argc, char* argv[], Ort::Env& env) { if (ep_context_enable) sf.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); - if (disable_ep_context_embed_mode) + if (disable_ep_context_embed_mode) { sf.AddConfigEntry(kOrtSessionOptionEpContextEmbedMode, "0"); + } else { + sf.AddConfigEntry(kOrtSessionOptionEpContextEmbedMode, "1"); + } for (auto& it : session_config_entries) { sf.AddConfigEntry(it.first.c_str(), it.second.c_str());