diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h index 17ae649e6f174..209fd4279cc99 100644 --- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h +++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h @@ -265,6 +265,10 @@ static const char* const kOrtSessionOptionEpContextFilePath = "ep.context_file_p // "1": dump the EP context into the Onnx model. (default). static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode"; +// Specify a prefix for the generated EPContext node name to keep it unique +// in case the user needs to merge/connect multiple EPContext nodes in one model. +static const char* const kOrtSessionOptionEpContextNodeNamePrefix = "ep.context_node_name_prefix"; + // Gemm fastmath mode provides fp32 gemm acceleration with bfloat16 based matmul. // Option values: // - "0": Gemm FastMath mode is not enabled. [DEFAULT] diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index 9e094a2876aeb..a76f9bef5ef3f 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -199,6 +199,13 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio context_cache_path_cfg_ = session_options->config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, ""); LOGS_DEFAULT(VERBOSE) << "User specified context cache path: " << context_cache_path_cfg_; + + // To work around the QNN context PD memory limit, the user needs to split the model into pieces and + // generate the QNN context model for each piece separately. + // The EPContext nodes generated in the separate graphs can end up with the same node name. + // The user can set ep.context_node_name_prefix for each split piece to avoid that. 
+ context_node_name_prefix_ = session_options->config_options.GetConfigOrDefault(kOrtSessionOptionEpContextNodeNamePrefix, ""); + LOGS_DEFAULT(VERBOSE) << "User specified QNN context node name prefix: " << context_node_name_prefix_; } static const std::string BACKEND_PATH = "backend_path"; @@ -378,17 +385,6 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio LOGS_DEFAULT(VERBOSE) << "User specified enable_htp_fp16_precision: " << enable_HTP_FP16_precision_; } - // For the case that workaround QNN context PD memory limit, user need split the model into pieces and - // generate the QNN context model separately. - // It could happen that the generated EPContext node in separate graph has same node name. - // User can set this context_node_name_prefix for each split pieces to avoid that happens. - static const std::string QNN_CONTEXT_NODE_NAME_PREFIX = "context_node_name_prefix"; - auto context_node_name_prefix_pos = provider_options_map.find(QNN_CONTEXT_NODE_NAME_PREFIX); - if (context_node_name_prefix_pos != provider_options_map.end()) { - context_node_name_prefix_ = context_node_name_prefix_pos->second; - LOGS_DEFAULT(VERBOSE) << "User specified QNN context node name prefix: " << context_node_name_prefix_; - } - qnn_backend_manager_ = std::make_unique( std::move(backend_path), profiling_level_etw, diff --git a/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc b/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc index 45cbb43d623ed..941e619363c2c 100644 --- a/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc +++ b/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc @@ -287,7 +287,6 @@ TEST_F(QnnHTPBackendTests, QnnContextGenerationNodeNamePrefix) { provider_options["backend_path"] = "libQnnHtp.so"; #endif std::string node_name_prefix = "node_name_prefix_test"; - provider_options["context_node_name_prefix"] = node_name_prefix; // Add kMSDomain to cover contrib op like Gelu const std::unordered_map domain_to_version = {{"", 
13}, {kMSDomain, 1}}; @@ -299,6 +298,7 @@ TEST_F(QnnHTPBackendTests, QnnContextGenerationNodeNamePrefix) { Ort::SessionOptions so; so.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); so.AddConfigEntry(kOrtSessionOptionEpContextFilePath, context_binary_file.c_str()); + so.AddConfigEntry(kOrtSessionOptionEpContextNodeNamePrefix, node_name_prefix.c_str()); so.AppendExecutionProvider("QNN", provider_options); Ort::Session session(*ort_env, ORT_TSTR("testdata/qnn_ctx_2_inputs_order_test.onnx"), so);