From 8a00784019a07a3755e11731274133cb62ef12d2 Mon Sep 17 00:00:00 2001
From: Hector Li
Date: Mon, 18 Dec 2023 01:33:05 -0800
Subject: [PATCH] Remove QNN EP options: qnn_context_cache_enable,
 qnn_context_cache_path, qnn_context_embed_mode. Add session options
 accordingly.

---
 .../core/session/onnxruntime_c_api.h          |   6 -
 .../onnxruntime_session_options_config_keys.h |  18 +++
 .../core/framework/graph_partitioner.cc       | 103 ++++++++++++------
 .../core/framework/graph_partitioner.h        |   3 +
 .../qnn/builder/onnx_ctx_model_helper.cc      |  12 +-
 .../providers/qnn/qnn_execution_provider.cc   |  30 ++---
 onnxruntime/core/session/inference_session.cc |  14 ++-
 onnxruntime/test/onnx/main.cc                 |  20 +++-
 .../test/perftest/command_args_parser.cc      |   2 -
 onnxruntime/test/perftest/ort_test_session.cc |   6 -
 10 files changed, 135 insertions(+), 79 deletions(-)

diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index c41700453a73b..dbd5ad41255fa 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -3593,17 +3593,11 @@ struct OrtApi {
    *
    * QNN supported keys:
    *   "backend_path": file path to QNN backend library.
-   *   "qnn_context_cache_enable": 1 to enable QNN graph creation from cached QNN context file. If it's enabled: QNN EP will
-   *   load from cached QNN context binary if it exist. It will generate a context binary file if it's not exist
-   *   "qnn_context_cache_path": explicitly provide the QNN context cache file. Default to model_file.onnx.bin if not provided.
    *   "profiling_level": QNN profiling level, options: "off", "basic", "detailed". Default to off.
    *   "rpc_control_latency": QNN RPC control latency.
    *   "vtcm_mb": QNN VTCM size in MB. default to 0(not set).
    *   "htp_performance_mode": QNN performance mode, options: "burst", "balanced", "default", "high_performance",
    *   "high_power_saver", "low_balanced", "low_power_saver", "power_saver", "sustained_high_performance". Default to "default".
-   *   "qnn_context_embed_mode", 1 means dump the QNN context binary into node attribute EPContext->ep_cache_context in the ONNX skeleton model.
-   *   0 means dump the QNN context binary into separate bin file and set the path to EPContext->ep_cache_context.
-   *   The path is relative path to the ONNX skeleton model file.
    *   "qnn_saver_path": File path to the QNN Saver backend library. If specified, QNN Saver will be enabled and will
    *   dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and
    *   may alter model/EP partitioning. Use only for debugging.
diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
index a94973b2cc5d7..c0f503ea02821 100644
--- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
+++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
@@ -235,3 +235,21 @@ static const char* const kOrtSessionOptionsOptimizedModelExternalInitializersFil
 // Use this config to control the minimum size of the initializer when externalizing it during serialization
 static const char* const kOrtSessionOptionsOptimizedModelExternalInitializersMinSizeInBytes =
     "session.optimized_model_external_initializers_min_size_in_bytes";
+
+// Enable the EP context feature to dump the partitioned graph, which includes the EP context, into an ONNX file.
+// The dumped ONNX model with the EP context can be used for later inference, avoiding the EP graph partitioning/compilation overhead.
+// "0": disable. (default)
+// "1": enable.
+static const char* const kOrtSessionOptionEpContextEnable = "ep.ep_context_enable";
+
+// Specify the file path for the ONNX model that contains the EP context.
+// Defaults to <original file name>_ctx.onnx if not specified.
+static const char* const kOrtSessionOptionEpContextFilePath = "ep.ep_context_file_path";
+
+// Flag to specify whether to dump the EP context into the ONNX model.
+// "0": dump the EP context into a separate file and store the file path in the ONNX model.
+// "1": dump the EP context into the ONNX model. (default)
+static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.ep_context_embed_mode";
+
+// Dump the model after graph partitioning to the file "partitioned_graph.onnx".
+static const char* const kDumpPartitionedGraph = "session.dump_partitioned_graph";
\ No newline at end of file
diff --git a/onnxruntime/core/framework/graph_partitioner.cc b/onnxruntime/core/framework/graph_partitioner.cc
index 55e3bf6e05a7c..f6f1a1e6aba93 100644
--- a/onnxruntime/core/framework/graph_partitioner.cc
+++ b/onnxruntime/core/framework/graph_partitioner.cc
@@ -16,6 +16,7 @@
 #include "core/graph/function_utils.h"
 #include "core/graph/graph_viewer.h"
 #include "core/graph/model.h"
+#include "core/session/onnxruntime_session_options_config_keys.h"
 
 // uncomment this line to count non-CUDA ops in ONNX domain
 // #define COUNT_NON_CUDA_OPS
@@ -510,34 +511,6 @@ static Status PartitionOnnxFormatModelImpl(Graph& graph, FuncManager& func_mgr,
     ORT_RETURN_IF_ERROR(graph.Resolve());
   }
 
-  const std::vector<const Node*> ep_context_nodes = current_ep.GetEpContextNodes();
-  auto get_ep_context_node = [&ep_context_nodes](const std::string& node_name) -> std::pair<bool, const Node*> {
-    for (auto& node : ep_context_nodes) {
-      if (node_name == node->Name()) {
-        return std::make_pair(true, node);
-      }
-    }
-    return std::make_pair(false, static_cast<const Node*>(nullptr));
-  };
-
-  if (ep_context_nodes.size() > 0) {
-    Model ep_model(graph.Name(), false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(),
-                   graph.DomainToVersionMap(), {}, *current_ep.GetLogger());
-    auto& ep_graph = ep_model.MainGraph();
-    ep_graph.SetDescription(graph.Description());
-    for (const auto& node : graph.Nodes()) {
-      // the fused node and EPContext node has same node name
-      auto ep_context_node = get_ep_context_node(node.Name());
-      // Use EpContext node created by current EP if name matched, otherwise use original node
-      if (ep_context_node.first) {
-        ep_graph.AddNode(*ep_context_node.second);
-      } else {
-        ep_graph.AddNode(node);
-      }
-    }
-    ORT_RETURN_IF_ERROR(Model::Save(ep_model, "ep_partition.onnx"));
-  }
-
   // For some cases, like fp16 on cpu, right now we don't have any kernel support that.
   // But we will insert cast op to run the model, so skip the error checking here.
   // If after graph transform phase, the node still not assigned, we will report error
@@ -662,9 +635,68 @@ static Status InlineFunctionsAOTImpl(const ExecutionProviders& execution_provide
   return Status::OK();
 }
 
+static Status CreateEpContextModel(const ExecutionProviders& execution_providers,
+                                   const Graph& graph,
+                                   const std::string& ep_context_path,
+                                   const logging::Logger& logger) {
+  std::vector<const Node*> all_ep_context_nodes;
+  for (const auto& ep : execution_providers) {
+    const std::vector<const Node*> ep_context_nodes = ep->GetEpContextNodes();
+    all_ep_context_nodes.insert(all_ep_context_nodes.begin(), ep_context_nodes.begin(), ep_context_nodes.end());
+  }
+
+  auto get_ep_context_node = [&all_ep_context_nodes](const std::string& node_name) -> std::pair<bool, const Node*> {
+    for (auto& node : all_ep_context_nodes) {
+      if (node_name == node->Name()) {
+        return std::make_pair(true, node);
+      }
+    }
+    return std::make_pair(false, static_cast<const Node*>(nullptr));
+  };
+
+  onnxruntime::PathString context_cache_path;
+  PathString model_pathstring = graph.ModelPath().ToPathString();
+  if (all_ep_context_nodes.size() > 0) {
+    if (!ep_context_path.empty()) {
+      context_cache_path = ToPathString(ep_context_path);
+    } else if (!model_pathstring.empty()) {
+      context_cache_path = model_pathstring + ToPathString("_ctx.onnx");
+    }
+
+    bool file_exist = std::filesystem::is_regular_file(context_cache_path) && std::filesystem::exists(context_cache_path);
+
+    if (file_exist) {
+      // The user needs to remove the existing file to re-generate it
+      LOGS(logger, INFO) << "EP context file already exists.";
+      return Status::OK();
+    }
+
+    Model ep_context_model(graph.Name(), false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(),
+                           graph.DomainToVersionMap(), {}, logger);
+    auto& ep_graph = ep_context_model.MainGraph();
+    ep_graph.SetDescription(graph.Description());
+    for (const auto& node : graph.Nodes()) {
+      // The fused node and the EPContext node have the same node name
+      auto ep_context_node = get_ep_context_node(node.Name());
+      // Use the EPContext node created by the EPs if the name matches; otherwise use the node from the original model
+      if (ep_context_node.first) {
+        ep_graph.AddNode(*ep_context_node.second);
+      } else {
+        ep_graph.AddNode(node);
+      }
+    }
+    ORT_RETURN_IF_ERROR(Model::Save(ep_context_model, context_cache_path));
+  }
+
+  return Status::OK();
+}
+
 static Status PartitionOnnxFormatModel(const PartitionParams& partition_params, GraphPartitioner::Mode mode,
                                        const ExecutionProviders& execution_providers,
-                                       KernelRegistryManager& kernel_registry_manager) {
+                                       KernelRegistryManager& kernel_registry_manager,
+                                       bool ep_context_enabled,
+                                       std::string ep_context_path,
+                                       const logging::Logger& logger) {
   bool modified_graph = false;
 
   auto& graph = partition_params.graph.get();
@@ -682,6 +714,10 @@ static Status PartitionOnnxFormatModel(const PartitionParams& partition_params,
                                                  partition_params.debug_graph_fn));
   }
 
+  if (ep_context_enabled) {
+    ORT_RETURN_IF_ERROR(CreateEpContextModel(execution_providers, graph, ep_context_path, logger));
+  }
+
   // expand any nodes that have an ONNX function definition but no matching ORT kernel.
   modified_graph = false;
   ORT_RETURN_IF_ERROR(InlineNodes(graph, modified_graph));
@@ -868,6 +904,8 @@ Status GraphPartitioner::InlineFunctionsAOT(Model& model,
 
 Status GraphPartitioner::Partition(Graph& graph, FuncManager& func_mgr,
                                    const layout_transformation::TransformLayoutFunction& transform_layout_function,
+                                   const ConfigOptions& config_options,
+                                   const logging::Logger& logger,
                                    Mode mode,
                                    const layout_transformation::DebugGraphFn& debug_graph_fn) const {
   // It is a greedy partitioning algorithm per provider preferences user provided when calling ONNX RUNTIME right now.
@@ -912,8 +950,11 @@ Status GraphPartitioner::Partition(Graph& graph, FuncManager& func_mgr,
 
   if (mode == Mode::kNormal || mode == Mode::kAssignOnly) {
 #if !defined(ORT_MINIMAL_BUILD)
-    ORT_RETURN_IF_ERROR(PartitionOnnxFormatModel(partition_params, mode,
-                                                 providers_, kernel_registry_mgr_));
+    bool ep_context_enabled = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") == "1";
+    std::string ep_context_path = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "");
+    ORT_RETURN_IF_ERROR(PartitionOnnxFormatModel(partition_params, mode, providers_,
+                                                 kernel_registry_mgr_, ep_context_enabled,
+                                                 ep_context_path, logger));
 #else
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "ONNX models are not supported in this build.");
 #endif  //! defined(ORT_MINIMAL_BUILD)
diff --git a/onnxruntime/core/framework/graph_partitioner.h b/onnxruntime/core/framework/graph_partitioner.h
index 4fc85c2588260..d1ef193cf1520 100644
--- a/onnxruntime/core/framework/graph_partitioner.h
+++ b/onnxruntime/core/framework/graph_partitioner.h
@@ -13,6 +13,7 @@ namespace onnxruntime {
 class ExecutionProviders;
 class KernelRegistryManager;
 class Model;
+struct ConfigOptions;
 
 class GraphPartitioner {
  public:
@@ -31,6 +32,8 @@ class GraphPartitioner {
   // Run partitioning.
   Status Partition(Graph& graph, FuncManager& func_mgr,
                    const layout_transformation::TransformLayoutFunction& transform_layout_function,
+                   const ConfigOptions& config_options,
+                   const logging::Logger& logger,
                    Mode mode = Mode::kNormal,
                    const layout_transformation::DebugGraphFn& debug_graph_fn = {}) const;
 
diff --git a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc
index a9075cb984734..08f862d2b4dcb 100644
--- a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc
+++ b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc
@@ -57,10 +57,10 @@ Status GetMainContextNode(const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes_and_graphs,
-                           const onnxruntime::PathString& ctx_onnx_model_path,
-                           QnnBackendManager* qnn_backend_manager,
-                           const logging::Logger& logger,
-                           std::unordered_map<std::string, std::unique_ptr<qnn::QnnModel>>& qnn_models) {
+                          const onnxruntime::PathString& ctx_onnx_model_path,
+                          QnnBackendManager* qnn_backend_manager,
+                          const logging::Logger& logger,
+                          std::unordered_map<std::string, std::unique_ptr<qnn::QnnModel>>& qnn_models) {
   for (const auto& fused_node_and_graph : fused_nodes_and_graphs) {
     const Node& fused_node = fused_node_and_graph.fused_node;
     qnn_models.emplace(fused_node.Name(),
@@ -204,7 +204,7 @@ bool IsContextCacheFileExists(const std::string& customer_context_cache_path,
   if (!customer_context_cache_path.empty()) {
     context_cache_path = ToPathString(customer_context_cache_path);
   } else if (!model_pathstring.empty()) {
-    context_cache_path = model_pathstring + ToPathString("_qnn_ctx.onnx");
+    context_cache_path = model_pathstring + ToPathString("_ctx.onnx");
   }
 
   return std::filesystem::is_regular_file(context_cache_path) && std::filesystem::exists(context_cache_path);
@@ -305,7 +305,7 @@ Status GenerateCtxCacheOnnxModel(Model* model,
                                  nullptr,
                                  kMSDomain);
 
-  // Only dump the context buffer once since all QNN graph are in one single context
+  // Only dump the context buffer once since all QNN graphs are in one single context
   if (0 == index) {
     if (qnn_context_embed_mode) {
       std::string cache_payload(buffer, buffer + buffer_size);
diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
index 30f3fddf59019..7acec57f51db5 100644
--- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
+++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
@@ -114,29 +114,17 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio
   if (session_options) {
     disable_cpu_ep_fallback_ = session_options->config_options.GetConfigOrDefault(
         kOrtSessionOptionsDisableCPUEPFallback, "0") == "1";
-  }
-
-  static const std::string CONTEXT_CACHE_ENABLED = "qnn_context_cache_enable";
-  auto context_cache_enabled_pos = provider_options_map.find(CONTEXT_CACHE_ENABLED);
-  if (context_cache_enabled_pos != provider_options_map.end()) {
-    if (context_cache_enabled_pos->second == "1") {
-      context_cache_enabled_ = true;
-      LOGS_DEFAULT(VERBOSE) << "Context cache enabled.";
-    }
-  }
 
-  static const std::string CONTEXT_CACHE_PATH = "qnn_context_cache_path";
-  auto context_cache_path_pos = provider_options_map.find(CONTEXT_CACHE_PATH);
-  if (context_cache_path_pos != provider_options_map.end()) {
-    context_cache_path_cfg_ = context_cache_path_pos->second;
-    LOGS_DEFAULT(VERBOSE) << "User specified context cache path: " << context_cache_path_cfg_;
-  }
+    context_cache_enabled_ = session_options->config_options.GetConfigOrDefault(
+        kOrtSessionOptionEpContextEnable, "0") == "1";
+    LOGS_DEFAULT(VERBOSE) << "Context cache enabled: " << context_cache_enabled_;
 
-  static const std::string CONTEXT_CACHE_EMBED_MODE = "qnn_context_embed_mode";
-  auto context_cache_embed_mode_pos = provider_options_map.find(CONTEXT_CACHE_EMBED_MODE);
-  if (context_cache_embed_mode_pos != provider_options_map.end()) {
-    qnn_context_embed_mode_ = context_cache_embed_mode_pos->second == "1";
+    qnn_context_embed_mode_ = session_options->config_options.GetConfigOrDefault(
+        kOrtSessionOptionEpContextEmbedMode, "1") == "1";
     LOGS_DEFAULT(VERBOSE) << "User specified context cache embed mode: " << qnn_context_embed_mode_;
+
+    context_cache_path_cfg_ = session_options->config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "");
+    LOGS_DEFAULT(VERBOSE) << "User specified context cache path: " << context_cache_path_cfg_;
   }
 
   static const std::string BACKEND_PATH = "backend_path";
@@ -557,7 +545,7 @@ Status QNNExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>& fused
   bool is_qnn_ctx_model = qnn::IsFusedGraphHasCtxNode(fused_nodes_and_graphs);
 
   onnxruntime::PathString context_cache_path;
-  bool is_ctx_file_exist = false; 
+  bool is_ctx_file_exist = false;
   if (!is_qnn_ctx_model) {
     const onnxruntime::GraphViewer& graph_viewer_0(fused_nodes_and_graphs[0].filtered_graph);
     is_ctx_file_exist = qnn::IsContextCacheFileExists(context_cache_path_cfg_,
diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index fcc33a75ce9a0..fda67b3685bd9 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -1166,6 +1166,7 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool
   // Do partitioning based on execution providers' capabilities.
   ORT_RETURN_IF_ERROR_SESSIONID_(partitioner.Partition(graph, session_state_->GetMutableFuncMgr(), transform_layout_fn,
+                                                       session_options_.config_options, *session_logger_,
                                                        mode, debug_graph_fn));
 
   // apply Level2 and higher transformers.
@@ -1198,7 +1199,10 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool
     ORT_RETURN_IF_ERROR_SESSIONID_(apply_transformer_once(copy_transformer, *session_logger_, graph));
   }
 
-  ORT_RETURN_IF_ERROR_SESSIONID_(Model::Save(*model_, "partitioned_graph.onnx"));
+  bool dump_partitioned_graph = session_options_.config_options.GetConfigOrDefault(kDumpPartitionedGraph, "0") == "1";
+  if (dump_partitioned_graph) {
+    ORT_RETURN_IF_ERROR_SESSIONID_(Model::Save(*model_, "partitioned_graph.onnx"));
+  }
 
 #ifdef ENABLE_TRAINING
   // Enable memory optimizations (mainly insert recomputation nodes with priority).
@@ -1462,7 +1466,9 @@ namespace {
 Status PartitionOrtFormatModel(onnxruntime::Graph& graph,
                                const ExecutionProviders& providers,
                                KernelRegistryManager& kernel_registry_manager,
-                               SessionState& session_state) {
+                               SessionState& session_state,
+                               const ConfigOptions& config_options,
+                               const logging::Logger& logger) {
   layout_transformation::TransformLayoutFunction transform_layout_fn = nullptr;
 
 #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
@@ -1483,6 +1489,8 @@ Status PartitionOrtFormatModel(onnxruntime::Graph& graph,
   ORT_RETURN_IF_ERROR(partitioner.Partition(graph,
                                             session_state.GetMutableFuncMgr(),
                                             transform_layout_fn,
+                                            config_options,
+                                            logger,
                                             GraphPartitioner::Mode::kOrtFormatLoad));
 
   return Status::OK();
@@ -1836,7 +1844,7 @@ common::Status InferenceSession::Initialize() {
 #endif  // !defined(ORT_MINIMAL_BUILD)
     } else {
       ORT_RETURN_IF_ERROR_SESSIONID_(PartitionOrtFormatModel(graph, execution_providers_, kernel_registry_manager_,
-                                                             *session_state_));
+                                                             *session_state_, session_options_.config_options, *session_logger_));
 
 #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
       const auto& cpu_ep = *execution_providers_.Get(onnxruntime::kCpuExecutionProvider);
diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc
index 646ff7c95b229..cc0c9c69754fe 100644
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@@ -50,15 +50,12 @@ void usage() {
       "\t-a: Specify custom absolute tolerance values for output value comparison. default: 1e-5\n"
      "\t-i: Specify EP specific runtime options as key value pairs. Different runtime options available are: \n"
      "\t    [QNN only] [backend_path]: QNN backend path. e.g '/folderpath/libQnnHtp.so', '/folderpath/libQnnCpu.so'.\n"
-      "\t    [QNN only] [qnn_context_cache_enable]: 1 to enable cache QNN context. Default to false.\n"
-      "\t    [QNN only] [qnn_context_cache_path]: File path to the qnn context cache. Default to model_file.onnx.bin if not set.\n"
      "\t    [QNN only] [profiling_level]: QNN profiling level, options: 'basic', 'detailed', default 'off'.\n"
      "\t    [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n"
      "\t    [QNN only] [vtcm_mb]: QNN VTCM size in MB. default to 0(not set).\n"
      "\t    [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
      "\t    'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
      "\t    [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. Default to 'normal'. \n"
-      "\t    [QNN only] [qnn_context_embed_mode]: 1 means dump the QNN context binary into the Onnx skeleton model.\n"
      "\t    0 means dump the QNN context binary into separate bin file and set the path in the Onnx skeleton model.\n"
      "\t    [QNN only] [qnn_saver_path]: QNN Saver backend path. e.g '/folderpath/libQnnSaver.so'.\n"
      "\t    [QNN only] [htp_graph_finalization_optimization_mode]: QNN graph finalization optimization mode, options: \n"
@@ -73,6 +70,8 @@ void usage() {
      "\t    [Example] [For SNPE EP] -e snpe -i \"runtime|CPU priority|low\" \n\n"
      "\t-o [optimization level]: Default is 99. Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all).\n"
      "\t\tPlease see onnxruntime_c_api.h (enum GraphOptimizationLevel) for the full list of all optimization levels.\n"
+      "\t-f: Enable EP context cache generation.\n"
+      "\t-b: Disable EP context embed mode.\n"
      "\n"
      "\t-h: help\n"
      "\n"
@@ -179,11 +178,13 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
   OrtLoggingLevel logging_level = ORT_LOGGING_LEVEL_ERROR;
   bool verbose_logging_required = false;
 
+  bool ep_context_enable = false;
+  bool disable_ep_context_embed_mode = false;
+
   bool pause = false;
   {
     int ch;
-    while ((ch = getopt(argc, argv, ORT_TSTR("Ac:hj:Mn:r:e:t:a:xvo:d:i:pz"))) != -1) {
+    while ((ch = getopt(argc, argv, ORT_TSTR("Ac:hj:Mn:r:e:t:a:xvo:d:i:pzfb"))) != -1) {
       switch (ch) {
         case 'A':
           enable_cpu_mem_arena = false;
@@ -312,6 +313,12 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
         case 'z':
           set_denormal_as_zero = true;
           break;
+        case 'b':
+          disable_ep_context_embed_mode = true;
+          break;
+        case 'f':
+          ep_context_enable = true;
+          break;
         case '?':
         case 'h':
         default:
@@ -386,6 +393,11 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
   if (set_denormal_as_zero)
     sf.AddConfigEntry(kOrtSessionOptionsConfigSetDenormalAsZero, "1");
 
+  if (ep_context_enable)
+    sf.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1");
+  if (disable_ep_context_embed_mode)
+    sf.AddConfigEntry(kOrtSessionOptionEpContextEmbedMode, "0");
+
   if (enable_tensorrt) {
 #ifdef USE_TENSORRT
     OrtCUDAProviderOptions cuda_options;
diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc
index 27e26fe0b3c45..6e3252aaeb4b8 100644
--- a/onnxruntime/test/perftest/command_args_parser.cc
+++ b/onnxruntime/test/perftest/command_args_parser.cc
@@ -65,8 +65,6 @@ namespace perftest {
      "\t    [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n"
      "\t    [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n"
      "\t    [QNN only] [backend_path]: QNN backend path. e.g '/folderpath/libQnnHtp.so', '/folderpath/libQnnCpu.so'.\n"
-      "\t    [QNN only] [qnn_context_cache_enable]: 1 to enable cache QNN context. Default to false.\n"
-      "\t    [QNN only] [qnn_context_cache_path]: File path to the qnn context cache. Default to model_file.onnx.bin if not set.\n"
      "\t    [QNN only] [profiling_level]: QNN profiling level, options: 'basic', 'detailed', default 'off'.\n"
      "\t    [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n"
      "\t    [QNN only] [vtcm_mb]: QNN VTCM size in MB. default to 0(not set).\n"
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index eb2a77c07f803..abec16f787895 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -332,12 +332,6 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
       if (value.empty()) {
        ORT_THROW("Please provide the QNN backend path.");
      }
-    } else if (key == "qnn_context_cache_enable") {
-      if (value != "1") {
-        ORT_THROW("Set to 1 to enable qnn_context_cache_enable.");
-      }
-    } else if (key == "qnn_context_cache_path") {
-      // no validation
    } else if (key == "profiling_level") {
      std::set<std::string> supported_profiling_level = {"off", "basic", "detailed"};
      if (supported_profiling_level.find(value) == supported_profiling_level.end()) {