From dfc0c04112298f31a991e8a44d4d493262f1e821 Mon Sep 17 00:00:00 2001
From: Hector Li
Date: Mon, 6 Nov 2023 22:04:30 -0800
Subject: [PATCH 1/3] Enable option qnn_context_priority.

QNN context priority, options: "low", "normal", "normal_high", "high".
---
 .../core/session/onnxruntime_c_api.h          |  1 +
 .../qnn/builder/qnn_backend_manager.cc        | 41 +++++++++++-
 .../qnn/builder/qnn_backend_manager.h         |  4 +-
 .../core/providers/qnn/builder/qnn_def.h      |  8 +++
 .../providers/qnn/qnn_execution_provider.cc   | 62 +++++++++++++------
 .../providers/qnn/qnn_execution_provider.h    |  3 +-
 onnxruntime/test/onnx/main.cc                 |  9 ++-
 .../test/perftest/command_args_parser.cc      |  1 +
 onnxruntime/test/perftest/ort_test_session.cc |  7 ++-
 .../test/providers/qnn/qnn_basic_test.cc      | 14 ++++-
 10 files changed, 125 insertions(+), 25 deletions(-)

diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 729a302f3dd0f..71d8e26e86a96 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -3604,6 +3604,7 @@ struct OrtApi {
    * "qnn_saver_path": File path to the QNN Saver backend library. If specified, QNN Saver will be enabled and will
    * dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and
    * may alter model/EP partitioning. Use only for debugging.
+   * "qnn_context_priority": QNN context priority, options: "low", "normal", "normal_high", "high".
    *
    * SNPE supported keys:
    * "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
index fa859ce81be98..2465ff0ccf7e3 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
@@ -380,15 +380,47 @@ Status QnnBackendManager::ReleaseProfilehandle() {
   return Status::OK();
 }
 
+void SetQnnContextConfig(ContextPriority context_priority, QnnContext_Config_t& qnn_context_config) {
+  qnn_context_config.option = QNN_CONTEXT_CONFIG_OPTION_PRIORITY;
+  switch (context_priority) {
+    case ContextPriority::LOW: {
+      qnn_context_config.priority = QNN_PRIORITY_LOW;
+      break;
+    }
+    case ContextPriority::NORMAL: {
+      qnn_context_config.priority = QNN_PRIORITY_NORMAL;
+      break;
+    }
+    case ContextPriority::NORMAL_HIGH: {
+      qnn_context_config.priority = QNN_PRIORITY_NORMAL_HIGH;
+      break;
+    }
+    case ContextPriority::HIGH: {
+      qnn_context_config.priority = QNN_PRIORITY_HIGH;
+      break;
+    }
+    case ContextPriority::UNDEFINED: {
+      qnn_context_config.priority = QNN_PRIORITY_UNDEFINED;
+      break;
+    }
+    default:
+      qnn_context_config.priority = QNN_PRIORITY_UNDEFINED;
+  }  // switch
+}
+
 Status QnnBackendManager::CreateContext() {
   if (true == context_created_) {
     LOGS_DEFAULT(INFO) << "Context created already.";
     return Status::OK();
   }
 
+  QnnContext_Config_t qnn_context_config = QNN_CONTEXT_CONFIG_INIT;
+  SetQnnContextConfig(context_priority_, qnn_context_config);
+  const QnnContext_Config_t* context_configs[] = {&qnn_context_config, nullptr};
+
   auto result = qnn_interface_.contextCreate(backend_handle_,
                                              device_handle_,
-                                             (const QnnContext_Config_t**)&context_config_,
+                                             context_configs,
                                              &context_);
 
   ORT_RETURN_IF(QNN_CONTEXT_NO_ERROR != result, "Failed to create context.");
@@ -486,9 +518,14 @@ Status QnnBackendManager::LoadCachedQnnContextFromBuffer(char* buffer, uint64_t
   ORT_RETURN_IF(nullptr == qnn_interface_.contextCreateFromBinary,
                 "Invalid function pointer for contextCreateFromBinary.");
 
+  QnnContext_Config_t qnn_context_config = QNN_CONTEXT_CONFIG_INIT;
+  SetQnnContextConfig(context_priority_, qnn_context_config);
+  const QnnContext_Config_t* context_configs[] = {&qnn_context_config, nullptr};
+
   rt = qnn_interface_.contextCreateFromBinary(backend_handle_,
                                               device_handle_,
-                                              (const QnnContext_Config_t**)&context_config_,
+                                              context_configs,
                                               static_cast<void*>(buffer),
                                               buffer_length,
                                               &context_,
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
index 9cb6a322149b9..f194e43d94ad3 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
+++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
@@ -30,11 +30,13 @@ class QnnBackendManager {
                     ProfilingLevel profiling_level,
                     uint32_t rpc_control_latency,
                     HtpPerformanceMode htp_performance_mode,
+                    ContextPriority context_priority,
                     std::string&& qnn_saver_path)
       : backend_path_(backend_path),
         profiling_level_(profiling_level),
         rpc_control_latency_(rpc_control_latency),
         htp_performance_mode_(htp_performance_mode),
+        context_priority_(context_priority),
         qnn_saver_path_(qnn_saver_path) {
   }
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(QnnBackendManager);
@@ -184,7 +186,6 @@ class QnnBackendManager {
   Qnn_LogHandle_t log_handle_ = nullptr;
   Qnn_DeviceHandle_t device_handle_ = nullptr;
   Qnn_ContextHandle_t context_ = nullptr;
-  QnnContext_Config_t** context_config_ = nullptr;
   ProfilingLevel profiling_level_;
   bool backend_initialized_ = false;
   bool device_created_ = false;
@@ -196,6 +197,7 @@ class QnnBackendManager {
   std::vector<std::string> op_package_paths_;
   uint32_t rpc_control_latency_ = 0;
   HtpPerformanceMode htp_performance_mode_;
+  ContextPriority context_priority_;
   std::string sdk_build_version_ = "";
 #ifdef _WIN32
   std::set<HMODULE> mod_handles_;
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h
index 8649db92be027..fb3c556ab6507 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_def.h
+++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h
@@ -48,6 +48,14 @@ enum class HtpPerformanceMode : uint8_t {
   kHtpBalanced,
 };
 
+enum class ContextPriority : uint8_t {
+  LOW = 0,
+  NORMAL,
+  NORMAL_HIGH,
+  HIGH,
+  UNDEFINED
+};
+
 enum class QnnBackendType : uint8_t {
   CPU = 0,
   GPU,
diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
index d3aafcbecd322..e65fa1bc52a06 100644
--- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
+++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
@@ -76,18 +76,37 @@ void QNNExecutionProvider::ParseHtpPerformanceMode(std::string htp_performance_m
   }
 }
 
+void QNNExecutionProvider::ParseQnnContextPriority(std::string context_priority_string) {
+  std::transform(context_priority_string.begin(),
+                 context_priority_string.end(),
+                 context_priority_string.begin(),
+                 [](unsigned char c) { return static_cast<unsigned char>(std::tolower(c)); });
+  LOGS_DEFAULT(VERBOSE) << "QNN context priority: " << context_priority_string;
+  if (context_priority_string == "low") {
+    context_priority_ = qnn::ContextPriority::LOW;
+  } else if (context_priority_string == "normal") {
+    context_priority_ = qnn::ContextPriority::NORMAL;
+  } else if (context_priority_string == "normal_high") {
+    context_priority_ = qnn::ContextPriority::NORMAL_HIGH;
+  } else if (context_priority_string == "high") {
+    context_priority_ = qnn::ContextPriority::HIGH;
+  } else {
+    context_priority_ = qnn::ContextPriority::UNDEFINED;
+    LOGS_DEFAULT(WARNING) << "QNN context priority not valid, set to undefined.";
+  }
+}
+
 QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_options_map,
                                            const SessionOptions* session_options)
-    : IExecutionProvider{onnxruntime::kQnnExecutionProvider, true},
-      runtime_options_(provider_options_map) {
+    : IExecutionProvider{onnxruntime::kQnnExecutionProvider, true} {
   if (session_options) {
     disable_cpu_ep_fallback_ = session_options->config_options.GetConfigOrDefault(
                                    kOrtSessionOptionsDisableCPUEPFallback, "0") == "1";
   }
 
   static const std::string CONTEXT_CACHE_ENABLED = "qnn_context_cache_enable";
-  auto context_cache_enabled_pos = runtime_options_.find(CONTEXT_CACHE_ENABLED);
-  if (context_cache_enabled_pos != runtime_options_.end()) {
+  auto context_cache_enabled_pos = provider_options_map.find(CONTEXT_CACHE_ENABLED);
+  if (context_cache_enabled_pos != provider_options_map.end()) {
     if (context_cache_enabled_pos->second == "1") {
       context_cache_enabled_ = true;
       LOGS_DEFAULT(VERBOSE) << "Context cache enabled.";
@@ -95,25 +114,25 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio
   }
 
   static const std::string CONTEXT_CACHE_PATH = "qnn_context_cache_path";
-  auto context_cache_path_pos = runtime_options_.find(CONTEXT_CACHE_PATH);
-  if (context_cache_path_pos != runtime_options_.end()) {
+  auto context_cache_path_pos = provider_options_map.find(CONTEXT_CACHE_PATH);
+  if (context_cache_path_pos != provider_options_map.end()) {
     context_cache_path_ = context_cache_path_pos->second;
     LOGS_DEFAULT(VERBOSE) << "User specified context cache path: " << context_cache_path_;
   }
 
   bool qnn_context_embed_mode = true;
   static const std::string CONTEXT_CACHE_EMBED_MODE = "qnn_context_embed_mode";
-  auto context_cache_embed_mode_pos = runtime_options_.find(CONTEXT_CACHE_EMBED_MODE);
-  if (context_cache_embed_mode_pos != runtime_options_.end()) {
+  auto context_cache_embed_mode_pos = provider_options_map.find(CONTEXT_CACHE_EMBED_MODE);
+  if (context_cache_embed_mode_pos != provider_options_map.end()) {
     qnn_context_embed_mode = context_cache_embed_mode_pos->second == "1";
     LOGS_DEFAULT(VERBOSE) << "User specified context cache embed mode: " << qnn_context_embed_mode;
   }
 
   static const std::string BACKEND_PATH = "backend_path";
-  auto backend_path_pos = runtime_options_.find(BACKEND_PATH);
+  auto backend_path_pos = provider_options_map.find(BACKEND_PATH);
   std::string backend_path;
-  if (backend_path_pos != runtime_options_.end()) {
+  if (backend_path_pos != provider_options_map.end()) {
     backend_path = backend_path_pos->second;
     LOGS_DEFAULT(VERBOSE) << "Backend path: " << backend_path;
   } else {
@@ -121,39 +140,46 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio
   }
 
   static const std::string PROFILING_LEVEL = "profiling_level";
-  auto profiling_level_pos = runtime_options_.find(PROFILING_LEVEL);
-  if (profiling_level_pos != runtime_options_.end()) {
+  auto profiling_level_pos = provider_options_map.find(PROFILING_LEVEL);
+  if (profiling_level_pos != provider_options_map.end()) {
     ParseProfilingLevel(profiling_level_pos->second);
   }
 
   static const std::string RPC_CONTROL_LANTENCY = "rpc_control_latency";
-  auto latency_pos = runtime_options_.find(RPC_CONTROL_LANTENCY);
-  if (latency_pos != runtime_options_.end()) {
+  auto latency_pos = provider_options_map.find(RPC_CONTROL_LANTENCY);
+  if (latency_pos != provider_options_map.end()) {
     rpc_control_latency_ = static_cast<uint32_t>(std::stoul(latency_pos->second));
     LOGS_DEFAULT(VERBOSE) << "rpc_control_latency: " << rpc_control_latency_;
   }
 
   htp_performance_mode_ = qnn::HtpPerformanceMode::kHtpDefault;
   static const std::string HTP_PERFORMANCE_MODE = "htp_performance_mode";
-  auto htp_performance_mode_pos = runtime_options_.find(HTP_PERFORMANCE_MODE);
-  if (htp_performance_mode_pos != runtime_options_.end()) {
+  auto htp_performance_mode_pos = provider_options_map.find(HTP_PERFORMANCE_MODE);
+  if (htp_performance_mode_pos != provider_options_map.end()) {
     ParseHtpPerformanceMode(htp_performance_mode_pos->second);
   }
 
   // Enable use of QNN Saver if the user provides a path the QNN Saver backend library.
   static const std::string QNN_SAVER_PATH_KEY = "qnn_saver_path";
   std::string qnn_saver_path;
-  auto qnn_saver_path_pos = runtime_options_.find(QNN_SAVER_PATH_KEY);
-  if (qnn_saver_path_pos != runtime_options_.end()) {
+  auto qnn_saver_path_pos = provider_options_map.find(QNN_SAVER_PATH_KEY);
+  if (qnn_saver_path_pos != provider_options_map.end()) {
    qnn_saver_path = qnn_saver_path_pos->second;
     LOGS_DEFAULT(VERBOSE) << "User specified QNN Saver path: " << qnn_saver_path;
   }
 
+  static const std::string QNN_CONTEXT_PRIORITY = "qnn_context_priority";
+  auto qnn_context_priority_pos = provider_options_map.find(QNN_CONTEXT_PRIORITY);
+  if (qnn_context_priority_pos != provider_options_map.end()) {
+    ParseQnnContextPriority(qnn_context_priority_pos->second);
+  }
+
   qnn_backend_manager_ = std::make_unique<qnn::QnnBackendManager>(
       std::move(backend_path),
       profiling_level_,
       rpc_control_latency_,
       htp_performance_mode_,
+      context_priority_,
       std::move(qnn_saver_path));
   qnn_cache_model_handler_ = std::make_unique<qnn::QnnCacheModelHandler>(qnn_context_embed_mode);
 }
diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.h b/onnxruntime/core/providers/qnn/qnn_execution_provider.h
index c63a60018aca8..a72370126b95c 100644
--- a/onnxruntime/core/providers/qnn/qnn_execution_provider.h
+++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.h
@@ -56,9 +56,9 @@ class QNNExecutionProvider : public IExecutionProvider {
                               const logging::Logger& logger);
 
   void ParseHtpPerformanceMode(std::string htp_performance_mode_string);
+  void ParseQnnContextPriority(std::string context_priority_string);
 
  private:
-  ProviderOptions runtime_options_;
   qnn::ProfilingLevel profiling_level_ = qnn::ProfilingLevel::OFF;
   qnn::HtpPerformanceMode htp_performance_mode_ = qnn::HtpPerformanceMode::kHtpDefault;
   std::unique_ptr<qnn::QnnBackendManager> qnn_backend_manager_;
@@ -68,6 +68,7 @@ class QNNExecutionProvider : public IExecutionProvider {
   std::string context_cache_path_ = "";
   bool disable_cpu_ep_fallback_ = false;  // True if CPU EP fallback has been disabled for this session.
   std::unique_ptr<qnn::QnnCacheModelHandler> qnn_cache_model_handler_;
+  qnn::ContextPriority context_priority_ = qnn::ContextPriority::NORMAL;
 };
 
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc
index 0526ccca5bb4e..721541c9a025f 100644
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@@ -56,6 +56,7 @@ void usage() {
       "\t [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n"
       "\t [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
       "\t 'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
+      "\t [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. \n"
\n" "\t [QNN only] [qnn_context_embed_mode]: 1 means dump the QNN context binary into the Onnx skeleton model.\n" "\t 0 means dump the QNN context binary into separate bin file and set the path in the Onnx skeleton model.\n" "\t [QNN only] [qnn_saver_path]: QNN Saver backend path. e.g '/folderpath/libQnnSaver.so'.\n" @@ -486,11 +487,17 @@ int real_main(int argc, char* argv[], Ort::Env& env) { std::string str = str_stream.str(); ORT_THROW("Wrong value for htp_performance_mode. select from: " + str); } + } else if (key == "qnn_context_priority") { + std::set supported_qnn_context_priority = {"low", "normal", "normal_high", "high"}; + if (supported_qnn_context_priority.find(value) == supported_qnn_context_priority.end()) { + ORT_THROW("Supported qnn_context_priority: low, normal, normal_high, high"); + } } else if (key == "qnn_saver_path") { // no validation } else { ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable', -'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode'])"); +'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode', 'qnn_saver_path', + 'qnn_context_priority'])"); } qnn_options[key] = value; diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 6d075fec997b5..597b5b1adcaf6 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -71,6 +71,7 @@ namespace perftest { "\t [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n" "\t [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n" "\t 'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n" + "\t [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. \n" "\t [Usage]: -e -i '| |'\n\n" "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n" "\t [Example] [For QNN EP] -e qnn -i \"backend_path|/folderpath/libQnnCpu.so\" \n\n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index b7a111783fc94..6dccaae60ed54 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -356,9 +356,14 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device std::string str = str_stream.str(); ORT_THROW("Supported htp_performance_mode: " + str); } + } else if (key == "qnn_context_priority") { + std::set supported_qnn_context_priority = {"low", "normal", "normal_high", "high"}; + if (supported_qnn_context_priority.find(value) == supported_qnn_context_priority.end()) { + ORT_THROW("Supported qnn_context_priority: low, normal, normal_high, high"); + } } else { ORT_THROW(R"(Wrong key type entered. 
Choose from options: ['backend_path', 'qnn_context_cache_enable',
-'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode'])");
+'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode', 'qnn_context_priority'])");
     }
 
     qnn_options[key] = value;
diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
index 5f63813d8d84e..a8db5e770dfa5 100644
--- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc
+++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
@@ -173,7 +173,8 @@ TEST(QnnEP, TestDisableCPUFallback_ConflictingConfig) {
 // The models passed to this function are subgraphs extracted from a larger model that exhibited
 // shape inferencing issues on QNN. Thus, the models are expected to have a specific input/output
 // types and shapes.
-static void RunNHWCResizeModel(const ORTCHAR_T* ort_model_path, bool use_htp, bool enable_qnn_saver = false) {
+static void RunNHWCResizeModel(const ORTCHAR_T* ort_model_path, bool use_htp, bool enable_qnn_saver = false,
+                               const std::string& qnn_context_priority = "") {
   Ort::SessionOptions so;
 
   // Ensure all type/shape inference warnings result in errors!
@@ -194,6 +195,9 @@ static void RunNHWCResizeModel(const ORTCHAR_T* ort_model_path, bool use_htp, bo
   }
 #endif
 
+  if (!qnn_context_priority.empty()) {
+    options["qnn_context_priority"] = qnn_context_priority;
+  }
   so.AppendExecutionProvider("QNN", options);
 
   Ort::Session session(*ort_env, ort_model_path, so);
@@ -302,6 +306,14 @@ TEST_F(QnnHTPBackendTests, QnnSaver_OutputFiles) {
   EXPECT_TRUE(std::filesystem::exists(qnn_saver_output_dir / "params.bin"));
 }
 
+// Test that models run with high QNN context priority.
+TEST_F(QnnHTPBackendTests, QnnContextPriorityHigh) {
+  RunNHWCResizeModel(ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.quant.onnx",
+                     true,     // use_htp
+                     false,    // enable_qnn_saver
+                     "high");  // qnn_context_priority
+}
+
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
 
 #endif  // !defined(ORT_MINIMAL_BUILD)

From 85d06ba1a2760d5692a9ff0d3adfd6368711043c Mon Sep 17 00:00:00 2001
From: Hector Li
Date: Wed, 8 Nov 2023 12:58:25 -0800
Subject: [PATCH 2/3] Default to normal. Report INVALID_ARGUMENT error if
 undefined priority is detected.

---
 .../onnxruntime/core/session/onnxruntime_c_api.h  |  2 +-
 .../providers/qnn/builder/qnn_backend_manager.cc  | 13 +++++++------
 .../core/providers/qnn/qnn_execution_provider.cc  |  2 +-
 onnxruntime/test/onnx/main.cc                     |  2 +-
 onnxruntime/test/perftest/command_args_parser.cc  |  6 +++---
 5 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 71d8e26e86a96..1ad76544ec646 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -3604,7 +3604,7 @@ struct OrtApi {
    * "qnn_saver_path": File path to the QNN Saver backend library. If specified, QNN Saver will be enabled and will
    * dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and
    * may alter model/EP partitioning. Use only for debugging.
-   * "qnn_context_priority": QNN context priority, options: "low", "normal", "normal_high", "high".
+   * "qnn_context_priority": QNN context priority, options: "low", "normal", "normal_high", "high". Default to "normal".
    *
    * SNPE supported keys:
    * "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
index 2465ff0ccf7e3..d3df654f0b693 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
@@ -380,7 +380,7 @@ Status QnnBackendManager::ReleaseProfilehandle() {
   return Status::OK();
 }
 
-void SetQnnContextConfig(ContextPriority context_priority, QnnContext_Config_t& qnn_context_config) {
+Status SetQnnContextConfig(ContextPriority context_priority, QnnContext_Config_t& qnn_context_config) {
   qnn_context_config.option = QNN_CONTEXT_CONFIG_OPTION_PRIORITY;
   switch (context_priority) {
     case ContextPriority::LOW: {
@@ -400,12 +400,13 @@ void SetQnnContextConfig(ContextPriority context_priority, QnnContext_Config_t&
       break;
     }
     case ContextPriority::UNDEFINED: {
-      qnn_context_config.priority = QNN_PRIORITY_UNDEFINED;
-      break;
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Invalid Qnn context priority.");
     }
     default:
-      qnn_context_config.priority = QNN_PRIORITY_UNDEFINED;
+      qnn_context_config.priority = QNN_PRIORITY_NORMAL;
   }  // switch
+
+  return Status::OK();
 }
 
 Status QnnBackendManager::CreateContext() {
@@ -415,7 +416,7 @@ Status QnnBackendManager::CreateContext() {
   }
 
   QnnContext_Config_t qnn_context_config = QNN_CONTEXT_CONFIG_INIT;
-  SetQnnContextConfig(context_priority_, qnn_context_config);
+  ORT_RETURN_IF_ERROR(SetQnnContextConfig(context_priority_, qnn_context_config));
   const QnnContext_Config_t* context_configs[] = {&qnn_context_config, nullptr};
 
   auto result = qnn_interface_.contextCreate(backend_handle_,
@@ -520,7 +521,7 @@ Status QnnBackendManager::LoadCachedQnnContextFromBuffer(char* buffer, uint64_t
                 "Invalid function pointer for contextCreateFromBinary.");
 
   QnnContext_Config_t qnn_context_config = QNN_CONTEXT_CONFIG_INIT;
-  SetQnnContextConfig(context_priority_, qnn_context_config);
+  ORT_RETURN_IF_ERROR(SetQnnContextConfig(context_priority_, qnn_context_config));
   const QnnContext_Config_t* context_configs[] = {&qnn_context_config, nullptr};
 
   rt = qnn_interface_.contextCreateFromBinary(backend_handle_,
diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
index e65fa1bc52a06..4a5e58969cc67 100644
--- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
+++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
@@ -92,7 +92,7 @@ void QNNExecutionProvider::ParseQnnContextPriority(std::string context_priority_
     context_priority_ = qnn::ContextPriority::HIGH;
   } else {
     context_priority_ = qnn::ContextPriority::UNDEFINED;
-    LOGS_DEFAULT(WARNING) << "QNN context priority not valid, set to undefined.";
+    LOGS_DEFAULT(WARNING) << "QNN context priority: " << context_priority_string << " not valid, set to undefined.";
   }
 }
 
diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc
index 721541c9a025f..271913ec77c42 100644
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@@ -56,7 +56,7 @@ void usage() {
       "\t [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n"
       "\t [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
       "\t 'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
-      "\t [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. \n"
+      "\t [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. Default to 'normal'. \n"
       "\t [QNN only] [qnn_context_embed_mode]: 1 means dump the QNN context binary into the Onnx skeleton model.\n"
       "\t 0 means dump the QNN context binary into separate bin file and set the path in the Onnx skeleton model.\n"
       "\t [QNN only] [qnn_saver_path]: QNN Saver backend path. e.g '/folderpath/libQnnSaver.so'.\n"
diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc
index 597b5b1adcaf6..9c7e9ab922343 100644
--- a/onnxruntime/test/perftest/command_args_parser.cc
+++ b/onnxruntime/test/perftest/command_args_parser.cc
@@ -34,8 +34,8 @@ namespace perftest {
       "\t-A: Disable memory arena\n"
       "\t-I: Generate tensor input binding (Free dimensions are treated as 1.)\n"
       "\t-c [parallel runs]: Specifies the (max) number of runs to invoke simultaneously. Default:1.\n"
-      "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|snpe|rocm|migraphx|xnnpack|vitisai]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', "
-      "'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'snpe', 'rocm', 'migraphx', 'xnnpack' or 'vitisai'. "
+      "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', "
+      "'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack' or 'vitisai'. "
       "Default:'cpu'.\n"
       "\t-b [tf|ort]: backend to use. Default:ort\n"
       "\t-r [repeated_times]: Specifies the repeated times if running in 'times' test mode.Default:1000.\n"
@@ -71,7 +71,7 @@ namespace perftest {
       "\t [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n"
       "\t [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
       "\t 'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
-      "\t [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. \n"
+      "\t [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. Default to 'normal'. \n"
\n" "\t [Usage]: -e -i '| |'\n\n" "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n" "\t [Example] [For QNN EP] -e qnn -i \"backend_path|/folderpath/libQnnCpu.so\" \n\n" From 377b1c0ee9c62e2a5ef554ef8d55fd876d214859 Mon Sep 17 00:00:00 2001 From: Hector Li Date: Wed, 8 Nov 2023 16:09:37 -0800 Subject: [PATCH 3/3] add comments for parameters --- onnxruntime/test/providers/qnn/qnn_basic_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc index b891653cff3ed..2e2acb36e8071 100644 --- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc +++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc @@ -331,8 +331,8 @@ TEST_F(QnnHTPBackendTests, HTPGraphFinalizationOptimizationModes) { TEST_F(QnnHTPBackendTests, QnnContextPriorityHigh) { RunNHWCResizeModel(ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.quant.onnx", true, // use_htp - false, // - "", + false, // enable_qnn_saver + "", // htp_graph_finalization_opt_mode "high"); // qnn_context_priority }