From d035fb42b46cbcc26eea6f802c56e76e33f71093 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sat, 7 Dec 2024 00:09:44 -0800 Subject: [PATCH 01/64] Copy shared utils into qnn ep --- cmake/onnxruntime_providers_qnn.cmake | 8 - cmake/onnxruntime_unittests.cmake | 3 +- .../qnn/builder/onnx_ctx_model_helper.cc | 7 +- .../qnn/builder/onnx_ctx_model_helper.h | 1 - .../core/providers/qnn/builder/op_builder.h | 1 - .../opbuilder/argmax_min_op_builder.cc | 4 +- .../qnn/builder/opbuilder/base_op_builder.cc | 3 +- .../qnn/builder/opbuilder/base_op_builder.h | 4 +- .../opbuilder/batch_norm_op_builder.cc | 3 +- .../qnn/builder/opbuilder/clip_op_builder.cc | 1 - .../qnn/builder/opbuilder/conv_op_builder.cc | 5 +- .../builder/opbuilder/expand_op_builder.cc | 1 - .../builder/opbuilder/gather_op_builder.cc | 3 +- .../qnn/builder/opbuilder/gemm_op_builder.cc | 5 +- .../opbuilder/instance_norm_op_builder.cc | 5 +- .../opbuilder/layer_norm_op_builder.cc | 3 +- .../qnn/builder/opbuilder/lrn_op_builder.cc | 6 +- .../qnn/builder/opbuilder/pad_op_builder.cc | 4 +- .../qnn/builder/opbuilder/pool_op_builder.cc | 9 +- .../builder/opbuilder/reduce_op_builder.cc | 5 +- .../builder/opbuilder/reshape_op_builder.cc | 3 +- .../builder/opbuilder/resize_op_builder.cc | 6 +- .../builder/opbuilder/simple_op_builder.cc | 15 +- .../qnn/builder/opbuilder/slice_op_builder.cc | 3 +- .../builder/opbuilder/softmax_op_builder.cc | 1 - .../qnn/builder/opbuilder/split_op_builder.cc | 4 +- .../qnn/builder/opbuilder/tile_op_builder.cc | 2 - .../providers/qnn/builder/opbuilder/topk.cc | 3 +- .../builder/opbuilder/transpose_op_builder.cc | 2 +- .../core/providers/qnn/builder/qnn_model.cc | 1 - .../qnn/builder/qnn_model_wrapper.cc | 1 - .../providers/qnn/builder/qnn_model_wrapper.h | 1 - .../qnn_node_group/conv_activation_fusion.cc | 5 +- .../qnn/builder/qnn_node_group/dq_q_fusion.cc | 1 - .../qnn_node_group/hardsigmoid_mul_fusion.cc | 3 +- .../core/providers/qnn/builder/qnn_utils.cc | 243 ++++++++++++++++++ 
.../core/providers/qnn/builder/qnn_utils.h | 52 ++++ .../providers/qnn/qnn_execution_provider.cc | 8 +- .../test/providers/qnn/qnn_ep_context_test.cc | 29 ++- onnxruntime/test/qnn_ctx_gen/main.cc | 29 ++- 40 files changed, 391 insertions(+), 102 deletions(-) diff --git a/cmake/onnxruntime_providers_qnn.cmake b/cmake/onnxruntime_providers_qnn.cmake index b68d84c23bb32..52ccdbf7c9ecc 100644 --- a/cmake/onnxruntime_providers_qnn.cmake +++ b/cmake/onnxruntime_providers_qnn.cmake @@ -3,13 +3,6 @@ add_compile_definitions(USE_QNN=1) - # These are shared utils, - # TODO, move to a separate lib when used by EPs other than QNN, NNAPI and CoreML - file(GLOB onnxruntime_providers_shared_utils_cc_srcs CONFIGURE_DEPENDS - "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h" - "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc" - ) - file(GLOB_RECURSE onnxruntime_providers_qnn_ep_cc_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/qnn/*.h" @@ -23,7 +16,6 @@ ) set(onnxruntime_providers_qnn_cc_srcs - ${onnxruntime_providers_shared_utils_cc_srcs} ${onnxruntime_providers_qnn_ep_cc_srcs} ${onnxruntime_providers_qnn_builder_cc_srcs} ) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index e822f0a3655fc..306096db128a7 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1283,7 +1283,8 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) file(GLOB onnxruntime_qnn_ctx_gen_src CONFIGURE_DEPENDS ${onnxruntime_qnn_ctx_gen_src_patterns} - ) + ) + onnxruntime_add_executable(onnxruntime_qnn_ctx_gen ${onnxruntime_qnn_ctx_gen_src}) target_include_directories(onnxruntime_qnn_ctx_gen PRIVATE ${onnx_test_runner_src_dir} ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} diff --git a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc index 57ae8c354abb7..d017d9503b8cc 100644 --- 
a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc @@ -3,6 +3,7 @@ #include "core/providers/qnn/builder/onnx_ctx_model_helper.h" #include "core/graph/constants.h" +#include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model.h" #include @@ -17,7 +18,7 @@ bool GraphHasEpContextNode(const onnxruntime::GraphViewer& graph_viewer) { // and the source is QNN or QNNExecutionProvider. for (const auto& node : graph_viewer.Nodes()) { if (EPCONTEXT_OP == node.OpType()) { - NodeAttrHelper node_helper(node); + utils::NodeAttrHelper node_helper(node); std::string cache_source = node_helper.Get(SOURCE, ""); std::transform(cache_source.begin(), @@ -53,7 +54,7 @@ Status GetMainContextNode(const std::vectorOpType(), "Should only filter in the EPContext node."); - NodeAttrHelper node_helper(*ep_context_node); + utils::NodeAttrHelper node_helper(*ep_context_node); int64_t is_main_context = node_helper.Get(MAIN_CONTEXT, static_cast(0)); if (1 == is_main_context) { main_context_pos.push_back(static_cast(i)); @@ -89,7 +90,7 @@ Status GetEpContextFromMainNode(const onnxruntime::Node& main_context_node, QnnBackendManager* qnn_backend_manager, QnnModelLookupTable& qnn_models) { ORT_RETURN_IF_NOT(EPCONTEXT_OP == main_context_node.OpType(), "Should only filter in the EPContext node."); - NodeAttrHelper node_helper(main_context_node); + utils::NodeAttrHelper node_helper(main_context_node); bool is_embed_mode = node_helper.Get(EMBED_MODE, true); if (is_embed_mode) { const std::string& context_binary = node_helper.Get(EP_CACHE_CONTEXT, ""); diff --git a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h index f308a7456d46c..d6c65c2725211 100644 --- a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h @@ -9,7 +9,6 @@ 
#include "qnn_def.h" #include "core/common/logging/logging.h" #include "core/graph/graph_viewer.h" -#include "core/providers/shared/utils/utils.h" #include "core/graph/model.h" #include "core/framework/execution_provider.h" diff --git a/onnxruntime/core/providers/qnn/builder/op_builder.h b/onnxruntime/core/providers/qnn/builder/op_builder.h index 05398c3f22ea2..b729503320f05 100644 --- a/onnxruntime/core/providers/qnn/builder/op_builder.h +++ b/onnxruntime/core/providers/qnn/builder/op_builder.h @@ -5,7 +5,6 @@ #include "core/graph/graph_viewer.h" #include "core/framework/node_unit.h" -#include "core/providers/shared/utils/utils.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/argmax_min_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/argmax_min_op_builder.cc index c685fa065e2ba..192c9496f0999 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/argmax_min_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/argmax_min_op_builder.cc @@ -2,8 +2,8 @@ // Licensed under the MIT License. 
#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/framework/tensorprotoutils.h" +#include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" @@ -57,7 +57,7 @@ Status ArgMaxMinOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_mode param_tensor_names.push_back(axis_param.GetParamTensorName()); qnn_model_wrapper.AddParamWrapper(std::move(axis_param)); - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); auto select_last_index = node_helper.Get("select_last_index", static_cast(0)); if (select_last_index != 0) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN ArgMax/ArgMin only support select_last_index=0."); diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc index ed70111087e19..af070fc01a279 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc @@ -6,7 +6,6 @@ #include -#include "core/providers/shared/utils/utils.h" #include "core/framework/tensorprotoutils.h" #include "core/providers/cpu/tensor/transpose.h" #include "core/common/safeint.h" @@ -311,7 +310,7 @@ Status BaseOpBuilder::ProcessAxisAttribute(const QnnModelWrapper& qnn_model_wrap ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[0].node_arg, input_shape), "Cannot get shape"); auto rank = static_cast(input_shape.size()); - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); int32_t onnx_axis = node_helper.Get("axis", default_axis_value); if (onnx_axis < 0) { onnx_axis += rank; diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h index 055c0f6ccf2fa..20d3bac5964b7 100644 --- 
a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h @@ -3,7 +3,7 @@ #pragma once -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder.h" #include "core/providers/qnn/builder/qnn_quant_params_wrapper.h" @@ -352,7 +352,7 @@ struct OnnxAttrInfo { }; template -inline ValType GetOnnxAttr(const NodeAttrHelper& node_helper, const OnnxAttrInfo& attr_info) { +inline ValType GetOnnxAttr(const qnn::utils::NodeAttrHelper& node_helper, const OnnxAttrInfo& attr_info) { return node_helper.Get(attr_info.name, attr_info.default_val); } diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc index 07abcf1c7bf84..9c7f1d374e5b7 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc @@ -6,7 +6,6 @@ #include #include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/framework/float16.h" #include "core/framework/tensorprotoutils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" @@ -546,7 +545,7 @@ Status BatchNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, std::vector scale_double_tensor; std::vector bias_double_tensor; - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); const float epsilon = node_helper.Get("epsilon", 1e-05f); // Default is 1e-05 according to ONNX spec. 
double scale_rmax = std::numeric_limits::min(); diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc index e5dc4d04afefd..aa6080eb1195d 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc @@ -5,7 +5,6 @@ #include #include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc index 12887f0fb72d6..2aeb8a47000c2 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc @@ -2,7 +2,6 @@ // Licensed under the MIT License. 
#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/framework/tensorprotoutils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" @@ -97,7 +96,7 @@ Status ConvOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper, "QNN EP: Data type ", input_data_type->c_str(), " is not supported for Conv operator in CPU backend."); - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); auto auto_pad = node_helper.Get("auto_pad", std::string("NOTSET")); ORT_RETURN_IF(auto_pad != "NOTSET" && auto_pad != "SAME_LOWER" && auto_pad != "SAME_UPPER", "QNN Conv operators do not support 'auto_pad' value: ", auto_pad.c_str()); @@ -539,7 +538,7 @@ Status ConvOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra OnnxConvType conv_type = {}; ORT_RETURN_IF_ERROR(GetOnnxConvType(node_unit.OpType(), conv_type)); - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); std::vector param_tensor_names; const auto& input_0 = node_unit.Inputs()[0]; diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc index 64f676aaa9875..20978f41b529b 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc @@ -2,7 +2,6 @@ // Licensed under the MIT License. 
#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc index 3737fcb54f4cf..df02d12bd59c9 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc @@ -3,7 +3,6 @@ #include #include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" @@ -100,7 +99,7 @@ static Status GetInpu0AxisDimValue(const QnnModelWrapper& qnn_model_wrapper, "Cannot get shape for ", node_unit.OpType(), " input[0] ", input0.node_arg.Name()); int64_t rank = static_cast(input0_shape.size()); - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); int64_t onnx_axis = node_helper.Get("axis", default_axis_value); if (onnx_axis < 0) { onnx_axis += rank; diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/gemm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/gemm_op_builder.cc index eeee26c177281..20f2f4383044c 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/gemm_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/gemm_op_builder.cc @@ -2,7 +2,6 @@ // Licensed under the MIT License. 
#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" @@ -36,7 +35,7 @@ class GemmOpBuilder : public BaseOpBuilder { }; Status GemmOpBuilder::ExplictOpCheck(const NodeUnit& node_unit) const { - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); auto alpha = node_helper.Get("alpha", (float)1.0); if (alpha != 1.0) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN FullyConnected Op only support alpha=1.0."); @@ -79,7 +78,7 @@ Status GemmOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, // for Input A, B, C: 1 -- need transpose, 0 -- not needed std::vector input_trans_flag(3, 0); - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); input_trans_flag.at(0) = node_helper.Get("transA", (int64_t)0); auto transB = node_helper.Get("transB", (int64_t)0); // QNN input_1 [m, n] vs Onnx [n, m] diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc index 4b8d079c0062a..53bc93e2fa832 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc @@ -2,7 +2,6 @@ // Licensed under the MIT License. 
#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/framework/tensorprotoutils.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" @@ -74,7 +73,7 @@ Status InstanceNormOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper, return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN InstanceNorm input 2 (bias) must have 1D shape [channel]."); } - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); const float epsilon = node_helper.Get("epsilon", 1e-05f); // Default is 1e-05 according to ONNX spec. if (epsilon <= 0.0f) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN InstanceNorm epsilon must be greater than 0.0"); @@ -160,7 +159,7 @@ Status InstanceNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_m std::vector&& input_names, const logging::Logger& logger, bool do_op_validation) const { - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); std::vector param_tensor_names; const float epsilon = node_helper.Get("epsilon", 1e-05f); // Default is 1e-05 according to ONNX spec. 
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/layer_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/layer_norm_op_builder.cc index d1a0e88686f39..b0394be15aba2 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/layer_norm_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/layer_norm_op_builder.cc @@ -3,7 +3,6 @@ #include #include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/framework/tensorprotoutils.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" @@ -114,7 +113,7 @@ Status LayerNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_mode std::vector&& input_names, const logging::Logger& logger, bool do_op_validation) const { - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); std::vector param_tensor_names; const float epsilon = node_helper.Get("epsilon", 1e-05f); // Default is 1e-05 according to ONNX spec. diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc index 2f66069b6609e..dbb29557cccc4 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc @@ -2,7 +2,7 @@ // Licensed under the MIT License. 
#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/common/safeint.h" @@ -75,7 +75,7 @@ Status LRNOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper, ORT_RETURN_IF(output_shape != input_shape, "QNN EP: LRN operator's output must have the same shape as the input."); - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); // 'size' attribute must be odd and > 0. const int64_t onnx_size = GetOnnxAttr(node_helper, onnx_size_attr); @@ -98,7 +98,7 @@ Status LRNOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrap const logging::Logger& logger, bool do_op_validation) const { std::vector param_tensor_names; - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); const int64_t onnx_size = GetOnnxAttr(node_helper, onnx_size_attr); diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc index 5fc6d42a8a179..3035da2723907 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc @@ -2,11 +2,9 @@ // Licensed under the MIT License. 
#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/cpu/tensor/slice_helper.h" -#include "core/providers/qnn/builder/op_builder_factory.h" #include "core/common/safeint.h" #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" @@ -204,7 +202,7 @@ Status PadOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrap std::vector input_shape; ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[0].node_arg, input_shape), "Cannot get shape of input 0."); - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); std::string mode = node_helper.Get("mode", "constant"); Qnn_Scalar_t mode_qnn_scalar = QNN_SCALAR_INIT; mode_qnn_scalar.dataType = QNN_DATATYPE_UINT_32; diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc index ef1990ad8e69a..0ed11bed30929 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc @@ -2,7 +2,6 @@ // Licensed under the MIT License. 
#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/framework/tensorprotoutils.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" @@ -39,7 +38,7 @@ class PoolOpBuilder : public BaseOpBuilder { QnnQuantParamsWrapper& quant_param) const override ORT_MUST_USE_RESULT; private: - Status SetCommonPoolParams(const NodeAttrHelper& node_helper, std::vector& filter_size, + Status SetCommonPoolParams(const utils::NodeAttrHelper& node_helper, std::vector& filter_size, std::vector& pad_amount, std::vector& stride, int32_t& ceil_mode, std::vector&& input_shape, @@ -79,7 +78,7 @@ Status PoolOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper, return Status::OK(); } - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); auto dilation_values = node_helper.Get("dilations", std::vector{1, 1}); if (dilation_values != std::vector{1, 1}) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN does not support Dilation attribute"); @@ -94,7 +93,7 @@ Status PoolOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper, return Status::OK(); } -Status PoolOpBuilder::SetCommonPoolParams(const NodeAttrHelper& node_helper, +Status PoolOpBuilder::SetCommonPoolParams(const utils::NodeAttrHelper& node_helper, std::vector& filter_size, std::vector& pad_amount, std::vector& strides, int32_t& ceil_mode, @@ -155,7 +154,7 @@ Status PoolOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra std::vector&& input_names, const logging::Logger& logger, bool do_op_validation) const { - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); // Get the NCHW from input data, use HW for the pool filter size and pool stride const auto& inputs = node_unit.Inputs(); std::vector input_shape; diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc 
b/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc index 77bc58bd6f833..ce6654b3906d7 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc @@ -10,7 +10,6 @@ #include "onnx/defs/data_type_utils.h" #include "core/providers/common.h" #include "core/framework/endian_utils.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" @@ -102,7 +101,7 @@ Status ReduceOpBuilder::GetAxesSet(QnnModelWrapper& qnn_model_wrapper, const Nod const int opset_axes_as_input = ReduceOpBuilder::opset_with_axes_as_input[reduce_op_type]; const int opset = node_unit.SinceVersion(); - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); // Extract the axes values from either the attribute or initializer input (depending on opset). if (opset < opset_axes_as_input) { // Axes is in ONNX node attribute. @@ -212,7 +211,7 @@ Status ReduceOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, const Status ReduceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, std::vector&& input_names, const logging::Logger& logger, bool do_op_validation) const { - NodeAttrHelper node_attr_helper(node_unit); + utils::NodeAttrHelper node_attr_helper(node_unit); std::vector param_tensor_names; // diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc index b6f414da950d8..c374a3c64b350 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc @@ -2,7 +2,6 @@ // Licensed under the MIT License. 
#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/framework/tensorprotoutils.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" @@ -39,7 +38,7 @@ Status ReshapeOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, std::vector& input_names, bool do_op_validation) const { if (do_op_validation) { - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); auto allowzero = node_helper.Get("allowzero", static_cast(0)); if (0 != allowzero) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN Reshape doesn't support dynamic shape!"); diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc index c62fca88b6ec2..6b1088e488c31 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc @@ -6,12 +6,10 @@ #include #include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/framework/tensorprotoutils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/cpu/tensor/slice_helper.h" -#include "core/providers/qnn/builder/op_builder_factory.h" #include "core/common/safeint.h" #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" @@ -124,7 +122,7 @@ Status ResizeOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper, } const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType()); - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); // QNN doesn't support anti-aliasing (added in opset 18) if (node_unit.SinceVersion() >= 18) { @@ -260,7 +258,7 @@ Status ResizeOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w const logging::Logger& logger, 
bool do_op_validation) const { std::vector param_tensor_names; - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); const auto& input_0 = node_unit.Inputs()[0]; std::vector input_shape; diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc index a6c4203ad92e4..f23b6b240389d 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc @@ -2,7 +2,6 @@ // Licensed under the MIT License. #include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/framework/tensorprotoutils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" @@ -143,7 +142,7 @@ Status SimpleOpBuilder::ExplicitOpCheck(QnnModelWrapper& qnn_model_wrapper, const std::string& op_type = node_unit.OpType(); if (op_type == "GridSample") { - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); std::string mode = node_helper.Get("mode", "linear"); ORT_RETURN_IF_NOT(utils::ArrayHasString(gridsample_supported_modes, mode), "GridSample does not support mode ", mode.c_str()); @@ -193,7 +192,7 @@ Status ProcessNodeAttribute(QnnModelWrapper& qnn_model_wrapper, const std::string& qnn_param_key, std::vector& param_tensor_names, const float default_value = 1.0f) { - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); float attr_value = node_helper.Get(onnx_attr_key, default_value); Qnn_Scalar_t attr_qnn_scalar = QNN_SCALAR_INIT; attr_qnn_scalar.dataType = QNN_DATATYPE_FLOAT_32; @@ -209,7 +208,7 @@ Status ProcessNodeAttribute(QnnModelWrapper& qnn_model_wrapper, Status ProcessBlockSizeAttribute(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, std::vector& param_tensor_names) { - NodeAttrHelper 
node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); uint32_t block_size = node_helper.Get("blocksize", static_cast(0)); std::vector block_size_shape{2}; std::vector block_size_data(2, block_size); @@ -224,7 +223,7 @@ Status ProcessBlockSizeAttribute(QnnModelWrapper& qnn_model_wrapper, Status ProcessModeAttribute(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, std::vector& param_tensor_names) { - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); std::string mode = node_helper.Get("mode", "DCR"); Qnn_Scalar_t mode_qnn_scalar = QNN_SCALAR_INIT; mode_qnn_scalar.dataType = QNN_DATATYPE_UINT_32; @@ -247,7 +246,7 @@ Status ProcessModeAttribute(QnnModelWrapper& qnn_model_wrapper, Status ProcessAlphaAttributeAsInput(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, const std::string input_name) { - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); QnnQuantParamsWrapper quantize_param; Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32; union { @@ -293,7 +292,7 @@ Status ProcessAlphaAttributeAsInput(QnnModelWrapper& qnn_model_wrapper, Status ProcessGridSampleAttributes(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, std::vector& param_tensor_names) { - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); int64_t align_corners = node_helper.Get("align_corners", static_cast(0)); Qnn_Scalar_t align_corners_qnn_scalar = QNN_SCALAR_INIT; align_corners_qnn_scalar.dataType = QNN_DATATYPE_BOOL_8; @@ -373,7 +372,7 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w param_tensor_names.push_back(axis_param.GetParamTensorName()); qnn_model_wrapper.AddParamWrapper(std::move(axis_param)); - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); int64_t norm_p_order = node_helper.Get("p", static_cast(2)); ORT_RETURN_IF(norm_p_order != 2, "QNN EP only 
supports LpNormalization with 'p' attribute equal to 2."); } diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc index b033c8723ea86..13b106d3c1bde 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc @@ -2,7 +2,6 @@ // Licensed under the MIT License. #include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" @@ -63,7 +62,7 @@ void SliceOpBuilder::GetDataFromAttribute(const NodeUnit& node_unit, TensorShapeVector& raw_starts, TensorShapeVector& raw_ends, TensorShapeVector& raw_axes) const { - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); auto starts = node_helper.Get("starts", std::vector{0}); raw_starts.assign(starts.begin(), starts.end()); auto ends = node_helper.Get("ends", std::vector{0}); diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc index b62534bacf426..bc5339d90660e 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc @@ -2,7 +2,6 @@ // Licensed under the MIT License. 
#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/framework/tensorprotoutils.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc index ba5ad2cf03cef..f435b1d6d802f 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc @@ -2,12 +2,10 @@ // Licensed under the MIT License. #include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/cpu/tensor/slice_helper.h" -#include "core/providers/qnn/builder/op_builder_factory.h" #include "core/common/safeint.h" #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" @@ -98,7 +96,7 @@ Status SplitOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wr return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN doesn't support dynamic split"); } } else { - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); if (node_helper.HasAttr("split")) { auto split_lengths = node_helper.Get("split", std::vector{0}); ConvertSplitLengthsToSplitIndices(split_lengths, split_index); diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc index 851ca84dce075..e66c4cd350235 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc @@ -2,12 +2,10 @@ // Licensed under the MIT License. 
#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/cpu/tensor/slice_helper.h" -#include "core/providers/qnn/builder/op_builder_factory.h" #include "core/common/safeint.h" #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc index d22c0811682d0..9cb8f91a9db0b 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc @@ -3,6 +3,7 @@ #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/framework/utils.h" #include "core/providers/qnn/builder/op_builder_factory.h" +#include "core/providers/qnn/builder/qnn_utils.h" namespace onnxruntime { namespace qnn { const int TOPK_MIN_INPUT = 2; @@ -48,7 +49,7 @@ Status TopKOpBuilder::ExplictOpCheck(QnnModelWrapper& qnn_model_wrapper, const N if (!qnn_model_wrapper.IsInitializerInput(input_1)) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "The number of top elements to retrieve must be specified as constant input."); } - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); auto largest = node_helper.Get("largest", 1); auto sorted = node_helper.Get("sorted", 1); if (0 == sorted) { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/transpose_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/transpose_op_builder.cc index a42d7312f0203..1290a012d5902 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/transpose_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/transpose_op_builder.cc @@ -45,7 +45,7 @@ Status TransposeOpBuilder::ProcessPermAttribute(QnnModelWrapper& qnn_model_wrapp 
transpose_perm[i] = rank - 1 - i; } - NodeAttrHelper node_helper(node_unit); + utils::NodeAttrHelper node_helper(node_unit); transpose_perm = node_helper.Get("perm", transpose_perm); auto perm_size = static_cast(transpose_perm.size()); std::vector perm_shape{perm_size}; diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model.cc b/onnxruntime/core/providers/qnn/builder/qnn_model.cc index 88fa6429fc01e..75a02e3834567 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model.cc @@ -8,7 +8,6 @@ #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_node_group.h" -#include "core/providers/shared/utils/utils.h" #include "core/framework/utils.h" #include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h" #include "core/optimizer/qdq_transformer/selectors_actions/shared/utils.h" diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc index 2c7f3c8b22ddd..20ec422774845 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc @@ -11,7 +11,6 @@ #include "qnn_model_wrapper.h" #include "core/common/safeint.h" #include "core/framework/tensorprotoutils.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/qnn_utils.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h index f3e52050e79e0..9e308aa33a560 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h @@ -13,7 +13,6 @@ #include "core/common/logging/logging.h" #include "core/framework/node_unit.h" #include "core/graph/graph_viewer.h" -#include "core/providers/shared/utils/utils.h" #include 
"core/providers/qnn/builder/qnn_quant_params_wrapper.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.cc b/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.cc index 813bba8a5952b..76316250a88ad 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.cc @@ -8,7 +8,6 @@ #include #include "core/graph/graph_utils.h" #include "core/framework/node_unit.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_node_group/utils.h" @@ -110,8 +109,8 @@ static bool CanClipBeRemoved(const QnnModelWrapper& qnn_model_wrapper, float clip_min = std::numeric_limits::lowest(); float clip_max = std::numeric_limits::max(); - if (!onnxruntime::GetClipMinMax(qnn_model_wrapper.GetGraphViewer(), clip_node_unit.GetNode(), - clip_min, clip_max, logger)) { + if (!qnn::utils::GetClipMinMax(qnn_model_wrapper.GetGraphViewer(), clip_node_unit.GetNode(), + clip_min, clip_max, logger)) { return false; } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.cc b/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.cc index caf4725626338..17af5725a01ee 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.cc @@ -8,7 +8,6 @@ #include #include "core/graph/graph_utils.h" #include "core/framework/node_unit.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_node_group/utils.h" diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.cc 
b/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.cc index 76b1726646486..aceaf0399a6cb 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.cc @@ -8,7 +8,6 @@ #include #include "core/graph/graph_utils.h" #include "core/framework/node_unit.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" @@ -39,7 +38,7 @@ std::unique_ptr HardSigmoidMulFusion::TryFusion( return nullptr; } - NodeAttrHelper hs_attr_helper(hardsigmoid_node_unit); + utils::NodeAttrHelper hs_attr_helper(hardsigmoid_node_unit); float alpha = hs_attr_helper.Get("alpha", 0.2f); float beta = hs_attr_helper.Get("beta", 0.5f); constexpr float req_alpha = 1.0f / 6.0f; diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc index 8d2cb5bdb6da0..9457877ddfc93 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc @@ -9,8 +9,10 @@ #include "core/common/common.h" #include "core/framework/data_types.h" +#include "core/framework/tensorprotoutils.h" #include "qnn_utils.h" #include "core/providers/qnn/builder/qnn_def.h" +#include "core/graph/graph_viewer.h" namespace onnxruntime { namespace qnn { @@ -570,6 +572,247 @@ Status Quantize(const double double_value, return Status::OK(); } +static bool GetType(const NodeArg& node_arg, int32_t& type, const logging::Logger& logger) { + type = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED; + const auto* type_proto = node_arg.TypeAsProto(); + if (!type_proto || !type_proto->has_tensor_type() || !type_proto->tensor_type().has_elem_type()) { + LOGS(logger, WARNING) << "NodeArg [" << node_arg.Name() << "] has no input type"; + return 
false; + } + + type = type_proto->tensor_type().elem_type(); + return true; +} + +NodeAttrHelper::NodeAttrHelper(const onnxruntime::Node& node) + : node_attributes_(node.GetAttributes()) {} + +NodeAttrHelper::NodeAttrHelper(const NodeUnit& node_unit) + : node_attributes_(node_unit.GetNode().GetAttributes()) {} + +float NodeAttrHelper::Get(const std::string& key, float def_val) const { + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + return entry->second.f(); + } + + return def_val; +} + +int32_t NodeAttrHelper::Get(const std::string& key, int32_t def_val) const { + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + return narrow(entry->second.i()); + } + + return def_val; +} + +uint32_t NodeAttrHelper::Get(const std::string& key, uint32_t def_val) const { + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + return narrow(entry->second.i()); + } + + return def_val; +} + +int64_t NodeAttrHelper::Get(const std::string& key, int64_t def_val) const { + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + return entry->second.i(); + } + + return def_val; +} + +const std::string& NodeAttrHelper::Get(const std::string& key, const std::string& def_val) const { + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + return entry->second.s(); + } + + return def_val; +} + +std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + const auto& attr = entry->second; + std::vector v; + v.reserve(static_cast(attr.ints_size())); + std::transform(attr.ints().cbegin(), attr.ints().cend(), std::back_inserter(v), + [](int64_t val) -> int32_t { return narrow(val); }); + return v; + } + + return def_val; +} + +std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { + if (auto entry = 
node_attributes_.find(key); entry != node_attributes_.end()) { + const auto& attr = entry->second; + std::vector v; + v.reserve(static_cast(attr.ints_size())); + std::transform(attr.ints().cbegin(), attr.ints().cend(), std::back_inserter(v), + [](int64_t val) -> uint32_t { return narrow(val); }); + return v; + } + + return def_val; +} + +std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + const auto& values = entry->second.ints(); + return std::vector{values.cbegin(), values.cend()}; + } + + return def_val; +} + +std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + const auto& values = entry->second.strings(); + return std::vector{values.cbegin(), values.cend()}; + } + + return def_val; +} + +std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + const auto& values = entry->second.floats(); + return std::vector{values.cbegin(), values.cend()}; + } + + return def_val; +} + +std::optional NodeAttrHelper::GetFloat(const std::string& key) const { + std::optional result; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + result = entry->second.f(); + } + + return result; +} + +std::optional NodeAttrHelper::GetInt64(const std::string& key) const { + std::optional result; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + result = entry->second.i(); + } + + return result; +} + +std::optional> NodeAttrHelper::GetFloats(const std::string& key) const { + std::optional> result; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + const auto& values = entry->second.floats(); + result = std::vector(values.begin(), 
values.end()); + } + + return result; +} + +std::optional> NodeAttrHelper::GetInt64s(const std::string& key) const { + std::optional> result; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + const auto& values = entry->second.ints(); + result = std::vector(values.begin(), values.end()); + } + + return result; +} + +std::optional NodeAttrHelper::GetString(const std::string& key) const { + std::optional result; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + result = entry->second.s(); + } + + return result; +} + +bool NodeAttrHelper::HasAttr(const std::string& key) const { + return Contains(node_attributes_, key); +} +static bool GetClipMinMaxImpl(const GraphViewer& graph_viewer, const Node& node, float& min, float& max, + const logging::Logger& logger) { + const auto& node_name = node.Name(); + int32_t input_type; + if (!GetType(*node.InputDefs()[0], input_type, logger)) { + return false; + } + + min = std::numeric_limits::lowest(); + max = std::numeric_limits::max(); + + if (node.SinceVersion() < 11) { // Clip opset 1, 6 is using attributes for min/max + NodeAttrHelper helper(node); + // attributes will be always float + min = helper.Get("min", std::numeric_limits::lowest()); + max = helper.Get("max", std::numeric_limits::max()); + } else { + auto get_value = + [&](const ONNX_NAMESPACE::TensorProto* initializer, std::string_view type, float& value) -> bool { + if (!initializer) { + LOGS(logger, VERBOSE) << type << " input of Clip must be a constant initializer"; + return false; + } + + switch (input_type) { + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: { + auto status = onnxruntime::utils::UnpackTensor(*initializer, graph_viewer.ModelPath(), &value, 1); + if (!status.IsOK()) { + LOGS(logger, ERROR) << "GetClipMinMax() failed to unpack float initializer: " << status.ErrorMessage(); + return false; + } + break; + } + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: { + MLFloat16 f16_val{}; 
+ auto status = onnxruntime::utils::UnpackTensor(*initializer, graph_viewer.ModelPath(), &f16_val, 1); + if (!status.IsOK()) { + LOGS(logger, ERROR) << "GetClipMinMax() failed to unpack float16 initializer: " << status.ErrorMessage(); + return false; + } + value = f16_val.ToFloat(); + break; + } + default: + LOGS(logger, VERBOSE) << "GetClipMinMax() only supports float and float16 as min and max inputs for now." + << " The node [" << node_name << "] has input type: " << input_type; + return false; + } + + return true; + }; + + // min and max are both optional. could have neither, one or both. + if (node.InputDefs().size() > 1 && node.InputDefs()[1]->Exists()) { + // we have input min + const auto& min_name = node.InputDefs()[1]->Name(); + const auto* min_value = graph_viewer.GetConstantInitializer(min_name); + if (!get_value(min_value, "Min", min)) { + return false; + } + } + + if (node.InputDefs().size() > 2 && node.InputDefs()[2]->Exists()) { + // we have input max + const auto& max_name = node.InputDefs()[2]->Name(); + const auto* max_value = graph_viewer.GetConstantInitializer(max_name); + if (!get_value(max_value, "Max", max)) { + return false; + } + } + } + + return true; +} + +bool GetClipMinMax(const GraphViewer& graph_viewer, const Node& node, float& min, float& max, + const logging::Logger& logger) { + return GetClipMinMaxImpl(graph_viewer, node, min, max, logger); +} + } // namespace utils } // namespace qnn } // namespace onnxruntime diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.h b/onnxruntime/core/providers/qnn/builder/qnn_utils.h index aa4a27460563f..0d69242958666 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.h @@ -104,6 +104,58 @@ Status Quantize(const double double_value, const Qnn_DataType_t qnn_data_type, int& quant_value); +/** + * Wrapping onnxruntime::Node for retrieving attribute values + */ +class NodeAttrHelper { + public: + explicit 
NodeAttrHelper(const Node& node); + + // Get the attributes from the target node of the node_unit + explicit NodeAttrHelper(const NodeUnit& node_unit); + + /* + * Get with default + */ + float Get(const std::string& key, float def_val) const; + std::vector Get(const std::string& key, const std::vector& def_val) const; + + int64_t Get(const std::string& key, int64_t def_val) const; + std::vector Get(const std::string& key, const std::vector& def_val) const; + + const std::string& Get(const std::string& key, const std::string& def_val) const; + std::vector Get(const std::string& key, const std::vector& def_val) const; + + // Convert the i() or ints() of the attribute from int64_t to int32_t + int32_t Get(const std::string& key, int32_t def_val) const; + std::vector Get(const std::string& key, const std::vector& def_val) const; + + // Convert the i() or ints() of the attribute from int64_t to uint32_t + uint32_t Get(const std::string& key, uint32_t def_val) const; + std::vector Get(const std::string& key, const std::vector& def_val) const; + + /* + * Get without default. + */ + std::optional GetFloat(const std::string& key) const; + std::optional> GetFloats(const std::string& key) const; + + std::optional GetInt64(const std::string& key) const; + std::optional> GetInt64s(const std::string& key) const; + + std::optional GetString(const std::string& key) const; + + bool HasAttr(const std::string& key) const; + + private: + const NodeAttributes& node_attributes_; +}; + +// Get the min/max of a Clip operator. Reads values from attributes for opset < 11 and inputs after that. +// For opset 11+, if min/max are not constant initializers, will return false. +// For now we only support getting float min/max. 
+bool GetClipMinMax(const GraphViewer& graph_viewer, const Node& node, + float& min, float& max, const logging::Logger& logger); } // namespace utils } // namespace qnn } // namespace onnxruntime diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index 6735528bebbf9..960fafd1fa2c4 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -16,7 +16,7 @@ #include "core/platform/env.h" #include "core/providers/common.h" #include "core/providers/partitioning_utils.h" -#include "core/providers/partitioning_utils.h" +#include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_node_group.h" @@ -565,7 +565,7 @@ QNNExecutionProvider::GetSupportedNodes(const GraphViewer& graph_viewer, static bool EpSharedContextsHasAllGraphs(const onnxruntime::GraphViewer& graph_viewer, const logging::Logger& logger) { for (const auto& node : graph_viewer.Nodes()) { - NodeAttrHelper node_helper(node); + qnn::utils::NodeAttrHelper node_helper(node); std::string cache_source = node_helper.Get(qnn::SOURCE, ""); std::transform(cache_source.begin(), @@ -591,7 +591,7 @@ static bool EpSharedContextsHasAllGraphs(const std::vectorName(); @@ -615,7 +615,7 @@ static void PartitionCtxModel(const onnxruntime::GraphViewer& graph_viewer, std::vector> supported_groups{}; for (const auto& node : graph_viewer.Nodes()) { - NodeAttrHelper node_helper(node); + qnn::utils::NodeAttrHelper node_helper(node); std::string cache_source = node_helper.Get(qnn::SOURCE, ""); std::transform(cache_source.begin(), diff --git a/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc b/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc index a3f0ed55b83f2..38fde332ca992 100644 --- 
a/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc +++ b/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc @@ -7,7 +7,6 @@ #include "core/session/onnxruntime_cxx_api.h" #include "core/session/onnxruntime_session_options_config_keys.h" #include "core/session/inference_session.h" -#include "core/providers/shared/utils/utils.h" #include "test/providers/qnn/qnn_test_utils.h" @@ -25,6 +24,24 @@ namespace test { #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) +static int64_t GetNodeAttr(const Node& node, const std::string& attr_name, int64_t default_val) { + const auto& attributes = node.GetAttributes(); + if (auto entry = attributes.find(attr_name); entry != attributes.end()) { + return entry->second.i(); + } + + return default_val; +} + +static const std::string& GetNodeAttr(const Node& node, const std::string& attr_name, const std::string& default_val) { + const auto& attributes = node.GetAttributes(); + if (auto entry = attributes.find(attr_name); entry != attributes.end()) { + return entry->second.s(); + } + + return default_val; +} + // Create a model with FusedMatMul + Add (quantized) // input1 -> Add -> Q -> DQ \ // FusedMatMul -> Q -> DQ -> output @@ -873,10 +890,9 @@ static void GetLastContextBinaryFileName(const std::string last_onnx_ctx_file, auto& ctx_graph = ctx_model->MainGraph(); for (auto& node : ctx_graph.Nodes()) { if (node.OpType() == "EPContext") { - NodeAttrHelper node_helper(node); - int64_t is_main_context = node_helper.Get("main_context", static_cast(0)); + int64_t is_main_context = GetNodeAttr(node, "main_context", static_cast(0)); if (1 == is_main_context) { - last_ctx_bin_file = node_helper.Get("ep_cache_context", ""); + last_ctx_bin_file = GetNodeAttr(node, "ep_cache_context", ""); return; } } @@ -899,10 +915,9 @@ static void UpdateEpContextModel(const std::vector& ep_ctx_files, for (auto& node : ctx_graph.Nodes()) { if (node.OpType() == "EPContext") { - NodeAttrHelper node_helper(node); - int64_t is_main_context 
= node_helper.Get("main_context", static_cast(0)); + int64_t is_main_context = GetNodeAttr(node, "main_context", static_cast(0)); if (1 == is_main_context) { - std::string old_qnn_ctx_binary_file_name = node_helper.Get("ep_cache_context", ""); + std::string old_qnn_ctx_binary_file_name = GetNodeAttr(node, "ep_cache_context", ""); auto file_path = path.replace_filename(old_qnn_ctx_binary_file_name); std::remove(file_path.string().c_str()); node.ClearAttribute("ep_cache_context"); diff --git a/onnxruntime/test/qnn_ctx_gen/main.cc b/onnxruntime/test/qnn_ctx_gen/main.cc index d568d5e78688a..b7b01cd6fbc20 100644 --- a/onnxruntime/test/qnn_ctx_gen/main.cc +++ b/onnxruntime/test/qnn_ctx_gen/main.cc @@ -16,7 +16,6 @@ #include "core/common/logging/sinks/clog_sink.h" #include "core/graph/model.h" -#include "core/providers/shared/utils/utils.h" #include "core/session/environment.h" #include "core/common/logging/logging.h" @@ -31,6 +30,24 @@ static void CheckStatus(const Status& status) { } } +static int64_t GetNodeAttr(const Node& node, const std::string& attr_name, int64_t default_val) { + const auto& attributes = node.GetAttributes(); + if (auto entry = attributes.find(attr_name); entry != attributes.end()) { + return entry->second.i(); + } + + return default_val; +} + +static const std::string& GetNodeAttr(const Node& node, const std::string& attr_name, const std::string& default_val) { + const auto& attributes = node.GetAttributes(); + if (auto entry = attributes.find(attr_name); entry != attributes.end()) { + return entry->second.s(); + } + + return default_val; +} + // from the last context cache Onnx model, find the EPContext node with main_context=1, // and get the QNN context binary file name, this context binary contains all graphs from all Onnx models static void GetLastContextBinaryFileName(const std::basic_string last_onnx_ctx_file, @@ -41,10 +58,9 @@ static void GetLastContextBinaryFileName(const std::basic_string last auto& ctx_graph = ctx_model->MainGraph(); 
for (auto& node : ctx_graph.Nodes()) { if (node.OpType() == "EPContext") { - NodeAttrHelper node_helper(node); - int64_t is_main_context = node_helper.Get("main_context", static_cast(0)); + int64_t is_main_context = GetNodeAttr(node, "main_context", static_cast(0)); if (1 == is_main_context) { - last_ctx_bin_file = node_helper.Get("ep_cache_context", ""); + last_ctx_bin_file = GetNodeAttr(node, "ep_cache_context", ""); return; } } @@ -67,10 +83,9 @@ static void UpdateEpContextModel(const std::vector> for (auto& node : ctx_graph.Nodes()) { if (node.OpType() == "EPContext") { - NodeAttrHelper node_helper(node); - int64_t is_main_context = node_helper.Get("main_context", static_cast(0)); + int64_t is_main_context = GetNodeAttr(node, "main_context", static_cast(0)); if (1 == is_main_context) { - std::string old_qnn_ctx_binary_file_name = node_helper.Get("ep_cache_context", ""); + std::string old_qnn_ctx_binary_file_name = GetNodeAttr(node, "ep_cache_context", ""); auto file_path = path.replace_filename(old_qnn_ctx_binary_file_name); std::remove(file_path.string().c_str()); node.ClearAttribute("ep_cache_context"); From 7e46a7de63686141cb580bf4918d15624ec85880 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 9 Dec 2024 19:17:15 -0800 Subject: [PATCH 02/64] Update QNN EP's initializer transpose logic to use only functions exposed by the provider bridge. 
--- .../qnn/builder/opbuilder/base_op_builder.cc | 59 ++++++++++++------- .../qnn/builder/qnn_model_wrapper.cc | 24 ++------ .../core/providers/qnn/builder/qnn_utils.h | 29 +++++++++ 3 files changed, 71 insertions(+), 41 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc index af070fc01a279..3c0ca0dab2da3 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc @@ -274,29 +274,46 @@ Status BaseOpBuilder::TransposeInitializer(const QnnModelWrapper& qnn_model_wrap const onnx::TensorProto& initializer, const std::vector& perm, std::vector& transposed_data) const { - const DataTypeImpl* tensor_dtype = DataTypeImpl::TensorTypeFromONNXEnum(initializer.data_type())->GetElementType(); - const auto tensor_shape_dims = onnxruntime::utils::GetTensorShapeFromTensorProto(initializer); - TensorShape tensor_shape{tensor_shape_dims}; - AllocatorPtr cpu_allocator = std::make_shared(); - Tensor in_tensor = Tensor(tensor_dtype, tensor_shape, cpu_allocator); - + int32_t onnx_type = initializer.data_type(); + const DataTypeImpl* tensor_dtype = DataTypeImpl::TensorTypeFromONNXEnum(onnx_type)->GetElementType(); + const TensorShape in_tensor_shape = onnxruntime::utils::GetTensorShapeFromTensorProto(initializer); + + // Unpack initializer data into an input Tensor. + size_t tensor_data_size = Tensor::CalculateTensorStorageSize(tensor_dtype, in_tensor_shape); + std::vector input_tensor_data(tensor_data_size); + ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(initializer, + qnn_model_wrapper.GetGraphViewer().ModelPath(), + input_tensor_data)); + Tensor in_tensor(tensor_dtype, in_tensor_shape, input_tensor_data.data(), OrtMemoryInfo{}); + + // Determine the new transposed shape. 
auto rank = perm.size(); - std::vector new_tensor_shape_dims; - std::vector permutations; - new_tensor_shape_dims.reserve(rank); - permutations.reserve(rank); - for (int64_t p : perm) { - permutations.push_back(p); - new_tensor_shape_dims.push_back(tensor_shape_dims[p]); + std::vector out_tensor_shape_dims; + out_tensor_shape_dims.reserve(rank); + for (size_t p : perm) { + out_tensor_shape_dims.push_back(in_tensor_shape[p]); + } + const TensorShape out_tensor_shape = TensorShape::FromExistingBuffer(out_tensor_shape_dims); + + // Create an output tensor that does not own the pre-allocated `transposed_data` buffer. + // DoTranspose() will write the new transposed elements directly into the `transposed_data` buffer. + // We do this to eliminate unnecessary weight copies. + transposed_data.resize(tensor_data_size); + Tensor out_tensor(tensor_dtype, out_tensor_shape, transposed_data.data(), OrtMemoryInfo{}); + ORT_RETURN_IF_ERROR(Transpose::DoTranspose(perm, in_tensor, out_tensor)); + + // If this is an int4, we need to unpack it because QNN treats int4 as a full int8. + // TODO: Improve memory usage! Transpose::DoTranspose() internally copies Tensor to Tensor, + // does the transpose, and then copies the result to a new Tensor. Afterwards, QNN EP will unpack + // the new Tensor back to 8-bits. This is wasteful. A better approach would be for QNN EP to do the following: + // - Explicitly unpack Tensor to Tensor + // - Call Transpose::DoTranspose() with the Tensor. This generates a new transposed Tensor. + // - Clear the top 4-bits to zero for every int8 element in the transposed Tensor. 
+ if (onnx_type == ONNX_NAMESPACE::TensorProto_DataType_INT4) { + ORT_RETURN_IF_ERROR(qnn::utils::UnpackInt4ToInt8(out_tensor_shape.Size(), transposed_data)); + } else if (onnx_type == ONNX_NAMESPACE::TensorProto_DataType_UINT4) { + ORT_RETURN_IF_ERROR(qnn::utils::UnpackInt4ToInt8(out_tensor_shape.Size(), transposed_data)); } - - TensorShape new_tensor_shape(new_tensor_shape_dims); - Tensor out_tensor = Tensor(tensor_dtype, new_tensor_shape, cpu_allocator); - ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor( - Env::Default(), qnn_model_wrapper.GetGraphViewer().ModelPath(), initializer, in_tensor)); - ORT_RETURN_IF_ERROR(Transpose::DoTranspose(permutations, in_tensor, out_tensor)); - onnx::TensorProto new_tensor_proto = onnxruntime::utils::TensorToTensorProto(out_tensor, "test"); - ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(new_tensor_proto, transposed_data)); return Status::OK(); } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc index 20ec422774845..a6bd17e75b6c0 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc @@ -624,28 +624,12 @@ Status QnnModelWrapper::UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& // If this is an int4, we need to unpack it because QNN treats int4 as a full int8. 
if (onnx_data_type == ONNX_NAMESPACE::TensorProto_DataType_INT4) { TensorShape shape = onnxruntime::utils::GetTensorShapeFromTensorProto(initializer); - const size_t num_elems = shape.Size(); - std::vector packed_int4_bytes = std::move(unpacked_tensor); - unpacked_tensor = std::vector(num_elems); - - auto dst = gsl::make_span(reinterpret_cast(unpacked_tensor.data()), unpacked_tensor.size()); - auto src = gsl::make_span(reinterpret_cast(packed_int4_bytes.data()), packed_int4_bytes.size()); - ORT_RETURN_IF_NOT(Int4x2::Unpack(dst, src), "Failed to unpack Tensor for QNN"); - - // NOTE: Masking off top 4 bits to workaround a QNN INT4 accuracy bug. - // Docs explicitly state that masking off top 4 bits should not be required. - for (size_t i = 0; i < dst.size(); i++) { - dst[i] &= 0x0F; // -3 (0b1111_1101) becomes 13 (0b0000_1101) - } + const size_t num_int4_elems = shape.Size(); + ORT_RETURN_IF_ERROR(qnn::utils::UnpackInt4ToInt8(num_int4_elems, unpacked_tensor)); } else if (onnx_data_type == ONNX_NAMESPACE::TensorProto_DataType_UINT4) { TensorShape shape = onnxruntime::utils::GetTensorShapeFromTensorProto(initializer); - const size_t num_elems = shape.Size(); - std::vector packed_int4_bytes = std::move(unpacked_tensor); - unpacked_tensor = std::vector(num_elems); - - auto dst = gsl::make_span(reinterpret_cast(unpacked_tensor.data()), unpacked_tensor.size()); - auto src = gsl::make_span(reinterpret_cast(packed_int4_bytes.data()), packed_int4_bytes.size()); - ORT_RETURN_IF_NOT(UInt4x2::Unpack(dst, src), "Failed to unpack Tensor for QNN"); + const size_t num_uint4_elems = shape.Size(); + ORT_RETURN_IF_ERROR(qnn::utils::UnpackInt4ToInt8(num_uint4_elems, unpacked_tensor)); } return Status::OK(); diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.h b/onnxruntime/core/providers/qnn/builder/qnn_utils.h index 0d69242958666..11ecf57ada357 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.h @@ -104,6 
+104,35 @@ Status Quantize(const double double_value, const Qnn_DataType_t qnn_data_type, int& quant_value); +// Re-writes a buffer of packed 4-bit elements to a buffer of unpacked 8-bit elements. +// QNN requires that 4-bit weights are unpacked to 8-bit. +template +Status UnpackInt4ToInt8(size_t num_int4_elems, std::vector& data_bytes) { + if constexpr (Signed) { // INT4 + std::vector packed_int4_bytes = std::move(data_bytes); + data_bytes = std::vector(num_int4_elems); + + auto dst = gsl::make_span(reinterpret_cast(data_bytes.data()), data_bytes.size()); + auto src = gsl::make_span(reinterpret_cast(packed_int4_bytes.data()), packed_int4_bytes.size()); + ORT_RETURN_IF_NOT(Int4x2::Unpack(dst, src), "Failed to unpack Tensor for QNN"); + + // NOTE: Masking off top 4 bits to workaround a QNN INT4 accuracy bug. + // Docs explicitly state that masking off top 4 bits should not be required, but we have to do it. + for (size_t i = 0; i < dst.size(); i++) { + dst[i] &= 0x0F; // -3 (0b1111_1101) becomes 13 (0b0000_1101) + } + } else { // UINT4 + std::vector packed_uint4_bytes = std::move(data_bytes); + data_bytes = std::vector(num_int4_elems); + + auto dst = gsl::make_span(reinterpret_cast(data_bytes.data()), data_bytes.size()); + auto src = gsl::make_span(reinterpret_cast(packed_uint4_bytes.data()), packed_uint4_bytes.size()); + ORT_RETURN_IF_NOT(UInt4x2::Unpack(dst, src), "Failed to unpack Tensor for QNN"); + } + + return Status::OK(); +} + /** * Wrapping onnxruntime::Node for retrieving attribute values */ From a155b33b8e2ff40854e8146857aecfd7a18501d5 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 9 Dec 2024 19:19:18 -0800 Subject: [PATCH 03/64] Update comment --- .../core/providers/qnn/builder/opbuilder/base_op_builder.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc index 
3c0ca0dab2da3..f8a6c1c602fe9 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc @@ -304,11 +304,11 @@ Status BaseOpBuilder::TransposeInitializer(const QnnModelWrapper& qnn_model_wrap // If this is an int4, we need to unpack it because QNN treats int4 as a full int8. // TODO: Improve memory usage! Transpose::DoTranspose() internally copies Tensor to Tensor, - // does the transpose, and then copies the result to a new Tensor. Afterwards, QNN EP will unpack + // does the transpose in 8-bits, and then copies the result back to a new Tensor. Afterwards, QNN EP unpacks // the new Tensor back to 8-bits. This is wasteful. A better approach would be for QNN EP to do the following: - // - Explicitly unpack Tensor to Tensor + // - Explicitly unpack Tensor to Tensor in QNN EP. // - Call Transpose::DoTranspose() with the Tensor. This generates a new transposed Tensor. - // - Clear the top 4-bits to zero for every int8 element in the transposed Tensor. + // - Clear the top 4-bits to zero for every int8 element in the transposed Tensor. [ONLY if signed int4] if (onnx_type == ONNX_NAMESPACE::TensorProto_DataType_INT4) { ORT_RETURN_IF_ERROR(qnn::utils::UnpackInt4ToInt8(out_tensor_shape.Size(), transposed_data)); } else if (onnx_type == ONNX_NAMESPACE::TensorProto_DataType_UINT4) { From e9c5f1420078cfbc5d48a080c0cd7556031e718e Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Tue, 10 Dec 2024 10:53:10 -0800 Subject: [PATCH 04/64] Added TransposeBase::DoTranspose() to provider bridge. 
May elect to revert this in favor of doing the transpose manually in QNN EP --- onnxruntime/core/providers/cpu/cpu_provider_shared.cc | 8 ++++++++ onnxruntime/core/providers/cpu/cpu_provider_shared.h | 5 +++++ .../providers/qnn/builder/opbuilder/base_op_builder.cc | 4 ++-- onnxruntime/core/providers/shared_library/provider_api.h | 1 + .../providers/shared_library/provider_bridge_provider.cc | 7 +++++++ 5 files changed, 23 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/cpu/cpu_provider_shared.cc b/onnxruntime/core/providers/cpu/cpu_provider_shared.cc index ce9780031a250..3778c89a00e19 100644 --- a/onnxruntime/core/providers/cpu/cpu_provider_shared.cc +++ b/onnxruntime/core/providers/cpu/cpu_provider_shared.cc @@ -23,6 +23,7 @@ #include "core/providers/cpu/tensor/slice.h" #include "core/providers/cpu/tensor/onehot.h" #include "core/providers/cpu/tensor/tile.h" +#include "core/providers/cpu/tensor/transpose.h" #include "core/providers/cpu/tensor/gather_elements.h" #include "core/providers/cpu/tensor/unsqueeze.h" #include "core/providers/cpu/tensor/upsamplebase.h" @@ -81,6 +82,13 @@ struct ProviderHostCPUImpl : ProviderHostCPU { Status NonMaxSuppressionBase__PrepareCompute(OpKernelContext* ctx, PrepareContext& pc) override { return NonMaxSuppressionBase::PrepareCompute(ctx, pc); } Status NonMaxSuppressionBase__GetThresholdsFromInputs(const PrepareContext& pc, int64_t& max_output_boxes_per_class, float& iou_threshold, float& score_threshold) override { return NonMaxSuppressionBase::GetThresholdsFromInputs(pc, max_output_boxes_per_class, iou_threshold, score_threshold); } + // TransposeBase (direct) + Status TransposeBase__DoTranspose(const gsl::span& permutations, const Tensor& input, Tensor& output, + const TensorShape* input_shape_override, + concurrency::ThreadPool* tp) override { + return TransposeBase::DoTranspose(permutations, input, output, input_shape_override, tp); + } + #if defined(USE_CUDA) || defined(USE_ROCM) // From cpu/tensor/size.h 
(direct) Status Size__Compute(const Size* p, OpKernelContext* context) override { return p->Size::Compute(context); } diff --git a/onnxruntime/core/providers/cpu/cpu_provider_shared.h b/onnxruntime/core/providers/cpu/cpu_provider_shared.h index eb1569c3e499e..ce7cd6155c38c 100644 --- a/onnxruntime/core/providers/cpu/cpu_provider_shared.h +++ b/onnxruntime/core/providers/cpu/cpu_provider_shared.h @@ -38,6 +38,11 @@ struct ProviderHostCPU { virtual Status NonMaxSuppressionBase__PrepareCompute(OpKernelContext* ctx, PrepareContext& pc) = 0; virtual Status NonMaxSuppressionBase__GetThresholdsFromInputs(const PrepareContext& pc, int64_t& max_output_boxes_per_class, float& iou_threshold, float& score_threshold) = 0; + // TransposeBase + virtual Status TransposeBase__DoTranspose(const gsl::span& permutations, const Tensor& input, Tensor& output, + const TensorShape* input_shape_override, + concurrency::ThreadPool* tp) = 0; + #if defined(USE_CUDA) || defined(USE_ROCM) // From cpu/tensor/size.h diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc index f8a6c1c602fe9..d9d83ba085859 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc @@ -300,10 +300,10 @@ Status BaseOpBuilder::TransposeInitializer(const QnnModelWrapper& qnn_model_wrap // We do this to eliminate unnecessary weight copies. transposed_data.resize(tensor_data_size); Tensor out_tensor(tensor_dtype, out_tensor_shape, transposed_data.data(), OrtMemoryInfo{}); - ORT_RETURN_IF_ERROR(Transpose::DoTranspose(perm, in_tensor, out_tensor)); + ORT_RETURN_IF_ERROR(TransposeBase::DoTranspose(perm, in_tensor, out_tensor)); // If this is an int4, we need to unpack it because QNN treats int4 as a full int8. - // TODO: Improve memory usage! 
Transpose::DoTranspose() internally copies Tensor to Tensor, + // TODO: Reduce copies for INT4! Transpose::DoTranspose() internally copies Tensor to Tensor, // does the transpose in 8-bits, and then copies the result back to a new Tensor. Afterwards, QNN EP unpacks // the new Tensor back to 8-bits. This is wasteful. A better approach would be for QNN EP to do the following: // - Explicitly unpack Tensor to Tensor in QNN EP. diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index b84825236a453..35014b39335f2 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -184,6 +184,7 @@ class GatherBase; class Size; class SliceBase; class SplitBase; +class TransposeBase; class TensorShape; struct Prepare; struct PrepareContext; diff --git a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc index d3b12f9728135..92e5eb1ed5eb0 100644 --- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc +++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc @@ -23,6 +23,7 @@ #include "core/providers/cpu/tensor/split.h" #include "core/providers/cpu/tensor/size.h" #include "core/providers/cpu/tensor/scatter_nd.h" +#include "core/providers/cpu/tensor/transpose.h" #include "core/providers/cpu/tensor/unsqueeze.h" #include "core/providers/cpu/tensor/upsamplebase.h" #include "core/providers/cpu/tensor/tile.h" @@ -513,6 +514,12 @@ Status NonMaxSuppressionBase::GetThresholdsFromInputs(const PrepareContext& pc, Status GatherBase::PrepareForCompute(OpKernelContext* context, GatherBase::Prepare& p) const { return g_host_cpu.GatherBase__PrepareForCompute(this, context, reinterpret_cast(p)); } Status UnsqueezeBase::PrepareCompute(OpKernelContext* ctx, UnsqueezeBase::Prepare& p) const { return 
g_host_cpu.UnsqueezeBase__PrepareCompute(this, ctx, reinterpret_cast(p)); } +Status TransposeBase::DoTranspose(const gsl::span& permutations, const Tensor& input, Tensor& output, + const TensorShape* input_shape_override, + concurrency::ThreadPool* tp) { + return g_host_cpu.TransposeBase__DoTranspose(permutations, input, output, input_shape_override, tp); +} + #if defined(USE_CUDA) || defined(USE_ROCM) bool TileOp::IsTileMemcpy(const TensorShape& input_shape, const int64_t* repeats, size_t rank, bool& is_batched_memcpy, size_t& num_of_elements_per_batch, size_t& num_of_copies_per_batch, size_t& num_of_batch_copies) { return g_host_cpu.TileOp__IsTileMemcpy(input_shape, repeats, rank, is_batched_memcpy, num_of_elements_per_batch, num_of_copies_per_batch, num_of_batch_copies); From d0f64dc1d69395e5408037c012d775c8e528c11e Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Tue, 10 Dec 2024 11:05:36 -0800 Subject: [PATCH 05/64] Add TypeProto_Tensor_has_elem_type() to provider bridge --- onnxruntime/core/providers/shared_library/provider_interfaces.h | 1 + .../core/providers/shared_library/provider_wrappedtypes.h | 1 + onnxruntime/core/session/provider_bridge_ort.cc | 1 + 3 files changed, 3 insertions(+) diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index f9f2bb69a9d1a..dc3f5e60f2745 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -325,6 +325,7 @@ struct ProviderHost { virtual bool TypeProto_Tensor__has_shape(const ONNX_NAMESPACE::TypeProto_Tensor* p) = 0; virtual const ONNX_NAMESPACE::TensorShapeProto& TypeProto_Tensor__shape(const ONNX_NAMESPACE::TypeProto_Tensor* p) = 0; virtual ONNX_NAMESPACE::TensorShapeProto* TypeProto_Tensor__mutable_shape(ONNX_NAMESPACE::TypeProto_Tensor* p) = 0; + virtual bool TypeProto_Tensor__has_elem_type(const ONNX_NAMESPACE::TypeProto_Tensor* 
p) = 0; virtual int32_t TypeProto_Tensor__elem_type(const ONNX_NAMESPACE::TypeProto_Tensor* p) = 0; virtual void TypeProto_Tensor__set_elem_type(ONNX_NAMESPACE::TypeProto_Tensor* p, int32_t value) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index a82ddfe64c64b..0efa3833a978b 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -304,6 +304,7 @@ struct TypeProto_Tensor final { bool has_shape() const { return g_host->TypeProto_Tensor__has_shape(this); } const TensorShapeProto& shape() const { return g_host->TypeProto_Tensor__shape(this); } TensorShapeProto* mutable_shape() { return g_host->TypeProto_Tensor__mutable_shape(this); } + bool has_elem_type() const { return g_host->TypeProto_Tensor__has_elem_type(this); } int32_t elem_type() const { return g_host->TypeProto_Tensor__elem_type(this); } void set_elem_type(int32_t value) { g_host->TypeProto_Tensor__set_elem_type(this, value); } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index d55fd34d5a8f2..29ffd9487925b 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -423,6 +423,7 @@ struct ProviderHostImpl : ProviderHost { bool TypeProto_Tensor__has_shape(const ONNX_NAMESPACE::TypeProto_Tensor* p) override { return p->has_shape(); } const ONNX_NAMESPACE::TensorShapeProto& TypeProto_Tensor__shape(const ONNX_NAMESPACE::TypeProto_Tensor* p) override { return p->shape(); } ONNX_NAMESPACE::TensorShapeProto* TypeProto_Tensor__mutable_shape(ONNX_NAMESPACE::TypeProto_Tensor* p) override { return p->mutable_shape(); } + bool TypeProto_Tensor__has_elem_type(const ONNX_NAMESPACE::TypeProto_Tensor* p) override { return p->has_elem_type(); } int32_t TypeProto_Tensor__elem_type(const 
ONNX_NAMESPACE::TypeProto_Tensor* p) override { return p->elem_type(); } void TypeProto_Tensor__set_elem_type(ONNX_NAMESPACE::TypeProto_Tensor* p, int32_t value) override { p->set_elem_type(value); }; From f8bd2f6338fdee7c739922aef79557f416c712b0 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Tue, 10 Dec 2024 12:51:26 -0800 Subject: [PATCH 06/64] Add to provider bridge: TensorTypeBase class, TensorTypeBase::GetElementType(), DataTypeImpl::TensorTypeFromONNXEnum() --- onnxruntime/core/providers/shared_library/provider_api.h | 1 + .../core/providers/shared_library/provider_interfaces.h | 5 +++++ .../core/providers/shared_library/provider_wrappedtypes.h | 7 +++++++ onnxruntime/core/session/provider_bridge_ort.cc | 7 +++++++ 4 files changed, 20 insertions(+) diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index 35014b39335f2..6e17947af3389 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -169,6 +169,7 @@ class OpKernel; struct OpKernelContext; struct OpKernelInfo; struct PrimitiveDataTypeBase; +struct TensorTypeBase; struct OrtRunOptions; struct Tensor; struct SparseTensor; diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index dc3f5e60f2745..dfe46e0ee32b5 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -677,6 +677,9 @@ struct ProviderHost { virtual int32_t PrimitiveDataTypeBase__GetNumSubElems(const PrimitiveDataTypeBase* p) = 0; virtual bool PrimitiveDataTypeBase__HasSubElems(const PrimitiveDataTypeBase* p) = 0; + // TensorTypeBase + virtual MLDataType TensorTypeBase__GetElementType(const TensorTypeBase* p) = 0; + // DataTypeImpl virtual MLDataType DataTypeImpl__GetType_Tensor() = 0; #if 
!defined(DISABLE_SPARSE_TENSORS) @@ -795,6 +798,8 @@ struct ProviderHost { virtual size_t DataTypeImpl__Size(const DataTypeImpl* p) = 0; virtual const PrimitiveDataTypeBase* DataTypeImpl__AsPrimitiveDataType(const DataTypeImpl* p) = 0; + virtual const TensorTypeBase* DataTypeImpl__TensorTypeFromONNXEnum(int type) = 0; + // Function virtual const Graph& Function__Body(const Function* p) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index 0efa3833a978b..04818245d146f 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -703,6 +703,12 @@ struct PrimitiveDataTypeBase final { PROVIDER_DISALLOW_ALL(PrimitiveDataTypeBase) }; +struct TensorTypeBase final { + MLDataType GetElementType() const { return g_host->TensorTypeBase__GetElementType(this); } + + PROVIDER_DISALLOW_ALL(TensorTypeBase) +}; + class DataTypeImpl final { public: size_t Size() const { return g_host->DataTypeImpl__Size(this); } @@ -759,6 +765,7 @@ class DataTypeImpl final { const PrimitiveDataTypeBase* AsPrimitiveDataType() const { return g_host->DataTypeImpl__AsPrimitiveDataType(this); } + static const TensorTypeBase* TensorTypeFromONNXEnum(int type) { return g_host->DataTypeImpl__TensorTypeFromONNXEnum(type); } static const char* ToString(MLDataType type) { return g_host->DataTypeImpl__ToString(type); } PROVIDER_DISALLOW_ALL(DataTypeImpl) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 29ffd9487925b..a3a12e1ba32a2 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -812,6 +812,9 @@ struct ProviderHostImpl : ProviderHost { int32_t PrimitiveDataTypeBase__GetNumSubElems(const PrimitiveDataTypeBase* p) override { return p->GetNumSubElems(); } bool 
PrimitiveDataTypeBase__HasSubElems(const PrimitiveDataTypeBase* p) override { return p->HasSubElems(); } + // TensorTypeBase (wrapped) + MLDataType TensorTypeBase__GetElementType(const TensorTypeBase* p) override { return p->GetElementType(); } + // DataTypeImpl (wrapped) MLDataType DataTypeImpl__GetType_Tensor() override { return DataTypeImpl::GetType(); } #if !defined(DISABLE_SPARSE_TENSORS) @@ -932,6 +935,10 @@ struct ProviderHostImpl : ProviderHost { size_t DataTypeImpl__Size(const DataTypeImpl* p) override { return p->Size(); } const PrimitiveDataTypeBase* DataTypeImpl__AsPrimitiveDataType(const DataTypeImpl* p) override { return p->AsPrimitiveDataType(); } + const TensorTypeBase* DataTypeImpl__TensorTypeFromONNXEnum(int type) override { + return DataTypeImpl::TensorTypeFromONNXEnum(type); + } + // Function (wrapped) const Graph& Function__Body(const Function* p) override { return p->Body(); } From 1f533a99a5eb9e6af955971b2f2c72c7d32e964f Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 11 Dec 2024 03:12:15 -0800 Subject: [PATCH 07/64] Transpose initializers within QNN EP without using CPU EP utils --- .../core/providers/cpu/cpu_provider_shared.cc | 8 - .../core/providers/cpu/cpu_provider_shared.h | 5 - .../qnn/builder/opbuilder/base_op_builder.cc | 229 ++++++++++++++---- .../qnn/builder/opbuilder/base_op_builder.h | 42 ++-- .../qnn/builder/opbuilder/conv_op_builder.cc | 26 +- .../qnn/builder/qnn_model_wrapper.cc | 4 +- .../core/providers/qnn/builder/qnn_utils.cc | 36 +++ .../core/providers/qnn/builder/qnn_utils.h | 28 +++ .../provider_bridge_provider.cc | 7 - 9 files changed, 284 insertions(+), 101 deletions(-) diff --git a/onnxruntime/core/providers/cpu/cpu_provider_shared.cc b/onnxruntime/core/providers/cpu/cpu_provider_shared.cc index 3778c89a00e19..ce9780031a250 100644 --- a/onnxruntime/core/providers/cpu/cpu_provider_shared.cc +++ b/onnxruntime/core/providers/cpu/cpu_provider_shared.cc @@ -23,7 +23,6 @@ #include 
"core/providers/cpu/tensor/slice.h" #include "core/providers/cpu/tensor/onehot.h" #include "core/providers/cpu/tensor/tile.h" -#include "core/providers/cpu/tensor/transpose.h" #include "core/providers/cpu/tensor/gather_elements.h" #include "core/providers/cpu/tensor/unsqueeze.h" #include "core/providers/cpu/tensor/upsamplebase.h" @@ -82,13 +81,6 @@ struct ProviderHostCPUImpl : ProviderHostCPU { Status NonMaxSuppressionBase__PrepareCompute(OpKernelContext* ctx, PrepareContext& pc) override { return NonMaxSuppressionBase::PrepareCompute(ctx, pc); } Status NonMaxSuppressionBase__GetThresholdsFromInputs(const PrepareContext& pc, int64_t& max_output_boxes_per_class, float& iou_threshold, float& score_threshold) override { return NonMaxSuppressionBase::GetThresholdsFromInputs(pc, max_output_boxes_per_class, iou_threshold, score_threshold); } - // TransposeBase (direct) - Status TransposeBase__DoTranspose(const gsl::span& permutations, const Tensor& input, Tensor& output, - const TensorShape* input_shape_override, - concurrency::ThreadPool* tp) override { - return TransposeBase::DoTranspose(permutations, input, output, input_shape_override, tp); - } - #if defined(USE_CUDA) || defined(USE_ROCM) // From cpu/tensor/size.h (direct) Status Size__Compute(const Size* p, OpKernelContext* context) override { return p->Size::Compute(context); } diff --git a/onnxruntime/core/providers/cpu/cpu_provider_shared.h b/onnxruntime/core/providers/cpu/cpu_provider_shared.h index ce7cd6155c38c..eb1569c3e499e 100644 --- a/onnxruntime/core/providers/cpu/cpu_provider_shared.h +++ b/onnxruntime/core/providers/cpu/cpu_provider_shared.h @@ -38,11 +38,6 @@ struct ProviderHostCPU { virtual Status NonMaxSuppressionBase__PrepareCompute(OpKernelContext* ctx, PrepareContext& pc) = 0; virtual Status NonMaxSuppressionBase__GetThresholdsFromInputs(const PrepareContext& pc, int64_t& max_output_boxes_per_class, float& iou_threshold, float& score_threshold) = 0; - // TransposeBase - virtual Status 
TransposeBase__DoTranspose(const gsl::span& permutations, const Tensor& input, Tensor& output, - const TensorShape* input_shape_override, - concurrency::ThreadPool* tp) = 0; - #if defined(USE_CUDA) || defined(USE_ROCM) // From cpu/tensor/size.h diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc index d9d83ba085859..06b02a5e5e31b 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc @@ -270,54 +270,199 @@ Status BaseOpBuilder::SetOutputQParamEqualToInputIfNearlyEqual(QnnModelWrapper& return Status::OK(); } -Status BaseOpBuilder::TransposeInitializer(const QnnModelWrapper& qnn_model_wrapper, - const onnx::TensorProto& initializer, - const std::vector& perm, - std::vector& transposed_data) const { - int32_t onnx_type = initializer.data_type(); - const DataTypeImpl* tensor_dtype = DataTypeImpl::TensorTypeFromONNXEnum(onnx_type)->GetElementType(); - const TensorShape in_tensor_shape = onnxruntime::utils::GetTensorShapeFromTensorProto(initializer); - - // Unpack initializer data into an input Tensor. - size_t tensor_data_size = Tensor::CalculateTensorStorageSize(tensor_dtype, in_tensor_shape); - std::vector input_tensor_data(tensor_data_size); - ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(initializer, - qnn_model_wrapper.GetGraphViewer().ModelPath(), - input_tensor_data)); - Tensor in_tensor(tensor_dtype, in_tensor_shape, input_tensor_data.data(), OrtMemoryInfo{}); - - // Determine the new transposed shape. - auto rank = perm.size(); - std::vector out_tensor_shape_dims; - out_tensor_shape_dims.reserve(rank); - for (size_t p : perm) { - out_tensor_shape_dims.push_back(in_tensor_shape[p]); +// Internal function to transpose input from either (N,C,H,W,D) or (C,N,H,W,D) to (H,W,D,C,N). 
+static Status TransposeToHwdcn(const TensorShape& input_shape, + gsl::span perm, + size_t elem_byte_size, + gsl::span input_buffer, + gsl::span output_buffer) { + const size_t rank = input_shape.NumDimensions(); + ORT_RETURN_IF_NOT(rank == 5 && perm.size() == 5, "Invalid input tensor rank"); + std::vector perm_inverse(perm.size()); + ORT_RETURN_IF_ERROR(qnn::utils::InvertPerm(perm, perm_inverse)); + + std::vector output_shape_dims(rank); + ORT_RETURN_IF_ERROR((qnn::utils::PermuteShape(input_shape.GetDims(), perm, output_shape_dims))); + const TensorShape output_shape = TensorShape::FromExistingBuffer(output_shape_dims); + + std::array src_strides = {}; + for (size_t i = 0; i < rank; ++i) { + int64_t stride = (i < rank - 1) ? input_shape.SizeFromDimension(i + 1) : 1; + ORT_RETURN_IF_NOT(stride > 0, "Expected positive shape dims when computing strides."); + src_strides[i] = static_cast(stride); } - const TensorShape out_tensor_shape = TensorShape::FromExistingBuffer(out_tensor_shape_dims); - - // Create an output tensor that does not own the pre-allocated `transposed_data` buffer. - // DoTranspose() will write the new transposed elements directly into the `transposed_data` buffer. - // We do this to eliminate unnecessary weight copies. - transposed_data.resize(tensor_data_size); - Tensor out_tensor(tensor_dtype, out_tensor_shape, transposed_data.data(), OrtMemoryInfo{}); - ORT_RETURN_IF_ERROR(TransposeBase::DoTranspose(perm, in_tensor, out_tensor)); - - // If this is an int4, we need to unpack it because QNN treats int4 as a full int8. - // TODO: Reduce copies for INT4! Transpose::DoTranspose() internally copies Tensor to Tensor, - // does the transpose in 8-bits, and then copies the result back to a new Tensor. Afterwards, QNN EP unpacks - // the new Tensor back to 8-bits. This is wasteful. A better approach would be for QNN EP to do the following: - // - Explicitly unpack Tensor to Tensor in QNN EP. - // - Call Transpose::DoTranspose() with the Tensor. 
This generates a new transposed Tensor. - // - Clear the top 4-bits to zero for every int8 element in the transposed Tensor. [ONLY if signed int4] - if (onnx_type == ONNX_NAMESPACE::TensorProto_DataType_INT4) { - ORT_RETURN_IF_ERROR(qnn::utils::UnpackInt4ToInt8(out_tensor_shape.Size(), transposed_data)); - } else if (onnx_type == ONNX_NAMESPACE::TensorProto_DataType_UINT4) { - ORT_RETURN_IF_ERROR(qnn::utils::UnpackInt4ToInt8(out_tensor_shape.Size(), transposed_data)); + + std::array dst_strides = {}; + for (size_t i = 0; i < rank; ++i) { + int64_t stride = (i < rank - 1) ? output_shape.SizeFromDimension(i + 1) : 1; + ORT_RETURN_IF_NOT(stride > 0, "Expected positive shape dims when computing strides."); + dst_strides[i] = static_cast(stride); + } + + for (int64_t d0 = 0; d0 < input_shape[0]; ++d0) { + for (int64_t d1 = 0; d1 < input_shape[1]; ++d1) { + for (int64_t d2 = 0; d2 < input_shape[2]; ++d2) { + for (int64_t d3 = 0; d3 < input_shape[3]; ++d3) { + for (int64_t d4 = 0; d4 < input_shape[4]; ++d4) { + const size_t src_elem_index = ((d0 * src_strides[0]) + + (d1 * src_strides[1]) + + (d2 * src_strides[2]) + + (d3 * src_strides[3]) + + (d4 * src_strides[4])); + const size_t dst_elem_index = ((d0 * dst_strides[perm_inverse[0]]) + + (d1 * dst_strides[perm_inverse[1]]) + + (d2 * dst_strides[perm_inverse[2]]) + + (d3 * dst_strides[perm_inverse[3]]) + + (d4 * dst_strides[perm_inverse[4]])); + + const size_t src_byte_index = src_elem_index * elem_byte_size; + const size_t dst_byte_index = dst_elem_index * elem_byte_size; + assert(src_byte_index < input_buffer.size()); + assert(dst_byte_index < output_buffer.size()); + + std::memcpy(&output_buffer[dst_byte_index], &input_buffer[src_byte_index], elem_byte_size); + } + } + } + } + } + + return Status::OK(); +} + +Status BaseOpBuilder::TwoDimensionTranspose(const QnnModelWrapper& qnn_model_wrapper, + std::vector& data_shape, + const onnx::TensorProto& initializer, + std::vector& transposed_data) const { + 
ORT_RETURN_IF_NOT(data_shape.size() == 2, "Expected shape of rank 2"); + + std::array perm = {1, 0}; + std::vector output_shape(data_shape.size()); + ORT_RETURN_IF_ERROR((qnn::utils::PermuteShape(data_shape, perm, output_shape))); + + auto onnx_type = static_cast(initializer.data_type()); + const size_t elem_byte_size = qnn::utils::GetElementSizeByType(onnx_type); + ORT_RETURN_IF_NOT(elem_byte_size != 0, "Can't get element byte size from given ONNX type"); + + std::vector input_buffer; + ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(initializer, input_buffer)); + transposed_data.resize(input_buffer.size()); + + for (size_t row = 0; row < data_shape[0]; row++) { + for (size_t col = 0; col < data_shape[1]; col++) { + const size_t src_elem_index = (row * data_shape[1] + col); + const size_t dst_elem_index = (col * output_shape[1] + row); + const size_t src_byte_index = src_elem_index * elem_byte_size; + const size_t dst_byte_index = dst_elem_index * elem_byte_size; + assert(src_byte_index < input_buffer.size()); + assert(dst_byte_index < transposed_data.size()); + + std::memcpy(&transposed_data[dst_byte_index], &input_buffer[src_byte_index], elem_byte_size); + } } + data_shape = std::move(output_shape); // Update parameter with final transposed shape return Status::OK(); } +Status BaseOpBuilder::TransposeFromNchwToHwcn(const QnnModelWrapper& qnn_model_wrapper, + const onnx::TensorProto& initializer, + std::vector& transposed_data, + bool is_3d) const { + auto onnx_type = static_cast(initializer.data_type()); + const size_t elem_byte_size = qnn::utils::GetElementSizeByType(onnx_type); + ORT_RETURN_IF_NOT(elem_byte_size != 0, "Can't get element byte size from given ONNX type"); + + std::vector input_shape = qnn::utils::GetInitializerShape(initializer); + ORT_RETURN_IF_NOT((is_3d && input_shape.size() == 5) || (!is_3d && input_shape.size() == 4), + "Unexpected rank: only support rank 4 or rank 5 input shapes"); + + if (!is_3d) { + input_shape.push_back(1); 
// Make it 3D by making shape (N,C,H,W,1) + } + + std::vector input_buffer; + ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(initializer, input_buffer)); + transposed_data.resize(input_buffer.size()); + + return TransposeToHwdcn(TensorShape::FromExistingBuffer(input_shape), + nchw2hwcn_perm_3d, + elem_byte_size, + input_buffer, + transposed_data); +} + +Status BaseOpBuilder::TransposeFromNchwToHwcn(std::vector input_shape_dims, + size_t elem_byte_size, + gsl::span input_buffer, + gsl::span output_buffer, + bool is_3d) const { + const size_t rank = input_shape_dims.size(); + ORT_RETURN_IF_NOT((is_3d && rank == 5) || (!is_3d && rank == 4), "Invalid input tensor rank"); + ORT_RETURN_IF_NOT(input_buffer.size() == output_buffer.size(), + "Expected input_buffer.size() == output_buffer.size()"); + ORT_RETURN_IF_NOT(elem_byte_size != 0, "Can't get element byte size from given ONNX type"); + + if (!is_3d) { + input_shape_dims.push_back(1); // Make it 3D by making shape (N,C,H,W,1) + } + + return TransposeToHwdcn(TensorShape::FromExistingBuffer(input_shape_dims), + nchw2hwcn_perm_3d, + elem_byte_size, + input_buffer, + output_buffer); +} + +Status BaseOpBuilder::TransposeFromCnhwToHwcn(const QnnModelWrapper& qnn_model_wrapper, + const onnx::TensorProto& initializer, + std::vector& transposed_data, + bool is_3d) const { + auto onnx_type = static_cast(initializer.data_type()); + const size_t elem_byte_size = qnn::utils::GetElementSizeByType(onnx_type); + ORT_RETURN_IF_NOT(elem_byte_size != 0, "Can't get element byte size from given ONNX type"); + + std::vector input_shape = qnn::utils::GetInitializerShape(initializer); + ORT_RETURN_IF_NOT((is_3d && input_shape.size() == 5) || (!is_3d && input_shape.size() == 4), + "Unexpected rank: only support rank 4 or rank 5 input shapes"); + + if (!is_3d) { + input_shape.push_back(1); // Make it 3D by making shape (C,N,H,W,1) + } + + std::vector input_buffer; + 
ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(initializer, input_buffer)); + transposed_data.resize(input_buffer.size()); + + return TransposeToHwdcn(TensorShape::FromExistingBuffer(input_shape), + cnhw2hwcn_perm_3d, + elem_byte_size, + input_buffer, + transposed_data); +} + +Status BaseOpBuilder::TransposeFromCnhwToHwcn(std::vector input_shape_dims, + size_t elem_byte_size, + gsl::span input_buffer, + gsl::span output_buffer, + bool is_3d) const { + const size_t rank = input_shape_dims.size(); + ORT_RETURN_IF_NOT((is_3d && rank == 5) || (!is_3d && rank == 4), "Invalid input tensor rank"); + ORT_RETURN_IF_NOT(input_buffer.size() == output_buffer.size(), + "Expected input_buffer.size() == output_buffer.size()"); + ORT_RETURN_IF_NOT(elem_byte_size != 0, "Can't get element byte size from given ONNX type"); + + if (!is_3d) { + input_shape_dims.push_back(1); // Make it 3D by making shape (C,N,H,W,1) + } + + return TransposeToHwdcn(TensorShape::FromExistingBuffer(input_shape_dims), + cnhw2hwcn_perm_3d, + elem_byte_size, + input_buffer, + output_buffer); +} + Status BaseOpBuilder::ProcessAxisAttribute(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, Qnn_Scalar_t& axis_qnn_scalar, diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h index 20d3bac5964b7..b2bb3f043eecd 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h @@ -215,7 +215,8 @@ class BaseOpBuilder : public IOpBuilder { } // NCHW shape to channel last - Status NchwShapeToNhwc(const std::vector& nchw_shape, std::vector& nhwc_shape) const { + template + Status NchwShapeToNhwc(gsl::span nchw_shape, gsl::span nhwc_shape) const { ORT_RETURN_IF_NOT(nchw_shape.size() == 4, "shape should have 4 dimension NCHW."); nhwc_shape[0] = nchw_shape[0]; nhwc_shape[1] = nchw_shape[2]; @@ -226,7 +227,8 @@ 
class BaseOpBuilder : public IOpBuilder { } // NCHW shape to HWCN shape, required for Conv weight - Status NchwShapeToHwcn(const std::vector& nchw_shape, std::vector& hwcn_shape) const { + template + Status NchwShapeToHwcn(gsl::span nchw_shape, gsl::span hwcn_shape) const { if (nchw_shape.size() == 4) { hwcn_shape[0] = nchw_shape[2]; hwcn_shape[1] = nchw_shape[3]; @@ -246,7 +248,8 @@ class BaseOpBuilder : public IOpBuilder { } // CNHW shape to HWCN shape, required for Conv weight - Status CnhwShapeToHwcn(const std::vector& cnhw_shape, std::vector& hwcn_shape) const { + template + Status CnhwShapeToHwcn(gsl::span cnhw_shape, gsl::span hwcn_shape) const { if (cnhw_shape.size() == 4) { hwcn_shape[0] = cnhw_shape[2]; hwcn_shape[1] = cnhw_shape[3]; @@ -264,37 +267,32 @@ class BaseOpBuilder : public IOpBuilder { return Status::OK(); } - Status TransposeInitializer(const QnnModelWrapper& qnn_model_wrapper, - const onnx::TensorProto& initializer, - const std::vector& perm, - std::vector& transposed_data) const; Status TransposeFromNchwToHwcn(const QnnModelWrapper& qnn_model_wrapper, const onnx::TensorProto& initializer, std::vector& transposed_data, - bool is_3d = false) const { - auto& perm = is_3d ? nchw2hwcn_perm_3d : nchw2hwcn_perm; - return TransposeInitializer(qnn_model_wrapper, initializer, perm, transposed_data); - } + bool is_3d = false) const; + + Status TransposeFromNchwToHwcn(std::vector input_shape_dims, + size_t elem_byte_size, + gsl::span input_buffer, + gsl::span output_buffer, + bool is_3d = false) const; Status TransposeFromCnhwToHwcn(const QnnModelWrapper& qnn_model_wrapper, const onnx::TensorProto& initializer, std::vector& transposed_data, - bool is_3d = false) const { - auto& perm = is_3d ? 
cnhw2hwcn_perm_3d : cnhw2hwcn_perm; - return TransposeInitializer(qnn_model_wrapper, initializer, perm, transposed_data); - } + bool is_3d = false) const; + Status TransposeFromCnhwToHwcn(std::vector input_shape_dims, + size_t elem_byte_size, + gsl::span input_buffer, + gsl::span output_buffer, + bool is_3d = false) const; Status TwoDimensionTranspose(const QnnModelWrapper& qnn_model_wrapper, std::vector& data_shape, const onnx::TensorProto& initializer, - std::vector& transposed_data) const { - auto tmp = data_shape[0]; - data_shape[0] = data_shape[1]; - data_shape[1] = tmp; - std::vector two_dim_trans_perm{1, 0}; - return TransposeInitializer(qnn_model_wrapper, initializer, two_dim_trans_perm, transposed_data); - } + std::vector& transposed_data) const; // Onnx Pads is [x1_begin, x2_begin, x1_end, x2_end], QNN requires [x1_begin, x1_end, x2_begin, x2_end] void ReArranagePads(std::vector& pads) const { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc index 2aeb8a47000c2..f0c6f53affecd 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc @@ -210,9 +210,9 @@ Status ConvOpBuilder::ProcessConv2D3DInputs(QnnModelWrapper& qnn_model_wrapper, // Change shape to HWCN, it could be initializer or normal input if (conv_type == OnnxConvType::kConv) { - ORT_RETURN_IF_ERROR(NchwShapeToHwcn(input_info.shape, actual_shape)); + ORT_RETURN_IF_ERROR(NchwShapeToHwcn(input_info.shape, actual_shape)); } else if (conv_type == OnnxConvType::kConvTranspose) { - ORT_RETURN_IF_ERROR(CnhwShapeToHwcn(input_info.shape, actual_shape)); + ORT_RETURN_IF_ERROR(CnhwShapeToHwcn(input_info.shape, actual_shape)); } else { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN EP: Unexpected convolution op type: ", node_unit.OpType().c_str()); } @@ -412,9 +412,9 @@ Status 
ConvOpBuilder::ProcessConv1DInputs(QnnModelWrapper& qnn_model_wrapper, // Create the final shape after the weights are transposed to HWCN. if (conv_type == OnnxConvType::kConv) { - ORT_RETURN_IF_ERROR(NchwShapeToHwcn(shape_2d, final_shape)); + ORT_RETURN_IF_ERROR(NchwShapeToHwcn(shape_2d, final_shape)); } else if (conv_type == OnnxConvType::kConvTranspose) { - ORT_RETURN_IF_ERROR(CnhwShapeToHwcn(shape_2d, final_shape)); + ORT_RETURN_IF_ERROR(CnhwShapeToHwcn(shape_2d, final_shape)); } else { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN EP: Unexpected convolution op type: ", node_unit.OpType().c_str()); } @@ -433,15 +433,11 @@ Status ConvOpBuilder::ProcessConv1DInputs(QnnModelWrapper& qnn_model_wrapper, return static_cast(dim); }); - const TensorShape tensor_shape = TensorShape::FromExistingBuffer(shape_2d_int64); // Does not own shape data. - const DataTypeImpl* tensor_dtype = DataTypeImpl::TensorTypeFromONNXEnum( - input_info.initializer_tensor->data_type()) - ->GetElementType(); - ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info.initializer_tensor, unpacked_tensor)); - - Tensor tensor_2d(tensor_dtype, tensor_shape, unpacked_tensor.data(), OrtMemoryInfo{}); // Does not own data. - ONNX_NAMESPACE::TensorProto reshaped_initializer = onnxruntime::utils::TensorToTensorProto(tensor_2d, - reshape_output); + std::vector original_tensor_bytes; + ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info.initializer_tensor, original_tensor_bytes)); + unpacked_tensor.resize(original_tensor_bytes.size()); + size_t elem_byte_size = qnn::utils::GetElementSizeByType( + static_cast(input_info.initializer_tensor->data_type())); // The reshape (unsqueeze) may require us to shift the quant parameter's axis. if (input_info.quant_param.IsPerChannel()) { @@ -452,9 +448,9 @@ Status ConvOpBuilder::ProcessConv1DInputs(QnnModelWrapper& qnn_model_wrapper, // Get transposed initializer bytes. 
// if (conv_type == OnnxConvType::kConv) { - ORT_RETURN_IF_ERROR(TransposeFromNchwToHwcn(qnn_model_wrapper, reshaped_initializer, unpacked_tensor)); + ORT_RETURN_IF_ERROR(TransposeFromNchwToHwcn(shape_2d_int64, elem_byte_size, original_tensor_bytes, unpacked_tensor)); } else if (conv_type == OnnxConvType::kConvTranspose) { - ORT_RETURN_IF_ERROR(TransposeFromCnhwToHwcn(qnn_model_wrapper, reshaped_initializer, unpacked_tensor)); + ORT_RETURN_IF_ERROR(TransposeFromCnhwToHwcn(shape_2d_int64, elem_byte_size, original_tensor_bytes, unpacked_tensor)); } else { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN EP: Unexpected convolution op type: ", node_unit.OpType().c_str()); } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc index a6bd17e75b6c0..e7f4c37eaef66 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc @@ -623,11 +623,11 @@ Status QnnModelWrapper::UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& // If this is an int4, we need to unpack it because QNN treats int4 as a full int8. 
if (onnx_data_type == ONNX_NAMESPACE::TensorProto_DataType_INT4) { - TensorShape shape = onnxruntime::utils::GetTensorShapeFromTensorProto(initializer); + TensorShape shape(qnn::utils::GetInitializerShape(initializer)); const size_t num_int4_elems = shape.Size(); ORT_RETURN_IF_ERROR(qnn::utils::UnpackInt4ToInt8(num_int4_elems, unpacked_tensor)); } else if (onnx_data_type == ONNX_NAMESPACE::TensorProto_DataType_UINT4) { - TensorShape shape = onnxruntime::utils::GetTensorShapeFromTensorProto(initializer); + TensorShape shape(qnn::utils::GetInitializerShape(initializer)); const size_t num_uint4_elems = shape.Size(); ORT_RETURN_IF_ERROR(qnn::utils::UnpackInt4ToInt8(num_uint4_elems, unpacked_tensor)); } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc index 9457877ddfc93..f6c30e85b53f9 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc @@ -65,6 +65,42 @@ size_t GetElementSizeByType(ONNXTensorElementDataType elem_type) { return pos->second; } +size_t GetElementSizeByType(ONNX_NAMESPACE::TensorProto_DataType onnx_type) { + switch (onnx_type) { + case ONNX_NAMESPACE::TensorProto_DataType_INT4: + return sizeof(Int4x2); + case ONNX_NAMESPACE::TensorProto_DataType_UINT4: + return sizeof(UInt4x2); + case ONNX_NAMESPACE::TensorProto_DataType_INT8: + return sizeof(int8_t); + case ONNX_NAMESPACE::TensorProto_DataType_UINT8: + return sizeof(uint8_t); + case ONNX_NAMESPACE::TensorProto_DataType_INT16: + return sizeof(int16_t); + case ONNX_NAMESPACE::TensorProto_DataType_UINT16: + return sizeof(uint16_t); + case ONNX_NAMESPACE::TensorProto_DataType_INT32: + return sizeof(int32_t); + case ONNX_NAMESPACE::TensorProto_DataType_UINT32: + return sizeof(uint32_t); + case ONNX_NAMESPACE::TensorProto_DataType_INT64: + return sizeof(int64_t); + case ONNX_NAMESPACE::TensorProto_DataType_UINT64: + return sizeof(uint64_t); + case 
ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: + return 2; + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: + return sizeof(float); + case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: + return sizeof(double); + case ONNX_NAMESPACE::TensorProto_DataType_BOOL: + return sizeof(bool); + default: + return 0; + } + // Unreachable +} + std::ostream& operator<<(std::ostream& out, const Qnn_Scalar_t& scalar) { switch (scalar.dataType) { case QNN_DATATYPE_INT_8: diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.h b/onnxruntime/core/providers/qnn/builder/qnn_utils.h index 11ecf57ada357..c76c99b8454ef 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.h @@ -11,6 +11,7 @@ #include "QnnTypes.h" #include "core/session/onnxruntime_cxx_api.h" #include "core/framework/node_unit.h" +#include "core/framework/tensor_shape.h" #include "core/util/qmath.h" namespace onnxruntime { @@ -22,6 +23,8 @@ size_t GetElementSizeByType(const Qnn_DataType_t& data_type); size_t GetElementSizeByType(ONNXTensorElementDataType elem_type); +size_t GetElementSizeByType(ONNX_NAMESPACE::TensorProto_DataType onnx_type); + // TODO: make these work with Wrappers? 
std::ostream& operator<<(std::ostream& out, const Qnn_Param_t& qnn_param); std::ostream& operator<<(std::ostream& out, const Qnn_Tensor_t& tensor); @@ -133,6 +136,31 @@ Status UnpackInt4ToInt8(size_t num_int4_elems, std::vector& data_bytes) return Status::OK(); } +template +std::vector GetInitializerShape(const ONNX_NAMESPACE::TensorProto& tensor_proto) { + const auto& dims = tensor_proto.dims(); + std::vector tensor_shape_vec(static_cast(dims.size())); + for (int i = 0; i < dims.size(); ++i) { + tensor_shape_vec[i] = static_cast(dims[i]); + } + + return tensor_shape_vec; +} + +template +Status PermuteShape(gsl::span input_shape, gsl::span perm, gsl::span output_shape) { + const size_t rank = input_shape.size(); + ORT_RETURN_IF_NOT(rank == perm.size() && rank == output_shape.size(), + "PermuteShape(): expect all arguments to have the same rank."); + + for (size_t i = 0; i < rank; ++i) { + size_t p = static_cast(perm[i]); + output_shape[i] = input_shape[p]; + } + + return Status::OK(); +} + /** * Wrapping onnxruntime::Node for retrieving attribute values */ diff --git a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc index 92e5eb1ed5eb0..d3b12f9728135 100644 --- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc +++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc @@ -23,7 +23,6 @@ #include "core/providers/cpu/tensor/split.h" #include "core/providers/cpu/tensor/size.h" #include "core/providers/cpu/tensor/scatter_nd.h" -#include "core/providers/cpu/tensor/transpose.h" #include "core/providers/cpu/tensor/unsqueeze.h" #include "core/providers/cpu/tensor/upsamplebase.h" #include "core/providers/cpu/tensor/tile.h" @@ -514,12 +513,6 @@ Status NonMaxSuppressionBase::GetThresholdsFromInputs(const PrepareContext& pc, Status GatherBase::PrepareForCompute(OpKernelContext* context, GatherBase::Prepare& p) const { return 
g_host_cpu.GatherBase__PrepareForCompute(this, context, reinterpret_cast(p)); } Status UnsqueezeBase::PrepareCompute(OpKernelContext* ctx, UnsqueezeBase::Prepare& p) const { return g_host_cpu.UnsqueezeBase__PrepareCompute(this, ctx, reinterpret_cast(p)); } -Status TransposeBase::DoTranspose(const gsl::span& permutations, const Tensor& input, Tensor& output, - const TensorShape* input_shape_override, - concurrency::ThreadPool* tp) { - return g_host_cpu.TransposeBase__DoTranspose(permutations, input, output, input_shape_override, tp); -} - #if defined(USE_CUDA) || defined(USE_ROCM) bool TileOp::IsTileMemcpy(const TensorShape& input_shape, const int64_t* repeats, size_t rank, bool& is_batched_memcpy, size_t& num_of_elements_per_batch, size_t& num_of_copies_per_batch, size_t& num_of_batch_copies) { return g_host_cpu.TileOp__IsTileMemcpy(input_shape, repeats, rank, is_batched_memcpy, num_of_elements_per_batch, num_of_copies_per_batch, num_of_batch_copies); From ccaefb3ed899a2e044bd288faa0e28cf34cdf1a2 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 11 Dec 2024 03:17:57 -0800 Subject: [PATCH 08/64] Rename transpose func --- .../qnn/builder/opbuilder/base_op_builder.cc | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc index 06b02a5e5e31b..7690427416770 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc @@ -271,11 +271,11 @@ Status BaseOpBuilder::SetOutputQParamEqualToInputIfNearlyEqual(QnnModelWrapper& } // Internal function to transpose input from either (N,C,H,W,D) or (C,N,H,W,D) to (H,W,D,C,N). 
-static Status TransposeToHwdcn(const TensorShape& input_shape, - gsl::span perm, - size_t elem_byte_size, - gsl::span input_buffer, - gsl::span output_buffer) { +static Status TransposeDataRank5(const TensorShape& input_shape, + gsl::span perm, + size_t elem_byte_size, + gsl::span input_buffer, + gsl::span output_buffer) { const size_t rank = input_shape.NumDimensions(); ORT_RETURN_IF_NOT(rank == 5 && perm.size() == 5, "Invalid input tensor rank"); std::vector perm_inverse(perm.size()); @@ -385,11 +385,11 @@ Status BaseOpBuilder::TransposeFromNchwToHwcn(const QnnModelWrapper& qnn_model_w ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(initializer, input_buffer)); transposed_data.resize(input_buffer.size()); - return TransposeToHwdcn(TensorShape::FromExistingBuffer(input_shape), - nchw2hwcn_perm_3d, - elem_byte_size, - input_buffer, - transposed_data); + return TransposeDataRank5(TensorShape::FromExistingBuffer(input_shape), + nchw2hwcn_perm_3d, + elem_byte_size, + input_buffer, + transposed_data); } Status BaseOpBuilder::TransposeFromNchwToHwcn(std::vector input_shape_dims, @@ -407,11 +407,11 @@ Status BaseOpBuilder::TransposeFromNchwToHwcn(std::vector input_shape_d input_shape_dims.push_back(1); // Make it 3D by making shape (N,C,H,W,1) } - return TransposeToHwdcn(TensorShape::FromExistingBuffer(input_shape_dims), - nchw2hwcn_perm_3d, - elem_byte_size, - input_buffer, - output_buffer); + return TransposeDataRank5(TensorShape::FromExistingBuffer(input_shape_dims), + nchw2hwcn_perm_3d, + elem_byte_size, + input_buffer, + output_buffer); } Status BaseOpBuilder::TransposeFromCnhwToHwcn(const QnnModelWrapper& qnn_model_wrapper, @@ -434,11 +434,11 @@ Status BaseOpBuilder::TransposeFromCnhwToHwcn(const QnnModelWrapper& qnn_model_w ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(initializer, input_buffer)); transposed_data.resize(input_buffer.size()); - return TransposeToHwdcn(TensorShape::FromExistingBuffer(input_shape), - cnhw2hwcn_perm_3d, - 
elem_byte_size, - input_buffer, - transposed_data); + return TransposeDataRank5(TensorShape::FromExistingBuffer(input_shape), + cnhw2hwcn_perm_3d, + elem_byte_size, + input_buffer, + transposed_data); } Status BaseOpBuilder::TransposeFromCnhwToHwcn(std::vector input_shape_dims, @@ -456,11 +456,11 @@ Status BaseOpBuilder::TransposeFromCnhwToHwcn(std::vector input_shape_d input_shape_dims.push_back(1); // Make it 3D by making shape (C,N,H,W,1) } - return TransposeToHwdcn(TensorShape::FromExistingBuffer(input_shape_dims), - cnhw2hwcn_perm_3d, - elem_byte_size, - input_buffer, - output_buffer); + return TransposeDataRank5(TensorShape::FromExistingBuffer(input_shape_dims), + cnhw2hwcn_perm_3d, + elem_byte_size, + input_buffer, + output_buffer); } Status BaseOpBuilder::ProcessAxisAttribute(const QnnModelWrapper& qnn_model_wrapper, From fb765c7f237524b44c5fe68a6de6b43c7af500f3 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 11 Dec 2024 03:22:05 -0800 Subject: [PATCH 09/64] Remove TransposeBase forward declaration from provider bridge --- onnxruntime/core/providers/shared_library/provider_api.h | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index 6e17947af3389..d68dbf8ce3ec5 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -185,7 +185,6 @@ class GatherBase; class Size; class SliceBase; class SplitBase; -class TransposeBase; class TensorShape; struct Prepare; struct PrepareContext; From 0237bca5a5f61e6be8ad674e3aac4996cc45c07e Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 11 Dec 2024 03:41:59 -0800 Subject: [PATCH 10/64] Rewrite SliceOpBuilder util GetInitializerInputData() to not use functions not available in the provider bridge. 
--- .../qnn/builder/opbuilder/slice_op_builder.cc | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc index 13b106d3c1bde..e383e71d2a497 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc @@ -85,26 +85,22 @@ static Status GetInitializerInputData(const NodeUnitIODef& input, const QnnModel ORT_RETURN_IF_NOT(initializer_proto->has_data_type(), "Expected initializer ", input_name.c_str(), " to have a proto data type."); - // Create empty Tensor. - const auto* dtype = DataTypeImpl::TensorTypeFromONNXEnum(initializer_proto->data_type())->GetElementType(); - TensorShape shape = onnxruntime::utils::GetTensorShapeFromTensorProto(*initializer_proto); - Tensor tensor(dtype, shape, std::make_shared()); - - // Deserialize initializer into Tensor. - ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor( - onnxruntime::Env::Default(), qnn_model_wrapper.GetGraphViewer().ModelPath(), *initializer_proto, tensor)); + // Deserialize initializer into byte buffer + std::vector initializer_bytes; + ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*initializer_proto, initializer_bytes)); Status status; // Copy Tensor of int32_t or int64_t elems into output (int64_ts). 
- if (tensor.IsDataType()) { - gsl::span tensor_elems = tensor.DataAsSpan(); + auto onnx_type = static_cast(initializer_proto->data_type()); + if (onnx_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) { + gsl::span tensor_elems = ReinterpretAsSpan(initializer_bytes); output.insert(output.end(), tensor_elems.begin(), tensor_elems.end()); - } else if (tensor.IsDataType()) { - gsl::span tensor_elems = tensor.DataAsSpan(); + } else if (onnx_type == ONNX_NAMESPACE::TensorProto_DataType_INT32) { + gsl::span tensor_elems = ReinterpretAsSpan(initializer_bytes); output.insert(output.end(), tensor_elems.begin(), tensor_elems.end()); } else { - status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Data type ", DataTypeImpl::ToString(dtype), + status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Data type ", onnx_type, " is not supported for Slice initializer input ", input.node_arg.Name().c_str()); } From e3705b260a67a66c4c71bfa3020b8b6af537e842 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 11 Dec 2024 03:50:40 -0800 Subject: [PATCH 11/64] Revert addition of TensorTypeBase to provider bridge --- onnxruntime/core/providers/shared_library/provider_api.h | 1 - .../core/providers/shared_library/provider_interfaces.h | 5 ----- .../core/providers/shared_library/provider_wrappedtypes.h | 7 ------- onnxruntime/core/session/provider_bridge_ort.cc | 7 ------- 4 files changed, 20 deletions(-) diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index d68dbf8ce3ec5..b84825236a453 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -169,7 +169,6 @@ class OpKernel; struct OpKernelContext; struct OpKernelInfo; struct PrimitiveDataTypeBase; -struct TensorTypeBase; struct OrtRunOptions; struct Tensor; struct SparseTensor; diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h 
b/onnxruntime/core/providers/shared_library/provider_interfaces.h index dfe46e0ee32b5..dc3f5e60f2745 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -677,9 +677,6 @@ struct ProviderHost { virtual int32_t PrimitiveDataTypeBase__GetNumSubElems(const PrimitiveDataTypeBase* p) = 0; virtual bool PrimitiveDataTypeBase__HasSubElems(const PrimitiveDataTypeBase* p) = 0; - // TensorTypeBase - virtual MLDataType TensorTypeBase__GetElementType(const TensorTypeBase* p) = 0; - // DataTypeImpl virtual MLDataType DataTypeImpl__GetType_Tensor() = 0; #if !defined(DISABLE_SPARSE_TENSORS) @@ -798,8 +795,6 @@ struct ProviderHost { virtual size_t DataTypeImpl__Size(const DataTypeImpl* p) = 0; virtual const PrimitiveDataTypeBase* DataTypeImpl__AsPrimitiveDataType(const DataTypeImpl* p) = 0; - virtual const TensorTypeBase* DataTypeImpl__TensorTypeFromONNXEnum(int type) = 0; - // Function virtual const Graph& Function__Body(const Function* p) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index 04818245d146f..0efa3833a978b 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -703,12 +703,6 @@ struct PrimitiveDataTypeBase final { PROVIDER_DISALLOW_ALL(PrimitiveDataTypeBase) }; -struct TensorTypeBase final { - MLDataType GetElementType() const { return g_host->TensorTypeBase__GetElementType(this); } - - PROVIDER_DISALLOW_ALL(TensorTypeBase) -}; - class DataTypeImpl final { public: size_t Size() const { return g_host->DataTypeImpl__Size(this); } @@ -765,7 +759,6 @@ class DataTypeImpl final { const PrimitiveDataTypeBase* AsPrimitiveDataType() const { return g_host->DataTypeImpl__AsPrimitiveDataType(this); } - static const TensorTypeBase* TensorTypeFromONNXEnum(int type) { return 
g_host->DataTypeImpl__TensorTypeFromONNXEnum(type); } static const char* ToString(MLDataType type) { return g_host->DataTypeImpl__ToString(type); } PROVIDER_DISALLOW_ALL(DataTypeImpl) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index a3a12e1ba32a2..29ffd9487925b 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -812,9 +812,6 @@ struct ProviderHostImpl : ProviderHost { int32_t PrimitiveDataTypeBase__GetNumSubElems(const PrimitiveDataTypeBase* p) override { return p->GetNumSubElems(); } bool PrimitiveDataTypeBase__HasSubElems(const PrimitiveDataTypeBase* p) override { return p->HasSubElems(); } - // TensorTypeBase (wrapped) - MLDataType TensorTypeBase__GetElementType(const TensorTypeBase* p) override { return p->GetElementType(); } - // DataTypeImpl (wrapped) MLDataType DataTypeImpl__GetType_Tensor() override { return DataTypeImpl::GetType(); } #if !defined(DISABLE_SPARSE_TENSORS) @@ -935,10 +932,6 @@ struct ProviderHostImpl : ProviderHost { size_t DataTypeImpl__Size(const DataTypeImpl* p) override { return p->Size(); } const PrimitiveDataTypeBase* DataTypeImpl__AsPrimitiveDataType(const DataTypeImpl* p) override { return p->AsPrimitiveDataType(); } - const TensorTypeBase* DataTypeImpl__TensorTypeFromONNXEnum(int type) override { - return DataTypeImpl::TensorTypeFromONNXEnum(type); - } - // Function (wrapped) const Graph& Function__Body(const Function* p) override { return p->Body(); } From 6f0b3c61330adfc5b41261e4f08cda92f1924978 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 11 Dec 2024 03:59:54 -0800 Subject: [PATCH 12/64] Remove last use of GetTensorShapeFromTensorProto --- onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc 
index e7f4c37eaef66..147c740313d7d 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc @@ -444,7 +444,7 @@ Status QnnModelWrapper::IsPerChannelQuantized(const onnxruntime::NodeUnitIODef& ORT_RETURN_IF(iter == graph_initializers.end(), "Unable to find initializer for scale(s): ", scale_name.c_str()); gsl::not_null scale_tensor_proto = iter->second; - TensorShape scale_shape = onnxruntime::utils::GetTensorShapeFromTensorProto(*scale_tensor_proto); + TensorShape scale_shape(qnn::utils::GetInitializerShape(*scale_tensor_proto)); // Check the number of scale values to determine if the tensor is per-channel. // This is consistent with CPU EP's Quant/Dequant logic. We can't use the presence of an axis because even a From 5939bf64f6d78633e214c6f20f0a36690684b50c Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 11 Dec 2024 04:18:06 -0800 Subject: [PATCH 13/64] Add DataTypeUtils::ToType(std::string&) to provider bridge --- .../core/providers/shared_library/provider_interfaces.h | 1 + .../core/providers/shared_library/provider_wrappedtypes.h | 1 + onnxruntime/core/session/provider_bridge_ort.cc | 3 +++ 3 files changed, 5 insertions(+) diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index dc3f5e60f2745..77ee3bc01d212 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -284,6 +284,7 @@ struct ProviderHost { // Utils::DataTypeUtils virtual const std::string* Utils__DataTypeUtils__ToType(const ONNX_NAMESPACE::TypeProto& type_proto) = 0; + virtual const std::string* Utils__DataTypeUtils__ToType(const std::string& type_str) = 0; // int64s virtual int int64s__size(const ONNX_NAMESPACE::int64s* p) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h 
b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index 0efa3833a978b..a9e4bafe9f3c6 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -475,6 +475,7 @@ namespace Utils { struct DataTypeUtils final { static const std::string* ToType(const ONNX_NAMESPACE::TypeProto& type_proto) { return g_host->Utils__DataTypeUtils__ToType(type_proto); } + static const std::string* ToType(const std::string& type_str) { return g_host->Utils__DataTypeUtils__ToType(type_str); } PROVIDER_DISALLOW_ALL(DataTypeUtils) }; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 29ffd9487925b..65917f035020b 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -382,6 +382,9 @@ struct ProviderHostImpl : ProviderHost { // Utils::DataTypeUtils (wrapped) const std::string* Utils__DataTypeUtils__ToType(const ONNX_NAMESPACE::TypeProto& type_proto) override { return ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(type_proto); } + const std::string* Utils__DataTypeUtils__ToType(const std::string& type_str) override { + return ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(type_str); + } // int64s (wrapped) int int64s__size(const ONNX_NAMESPACE::int64s* p) override { return p->size(); } From 58dbf494618f8339c06c07a1e7e967e9b4a436e1 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 11 Dec 2024 04:21:37 -0800 Subject: [PATCH 14/64] Add Logger::GetSeverity() to provider bridge --- .../core/providers/shared_library/provider_interfaces.h | 1 + .../core/providers/shared_library/provider_wrappedtypes.h | 3 +++ onnxruntime/core/session/provider_bridge_ort.cc | 3 +++ 3 files changed, 7 insertions(+) diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 
77ee3bc01d212..81b8b00d7777a 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -270,6 +270,7 @@ struct ProviderHost { // logging::Logger virtual bool logging__Logger__OutputIsEnabled(const logging::Logger* p, logging::Severity severity, logging::DataType data_type) = 0; + virtual logging::Severity logging__Logger__GetSeverity(const logging::Logger* p) = 0; // logging::LoggingManager virtual const logging::Logger& logging__LoggingManager__DefaultLogger() = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index a9e4bafe9f3c6..365ee987f4930 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -23,6 +23,9 @@ namespace logging { struct Logger final { bool OutputIsEnabled(Severity severity, DataType data_type) const noexcept { return g_host->logging__Logger__OutputIsEnabled(this, severity, data_type); } + Severity GetSeverity() const noexcept { + return g_host->logging__Logger__GetSeverity(this); + } PROVIDER_DISALLOW_ALL(Logger) }; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 65917f035020b..457176136fe49 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -366,6 +366,9 @@ struct ProviderHostImpl : ProviderHost { // logging::Logger (wrapped) bool logging__Logger__OutputIsEnabled(const logging::Logger* p, logging::Severity severity, logging::DataType data_type) override { return p->OutputIsEnabled(severity, data_type); } + logging::Severity logging__Logger__GetSeverity(const logging::Logger* p) override { + return p->GetSeverity(); + } // logging::LoggingManager (wrapped) const logging::Logger& logging__LoggingManager__DefaultLogger() 
override { return logging::LoggingManager::DefaultLogger(); } From f76b09a2b665318c76fa24666c830b0837a522c3 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 11 Dec 2024 04:28:19 -0800 Subject: [PATCH 15/64] Add TensorShapeProto_Dimensions__size to provider bridge --- .../core/providers/shared_library/provider_interfaces.h | 1 + .../core/providers/shared_library/provider_wrappedtypes.h | 1 + onnxruntime/core/session/provider_bridge_ort.cc | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 81b8b00d7777a..e4c96b57a9f99 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -494,6 +494,7 @@ struct ProviderHost { // TensorShapeProto_Dimensions virtual std::unique_ptr TensorShapeProto_Dimensions__begin(const ONNX_NAMESPACE::TensorShapeProto_Dimensions* p) = 0; virtual std::unique_ptr TensorShapeProto_Dimensions__end(const ONNX_NAMESPACE::TensorShapeProto_Dimensions* p) = 0; + virtual size_t TensorShapeProto_Dimensions__size(const ONNX_NAMESPACE::TensorShapeProto_Dimensions* p) = 0; // TensorShapeProto virtual int TensorShapeProto__dim_size(const ONNX_NAMESPACE::TensorShapeProto* p) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index 365ee987f4930..aeae15ee4b4d2 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -288,6 +288,7 @@ struct TensorShapeProto_Dimension final { struct TensorShapeProto_Dimensions final { IteratorHolder begin() const { return g_host->TensorShapeProto_Dimensions__begin(this); } IteratorHolder end() const { return g_host->TensorShapeProto_Dimensions__end(this); } + size_t size() const { return 
g_host->TensorShapeProto_Dimensions__size(this); } PROVIDER_DISALLOW_ALL(TensorShapeProto_Dimensions) }; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 457176136fe49..83935293a221d 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -615,6 +615,10 @@ struct ProviderHostImpl : ProviderHost { return std::make_unique(p->end()); } + size_t TensorShapeProto_Dimensions__size(const ONNX_NAMESPACE::TensorShapeProto_Dimensions* p) override { + return p->size(); + } + // TensorShapeProto (wrapped) int TensorShapeProto__dim_size(const ONNX_NAMESPACE::TensorShapeProto* p) override { return p->dim_size(); } const ONNX_NAMESPACE::TensorShapeProto_Dimensions& TensorShapeProto__dim(const ONNX_NAMESPACE::TensorShapeProto* p) override { return p->dim(); } From d189fe66ceedfc8feb86f7f66ff191ce44a91492 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 11 Dec 2024 17:35:26 -0800 Subject: [PATCH 16/64] Add utils::CreateSupportedPartitions() to provider bridge --- .../providers/shared_library/provider_api.h | 15 +++++++++++++ .../shared_library/provider_interfaces.h | 11 ++++++++++ .../core/session/provider_bridge_ort.cc | 21 +++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index b84825236a453..31e7e0d47dfe1 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -215,6 +215,7 @@ using DeleteFunc = void (*)(void*); using NodeArgInfo = ONNX_NAMESPACE::ValueInfoProto; using NameMLValMap = std::unordered_map; + } // namespace onnxruntime #include "core/platform/threadpool.h" @@ -367,6 +368,20 @@ template <> constexpr ONNXTensorElementDataType GetONNXTensorElementDataType() { return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT4; } + +inline 
std::vector> +CreateSupportedPartitions(const GraphViewer& graph_viewer, + const std::unordered_set& supported_nodes, + const std::unordered_set& stop_ops, + const std::function& generate_metadef_name, + const std::string& execution_provider_name, + const std::string& execution_provider_type, + const std::unordered_map* node_unit_map, + bool drop_constant_initializers = false) { + return g_host->Utils__CreateSupportedPartitions(graph_viewer, supported_nodes, stop_ops, generate_metadef_name, + execution_provider_name, execution_provider_type, node_unit_map, + drop_constant_initializers); +} } // namespace utils namespace QDQ { diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index e4c96b57a9f99..71b46d27906bc 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -896,6 +896,17 @@ struct ProviderHost { virtual std::pair>, std::unordered_map> QDQ__GetAllNodeUnits(const GraphViewer* graph_viewer) = 0; + // Partitioning utils + virtual std::vector> + Utils__CreateSupportedPartitions(const GraphViewer& graph_viewer, + const std::unordered_set& supported_nodes, + const std::unordered_set& stop_ops, + const std::function& generate_metadef_name, + const std::string& execution_provider_name, + const std::string& execution_provider_type, + const std::unordered_map* node_unit_map, + bool drop_constant_initializers) = 0; + // Model virtual std::unique_ptr Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path, const IOnnxRuntimeOpSchemaRegistryList* local_registries, diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 83935293a221d..94e9d09301dff 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -81,6 +81,7 @@ using 
IndexedSubGraph_SourceOfSchema = IndexedSubGraph::SourceOfSchema; #include "core/common/cpuid_info.h" #include "core/common/logging/logging.h" #include "core/providers/shared_library/provider_interfaces.h" +#include "core/providers/partitioning_utils.h" #include "core/providers/cuda/cuda_provider_factory_creator.h" #include "core/providers/cann/cann_provider_factory_creator.h" @@ -1072,6 +1073,26 @@ struct ProviderHostImpl : ProviderHost { return QDQ::GetAllNodeUnits(*graph_viewer); } + // Partitioning utils + std::vector> + Utils__CreateSupportedPartitions(const GraphViewer& graph_viewer, + const std::unordered_set& supported_nodes, + const std::unordered_set& stop_ops, + const utils::GenerateMetadefNameFn& generate_metadef_name, + const std::string& execution_provider_name, + const std::string& execution_provider_type, + const std::unordered_map* node_unit_map, + bool drop_constant_initializers) override { + return onnxruntime::utils::CreateSupportedPartitions(graph_viewer, + supported_nodes, + stop_ops, + generate_metadef_name, + execution_provider_name, + execution_provider_type, + node_unit_map, + drop_constant_initializers); + } + // Model (wrapped) std::unique_ptr Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path, const IOnnxRuntimeOpSchemaRegistryList* local_registries, From 48191eaac27fb9ad90a851b0da7e20592a30ddce Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Fri, 13 Dec 2024 10:01:59 -0800 Subject: [PATCH 17/64] Use new namespace for NodeAttrHelper --- onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc index f4feab303d374..1bb764913b6ea 100644 --- a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc @@ -163,7 +163,7 @@ 
Status TryGetMaxSpillFillSize(const std::vector(0)); if (max_size > max_spill_fill_size) { max_spill_fill_size = max_size; From e6afd7298376c5cd7545aacad09e9bdc60bcab55 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Fri, 13 Dec 2024 10:02:38 -0800 Subject: [PATCH 18/64] Add to provider bridge: GraphViewer::Nodes(), ConstGraphNodes struct + iterators --- .../providers/shared_library/provider_api.h | 1 + .../shared_library/provider_interfaces.h | 16 +++++++++++ .../shared_library/provider_wrappedtypes.h | 20 +++++++++++++ .../core/session/provider_bridge_ort.cc | 28 +++++++++++++++++++ 4 files changed, 65 insertions(+) diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index 88d4e0d4cd18b..561744d30844b 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -157,6 +157,7 @@ struct KernelRegistry; struct Function; struct Graph; class GraphViewer; +struct ConstGraphNodes; enum class DataLayout; struct Model; struct Path; diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index d045fd27c0d25..7b586f6a71642 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -123,6 +123,14 @@ struct Node__EdgeIterator { virtual int GetDstArgIndex() const = 0; }; +struct ConstGraphNodes_Iterator { + virtual ~ConstGraphNodes_Iterator() {} + + virtual bool operator!=(const ConstGraphNodes_Iterator& other) const = 0; + virtual void operator++() = 0; + virtual const Node& operator*() = 0; +}; + // There are two ways to route a function, one is a virtual method and the other is a function pointer (or pointer to // member function). 
// The function pointers are nicer in that they directly call the target function, but they cannot be used in cases @@ -982,6 +990,7 @@ struct ProviderHost { virtual const std::string& GraphViewer__Name(const GraphViewer* p) noexcept = 0; virtual const std::filesystem::path& GraphViewer__ModelPath(const GraphViewer* p) noexcept = 0; + virtual const ConstGraphNodes& GraphViewer__Nodes(const GraphViewer* p) noexcept = 0; virtual const Node* GraphViewer__GetNode(const GraphViewer* p, NodeIndex node_index) = 0; virtual const NodeArg* GraphViewer__GetNodeArg(const GraphViewer* p, const std::string& name) = 0; @@ -1015,6 +1024,13 @@ struct ProviderHost { virtual const Node* GraphViewer__GetProducerNode(const GraphViewer* p, const std::string& node_arg_name) const = 0; virtual IOnnxRuntimeOpSchemaCollectionPtr GraphViewer__GetSchemaRegistry(const GraphViewer* p) const = 0; + // ConstGraphNodes + virtual std::unique_ptr ConstGraphNodes__begin(const ConstGraphNodes* p) = 0; + virtual std::unique_ptr ConstGraphNodes__end(const ConstGraphNodes* p) = 0; + virtual std::unique_ptr ConstGraphNodes__cbegin(const ConstGraphNodes* p) = 0; + virtual std::unique_ptr ConstGraphNodes__cend(const ConstGraphNodes* p) = 0; + virtual bool ConstGraphNodes__empty(const ConstGraphNodes* p) noexcept = 0; + // OpKernel virtual const Node& OpKernel__Node(const OpKernel* p) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index ae9eed6f66f0b..c8a8fefdac71e 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -1040,6 +1040,7 @@ class GraphViewer final { const std::string& Name() const noexcept { return g_host->GraphViewer__Name(this); } const std::filesystem::path& ModelPath() const noexcept { return g_host->GraphViewer__ModelPath(this); } + const ConstGraphNodes& Nodes() const noexcept { return 
g_host->GraphViewer__Nodes(this); } const Node* GetNode(NodeIndex node_index) const { return g_host->GraphViewer__GetNode(this, node_index); } const NodeArg* GetNodeArg(const std::string& name) const { return g_host->GraphViewer__GetNodeArg(this, name); } @@ -1084,6 +1085,25 @@ class GraphViewer final { void operator=(const GraphViewer&) = delete; }; +struct ConstGraphNodes final { + IteratorHolder begin() const { + return g_host->ConstGraphNodes__begin(this); + } + IteratorHolder end() const { + return g_host->ConstGraphNodes__end(this); + } + IteratorHolder cbegin() const { + return g_host->ConstGraphNodes__cbegin(this); + } + IteratorHolder cend() const { + return g_host->ConstGraphNodes__cend(this); + } + + bool empty() const noexcept { return g_host->ConstGraphNodes__empty(this); } + + PROVIDER_DISALLOW_ALL(ConstGraphNodes) +}; + struct OpKernelContext final { template const T& RequiredInput(int index) const; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 87be39b6b70f0..873a0d10cd094 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -189,6 +189,18 @@ struct Node__EdgeIterator_Impl : Node__EdgeIterator { Node::EdgeConstIterator v_; }; +struct ConstGraphNodes_Iterator_Impl : ConstGraphNodes_Iterator { + ConstGraphNodes_Iterator_Impl(ConstGraphNodes::ConstNodeIterator&& v) : v_{std::move(v)} {} + + bool operator!=(const ConstGraphNodes_Iterator& other) const override { + return v_ != static_cast(&other)->v_; + } + void operator++() override { v_.operator++(); } + const Node& operator*() override { return *v_; } + + ConstGraphNodes::ConstNodeIterator v_; +}; + #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS) common::Status LoadDynamicLibraryFromProvider(onnxruntime::PathString library_name) { const auto& platform_env = onnxruntime::Env::Default(); @@ -1203,6 +1215,7 @@ struct ProviderHostImpl : 
ProviderHost { const std::string& GraphViewer__Name(const GraphViewer* p) noexcept override { return p->Name(); } const std::filesystem::path& GraphViewer__ModelPath(const GraphViewer* p) noexcept override { return p->ModelPath(); } + const ConstGraphNodes& GraphViewer__Nodes(const GraphViewer* p) noexcept override { return p->Nodes(); } const Node* GraphViewer__GetNode(const GraphViewer* p, NodeIndex node_index) override { return p->GetNode(node_index); } const NodeArg* GraphViewer__GetNodeArg(const GraphViewer* p, const std::string& name) override { return p->GetNodeArg(name); } @@ -1248,6 +1261,21 @@ struct ProviderHostImpl : ProviderHost { const Node* GraphViewer__GetProducerNode(const GraphViewer* p, const std::string& node_arg_name) const override { return p->GetProducerNode(node_arg_name); } IOnnxRuntimeOpSchemaCollectionPtr GraphViewer__GetSchemaRegistry(const GraphViewer* p) const override { return p->GetSchemaRegistry(); } + // ConstGraphNodes + std::unique_ptr ConstGraphNodes__begin(const ConstGraphNodes* p) override { + return std::make_unique(p->begin()); + } + std::unique_ptr ConstGraphNodes__end(const ConstGraphNodes* p) override { + return std::make_unique(p->end()); + } + std::unique_ptr ConstGraphNodes__cbegin(const ConstGraphNodes* p) override { + return std::make_unique(p->cbegin()); + } + std::unique_ptr ConstGraphNodes__cend(const ConstGraphNodes* p) override { + return std::make_unique(p->cend()); + } + bool ConstGraphNodes__empty(const ConstGraphNodes* p) noexcept override { return p->empty(); } + // OpKernel (direct) const Node& OpKernel__Node(const OpKernel* p) override { return p->OpKernel::Node(); } From 0b1e538642fd5901120c8b58784d76efa9112732 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Fri, 13 Dec 2024 12:01:39 -0800 Subject: [PATCH 19/64] Replace usage of cbegin() and cend() in NodeAttrHelper with version that does not need to add new functionality to the provider bridge --- .../core/providers/qnn/builder/qnn_utils.cc | 41 
++++++++++--------- .../core/providers/qnn/builder/qnn_utils.h | 1 - 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc index f6c30e85b53f9..889b6e21647b5 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc @@ -668,10 +668,12 @@ const std::string& NodeAttrHelper::Get(const std::string& key, const std::string std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - const auto& attr = entry->second; + const auto& values = entry->second.ints(); + const int64_t* cbegin = values.data(); + const int64_t* cend = values.data() + values.size(); std::vector v; - v.reserve(static_cast(attr.ints_size())); - std::transform(attr.ints().cbegin(), attr.ints().cend(), std::back_inserter(v), + v.reserve(static_cast(values.size())); + std::transform(cbegin, cend, std::back_inserter(v), [](int64_t val) -> int32_t { return narrow(val); }); return v; } @@ -681,10 +683,12 @@ std::vector NodeAttrHelper::Get(const std::string& key, const std::vect std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - const auto& attr = entry->second; + const auto& values = entry->second.ints(); + const int64_t* cbegin = values.data(); + const int64_t* cend = values.data() + values.size(); std::vector v; - v.reserve(static_cast(attr.ints_size())); - std::transform(attr.ints().cbegin(), attr.ints().cend(), std::back_inserter(v), + v.reserve(static_cast(values.size())); + std::transform(cbegin, cend, std::back_inserter(v), [](int64_t val) -> uint32_t { return narrow(val); }); return v; } @@ -695,16 +699,9 @@ std::vector NodeAttrHelper::Get(const std::string& key, const std::vec std::vector 
NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { const auto& values = entry->second.ints(); - return std::vector{values.cbegin(), values.cend()}; - } - - return def_val; -} - -std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { - if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - const auto& values = entry->second.strings(); - return std::vector{values.cbegin(), values.cend()}; + const int64_t* cbegin = values.data(); + const int64_t* cend = values.data() + values.size(); + return std::vector{cbegin, cend}; } return def_val; @@ -713,7 +710,9 @@ std::vector NodeAttrHelper::Get(const std::string& key, const std:: std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { const auto& values = entry->second.floats(); - return std::vector{values.cbegin(), values.cend()}; + const float* cbegin = values.data(); + const float* cend = values.data() + values.size(); + return std::vector{cbegin, cend}; } return def_val; @@ -741,7 +740,9 @@ std::optional> NodeAttrHelper::GetFloats(const std::string& k std::optional> result; if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { const auto& values = entry->second.floats(); - result = std::vector(values.begin(), values.end()); + const float* cbegin = values.data(); + const float* cend = values.data() + values.size(); + result = std::vector(cbegin, cend); } return result; @@ -751,7 +752,9 @@ std::optional> NodeAttrHelper::GetInt64s(const std::string& std::optional> result; if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { const auto& values = entry->second.ints(); - result = std::vector(values.begin(), values.end()); + const int64_t* cbegin = values.data(); + const int64_t* cend = 
values.data() + values.size(); + result = std::vector(cbegin, cend); } return result; diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.h b/onnxruntime/core/providers/qnn/builder/qnn_utils.h index c76c99b8454ef..1e01a9d76a5b0 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.h @@ -181,7 +181,6 @@ class NodeAttrHelper { std::vector Get(const std::string& key, const std::vector& def_val) const; const std::string& Get(const std::string& key, const std::string& def_val) const; - std::vector Get(const std::string& key, const std::vector& def_val) const; // Convert the i() or ints() of the attribute from int64_t to int32_t int32_t Get(const std::string& key, int32_t def_val) const; From fb3618dc33e4cbcb4855f849955b1903f4bc28ba Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Fri, 13 Dec 2024 12:03:15 -0800 Subject: [PATCH 20/64] Add convenience function to get the default Env to provider bridge --- .../core/providers/qnn/builder/qnn_backend_manager.cc | 5 ++++- onnxruntime/core/providers/qnn/qnn_execution_provider.cc | 1 + onnxruntime/core/providers/shared_library/provider_api.h | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index 3af646c3ce13a..40730a535bd43 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -1099,6 +1099,7 @@ Status QnnBackendManager::ExtractBackendProfilingInfo() { bool tracelogging_provider_ep_enabled = false; const Env& env = Env::Default(); + // const Env& env = GetDefaultEnv(); auto& provider = env.GetTelemetryProvider(); auto level = provider.Level(); if (provider.IsEnabled()) { @@ -1492,7 +1493,9 @@ void* QnnBackendManager::LoadLib(const char* file_name, int flags, std::string& auto file_path = 
std::filesystem::path(file_name); if (!file_path.is_absolute()) { // construct an absolute path from ORT runtime path + file_name and check whether it exists. - auto pathstring = Env::Default().GetRuntimePath() + ToPathString(file_name); + const Env& env = Env::Default(); + // const Env& env = GetDefaultEnv(); + auto pathstring = env.GetRuntimePath() + ToPathString(file_name); auto absolute_path = pathstring.c_str(); if (std::filesystem::exists(std::filesystem::path(absolute_path))) { // load library from absolute path and search for dependencies there. diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index ddc9d15e4ea71..ed246b66c2784 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -247,6 +247,7 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio // set to invalid to indicate that ETW is no enabled when we setup QNN qnn::ProfilingLevel profiling_level_etw = qnn::ProfilingLevel::INVALID; const Env& env = Env::Default(); + // const Env& env = GetDefaultEnv(); auto& provider = env.GetTelemetryProvider(); if (provider.IsEnabled()) { auto level = provider.Level(); diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index 561744d30844b..50d94249d4916 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -397,6 +397,10 @@ GetAllNodeUnits(const GraphViewer* graph_viewer, const logging::Logger& logger) // So the C API (and C++) becomes available when ORT_API_MANUAL_INIT is used. void InitProviderOrtApi(); +// This is a replacement for Env::Default(). Returns a reference to the default ORT Environment. 
+inline Env& GetDefaultEnv() { + return g_host->Env__Default(); +} } // namespace onnxruntime #define CREATE_MESSAGE(logger, severity, category, datatype) \ From 6b581fd698d3888835854693e7ad2a4ff963f996 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Fri, 13 Dec 2024 17:16:52 -0800 Subject: [PATCH 21/64] Moving ORT includes to a separate header --- .../builder/opbuilder/reduce_op_builder.cc | 6 +-- .../qnn/builder/qnn_backend_manager.cc | 4 +- .../qnn/builder/qnn_backend_manager.h | 5 +- .../core/providers/qnn/builder/qnn_utils.cc | 18 ++++--- .../core/providers/qnn/builder/qnn_utils.h | 5 +- onnxruntime/core/providers/qnn/ort_api.h | 35 +++++++++++++ .../providers/qnn/qnn_execution_provider.cc | 40 +++++++------- .../providers/qnn/qnn_execution_provider.h | 21 +++++--- .../providers/qnn/qnn_provider_factory.cc | 52 ++++++++++++++++--- .../qnn/qnn_provider_factory_creator.h | 1 + onnxruntime/core/providers/qnn/symbols.def | 2 + .../core/providers/qnn/version_script.lds | 9 ++++ .../core/session/provider_bridge_ort.cc | 7 +++ 13 files changed, 149 insertions(+), 56 deletions(-) create mode 100644 onnxruntime/core/providers/qnn/ort_api.h create mode 100644 onnxruntime/core/providers/qnn/symbols.def create mode 100644 onnxruntime/core/providers/qnn/version_script.lds diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc index ce6654b3906d7..a2eeeee4453e4 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc @@ -9,7 +9,6 @@ #include "core/common/safeint.h" #include "onnx/defs/data_type_utils.h" #include "core/providers/common.h" -#include "core/framework/endian_utils.h" #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" @@ -145,10 
+144,7 @@ Status ReduceOpBuilder::GetAxesSet(QnnModelWrapper& qnn_model_wrapper, const Nod auto src_span = gsl::make_span(axes_bytes.data(), axes_bytes.size()); auto dst_span = gsl::make_span(reduce_axes.data(), reduce_axes.size()); - // Copy initializer bytes (stored in little-endian order) to vector of int64_t. - // ReadLittleEndian returns a status error if the source and destination spans do not have - // matching byte sizes. - ORT_RETURN_IF_ERROR(onnxruntime::utils::ReadLittleEndian(src_span, dst_span)); + std::memcpy(dst_span.data(), src_span.data(), src_span.size_bytes()); } } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index 40730a535bd43..7f575257a77f7 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -17,8 +17,8 @@ #include "HTP/QnnHtpContext.h" #include "Saver/QnnSaver.h" #include -#include "core/framework/endian_utils.h" -#include "core/common/logging/capture.h" + +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/onnx_ctx_model_helper.h" #include "core/providers/qnn/builder/qnn_configs_helper.h" diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h index b145f2a2cd724..661a830bfb733 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h @@ -21,9 +21,8 @@ #include "QnnLog.h" #include "QnnTypes.h" #include "System/QnnSystemInterface.h" -#include "core/common/status.h" -#include "core/common/logging/logging.h" -#include "core/common/path_string.h" + +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_def.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc 
b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc index 889b6e21647b5..8e7017c063bc0 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc @@ -1,18 +1,16 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include "qnn_utils.h" + #include #include #include #include #include -#include "core/common/common.h" -#include "core/framework/data_types.h" -#include "core/framework/tensorprotoutils.h" -#include "qnn_utils.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_def.h" -#include "core/graph/graph_viewer.h" namespace onnxruntime { namespace qnn { @@ -560,6 +558,14 @@ Status GetQminQmax(const Qnn_DataType_t qnn_data_type, return Status::OK(); } +inline float RoundHalfToEven(float input) { + if (!std::isfinite(input)) { + return input; + } + // std::remainder returns x - n, where n is the integral value nearest to x. When |x - n| = 0.5, n is chosen to be even + return input - std::remainderf(input, 1.f); +} + Status GetQuantParams(float rmin, float rmax, const Qnn_DataType_t qnn_data_type, @@ -584,7 +590,7 @@ Status GetQuantParams(float rmin, } else { initial_zero_point = qmin - (rmin / scale); } - zero_point = static_cast(RoundHalfToEven(Saturate(qmax, qmin, initial_zero_point))); + zero_point = static_cast(qnn::utils::RoundHalfToEven(Saturate(qmax, qmin, initial_zero_point))); // To match QNN quantization definition zero_point = 0 - zero_point; return Status::OK(); diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.h b/onnxruntime/core/providers/qnn/builder/qnn_utils.h index 1e01a9d76a5b0..578f50ba895cf 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.h @@ -9,10 +9,7 @@ #include #include "QnnTypes.h" -#include "core/session/onnxruntime_cxx_api.h" -#include "core/framework/node_unit.h" -#include "core/framework/tensor_shape.h" 
-#include "core/util/qmath.h" +#include "core/providers/qnn/ort_api.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/ort_api.h b/onnxruntime/core/providers/qnn/ort_api.h new file mode 100644 index 0000000000000..0c26d9c99c200 --- /dev/null +++ b/onnxruntime/core/providers/qnn/ort_api.h @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License + +#pragma once + +#define BUILD_QNN_EP_STATIC 1 + +#if BUILD_QNN_EP_STATIC +#include "core/common/common.h" +#include "core/common/status.h" +#include "core/common/logging/logging.h" +#include "core/common/logging/capture.h" +#include "core/common/path_string.h" +#include "core/platform/env.h" +#include "core/framework/data_types.h" +#include "core/framework/run_options.h" +#include "core/framework/execution_provider.h" +#include "core/framework/model_metadef_id_generator.h" +#include "core/framework/compute_capability.h" +#include "core/framework/tensor_shape.h" +#include "core/framework/node_unit.h" +#include "core/framework/tensorprotoutils.h" +#include "core/graph/model.h" +#include "core/graph/graph_viewer.h" +#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h" +#include "core/optimizer/qdq_transformer/selectors_actions/shared/utils.h" +#include "core/providers/common.h" +#include "core/providers/partitioning_utils.h" +#else +#include "core/providers/shared_library/provider_api.h" +#endif + +#include "core/session/onnxruntime_session_options_config_keys.h" +#include "core/session/onnxruntime_run_options_config_keys.h" +#include "core/session/onnxruntime_cxx_api.h" diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index ed246b66c2784..413db0489e37a 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -5,29 +5,21 @@ #include #include -#include 
"core/framework/compute_capability.h" -#include "core/graph/graph_viewer.h" -#include "core/session/onnxruntime_session_options_config_keys.h" -#include "core/session/onnxruntime_run_options_config_keys.h" -#include "core/session/onnxruntime_cxx_api.h" -#include "core/framework/kernel_registry.h" -#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h" -#include "core/optimizer/qdq_transformer/selectors_actions/shared/utils.h" -#include "core/platform/env.h" -#include "core/providers/common.h" -#include "core/providers/partitioning_utils.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_node_group.h" #include "core/providers/qnn/builder/qnn_def.h" #include "core/providers/qnn/builder/onnx_ctx_model_helper.h" -#include "core/framework/run_options.h" #ifdef _WIN32 #include +// TODO: Enable once QNN is built as a DLL +#if 0 #include "core/platform/windows/logging/etw_sink.h" #endif +#endif // _WIN32 namespace onnxruntime { @@ -35,6 +27,7 @@ constexpr const char* QNN = "QNN"; static std::unique_ptr>> s_run_on_unload_; +// TODO: Remove and use versions in EP provider bridge. void RunOnUnload(std::function function) { static std::mutex mutex; std::lock_guard guard(mutex); @@ -44,6 +37,7 @@ void RunOnUnload(std::function function) { s_run_on_unload_->push_back(std::move(function)); } +// TODO: Remove and use versions in EP provider bridge. 
struct OnUnload { ~OnUnload() { if (!s_run_on_unload_) @@ -193,17 +187,17 @@ qnn::ProfilingLevel QNNExecutionProvider::GetProfilingLevelFromETWLevel(unsigned } QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_options_map, - const SessionOptions* session_options) + const ConfigOptions* config_options) : IExecutionProvider{onnxruntime::kQnnExecutionProvider} { - if (session_options) { - disable_cpu_ep_fallback_ = session_options->config_options.GetConfigOrDefault( + if (config_options) { + disable_cpu_ep_fallback_ = config_options->GetConfigOrDefault( kOrtSessionOptionsDisableCPUEPFallback, "0") == "1"; - context_cache_enabled_ = session_options->config_options.GetConfigOrDefault( + context_cache_enabled_ = config_options->GetConfigOrDefault( kOrtSessionOptionEpContextEnable, "0") == "1"; LOGS_DEFAULT(VERBOSE) << "Context cache enable: " << context_cache_enabled_; - std::string embed_mode = session_options->config_options.GetConfigOrDefault( + std::string embed_mode = config_options->GetConfigOrDefault( kOrtSessionOptionEpContextEmbedMode, "0"); if ("1" == embed_mode) { qnn_context_embed_mode_ = true; @@ -214,18 +208,18 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio } LOGS_DEFAULT(VERBOSE) << "User specified context cache embed mode: " << qnn_context_embed_mode_; - context_cache_path_cfg_ = session_options->config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, ""); + context_cache_path_cfg_ = config_options->GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, ""); LOGS_DEFAULT(VERBOSE) << "User specified context cache path: " << context_cache_path_cfg_; // For the case that workaround QNN context PD memory limit, user need split the model into pieces and // generate the QNN context model separately. // It could happen that the generated EPContext node in separate graph has same node name. // User can set this context_node_name_prefix for each split pieces to avoid that happens. 
- context_node_name_prefix_ = session_options->config_options.GetConfigOrDefault(kOrtSessionOptionEpContextNodeNamePrefix, ""); + context_node_name_prefix_ = config_options->GetConfigOrDefault(kOrtSessionOptionEpContextNodeNamePrefix, ""); LOGS_DEFAULT(VERBOSE) << "User specified QNN context node name prefix: " << context_node_name_prefix_; share_ep_contexts_ = - session_options->config_options.GetConfigOrDefault(kOrtSessionOptionShareEpContexts, "0") == "1"; + config_options->GetConfigOrDefault(kOrtSessionOptionShareEpContexts, "0") == "1"; LOGS_DEFAULT(VERBOSE) << "User specified option - share EP contexts across sessions: " << share_ep_contexts_; } @@ -403,6 +397,8 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio soc_model, enable_htp_weight_sharing); +// TODO: Renable once QNN is a dll +#if 0 #ifdef _WIN32 auto& etwRegistrationManager = logging::EtwRegistrationManager::Instance(); // Register callback for ETW capture state (rundown) @@ -445,6 +441,7 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio }); etwRegistrationManager.RegisterInternalCallback(callback_ETWSink_provider_); #endif +#endif } QNNExecutionProvider::~QNNExecutionProvider() { @@ -458,10 +455,13 @@ QNNExecutionProvider::~QNNExecutionProvider() { // Unregister the ETW callback #ifdef _WIN32 + // TODO: Re-enable when QNN EP is a DLL +#if 0 if (callback_ETWSink_provider_ != nullptr) { logging::EtwRegistrationManager::Instance().UnregisterInternalCallback(callback_ETWSink_provider_); } #endif +#endif } // Logs information about the supported/unsupported nodes. 
diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.h b/onnxruntime/core/providers/qnn/qnn_execution_provider.h index a0577e8fd87f2..b390988f39da4 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.h +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.h @@ -3,24 +3,26 @@ #pragma once -#include "core/framework/execution_provider.h" -#include "core/framework/session_options.h" -#include "core/framework/model_metadef_id_generator.h" -#include "core/graph/model.h" +#include #include +#include +#include + +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_backend_manager.h" #include "core/providers/qnn/builder/qnn_model.h" #include "core/providers/qnn/builder/qnn_configs_helper.h" #include "HTP/QnnHtpGraph.h" -#include -#include -#include #ifdef _WIN32 +// TODO: Reenable when QNN ep is a dll +#if 0 #include "core/platform/windows/logging/etw_sink.h" #endif +#endif namespace onnxruntime { +// TODO: Remove. It's in provider bridge. void RunOnUnload(std::function function); class SharedContext { @@ -87,7 +89,7 @@ class SharedContext { // Logical device representation. 
class QNNExecutionProvider : public IExecutionProvider { public: - explicit QNNExecutionProvider(const ProviderOptions& provider_options_map, const SessionOptions* session_options); + explicit QNNExecutionProvider(const ProviderOptions& provider_options_map, const ConfigOptions* config_options); virtual ~QNNExecutionProvider(); ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(QNNExecutionProvider); @@ -151,7 +153,10 @@ class QNNExecutionProvider : public IExecutionProvider { bool share_ep_contexts_ = false; bool enable_spill_fill_buffer_ = false; #ifdef _WIN32 + // TODO: Re-enable when QNN is a DLL +#if 0 onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback callback_ETWSink_provider_ = nullptr; +#endif #endif qnn::ModelSettings model_settings_ = {}; diff --git a/onnxruntime/core/providers/qnn/qnn_provider_factory.cc b/onnxruntime/core/providers/qnn/qnn_provider_factory.cc index 4095d7ff02a33..fdeb9dc106386 100644 --- a/onnxruntime/core/providers/qnn/qnn_provider_factory.cc +++ b/onnxruntime/core/providers/qnn/qnn_provider_factory.cc @@ -2,32 +2,68 @@ // Licensed under the MIT License #include "core/providers/qnn/qnn_provider_factory_creator.h" - -#include "core/session/abi_session_options_impl.h" #include "core/providers/qnn/qnn_execution_provider.h" -#include "core/session/ort_apis.h" namespace onnxruntime { struct QNNProviderFactory : IExecutionProviderFactory { - QNNProviderFactory(const ProviderOptions& provider_options_map, const SessionOptions* session_options) - : provider_options_map_(provider_options_map), session_options_(session_options) { + QNNProviderFactory(const ProviderOptions& provider_options_map, const ConfigOptions* config_options) + : provider_options_map_(provider_options_map), config_options_(config_options) { } ~QNNProviderFactory() override { } std::unique_ptr CreateProvider() override { - return std::make_unique(provider_options_map_, session_options_); + return std::make_unique(provider_options_map_, config_options_); } private: 
ProviderOptions provider_options_map_; - const SessionOptions* session_options_; + const ConfigOptions* config_options_; }; +// TODO: Move to core/session/provider_bridge_ort.cc std::shared_ptr QNNProviderFactoryCreator::Create(const ProviderOptions& provider_options_map, const SessionOptions* session_options) { - return std::make_shared(provider_options_map, session_options); + const ConfigOptions* config_options = nullptr; + if (session_options != nullptr) { + config_options = &session_options->config_options; + } + + return std::make_shared(provider_options_map, config_options); } +struct QNN_Provider /*: Provider*/ { + std::shared_ptr CreateExecutionProviderFactory(const void* param) /*override*/ { + if (param == nullptr) { + LOGS_DEFAULT(ERROR) << "[QNN EP] Passed NULL options to CreateExecutionProviderFactory()"; + return nullptr; + } + + std::array pointers_array = *reinterpret_cast*>(param); + const ProviderOptions* provider_options = reinterpret_cast(pointers_array[0]); + const ConfigOptions* config_options = reinterpret_cast(pointers_array[1]); + + if (provider_options == nullptr) { + LOGS_DEFAULT(ERROR) << "[QNN EP] Passed NULL ProviderOptions to CreateExecutionProviderFactory()"; + return nullptr; + } + + return std::make_shared(*provider_options, config_options); + } + + void Initialize() /*override*/ {} + void Shutdown() /*override*/ {} +} g_provider; + } // namespace onnxruntime + +// TODO: Uncomment when it is an EP dll +#if 0 +extern "C" { + +ORT_API(onnxruntime::Provider*, GetProvider) { + return &onnxruntime::g_provider; +} +} +#endif diff --git a/onnxruntime/core/providers/qnn/qnn_provider_factory_creator.h b/onnxruntime/core/providers/qnn/qnn_provider_factory_creator.h index 80f9d99b804e7..859152752893e 100644 --- a/onnxruntime/core/providers/qnn/qnn_provider_factory_creator.h +++ b/onnxruntime/core/providers/qnn/qnn_provider_factory_creator.h @@ -11,6 +11,7 @@ namespace onnxruntime { struct SessionOptions; +// defined in 
core/session/provider_bridge_ort.cc struct QNNProviderFactoryCreator { static std::shared_ptr Create(const ProviderOptions& provider_options_map, const SessionOptions* session_options); diff --git a/onnxruntime/core/providers/qnn/symbols.def b/onnxruntime/core/providers/qnn/symbols.def new file mode 100644 index 0000000000000..4ec2f7914c208 --- /dev/null +++ b/onnxruntime/core/providers/qnn/symbols.def @@ -0,0 +1,2 @@ +EXPORTS + GetProvider diff --git a/onnxruntime/core/providers/qnn/version_script.lds b/onnxruntime/core/providers/qnn/version_script.lds new file mode 100644 index 0000000000000..094abb3329781 --- /dev/null +++ b/onnxruntime/core/providers/qnn/version_script.lds @@ -0,0 +1,9 @@ +#_init and _fini should be local +VERS_1.0 { + global: + GetProvider; + + # Hide everything else. + local: + *; +}; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 873a0d10cd094..af23274825aa6 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1702,6 +1702,12 @@ static ProviderLibrary s_library_tensorrt(LIBRARY_PREFIX ORT_TSTR("onnxruntime_p #endif ); static ProviderLibrary s_library_migraphx(LIBRARY_PREFIX ORT_TSTR("onnxruntime_providers_migraphx") LIBRARY_EXTENSION); +static ProviderLibrary s_library_qnn(LIBRARY_PREFIX ORT_TSTR("onnxruntime_providers_qnn") LIBRARY_EXTENSION +#ifndef _WIN32 + , + false /* unload - On Linux if we unload the vitisai shared provider we crash */ +#endif +); void UnloadSharedProviders() { s_library_dnnl.Unload(); @@ -1714,6 +1720,7 @@ void UnloadSharedProviders() { s_library_rocm.Unload(); s_library_shared.Unload(); s_library_migraphx.Unload(); + s_library_qnn.Unload(); } // Used by test code From a1129e5dae1cce802da0ad6d48e64d235dc5fccf Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 15 Dec 2024 00:40:49 -0800 Subject: [PATCH 22/64] Add Node::EdgeEnd wrapper class to provider bridge. 
Add NodeUnit constructor --- .../providers/shared_library/provider_api.h | 1 + .../shared_library/provider_interfaces.h | 15 ++++++++++ .../shared_library/provider_wrappedtypes.h | 29 ++++++++++++++++++ .../core/session/provider_bridge_ort.cc | 30 +++++++++++++++++++ 4 files changed, 75 insertions(+) diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index 50d94249d4916..ceb654931ae61 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -162,6 +162,7 @@ enum class DataLayout; struct Model; struct Path; struct Node; +struct Node_EdgeEnd; struct NodeArg; struct NodeAttributes; struct NodeUnitIODef; diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 7b586f6a71642..474eddf5af310 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -118,6 +118,7 @@ struct Node__EdgeIterator { virtual bool operator!=(const Node__EdgeIterator& p) const = 0; virtual void operator++() = 0; + virtual const Node_EdgeEnd& operator*() const = 0; virtual const Node& GetNode() const = 0; virtual int GetSrcArgIndex() const = 0; virtual int GetDstArgIndex() const = 0; @@ -851,6 +852,14 @@ struct ProviderHost { virtual const std::unordered_map>& Node__GetAttributeNameToMutableSubgraphMap(Node* p) = 0; virtual std::unordered_map> Node__GetAttributeNameToSubgraphMap(const Node* p) const = 0; + // Node_EdgeEnd + virtual std::unique_ptr Node_EdgeEnd__construct(const Node& node, int src_arg_index, int dst_arg_index) = 0; + virtual void Node_EdgeEnd__operator_delete(Node_EdgeEnd* p) noexcept = 0; + + virtual const Node& Node_EdgeEnd__GetNode(const Node_EdgeEnd* p) = 0; + virtual int Node_EdgeEnd__GetSrcArgIndex(const Node_EdgeEnd* p) = 0; + virtual int 
Node_EdgeEnd__GetDstArgIndex(const Node_EdgeEnd* p) = 0; + // NodeArg virtual const std::string& NodeArg__Name(const NodeArg* p) noexcept = 0; virtual const ONNX_NAMESPACE::TensorShapeProto* NodeArg__Shape(const NodeArg* p) = 0; @@ -881,6 +890,12 @@ struct ProviderHost { virtual void NodeAttributes__reserve(NodeAttributes* p, size_t size) = 0; // NodeUnit + virtual std::unique_ptr NodeUnit__construct(gsl::span dq_nodes, const Node& target_node, + gsl::span q_nodes, uint8_t unit_type, + gsl::span inputs, gsl::span outputs, + size_t input_edge_count, gsl::span output_edges) = 0; + virtual void NodeUnit__operator_delete(NodeUnit* p) noexcept = 0; + virtual int NodeUnit__UnitType(const NodeUnit* p) noexcept = 0; virtual const std::vector& NodeUnit__Inputs(const NodeUnit* p) noexcept = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index c8a8fefdac71e..e4ecab5740af9 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -776,6 +776,21 @@ struct Function final { PROVIDER_DISALLOW_ALL(Function) }; +struct Node_EdgeEnd final { + static std::unique_ptr Create(const Node& node, int src_arg_index, int dst_arg_index) { + return g_host->Node_EdgeEnd__construct(node, src_arg_index, dst_arg_index); + } + static void operator delete(void* p) { g_host->Node_EdgeEnd__operator_delete(reinterpret_cast(p)); } + + const Node& GetNode() const { return g_host->Node_EdgeEnd__GetNode(this); } + int GetSrcArgIndex() const { return g_host->Node_EdgeEnd__GetSrcArgIndex(this); } + int GetDstArgIndex() const { return g_host->Node_EdgeEnd__GetDstArgIndex(this); } + + Node_EdgeEnd() = delete; + Node_EdgeEnd(const Node_EdgeEnd&) = delete; + void operator=(const Node_EdgeEnd&) = delete; +}; + struct Node final { enum class Type { Primitive = 0, @@ -838,6 +853,7 @@ struct Node final { } void 
operator++() { impl_->operator++(); } + const Node_EdgeEnd& operator*() { return impl_->operator*(); } const Node__EdgeIterator* operator->() const { return impl_.get(); } std::unique_ptr impl_; @@ -912,6 +928,15 @@ struct NodeUnit final { QDQGroup, // The NodeUnit contain a QDQ group of nodes, such as "DQ->Sigmoid->Q" }; + static std::unique_ptr Create(gsl::span dq_nodes, const Node& target_node, + gsl::span q_nodes, Type unit_type, + gsl::span inputs, gsl::span outputs, + size_t input_edge_count, gsl::span output_edges) { + return g_host->NodeUnit__construct(dq_nodes, target_node, q_nodes, static_cast(unit_type), + inputs, outputs, input_edge_count, output_edges); + } + static void operator delete(void* p) { g_host->NodeUnit__operator_delete(reinterpret_cast(p)); } + Type UnitType() const noexcept { return static_cast(g_host->NodeUnit__UnitType(this)); } const std::vector& Inputs() const noexcept { return g_host->NodeUnit__Inputs(this); } @@ -938,6 +963,10 @@ struct NodeUnit final { // output. any Q nodes are hidden. 
Node::EdgeConstIterator OutputEdgesBegin() const { return g_host->NodeUnit__OutputEdgesBegin(this); } Node::EdgeConstIterator OutputEdgesEnd() const { return g_host->NodeUnit__OutputEdgesEnd(this); } + + NodeUnit() = delete; + NodeUnit(const NodeUnit&) = delete; + void operator=(const NodeUnit& v) = delete; }; struct Model final { diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index af23274825aa6..b6a9aa287e924 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -76,6 +76,7 @@ using FunctionProtos = google::protobuf::RepeatedPtrField; namespace onnxruntime { using IndexedSubGraph_MetaDef = IndexedSubGraph::MetaDef; using IndexedSubGraph_SourceOfSchema = IndexedSubGraph::SourceOfSchema; +using Node_EdgeEnd = Node::EdgeEnd; } // namespace onnxruntime #include "core/common/cpuid_info.h" @@ -182,6 +183,7 @@ struct Node__EdgeIterator_Impl : Node__EdgeIterator { bool operator!=(const Node__EdgeIterator& p) const override { return v_ != static_cast(&p)->v_; } void operator++() override { v_.operator++(); } + const Node_EdgeEnd& operator*() const override { return v_.operator*(); } const Node& GetNode() const override { return v_->GetNode(); } int GetSrcArgIndex() const override { return v_->GetSrcArgIndex(); } int GetDstArgIndex() const override { return v_->GetDstArgIndex(); } @@ -1005,6 +1007,16 @@ struct ProviderHostImpl : ProviderHost { std::unordered_map> Node__GetAttributeNameToSubgraphMap(const Node* p) const override { return p->GetAttributeNameToSubgraphMap(); } int Node__NodeType(const Node* p) const noexcept override { return int(p->NodeType()); } + // Node_EdgeEnd (wrapped). Maps to Node::EdgeEnd struct. 
+ std::unique_ptr Node_EdgeEnd__construct(const Node& node, int src_arg_index, int dst_arg_index) override { + return std::make_unique(node, src_arg_index, dst_arg_index); + } + void Node_EdgeEnd__operator_delete(Node_EdgeEnd* p) noexcept override { delete p; } + + const Node& Node_EdgeEnd__GetNode(const Node_EdgeEnd* p) override { return p->GetNode(); } + int Node_EdgeEnd__GetSrcArgIndex(const Node_EdgeEnd* p) override { return p->GetSrcArgIndex(); } + int Node_EdgeEnd__GetDstArgIndex(const Node_EdgeEnd* p) override { return p->GetDstArgIndex(); } + // NodeArg (wrapped) const std::string& NodeArg__Name(const NodeArg* p) noexcept override { return p->Name(); } const ONNX_NAMESPACE::TensorShapeProto* NodeArg__Shape(const NodeArg* p) override { return p->Shape(); } @@ -1040,6 +1052,24 @@ struct ProviderHostImpl : ProviderHost { void NodeAttributes__reserve(NodeAttributes* p, size_t size) override { p->reserve(size); } // NodeUnit (wrapped) + std::unique_ptr NodeUnit__construct(gsl::span dq_nodes, + const Node& target_node, + gsl::span q_nodes, + uint8_t unit_type, + gsl::span inputs, + gsl::span outputs, + size_t input_edge_count, + gsl::span output_edges) override { + Node::EdgeSet output_edge_set; + for (const Node_EdgeEnd* edge_end : output_edges) { + output_edge_set.insert(*edge_end); + } + + return std::make_unique(dq_nodes, target_node, q_nodes, static_cast(unit_type), + inputs, outputs, input_edge_count, output_edge_set); + } + void NodeUnit__operator_delete(NodeUnit* p) noexcept override { delete p; } + int NodeUnit__UnitType(const NodeUnit* p) noexcept override { return static_cast(p->UnitType()); } const std::vector& NodeUnit__Inputs(const NodeUnit* p) noexcept override { From ba86c418df30114d3cc4cba803b33b249129f570 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 15 Dec 2024 01:18:18 -0800 Subject: [PATCH 23/64] Move more header includes to ort_api.h --- .../core/providers/qnn/builder/onnx_ctx_model_helper.cc | 7 ++++--- 
.../core/providers/qnn/builder/onnx_ctx_model_helper.h | 5 +---- onnxruntime/core/providers/qnn/builder/op_builder.h | 3 +-- .../core/providers/qnn/builder/op_builder_factory.cc | 2 -- .../core/providers/qnn/builder/qnn_configs_helper.h | 8 ++++---- onnxruntime/core/providers/qnn/builder/qnn_def.h | 3 +-- onnxruntime/core/providers/qnn/builder/qnn_model.cc | 4 +--- onnxruntime/core/providers/qnn/builder/qnn_model.h | 7 ++----- .../core/providers/qnn/builder/qnn_model_wrapper.cc | 6 +++--- .../core/providers/qnn/builder/qnn_model_wrapper.h | 6 ++---- onnxruntime/core/providers/qnn/builder/qnn_node_group.h | 3 +-- .../core/providers/qnn/builder/qnn_node_group/utils.cc | 3 +-- .../core/providers/qnn/builder/qnn_node_group/utils.h | 3 +-- .../core/providers/qnn/builder/qnn_quant_params_wrapper.h | 6 +++--- onnxruntime/core/providers/qnn/ort_api.h | 4 ++++ onnxruntime/core/providers/qnn/qnn_execution_provider.cc | 3 +++ 16 files changed, 32 insertions(+), 41 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc index 1bb764913b6ea..9fe1ff3da381a 100644 --- a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc @@ -2,14 +2,15 @@ // Licensed under the MIT License. 
#include "core/providers/qnn/builder/onnx_ctx_model_helper.h" -#include "core/graph/constants.h" -#include "core/providers/qnn/builder/qnn_utils.h" -#include "core/providers/qnn/builder/qnn_model.h" #include #include #include +#include "core/providers/qnn/ort_api.h" +#include "core/providers/qnn/builder/qnn_utils.h" +#include "core/providers/qnn/builder/qnn_model.h" + namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h index f1ca1374be0b7..48d68aec55c38 100644 --- a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h @@ -7,10 +7,7 @@ #include #include "qnn_def.h" -#include "core/common/logging/logging.h" -#include "core/graph/graph_viewer.h" -#include "core/graph/model.h" -#include "core/framework/execution_provider.h" +#include "core/providers/qnn/ort_api.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/op_builder.h b/onnxruntime/core/providers/qnn/builder/op_builder.h index b729503320f05..0846275496ebf 100644 --- a/onnxruntime/core/providers/qnn/builder/op_builder.h +++ b/onnxruntime/core/providers/qnn/builder/op_builder.h @@ -3,8 +3,7 @@ #pragma once -#include "core/graph/graph_viewer.h" -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc index 6ef17b40d274b..3e337f679056f 100644 --- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc +++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc @@ -5,8 +5,6 @@ #include #include -#include - #include "op_builder_factory.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_configs_helper.h 
b/onnxruntime/core/providers/qnn/builder/qnn_configs_helper.h index 9dd9bbaa08d64..1f0680782d76b 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_configs_helper.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_configs_helper.h @@ -3,7 +3,7 @@ #pragma once -#include +#include namespace onnxruntime { namespace qnn { @@ -81,9 +81,9 @@ class QnnConfigsBuilder { BaseConfigType base_config_init_; CustomConfigType custom_config_init_; - InlinedVector custom_configs_; - InlinedVector configs_; - InlinedVector config_ptrs_; + std::vector custom_configs_; + std::vector configs_; + std::vector config_ptrs_; }; } // namespace qnn diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h index ffd2dc9b11010..705212ae52c77 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_def.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h @@ -9,8 +9,7 @@ #include #include #include -#include "core/graph/basic_types.h" -#include "core/common/common.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_quant_params_wrapper.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model.cc b/onnxruntime/core/providers/qnn/builder/qnn_model.cc index 79d13ba77ec86..8bafd17b2648e 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model.cc @@ -6,11 +6,9 @@ #include #include "QnnOpDef.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_node_group.h" -#include "core/framework/utils.h" -#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h" -#include "core/optimizer/qdq_transformer/selectors_actions/shared/utils.h" #include "core/providers/qnn/builder/qnn_utils.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model.h 
b/onnxruntime/core/providers/qnn/builder/qnn_model.h index 2e0935391ca78..489acaacde4fe 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_model.h @@ -3,16 +3,13 @@ #pragma once +#include #include -#include "core/common/status.h" -#include "core/framework/node_unit.h" -#include "core/graph/graph_viewer.h" -#include +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_def.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/qnn_backend_manager.h" -#include "core/session/onnxruntime_cxx_api.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc index 147c740313d7d..79e9e1408a9ca 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+#include "qnn_model_wrapper.h" + #include #include #include @@ -8,9 +10,7 @@ #include #include -#include "qnn_model_wrapper.h" -#include "core/common/safeint.h" -#include "core/framework/tensorprotoutils.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_utils.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h index 9e308aa33a560..8cd7360606d71 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h @@ -7,12 +7,10 @@ #include #include -#include "core/common/status.h" #include "QnnInterface.h" #include "qnn_def.h" -#include "core/common/logging/logging.h" -#include "core/framework/node_unit.h" -#include "core/graph/graph_viewer.h" + +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_quant_params_wrapper.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group.h b/onnxruntime/core/providers/qnn/builder/qnn_node_group.h index f9ef01411310f..276fbaae3b3c9 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group.h @@ -8,8 +8,7 @@ #include #include -#include "core/common/logging/logging.h" -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.cc b/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.cc index 5548d7d37c378..93b2fca296389 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.cc @@ -4,8 +4,7 @@ #include #include -#include "core/graph/graph_viewer.h" -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" #include 
"core/providers/qnn/builder/qnn_node_group.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.h b/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.h index 0d11d21906ccb..c4cf4e8a20a92 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.h @@ -7,8 +7,7 @@ #include #include -#include "core/graph/graph_viewer.h" -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_node_group.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_quant_params_wrapper.h b/onnxruntime/core/providers/qnn/builder/qnn_quant_params_wrapper.h index 23330f5616d73..01c15cf4bebe6 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_quant_params_wrapper.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_quant_params_wrapper.h @@ -4,10 +4,10 @@ #pragma once #include #include -#include "QnnTypes.h" -#include "core/common/common.h" #include -#include "core/framework/node_unit.h" + +#include "core/providers/qnn/ort_api.h" +#include "QnnTypes.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/ort_api.h b/onnxruntime/core/providers/qnn/ort_api.h index 0c26d9c99c200..1e6f96b565385 100644 --- a/onnxruntime/core/providers/qnn/ort_api.h +++ b/onnxruntime/core/providers/qnn/ort_api.h @@ -8,6 +8,7 @@ #if BUILD_QNN_EP_STATIC #include "core/common/common.h" #include "core/common/status.h" +#include "core/common/safeint.h" #include "core/common/logging/logging.h" #include "core/common/logging/capture.h" #include "core/common/path_string.h" @@ -20,6 +21,9 @@ #include "core/framework/tensor_shape.h" #include "core/framework/node_unit.h" #include "core/framework/tensorprotoutils.h" +#include "core/framework/utils.h" +#include "core/graph/constants.h" +#include "core/graph/basic_types.h" #include "core/graph/model.h" #include 
"core/graph/graph_viewer.h" #include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h" diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index 413db0489e37a..2cc954db5ad7f 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -189,6 +189,9 @@ qnn::ProfilingLevel QNNExecutionProvider::GetProfilingLevelFromETWLevel(unsigned QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_options_map, const ConfigOptions* config_options) : IExecutionProvider{onnxruntime::kQnnExecutionProvider} { + // TODO: Uncomment when QNN EP is built as a DLL + // InitProviderOrtApi(); + if (config_options) { disable_cpu_ep_fallback_ = config_options->GetConfigOrDefault( kOrtSessionOptionsDisableCPUEPFallback, "0") == "1"; From 2b1ea09dfa11cf482c72fc8f57328c64276f8358 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 15 Dec 2024 01:26:46 -0800 Subject: [PATCH 24/64] Add GraphViewer::NodeProducesGraphOutput() to provider bridge --- .../core/providers/shared_library/provider_interfaces.h | 1 + .../core/providers/shared_library/provider_wrappedtypes.h | 3 +++ onnxruntime/core/session/provider_bridge_ort.cc | 3 +++ 3 files changed, 7 insertions(+) diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 474eddf5af310..3e4b81fb75773 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -1021,6 +1021,7 @@ struct ProviderHost { virtual const std::vector& GraphViewer__GetInputs(const GraphViewer* p) noexcept = 0; virtual const std::vector& GraphViewer__GetOutputs(const GraphViewer* p) noexcept = 0; + virtual bool GraphViewer__NodeProducesGraphOutput(const GraphViewer* p, const Node& node) = 0; virtual const 
std::unordered_set& GraphViewer__GetValueInfo(const GraphViewer* p) noexcept = 0; virtual const InitializedTensorSet& GraphViewer__GetAllInitializedTensors(const GraphViewer* p) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index e4ecab5740af9..c2d99e1d5786f 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -1087,6 +1087,9 @@ class GraphViewer final { const std::vector& GetInputs() const noexcept { return g_host->GraphViewer__GetInputs(this); } const std::vector& GetOutputs() const noexcept { return g_host->GraphViewer__GetOutputs(this); } + bool NodeProducesGraphOutput(const Node& node) const { + return g_host->GraphViewer__NodeProducesGraphOutput(this, node); + } const std::unordered_set& GetValueInfo() const noexcept { return g_host->GraphViewer__GetValueInfo(this); } const InitializedTensorSet& GetAllInitializedTensors() const noexcept { return g_host->GraphViewer__GetAllInitializedTensors(this); } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index b6a9aa287e924..08943e5a4a2a6 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1263,6 +1263,9 @@ struct ProviderHostImpl : ProviderHost { const std::vector& GraphViewer__GetInputs(const GraphViewer* p) noexcept override { return p->GetInputs(); } const std::vector& GraphViewer__GetOutputs(const GraphViewer* p) noexcept override { return p->GetOutputs(); } + bool GraphViewer__NodeProducesGraphOutput(const GraphViewer* p, const Node& node) override { + return p->NodeProducesGraphOutput(node); + } const std::unordered_set& GraphViewer__GetValueInfo(const GraphViewer* p) noexcept override { return p->GetValueInfo(); } const InitializedTensorSet& 
GraphViewer__GetAllInitializedTensors(const GraphViewer* p) override { return p->GetAllInitializedTensors(); } From 421cd7814ae1ea5da93becfcee03691a37e95893 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 15 Dec 2024 04:25:04 -0800 Subject: [PATCH 25/64] Replace use of InlinedVector with std::vector and fix newly discovered bug in qnn_configs_helper --- .../qnn/builder/qnn_backend_manager.cc | 28 ++++++------- .../qnn/builder/qnn_configs_helper.h | 26 +++++++----- .../providers/qnn/qnn_execution_provider.cc | 40 +++++++++---------- 3 files changed, 50 insertions(+), 44 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index 7f575257a77f7..6a1dd37d01b65 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -394,25 +394,25 @@ Status QnnBackendManager::CreateDevice() { // Set SoC Model. The *enum* Qnn_SocModel_t is deprecated and will not be updated in the future. Therefore, // must use the latest SDK documentation to get the SoC model of the latest HW. if (soc_model_ != QNN_SOC_MODEL_UNKNOWN) { - QnnHtpDevice_CustomConfig_t& custom_config = device_configs_builder.PushCustomConfig(); - custom_config.option = QNN_HTP_DEVICE_CONFIG_OPTION_SOC; - custom_config.socModel = soc_model_; + gsl::not_null custom_config = device_configs_builder.PushCustomConfig(); + custom_config->option = QNN_HTP_DEVICE_CONFIG_OPTION_SOC; + custom_config->socModel = soc_model_; - QnnDevice_Config_t& device_config = device_configs_builder.PushConfig(); - device_config.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; - device_config.customConfig = &custom_config; + gsl::not_null device_config = device_configs_builder.PushConfig(); + device_config->option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; + device_config->customConfig = custom_config; } // Set the minimum HTP architecture. 
The driver will use ops that are compatible with this minimum architecture. if (htp_arch_ != QNN_HTP_DEVICE_ARCH_NONE) { - QnnHtpDevice_CustomConfig_t& custom_config = device_configs_builder.PushCustomConfig(); - custom_config.option = QNN_HTP_DEVICE_CONFIG_OPTION_ARCH; - custom_config.arch.arch = htp_arch_; - custom_config.arch.deviceId = device_id_; - - QnnDevice_Config_t& device_config = device_configs_builder.PushConfig(); - device_config.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; - device_config.customConfig = &custom_config; + gsl::not_null custom_config = device_configs_builder.PushCustomConfig(); + custom_config->option = QNN_HTP_DEVICE_CONFIG_OPTION_ARCH; + custom_config->arch.arch = htp_arch_; + custom_config->arch.deviceId = device_id_; + + gsl::not_null device_config = device_configs_builder.PushConfig(); + device_config->option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; + device_config->customConfig = custom_config; } } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_configs_helper.h b/onnxruntime/core/providers/qnn/builder/qnn_configs_helper.h index 1f0680782d76b..b581cd90537d9 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_configs_helper.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_configs_helper.h @@ -3,6 +3,7 @@ #pragma once +#include #include namespace onnxruntime { @@ -49,9 +50,9 @@ class QnnConfigsBuilder { * * \return A reference to a default CustomConfigType object. */ - CustomConfigType& PushCustomConfig() { - custom_configs_.push_back(custom_config_init_); - return custom_configs_.back(); + gsl::not_null PushCustomConfig() { + custom_configs_.push_back(std::make_unique(custom_config_init_)); + return custom_configs_.back().get(); } /** @@ -60,15 +61,15 @@ class QnnConfigsBuilder { * * \return A reference to a default BaseConfigType object. 
*/ - BaseConfigType& PushConfig() { - configs_.push_back(base_config_init_); - BaseConfigType& config = configs_.back(); + gsl::not_null PushConfig() { + configs_.push_back(std::make_unique(base_config_init_)); + BaseConfigType* config = configs_.back().get(); // Add pointer to this new config to the list of config pointers. if (IsNullTerminated()) { - config_ptrs_.back() = &config; // Replace last nullptr entry. + config_ptrs_.back() = config; // Replace last nullptr entry. } else { - config_ptrs_.push_back(&config); + config_ptrs_.push_back(config); } return config; @@ -81,8 +82,13 @@ class QnnConfigsBuilder { BaseConfigType base_config_init_; CustomConfigType custom_config_init_; - std::vector custom_configs_; - std::vector configs_; + + // Store elements of unique_ptrs instead of by value because std::vector reallocation would change the + // location of elements in memory. BaseConfigType objects may contain pointers to CustomConfigType objects, + // so we need to make sure that pointers to these objects are stable in memory. 
+ std::vector> custom_configs_; + std::vector> configs_; + std::vector config_ptrs_; }; diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index 2cc954db5ad7f..f4ce648251415 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -838,34 +838,34 @@ Status QNNExecutionProvider::CreateComputeFunc(std::vector& nod void QNNExecutionProvider::InitQnnGraphConfigs(qnn::QnnConfigsBuilder& configs_builder) const { if (qnn_backend_manager_->GetQnnBackendType() == qnn::QnnBackendType::HTP) { if (htp_graph_finalization_opt_mode_ != qnn::HtpGraphFinalizationOptimizationMode::kDefault) { - QnnHtpGraph_CustomConfig_t& htp_graph_opt_config = configs_builder.PushCustomConfig(); - htp_graph_opt_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_OPTIMIZATION; - htp_graph_opt_config.optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_FINALIZE_OPTIMIZATION_FLAG; - htp_graph_opt_config.optimizationOption.floatValue = static_cast(htp_graph_finalization_opt_mode_); - - QnnGraph_Config_t& graph_opt_config = configs_builder.PushConfig(); - graph_opt_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; - graph_opt_config.customConfig = &htp_graph_opt_config; + gsl::not_null htp_graph_opt_config = configs_builder.PushCustomConfig(); + htp_graph_opt_config->option = QNN_HTP_GRAPH_CONFIG_OPTION_OPTIMIZATION; + htp_graph_opt_config->optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_FINALIZE_OPTIMIZATION_FLAG; + htp_graph_opt_config->optimizationOption.floatValue = static_cast(htp_graph_finalization_opt_mode_); + + gsl::not_null graph_opt_config = configs_builder.PushConfig(); + graph_opt_config->option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; + graph_opt_config->customConfig = htp_graph_opt_config; } if (vtcm_size_in_mb_ > 0) { - QnnHtpGraph_CustomConfig_t& htp_graph_opt_config_vtcm = configs_builder.PushCustomConfig(); - 
htp_graph_opt_config_vtcm.option = QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE; - htp_graph_opt_config_vtcm.vtcmSizeInMB = static_cast(vtcm_size_in_mb_); + gsl::not_null htp_graph_opt_config_vtcm = configs_builder.PushCustomConfig(); + htp_graph_opt_config_vtcm->option = QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE; + htp_graph_opt_config_vtcm->vtcmSizeInMB = static_cast(vtcm_size_in_mb_); - QnnGraph_Config_t& graph_opt_config_vtcm = configs_builder.PushConfig(); - graph_opt_config_vtcm.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; - graph_opt_config_vtcm.customConfig = &htp_graph_opt_config_vtcm; + gsl::not_null graph_opt_config_vtcm = configs_builder.PushConfig(); + graph_opt_config_vtcm->option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; + graph_opt_config_vtcm->customConfig = htp_graph_opt_config_vtcm; } if (enable_HTP_FP16_precision_) { - QnnHtpGraph_CustomConfig_t& htp_graph_precision_config = configs_builder.PushCustomConfig(); - htp_graph_precision_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_PRECISION; - htp_graph_precision_config.precision = QNN_PRECISION_FLOAT16; + gsl::not_null htp_graph_precision_config = configs_builder.PushCustomConfig(); + htp_graph_precision_config->option = QNN_HTP_GRAPH_CONFIG_OPTION_PRECISION; + htp_graph_precision_config->precision = QNN_PRECISION_FLOAT16; - QnnGraph_Config_t& graph_precision_config = configs_builder.PushConfig(); - graph_precision_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; - graph_precision_config.customConfig = &htp_graph_precision_config; + gsl::not_null graph_precision_config = configs_builder.PushConfig(); + graph_precision_config->option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; + graph_precision_config->customConfig = htp_graph_precision_config; } } } From d94e6f7bf839738057ee6e43ddd2a658fc1083e7 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 15 Dec 2024 04:26:50 -0800 Subject: [PATCH 26/64] Eliminate use of qmath.h by introducing new quantization utils for QNN --- .../builder/opbuilder/simple_op_builder.cc | 24 +-- 
.../core/providers/qnn/builder/qnn_utils.cc | 173 ++++++++++++++---- .../core/providers/qnn/builder/qnn_utils.h | 59 +++++- 3 files changed, 203 insertions(+), 53 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc index f23b6b240389d..307ab31a09651 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc @@ -1,15 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/framework/tensorprotoutils.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "core/common/safeint.h" -#include "core/util/qmath.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { @@ -259,15 +254,16 @@ Status ProcessAlphaAttributeAsInput(QnnModelWrapper& qnn_model_wrapper, // Check LeakyRelu input 0 to see if it's quantized tensor bool is_quantized_tensor = node_unit.Outputs()[0].quant_param.has_value(); if (is_quantized_tensor) { - float scale; - uint8_t zero_point; - int64_t num_of_elements = 1; - concurrency::ThreadPool* thread_pool = nullptr; - GetQuantizationParameter(&tensor_data.alpha, num_of_elements, scale, zero_point, thread_pool); - unpacked_data.resize(1); - ParQuantizeLinearStd(&tensor_data.alpha, unpacked_data.data(), num_of_elements, scale, zero_point, thread_pool); - quantize_param = QnnQuantParamsWrapper(scale, static_cast(zero_point)); qnn_data_type = QNN_DATATYPE_UFIXED_POINT_8; + std::array scales = {1.0f}; + std::array offsets = {0}; + std::array shape = {1}; + auto float_data = gsl::make_span(&tensor_data.alpha, 1); + ORT_RETURN_IF_ERROR(qnn::utils::GetDataQuantParams(float_data, shape, 
scales, offsets, qnn_data_type)); + + unpacked_data.resize(1); + ORT_RETURN_IF_ERROR(qnn::utils::QuantizeData(float_data, shape, scales, offsets, unpacked_data, qnn_data_type)); + quantize_param = QnnQuantParamsWrapper(scales[0], static_cast(offsets[0])); } else { const auto& inputs = node_unit.Inputs(); TensorInfo input_info = {}; diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc index 8e7017c063bc0..64b62779263ad 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc @@ -523,39 +523,14 @@ bool OnnxDataTypeToQnnDataType(const int32_t onnx_data_type, Qnn_DataType_t& qnn } std::pair CheckMinMax(float rmin, float rmax) { - // Ensure a minimum range of 0.0001 (required by QNN) - rmax = std::max(rmax, rmin + 0.0001f); - // Both QNN and ORT require the range to include 0.0f rmin = std::min(rmin, 0.0f); rmax = std::max(rmax, 0.0f); - return std::make_pair(rmin, rmax); -} + // Ensure a minimum range of 0.0001 (required by QNN) + rmax = std::max(rmax, rmin + 0.0001f); -template -Status GetQminQmax(const Qnn_DataType_t qnn_data_type, - T& qmin, - T& qmax) { - if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_8) { - qmin = static_cast(std::numeric_limits::min()); - qmax = static_cast(std::numeric_limits::max()); - } else if (qnn_data_type == QNN_DATATYPE_UFIXED_POINT_8) { - qmin = static_cast(std::numeric_limits::min()); - qmax = static_cast(std::numeric_limits::max()); - } else if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_16) { - qmin = static_cast(std::numeric_limits::min()); - qmax = static_cast(std::numeric_limits::max()); - } else if (qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16) { - qmin = static_cast(std::numeric_limits::min()); - qmax = static_cast(std::numeric_limits::max()); - } else if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_32) { - qmin = static_cast(std::numeric_limits::min()); - qmax = 
static_cast(std::numeric_limits::max()); - } else { - ORT_RETURN_IF(true, "Qnn Data Type: %d not supported yet.", qnn_data_type); - } - return Status::OK(); + return std::make_pair(rmin, rmax); } inline float RoundHalfToEven(float input) { @@ -579,20 +554,22 @@ Status GetQuantParams(float rmin, rmin = -abs_max; } - float qmin = 0.0f; - float qmax = 255.0f; - ORT_RETURN_IF_ERROR(GetQminQmax(qnn_data_type, qmin, qmax)); + double rmin_dbl = static_cast(rmin); + double rmax_dbl = static_cast(rmax); + double qmin = 0.0; + double qmax = 0.0; + ORT_RETURN_IF_ERROR(GetQminQmax(qnn_data_type, qmin, qmax, symmetric)); - scale = (rmax - rmin) / (qmax - qmin); - float initial_zero_point = 0.0f; + double scale_dbl = (rmax_dbl - rmin_dbl) / (qmax - qmin); + double initial_zero_point = 0.0; if (symmetric) { - initial_zero_point = std::round(rmin + rmax) / 2; + initial_zero_point = std::round(rmin_dbl + rmax_dbl) / 2; } else { - initial_zero_point = qmin - (rmin / scale); + initial_zero_point = qmin - (rmin_dbl / scale_dbl); } - zero_point = static_cast(qnn::utils::RoundHalfToEven(Saturate(qmax, qmin, initial_zero_point))); - // To match QNN quantization definition - zero_point = 0 - zero_point; + zero_point = static_cast(RoundHalfToEven(static_cast(Saturate(qmax, qmin, initial_zero_point)))); + zero_point = -zero_point; // Negate to match QNN quantization definition. 
+ scale = static_cast(scale_dbl); return Status::OK(); } @@ -614,6 +591,126 @@ Status Quantize(const double double_value, return Status::OK(); } +size_t ShapeSizeCalc(gsl::span shape, size_t start, size_t end) { + size_t size = 1; + for (size_t i = start; i < end; i++) { + size *= shape[i]; + } + return size; +} + +Status GetDataQuantParams(gsl::span data, gsl::span shape, + /*out*/ gsl::span scales, /*out*/ gsl::span offsets, + Qnn_DataType_t data_type, bool symmetric, std::optional axis) { + const size_t num_dims = shape.size(); + const size_t num_elems = ShapeSizeCalc(shape, 0, num_dims); + ORT_RETURN_IF_NOT(num_elems == data.size(), "Shape mismatch with data to quantize"); + + size_t block_count = 1; + size_t broadcast_dim = 1; + size_t block_size = num_elems; + + if (axis.has_value()) { + size_t axis_no_neg = *axis < 0 ? static_cast(*axis) + num_dims : static_cast(*axis); + block_count = ShapeSizeCalc(shape, 0, axis_no_neg); + broadcast_dim = shape[axis_no_neg]; + block_size = ShapeSizeCalc(shape, axis_no_neg + 1, num_dims); + } + + ORT_RETURN_IF_NOT(scales.size() == broadcast_dim, "Unexpected size of scales output buffer"); + ORT_RETURN_IF_NOT(offsets.size() == broadcast_dim, "Unexpected size of offsets output buffer"); + + size_t i = 0; + for (size_t n = 0; n < block_count; n++) { + for (size_t bd = 0; bd < broadcast_dim; bd++) { + float rmin = std::numeric_limits::max(); + float rmax = std::numeric_limits::lowest(); + for (size_t j = 0; j < block_size; j++) { + rmin = std::min(rmin, data[i]); + rmax = std::max(rmax, data[i]); + i++; + } + + scales[bd] = 1.0f; + offsets[bd] = 0; + ORT_RETURN_IF_ERROR(GetQuantParams(rmin, rmax, data_type, scales[bd], offsets[bd], symmetric)); + } + } + + assert(i == data.size()); + return Status::OK(); +} + +Status QuantizeData(gsl::span data, gsl::span shape, + gsl::span scales, gsl::span offsets, + /*out*/ gsl::span quant_bytes, Qnn_DataType_t data_type, + std::optional axis) { + const size_t num_dims = shape.size(); + 
const size_t num_elems = ShapeSizeCalc(shape, 0, num_dims); + ORT_RETURN_IF_NOT(num_elems == data.size(), "Shape mismatch with data to quantize"); + size_t expected_num_quant_bytes = GetElementSizeByType(data_type) * data.size(); + ORT_RETURN_IF_NOT(quant_bytes.size() == expected_num_quant_bytes, + "Cannot quantize data because output buffer is not the correct size"); + + size_t block_count = 1; + size_t broadcast_dim = 1; + size_t block_size = num_elems; + + if (axis.has_value()) { + size_t axis_no_neg = *axis < 0 ? static_cast(*axis) + num_dims : static_cast(*axis); + block_count = ShapeSizeCalc(shape, 0, axis_no_neg); + broadcast_dim = shape[axis_no_neg]; + block_size = ShapeSizeCalc(shape, axis_no_neg + 1, num_dims); + } + + ORT_RETURN_IF_NOT(scales.size() == broadcast_dim, "Unexpected size of scales output buffer"); + ORT_RETURN_IF_NOT(offsets.size() == broadcast_dim, "Unexpected size of offsets output buffer"); + + size_t i = 0; + for (size_t n = 0; n < block_count; n++) { + for (size_t bd = 0; bd < broadcast_dim; bd++) { + switch (data_type) { + case QNN_DATATYPE_SFIXED_POINT_8: { + auto input_span = gsl::make_span(&data[i], block_size); + auto output_span = gsl::make_span(&quant_bytes[i * sizeof(int8_t)], sizeof(int8_t) * block_size); + ORT_RETURN_IF_ERROR(QuantizeData(input_span, scales[bd], offsets[bd], output_span)); + break; + } + case QNN_DATATYPE_UFIXED_POINT_8: { + auto input_span = gsl::make_span(&data[i], block_size); + auto output_span = gsl::make_span(&quant_bytes[i * sizeof(uint8_t)], sizeof(uint8_t) * block_size); + ORT_RETURN_IF_ERROR(QuantizeData(input_span, scales[bd], offsets[bd], output_span)); + break; + } + case QNN_DATATYPE_SFIXED_POINT_16: { + auto input_span = gsl::make_span(&data[i], block_size); + auto output_span = gsl::make_span(&quant_bytes[i * sizeof(int16_t)], sizeof(int16_t) * block_size); + ORT_RETURN_IF_ERROR(QuantizeData(input_span, scales[bd], offsets[bd], output_span)); + break; + } + case QNN_DATATYPE_UFIXED_POINT_16: { 
+ auto input_span = gsl::make_span(&data[i], block_size); + auto output_span = gsl::make_span(&quant_bytes[i * sizeof(uint16_t)], sizeof(uint16_t) * block_size); + ORT_RETURN_IF_ERROR(QuantizeData(input_span, scales[bd], offsets[bd], output_span)); + break; + } + case QNN_DATATYPE_SFIXED_POINT_32: { + auto input_span = gsl::make_span(&data[i], block_size); + auto output_span = gsl::make_span(&quant_bytes[i * sizeof(int32_t)], sizeof(int32_t) * block_size); + ORT_RETURN_IF_ERROR(QuantizeData(input_span, scales[bd], offsets[bd], output_span)); + break; + } + default: + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported quantization data type for QuantizeData"); + } + i += block_size; + } + } + assert(i == data.size()); + + return Status::OK(); +} + static bool GetType(const NodeArg& node_arg, int32_t& type, const logging::Logger& logger) { type = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED; const auto* type_proto = node_arg.TypeAsProto(); diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.h b/onnxruntime/core/providers/qnn/builder/qnn_utils.h index 578f50ba895cf..1a30b10105fbf 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.h @@ -74,7 +74,30 @@ static bool ArrayHasString(const std::array& strings, std:: std::pair CheckMinMax(float rmin, float rmax); template -Status GetQminQmax(const Qnn_DataType_t qnn_data_type, T& qmin, T& qmax); +Status GetQminQmax(const Qnn_DataType_t qnn_data_type, + T& qmin, + T& qmax, + bool symmetric = false) { + if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_8) { + qmin = static_cast(std::numeric_limits::min() + static_cast(symmetric)); + qmax = static_cast(std::numeric_limits::max()); + } else if (qnn_data_type == QNN_DATATYPE_UFIXED_POINT_8) { + qmin = static_cast(std::numeric_limits::min()); + qmax = static_cast(std::numeric_limits::max()); + } else if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_16) { + qmin = 
static_cast(std::numeric_limits::min() + static_cast(symmetric)); + qmax = static_cast(std::numeric_limits::max()); + } else if (qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16) { + qmin = static_cast(std::numeric_limits::min()); + qmax = static_cast(std::numeric_limits::max()); + } else if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_32) { + qmin = static_cast(std::numeric_limits::min() + static_cast(symmetric)); + qmax = static_cast(std::numeric_limits::max()); + } else { + ORT_RETURN_IF(true, "Qnn Data Type: %d not supported yet.", qnn_data_type); + } + return Status::OK(); +} template inline T Saturate(const T qmax, @@ -104,6 +127,40 @@ Status Quantize(const double double_value, const Qnn_DataType_t qnn_data_type, int& quant_value); +size_t ShapeSizeCalc(gsl::span shape, size_t start, size_t end); + +Status GetDataQuantParams(gsl::span data, gsl::span shape, + /*out*/ gsl::span scales, /*out*/ gsl::span offsets, + Qnn_DataType_t data_type, bool symmetric = false, + std::optional axis = std::nullopt); + +Status QuantizeData(gsl::span data, gsl::span shape, + gsl::span scales, gsl::span offsets, + /*out*/ gsl::span quant_bytes, Qnn_DataType_t data_type, + std::optional axis = std::nullopt); + +template +inline Status QuantizeData(gsl::span data, float scale, int32_t offset, + /*out*/ gsl::span quant_bytes) { + const size_t num_elems = data.size(); + const size_t expected_output_bytes = sizeof(QuantType) * num_elems; + ORT_RETURN_IF_NOT(expected_output_bytes == quant_bytes.size(), + "Output buffer is not large enough to hold quantized bytes."); + const double clip_min = static_cast(std::numeric_limits::lowest()); + const double clip_max = static_cast(std::numeric_limits::max()); + + QuantType* output = reinterpret_cast(quant_bytes.data()); + for (size_t i = 0; i < num_elems; ++i) { + const double scale_dbl = static_cast(scale); + const double offset_dbl = static_cast(offset); + double float_val = std::nearbyint(static_cast(data[i]) / scale_dbl) - offset_dbl; + 
float_val = std::max(float_val, clip_min); + float_val = std::min(float_val, clip_max); + output[i] = static_cast(float_val); + } + return Status::OK(); +} + // Re-writes a buffer of packed 4-bit elements to a buffer of unpacked 8-bit elements. // QNN requires that 4-bit weights are unpacked to 8-bit. template From 4eb1e8088f106bbb2f8721c021329a349b3d738f Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 15 Dec 2024 04:43:01 -0800 Subject: [PATCH 27/64] Move includes into qnn/ort_api.h --- .../qnn/builder/opbuilder/argmax_min_op_builder.cc | 5 +---- .../providers/qnn/builder/opbuilder/base_op_builder.cc | 6 ------ .../providers/qnn/builder/opbuilder/base_op_builder.h | 2 +- .../qnn/builder/opbuilder/batch_norm_op_builder.cc | 6 +----- .../providers/qnn/builder/opbuilder/cast_op_builder.cc | 3 +-- .../providers/qnn/builder/opbuilder/clip_op_builder.cc | 4 +--- .../providers/qnn/builder/opbuilder/conv_op_builder.cc | 6 +----- .../providers/qnn/builder/opbuilder/expand_op_builder.cc | 5 +---- .../providers/qnn/builder/opbuilder/gather_op_builder.cc | 5 +---- .../providers/qnn/builder/opbuilder/gemm_op_builder.cc | 5 +---- .../qnn/builder/opbuilder/instance_norm_op_builder.cc | 7 +------ .../qnn/builder/opbuilder/layer_norm_op_builder.cc | 7 +------ .../providers/qnn/builder/opbuilder/lrn_op_builder.cc | 2 -- .../providers/qnn/builder/opbuilder/pad_op_builder.cc | 6 +----- .../providers/qnn/builder/opbuilder/pool_op_builder.cc | 7 +------ .../providers/qnn/builder/opbuilder/reduce_op_builder.cc | 4 +--- .../qnn/builder/opbuilder/reshape_op_builder.cc | 5 +---- .../providers/qnn/builder/opbuilder/resize_op_builder.cc | 9 ++------- .../providers/qnn/builder/opbuilder/slice_op_builder.cc | 6 +----- .../qnn/builder/opbuilder/softmax_op_builder.cc | 6 +----- .../providers/qnn/builder/opbuilder/split_op_builder.cc | 5 +---- .../providers/qnn/builder/opbuilder/tile_op_builder.cc | 5 +---- onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc | 1 - 
.../qnn/builder/opbuilder/transpose_op_builder.cc | 2 +- .../qnn/builder/qnn_node_group/conv_activation_fusion.cc | 4 ++-- .../qnn/builder/qnn_node_group/conv_activation_fusion.h | 2 +- .../providers/qnn/builder/qnn_node_group/dq_q_fusion.cc | 4 ++-- .../providers/qnn/builder/qnn_node_group/dq_q_fusion.h | 3 +-- .../qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.cc | 4 ++-- .../qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.h | 3 +-- .../qnn/builder/qnn_node_group/qnn_node_group.cc | 3 +-- onnxruntime/core/providers/qnn/ort_api.h | 2 ++ 32 files changed, 34 insertions(+), 110 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/argmax_min_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/argmax_min_op_builder.cc index 192c9496f0999..76762c82ece2e 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/argmax_min_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/argmax_min_op_builder.cc @@ -1,14 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "core/providers/common.h" -#include "core/framework/tensorprotoutils.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc index 7690427416770..7db4f8c0c609d 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc @@ -4,12 +4,6 @@ #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include - -#include "core/framework/tensorprotoutils.h" -#include "core/providers/cpu/tensor/transpose.h" -#include "core/common/safeint.h" - namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h index b2bb3f043eecd..0c400c4a5abc9 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h @@ -3,11 +3,11 @@ #pragma once +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder.h" #include "core/providers/qnn/builder/qnn_quant_params_wrapper.h" -#include "core/framework/allocator.h" #include "QnnOpDef.h" diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc index 9c7f1d374e5b7..5be54729a6a86 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc +++ 
b/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc @@ -5,15 +5,11 @@ #include #include -#include "core/providers/common.h" -#include "core/framework/float16.h" -#include "core/framework/tensorprotoutils.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { class BatchNormOpBuilder : public BaseOpBuilder { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/cast_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/cast_op_builder.cc index d3bdee02437e4..5b3dd223a9f3f 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/cast_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/cast_op_builder.cc @@ -4,12 +4,11 @@ #include #include +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc index aa6080eb1195d..a86354edf7e8e 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc @@ -4,13 +4,11 @@ #include #include -#include "core/providers/common.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { class ClipOpBuilder : public BaseOpBuilder { diff --git 
a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc index f0c6f53affecd..5f723e2e262be 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc @@ -1,15 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/framework/tensorprotoutils.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc index 20978f41b529b..1beab0ed8b735 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc @@ -1,13 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "core/providers/common.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "core/common/safeint.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc index df02d12bd59c9..a3ada04b7b017 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc @@ -2,13 +2,10 @@ // Licensed under the MIT License. #include -#include "core/providers/common.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "core/common/safeint.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/gemm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/gemm_op_builder.cc index 20f2f4383044c..8a36aa192313f 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/gemm_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/gemm_op_builder.cc @@ -1,13 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "core/providers/common.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "core/common/safeint.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc index 53bc93e2fa832..80d96f513df63 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc @@ -1,15 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/framework/tensorprotoutils.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" -#include "onnx/defs/data_type_utils.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/layer_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/layer_norm_op_builder.cc index b0394be15aba2..7f830d68999a1 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/layer_norm_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/layer_norm_op_builder.cc @@ -2,15 +2,10 @@ // Licensed under the MIT License. 
#include -#include "core/providers/common.h" -#include "core/framework/tensorprotoutils.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" -#include "onnx/defs/data_type_utils.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc index dbb29557cccc4..bc2b7a01c0779 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc @@ -5,8 +5,6 @@ #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" -#include "onnx/defs/data_type_utils.h" #include "QnnOpDef.h" // From QNN SDK: contains QNN constants (e.g., op names, param values). diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc index 3035da2723907..7d40a3489e550 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc @@ -1,13 +1,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "core/providers/common.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/providers/cpu/tensor/slice_helper.h" -#include "core/common/safeint.h" - -#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc index 0ed11bed30929..4923371ce9ee2 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc @@ -1,15 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/framework/tensorprotoutils.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" -#include "onnx/defs/data_type_utils.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc index a2eeeee4453e4..a74fcfd843551 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc @@ -6,10 +6,8 @@ #include #include -#include "core/common/safeint.h" -#include "onnx/defs/data_type_utils.h" -#include "core/providers/common.h" #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include 
"core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/qnn_utils.h" diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc index c374a3c64b350..7d12a6843d4a6 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc @@ -1,14 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/framework/tensorprotoutils.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc index 6b1088e488c31..5298b20033115 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc @@ -5,15 +5,10 @@ #include #include -#include "core/providers/common.h" -#include "core/framework/tensorprotoutils.h" -#include "core/providers/qnn/builder/qnn_model_wrapper.h" -#include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/providers/cpu/tensor/slice_helper.h" -#include "core/common/safeint.h" - #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" +#include "core/providers/qnn/builder/qnn_model_wrapper.h" +#include "core/providers/qnn/builder/op_builder_factory.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc 
b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc index e383e71d2a497..3096967a5f166 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc @@ -1,16 +1,12 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/cpu/tensor/slice_helper.h" -#include "core/framework/tensorprotoutils.h" - -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc index bc5339d90660e..e7d37937d527f 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc @@ -1,14 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "core/providers/common.h" -#include "core/framework/tensorprotoutils.h" +#include "base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc index f435b1d6d802f..de7277390f9bb 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc @@ -1,14 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/cpu/tensor/slice_helper.h" -#include "core/common/safeint.h" - -#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc index e66c4cd350235..1d518c3ed5359 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc @@ -1,14 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "core/providers/common.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/cpu/tensor/slice_helper.h" -#include "core/common/safeint.h" - -#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc index 9cb8f91a9db0b..b2891022e73a5 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc @@ -1,7 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" -#include "core/framework/utils.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/transpose_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/transpose_op_builder.cc index 1290a012d5902..7bc43f04d5361 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/transpose_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/transpose_op_builder.cc @@ -4,10 +4,10 @@ #include #include +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "core/common/safeint.h" #include "base_op_builder.h" diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.cc b/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.cc index 76316250a88ad..789811a423884 100644 --- 
a/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.cc @@ -6,8 +6,8 @@ #include #include #include -#include "core/graph/graph_utils.h" -#include "core/framework/node_unit.h" + +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_node_group/utils.h" diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.h b/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.h index b604b25e943e6..a211c86c2301e 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.h @@ -9,7 +9,7 @@ #include #include -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_node_group.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.cc b/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.cc index 17af5725a01ee..3af2fdd1f0276 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.cc @@ -6,8 +6,8 @@ #include #include #include -#include "core/graph/graph_utils.h" -#include "core/framework/node_unit.h" + +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_node_group/utils.h" diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.h b/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.h index 90fe44c3af059..d3d552bc172ec 100644 --- 
a/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.h @@ -7,8 +7,7 @@ #include #include -#include "core/common/common.h" -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_node_group.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.cc b/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.cc index aceaf0399a6cb..0b2d7451553e7 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.cc @@ -6,8 +6,8 @@ #include #include #include -#include "core/graph/graph_utils.h" -#include "core/framework/node_unit.h" + +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.h b/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.h index 3b67f13492a46..0a1b16d24ffcd 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.h @@ -7,8 +7,7 @@ #include #include -#include "core/common/common.h" -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_node_group.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/qnn_node_group.cc b/onnxruntime/core/providers/qnn/builder/qnn_node_group/qnn_node_group.cc index 9fb9e815321c0..56413b781b246 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/qnn_node_group.cc +++ 
b/onnxruntime/core/providers/qnn/builder/qnn_node_group/qnn_node_group.cc @@ -10,8 +10,7 @@ #include #include #include -#include "core/graph/graph_utils.h" -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" diff --git a/onnxruntime/core/providers/qnn/ort_api.h b/onnxruntime/core/providers/qnn/ort_api.h index 1e6f96b565385..494d1fbeedb3d 100644 --- a/onnxruntime/core/providers/qnn/ort_api.h +++ b/onnxruntime/core/providers/qnn/ort_api.h @@ -6,6 +6,7 @@ #define BUILD_QNN_EP_STATIC 1 #if BUILD_QNN_EP_STATIC +#include "onnx/defs/data_type_utils.h" #include "core/common/common.h" #include "core/common/status.h" #include "core/common/safeint.h" @@ -14,6 +15,7 @@ #include "core/common/path_string.h" #include "core/platform/env.h" #include "core/framework/data_types.h" +#include "core/framework/float16.h" #include "core/framework/run_options.h" #include "core/framework/execution_provider.h" #include "core/framework/model_metadef_id_generator.h" From d86fb6c005856d8a4f2c9a7dadafc4ec7b929ef8 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 15 Dec 2024 04:56:46 -0800 Subject: [PATCH 28/64] Add TensorProto::has_data_type() to provider bridge --- onnxruntime/core/providers/shared_library/provider_interfaces.h | 1 + .../core/providers/shared_library/provider_wrappedtypes.h | 1 + onnxruntime/core/session/provider_bridge_ort.cc | 1 + 3 files changed, 3 insertions(+) diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 3e4b81fb75773..96cd72c91b0e6 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -472,6 +472,7 @@ struct ProviderHost { virtual bool TensorProto__has_raw_data(const 
ONNX_NAMESPACE::TensorProto* p) = 0; virtual const std::string& TensorProto__raw_data(const ONNX_NAMESPACE::TensorProto* p) = 0; virtual std::string* TensorProto__mutable_raw_data(ONNX_NAMESPACE::TensorProto* p) = 0; + virtual bool TensorProto__has_data_type(const ONNX_NAMESPACE::TensorProto* p) = 0; virtual int32_t TensorProto__data_type(const ONNX_NAMESPACE::TensorProto* p) = 0; virtual void TensorProto__set_data_type(ONNX_NAMESPACE::TensorProto* p, int32_t type) = 0; virtual void TensorProto__CopyFrom(ONNX_NAMESPACE::TensorProto* p, const ONNX_NAMESPACE::TensorProto* other) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index c2d99e1d5786f..03a0b4ea99524 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -237,6 +237,7 @@ struct TensorProto final { const std::string& raw_data() const { return g_host->TensorProto__raw_data(this); } std::string* mutable_raw_data() { return g_host->TensorProto__mutable_raw_data(this); } + bool has_data_type() const { return g_host->TensorProto__has_data_type(this); } int32_t data_type() const { return g_host->TensorProto__data_type(this); } void set_data_type(int32_t type) { return g_host->TensorProto__set_data_type(this, type); } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 08943e5a4a2a6..b18d4c6ed23ef 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -594,6 +594,7 @@ struct ProviderHostImpl : ProviderHost { const std::string& TensorProto__raw_data(const ONNX_NAMESPACE::TensorProto* p) override { return p->raw_data(); } std::string* TensorProto__mutable_raw_data(ONNX_NAMESPACE::TensorProto* p) override { return p->mutable_raw_data(); } + bool TensorProto__has_data_type(const 
ONNX_NAMESPACE::TensorProto* p) override { return p->has_data_type(); } int32_t TensorProto__data_type(const ONNX_NAMESPACE::TensorProto* p) override { return p->data_type(); } void TensorProto__set_data_type(ONNX_NAMESPACE::TensorProto* p, int32_t type) override { p->set_data_type(type); } From 187e3b9e0a38c40fe9ef8db73ab020e02774c2a2 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 15 Dec 2024 06:02:35 -0800 Subject: [PATCH 29/64] Checkpoint: updating usage of provider bridge in ep --- .../qnn/builder/qnn_backend_manager.cc | 12 ++-- .../core/providers/qnn/builder/qnn_model.cc | 2 +- .../core/providers/qnn/builder/qnn_utils.cc | 31 +++++----- onnxruntime/core/providers/qnn/ort_api.h | 2 +- .../providers/qnn/qnn_execution_provider.cc | 62 ++++++++++++------- .../providers/qnn/qnn_provider_factory.cc | 22 ++----- .../providers/shared_library/provider_api.h | 8 +++ .../provider_bridge_provider.cc | 3 + .../shared_library/provider_interfaces.h | 7 +++ .../shared_library/provider_wrappedtypes.h | 1 + .../core/session/provider_bridge_ort.cc | 24 +++++++ 11 files changed, 111 insertions(+), 63 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index 6a1dd37d01b65..5a9abe43fec72 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -246,12 +246,12 @@ void QnnLogging(const char* format, const auto data_type = ::onnxruntime::logging::DataType::SYSTEM; if (logger.OutputIsEnabled(severity, data_type)) { - ::onnxruntime::logging::Capture(logger, - severity, - ::onnxruntime::logging::Category::onnxruntime, - data_type, - ORT_WHERE) - .ProcessPrintf(format, argument_parameter); + auto log_capture = ::onnxruntime::logging::Capture::Create(logger, + severity, + ::onnxruntime::logging::Category::onnxruntime, + data_type, + ORT_WHERE); + log_capture.ProcessPrintf(format, 
argument_parameter); } } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model.cc b/onnxruntime/core/providers/qnn/builder/qnn_model.cc index 8bafd17b2648e..de8fa816efdb1 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model.cc @@ -101,7 +101,7 @@ Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer, // valid throughout the lifetime of the ModelBuilder std::vector> node_unit_holder; std::unordered_map node_unit_map; - std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer, logger); + std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(&graph_viewer, logger); // This name must be same with the EPContext node name const auto& graph_name = fused_node.Name(); diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc index 64b62779263ad..50a151292d9c7 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc @@ -731,7 +731,7 @@ NodeAttrHelper::NodeAttrHelper(const NodeUnit& node_unit) float NodeAttrHelper::Get(const std::string& key, float def_val) const { if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - return entry->second.f(); + return entry->second().f(); } return def_val; @@ -739,7 +739,7 @@ float NodeAttrHelper::Get(const std::string& key, float def_val) const { int32_t NodeAttrHelper::Get(const std::string& key, int32_t def_val) const { if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - return narrow(entry->second.i()); + return narrow(entry->second().i()); } return def_val; @@ -747,7 +747,7 @@ int32_t NodeAttrHelper::Get(const std::string& key, int32_t def_val) const { uint32_t NodeAttrHelper::Get(const std::string& key, uint32_t def_val) const { if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - return 
narrow(entry->second.i()); + return narrow(entry->second().i()); } return def_val; @@ -755,7 +755,7 @@ uint32_t NodeAttrHelper::Get(const std::string& key, uint32_t def_val) const { int64_t NodeAttrHelper::Get(const std::string& key, int64_t def_val) const { if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - return entry->second.i(); + return entry->second().i(); } return def_val; @@ -763,7 +763,7 @@ int64_t NodeAttrHelper::Get(const std::string& key, int64_t def_val) const { const std::string& NodeAttrHelper::Get(const std::string& key, const std::string& def_val) const { if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - return entry->second.s(); + return entry->second().s(); } return def_val; @@ -771,7 +771,7 @@ const std::string& NodeAttrHelper::Get(const std::string& key, const std::string std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - const auto& values = entry->second.ints(); + const auto& values = entry->second().ints(); const int64_t* cbegin = values.data(); const int64_t* cend = values.data() + values.size(); std::vector v; @@ -786,7 +786,7 @@ std::vector NodeAttrHelper::Get(const std::string& key, const std::vect std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - const auto& values = entry->second.ints(); + const auto& values = entry->second().ints(); const int64_t* cbegin = values.data(); const int64_t* cend = values.data() + values.size(); std::vector v; @@ -801,7 +801,7 @@ std::vector NodeAttrHelper::Get(const std::string& key, const std::vec std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - const auto& values = 
entry->second.ints(); + const auto& values = entry->second().ints(); const int64_t* cbegin = values.data(); const int64_t* cend = values.data() + values.size(); return std::vector{cbegin, cend}; @@ -812,7 +812,7 @@ std::vector NodeAttrHelper::Get(const std::string& key, const std::vect std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - const auto& values = entry->second.floats(); + const auto& values = entry->second().floats(); const float* cbegin = values.data(); const float* cend = values.data() + values.size(); return std::vector{cbegin, cend}; @@ -824,7 +824,7 @@ std::vector NodeAttrHelper::Get(const std::string& key, const std::vector std::optional NodeAttrHelper::GetFloat(const std::string& key) const { std::optional result; if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - result = entry->second.f(); + result = entry->second().f(); } return result; @@ -833,7 +833,7 @@ std::optional NodeAttrHelper::GetFloat(const std::string& key) const { std::optional NodeAttrHelper::GetInt64(const std::string& key) const { std::optional result; if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - result = entry->second.i(); + result = entry->second().i(); } return result; @@ -842,7 +842,7 @@ std::optional NodeAttrHelper::GetInt64(const std::string& key) const { std::optional> NodeAttrHelper::GetFloats(const std::string& key) const { std::optional> result; if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - const auto& values = entry->second.floats(); + const auto& values = entry->second().floats(); const float* cbegin = values.data(); const float* cend = values.data() + values.size(); result = std::vector(cbegin, cend); @@ -854,7 +854,7 @@ std::optional> NodeAttrHelper::GetFloats(const std::string& k std::optional> NodeAttrHelper::GetInt64s(const std::string& key) 
const { std::optional> result; if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - const auto& values = entry->second.ints(); + const auto& values = entry->second().ints(); const int64_t* cbegin = values.data(); const int64_t* cend = values.data() + values.size(); result = std::vector(cbegin, cend); @@ -866,15 +866,16 @@ std::optional> NodeAttrHelper::GetInt64s(const std::string& std::optional NodeAttrHelper::GetString(const std::string& key) const { std::optional result; if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { - result = entry->second.s(); + result = entry->second().s(); } return result; } bool NodeAttrHelper::HasAttr(const std::string& key) const { - return Contains(node_attributes_, key); + return node_attributes_.find(key) != node_attributes_.end(); } + static bool GetClipMinMaxImpl(const GraphViewer& graph_viewer, const Node& node, float& min, float& max, const logging::Logger& logger) { const auto& node_name = node.Name(); diff --git a/onnxruntime/core/providers/qnn/ort_api.h b/onnxruntime/core/providers/qnn/ort_api.h index 494d1fbeedb3d..6fb346d2349a0 100644 --- a/onnxruntime/core/providers/qnn/ort_api.h +++ b/onnxruntime/core/providers/qnn/ort_api.h @@ -3,7 +3,7 @@ #pragma once -#define BUILD_QNN_EP_STATIC 1 +#define BUILD_QNN_EP_STATIC 0 #if BUILD_QNN_EP_STATIC #include "onnx/defs/data_type_utils.h" diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index f4ce648251415..c8ec3098b7ce0 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -243,8 +243,10 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio // separate out the profiling level for ETW in case it gets disabled later when we extract the events // set to invalid to indicate that ETW is no enabled when we setup QNN qnn::ProfilingLevel 
profiling_level_etw = qnn::ProfilingLevel::INVALID; - const Env& env = Env::Default(); - // const Env& env = GetDefaultEnv(); + +// TODO: Re-enable ETW after QNN is a DLL +#if 0 + const Env& env = GetDefaultEnv(); auto& provider = env.GetTelemetryProvider(); if (provider.IsEnabled()) { auto level = provider.Level(); @@ -255,6 +257,7 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio } } } +#endif // In case ETW gets disabled later auto profiling_level_pos = provider_options_map.find(PROFILING_LEVEL); @@ -492,9 +495,10 @@ static void LogNodeSupport(const logging::Logger& logger, oss << "\tREASON : " << support_status.ErrorMessage() << std::endl; } - logging::Capture(logger, log_severity, logging::Category::onnxruntime, - log_data_type, call_site) - .Stream() + std::unique_ptr log_capture = logging::Capture::Create(logger, log_severity, + logging::Category::onnxruntime, + log_data_type, call_site); + log_capture->Stream() << (support_status.IsOK() ? "Validation PASSED " : "Validation FAILED ") << "for " << num_nodes << " nodes in " << qnn_node_group.Type() << " (" << qnn_node_group.GetTargetNodeUnit()->OpType() << ") :" << std::endl @@ -598,11 +602,11 @@ static bool EpSharedContextsHasAllGraphs(const std::vectorName(); + const std::string& graph_name = ep_context_node.Name(); bool has_shared_qnn_model = SharedContext::GetInstance().HasQnnModel(graph_name); if (!has_shared_qnn_model) { LOGS(logger, VERBOSE) << "Graph: " << graph_name << " from EpContext node not found from shared EP contexts."; @@ -617,7 +621,7 @@ static bool EpSharedContextsHasAllGraphs(const std::vector>& result, - const utils::GenerateMetadefNameFn& gen_metadef_name, + const std::function& gen_metadef_name, const logging::Logger& logger) { std::unordered_set supported_nodes{}; std::vector> supported_groups{}; @@ -722,7 +726,7 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer std::vector> node_unit_holder; std::unordered_map 
node_unit_map; - std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer, logger); + std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(&graph_viewer, logger); // remove is_qnn_ctx_model related code const auto supported_nodes = GetSupportedNodes(graph_viewer, node_unit_map, @@ -765,11 +769,11 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer bool is_valid_partition = true; size_t nodes_in_partition = 0; - if (partition && partition->sub_graph) { - nodes_in_partition = partition->sub_graph->nodes.size(); + if (partition && partition->SubGraph()) { + nodes_in_partition = partition->SubGraph()->Nodes().size(); if (nodes_in_partition == 1 && !is_qnn_ctx_model) { - const Node* node = graph_viewer.GetNode(partition->sub_graph->nodes[0]); + const Node* node = graph_viewer.GetNode(partition->SubGraph()->Nodes()[0]); if (!node) { LOGS(logger, ERROR) << "QNN EP: Invalid node in partition of one node."; @@ -922,10 +926,10 @@ Status QNNExecutionProvider::Compile(const std::vector& fused if (EpSharedContextsHasAllGraphs(fused_nodes_and_graphs, logger)) { for (auto fused_node_and_graph : fused_nodes_and_graphs) { const onnxruntime::GraphViewer& graph_viewer(fused_node_and_graph.filtered_graph); - const auto& ep_context_node = graph_viewer.Nodes().begin(); + const Node& ep_context_node = *graph_viewer.Nodes().begin(); const Node& fused_node = fused_node_and_graph.fused_node; const std::string& graph_meta_id = fused_node.Name(); - std::string key = ep_context_node->Name(); + std::string key = ep_context_node.Name(); auto qnn_model_shared = SharedContext::GetInstance().GetSharedQnnModel(key); ORT_RETURN_IF(nullptr == qnn_model_shared, "Graph: " + key + " not found from shared EP contexts."); ORT_RETURN_IF_ERROR(qnn_model_shared->SetGraphInputOutputInfo(graph_viewer, fused_node, logger)); @@ -967,10 +971,10 @@ Status QNNExecutionProvider::Compile(const std::vector& fused for (auto fused_node_and_graph : 
fused_nodes_and_graphs) { const onnxruntime::GraphViewer& graph_viewer(fused_node_and_graph.filtered_graph); - const auto& ep_context_node = graph_viewer.Nodes().begin(); + const Node& ep_context_node = *graph_viewer.Nodes().begin(); const Node& fused_node = fused_node_and_graph.fused_node; const std::string& graph_meta_id = fused_node.Name(); - std::string key = ep_context_node->Name(); + std::string key = ep_context_node.Name(); ORT_RETURN_IF(qnn_models.find(key) == qnn_models.end(), key + " key name not exist in table qnn_models."); auto qnn_model = std::move(qnn_models[key]); ORT_RETURN_IF_ERROR(qnn_model->SetGraphInputOutputInfo(graph_viewer, fused_node, logger)); @@ -1030,8 +1034,8 @@ const InlinedVector QNNExecutionProvider::GetEpContextNodes() const InlinedVector ep_context_nodes; if (qnn_ep_context_model_) { const auto& graph = qnn_ep_context_model_->MainGraph(); - for (const auto& node : graph.Nodes()) { - ep_context_nodes.push_back(graph.GetNode(node.Index())); + for (const Node* node : graph.Nodes()) { + ep_context_nodes.push_back(graph.GetNode(node->Index())); } } @@ -1122,22 +1126,34 @@ void QNNExecutionProvider::ReleasePerThreadContext() const { per_thread_context_cache->erase(cached_context_it); } +static bool TryGetConfigEntry(const ConfigOptions& config_options, const std::string& key, std::string& value) { + std::optional new_value = config_options.GetConfigEntry(key); + if (!new_value.has_value()) { + return false; + } + + value = *new_value; + return true; +} + Status QNNExecutionProvider::OnRunStart(const onnxruntime::RunOptions& run_options) { auto backend_type = qnn_backend_manager_->GetQnnBackendType(); if (qnn::QnnBackendType::HTP != backend_type && qnn::QnnBackendType::DSP != backend_type) { return Status::OK(); } + const ConfigOptions& config_options = run_options.GetConfigOptions(); + std::string htp_perf_mode = ""; qnn::HtpPerformanceMode htp_performance_mode = qnn::HtpPerformanceMode::kHtpDefault; - if 
(run_options.config_options.TryGetConfigEntry(kOrtRunOptionsConfigQnnPerfMode, htp_perf_mode)) { + if (TryGetConfigEntry(config_options, kOrtRunOptionsConfigQnnPerfMode, htp_perf_mode)) { // set power mode ParseHtpPerformanceMode(htp_perf_mode, htp_performance_mode); } std::string rpc_latency = ""; uint32_t rpc_control_latency = 0; - if (run_options.config_options.TryGetConfigEntry(kOrtRunOptionsConfigQnnRpcControlLatency, rpc_latency)) { + if (TryGetConfigEntry(config_options, kOrtRunOptionsConfigQnnRpcControlLatency, rpc_latency)) { rpc_control_latency = static_cast(std::stoul(rpc_latency)); LOGS_DEFAULT(VERBOSE) << "rpc_control_latency: " << rpc_control_latency; } @@ -1163,9 +1179,11 @@ Status QNNExecutionProvider::OnRunEnd(bool /*sync_stream*/, const onnxruntime::R return Status::OK(); } + const ConfigOptions& config_options = run_options.GetConfigOptions(); + std::string htp_perf_mode = ""; qnn::HtpPerformanceMode htp_performance_mode = qnn::HtpPerformanceMode::kHtpDefault; - if (run_options.config_options.TryGetConfigEntry(kOrtRunOptionsConfigQnnPerfModePostRun, htp_perf_mode)) { + if (TryGetConfigEntry(config_options, kOrtRunOptionsConfigQnnPerfModePostRun, htp_perf_mode)) { // set power mode ParseHtpPerformanceMode(htp_perf_mode, htp_performance_mode); } diff --git a/onnxruntime/core/providers/qnn/qnn_provider_factory.cc b/onnxruntime/core/providers/qnn/qnn_provider_factory.cc index fdeb9dc106386..2407a7c83bfeb 100644 --- a/onnxruntime/core/providers/qnn/qnn_provider_factory.cc +++ b/onnxruntime/core/providers/qnn/qnn_provider_factory.cc @@ -22,19 +22,8 @@ struct QNNProviderFactory : IExecutionProviderFactory { const ConfigOptions* config_options_; }; -// TODO: Move to core/session/provider_bridge_ort.cc -std::shared_ptr QNNProviderFactoryCreator::Create(const ProviderOptions& provider_options_map, - const SessionOptions* session_options) { - const ConfigOptions* config_options = nullptr; - if (session_options != nullptr) { - config_options = 
&session_options->config_options; - } - - return std::make_shared(provider_options_map, config_options); -} - -struct QNN_Provider /*: Provider*/ { - std::shared_ptr CreateExecutionProviderFactory(const void* param) /*override*/ { +struct QNN_Provider : Provider { + std::shared_ptr CreateExecutionProviderFactory(const void* param) override { if (param == nullptr) { LOGS_DEFAULT(ERROR) << "[QNN EP] Passed NULL options to CreateExecutionProviderFactory()"; return nullptr; @@ -52,18 +41,15 @@ struct QNN_Provider /*: Provider*/ { return std::make_shared(*provider_options, config_options); } - void Initialize() /*override*/ {} - void Shutdown() /*override*/ {} + void Initialize() override {} + void Shutdown() override {} } g_provider; } // namespace onnxruntime -// TODO: Uncomment when it is an EP dll -#if 0 extern "C" { ORT_API(onnxruntime::Provider*, GetProvider) { return &onnxruntime::g_provider; } } -#endif diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index ceb654931ae61..dc28848b2bab0 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -385,6 +385,14 @@ CreateSupportedPartitions(const GraphViewer& graph_viewer, execution_provider_name, execution_provider_type, node_unit_map, drop_constant_initializers); } +inline std::unique_ptr MakeComputeCapability(const GraphViewer& graph_viewer, + const std::vector& group, + const std::function& generate_metadef_name, + const std::string& execution_provider_name, + bool drop_constant_initializers) { + return g_host->Utils__MakeComputeCapability(graph_viewer, group, generate_metadef_name, + execution_provider_name, drop_constant_initializers); +} } // namespace utils namespace QDQ { diff --git a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc index 
aa8c367d25d51..456e164917587 100644 --- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc +++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc @@ -505,6 +505,9 @@ Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& tensor, const st /*out*/ std::vector& unpacked_tensor) { return g_host->UnpackInitializerData(tensor, model_path, unpacked_tensor); } +Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& tensor, /*out*/ std::vector& unpacked_tensor) { + return g_host->UnpackInitializerData(tensor, std::filesystem::path(), unpacked_tensor); +} } // namespace utils diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 96cd72c91b0e6..03dc05a539d61 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -352,6 +352,7 @@ struct ProviderHost { // TypeProto virtual std::unique_ptr TypeProto__construct() = 0; virtual void TypeProto__CopyFrom(ONNX_NAMESPACE::TypeProto* p, const ONNX_NAMESPACE::TypeProto* other) = 0; + virtual bool TypeProto__has_tensor_type(const ONNX_NAMESPACE::TypeProto* p) = 0; virtual const ONNX_NAMESPACE::TypeProto_Tensor& TypeProto__tensor_type(const ONNX_NAMESPACE::TypeProto* p) = 0; virtual ONNX_NAMESPACE::TypeProto_Tensor* TypeProto__mutable_tensor_type(ONNX_NAMESPACE::TypeProto* p) = 0; @@ -933,6 +934,12 @@ struct ProviderHost { const std::unordered_map* node_unit_map, bool drop_constant_initializers) = 0; + virtual std::unique_ptr + Utils__MakeComputeCapability(const GraphViewer& graph_viewer, + const std::vector& group, + const std::function& generate_metadef_name, + const std::string& execution_provider_name, + bool drop_constant_initializers) = 0; // Model virtual std::unique_ptr Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path, const 
IOnnxRuntimeOpSchemaRegistryList* local_registries, diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index 03a0b4ea99524..c92bae856b514 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -345,6 +345,7 @@ struct TypeProto_Sequence final { struct TypeProto final { static std::unique_ptr Create() { return g_host->TypeProto__construct(); } + bool has_tensor_type() const { return g_host->TypeProto__has_tensor_type(this); } const TypeProto_Tensor& tensor_type() const { return g_host->TypeProto__tensor_type(this); } TypeProto_Tensor* mutable_tensor_type() { return g_host->TypeProto__mutable_tensor_type(this); } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index b18d4c6ed23ef..c63a06fb34f9d 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -92,6 +92,7 @@ using Node_EdgeEnd = Node::EdgeEnd; #include "core/providers/openvino/openvino_provider_factory_creator.h" #include "core/providers/tensorrt/tensorrt_provider_factory_creator.h" #include "core/providers/vitisai/vitisai_provider_factory_creator.h" +#include "core/providers/qnn/qnn_provider_factory_creator.h" #include "core/providers/cuda/cuda_provider_factory.h" #include "core/providers/cann/cann_provider_factory.h" @@ -466,6 +467,7 @@ struct ProviderHostImpl : ProviderHost { // TypeProto (wrapped) std::unique_ptr TypeProto__construct() override { return std::make_unique(); } void TypeProto__CopyFrom(ONNX_NAMESPACE::TypeProto* p, const ONNX_NAMESPACE::TypeProto* other) override { p->CopyFrom(*other); } + bool TypeProto__has_tensor_type(const ONNX_NAMESPACE::TypeProto* p) override { return p->has_tensor_type(); } const ONNX_NAMESPACE::TypeProto_Tensor& TypeProto__tensor_type(const 
ONNX_NAMESPACE::TypeProto* p) override { return p->tensor_type(); } ONNX_NAMESPACE::TypeProto_Tensor* TypeProto__mutable_tensor_type(ONNX_NAMESPACE::TypeProto* p) override { return p->mutable_tensor_type(); } int TypeProto__value_case(const ONNX_NAMESPACE::TypeProto* p) override { return p->value_case(); } @@ -1138,6 +1140,16 @@ struct ProviderHostImpl : ProviderHost { drop_constant_initializers); } + std::unique_ptr + Utils__MakeComputeCapability(const GraphViewer& graph_viewer, + const std::vector& group, + const std::function& generate_metadef_name, + const std::string& execution_provider_name, + bool drop_constant_initializers) override { + return onnxruntime::utils::MakeComputeCapability(graph_viewer, group, generate_metadef_name, + execution_provider_name, drop_constant_initializers); + } + // Model (wrapped) std::unique_ptr Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path, const IOnnxRuntimeOpSchemaRegistryList* local_registries, @@ -1925,6 +1937,18 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O return ov_options_converted_map; } +std::shared_ptr QNNProviderFactoryCreator::Create(const ProviderOptions& provider_options_map, + const SessionOptions* session_options) { + const ConfigOptions* config_options = nullptr; + if (session_options != nullptr) { + config_options = &session_options->config_options; + } + + std::array configs_array = {&provider_options_map, config_options}; + const void* arg = reinterpret_cast(&configs_array); + return s_library_qnn.Get().CreateExecutionProviderFactory(arg); +} + std::shared_ptr OpenVINOProviderFactoryCreator::Create( const ProviderOptions* provider_options_map, const SessionOptions* session_options) { // Append session options applicable for EP to EP Provider options. 
From 693dd335f363e15f4605b7a527436f9e36e3e72e Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 15 Dec 2024 22:12:03 -0800 Subject: [PATCH 30/64] Compiles but does not link (until update cmake to build as shared lib) --- .../qnn/builder/onnx_ctx_model_helper.cc | 18 +++++------ .../builder/opbuilder/reduce_op_builder.cc | 6 ++-- .../qnn/builder/qnn_backend_manager.cc | 16 +++++++++- .../qnn_node_group/conv_activation_fusion.cc | 24 ++++++++------ .../providers/qnn/qnn_execution_provider.cc | 31 ++----------------- .../providers/qnn/qnn_execution_provider.h | 3 -- .../shared_library/provider_interfaces.h | 5 +++ .../shared_library/provider_wrappedtypes.h | 10 ++++++ .../core/session/provider_bridge_ort.cc | 13 ++++++++ 9 files changed, 71 insertions(+), 55 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc index 9fe1ff3da381a..36bccfe7effce 100644 --- a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc @@ -53,9 +53,9 @@ Status GetMainContextNode(const std::vectorOpType(), "Should only filter in the EPContext node."); - utils::NodeAttrHelper node_helper(*ep_context_node); + const Node& ep_context_node = *graph_viewer.Nodes().begin(); + ORT_RETURN_IF_NOT(EPCONTEXT_OP == ep_context_node.OpType(), "Should only filter in the EPContext node."); + utils::NodeAttrHelper node_helper(ep_context_node); int64_t is_main_context = node_helper.Get(MAIN_CONTEXT, static_cast(0)); if (1 == is_main_context) { main_context_pos.push_back(static_cast(i)); @@ -75,12 +75,12 @@ Status CreateNodeArgs(const std::vector& names, std::string name = names[i]; ORT_RETURN_IF(tensor_info_table.find(name) == tensor_info_table.end(), "Tensor name: ", name, " not found in tensor_info_table"); const OnnxTensorInfo& tensor_info = tensor_info_table.at(name); - TypeProto tensor_type; - 
tensor_type.mutable_tensor_type()->set_elem_type(tensor_info.data_type_); + std::unique_ptr tensor_type = TypeProto::Create(); + tensor_type->mutable_tensor_type()->set_elem_type(tensor_info.data_type_); for (size_t j = 0; j < tensor_info.shape_.size(); ++j) { - tensor_type.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(tensor_info.shape_[j]); + tensor_type->mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(tensor_info.shape_[j]); } - auto& input_arg = graph.GetOrCreateNodeArg(name, &tensor_type); + auto& input_arg = graph.GetOrCreateNodeArg(name, tensor_type.get()); node_args.push_back(&input_arg); } return Status::OK(); @@ -163,8 +163,8 @@ Status TryGetMaxSpillFillSize(const std::vector(0)); if (max_size > max_spill_fill_size) { max_spill_fill_size = max_size; diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc index a74fcfd843551..2ad6d3741d0ba 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc @@ -67,7 +67,7 @@ class ReduceOpBuilder : public BaseOpBuilder { using AxesQnnIntType = uint32_t; Status GetAxesSet(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, - InlinedHashSet& axes_set) const; + std::set& axes_set) const; // Maps an operator type to the opset in which "axes" became an input instead of an attribute. 
static const std::array opset_with_axes_as_input; @@ -83,7 +83,7 @@ const std::array ReduceOpBuilder::opset_with_axes_as_ }; Status ReduceOpBuilder::GetAxesSet(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, - InlinedHashSet& axes_set) const { + std::set& axes_set) const { ReduceOpType reduce_op_type = GetReduceOpType(node_unit.OpType()); if (reduce_op_type == ReduceOpType::REDUCE_OP_TYPE_UNKNOWN) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN EP: Unknown reduce operator ", node_unit.OpType()); @@ -211,7 +211,7 @@ Status ReduceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w // // Handle axes param. // - InlinedHashSet axes_set; + std::set axes_set; ORT_RETURN_IF_ERROR(GetAxesSet(qnn_model_wrapper, node_unit, axes_set)); const size_t num_axes = axes_set.size(); diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index 5a9abe43fec72..dde5738731986 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -251,7 +251,7 @@ void QnnLogging(const char* format, ::onnxruntime::logging::Category::onnxruntime, data_type, ORT_WHERE); - log_capture.ProcessPrintf(format, argument_parameter); + log_capture->ProcessPrintf(format, argument_parameter); } } @@ -1098,6 +1098,8 @@ Status QnnBackendManager::ExtractBackendProfilingInfo() { } bool tracelogging_provider_ep_enabled = false; + // TODO: Re-enable when QNN EP is a dll +#if 0 const Env& env = Env::Default(); // const Env& env = GetDefaultEnv(); auto& provider = env.GetTelemetryProvider(); @@ -1108,6 +1110,7 @@ Status QnnBackendManager::ExtractBackendProfilingInfo() { tracelogging_provider_ep_enabled = true; } } +#endif // ETW disabled previously, but enabled now if (ProfilingLevel::INVALID == profiling_level_etw_ && tracelogging_provider_ep_enabled) { @@ -1325,6 +1328,8 @@ void 
QnnBackendManager::LogQnnProfileEventAsTraceLogging( const std::string& timingSource, const std::string& eventLevel, const char* eventIdentifier) { + // TODO: Re-enable when QNN EP is a dll +#if 0 TraceLoggingWrite( telemetry_provider_handle, "QNNProfilingEvent", @@ -1337,6 +1342,15 @@ void QnnBackendManager::LogQnnProfileEventAsTraceLogging( TraceLoggingString(timingSource.c_str(), "Timing Source"), TraceLoggingString(eventLevel.c_str(), "Event Level"), TraceLoggingString(eventIdentifier, "Event Identifier")); +#else + ORT_UNUSED_PARAMETER(timestamp); + ORT_UNUSED_PARAMETER(message); + ORT_UNUSED_PARAMETER(qnnScalarValue); + ORT_UNUSED_PARAMETER(unit); + ORT_UNUSED_PARAMETER(timingSource); + ORT_UNUSED_PARAMETER(eventLevel); + ORT_UNUSED_PARAMETER(eventIdentifier); +#endif } #endif diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.cc b/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.cc index 789811a423884..567dd5c1d6567 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/conv_activation_fusion.cc @@ -173,7 +173,7 @@ static bool CanActivationBeRemoved(const QnnModelWrapper& qnn_model_wrapper, static std::vector FindParentDQNodes(const GraphViewer& graph_viewer, const Node& node) { // Get all parent DQ nodes sorted by destination argument index. 
std::vector parents(node.InputDefs().size(), nullptr); - for (auto it = node.InputEdgesBegin(); it != node.InputEdgesEnd(); it++) { + for (auto it = node.InputEdgesBegin(); it != node.InputEdgesEnd(); ++it) { if (it->GetNode().OpType().compare(DEQUANTIZE_LINEAR) == 0) { parents[it->GetDstArgIndex()] = &(it->GetNode()); } @@ -317,7 +317,7 @@ static Status CreateOrValidateOnQnn(QnnModelWrapper& qnn_model_wrapper, std::optional axis; if (auto entry = dq_attrs.find("axis"); entry != dq_attrs.end()) { - axis = entry->second.i(); + axis = entry->second().i(); } // quantization scale and zp are always the input[1, 2] @@ -327,7 +327,8 @@ static Status CreateOrValidateOnQnn(QnnModelWrapper& qnn_model_wrapper, // Populate NodeUnit outputs and output edges std::vector outputs; - Node::EdgeSet output_edges; + std::vector> output_edges_holder; + std::vector output_edges; for (const Node* q_node : q_nodes) { const auto q_inputs = q_node->InputDefs(); const auto& q_attrs = q_node->GetAttributes(); @@ -335,7 +336,7 @@ static Status CreateOrValidateOnQnn(QnnModelWrapper& qnn_model_wrapper, std::optional axis; if (auto entry = q_attrs.find("axis"); entry != q_attrs.end()) { - axis = entry->second.i(); + axis = entry->second().i(); } // quantization scale and zp are always the input[1, 2] @@ -346,22 +347,25 @@ static Status CreateOrValidateOnQnn(QnnModelWrapper& qnn_model_wrapper, auto q_cur_edge = q_node->OutputEdgesBegin(); auto q_end_edge = q_node->OutputEdgesEnd(); for (; q_cur_edge != q_end_edge; ++q_cur_edge) { - output_edges.insert(Node::EdgeEnd{q_cur_edge->GetNode(), 0, q_cur_edge->GetDstArgIndex()}); + auto output_edge = Node_EdgeEnd::Create(q_cur_edge->GetNode(), 0, q_cur_edge->GetDstArgIndex()); + output_edges.push_back(output_edge.get()); + output_edges_holder.push_back(std::move(output_edge)); } } - NodeUnit custom_node_unit(dq_nodes, target_node, q_nodes, NodeUnit::Type::QDQGroup, - inputs, outputs, num_dqs, output_edges); - const auto* conv_op_builder = 
qnn::GetOpBuilder(custom_node_unit.OpType()); + std::unique_ptr custom_node_unit = NodeUnit::Create(dq_nodes, target_node, + q_nodes, NodeUnit::Type::QDQGroup, + inputs, outputs, num_dqs, output_edges); + const auto* conv_op_builder = qnn::GetOpBuilder(custom_node_unit->OpType()); if (conv_op_builder == nullptr) { return Status::OK(); } if (validate) { - return conv_op_builder->IsOpSupported(qnn_model_wrapper, custom_node_unit, logger); + return conv_op_builder->IsOpSupported(qnn_model_wrapper, *custom_node_unit, logger); } - return conv_op_builder->AddToModelBuilder(qnn_model_wrapper, custom_node_unit, logger, validate); + return conv_op_builder->AddToModelBuilder(qnn_model_wrapper, *custom_node_unit, logger, validate); } // Traverses graph to check if the given NodeUnit is part of a valid DQ* -> Conv -> Relu/Clip -> Q sequence. diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index c8ec3098b7ce0..cfafc468e8be6 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -25,32 +25,6 @@ namespace onnxruntime { constexpr const char* QNN = "QNN"; -static std::unique_ptr>> s_run_on_unload_; - -// TODO: Remove and use versions in EP provider bridge. -void RunOnUnload(std::function function) { - static std::mutex mutex; - std::lock_guard guard(mutex); - if (!s_run_on_unload_) { - s_run_on_unload_ = std::make_unique>>(); - } - s_run_on_unload_->push_back(std::move(function)); -} - -// TODO: Remove and use versions in EP provider bridge. 
-struct OnUnload { - ~OnUnload() { - if (!s_run_on_unload_) - return; - - for (auto& function : *s_run_on_unload_) - function(); - - s_run_on_unload_.reset(); - } - -} g_on_unload; - static void ParseProfilingLevel(std::string profiling_level_string, qnn::ProfilingLevel& profiling_level) { std::transform(profiling_level_string.begin(), @@ -189,8 +163,7 @@ qnn::ProfilingLevel QNNExecutionProvider::GetProfilingLevelFromETWLevel(unsigned QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_options_map, const ConfigOptions* config_options) : IExecutionProvider{onnxruntime::kQnnExecutionProvider} { - // TODO: Uncomment when QNN EP is built as a DLL - // InitProviderOrtApi(); + InitProviderOrtApi(); if (config_options) { disable_cpu_ep_fallback_ = config_options->GetConfigOrDefault( @@ -1015,7 +988,7 @@ Status QNNExecutionProvider::Compile(const std::vector& fused buffer_size, max_spill_fill_buffer_size)); } - qnn_ep_context_model_ = std::make_unique("qnn_ep_context_model", false, logger); + qnn_ep_context_model_ = Model::Create("qnn_ep_context_model", false, logger); ORT_RETURN_IF_ERROR(qnn::CreateEPContextNodes(qnn_ep_context_model_.get(), context_buffer.get(), buffer_size, diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.h b/onnxruntime/core/providers/qnn/qnn_execution_provider.h index b390988f39da4..4324b3ddfef78 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.h +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.h @@ -22,9 +22,6 @@ namespace onnxruntime { -// TODO: Remove. It's in provider bridge. 
-void RunOnUnload(std::function function); - class SharedContext { public: static SharedContext& GetInstance() { diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 03dc05a539d61..41ad441db616e 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -289,6 +289,7 @@ struct ProviderHost { virtual std::unique_ptr logging__Capture__construct(const logging::Logger& logger, logging::Severity severity, const char* category, logging::DataType dataType, const CodeLocation& location) = 0; virtual void logging__Capture__operator_delete(logging::Capture* p) noexcept = 0; virtual std::ostream& logging__Capture__Stream(logging::Capture* p) noexcept = 0; + virtual void logging__Capture__ProcessPrintf(logging::Capture* p, const char* format, va_list args) = 0; // Env virtual Env& Env__Default() = 0; @@ -835,6 +836,8 @@ struct ProviderHost { virtual const NodeAttributes& Node__GetAttributes(const Node* p) noexcept = 0; virtual void Node__AddAttribute(Node* p, const ::std::string& attr_name, const ONNX_NAMESPACE::GraphProto& value) = 0; + virtual void Node__AddAttribute(Node* p, const ::std::string& attr_name, const std::string& value) = 0; + virtual void Node__AddAttribute(Node* p, const ::std::string& attr_name, int64_t value) = 0; virtual size_t Node__GetInputEdgesCount(const Node* p) noexcept = 0; virtual size_t Node__GetOutputEdgesCount(const Node* p) noexcept = 0; @@ -944,6 +947,8 @@ struct ProviderHost { virtual std::unique_ptr Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path, const IOnnxRuntimeOpSchemaRegistryList* local_registries, const logging::Logger& logger) = 0; + virtual std::unique_ptr Model__construct(const std::string& graph_name, bool is_onnx_domain_only, + const logging::Logger& logger) = 0; virtual void Model__operator_delete(Model* p) 
= 0; virtual Graph& Model__MainGraph(Model* p) = 0; virtual std::unique_ptr Model__ToProto(Model* p) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index c92bae856b514..3b5e5039796f7 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -42,6 +42,7 @@ struct Capture final { static void operator delete(void* p) { g_host->logging__Capture__operator_delete(reinterpret_cast(p)); } std::ostream& Stream() noexcept { return g_host->logging__Capture__Stream(this); } + void ProcessPrintf(const char* format, va_list args) { g_host->logging__Capture__ProcessPrintf(this, format, args); } Capture() = delete; Capture(const Capture&) = delete; @@ -824,6 +825,12 @@ struct Node final { void AddAttribute(const ::std::string& attr_name, const ONNX_NAMESPACE::GraphProto& value) { g_host->Node__AddAttribute(this, attr_name, value); } + void AddAttribute(const std::string& attr_name, const std::string& value) { + g_host->Node__AddAttribute(this, attr_name, value); + } + void AddAttribute(const std::string& attr_name, int64_t value) { + g_host->Node__AddAttribute(this, attr_name, value); + } size_t GetInputEdgesCount() const noexcept { return g_host->Node__GetInputEdgesCount(this); } size_t GetOutputEdgesCount() const noexcept { return g_host->Node__GetOutputEdgesCount(this); } @@ -976,6 +983,9 @@ struct Model final { const IOnnxRuntimeOpSchemaRegistryList* local_registries, const logging::Logger& logger) { return g_host->Model__construct(std::move(model_proto), model_path, local_registries, logger); } + static std::unique_ptr Create(const std::string& graph_name, bool is_onnx_domain_only, const logging::Logger& logger) { + return g_host->Model__construct(graph_name, is_onnx_domain_only, logger); + } static void operator delete(void* p) { 
g_host->Model__operator_delete(reinterpret_cast(p)); } static Status Load(const PathString& file_path, /*out*/ ONNX_NAMESPACE::ModelProto& model_proto) { return g_host->Model__Load(file_path, model_proto); } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index c63a06fb34f9d..2ad85552aa813 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -396,6 +396,9 @@ struct ProviderHostImpl : ProviderHost { } void logging__Capture__operator_delete(logging::Capture* p) noexcept override { delete p; } std::ostream& logging__Capture__Stream(logging::Capture* p) noexcept override { return p->Stream(); } + void logging__Capture__ProcessPrintf(logging::Capture* p, const char* format, va_list args) override { + p->ProcessPrintf(format, args); + } // Env Env& Env__Default() override { return Env::Default(); } @@ -988,6 +991,12 @@ struct ProviderHostImpl : ProviderHost { void Node__AddAttribute(Node* p, const ::std::string& attr_name, const ONNX_NAMESPACE::GraphProto& value) override { p->AddAttribute(attr_name, value); } + void Node__AddAttribute(Node* p, const ::std::string& attr_name, const std::string& value) override { + p->AddAttribute(attr_name, value); + } + void Node__AddAttribute(Node* p, const ::std::string& attr_name, int64_t value) override { + p->AddAttribute(attr_name, value); + } size_t Node__GetInputEdgesCount(const Node* p) noexcept override { return p->GetInputEdgesCount(); } size_t Node__GetOutputEdgesCount(const Node* p) noexcept override { return p->GetOutputEdgesCount(); } @@ -1156,6 +1165,10 @@ struct ProviderHostImpl : ProviderHost { const logging::Logger& logger) override { return std::make_unique(model_proto, model_path, local_registries, logger); } + std::unique_ptr Model__construct(const std::string& graph_name, bool is_onnx_domain_only, + const logging::Logger& logger) override { + return std::make_unique(graph_name, 
is_onnx_domain_only, logger); + } void Model__operator_delete(Model* p) override { delete p; } Graph& Model__MainGraph(Model* p) override { return p->MainGraph(); } std::unique_ptr Model__ToProto(Model* p) override { return std::make_unique(p->ToProto()); } From ae2dbd28dc46d30741ab00e5533e7c4bc0f46737 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 15 Dec 2024 22:17:30 -0800 Subject: [PATCH 31/64] Use provider bridge function to get default Env --- .../core/providers/qnn/builder/qnn_backend_manager.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index dde5738731986..c816858018411 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -1100,8 +1100,7 @@ Status QnnBackendManager::ExtractBackendProfilingInfo() { bool tracelogging_provider_ep_enabled = false; // TODO: Re-enable when QNN EP is a dll #if 0 - const Env& env = Env::Default(); - // const Env& env = GetDefaultEnv(); + const Env& env = GetDefaultEnv(); auto& provider = env.GetTelemetryProvider(); auto level = provider.Level(); if (provider.IsEnabled()) { @@ -1507,8 +1506,7 @@ void* QnnBackendManager::LoadLib(const char* file_name, int flags, std::string& auto file_path = std::filesystem::path(file_name); if (!file_path.is_absolute()) { // construct an absolute path from ORT runtime path + file_name and check whether it exists. 
- const Env& env = Env::Default(); - // const Env& env = GetDefaultEnv(); + const Env& env = GetDefaultEnv(); auto pathstring = env.GetRuntimePath() + ToPathString(file_name); auto absolute_path = pathstring.c_str(); if (std::filesystem::exists(std::filesystem::path(absolute_path))) { From db5f0ec1f664c9494d0fce8391484f8d54bec0e8 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 16 Dec 2024 00:45:12 -0800 Subject: [PATCH 32/64] It works! QNN EP is a shared library and all QNN unit tests pass on Windows ARM64. [disabled ETW code for now] --- cmake/onnxruntime.cmake | 1 - cmake/onnxruntime_providers.cmake | 3 - cmake/onnxruntime_providers_qnn.cmake | 59 ++++++++++++------- cmake/onnxruntime_python.cmake | 8 ++- cmake/onnxruntime_unittests.cmake | 6 +- .../core/providers/qnn/builder/qnn_utils.cc | 10 ++-- onnxruntime/core/providers/qnn/ort_api.h | 6 +- .../providers/qnn/qnn_execution_provider.cc | 3 +- .../providers/qnn/qnn_execution_provider.h | 2 +- 9 files changed, 62 insertions(+), 36 deletions(-) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 732c0511d400f..3b76aff829be2 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -208,7 +208,6 @@ set(onnxruntime_INTERNAL_LIBRARIES ${PROVIDERS_COREML} ${PROVIDERS_DML} ${PROVIDERS_NNAPI} - ${PROVIDERS_QNN} ${PROVIDERS_SNPE} ${PROVIDERS_RKNPU} ${PROVIDERS_VSINPU} diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 582491de9503d..67fa48b28278d 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -74,9 +74,6 @@ endif() if(onnxruntime_USE_JSEP) set(PROVIDERS_JS onnxruntime_providers_js) endif() -if(onnxruntime_USE_QNN) - set(PROVIDERS_QNN onnxruntime_providers_qnn) -endif() if(onnxruntime_USE_RKNPU) set(PROVIDERS_RKNPU onnxruntime_providers_rknpu) endif() diff --git a/cmake/onnxruntime_providers_qnn.cmake b/cmake/onnxruntime_providers_qnn.cmake index 52ccdbf7c9ecc..2a6c63ee01149 100644 --- 
a/cmake/onnxruntime_providers_qnn.cmake +++ b/cmake/onnxruntime_providers_qnn.cmake @@ -4,32 +4,51 @@ add_compile_definitions(USE_QNN=1) file(GLOB_RECURSE - onnxruntime_providers_qnn_ep_cc_srcs CONFIGURE_DEPENDS + onnxruntime_providers_qnn_cc_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/qnn/*.h" "${ONNXRUNTIME_ROOT}/core/providers/qnn/*.cc" - ) - - file(GLOB_RECURSE - onnxruntime_providers_qnn_builder_cc_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/qnn/builder/*.h" "${ONNXRUNTIME_ROOT}/core/providers/qnn/builder/*.cc" - ) - - set(onnxruntime_providers_qnn_cc_srcs - ${onnxruntime_providers_qnn_ep_cc_srcs} - ${onnxruntime_providers_qnn_builder_cc_srcs} + "${ONNXRUNTIME_ROOT}/core/providers/qnn/builder/qnn_node_group/*.h" + "${ONNXRUNTIME_ROOT}/core/providers/qnn/builder/qnn_node_group/*.cc" + "${ONNXRUNTIME_ROOT}/core/providers/qnn/builder/opbuilder/*.h" + "${ONNXRUNTIME_ROOT}/core/providers/qnn/builder/opbuilder/*.cc" + "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h" + "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc" ) source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_qnn_cc_srcs}) - onnxruntime_add_static_library(onnxruntime_providers_qnn ${onnxruntime_providers_qnn_cc_srcs}) - onnxruntime_add_include_to_target(onnxruntime_providers_qnn onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf-lite flatbuffers::flatbuffers Boost::mp11) - target_link_libraries(onnxruntime_providers_qnn) - add_dependencies(onnxruntime_providers_qnn onnx ${onnxruntime_EXTERNAL_DEPENDENCIES}) - set_target_properties(onnxruntime_providers_qnn PROPERTIES CXX_STANDARD_REQUIRED ON) - set_target_properties(onnxruntime_providers_qnn PROPERTIES FOLDER "ONNXRuntime") - target_include_directories(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_ROOT} ${onnxruntime_QNN_HOME}/include/QNN ${onnxruntime_QNN_HOME}/include) - set_target_properties(onnxruntime_providers_qnn PROPERTIES LINKER_LANGUAGE CXX) - # ignore 
the warning unknown-pragmas on "pragma region" - if(NOT MSVC) + onnxruntime_add_shared_library_module(onnxruntime_providers_qnn ${onnxruntime_providers_qnn_cc_srcs}) + onnxruntime_add_include_to_target(onnxruntime_providers_qnn ${ONNXRUNTIME_PROVIDERS_SHARED} ${GSL_TARGET} onnx onnxruntime_common safeint_interface) + target_link_libraries(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_PROVIDERS_SHARED} ${ABSEIL_LIBS}) + add_dependencies(onnxruntime_providers_qnn ${onnxruntime_EXTERNAL_DEPENDENCIES}) + target_include_directories(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_ROOT} + ${CMAKE_CURRENT_BINARY_DIR} + ${onnxruntime_QNN_HOME}/include/QNN + ${onnxruntime_QNN_HOME}/include) + + # Set linker flags for function(s) exported by EP DLL + if(UNIX) + set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds -Xlinker --gc-sections") + elseif(WIN32) + set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/qnn/symbols.def") + else() + message(FATAL_ERROR "onnxruntime_providers_qnn unknown platform, need to specify shared library exports for it") + endif() + + # Set compile options + if(MSVC) + target_compile_options(onnxruntime_providers_qnn PUBLIC /wd4099 /wd4005) + else() + # ignore the warning unknown-pragmas on "pragma region" target_compile_options(onnxruntime_providers_qnn PRIVATE "-Wno-unknown-pragmas") endif() + + set_target_properties(onnxruntime_providers_qnn PROPERTIES LINKER_LANGUAGE CXX) + set_target_properties(onnxruntime_providers_qnn PROPERTIES CXX_STANDARD_REQUIRED ON) + set_target_properties(onnxruntime_providers_qnn PROPERTIES FOLDER "ONNXRuntime") + + install(TARGETS onnxruntime_providers_qnn + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/cmake/onnxruntime_python.cmake 
b/cmake/onnxruntime_python.cmake index 5a87252b08573..776c866efbb12 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -180,7 +180,6 @@ target_link_libraries(onnxruntime_pybind11_state PRIVATE ${PROVIDERS_XNNPACK} ${PROVIDERS_WEBGPU} ${PROVIDERS_AZURE} - ${PROVIDERS_QNN} onnxruntime_optimizer onnxruntime_providers onnxruntime_util @@ -997,6 +996,13 @@ if (onnxruntime_USE_COREML) endif() if (onnxruntime_USE_QNN) + add_custom_command( + TARGET onnxruntime_pybind11_state POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + $ + $ + $/onnxruntime/capi/ + ) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 306096db128a7..c19a18ef15089 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -628,12 +628,11 @@ set(ONNXRUNTIME_TEST_LIBS onnxruntime_session ${ONNXRUNTIME_INTEROP_TEST_LIBS} ${onnxruntime_libs} - # CUDA, ROCM, TENSORRT, MIGRAPHX, DNNL, and OpenVINO are dynamically loaded at runtime + # CUDA, ROCM, TENSORRT, MIGRAPHX, DNNL, OpenVINO, and QNN are dynamically loaded at runtime ${PROVIDERS_NNAPI} ${PROVIDERS_VSINPU} ${PROVIDERS_JS} ${PROVIDERS_WEBGPU} - ${PROVIDERS_QNN} ${PROVIDERS_SNPE} ${PROVIDERS_RKNPU} ${PROVIDERS_DML} @@ -704,8 +703,7 @@ endif() if(onnxruntime_USE_QNN AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD) list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/qnn/*) list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_qnn) - list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_qnn) - list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_qnn) + list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_qnn onnxruntime_providers_shared) endif() if(onnxruntime_USE_SNPE) diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc 
b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc index 50a151292d9c7..74cdf2ad17f42 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc @@ -902,21 +902,23 @@ static bool GetClipMinMaxImpl(const GraphViewer& graph_viewer, const Node& node, switch (input_type) { case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: { - auto status = onnxruntime::utils::UnpackTensor(*initializer, graph_viewer.ModelPath(), &value, 1); + std::vector bytes(sizeof(float)); + auto status = onnxruntime::utils::UnpackInitializerData(*initializer, graph_viewer.ModelPath(), bytes); if (!status.IsOK()) { LOGS(logger, ERROR) << "GetClipMinMax() failed to unpack float initializer: " << status.ErrorMessage(); return false; } + value = *reinterpret_cast(bytes.data()); break; } case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: { - MLFloat16 f16_val{}; - auto status = onnxruntime::utils::UnpackTensor(*initializer, graph_viewer.ModelPath(), &f16_val, 1); + std::vector bytes(sizeof(MLFloat16)); + auto status = onnxruntime::utils::UnpackInitializerData(*initializer, graph_viewer.ModelPath(), bytes); if (!status.IsOK()) { LOGS(logger, ERROR) << "GetClipMinMax() failed to unpack float16 initializer: " << status.ErrorMessage(); return false; } - value = f16_val.ToFloat(); + value = reinterpret_cast(bytes.data())->ToFloat(); break; } default: diff --git a/onnxruntime/core/providers/qnn/ort_api.h b/onnxruntime/core/providers/qnn/ort_api.h index 6fb346d2349a0..44face145fb04 100644 --- a/onnxruntime/core/providers/qnn/ort_api.h +++ b/onnxruntime/core/providers/qnn/ort_api.h @@ -6,6 +6,7 @@ #define BUILD_QNN_EP_STATIC 0 #if BUILD_QNN_EP_STATIC +// Includes when building QNN EP statically #include "onnx/defs/data_type_utils.h" #include "core/common/common.h" #include "core/common/status.h" @@ -32,10 +33,13 @@ #include "core/optimizer/qdq_transformer/selectors_actions/shared/utils.h" #include "core/providers/common.h" #include 
"core/providers/partitioning_utils.h" +#include "core/session/onnxruntime_cxx_api.h" #else +// Includes when building QNN EP as a shared library #include "core/providers/shared_library/provider_api.h" +#define ORT_API_MANUAL_INIT +#include "core/session/onnxruntime_cxx_api.h" #endif #include "core/session/onnxruntime_session_options_config_keys.h" #include "core/session/onnxruntime_run_options_config_keys.h" -#include "core/session/onnxruntime_cxx_api.h" diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index cfafc468e8be6..99de2089cea13 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -164,6 +164,7 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio const ConfigOptions* config_options) : IExecutionProvider{onnxruntime::kQnnExecutionProvider} { InitProviderOrtApi(); + metadef_id_generator_ = ModelMetadefIdGenerator::Create(); if (config_options) { disable_cpu_ep_fallback_ = config_options->GetConfigOrDefault( @@ -654,7 +655,7 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer const auto gen_metadef_name = [&]() { uint64_t model_hash; - int metadef_id = metadef_id_generator_.GenerateId(graph_viewer, model_hash); + int metadef_id = metadef_id_generator_->GenerateId(graph_viewer, model_hash); return MakeString(QNN, context_node_name_prefix_, "_", model_hash, "_", metadef_id); }; diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.h b/onnxruntime/core/providers/qnn/qnn_execution_provider.h index 4324b3ddfef78..168fadfa98e25 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.h +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.h @@ -142,7 +142,7 @@ class QNNExecutionProvider : public IExecutionProvider { bool qnn_context_embed_mode_ = true; int32_t vtcm_size_in_mb_ = 0; std::unique_ptr 
qnn_ep_context_model_; - ModelMetadefIdGenerator metadef_id_generator_; + std::unique_ptr metadef_id_generator_; uint32_t device_id_ = 0; qnn::HtpPerformanceMode default_htp_performance_mode_ = qnn::HtpPerformanceMode::kHtpDefault; uint32_t default_rpc_control_latency_ = 0; From ea2a141a63372b7d497c36c8f64b131cae846cac Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 16 Dec 2024 00:55:34 -0800 Subject: [PATCH 33/64] Add onnxruntime_providers_qnn.dll to nuget --- .../nuget/generate_nuspec_for_native_nuget.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/nuget/generate_nuspec_for_native_nuget.py b/tools/nuget/generate_nuspec_for_native_nuget.py index 11842f34ce45b..e19e0219e7d5f 100644 --- a/tools/nuget/generate_nuspec_for_native_nuget.py +++ b/tools/nuget/generate_nuspec_for_native_nuget.py @@ -382,6 +382,7 @@ def generate_files(line_list, args): "tensorrt_ep_shared_lib": "onnxruntime_providers_tensorrt.dll", "openvino_ep_shared_lib": "onnxruntime_providers_openvino.dll", "cuda_ep_shared_lib": "onnxruntime_providers_cuda.dll", + "qnn_ep_shared_lib": "onnxruntime_providers_qnn.dll", "onnxruntime_perf_test": "onnxruntime_perf_test.exe", "onnx_test_runner": "onnx_test_runner.exe", } @@ -777,6 +778,24 @@ def generate_files(line_list, args): + '\\native" />' ) + if args.execution_provider == "qnn": + files_list.append( + "' + ) + files_list.append( + "' + ) + # process all other library dependencies if is_cpu_package or is_cuda_gpu_package or is_dml_package or is_mklml_package: # Process dnnl dependency From d820c9b587967312a5b85e9eaf0ba685844eb4c5 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 16 Dec 2024 01:13:57 -0800 Subject: [PATCH 34/64] Pass --build_shared_lib to some QNN pipelines. Include Boost::mp11. 
--- cmake/onnxruntime_providers_qnn.cmake | 2 +- .../android-arm64-v8a-QNN-crosscompile-ci-pipeline.yml | 1 + tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml | 2 ++ .../github/azure-pipelines/win-qnn-arm64-ci-pipeline.yml | 2 ++ tools/ci_build/github/azure-pipelines/win-qnn-ci-pipeline.yml | 2 ++ 5 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cmake/onnxruntime_providers_qnn.cmake b/cmake/onnxruntime_providers_qnn.cmake index 2a6c63ee01149..53aba28939a1a 100644 --- a/cmake/onnxruntime_providers_qnn.cmake +++ b/cmake/onnxruntime_providers_qnn.cmake @@ -19,7 +19,7 @@ source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_qnn_cc_srcs}) onnxruntime_add_shared_library_module(onnxruntime_providers_qnn ${onnxruntime_providers_qnn_cc_srcs}) - onnxruntime_add_include_to_target(onnxruntime_providers_qnn ${ONNXRUNTIME_PROVIDERS_SHARED} ${GSL_TARGET} onnx onnxruntime_common safeint_interface) + onnxruntime_add_include_to_target(onnxruntime_providers_qnn ${ONNXRUNTIME_PROVIDERS_SHARED} ${GSL_TARGET} onnx onnxruntime_common Boost::mp11 safeint_interface) target_link_libraries(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_PROVIDERS_SHARED} ${ABSEIL_LIBS}) add_dependencies(onnxruntime_providers_qnn ${onnxruntime_EXTERNAL_DEPENDENCIES}) target_include_directories(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_ROOT} diff --git a/tools/ci_build/github/azure-pipelines/android-arm64-v8a-QNN-crosscompile-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/android-arm64-v8a-QNN-crosscompile-ci-pipeline.yml index c3dbee336b69d..d080f68ca292f 100644 --- a/tools/ci_build/github/azure-pipelines/android-arm64-v8a-QNN-crosscompile-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/android-arm64-v8a-QNN-crosscompile-ci-pipeline.yml @@ -72,6 +72,7 @@ jobs: --android_abi=x86_64 \ --android_api=31 \ --parallel \ + --build_shared_lib \ --use_qnn \ --qnn_home $(QnnSDKRootDir) \ --cmake_generator=Ninja \ diff --git 
a/tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml index d3826d90f9073..78bd2e20a4763 100644 --- a/tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml @@ -65,6 +65,7 @@ jobs: --config Release \ --use_binskim_compliant_compile_flags \ --build_java \ + --build_shared_lib \ --use_qnn \ --qnn_home $(QnnSDKRootDir) \ --cmake_generator=Ninja \ @@ -77,6 +78,7 @@ jobs: --config Release \ --use_binskim_compliant_compile_flags \ --build_java \ + --build_shared_lib \ --use_qnn \ --qnn_home $(QnnSDKRootDir) \ --cmake_generator=Ninja \ diff --git a/tools/ci_build/github/azure-pipelines/win-qnn-arm64-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-qnn-arm64-ci-pipeline.yml index 5c013fae6be0b..826c43ebd9a15 100644 --- a/tools/ci_build/github/azure-pipelines/win-qnn-arm64-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-qnn-arm64-ci-pipeline.yml @@ -79,6 +79,7 @@ jobs: --config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --cmake_generator "Visual Studio 17 2022" + --build_shared_lib --use_qnn --qnn_home $(QnnSDKRootDir) --update --build --parallel @@ -88,6 +89,7 @@ jobs: --config $(BuildConfig) ^ --build_dir $(Build.BinariesDirectory) ^ --cmake_generator "Visual Studio 17 2022" ^ + --build_shared_lib ^ --use_qnn ^ --qnn_home $(QnnSDKRootDir) ^ --test --enable_onnx_tests diff --git a/tools/ci_build/github/azure-pipelines/win-qnn-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-qnn-ci-pipeline.yml index 53700c58c7e7d..485c06fdbed04 100644 --- a/tools/ci_build/github/azure-pipelines/win-qnn-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-qnn-ci-pipeline.yml @@ -72,6 +72,7 @@ jobs: --build_dir $(Build.BinariesDirectory) --cmake_generator "Visual Studio 17 2022" --build_java + --build_shared_lib --use_qnn --qnn_home $(QnnSDKRootDir) 
--use_binskim_compliant_compile_flags @@ -87,6 +88,7 @@ jobs: --build_dir $(Build.BinariesDirectory) ^ --cmake_generator "Visual Studio 17 2022" ^ --build_java ^ + --build_shared_lib ^ --use_qnn ^ --qnn_home $(QnnSDKRootDir) ^ --use_binskim_compliant_compile_flags ^ From 9861ec8b70a9768a0fb87c8a6c5504621b58cfbe Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 16 Dec 2024 10:40:23 -0800 Subject: [PATCH 35/64] Copy qnn dll for java build --- cmake/onnxruntime_java.cmake | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cmake/onnxruntime_java.cmake b/cmake/onnxruntime_java.cmake index b15b9632e9e24..662f7cb949ece 100644 --- a/cmake/onnxruntime_java.cmake +++ b/cmake/onnxruntime_java.cmake @@ -148,7 +148,7 @@ if (WIN32) if(NOT onnxruntime_ENABLE_STATIC_ANALYSIS) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_JNI_DIR}/$) - if (onnxruntime_USE_CUDA OR onnxruntime_USE_DNNL OR onnxruntime_USE_OPENVINO OR onnxruntime_USE_TENSORRT) + if (onnxruntime_USE_CUDA OR onnxruntime_USE_DNNL OR onnxruntime_USE_OPENVINO OR onnxruntime_USE_TENSORRT OR onnxruntime_USE_QNN) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) endif() if (onnxruntime_USE_CUDA) @@ -163,11 +163,14 @@ if (WIN32) if (onnxruntime_USE_TENSORRT) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) endif() + if (onnxruntime_USE_QNN) + add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) + endif() endif() else() add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) 
add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_JNI_DIR}/$) - if (onnxruntime_USE_CUDA OR onnxruntime_USE_DNNL OR onnxruntime_USE_OPENVINO OR onnxruntime_USE_TENSORRT) + if (onnxruntime_USE_CUDA OR onnxruntime_USE_DNNL OR onnxruntime_USE_OPENVINO OR onnxruntime_USE_TENSORRT OR onnxruntime_USE_QNN) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) endif() if (onnxruntime_USE_CUDA) @@ -182,6 +185,9 @@ else() if (onnxruntime_USE_TENSORRT) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) endif() + if (onnxruntime_USE_QNN) + add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) + endif() endif() # run the build process (this copies the results back into CMAKE_CURRENT_BINARY_DIR) From b92043dafb5f049a17e7b938ee5d701b14f2aac1 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Tue, 17 Dec 2024 09:51:21 -0800 Subject: [PATCH 36/64] Add linker -rpath=" --- cmake/onnxruntime_providers_qnn.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/onnxruntime_providers_qnn.cmake b/cmake/onnxruntime_providers_qnn.cmake index 53aba28939a1a..84a776dac0674 100644 --- a/cmake/onnxruntime_providers_qnn.cmake +++ b/cmake/onnxruntime_providers_qnn.cmake @@ -20,8 +20,8 @@ source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_qnn_cc_srcs}) onnxruntime_add_shared_library_module(onnxruntime_providers_qnn ${onnxruntime_providers_qnn_cc_srcs}) onnxruntime_add_include_to_target(onnxruntime_providers_qnn ${ONNXRUNTIME_PROVIDERS_SHARED} ${GSL_TARGET} onnx onnxruntime_common Boost::mp11 safeint_interface) - target_link_libraries(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_PROVIDERS_SHARED} ${ABSEIL_LIBS}) - 
add_dependencies(onnxruntime_providers_qnn ${onnxruntime_EXTERNAL_DEPENDENCIES}) + target_link_libraries(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_PROVIDERS_SHARED} ${ABSEIL_LIBS} ${CMAKE_DL_LIBS}) + add_dependencies(onnxruntime_providers_qnn onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES}) target_include_directories(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${onnxruntime_QNN_HOME}/include/QNN @@ -29,7 +29,7 @@ # Set linker flags for function(s) exported by EP DLL if(UNIX) - set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds -Xlinker --gc-sections") + set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds -Xlinker --gc-sections -Xlinker -rpath=\$ORIGIN") elseif(WIN32) set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/qnn/symbols.def") else() From f29ce59bcdf239e2577b35121e67d6896c1c7660 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Tue, 17 Dec 2024 10:31:22 -0800 Subject: [PATCH 37/64] two backslashes in rpath --- cmake/onnxruntime_providers_qnn.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/onnxruntime_providers_qnn.cmake b/cmake/onnxruntime_providers_qnn.cmake index 84a776dac0674..215367f2f040c 100644 --- a/cmake/onnxruntime_providers_qnn.cmake +++ b/cmake/onnxruntime_providers_qnn.cmake @@ -29,7 +29,7 @@ # Set linker flags for function(s) exported by EP DLL if(UNIX) - set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds -Xlinker --gc-sections -Xlinker -rpath=\$ORIGIN") + set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker 
--version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds -Xlinker --gc-sections -Xlinker -rpath=\\$ORIGIN") elseif(WIN32) set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/qnn/symbols.def") else() From 5d954d69ccdcdacf1a18756953b1f3f3575e2030 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Tue, 17 Dec 2024 11:24:58 -0800 Subject: [PATCH 38/64] Copy qnn ep dlls when running java unit tests --- cmake/onnxruntime_unittests.cmake | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index c19a18ef15089..b19e32e87443a 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1608,8 +1608,14 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # also copy other library dependencies that may be required by tests to native-test if(onnxruntime_USE_QNN) - add_custom_command(TARGET onnxruntime_providers_qnn POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${QNN_LIB_FILES} ${JAVA_NATIVE_TEST_DIR}) + add_custom_command( + TARGET onnxruntime_providers_qnn POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + $ + $ + ${QNN_LIB_FILES} + ${JAVA_NATIVE_TEST_DIR} + ) endif() # delegate to gradle's test runner From 03614bcfaaa23bd486699d4f5cc5fa1373a3fe34 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Tue, 17 Dec 2024 13:16:42 -0800 Subject: [PATCH 39/64] Modify Java bindings to use QNN shared lib --- cmake/onnxruntime_unittests.cmake | 10 ++-------- java/src/main/java/ai/onnxruntime/OnnxRuntime.java | 14 ++++++++++++++ java/src/main/java/ai/onnxruntime/OrtSession.java | 9 +++++++-- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index b19e32e87443a..bc7abc885b717 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1608,14 +1608,8 @@ if (NOT 
CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # also copy other library dependencies that may be required by tests to native-test if(onnxruntime_USE_QNN) - add_custom_command( - TARGET onnxruntime_providers_qnn POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy - $ - $ - ${QNN_LIB_FILES} - ${JAVA_NATIVE_TEST_DIR} - ) + add_custom_command(TARGET onnxruntime_providers_qnn POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${QNN_LIB_FILES} ${JAVA_NATIVE_TEST_DIR}) endif() # delegate to gradle's test runner diff --git a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java index b80debdde47c4..ae498bee69353 100644 --- a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java +++ b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java @@ -76,6 +76,9 @@ final class OnnxRuntime { /** The short name of the ONNX runtime TensorRT provider library */ static final String ONNXRUNTIME_LIBRARY_TENSORRT_NAME = "onnxruntime_providers_tensorrt"; + /** The short name of the ONNX runtime QNN provider library */ + static final String ONNXRUNTIME_LIBRARY_QNN_NAME = "onnxruntime_providers_qnn"; + /** The OS & CPU architecture string */ private static final String OS_ARCH_STR = initOsArch(); @@ -252,6 +255,17 @@ static boolean extractTensorRT() { return extractProviderLibrary(ONNXRUNTIME_LIBRARY_TENSORRT_NAME); } + /** + * Extracts the QNN provider library from the classpath resources if present, or checks to + * see if the QNN provider library is in the directory specified by {@link + * #ONNXRUNTIME_NATIVE_PATH}. + * + * @return True if the QNN provider library is ready for loading, false otherwise. + */ + static boolean extractQNN() { + return extractProviderLibrary(ONNXRUNTIME_LIBRARY_QNN_NAME); + } + /** * Extracts a shared provider library from the classpath resources if present, or checks to see if * that library is in the directory specified by {@link #ONNXRUNTIME_NATIVE_PATH}. 
diff --git a/java/src/main/java/ai/onnxruntime/OrtSession.java b/java/src/main/java/ai/onnxruntime/OrtSession.java index 32dc9d9f84aaa..700008e66bb36 100644 --- a/java/src/main/java/ai/onnxruntime/OrtSession.java +++ b/java/src/main/java/ai/onnxruntime/OrtSession.java @@ -1319,8 +1319,13 @@ public void addXnnpack(Map providerOptions) throws OrtException * @throws OrtException If there was an error in native code. */ public void addQnn(Map providerOptions) throws OrtException { - String qnnProviderName = "QNN"; - addExecutionProvider(qnnProviderName, providerOptions); + if (OnnxRuntime.extractQNN()) { + String qnnProviderName = "QNN"; + addExecutionProvider(qnnProviderName, providerOptions); + } else { + throw new OrtException( + OrtException.OrtErrorCode.ORT_EP_FAIL, "Failed to find QNN shared provider"); + } } /** From 421f39f37676de257bc5eca42584ea3bad2f1a0c Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Tue, 17 Dec 2024 13:54:15 -0800 Subject: [PATCH 40/64] Try to build onnxruntime_providers_shared on Android --- cmake/onnxruntime_providers_cpu.cmake | 1 - cmake/onnxruntime_unittests.cmake | 2 +- java/src/main/java/ai/onnxruntime/OnnxRuntime.java | 5 ++--- tools/ci_build/github/android/build_aar_package.py | 10 ++++++++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/cmake/onnxruntime_providers_cpu.cmake b/cmake/onnxruntime_providers_cpu.cmake index 91a2b13002ec9..79e430763da93 100644 --- a/cmake/onnxruntime_providers_cpu.cmake +++ b/cmake/onnxruntime_providers_cpu.cmake @@ -215,7 +215,6 @@ set_target_properties(onnxruntime_providers PROPERTIES FOLDER "ONNXRuntime") if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin|iOS|visionOS" - AND NOT CMAKE_SYSTEM_NAME STREQUAL "Android" AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") file(GLOB onnxruntime_providers_shared_cc_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/shared/*.h" diff --git 
a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index bc7abc885b717..c19a18ef15089 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1608,7 +1608,7 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # also copy other library dependencies that may be required by tests to native-test if(onnxruntime_USE_QNN) - add_custom_command(TARGET onnxruntime_providers_qnn POST_BUILD + add_custom_command(TARGET onnxruntime_providers_qnn POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${QNN_LIB_FILES} ${JAVA_NATIVE_TEST_DIR}) endif() diff --git a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java index ae498bee69353..01bf33f8d36e5 100644 --- a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java +++ b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java @@ -256,9 +256,8 @@ static boolean extractTensorRT() { } /** - * Extracts the QNN provider library from the classpath resources if present, or checks to - * see if the QNN provider library is in the directory specified by {@link - * #ONNXRUNTIME_NATIVE_PATH}. + * Extracts the QNN provider library from the classpath resources if present, or checks to see if + * the QNN provider library is in the directory specified by {@link #ONNXRUNTIME_NATIVE_PATH}. * * @return True if the QNN provider library is ready for loading, false otherwise. 
*/ diff --git a/tools/ci_build/github/android/build_aar_package.py b/tools/ci_build/github/android/build_aar_package.py index 1b34b3d302e57..7294e0548e1b2 100644 --- a/tools/ci_build/github/android/build_aar_package.py +++ b/tools/ci_build/github/android/build_aar_package.py @@ -129,7 +129,10 @@ def _build_aar(args): # to jnilibs/[abi] for later compiling the aar package abi_jnilibs_dir = os.path.join(jnilibs_dir, abi) os.makedirs(abi_jnilibs_dir, exist_ok=True) - for lib_name in ["libonnxruntime.so", "libonnxruntime4j_jni.so"]: + sym_link_libs = ["libonnxruntime.so", "libonnxruntime4j_jni.so"] + if qnn_android_build: + sym_link_libs.extend(["libonnxruntime_providers_shared.so", "libonnxruntime_providers_qnn.so"]) + for lib_name in sym_link_libs: target_lib_name = os.path.join(abi_jnilibs_dir, lib_name) # If the symbolic already exists, delete it first # For some reason, os.path.exists will return false for a symbolic link in Linux, @@ -141,7 +144,10 @@ def _build_aar(args): # copy executables for each abi, in case we want to publish those as well # some of them might not exist, e.g., if we skip building the tests abi_exe_dir = os.path.join(exe_dir, abi) - for exe_name in ["libonnxruntime.so", "onnxruntime_perf_test", "onnx_test_runner"]: + execs_to_copy = ["libonnxruntime.so", "onnxruntime_perf_test", "onnx_test_runner"] + if qnn_android_build: + execs_to_copy.extend(["libonnxruntime_providers_shared.so", "libonnxruntime_providers_qnn.so"]) + for exe_name in execs_to_copy: src_exe_path = os.path.join(abi_build_dir, build_config, exe_name) if not os.path.exists(src_exe_path): continue From 9354f18dad37234043b3abe4d9be1c2fb010ca5c Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 18 Dec 2024 17:19:42 -0800 Subject: [PATCH 41/64] Pass linker flag to Android build of qnn dll --- cmake/onnxruntime_providers_qnn.cmake | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cmake/onnxruntime_providers_qnn.cmake 
b/cmake/onnxruntime_providers_qnn.cmake index 215367f2f040c..847a677a4d316 100644 --- a/cmake/onnxruntime_providers_qnn.cmake +++ b/cmake/onnxruntime_providers_qnn.cmake @@ -29,7 +29,15 @@ # Set linker flags for function(s) exported by EP DLL if(UNIX) - set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds -Xlinker --gc-sections -Xlinker -rpath=\\$ORIGIN") + string(CONCAT ONNXRUNTIME_PROVIDERS_QNN_LINK_FLAGS + "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds " + "-Xlinker --gc-sections -Xlinker -rpath=\\$ORIGIN") + if(CMAKE_SYSTEM_NAME STREQUAL "Android") + string(CONCAT ONNXRUNTIME_PROVIDERS_QNN_LINK_FLAGS + "${ONNXRUNTIME_PROVIDERS_QNN_LINK_FLAGS} " + "-Xlinker -undefined=Provider_GetHost") + endif() + set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "${ONNXRUNTIME_PROVIDERS_QNN_LINK_FLAGS}") elseif(WIN32) set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/qnn/symbols.def") else() From e8df64fc3ae197c299a0797f58448d8315313752 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 18 Dec 2024 20:03:22 -0800 Subject: [PATCH 42/64] Try different linker flag for android --- cmake/onnxruntime_providers_qnn.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/onnxruntime_providers_qnn.cmake b/cmake/onnxruntime_providers_qnn.cmake index 847a677a4d316..6eb547927073b 100644 --- a/cmake/onnxruntime_providers_qnn.cmake +++ b/cmake/onnxruntime_providers_qnn.cmake @@ -35,7 +35,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Android") string(CONCAT ONNXRUNTIME_PROVIDERS_QNN_LINK_FLAGS "${ONNXRUNTIME_PROVIDERS_QNN_LINK_FLAGS} " - "-Xlinker -undefined=Provider_GetHost") + "-Xlinker --allow-shlib-undefined") # Allow undefined global symbols (e.g., Provider_GetHost) in shared library endif() set_property(TARGET 
onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "${ONNXRUNTIME_PROVIDERS_QNN_LINK_FLAGS}") elseif(WIN32) From 863ff04c65713f8043e5e4979970206be7db60c3 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 18 Dec 2024 20:56:17 -0800 Subject: [PATCH 43/64] Use -z undefs on android --- cmake/onnxruntime_providers_qnn.cmake | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/cmake/onnxruntime_providers_qnn.cmake b/cmake/onnxruntime_providers_qnn.cmake index 6eb547927073b..505c357d516d0 100644 --- a/cmake/onnxruntime_providers_qnn.cmake +++ b/cmake/onnxruntime_providers_qnn.cmake @@ -29,15 +29,20 @@ # Set linker flags for function(s) exported by EP DLL if(UNIX) - string(CONCAT ONNXRUNTIME_PROVIDERS_QNN_LINK_FLAGS - "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds " - "-Xlinker --gc-sections -Xlinker -rpath=\\$ORIGIN") if(CMAKE_SYSTEM_NAME STREQUAL "Android") - string(CONCAT ONNXRUNTIME_PROVIDERS_QNN_LINK_FLAGS - "${ONNXRUNTIME_PROVIDERS_QNN_LINK_FLAGS} " - "-Xlinker --allow-shlib-undefined") # Allow undefined global symbols (e.g., Provider_GetHost) in shared library + target_link_options(onnxruntime_providers_qnn PRIVATE + "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds" + "LINKER:--gc-sections" + "LINKER:-rpath=\$ORIGIN" + "LINKER:-z,undefs" + ) + else() + target_link_options(onnxruntime_providers_qnn PRIVATE + "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds" + "LINKER:--gc-sections" + "LINKER:-rpath=\$ORIGIN" + ) endif() - set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "${ONNXRUNTIME_PROVIDERS_QNN_LINK_FLAGS}") elseif(WIN32) set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/qnn/symbols.def") else() From ef1b91da26e6366b474c460f96ee53ca483d8e57 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 18 Dec 2024 
22:47:33 -0800 Subject: [PATCH 44/64] Expose ETW logger functionality via provider bridge. Fix multithreading bug in callback from Etw (QNN EP needs to properly lock a logging mutex). --- .../qnn/builder/qnn_backend_manager.cc | 37 ++++++------------- .../qnn/builder/qnn_backend_manager.h | 20 ++++------ onnxruntime/core/providers/qnn/ort_api.h | 6 +++ .../providers/qnn/qnn_execution_provider.cc | 23 ++---------- .../providers/qnn/qnn_execution_provider.h | 11 +----- .../providers/shared_library/provider_api.h | 37 +++++++++++++++++++ .../provider_bridge_provider.cc | 2 +- .../shared_library/provider_interfaces.h | 12 ++++++ .../shared_library/provider_wrappedtypes.h | 15 ++++++++ .../core/session/provider_bridge_ort.cc | 30 ++++++++++++++- 10 files changed, 124 insertions(+), 69 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index c816858018411..494c0a53eaab4 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -22,11 +22,6 @@ #include "core/providers/qnn/builder/onnx_ctx_model_helper.h" #include "core/providers/qnn/builder/qnn_configs_helper.h" -#ifdef _WIN32 -#include -#include "core/platform/tracing.h" -#endif - // Flag to determine if Backend should do node validation for each opNode added #define DO_GRAPH_NODE_VALIDATIONS 1 @@ -255,7 +250,9 @@ void QnnLogging(const char* format, } } -Status QnnBackendManager::InitializeQnnLog() { +Status QnnBackendManager::InitializeQnnLog(const logging::Logger& logger) { + logger_ = &logger; + // Set Qnn log level align with Ort log level auto ort_log_level = logger_->GetSeverity(); QnnLog_Level_t qnn_log_level = MapOrtSeverityToQNNLogLevel(ort_log_level); @@ -303,23 +300,15 @@ QnnLog_Level_t QnnBackendManager::MapOrtSeverityToQNNLogLevel(logging::Severity } } -Status QnnBackendManager::ResetQnnLogLevel() { +Status 
QnnBackendManager::ResetQnnLogLevel(std::optional ort_log_level) { std::lock_guard lock(logger_mutex_); - - if (backend_setup_completed_ && logger_ != nullptr) { - auto ort_log_level = logger_->GetSeverity(); - LOGS(*logger_, INFO) << "Reset Qnn log level to ORT Logger level: " << (unsigned int)ort_log_level; - return UpdateQnnLogLevel(ort_log_level); + if (!backend_setup_completed_ || logger_ == nullptr) { + return Status::OK(); } - return Status::OK(); -} - -Status QnnBackendManager::UpdateQnnLogLevel(logging::Severity ort_log_level) { ORT_RETURN_IF(nullptr == log_handle_, "Unable to update QNN Log Level. Invalid QNN log handle."); - ORT_RETURN_IF(false == backend_setup_completed_, "Unable to update QNN Log Level. Backend setup not completed."); - ORT_RETURN_IF(nullptr == logger_, "Unable to update QNN Log Level. Invalid logger."); - QnnLog_Level_t qnn_log_level = MapOrtSeverityToQNNLogLevel(ort_log_level); + logging::Severity actual_log_level = ort_log_level.has_value() ? *ort_log_level : logger_->GetSeverity(); + QnnLog_Level_t qnn_log_level = MapOrtSeverityToQNNLogLevel(actual_log_level); LOGS(*logger_, INFO) << "Updating Qnn log level to: " << qnn_log_level; @@ -332,7 +321,8 @@ Status QnnBackendManager::UpdateQnnLogLevel(logging::Severity ort_log_level) { LOGS(*logger_, ERROR) << "Invalid log handle provided to QnnLog_setLogLevel."; } } - ORT_RETURN_IF(QNN_BACKEND_NO_ERROR != result, "Failed to set log level in Qnn backend. Error: ", QnnErrorHandleToString(result)); + ORT_RETURN_IF(QNN_BACKEND_NO_ERROR != result, + "Failed to set log level in Qnn backend. 
Error: ", QnnErrorHandleToString(result)); return Status::OK(); } @@ -823,7 +813,7 @@ Status QnnBackendManager::SetupBackend(const logging::Logger& logger, LOGS(logger, VERBOSE) << "Backend build version: " << sdk_build_version_; - SetLogger(&logger); + ORT_RETURN_IF_ERROR(InitializeQnnLog(logger)); LOGS(logger, VERBOSE) << "SetLogger succeed."; ORT_RETURN_IF_ERROR(InitializeBackend()); @@ -1098,8 +1088,6 @@ Status QnnBackendManager::ExtractBackendProfilingInfo() { } bool tracelogging_provider_ep_enabled = false; - // TODO: Re-enable when QNN EP is a dll -#if 0 const Env& env = GetDefaultEnv(); auto& provider = env.GetTelemetryProvider(); auto level = provider.Level(); @@ -1109,7 +1097,6 @@ Status QnnBackendManager::ExtractBackendProfilingInfo() { tracelogging_provider_ep_enabled = true; } } -#endif // ETW disabled previously, but enabled now if (ProfilingLevel::INVALID == profiling_level_etw_ && tracelogging_provider_ep_enabled) { @@ -1327,7 +1314,7 @@ void QnnBackendManager::LogQnnProfileEventAsTraceLogging( const std::string& timingSource, const std::string& eventLevel, const char* eventIdentifier) { - // TODO: Re-enable when QNN EP is a dll + // TODO: Re-enable when add a method to ORT Telemetry provider to log EP profiling data. 
#if 0 TraceLoggingWrite( telemetry_provider_handle, diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h index 661a830bfb733..a52738e8263bf 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h @@ -120,18 +120,10 @@ class QnnBackendManager { const Qnn_ProfileHandle_t& GetQnnProfileHandle() { return profile_backend_handle_; } - void SetLogger(const logging::Logger* logger) { - if (logger_ == nullptr) { - logger_ = logger; - (void)InitializeQnnLog(); - } - } - - Status InitializeQnnLog(); - - Status UpdateQnnLogLevel(logging::Severity ort_log_level); - - Status ResetQnnLogLevel(); + // Resets the QNN log level to the given ORT log level or to the default log level if the argument is + // std::nullopt. + // IMPORTANT: This function locks the internal `logging_mutex_`. + Status ResetQnnLogLevel(std::optional ort_log_level = std::nullopt); // Terminate logging in the backend Status TerminateQnnLog() { @@ -170,6 +162,10 @@ class QnnBackendManager { uint64_t& max_spill_fill_buffer_size); private: + // Sets the ORT logger and creates a corresponding QNN logger with the same log level. + // IMPORTANT: caller must lock the `logger_mutex_` before calling this function. 
+ Status InitializeQnnLog(const logging::Logger& logger); + void* LoadLib(const char* file_name, int flags, std::string& error_msg); Status LoadQnnSystemLib(); diff --git a/onnxruntime/core/providers/qnn/ort_api.h b/onnxruntime/core/providers/qnn/ort_api.h index 44face145fb04..96f6ab76f113f 100644 --- a/onnxruntime/core/providers/qnn/ort_api.h +++ b/onnxruntime/core/providers/qnn/ort_api.h @@ -6,6 +6,12 @@ #define BUILD_QNN_EP_STATIC 0 #if BUILD_QNN_EP_STATIC +#ifdef _WIN32 +#include +#include "core/platform/tracing.h" +#include "core/platform/windows/logging/etw_sink.h" +#endif + // Includes when building QNN EP statically #include "onnx/defs/data_type_utils.h" #include "core/common/common.h" diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index 99de2089cea13..afddcec7a182b 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -13,14 +13,6 @@ #include "core/providers/qnn/builder/qnn_def.h" #include "core/providers/qnn/builder/onnx_ctx_model_helper.h" -#ifdef _WIN32 -#include -// TODO: Enable once QNN is built as a DLL -#if 0 -#include "core/platform/windows/logging/etw_sink.h" -#endif -#endif // _WIN32 - namespace onnxruntime { constexpr const char* QNN = "QNN"; @@ -218,8 +210,6 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio // set to invalid to indicate that ETW is no enabled when we setup QNN qnn::ProfilingLevel profiling_level_etw = qnn::ProfilingLevel::INVALID; -// TODO: Re-enable ETW after QNN is a DLL -#if 0 const Env& env = GetDefaultEnv(); auto& provider = env.GetTelemetryProvider(); if (provider.IsEnabled()) { @@ -231,7 +221,6 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio } } } -#endif // In case ETW gets disabled later auto profiling_level_pos = provider_options_map.find(PROFILING_LEVEL); @@ -377,9 +366,7 @@ 
QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio soc_model, enable_htp_weight_sharing); -// TODO: Renable once QNN is a dll -#if 0 -#ifdef _WIN32 +#if defined(_WIN32) && defined(ETW_TRACE_LOGGING_SUPPORTED) auto& etwRegistrationManager = logging::EtwRegistrationManager::Instance(); // Register callback for ETW capture state (rundown) callback_ETWSink_provider_ = onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback( @@ -400,7 +387,7 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio if (IsEnabled == EVENT_CONTROL_CODE_ENABLE_PROVIDER) { if ((MatchAnyKeyword & static_cast(onnxruntime::logging::ORTTraceLoggingKeyword::Logs)) != 0) { auto ortETWSeverity = etwRegistrationManager.MapLevelToSeverity(); - (void)qnn_backend_manager_->UpdateQnnLogLevel(ortETWSeverity); + (void)qnn_backend_manager_->ResetQnnLogLevel(ortETWSeverity); } if ((MatchAnyKeyword & static_cast(onnxruntime::logging::ORTTraceLoggingKeyword::Profiling)) != 0) { if (Level != 0) { @@ -421,7 +408,6 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio }); etwRegistrationManager.RegisterInternalCallback(callback_ETWSink_provider_); #endif -#endif } QNNExecutionProvider::~QNNExecutionProvider() { @@ -434,14 +420,11 @@ QNNExecutionProvider::~QNNExecutionProvider() { } // Unregister the ETW callback -#ifdef _WIN32 - // TODO: Re-enable when QNN EP is a DLL -#if 0 +#if defined(_WIN32) && defined(ETW_TRACE_LOGGING_SUPPORTED) if (callback_ETWSink_provider_ != nullptr) { logging::EtwRegistrationManager::Instance().UnregisterInternalCallback(callback_ETWSink_provider_); } #endif -#endif } // Logs information about the supported/unsupported nodes. 
diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.h b/onnxruntime/core/providers/qnn/qnn_execution_provider.h index 168fadfa98e25..cd3ccd96e31ab 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.h +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.h @@ -13,12 +13,6 @@ #include "core/providers/qnn/builder/qnn_model.h" #include "core/providers/qnn/builder/qnn_configs_helper.h" #include "HTP/QnnHtpGraph.h" -#ifdef _WIN32 -// TODO: Reenable when QNN ep is a dll -#if 0 -#include "core/platform/windows/logging/etw_sink.h" -#endif -#endif namespace onnxruntime { @@ -149,11 +143,8 @@ class QNNExecutionProvider : public IExecutionProvider { bool enable_HTP_FP16_precision_ = true; bool share_ep_contexts_ = false; bool enable_spill_fill_buffer_ = false; -#ifdef _WIN32 - // TODO: Re-enable when QNN is a DLL -#if 0 +#if defined(_WIN32) && defined(ETW_TRACE_LOGGING_SUPPORTED) onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback callback_ETWSink_provider_ = nullptr; -#endif #endif qnn::ModelSettings model_settings_ = {}; diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index dc28848b2bab0..d4295b88faa79 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -9,6 +9,25 @@ #pragma once #define SHARED_PROVIDER 1 +#ifdef _WIN32 +#include + +// ETW requires Windows 10 SDK or later +// https://stackoverflow.com/questions/2665755/how-can-i-determine-the-version-of-the-windows-sdk-installed-on-my-computer +#if VER_PRODUCTBUILD > 9600 +// ETW trace logging uses Windows 10 SDK's TraceLoggingProvider.h +#define ETW_TRACE_LOGGING_SUPPORTED 1 +#endif // VER_PRODUCTBUILD > 9600 + +#ifdef ETW_TRACE_LOGGING_SUPPORTED +#include +// TraceLoggingProvider.h must follow Windows.h +#include +#include +#include +#endif // defined(ETW_TRACE_LOGGING_SUPPORTED) +#endif // defined(_WIN32) + 
#include #include #include @@ -136,6 +155,17 @@ enum class DataType { USER = 1 ///< Contains potentially sensitive user data. }; +enum class ORTTraceLoggingKeyword : uint64_t { + Session = 0x1, // ORT Session TraceLoggingWrite + Logs = 0x2, // LOGS() Macro ORT logs. Pair with an appropriate level depending on detail required + Reserved1 = 0x4, // Reserved if we want to add some specific sub-categories instead of just LOGS() or other uses + Reserved2 = 0x8, + Reserved3 = 0x10, + Reserved4 = 0x20, + Reserved5 = 0x40, + Reserved6 = 0x80, + Profiling = 0x100 // Enables profiling. At higher levels >5 can impact inference performance +}; } // namespace logging // OnnxRuntime Types (these are the internal types) @@ -143,6 +173,13 @@ struct CPUIDInfo; namespace logging { struct Logger; struct Capture; +#ifdef ETW_TRACE_LOGGING_SUPPORTED +struct EtwRegistrationManager; +using EtwRegistrationManager_EtwInternalCallback = std::function; +#endif } // namespace logging struct ComputeCapability; struct ConfigOptions; diff --git a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc index 456e164917587..4c050534456da 100644 --- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc +++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc @@ -791,5 +791,5 @@ std::string ToUTF8String(const std::wstring& s) { std::wstring ToWideString(const std::string& s) { return g_host->ToWideString(s); } -#endif +#endif // _WIN32 } // namespace onnxruntime diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 41ad441db616e..76ccd361761a7 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -291,6 +291,18 @@ struct ProviderHost { virtual std::ostream& 
logging__Capture__Stream(logging::Capture* p) noexcept = 0; virtual void logging__Capture__ProcessPrintf(logging::Capture* p, const char* format, va_list args) = 0; +#if defined(ETW_TRACE_LOGGING_SUPPORTED) + // logging::EtwRegistrationManager + virtual logging::EtwRegistrationManager& logging__EtwRegistrationManager__Instance() = 0; + virtual logging::Severity logging__EtwRegistrationManager__MapLevelToSeverity(logging::EtwRegistrationManager* p) = 0; + virtual void logging__EtwRegistrationManager__RegisterInternalCallback( + logging::EtwRegistrationManager* p, + const logging::EtwRegistrationManager_EtwInternalCallback& callback) = 0; + virtual void logging__EtwRegistrationManager__UnregisterInternalCallback( + logging::EtwRegistrationManager* p, + const logging::EtwRegistrationManager_EtwInternalCallback& callback) = 0; +#endif // defined(ETW_TRACE_LOGGING_SUPPORTED) + // Env virtual Env& Env__Default() = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index 3b5e5039796f7..b7817e98377eb 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -48,6 +48,21 @@ struct Capture final { Capture(const Capture&) = delete; void operator=(const Capture&) = delete; }; + +#if defined(ETW_TRACE_LOGGING_SUPPORTED) +struct EtwRegistrationManager final { + using EtwInternalCallback = EtwRegistrationManager_EtwInternalCallback; + static EtwRegistrationManager& Instance() { return g_host->logging__EtwRegistrationManager__Instance(); } + Severity MapLevelToSeverity() { return g_host->logging__EtwRegistrationManager__MapLevelToSeverity(this); } + void RegisterInternalCallback(const EtwInternalCallback& callback) { + g_host->logging__EtwRegistrationManager__RegisterInternalCallback(this, callback); + } + void UnregisterInternalCallback(const EtwInternalCallback& callback) { + 
g_host->logging__EtwRegistrationManager__UnregisterInternalCallback(this, callback); + } +}; +#endif // defined(ETW_TRACE_LOGGING_SUPPORTED) + } // namespace logging } // namespace onnxruntime diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 2ad85552aa813..78c441efea856 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -37,7 +37,6 @@ #include "core/framework/model_metadef_id_generator.h" #include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h" #include "core/optimizer/qdq_transformer/selectors_actions/shared/utils.h" -#include "core/session/onnxruntime_session_options_config_keys.h" #include "core/session/onnxruntime_c_api.h" #include "core/common/string_helper.h" @@ -62,6 +61,10 @@ #include "orttraining/core/framework/distributed_run_context.h" #endif +#ifdef _WIN32 +#include "core/platform/windows/logging/etw_sink.h" +#endif + namespace ONNX_NAMESPACE { // We use these names in the provider API because we don't have the protobuf definitions of the RepeatedField* types using int64s = google::protobuf::RepeatedField; @@ -77,6 +80,11 @@ namespace onnxruntime { using IndexedSubGraph_MetaDef = IndexedSubGraph::MetaDef; using IndexedSubGraph_SourceOfSchema = IndexedSubGraph::SourceOfSchema; using Node_EdgeEnd = Node::EdgeEnd; +#ifdef ETW_TRACE_LOGGING_SUPPORTED +namespace logging { +using EtwRegistrationManager_EtwInternalCallback = EtwRegistrationManager::EtwInternalCallback; +} +#endif } // namespace onnxruntime #include "core/common/cpuid_info.h" @@ -400,6 +408,26 @@ struct ProviderHostImpl : ProviderHost { p->ProcessPrintf(format, args); } +#if defined(ETW_TRACE_LOGGING_SUPPORTED) + // logging::EtwRegistrationManager + logging::EtwRegistrationManager& logging__EtwRegistrationManager__Instance() override { + return logging::EtwRegistrationManager::Instance(); + } + logging::Severity 
logging__EtwRegistrationManager__MapLevelToSeverity(logging::EtwRegistrationManager* p) override { + return p->MapLevelToSeverity(); + } + void logging__EtwRegistrationManager__RegisterInternalCallback( + logging::EtwRegistrationManager* p, + const logging::EtwRegistrationManager_EtwInternalCallback& callback) override { + p->RegisterInternalCallback(callback); + } + void logging__EtwRegistrationManager__UnregisterInternalCallback( + logging::EtwRegistrationManager* p, + const logging::EtwRegistrationManager_EtwInternalCallback& callback) override { + p->UnregisterInternalCallback(callback); + } +#endif // defined(ETW_TRACE_LOGGING_SUPPORTED) + // Env Env& Env__Default() override { return Env::Default(); } From 57e00722fdb27e48de3407a5d205e12e1c13af98 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 18 Dec 2024 22:53:43 -0800 Subject: [PATCH 45/64] Cmake java android: copy libonnxruntime_providers_shared.so and libonnxruntime_providers_qnn.so to directory for building Android AAR package --- cmake/onnxruntime_java.cmake | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cmake/onnxruntime_java.cmake b/cmake/onnxruntime_java.cmake index 662f7cb949ece..f1c6379afab72 100644 --- a/cmake/onnxruntime_java.cmake +++ b/cmake/onnxruntime_java.cmake @@ -214,6 +214,16 @@ if (ANDROID) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${ANDROID_PACKAGE_ABI_DIR}/$) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${ANDROID_PACKAGE_ABI_DIR}/$) + # If using QNN, also copy libonnxruntime_providers_shared.so and libonnxruntime_providers_qnn.so + if (onnxruntime_USE_QNN) + add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different + $ + ${ANDROID_PACKAGE_ABI_DIR}/$) + add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different + $ + ${ANDROID_PACKAGE_ABI_DIR}/$) + endif() + # 
Generate the Android AAR package add_custom_command(TARGET onnxruntime4j_jni POST_BUILD From bd32daaf217b30fba222dcea5bb2ed2b19590ea6 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Wed, 18 Dec 2024 23:05:05 -0800 Subject: [PATCH 46/64] QNN Nuget Pipeline: print contents of binaries directory to see if shared libs were copied --- .../github/azure-pipelines/templates/qnn-ep-win.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml b/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml index aa0b6bf6d391e..4204a1ba7b90f 100644 --- a/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml +++ b/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml @@ -93,12 +93,18 @@ stages: workingFolder: '$(Build.BinariesDirectory)\${{ parameters.build_config }}' createLogFile: true + - task: CmdLine@2 + displayName: 'Print contents of binaries directory' + inputs: + script: | + dir '$(Build.BinariesDirectory)\${{ parameters.build_config }}' + - template: win-esrp-dll.yml parameters: FolderPath: '$(Build.BinariesDirectory)\${{ parameters.build_config }}\${{ parameters.build_config }}' DisplayName: 'ESRP - Sign dlls' DoEsrp: ${{ parameters.DoEsrp }} - Pattern: 'onnxruntime.dll' + Pattern: 'onnxruntime*.dll' - task: MSBuild@1 displayName: 'Restore NuGet Packages and create project.assets.json' From 8a65a1d34120ef329c2444efe8044f161d1417fd Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Thu, 19 Dec 2024 00:33:50 -0800 Subject: [PATCH 47/64] Fix print in yaml --- tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml b/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml index 4204a1ba7b90f..23147804e856b 100644 --- a/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml +++ 
b/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml @@ -97,7 +97,7 @@ stages: displayName: 'Print contents of binaries directory' inputs: script: | - dir '$(Build.BinariesDirectory)\${{ parameters.build_config }}' + dir $(Build.BinariesDirectory)\${{ parameters.build_config }} - template: win-esrp-dll.yml parameters: From 7ff94a0b85692ff0f0a9ebc7bee1b429f6852b34 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Thu, 19 Dec 2024 00:59:48 -0800 Subject: [PATCH 48/64] Fix cmake copy command for Java build with qnn --- cmake/onnxruntime_java.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/onnxruntime_java.cmake b/cmake/onnxruntime_java.cmake index f1c6379afab72..d96f90d02b1de 100644 --- a/cmake/onnxruntime_java.cmake +++ b/cmake/onnxruntime_java.cmake @@ -218,10 +218,10 @@ if (ANDROID) if (onnxruntime_USE_QNN) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ - ${ANDROID_PACKAGE_ABI_DIR}/$) + ${ANDROID_PACKAGE_ABI_DIR}/$) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ - ${ANDROID_PACKAGE_ABI_DIR}/$) + ${ANDROID_PACKAGE_ABI_DIR}/$) endif() # Generate the Android AAR package From 0fbff4ad3f112d4444dae4d5a8e3e79f556104c1 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Thu, 19 Dec 2024 02:01:19 -0800 Subject: [PATCH 49/64] Try to fix nuget shared lib files for qnn --- tools/nuget/generate_nuspec_for_native_nuget.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/nuget/generate_nuspec_for_native_nuget.py b/tools/nuget/generate_nuspec_for_native_nuget.py index e19e0219e7d5f..980455ccddb0e 100644 --- a/tools/nuget/generate_nuspec_for_native_nuget.py +++ b/tools/nuget/generate_nuspec_for_native_nuget.py @@ -778,7 +778,7 @@ def generate_files(line_list, args): + '\\native" />' ) - if args.execution_provider == "qnn": + if args.execution_provider == "qnn" or is_qnn_package and not 
is_ado_packaging_build: files_list.append( " Date: Thu, 19 Dec 2024 10:14:21 -0800 Subject: [PATCH 50/64] Pass --build_shared_lib when building python wheels with qnn [pipelines] --- .../github/azure-pipelines/templates/py-win-arm64-qnn.yml | 1 + .../github/azure-pipelines/templates/py-win-arm64ec-qnn.yml | 1 + .../github/azure-pipelines/templates/py-win-x64-qnn.yml | 1 + tools/ci_build/github/linux/build_linux_python_package.sh | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/py-win-arm64-qnn.yml b/tools/ci_build/github/azure-pipelines/templates/py-win-arm64-qnn.yml index e07f0afa6109c..da58b70be7f83 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-win-arm64-qnn.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-win-arm64-qnn.yml @@ -94,6 +94,7 @@ jobs: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_generator "$(VSGenerator)" + --build_shared_lib --use_qnn --qnn_home $(QnnSDKRootDir) --enable_pybind diff --git a/tools/ci_build/github/azure-pipelines/templates/py-win-arm64ec-qnn.yml b/tools/ci_build/github/azure-pipelines/templates/py-win-arm64ec-qnn.yml index 8cc647c2464f3..e64a184d8ebeb 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-win-arm64ec-qnn.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-win-arm64ec-qnn.yml @@ -92,6 +92,7 @@ jobs: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_generator "$(VSGenerator)" + --build_shared_lib --use_qnn --qnn_home $(QnnSDKRootDir) --enable_pybind diff --git a/tools/ci_build/github/azure-pipelines/templates/py-win-x64-qnn.yml b/tools/ci_build/github/azure-pipelines/templates/py-win-x64-qnn.yml index 466fee92d0d5e..a61bfc7706818 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-win-x64-qnn.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-win-x64-qnn.yml @@ -92,6 +92,7 @@ jobs: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync
--cmake_generator "$(VSGenerator)" + --build_shared_lib --use_qnn --qnn_home $(QnnSDKRootDir) --enable_pybind diff --git a/tools/ci_build/github/linux/build_linux_python_package.sh b/tools/ci_build/github/linux/build_linux_python_package.sh index e2e0cea69efb5..11997382d119c 100755 --- a/tools/ci_build/github/linux/build_linux_python_package.sh +++ b/tools/ci_build/github/linux/build_linux_python_package.sh @@ -94,7 +94,7 @@ fi if [ "$BUILD_DEVICE" == "NPU" ]; then #Enable QNN EP - BUILD_ARGS+=("--use_qnn" "--qnn_home=/qnn_sdk") + BUILD_ARGS+=("--build_shared_lib" "--use_qnn" "--qnn_home=/qnn_sdk") fi export ONNX_ML=1 From f8747db958e7d16b95db7cda2b9975b0e9f7a05d Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Thu, 19 Dec 2024 13:11:44 -0800 Subject: [PATCH 51/64] Print correct binary directory in QNN Nuget pipeline --- tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml b/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml index 23147804e856b..4a437be325e7a 100644 --- a/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml +++ b/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml @@ -97,7 +97,7 @@ stages: displayName: 'Print contents of binaries directory' inputs: script: | - dir $(Build.BinariesDirectory)\${{ parameters.build_config }} + dir $(Build.BinariesDirectory)\${{ parameters.build_config }}\${{ parameters.build_config }} - template: win-esrp-dll.yml parameters: From 058e7bbbf23a4cf2c6384880e49e1440393b9fe3 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Thu, 19 Dec 2024 13:32:18 -0800 Subject: [PATCH 52/64] Add onnxruntime_providers_qnn.dll/.so to setup.py so that it gets copied when building python wheel --- setup.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index c1580eeb9e8f9..5c464eec537ec 100644 --- a/setup.py +++ 
b/setup.py @@ -311,17 +311,20 @@ def finalize_options(self): providers_tensorrt_or_migraphx = "onnxruntime_providers_" + ("migraphx" if is_migraphx else "tensorrt") providers_openvino = "onnxruntime_providers_openvino" providers_cann = "onnxruntime_providers_cann" +providers_qnn = "onnxruntime_providers_qnn" if platform.system() == "Linux": providers_cuda_or_rocm = "lib" + providers_cuda_or_rocm + ".so" providers_tensorrt_or_migraphx = "lib" + providers_tensorrt_or_migraphx + ".so" providers_openvino = "lib" + providers_openvino + ".so" providers_cann = "lib" + providers_cann + ".so" + providers_qnn = "lib" + providers_qnn + ".so" elif platform.system() == "Windows": providers_cuda_or_rocm = providers_cuda_or_rocm + ".dll" providers_tensorrt_or_migraphx = providers_tensorrt_or_migraphx + ".dll" providers_openvino = providers_openvino + ".dll" providers_cann = providers_cann + ".dll" + providers_qnn = providers_qnn + ".dll" # Additional binaries dl_libs = [] @@ -341,8 +344,9 @@ def finalize_options(self): dl_libs.append(providers_cuda_or_rocm) dl_libs.append(providers_tensorrt_or_migraphx) dl_libs.append(providers_cann) + dl_libs.append(providers_qnn) dl_libs.append("libonnxruntime.so*") - # DNNL, TensorRT & OpenVINO EPs are built as shared libs + # DNNL, TensorRT, OpenVINO, and QNN EPs are built as shared libs libs.extend(["libonnxruntime_providers_shared.so"]) libs.extend(["libonnxruntime_providers_dnnl.so"]) libs.extend(["libonnxruntime_providers_openvino.so"]) @@ -350,6 +354,7 @@ def finalize_options(self): libs.append(providers_cuda_or_rocm) libs.append(providers_tensorrt_or_migraphx) libs.append(providers_cann) + libs.append(providers_qnn) # QNN qnn_deps = [ "libQnnCpu.so", @@ -388,13 +393,14 @@ def finalize_options(self): providers_cann, "onnxruntime.dll", ] - # DNNL, TensorRT & OpenVINO EPs are built as shared libs + # DNNL, TensorRT, OpenVINO, and QNN EPs are built as shared libs libs.extend(["onnxruntime_providers_shared.dll"]) 
libs.extend(["onnxruntime_providers_dnnl.dll"]) libs.extend(["onnxruntime_providers_tensorrt.dll"]) libs.extend(["onnxruntime_providers_openvino.dll"]) libs.extend(["onnxruntime_providers_cuda.dll"]) libs.extend(["onnxruntime_providers_vitisai.dll"]) + libs.extend(["onnxruntime_providers_qnn.dll"]) # DirectML Libs libs.extend(["DirectML.dll"]) # QNN V68/V73 dependencies From 4172575575cab93ba09cbd9d4a4c745fcebe38ea Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Thu, 19 Dec 2024 14:54:11 -0800 Subject: [PATCH 53/64] Edit Java bindins to allow loading/extracting shared provider libs on Android --- .../main/java/ai/onnxruntime/OnnxRuntime.java | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java index 01bf33f8d36e5..0830ed5b7db39 100644 --- a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java +++ b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java @@ -162,8 +162,14 @@ static synchronized void init() throws IOException { // the ONNX Runtime native library will load it extractProviderLibrary(ONNXRUNTIME_LIBRARY_SHARED_NAME); - load(ONNXRUNTIME_LIBRARY_NAME); - load(ONNXRUNTIME_JNI_LIBRARY_NAME); + if (isAndroid()) { + // On Android, we only need to load onnxruntime4j_jni with System.loadLibrary + System.loadLibrary(ONNXRUNTIME_JNI_LIBRARY_NAME); + } else { + load(ONNXRUNTIME_LIBRARY_NAME); + load(ONNXRUNTIME_JNI_LIBRARY_NAME); + } + ortApiHandle = initialiseAPIBase(ORT_API_VERSION_14); if (ortApiHandle == 0L) { throw new IllegalStateException( @@ -273,10 +279,6 @@ static boolean extractQNN() { * @return True if the library is ready for loading by ORT's native code, false otherwise. 
*/ static synchronized boolean extractProviderLibrary(String libraryName) { - // Android does not need to extract library and it has no shared provider library - if (isAndroid()) { - return false; - } // Check if we've already extracted or check this provider, and it's ready if (extractedSharedProviders.contains(libraryName)) { return true; @@ -323,12 +325,6 @@ static boolean isAndroid() { * @throws IOException If the file failed to read or write. */ private static void load(String library) throws IOException { - // On Android, we simply use System.loadLibrary - if (isAndroid()) { - System.loadLibrary("onnxruntime4j_jni"); - return; - } - // 1) The user may skip loading of this library: String skip = System.getProperty("onnxruntime.native." + library + ".skip"); if (Boolean.TRUE.toString().equalsIgnoreCase(skip)) { From 3e39b88ff4d5d39ad2d98c3cbffa8d7bb9ecb5f1 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Thu, 19 Dec 2024 15:54:02 -0800 Subject: [PATCH 54/64] Create temp directory for java android --- java/src/main/java/ai/onnxruntime/OnnxRuntime.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java index 0830ed5b7db39..742805bebc0fd 100644 --- a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java +++ b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java @@ -155,7 +155,7 @@ static synchronized void init() throws IOException { if (loaded) { return; } - tempDirectory = isAndroid() ? 
null : Files.createTempDirectory("onnxruntime-java"); + tempDirectory = Files.createTempDirectory("onnxruntime-java"); try { libraryDirPathProperty = System.getProperty(ONNXRUNTIME_NATIVE_PATH); // Extract and prepare the shared provider library but don't try to load it, @@ -181,9 +181,7 @@ static synchronized void init() throws IOException { version = initialiseVersion(); loaded = true; } finally { - if (tempDirectory != null) { - cleanUp(tempDirectory.toFile()); - } + cleanUp(tempDirectory.toFile()); } } From 5ed035fc2e22843803e0ef06c78d4978e18cb350 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Fri, 20 Dec 2024 15:29:21 -0800 Subject: [PATCH 55/64] consistent library loading logic for java android --- java/src/main/java/ai/onnxruntime/OnnxRuntime.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java index 742805bebc0fd..c4500a6a2178d 100644 --- a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java +++ b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java @@ -162,13 +162,8 @@ static synchronized void init() throws IOException { // the ONNX Runtime native library will load it extractProviderLibrary(ONNXRUNTIME_LIBRARY_SHARED_NAME); - if (isAndroid()) { - // On Android, we only need to load onnxruntime4j_jni with System.loadLibrary - System.loadLibrary(ONNXRUNTIME_JNI_LIBRARY_NAME); - } else { - load(ONNXRUNTIME_LIBRARY_NAME); - load(ONNXRUNTIME_JNI_LIBRARY_NAME); - } + load(ONNXRUNTIME_LIBRARY_NAME); + load(ONNXRUNTIME_JNI_LIBRARY_NAME); ortApiHandle = initialiseAPIBase(ORT_API_VERSION_14); if (ortApiHandle == 0L) { From 65624339b5aa6d9b9888b38ae77e0822fb996c3a Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sat, 21 Dec 2024 19:49:22 -0800 Subject: [PATCH 56/64] Add temporary logging --- .../main/java/ai/onnxruntime/OnnxRuntime.java | 3 +++ .../github/android/build_aar_package.py | 19 +++++++++++++++++++ 2 files changed, 22 
insertions(+) diff --git a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java index c4500a6a2178d..10ea82e02e0ab 100644 --- a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java +++ b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java @@ -155,6 +155,9 @@ static synchronized void init() throws IOException { if (loaded) { return; } + // TODO: Remove + logger.setLevel(Level.FINE); + tempDirectory = Files.createTempDirectory("onnxruntime-java"); try { libraryDirPathProperty = System.getProperty(ONNXRUNTIME_NATIVE_PATH); diff --git a/tools/ci_build/github/android/build_aar_package.py b/tools/ci_build/github/android/build_aar_package.py index 7294e0548e1b2..dc4f76eb25fb6 100644 --- a/tools/ci_build/github/android/build_aar_package.py +++ b/tools/ci_build/github/android/build_aar_package.py @@ -125,6 +125,11 @@ def _build_aar(args): subprocess.run(abi_build_command, env=temp_env, shell=False, check=True, cwd=REPO_DIR) + # TODO: Remove + abi_build_dir_files = os.listdir(abi_build_dir) + print("[REMOVE]: {abi_build_dir=} contents:") + print(abi_jnilibs_dir_files) + # create symbolic links for libonnxruntime.so and libonnxruntime4j_jni.so # to jnilibs/[abi] for later compiling the aar package abi_jnilibs_dir = os.path.join(jnilibs_dir, abi) @@ -139,8 +144,15 @@ def _build_aar(args): # add double check with os.path.islink if os.path.exists(target_lib_name) or os.path.islink(target_lib_name): os.remove(target_lib_name) + print(f"[REMOVE]: Making sym link from {os.path.join(abi_build_dir, build_config, lib_name)} to " + f"{target_lib_name}") os.symlink(os.path.join(abi_build_dir, build_config, lib_name), target_lib_name) + # TODO: Remove + abi_jnilibs_dir_files = os.listdir(abi_jnilibs_dir) + print("[REMOVE]: {abi_jnilibs_dir=} contents:") + print(abi_jnilibs_dir_files) + # copy executables for each abi, in case we want to publish those as well # some of them might not exist, e.g., if we skip building the tests abi_exe_dir = 
os.path.join(exe_dir, abi) @@ -150,12 +162,19 @@ def _build_aar(args): for exe_name in execs_to_copy: src_exe_path = os.path.join(abi_build_dir, build_config, exe_name) if not os.path.exists(src_exe_path): + print(f"[REMOVE]: Source exe path does not exist: {src_exe_path}") continue os.makedirs(abi_exe_dir, exist_ok=True) dest_exe_path = os.path.join(abi_exe_dir, exe_name) + print(f"[REMOVE]: Copying {src_exe_path} to {dst_exe_path}") shutil.copyfile(src_exe_path, dest_exe_path) + # TODO: Remove + abi_exe_dir_files = os.listdir(abi_exe_dir) + print("[REMOVE]: {abi_exe_dir=} contents:") + print(abi_exe_dir_files) + # we only need to define the header files path once if not header_files_path: header_files_path = os.path.join(abi_build_dir, build_config, "android", "headers") From 0bbf3af4bd38961d91561e9dbd2c005c11929d80 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sat, 21 Dec 2024 20:43:37 -0800 Subject: [PATCH 57/64] fix typo --- tools/ci_build/github/android/build_aar_package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/android/build_aar_package.py b/tools/ci_build/github/android/build_aar_package.py index dc4f76eb25fb6..0fa62a93c21ea 100644 --- a/tools/ci_build/github/android/build_aar_package.py +++ b/tools/ci_build/github/android/build_aar_package.py @@ -128,7 +128,7 @@ def _build_aar(args): # TODO: Remove abi_build_dir_files = os.listdir(abi_build_dir) print("[REMOVE]: {abi_build_dir=} contents:") - print(abi_jnilibs_dir_files) + print(abi_build_dir_files) # create symbolic links for libonnxruntime.so and libonnxruntime4j_jni.so # to jnilibs/[abi] for later compiling the aar package From c04176a5c5b3e5dee7a937f503a4083ba7daa48c Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sat, 21 Dec 2024 21:17:39 -0800 Subject: [PATCH 58/64] fix another type in temporary logging code for debugging android qnn java --- tools/ci_build/github/android/build_aar_package.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/tools/ci_build/github/android/build_aar_package.py b/tools/ci_build/github/android/build_aar_package.py index 0fa62a93c21ea..df2ed73445217 100644 --- a/tools/ci_build/github/android/build_aar_package.py +++ b/tools/ci_build/github/android/build_aar_package.py @@ -167,7 +167,7 @@ def _build_aar(args): os.makedirs(abi_exe_dir, exist_ok=True) dest_exe_path = os.path.join(abi_exe_dir, exe_name) - print(f"[REMOVE]: Copying {src_exe_path} to {dst_exe_path}") + print(f"[REMOVE]: Copying {src_exe_path} to {dest_exe_path}") shutil.copyfile(src_exe_path, dest_exe_path) # TODO: Remove From cc14971813c957c77bcfbfb4916b45a49c893653 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sat, 21 Dec 2024 23:53:22 -0800 Subject: [PATCH 59/64] Android: Go back to not extracting shared libs from classpath resources. --- .../main/java/ai/onnxruntime/OnnxRuntime.java | 23 ++++++++++++++----- .../main/java/ai/onnxruntime/OrtSession.java | 6 +++-- .../github/android/build_aar_package.py | 19 --------------- 3 files changed, 21 insertions(+), 27 deletions(-) diff --git a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java index 10ea82e02e0ab..c28c79f1e723e 100644 --- a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java +++ b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java @@ -155,17 +155,16 @@ static synchronized void init() throws IOException { if (loaded) { return; } - // TODO: Remove - logger.setLevel(Level.FINE); - - tempDirectory = Files.createTempDirectory("onnxruntime-java"); + tempDirectory = isAndroid() ? 
null : Files.createTempDirectory("onnxruntime-java"); try { libraryDirPathProperty = System.getProperty(ONNXRUNTIME_NATIVE_PATH); // Extract and prepare the shared provider library but don't try to load it, // the ONNX Runtime native library will load it extractProviderLibrary(ONNXRUNTIME_LIBRARY_SHARED_NAME); - load(ONNXRUNTIME_LIBRARY_NAME); + if (!isAndroid()) { + load(ONNXRUNTIME_LIBRARY_NAME); + } load(ONNXRUNTIME_JNI_LIBRARY_NAME); ortApiHandle = initialiseAPIBase(ORT_API_VERSION_14); @@ -179,7 +178,9 @@ static synchronized void init() throws IOException { version = initialiseVersion(); loaded = true; } finally { - cleanUp(tempDirectory.toFile()); + if (tempDirectory != null) { + cleanUp(tempDirectory.toFile()); + } } } @@ -275,6 +276,10 @@ static boolean extractQNN() { * @return True if the library is ready for loading by ORT's native code, false otherwise. */ static synchronized boolean extractProviderLibrary(String libraryName) { + // Android does not need to extract provider libraries. + if (isAndroid()) { + return false; + } // Check if we've already extracted or check this provider, and it's ready if (extractedSharedProviders.contains(libraryName)) { return true; @@ -321,6 +326,12 @@ static boolean isAndroid() { * @throws IOException If the file failed to read or write. */ private static void load(String library) throws IOException { + // On Android, we simply use System.loadLibrary + if (isAndroid()) { + System.loadLibrary(library); + return; + } + // 1) The user may skip loading of this library: String skip = System.getProperty("onnxruntime.native." 
+ library + ".skip"); if (Boolean.TRUE.toString().equalsIgnoreCase(skip)) { diff --git a/java/src/main/java/ai/onnxruntime/OrtSession.java b/java/src/main/java/ai/onnxruntime/OrtSession.java index 700008e66bb36..c41d06bb1b2bf 100644 --- a/java/src/main/java/ai/onnxruntime/OrtSession.java +++ b/java/src/main/java/ai/onnxruntime/OrtSession.java @@ -1319,8 +1319,10 @@ public void addXnnpack(Map providerOptions) throws OrtException * @throws OrtException If there was an error in native code. */ public void addQnn(Map providerOptions) throws OrtException { - if (OnnxRuntime.extractQNN()) { - String qnnProviderName = "QNN"; + String qnnProviderName = "QNN"; + if (OnnxRuntime.isAndroid()) { + addExecutionProvider(qnnProviderName, providerOptions); + } else if (OnnxRuntime.extractQNN()) { addExecutionProvider(qnnProviderName, providerOptions); } else { throw new OrtException( diff --git a/tools/ci_build/github/android/build_aar_package.py b/tools/ci_build/github/android/build_aar_package.py index df2ed73445217..7294e0548e1b2 100644 --- a/tools/ci_build/github/android/build_aar_package.py +++ b/tools/ci_build/github/android/build_aar_package.py @@ -125,11 +125,6 @@ def _build_aar(args): subprocess.run(abi_build_command, env=temp_env, shell=False, check=True, cwd=REPO_DIR) - # TODO: Remove - abi_build_dir_files = os.listdir(abi_build_dir) - print("[REMOVE]: {abi_build_dir=} contents:") - print(abi_build_dir_files) - # create symbolic links for libonnxruntime.so and libonnxruntime4j_jni.so # to jnilibs/[abi] for later compiling the aar package abi_jnilibs_dir = os.path.join(jnilibs_dir, abi) @@ -144,15 +139,8 @@ def _build_aar(args): # add double check with os.path.islink if os.path.exists(target_lib_name) or os.path.islink(target_lib_name): os.remove(target_lib_name) - print(f"[REMOVE]: Making sym link from {os.path.join(abi_build_dir, build_config, lib_name)} to " - f"{target_lib_name}") os.symlink(os.path.join(abi_build_dir, build_config, lib_name), target_lib_name) - # 
TODO: Remove - abi_jnilibs_dir_files = os.listdir(abi_jnilibs_dir) - print("[REMOVE]: {abi_jnilibs_dir=} contents:") - print(abi_jnilibs_dir_files) - # copy executables for each abi, in case we want to publish those as well # some of them might not exist, e.g., if we skip building the tests abi_exe_dir = os.path.join(exe_dir, abi) @@ -162,19 +150,12 @@ def _build_aar(args): for exe_name in execs_to_copy: src_exe_path = os.path.join(abi_build_dir, build_config, exe_name) if not os.path.exists(src_exe_path): - print(f"[REMOVE]: Source exe path does not exist: {src_exe_path}") continue os.makedirs(abi_exe_dir, exist_ok=True) dest_exe_path = os.path.join(abi_exe_dir, exe_name) - print(f"[REMOVE]: Copying {src_exe_path} to {dest_exe_path}") shutil.copyfile(src_exe_path, dest_exe_path) - # TODO: Remove - abi_exe_dir_files = os.listdir(abi_exe_dir) - print("[REMOVE]: {abi_exe_dir=} contents:") - print(abi_exe_dir_files) - # we only need to define the header files path once if not header_files_path: header_files_path = os.path.join(abi_build_dir, build_config, "android", "headers") From 4c6a985432779fcb209bb610eab3520e3266601a Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 22 Dec 2024 01:59:51 -0800 Subject: [PATCH 60/64] Try linking shared.so with qnn.so for android --- cmake/onnxruntime_providers_cpu.cmake | 10 +++++++++- cmake/onnxruntime_providers_qnn.cmake | 18 ++++-------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/cmake/onnxruntime_providers_cpu.cmake b/cmake/onnxruntime_providers_cpu.cmake index 79e430763da93..b4b4e7659d05a 100644 --- a/cmake/onnxruntime_providers_cpu.cmake +++ b/cmake/onnxruntime_providers_cpu.cmake @@ -238,7 +238,15 @@ if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/shared/exported_symbols.lst") elseif(UNIX) if(NOT 
${CMAKE_SYSTEM_NAME} MATCHES "AIX") - set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/shared/version_script.lds -Xlinker --gc-sections") + target_link_options(onnxruntime_providers_shared PRIVATE + "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/shared/version_script.lds" + "LINKER:--gc-sections") + if(CMAKE_SYSTEM_NAME STREQUAL "Android") + # Need to link libonnxruntime_providers_.so with libonnxruntime_providers_shared.so on Android + # because dlopen with RTLD_GLOBAL does not bring all symbols to global scope. + # See: https://github.com/android/ndk/issues/201 + set(ONNXRUNTIME_PROVIDERS_SHARED onnxruntime_providers_shared) + endif() endif() elseif(WIN32) set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/shared/symbols.def") diff --git a/cmake/onnxruntime_providers_qnn.cmake b/cmake/onnxruntime_providers_qnn.cmake index 505c357d516d0..4f97c968ebf56 100644 --- a/cmake/onnxruntime_providers_qnn.cmake +++ b/cmake/onnxruntime_providers_qnn.cmake @@ -29,20 +29,10 @@ # Set linker flags for function(s) exported by EP DLL if(UNIX) - if(CMAKE_SYSTEM_NAME STREQUAL "Android") - target_link_options(onnxruntime_providers_qnn PRIVATE - "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds" - "LINKER:--gc-sections" - "LINKER:-rpath=\$ORIGIN" - "LINKER:-z,undefs" - ) - else() - target_link_options(onnxruntime_providers_qnn PRIVATE - "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds" - "LINKER:--gc-sections" - "LINKER:-rpath=\$ORIGIN" - ) - endif() + target_link_options(onnxruntime_providers_qnn PRIVATE + "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds" + "LINKER:--gc-sections" + "LINKER:-rpath=\$ORIGIN") elseif(WIN32) set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS 
"-DEF:${ONNXRUNTIME_ROOT}/core/providers/qnn/symbols.def") else() From fc003463828dd631b22d2efa4a7fe4b7b56da0c3 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 22 Dec 2024 16:30:42 -0800 Subject: [PATCH 61/64] Use --undefined=Provider_GetHost --- cmake/onnxruntime_providers_cpu.cmake | 6 ------ cmake/onnxruntime_providers_qnn.cmake | 18 ++++++++++++++---- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/cmake/onnxruntime_providers_cpu.cmake b/cmake/onnxruntime_providers_cpu.cmake index b4b4e7659d05a..f84b26d7d7a47 100644 --- a/cmake/onnxruntime_providers_cpu.cmake +++ b/cmake/onnxruntime_providers_cpu.cmake @@ -241,12 +241,6 @@ if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD target_link_options(onnxruntime_providers_shared PRIVATE "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/shared/version_script.lds" "LINKER:--gc-sections") - if(CMAKE_SYSTEM_NAME STREQUAL "Android") - # Need to link libonnxruntime_providers_.so with libonnxruntime_providers_shared.so on Android - # because dlopen with RTLD_GLOBAL does not bring all symbols to global scope. 
- # See: https://github.com/android/ndk/issues/201 - set(ONNXRUNTIME_PROVIDERS_SHARED onnxruntime_providers_shared) - endif() endif() elseif(WIN32) set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/shared/symbols.def") diff --git a/cmake/onnxruntime_providers_qnn.cmake b/cmake/onnxruntime_providers_qnn.cmake index 4f97c968ebf56..063b704524106 100644 --- a/cmake/onnxruntime_providers_qnn.cmake +++ b/cmake/onnxruntime_providers_qnn.cmake @@ -29,10 +29,20 @@ # Set linker flags for function(s) exported by EP DLL if(UNIX) - target_link_options(onnxruntime_providers_qnn PRIVATE - "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds" - "LINKER:--gc-sections" - "LINKER:-rpath=\$ORIGIN") + if(CMAKE_SYSTEM_NAME STREQUAL "Android") + target_link_options(onnxruntime_providers_qnn PRIVATE + "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds" + "LINKER:--gc-sections" + "LINKER:-rpath=\$ORIGIN" + "LINKER:--undefined=Provider_GetHost" + ) + else() + target_link_options(onnxruntime_providers_qnn PRIVATE + "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds" + "LINKER:--gc-sections" + "LINKER:-rpath=\$ORIGIN" + ) + endif() elseif(WIN32) set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/qnn/symbols.def") else() From b707c46acfa93c40ffd85e062888f31add5fbc8a Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 22 Dec 2024 17:13:02 -0800 Subject: [PATCH 62/64] prepend _ to linker arg --- cmake/onnxruntime_providers_qnn.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/onnxruntime_providers_qnn.cmake b/cmake/onnxruntime_providers_qnn.cmake index 063b704524106..aa81e70a4bb00 100644 --- a/cmake/onnxruntime_providers_qnn.cmake +++ b/cmake/onnxruntime_providers_qnn.cmake @@ -34,7 +34,7 @@ 
"LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds" "LINKER:--gc-sections" "LINKER:-rpath=\$ORIGIN" - "LINKER:--undefined=Provider_GetHost" + "LINKER:-u,_Provider_GetHost" ) else() target_link_options(onnxruntime_providers_qnn PRIVATE From 7f505586bb7110206e836f6da1140a92c1624fdc Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 23 Dec 2024 11:02:32 -0800 Subject: [PATCH 63/64] Add linker option -z global to libonnxruntime_providers_shared.so on Android --- cmake/onnxruntime_providers_cpu.cmake | 13 ++++++++++--- cmake/onnxruntime_providers_qnn.cmake | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/cmake/onnxruntime_providers_cpu.cmake b/cmake/onnxruntime_providers_cpu.cmake index f84b26d7d7a47..177969b1d0c6d 100644 --- a/cmake/onnxruntime_providers_cpu.cmake +++ b/cmake/onnxruntime_providers_cpu.cmake @@ -238,9 +238,16 @@ if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/shared/exported_symbols.lst") elseif(UNIX) if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "AIX") - target_link_options(onnxruntime_providers_shared PRIVATE - "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/shared/version_script.lds" - "LINKER:--gc-sections") + if(CMAKE_SYSTEM_NAME STREQUAL "Android") + target_link_options(onnxruntime_providers_shared PRIVATE + "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/shared/version_script.lds" + "LINKER:--gc-sections" + "LINKER:-z,global") + else() + target_link_options(onnxruntime_providers_shared PRIVATE + "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/shared/version_script.lds" + "LINKER:--gc-sections") + endif() endif() elseif(WIN32) set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/shared/symbols.def") diff --git 
a/cmake/onnxruntime_providers_qnn.cmake b/cmake/onnxruntime_providers_qnn.cmake index aa81e70a4bb00..505c357d516d0 100644 --- a/cmake/onnxruntime_providers_qnn.cmake +++ b/cmake/onnxruntime_providers_qnn.cmake @@ -34,7 +34,7 @@ "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds" "LINKER:--gc-sections" "LINKER:-rpath=\$ORIGIN" - "LINKER:-u,_Provider_GetHost" + "LINKER:-z,undefs" ) else() target_link_options(onnxruntime_providers_qnn PRIVATE From 17c3bdee734a4dad1a6e30a3abb3d67df3d43554 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 23 Dec 2024 14:56:05 -0800 Subject: [PATCH 64/64] Try to use libc++_shared.so for android qnn build --- .../ci_build/github/android/default_qnn_aar_build_settings.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/ci_build/github/android/default_qnn_aar_build_settings.json b/tools/ci_build/github/android/default_qnn_aar_build_settings.json index 599c108f830e7..66ae7d25153f0 100644 --- a/tools/ci_build/github/android/default_qnn_aar_build_settings.json +++ b/tools/ci_build/github/android/default_qnn_aar_build_settings.json @@ -10,6 +10,7 @@ "--parallel", "--cmake_generator=Ninja", "--build_java", + "--android_cpp_shared", "--build_shared_lib", "--use_qnn", "--cmake_extra_defines=onnxruntime_BUILD_UNIT_TESTS=OFF",