From 93669ddb429c04411be265c2c255d643dfcc9728 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Thu, 12 Oct 2023 02:12:52 -0700 Subject: [PATCH 1/6] Fix index-out-of-bounds bug in Slice builder when initializer is shared. --- .../qnn/builder/opbuilder/slice_op_builder.cc | 198 ++++++++---------- .../test/providers/qnn/slice_htp_test.cc | 82 +++++++- 2 files changed, 171 insertions(+), 109 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc index 15fc55b5b59b6..4f6cf714f4108 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc @@ -37,16 +37,16 @@ class SliceOpBuilder : public BaseOpBuilder { TensorShapeVector& raw_starts, TensorShapeVector& raw_ends, TensorShapeVector& raw_axes) const; - typedef struct { - int32_t begin, end, stride; - } Range; - mutable std::vector ranges_; }; Status SliceOpBuilder::ExplictOpCheck(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const { size_t input_count = node_unit.Inputs().size(); - // Op set 9 only has 1 input with starts, ends, axes attribute - // Op set > 9, starts, ends, axes are from node input + + // Opset < 10: Only has 1 data input. The starts, ends, and axes values are attributes. + // Opset >= 10: Everything is an input. The data, starts, and ends inputs are required. + + ORT_RETURN_IF(node_unit.SinceVersion() >= 10 && input_count < 3); + if (input_count > 1) { // Skip the first input. All other input need to be initializer for (size_t i = 1; i < input_count; i++) { @@ -75,6 +75,45 @@ void SliceOpBuilder::GetDataFromAttribute(const NodeUnit& node_unit, } } +// Gets the data from initializer inputs (e.g., starts, ends, axes, or steps) as a TensorShapeVector. +static Status GetInitializerInputData(const NodeUnitIODef& input, const QnnModelWrapper& qnn_model_wrapper, + TensorShapeVector& output) { + OnnxInputInfo input_info = {}; + ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(input, input_info)); + ORT_RETURN_IF_NOT(input_info.is_initializer, + "QNN requires the starts, ends, axes, and steps inputs to " + "be initializers"); + + std::vector initializer_bytes; + ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info.initializer_tensor, initializer_bytes)); + + size_t tensor_byte_size = initializer_bytes.size(); + const auto data_type = input_info.initializer_tensor->data_type(); + + Status status; + + switch (data_type) { + case ONNX_NAMESPACE::TensorProto_DataType_INT64: { + const int64_t* tensor_data = reinterpret_cast(initializer_bytes.data()); + size_t size = tensor_byte_size / sizeof(int64_t); + output.insert(output.end(), tensor_data, tensor_data + size); + break; + } + case ONNX_NAMESPACE::TensorProto_DataType_INT32: { + const int32_t* tensor_data = reinterpret_cast(initializer_bytes.data()); + size_t size = tensor_byte_size / sizeof(int32_t); + output.insert(output.end(), tensor_data, tensor_data + size); + break; + } + default: + status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Data type ", data_type, + " is not supported for Slice initializer input ", input.node_arg.Name().c_str()); + break; + } + + return status; +} + // Note: For ONNX Slice operation the expected number of inputs is between 3 and 5 Status SliceOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, @@ -84,123 +123,68 @@ Status SliceOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, if (do_op_validation) { ORT_RETURN_IF_ERROR(ExplictOpCheck(qnn_model_wrapper, node_unit)); } - Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32; + + // Only need to add input 0. The other inputs (if any) contain static data that is passed to QNN APIs + // as static parameters. + return ProcessInput(qnn_model_wrapper, node_unit.Inputs()[0], logger, input_names); +} + +Status SliceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper, + const NodeUnit& node_unit, + std::vector&& input_names, + const logging::Logger& logger, + bool do_op_validation) const { + // Extract starts, ends, axes, and steps data from attributes (opset < 10) or initializer inputs (opset >= 10). TensorShapeVector raw_starts; TensorShapeVector raw_ends; TensorShapeVector raw_axes; TensorShapeVector raw_steps; - std::vector input0_shape; - auto inputs = node_unit.Inputs(); - auto input_count = inputs.size(); - // Opset 9, only 1 input, starts, ends, axes are in attribute - if (1 == input_count) { + const auto& inputs = node_unit.Inputs(); + const size_t input_count = inputs.size(); + + // Opset 9 only has 1 input. The starts, ends, axes values are attributes. + if (node_unit.SinceVersion() < 10) { GetDataFromAttribute(node_unit, raw_starts, raw_ends, raw_axes); - } + } else { + constexpr size_t starts_index = 1; + constexpr size_t ends_index = 2; + constexpr size_t axes_index = 3; + constexpr size_t steps_index = 4; - for (size_t input_i = 0; input_i < input_count; ++input_i) { - auto& input_name = inputs[input_i].node_arg.Name(); - if (input_name.empty()) { - // Ignore unspecified/unused optional input - continue; - } - if (qnn_model_wrapper.IsQnnTensorWrapperExist(input_name)) { - LOGS(logger, VERBOSE) << "Tensor already added or the input is not named, skip it: " << input_name; - input_names.push_back(input_name); - ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[input_i].node_arg, input0_shape), "Cannot get shape"); - continue; + // Starts input (required). + ORT_RETURN_IF_ERROR(GetInitializerInputData(inputs[starts_index], qnn_model_wrapper, raw_starts)); + + // Ends input (required). + ORT_RETURN_IF_ERROR(GetInitializerInputData(inputs[ends_index], qnn_model_wrapper, raw_ends)); + + // Axes input (optional). + if (input_count > axes_index && !inputs[axes_index].node_arg.Name().empty()) { + ORT_RETURN_IF_ERROR(GetInitializerInputData(inputs[axes_index], qnn_model_wrapper, raw_axes)); } - bool is_quantized_tensor = inputs[input_i].quant_param.has_value(); - const auto* type_proto = inputs[input_i].node_arg.TypeAsProto(); - ORT_RETURN_IF_ERROR(utils::GetQnnDataType(is_quantized_tensor, type_proto, qnn_data_type)); - - std::vector input_shape; - ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[input_i].node_arg, input_shape), "Cannot get shape"); - - Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT; - utils::InitializeQuantizeParam(quantize_param, is_quantized_tensor); - ORT_RETURN_IF_NOT(qnn_model_wrapper.ProcessQuantizationParameter(inputs[input_i].quant_param, - quantize_param.scaleOffsetEncoding.scale, - quantize_param.scaleOffsetEncoding.offset), - "Cannot get quantization parameter"); - - std::vector unpacked_tensor; - bool is_initializer_input = qnn_model_wrapper.IsInitializerInput(input_name); - if (is_initializer_input) { - const auto& input_tensor = qnn_model_wrapper.GetInitializerTensors().at(input_name); - ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_tensor, unpacked_tensor)); - size_t tensor_byte_size = unpacked_tensor.size(); - const auto data_type = input_tensor->data_type(); - TensorShapeVector data; - if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) { - const int64_t* tensor_data = reinterpret_cast(unpacked_tensor.data()); - size_t size = tensor_byte_size / sizeof(int64_t); - data.insert(data.end(), tensor_data, tensor_data + size); - } else if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT32) { - const int32_t* tensor_data = reinterpret_cast(unpacked_tensor.data()); - size_t size = tensor_byte_size / sizeof(int32_t); - data.insert(data.end(), tensor_data, tensor_data + size); - } else { - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, - "Data type for starts and ends inputs' is not supported in this build. Got ", - data_type); - } - if (input_i == 0) { - // Do nothing! - } else if (input_i == 1) { - // Starts - raw_starts = data; - continue; - } else if (input_i == 2) { - // Ends - raw_ends = data; - continue; - } else if (input_i == 3) { - // Axes - raw_axes = data; - continue; - } else if (input_i == 4) { - // Steps - raw_steps = data; - continue; - } + // Steps input (optional). + if (input_count > steps_index && !inputs[steps_index].node_arg.Name().empty()) { + ORT_RETURN_IF_ERROR(GetInitializerInputData(inputs[steps_index], qnn_model_wrapper, raw_steps)); } - input0_shape = input_shape; - - input_names.push_back(input_name); - Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, input_name); - Qnn_QuantizeParams_t quantize_params = QNN_QUANTIZE_PARAMS_INIT; - QnnTensorWrapper input_tensorwrapper(input_name, tensor_type, qnn_data_type, quantize_params, - std::move(input_shape), std::move(unpacked_tensor)); - ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor."); } + + std::vector input0_shape; + ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[0].node_arg, input0_shape), + "Cannot get shape for Slice input 0."); + TensorShapeVector input_dimensions(input0_shape.cbegin(), input0_shape.cend()); onnxruntime::SliceOp::PrepareForComputeMetadata compute_metadata(input_dimensions); - ORT_RETURN_IF_ERROR( - SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, raw_steps, compute_metadata)); - ranges_.clear(); - for (size_t i = 0; i < input_dimensions.size(); i++) { - auto start = static_cast(compute_metadata.starts_[i]); - auto end = static_cast(compute_metadata.ends_[i]); - auto step = static_cast(compute_metadata.steps_[i]); - ranges_.push_back(Range({start, end, step})); - } - return Status::OK(); -} + ORT_RETURN_IF_ERROR(SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, raw_steps, compute_metadata)); -Status SliceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper, - const NodeUnit& node_unit, - std::vector&& input_names, - const logging::Logger& logger, - bool do_op_validation) const { - std::vector ranges_dims{static_cast(ranges_.size()), 3}; + std::vector ranges_dims{static_cast(input_dimensions.size()), 3}; std::vector ranges_data; - for (auto range : ranges_) { - ranges_data.push_back(static_cast(range.begin)); - ranges_data.push_back(static_cast(range.end)); - ranges_data.push_back(static_cast(range.stride)); + for (size_t i = 0; i < input_dimensions.size(); i++) { + ranges_data.push_back(static_cast(compute_metadata.starts_[i])); + ranges_data.push_back(static_cast(compute_metadata.ends_[i])); + ranges_data.push_back(static_cast(compute_metadata.steps_[i])); } + QnnParamWrapper ranges_paramwrapper(node_unit.Index(), node_unit.Name(), QNN_OP_STRIDED_SLICE_PARAM_RANGES, diff --git a/onnxruntime/test/providers/qnn/slice_htp_test.cc b/onnxruntime/test/providers/qnn/slice_htp_test.cc index edc079dc65276..07c97d2d7b1fa 100644 --- a/onnxruntime/test/providers/qnn/slice_htp_test.cc +++ b/onnxruntime/test/providers/qnn/slice_htp_test.cc @@ -14,6 +14,48 @@ namespace onnxruntime { namespace test { + +// Test for "index-out-of-bounds" bug that occurred when a Slice operator +// shared one of its initializer inputs with another op that was processed by QNN EP first. +TEST_F(QnnCPUBackendTests, Slice_SharedInitializersBugFix) { + // Model with an Add that processes a shared initializer before Slice is processed. + GetTestModelFn model_fn = [](ModelTestBuilder& builder) { + NodeArg* input0 = builder.MakeInput({2, 2}, {1, 2, 3, 4}); + + // Initializers + NodeArg* starts_input = builder.Make1DInitializer({1, 0}); // Shared by Add + NodeArg* ends_input = builder.Make1DInitializer({2, 2}); + NodeArg* axes_input = builder.Make1DInitializer({0, 1}); + NodeArg* steps_input = builder.Make1DInitializer({1, 1}); + + // Add input0 with a shared initializer. + NodeArg* add_output = builder.MakeIntermediate(); + builder.AddNode("Add", {input0, starts_input}, {add_output}); + + // Cast Add's output to float. + NodeArg* cast_output = builder.MakeIntermediate(); + Node& cast_node = builder.AddNode("Cast", {add_output}, {cast_output}); + cast_node.AddAttribute("to", static_cast(ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)); + + // Slice Cast's output + NodeArg* slice0_out = builder.MakeOutput(); + builder.AddNode("Slice", {cast_output, starts_input, ends_input, axes_input, steps_input}, {slice0_out}); + }; + + ProviderOptions provider_options; + +#if defined(_WIN32) + provider_options["backend_path"] = "QnnCpu.dll"; +#else + provider_options["backend_path"] = "libQnnCpu.so"; +#endif + + RunQnnModelTest(model_fn, + provider_options, + 13, // opset + ExpectedEPNodeAssignment::All); +} + #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) /** @@ -26,6 +68,7 @@ namespace test { * \param axes_def The axes input's definition. * \param steps_def The steps input's definition. * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None). + * \param use_contrib_qdq Force Q/DQ ops to use the com.microsoft domain (enable 16-bit). */ template static void RunSliceQDQTest(const TestInputDef& data_def, @@ -33,7 +76,8 @@ static void RunSliceQDQTest(const TestInputDef& data_def, const TestInputDef& ends_def, const TestInputDef& axes_def, const TestInputDef& steps_def, - ExpectedEPNodeAssignment expected_ep_assignment) { + ExpectedEPNodeAssignment expected_ep_assignment, + bool use_contrib_qdq = false) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -45,7 +89,8 @@ static void RunSliceQDQTest(const TestInputDef& data_def, const std::vector> int64_inputs = {starts_def, ends_def, axes_def, steps_def}; TestQDQModelAccuracy(BuildOpTestCase("Slice", f32_inputs, int64_inputs, {}), - BuildQDQOpTestCase("Slice", f32_inputs, int64_inputs, {}), + BuildQDQOpTestCase("Slice", f32_inputs, int64_inputs, {}, kOnnxDomain, + use_contrib_qdq), provider_options, 18, expected_ep_assignment); @@ -123,6 +168,39 @@ TEST_F(QnnHTPBackendTests, SliceInt32OnHTP) { ExpectedEPNodeAssignment::All); } +// Test 8-bit QDQ Slice with more than 1 axis. +TEST_F(QnnHTPBackendTests, SliceU8_MultAxes) { + std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; + RunSliceQDQTest(TestInputDef({2, 4}, false, input_data), + TestInputDef({2}, true, {1, 0}), // starts + TestInputDef({2}, true, {2, 3}), // ends + TestInputDef({2}, true, {0, 1}), // axes + TestInputDef({2}, true, {1, 2}), // steps + ExpectedEPNodeAssignment::All); +} + +// Test 16-bit QDQ Slice with more than 1 axis. +TEST_F(QnnHTPBackendTests, SliceU16_MultAxes) { + std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; + RunSliceQDQTest(TestInputDef({2, 4}, false, input_data), + TestInputDef({2}, true, {1, 0}), // starts + TestInputDef({2}, true, {2, 3}), // ends + TestInputDef({2}, true, {0, 1}), // axes + TestInputDef({2}, true, {1, 2}), // steps + ExpectedEPNodeAssignment::All, + true); // Use com.microsoft Q/DQ ops for 16-bit +} + +// Test 8-bit QDQ Slice with more than 1 axis and an end value that exceeds the associated dimension size. +TEST_F(QnnHTPBackendTests, SliceU8_MultAxes_LargeEnd) { + std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; + RunSliceQDQTest(TestInputDef({2, 4}, false, input_data), + TestInputDef({2}, true, {0, 1}), // starts + TestInputDef({2}, true, {-1, 1000}), // ends + TestInputDef({2}, true, {0, 1}), // axes + TestInputDef({2}, true, {1, 1}), // steps + ExpectedEPNodeAssignment::All); +} #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) } // namespace test From 429fea99a52ec7cfda98bbe9f3eacb5574ecc458 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Thu, 12 Oct 2023 02:30:25 -0700 Subject: [PATCH 2/6] No need for input count check --- .../core/providers/qnn/builder/opbuilder/slice_op_builder.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc index 4f6cf714f4108..db0c6f9517f69 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc @@ -44,9 +44,6 @@ Status SliceOpBuilder::ExplictOpCheck(QnnModelWrapper& qnn_model_wrapper, const // Opset < 10: Only has 1 data input. The starts, ends, and axes values are attributes. // Opset >= 10: Everything is an input. The data, starts, and ends inputs are required. - - ORT_RETURN_IF(node_unit.SinceVersion() >= 10 && input_count < 3); - if (input_count > 1) { // Skip the first input. All other input need to be initializer for (size_t i = 1; i < input_count; i++) { From 1567536eeb7670478678512bcd77be44159a4088 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Thu, 12 Oct 2023 12:45:47 -0700 Subject: [PATCH 3/6] Address review comments --- .../qnn/builder/opbuilder/slice_op_builder.cc | 28 +++++++++++-------- .../core/providers/qnn/builder/qnn_utils.h | 8 ++++++ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc index db0c6f9517f69..90c82eeac7255 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc @@ -78,28 +78,29 @@ static Status GetInitializerInputData(const NodeUnitIODef& input, const QnnModel OnnxInputInfo input_info = {}; ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(input, input_info)); ORT_RETURN_IF_NOT(input_info.is_initializer, - "QNN requires the starts, ends, axes, and steps inputs to " - "be initializers"); + "QNN requires the starts, ends, axes, and steps inputs to be initializers"); std::vector initializer_bytes; + + // Note: UnpackInitializerData() uses ORT's protobuf utilities, which ensure that the initializer bytes are + // contiguous, aligned, and in the appropriate endianness. This is necessary to be able to reinterpret bytes + // as an array of larger elements. ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info.initializer_tensor, initializer_bytes)); - size_t tensor_byte_size = initializer_bytes.size(); const auto data_type = input_info.initializer_tensor->data_type(); - Status status; switch (data_type) { case ONNX_NAMESPACE::TensorProto_DataType_INT64: { - const int64_t* tensor_data = reinterpret_cast(initializer_bytes.data()); - size_t size = tensor_byte_size / sizeof(int64_t); - output.insert(output.end(), tensor_data, tensor_data + size); + gsl::span elements = qnn::utils::ReinterpretBytesAsSpan(initializer_bytes.data(), + initializer_bytes.size()); + output.insert(output.end(), elements.begin(), elements.end()); break; } case ONNX_NAMESPACE::TensorProto_DataType_INT32: { - const int32_t* tensor_data = reinterpret_cast(initializer_bytes.data()); - size_t size = tensor_byte_size / sizeof(int32_t); - output.insert(output.end(), tensor_data, tensor_data + size); + gsl::span elements = qnn::utils::ReinterpretBytesAsSpan(initializer_bytes.data(), + initializer_bytes.size()); + output.insert(output.end(), elements.begin(), elements.end()); break; } default: @@ -174,9 +175,12 @@ Status SliceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wr onnxruntime::SliceOp::PrepareForComputeMetadata compute_metadata(input_dimensions); ORT_RETURN_IF_ERROR(SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, raw_steps, compute_metadata)); - std::vector ranges_dims{static_cast(input_dimensions.size()), 3}; + const size_t input_rank = input_dimensions.size(); + std::vector ranges_dims{static_cast(input_rank), 3}; std::vector ranges_data; - for (size_t i = 0; i < input_dimensions.size(); i++) { + ranges_data.reserve(input_rank); + + for (size_t i = 0; i < input_rank; i++) { ranges_data.push_back(static_cast(compute_metadata.starts_[i])); ranges_data.push_back(static_cast(compute_metadata.ends_[i])); ranges_data.push_back(static_cast(compute_metadata.steps_[i])); diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.h b/onnxruntime/core/providers/qnn/builder/qnn_utils.h index a54e0c8276e71..f71d63ba7e6f6 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.h @@ -8,12 +8,20 @@ #include #include #include +#include namespace onnxruntime { namespace qnn { class QnnOpConfigWrapper; namespace utils { + +// Reinterprets an array of contiguous bytes in the target's endianness to a span of elements. +template +inline gsl::span ReinterpretBytesAsSpan(const uint8_t* data, size_t num_bytes) { + return gsl::span(reinterpret_cast(data), num_bytes / sizeof(T)); +} + size_t GetElementSizeByType(const Qnn_DataType_t& data_type); size_t GetElementSizeByType(ONNXTensorElementDataType elem_type); From 98741862401434977ebf4959216153aec4fc2cb6 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Thu, 12 Oct 2023 12:55:27 -0700 Subject: [PATCH 4/6] Simplify more --- .../providers/qnn/builder/opbuilder/slice_op_builder.cc | 6 ++---- onnxruntime/core/providers/qnn/builder/qnn_utils.h | 6 +++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc index 90c82eeac7255..ddfb3355c50b0 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc @@ -92,14 +92,12 @@ static Status GetInitializerInputData(const NodeUnitIODef& input, const QnnModel switch (data_type) { case ONNX_NAMESPACE::TensorProto_DataType_INT64: { - gsl::span elements = qnn::utils::ReinterpretBytesAsSpan(initializer_bytes.data(), - initializer_bytes.size()); + gsl::span elements = qnn::utils::ReinterpretBytesAsSpan(initializer_bytes); output.insert(output.end(), elements.begin(), elements.end()); break; } case ONNX_NAMESPACE::TensorProto_DataType_INT32: { - gsl::span elements = qnn::utils::ReinterpretBytesAsSpan(initializer_bytes.data(), - initializer_bytes.size()); + gsl::span elements = qnn::utils::ReinterpretBytesAsSpan(initializer_bytes); output.insert(output.end(), elements.begin(), elements.end()); break; } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.h b/onnxruntime/core/providers/qnn/builder/qnn_utils.h index f71d63ba7e6f6..e2a0965fb8135 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.h @@ -16,10 +16,10 @@ class QnnOpConfigWrapper; namespace utils { -// Reinterprets an array of contiguous bytes in the target's endianness to a span of elements. +// Reinterprets an array of contiguous, aligned bytes in the target's endianness to a span of elements. template -inline gsl::span ReinterpretBytesAsSpan(const uint8_t* data, size_t num_bytes) { - return gsl::span(reinterpret_cast(data), num_bytes / sizeof(T)); +inline gsl::span ReinterpretBytesAsSpan(gsl::span bytes_span) { + return gsl::span(reinterpret_cast(bytes_span.data()), bytes_span.size() / sizeof(T)); } size_t GetElementSizeByType(const Qnn_DataType_t& data_type); From e5683889d426ff275631aea2a469afbfc1781a1f Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Thu, 12 Oct 2023 22:59:53 -0700 Subject: [PATCH 5/6] Use Tensor utils to get initializer data --- .../qnn/builder/opbuilder/slice_op_builder.cc | 58 ++++++++++--------- .../providers/qnn/builder/qnn_model_wrapper.h | 2 + .../core/providers/qnn/builder/qnn_utils.h | 8 --- 3 files changed, 33 insertions(+), 35 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc index ddfb3355c50b0..83d0a65e6729d 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc @@ -8,6 +8,8 @@ #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/cpu/tensor/slice_helper.h" +#include "core/framework/tensorprotoutils.h" + #include "base_op_builder.h" namespace onnxruntime { @@ -75,36 +77,38 @@ void SliceOpBuilder::GetDataFromAttribute(const NodeUnit& node_unit, // Gets the data from initializer inputs (e.g., starts, ends, axes, or steps) as a TensorShapeVector. static Status GetInitializerInputData(const NodeUnitIODef& input, const QnnModelWrapper& qnn_model_wrapper, TensorShapeVector& output) { - OnnxInputInfo input_info = {}; - ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(input, input_info)); - ORT_RETURN_IF_NOT(input_info.is_initializer, - "QNN requires the starts, ends, axes, and steps inputs to be initializers"); - - std::vector initializer_bytes; - - // Note: UnpackInitializerData() uses ORT's protobuf utilities, which ensure that the initializer bytes are - // contiguous, aligned, and in the appropriate endianness. This is necessary to be able to reinterpret bytes - // as an array of larger elements. - ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info.initializer_tensor, initializer_bytes)); + const auto& input_name = input.node_arg.Name(); + const bool is_initializer = qnn_model_wrapper.IsInitializerInput(input_name); + ORT_RETURN_IF_NOT(is_initializer, "Expected input ", input_name.c_str(), " to be an initializer."); + gsl::not_null initializer_proto = qnn_model_wrapper + .GetInitializerTensors() + .at(input_name); + ORT_RETURN_IF_NOT(initializer_proto->has_data_type(), "Expected initializer ", input_name.c_str(), + " to have a proto data type."); + + // Create empty Tensor. + const auto* dtype = DataTypeImpl::TensorTypeFromONNXEnum(initializer_proto->data_type())->GetElementType(); + TensorShape shape = onnxruntime::utils::GetTensorShapeFromTensorProto(*initializer_proto); + Tensor tensor(dtype, shape, std::make_shared()); + + // Deserialize initializer into Tensor. + const auto& model_path = qnn_model_wrapper.GetGraphViewer().ModelPath(); + const ORTCHAR_T* model_path_str = model_path.IsEmpty() ? nullptr : model_path.ToPathString().c_str(); + ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor(onnxruntime::Env::Default(), model_path_str, + *initializer_proto, tensor)); - const auto data_type = input_info.initializer_tensor->data_type(); Status status; - switch (data_type) { - case ONNX_NAMESPACE::TensorProto_DataType_INT64: { - gsl::span elements = qnn::utils::ReinterpretBytesAsSpan(initializer_bytes); - output.insert(output.end(), elements.begin(), elements.end()); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_INT32: { - gsl::span elements = qnn::utils::ReinterpretBytesAsSpan(initializer_bytes); - output.insert(output.end(), elements.begin(), elements.end()); - break; - } - default: - status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Data type ", data_type, - " is not supported for Slice initializer input ", input.node_arg.Name().c_str()); - break; + // Copy Tensor of int32_t or int64_t elems into output (int64_ts). + if (tensor.IsDataType()) { + gsl::span tensor_elems = tensor.DataAsSpan(); + output.insert(output.end(), tensor_elems.begin(), tensor_elems.end()); + } else if (tensor.IsDataType()) { + gsl::span tensor_elems = tensor.DataAsSpan(); + output.insert(output.end(), tensor_elems.begin(), tensor_elems.end()); + } else { + status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Data type ", DataTypeImpl::ToString(dtype), + " is not supported for Slice initializer input ", input.node_arg.Name().c_str()); } return status; diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h index 22f8d3a0eaa64..89f26e1f84d06 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h @@ -181,6 +181,8 @@ class QnnModelWrapper { QnnBackendType GetQnnBackendType() { return qnn_backend_type_; } + const GraphViewer& GetGraphViewer() const { return graph_viewer_; } + private: bool CreateQnnInputOutputTensors(const std::string& qnn_node_name, const std::vector& names, diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.h b/onnxruntime/core/providers/qnn/builder/qnn_utils.h index e2a0965fb8135..a54e0c8276e71 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.h @@ -8,20 +8,12 @@ #include #include #include -#include namespace onnxruntime { namespace qnn { class QnnOpConfigWrapper; namespace utils { - -// Reinterprets an array of contiguous, aligned bytes in the target's endianness to a span of elements. -template -inline gsl::span ReinterpretBytesAsSpan(gsl::span bytes_span) { - return gsl::span(reinterpret_cast(bytes_span.data()), bytes_span.size() / sizeof(T)); -} - size_t GetElementSizeByType(const Qnn_DataType_t& data_type); size_t GetElementSizeByType(ONNXTensorElementDataType elem_type); From 14bbb7d88d07727839b3e860c22a1da646400bd0 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Thu, 12 Oct 2023 23:17:49 -0700 Subject: [PATCH 6/6] Adjust model path handling --- .../core/providers/qnn/builder/opbuilder/slice_op_builder.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc index 83d0a65e6729d..88c94581a8887 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc @@ -92,8 +92,8 @@ static Status GetInitializerInputData(const NodeUnitIODef& input, const QnnModel Tensor tensor(dtype, shape, std::make_shared()); // Deserialize initializer into Tensor. - const auto& model_path = qnn_model_wrapper.GetGraphViewer().ModelPath(); - const ORTCHAR_T* model_path_str = model_path.IsEmpty() ? nullptr : model_path.ToPathString().c_str(); + onnxruntime::PathString model_path = qnn_model_wrapper.GetGraphViewer().ModelPath().ToPathString(); + const ORTCHAR_T* model_path_str = model_path.empty() ? nullptr : model_path.c_str(); ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor(onnxruntime::Env::Default(), model_path_str, *initializer_proto, tensor));