diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc index 15fc55b5b59b6..88c94581a8887 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc @@ -8,6 +8,8 @@ #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/cpu/tensor/slice_helper.h" +#include "core/framework/tensorprotoutils.h" + #include "base_op_builder.h" namespace onnxruntime { @@ -37,16 +39,13 @@ class SliceOpBuilder : public BaseOpBuilder { TensorShapeVector& raw_starts, TensorShapeVector& raw_ends, TensorShapeVector& raw_axes) const; - typedef struct { - int32_t begin, end, stride; - } Range; - mutable std::vector ranges_; }; Status SliceOpBuilder::ExplictOpCheck(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const { size_t input_count = node_unit.Inputs().size(); - // Op set 9 only has 1 input with starts, ends, axes attribute - // Op set > 9, starts, ends, axes are from node input + + // Opset < 10: Only has 1 data input. The starts, ends, and axes values are attributes. + // Opset >= 10: Everything is an input. The data, starts, and ends inputs are required. if (input_count > 1) { // Skip the first input. All other input need to be initializer for (size_t i = 1; i < input_count; i++) { @@ -75,6 +74,46 @@ void SliceOpBuilder::GetDataFromAttribute(const NodeUnit& node_unit, } } +// Gets the data from initializer inputs (e.g., starts, ends, axes, or steps) as a TensorShapeVector. +static Status GetInitializerInputData(const NodeUnitIODef& input, const QnnModelWrapper& qnn_model_wrapper, + TensorShapeVector& output) { + const auto& input_name = input.node_arg.Name(); + const bool is_initializer = qnn_model_wrapper.IsInitializerInput(input_name); + ORT_RETURN_IF_NOT(is_initializer, "Expected input ", input_name.c_str(), " to be an initializer."); + gsl::not_null initializer_proto = qnn_model_wrapper + .GetInitializerTensors() + .at(input_name); + ORT_RETURN_IF_NOT(initializer_proto->has_data_type(), "Expected initializer ", input_name.c_str(), + " to have a proto data type."); + + // Create empty Tensor. + const auto* dtype = DataTypeImpl::TensorTypeFromONNXEnum(initializer_proto->data_type())->GetElementType(); + TensorShape shape = onnxruntime::utils::GetTensorShapeFromTensorProto(*initializer_proto); + Tensor tensor(dtype, shape, std::make_shared()); + + // Deserialize initializer into Tensor. + onnxruntime::PathString model_path = qnn_model_wrapper.GetGraphViewer().ModelPath().ToPathString(); + const ORTCHAR_T* model_path_str = model_path.empty() ? nullptr : model_path.c_str(); + ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor(onnxruntime::Env::Default(), model_path_str, + *initializer_proto, tensor)); + + Status status; + + // Copy Tensor of int32_t or int64_t elems into output (int64_ts). + if (tensor.IsDataType()) { + gsl::span tensor_elems = tensor.DataAsSpan(); + output.insert(output.end(), tensor_elems.begin(), tensor_elems.end()); + } else if (tensor.IsDataType()) { + gsl::span tensor_elems = tensor.DataAsSpan(); + output.insert(output.end(), tensor_elems.begin(), tensor_elems.end()); + } else { + status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Data type ", DataTypeImpl::ToString(dtype), + " is not supported for Slice initializer input ", input.node_arg.Name().c_str()); + } + + return status; +} + // Note: For ONNX Slice operation the expected number of inputs is between 3 and 5 Status SliceOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, @@ -84,123 +123,71 @@ Status SliceOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, if (do_op_validation) { ORT_RETURN_IF_ERROR(ExplictOpCheck(qnn_model_wrapper, node_unit)); } - Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32; + + // Only need to add input 0. The other inputs (if any) contain static data that is passed to QNN APIs + // as static parameters. + return ProcessInput(qnn_model_wrapper, node_unit.Inputs()[0], logger, input_names); +} + +Status SliceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper, + const NodeUnit& node_unit, + std::vector&& input_names, + const logging::Logger& logger, + bool do_op_validation) const { + // Extract starts, ends, axes, and steps data from attributes (opset < 10) or initializer inputs (opset >= 10). TensorShapeVector raw_starts; TensorShapeVector raw_ends; TensorShapeVector raw_axes; TensorShapeVector raw_steps; - std::vector input0_shape; - auto inputs = node_unit.Inputs(); - auto input_count = inputs.size(); - // Opset 9, only 1 input, starts, ends, axes are in attribute - if (1 == input_count) { + const auto& inputs = node_unit.Inputs(); + const size_t input_count = inputs.size(); + + // Opset 9 only has 1 input. The starts, ends, axes values are attributes. + if (node_unit.SinceVersion() < 10) { GetDataFromAttribute(node_unit, raw_starts, raw_ends, raw_axes); - } + } else { + constexpr size_t starts_index = 1; + constexpr size_t ends_index = 2; + constexpr size_t axes_index = 3; + constexpr size_t steps_index = 4; - for (size_t input_i = 0; input_i < input_count; ++input_i) { - auto& input_name = inputs[input_i].node_arg.Name(); - if (input_name.empty()) { - // Ignore unspecified/unused optional input - continue; - } - if (qnn_model_wrapper.IsQnnTensorWrapperExist(input_name)) { - LOGS(logger, VERBOSE) << "Tensor already added or the input is not named, skip it: " << input_name; - input_names.push_back(input_name); - ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[input_i].node_arg, input0_shape), "Cannot get shape"); - continue; + // Starts input (required). + ORT_RETURN_IF_ERROR(GetInitializerInputData(inputs[starts_index], qnn_model_wrapper, raw_starts)); + + // Ends input (required). + ORT_RETURN_IF_ERROR(GetInitializerInputData(inputs[ends_index], qnn_model_wrapper, raw_ends)); + + // Axes input (optional). + if (input_count > axes_index && !inputs[axes_index].node_arg.Name().empty()) { + ORT_RETURN_IF_ERROR(GetInitializerInputData(inputs[axes_index], qnn_model_wrapper, raw_axes)); } - bool is_quantized_tensor = inputs[input_i].quant_param.has_value(); - const auto* type_proto = inputs[input_i].node_arg.TypeAsProto(); - ORT_RETURN_IF_ERROR(utils::GetQnnDataType(is_quantized_tensor, type_proto, qnn_data_type)); - - std::vector input_shape; - ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[input_i].node_arg, input_shape), "Cannot get shape"); - - Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT; - utils::InitializeQuantizeParam(quantize_param, is_quantized_tensor); - ORT_RETURN_IF_NOT(qnn_model_wrapper.ProcessQuantizationParameter(inputs[input_i].quant_param, - quantize_param.scaleOffsetEncoding.scale, - quantize_param.scaleOffsetEncoding.offset), - "Cannot get quantization parameter"); - - std::vector unpacked_tensor; - bool is_initializer_input = qnn_model_wrapper.IsInitializerInput(input_name); - if (is_initializer_input) { - const auto& input_tensor = qnn_model_wrapper.GetInitializerTensors().at(input_name); - ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_tensor, unpacked_tensor)); - size_t tensor_byte_size = unpacked_tensor.size(); - const auto data_type = input_tensor->data_type(); - TensorShapeVector data; - if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) { - const int64_t* tensor_data = reinterpret_cast(unpacked_tensor.data()); - size_t size = tensor_byte_size / sizeof(int64_t); - data.insert(data.end(), tensor_data, tensor_data + size); - } else if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT32) { - const int32_t* tensor_data = reinterpret_cast(unpacked_tensor.data()); - size_t size = tensor_byte_size / sizeof(int32_t); - data.insert(data.end(), tensor_data, tensor_data + size); - } else { - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, - "Data type for starts and ends inputs' is not supported in this build. Got ", - data_type); - } - if (input_i == 0) { - // Do nothing! - } else if (input_i == 1) { - // Starts - raw_starts = data; - continue; - } else if (input_i == 2) { - // Ends - raw_ends = data; - continue; - } else if (input_i == 3) { - // Axes - raw_axes = data; - continue; - } else if (input_i == 4) { - // Steps - raw_steps = data; - continue; - } + // Steps input (optional). + if (input_count > steps_index && !inputs[steps_index].node_arg.Name().empty()) { + ORT_RETURN_IF_ERROR(GetInitializerInputData(inputs[steps_index], qnn_model_wrapper, raw_steps)); } - input0_shape = input_shape; - - input_names.push_back(input_name); - Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, input_name); - Qnn_QuantizeParams_t quantize_params = QNN_QUANTIZE_PARAMS_INIT; - QnnTensorWrapper input_tensorwrapper(input_name, tensor_type, qnn_data_type, quantize_params, - std::move(input_shape), std::move(unpacked_tensor)); - ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor."); } + + std::vector input0_shape; + ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[0].node_arg, input0_shape), + "Cannot get shape for Slice input 0."); + TensorShapeVector input_dimensions(input0_shape.cbegin(), input0_shape.cend()); onnxruntime::SliceOp::PrepareForComputeMetadata compute_metadata(input_dimensions); - ORT_RETURN_IF_ERROR( - SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, raw_steps, compute_metadata)); - ranges_.clear(); - for (size_t i = 0; i < input_dimensions.size(); i++) { - auto start = static_cast(compute_metadata.starts_[i]); - auto end = static_cast(compute_metadata.ends_[i]); - auto step = static_cast(compute_metadata.steps_[i]); - ranges_.push_back(Range({start, end, step})); - } - return Status::OK(); -} + ORT_RETURN_IF_ERROR(SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, raw_steps, compute_metadata)); -Status SliceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper, - const NodeUnit& node_unit, - std::vector&& input_names, - const logging::Logger& logger, - bool do_op_validation) const { - std::vector ranges_dims{static_cast(ranges_.size()), 3}; + const size_t input_rank = input_dimensions.size(); + std::vector ranges_dims{static_cast(input_rank), 3}; std::vector ranges_data; - for (auto range : ranges_) { - ranges_data.push_back(static_cast(range.begin)); - ranges_data.push_back(static_cast(range.end)); - ranges_data.push_back(static_cast(range.stride)); + ranges_data.reserve(input_rank); + + for (size_t i = 0; i < input_rank; i++) { + ranges_data.push_back(static_cast(compute_metadata.starts_[i])); + ranges_data.push_back(static_cast(compute_metadata.ends_[i])); + ranges_data.push_back(static_cast(compute_metadata.steps_[i])); } + QnnParamWrapper ranges_paramwrapper(node_unit.Index(), node_unit.Name(), QNN_OP_STRIDED_SLICE_PARAM_RANGES, diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h index 22f8d3a0eaa64..89f26e1f84d06 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h @@ -181,6 +181,8 @@ class QnnModelWrapper { QnnBackendType GetQnnBackendType() { return qnn_backend_type_; } + const GraphViewer& GetGraphViewer() const { return graph_viewer_; } + private: bool CreateQnnInputOutputTensors(const std::string& qnn_node_name, const std::vector& names, diff --git a/onnxruntime/test/providers/qnn/slice_htp_test.cc b/onnxruntime/test/providers/qnn/slice_htp_test.cc index edc079dc65276..07c97d2d7b1fa 100644 --- a/onnxruntime/test/providers/qnn/slice_htp_test.cc +++ b/onnxruntime/test/providers/qnn/slice_htp_test.cc @@ -14,6 +14,48 @@ namespace onnxruntime { namespace test { + +// Test for "index-out-of-bounds" bug that occurred when a Slice operator +// shared one of its initializer inputs with another op that was processed by QNN EP first. +TEST_F(QnnCPUBackendTests, Slice_SharedInitializersBugFix) { + // Model with an Add that processes a shared initializer before Slice is processed. + GetTestModelFn model_fn = [](ModelTestBuilder& builder) { + NodeArg* input0 = builder.MakeInput({2, 2}, {1, 2, 3, 4}); + + // Initializers + NodeArg* starts_input = builder.Make1DInitializer({1, 0}); // Shared by Add + NodeArg* ends_input = builder.Make1DInitializer({2, 2}); + NodeArg* axes_input = builder.Make1DInitializer({0, 1}); + NodeArg* steps_input = builder.Make1DInitializer({1, 1}); + + // Add input0 with a shared initializer. + NodeArg* add_output = builder.MakeIntermediate(); + builder.AddNode("Add", {input0, starts_input}, {add_output}); + + // Cast Add's output to float. + NodeArg* cast_output = builder.MakeIntermediate(); + Node& cast_node = builder.AddNode("Cast", {add_output}, {cast_output}); + cast_node.AddAttribute("to", static_cast(ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)); + + // Slice Cast's output + NodeArg* slice0_out = builder.MakeOutput(); + builder.AddNode("Slice", {cast_output, starts_input, ends_input, axes_input, steps_input}, {slice0_out}); + }; + + ProviderOptions provider_options; + +#if defined(_WIN32) + provider_options["backend_path"] = "QnnCpu.dll"; +#else + provider_options["backend_path"] = "libQnnCpu.so"; +#endif + + RunQnnModelTest(model_fn, + provider_options, + 13, // opset + ExpectedEPNodeAssignment::All); +} + #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) /** @@ -26,6 +68,7 @@ namespace test { * \param axes_def The axes input's definition. * \param steps_def The steps input's definition. * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None). + * \param use_contrib_qdq Force Q/DQ ops to use the com.microsoft domain (enable 16-bit). */ template static void RunSliceQDQTest(const TestInputDef& data_def, @@ -33,7 +76,8 @@ static void RunSliceQDQTest(const TestInputDef& data_def, const TestInputDef& ends_def, const TestInputDef& axes_def, const TestInputDef& steps_def, - ExpectedEPNodeAssignment expected_ep_assignment) { + ExpectedEPNodeAssignment expected_ep_assignment, + bool use_contrib_qdq = false) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -45,7 +89,8 @@ static void RunSliceQDQTest(const TestInputDef& data_def, const std::vector> int64_inputs = {starts_def, ends_def, axes_def, steps_def}; TestQDQModelAccuracy(BuildOpTestCase("Slice", f32_inputs, int64_inputs, {}), - BuildQDQOpTestCase("Slice", f32_inputs, int64_inputs, {}), + BuildQDQOpTestCase("Slice", f32_inputs, int64_inputs, {}, kOnnxDomain, + use_contrib_qdq), provider_options, 18, expected_ep_assignment); @@ -123,6 +168,39 @@ TEST_F(QnnHTPBackendTests, SliceInt32OnHTP) { ExpectedEPNodeAssignment::All); } +// Test 8-bit QDQ Slice with more than 1 axis. +TEST_F(QnnHTPBackendTests, SliceU8_MultAxes) { + std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; + RunSliceQDQTest(TestInputDef({2, 4}, false, input_data), + TestInputDef({2}, true, {1, 0}), // starts + TestInputDef({2}, true, {2, 3}), // ends + TestInputDef({2}, true, {0, 1}), // axes + TestInputDef({2}, true, {1, 2}), // steps + ExpectedEPNodeAssignment::All); +} + +// Test 16-bit QDQ Slice with more than 1 axis. +TEST_F(QnnHTPBackendTests, SliceU16_MultAxes) { + std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; + RunSliceQDQTest(TestInputDef({2, 4}, false, input_data), + TestInputDef({2}, true, {1, 0}), // starts + TestInputDef({2}, true, {2, 3}), // ends + TestInputDef({2}, true, {0, 1}), // axes + TestInputDef({2}, true, {1, 2}), // steps + ExpectedEPNodeAssignment::All, + true); // Use com.microsoft Q/DQ ops for 16-bit +} + +// Test 8-bit QDQ Slice with more than 1 axis and an end value that exceeds the associated dimension size. +TEST_F(QnnHTPBackendTests, SliceU8_MultAxes_LargeEnd) { + std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; + RunSliceQDQTest(TestInputDef({2, 4}, false, input_data), + TestInputDef({2}, true, {0, 1}), // starts + TestInputDef({2}, true, {-1, 1000}), // ends + TestInputDef({2}, true, {0, 1}), // axes + TestInputDef({2}, true, {1, 1}), // steps + ExpectedEPNodeAssignment::All); +} #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) } // namespace test