From 6e61306e6cbb22c53f1d073a6030fbca62a1b10c Mon Sep 17 00:00:00 2001 From: Adrian Lizarraga Date: Fri, 2 Feb 2024 00:22:16 -0800 Subject: [PATCH] Fix Split index bugs uncovered by QNN SDK 2.19 (#19381) ### Description - When converting ONNX split sizes to QNN split indices, do not include the split at index 0. QNN 2.19 assumes index 0 is implicit and throws a validation error if provided. - Fix bug when using an ONNX Split operator with a `num_outputs` attribute that does not evenly divide into `shape[axis]`. The ONNX spec states that the last chunk should be smaller, but QNN EP made the last chunk larger. - Fix bug when using an ONNX Split operator with a `split` input. QNN EP was incorrectly passing the split sizes as split indices without conversion. ### Motivation and Context QNN SDK 2.19 updated validation criteria for Split operators. QNN EP was previously passing a split index that should have been implicit. Also, discovered a bugs when using `num_outputs` attribute and `split` input. --- .../qnn/builder/opbuilder/split_op_builder.cc | 40 ++++++++++++------ .../test/providers/qnn/split_op_test.cc | 41 +++++++++++++++---- 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc index f4b0d1ff59175..9849a05db329c 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc @@ -55,6 +55,19 @@ Status SplitOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, return Status::OK(); } +// Converts an ONNX list of split lengths to a QNN list of split indices. +// Note that the first split index at 0 is implicit (QNN SDK >= 2.19 will raise a validation error if included). +static void ConvertSplitLengthsToSplitIndices(gsl::span split_lengths, + std::vector& split_indices) { + uint32_t split_it = 0; + for (size_t i = 0; i < split_lengths.size(); ++i) { + if (i > 0) { // Do not include the 0th split index. + split_indices.push_back(split_it); + } + split_it += SafeInt(split_lengths[i]); + } +} + Status SplitOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, std::vector&& input_names, @@ -79,22 +92,15 @@ Status SplitOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wr const int64_t* tensor_data = reinterpret_cast(unpacked_tensor.data()); size_t tensor_byte_size = unpacked_tensor.size(); size_t size = tensor_byte_size / sizeof(int64_t); - split_index.push_back(0); // QNN need the start index of each range and starts from 0 - std::transform(tensor_data, tensor_data + size, std::back_inserter(split_index), - [](int64_t item) { return SafeInt(item); }); - split_index.pop_back(); + ConvertSplitLengthsToSplitIndices({tensor_data, size}, split_index); } else { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN doesn't support dynamic split"); } } else { NodeAttrHelper node_helper(node_unit); if (node_helper.HasAttr("split")) { - auto split = node_helper.Get("split", std::vector{0}); - uint32_t split_it = 0; - for (size_t i = 0; i < split.size(); ++i) { - split_index.push_back(split_it); - split_it += split[i]; - } + auto split_lengths = node_helper.Get("split", std::vector{0}); + ConvertSplitLengthsToSplitIndices(split_lengths, split_index); } } @@ -105,11 +111,19 @@ Status SplitOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wr "Cannot get shape"); ORT_ENFORCE(static_cast(input_shape.size()) > axis_value, "axis not valid!"); ORT_RETURN_IF_NOT(input_shape.at(axis_value) > 0, "Shape value not valid!"); - auto num_outputs = node_unit.Outputs().size(); - auto step = SafeInt(input_shape.at(axis_value) / num_outputs); + + // ONNX spec states that if not evenly divisible by `num_outputs`, the last chunk is smaller. + // Therefore, we have to use ceil() when computing shape[axis] / num_outputs. + // See: core/providers/cpu/tensor/split.cc::PrepareForCompute() + const float num_outputs = static_cast(node_unit.Outputs().size()); + const float split_dim_size = static_cast(input_shape[axis_value]); + const uint32_t step = SafeInt(std::ceil(split_dim_size / num_outputs)); uint32_t split_it = 0; + for (size_t i = 0; i < num_outputs; ++i) { - split_index.push_back(split_it); + if (i > 0) { // 0th split index is implicit (QNN >= 2.19 raises validation error if included) + split_index.push_back(split_it); + } split_it += step; } } diff --git a/onnxruntime/test/providers/qnn/split_op_test.cc b/onnxruntime/test/providers/qnn/split_op_test.cc index 57e4b211777bb..6dc721edb421e 100644 --- a/onnxruntime/test/providers/qnn/split_op_test.cc +++ b/onnxruntime/test/providers/qnn/split_op_test.cc @@ -302,19 +302,46 @@ TEST_F(QnnHTPBackendTests, Split_Int32_Opset13) { // Test 8-bit QDQ Split opset 18 on HTP backend: equal split of axis 0 via 'num_outputs' attribute // and 'split' input. TEST_F(QnnHTPBackendTests, Split_Equal_Axis0_Opset18) { + // Split 6 into 3 outputs of lengths [2, 2, 2] + TestInputDef input_def({6, 2}, false, + {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.f, 8.f, 9.0f, 10.0f, 11.0f}); + // Use 'split' input (initializer). - RunQDQSplitOpTestOnHTP(TestInputDef({4, 2}, false, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.f, 8.f}), - {2, 2}, // split - 0, // axis - -1, // num_outputs - 18, // opset + RunQDQSplitOpTestOnHTP(input_def, + {2, 2, 2}, // split + 0, // axis + -1, // num_outputs + 18, // opset ExpectedEPNodeAssignment::All); // Use 'num_outputs' attribute. - RunQDQSplitOpTestOnHTP(TestInputDef({4, 2}, false, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.f, 8.f}), + RunQDQSplitOpTestOnHTP(input_def, + {}, // split (use num_outputs instead) + 0, // axis + 3, // num_outputs + 18, // opset + ExpectedEPNodeAssignment::All); +} + +// Test 8-bit QDQ Split opset 18 on HTP backend. Use an uneven split (last chunk should be smaller). +TEST_F(QnnHTPBackendTests, Split_NonEqual_Axis0_Opset18) { + // Split 7 into 3 outputs of lengths [3, 3, 1] + TestInputDef input_def({7, 2}, false, + {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.f, 8.f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f}); + + // Use a `split` input with uneven split lengths. + RunQDQSplitOpTestOnHTP(input_def, + {3, 3, 1}, // split + 0, // axis + -1, // num_outputs + 18, // opset + ExpectedEPNodeAssignment::All); + + // Use a `num_outputs` attribute that does not evenly divide into shape[axis]. + RunQDQSplitOpTestOnHTP(input_def, {}, // split (use num_outputs instead) 0, // axis - 2, // num_outputs + 3, // num_outputs 18, // opset ExpectedEPNodeAssignment::All); }