diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc index cc7a892d1c445..7783d3b3f36b7 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc @@ -60,6 +60,7 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() { {"HardSwish", {}}, {"Sigmoid", {}}, {"Slice", {}}, + {"LogSoftmax", {}}, {"Softmax", {}}, {"Sqrt", {}}, {"Atan", {}}, @@ -72,7 +73,10 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() { {"Log", {}}, {"LRN", {}}, {"Ceil", {}}, + {"Floor", {}}, + {"Round", {}}, {"Abs", {}}, + {"Neg", {}}, {"DepthToSpace", {}}, {"SpaceToDepth", {}}}; } @@ -82,10 +86,13 @@ static const OpVersionsAndSelector::OpVersionsMap GetBinaryOpVersionsMap() { {"Mul", {}}, {"Pow", {}}, {"Sub", {}}, + {"PRelu", {}}, {"GridSample", {}}}; } static const OpVersionsAndSelector::OpVersionsMap GetVariadicOpVersionsMap() { - return {{"Concat", {}}}; + return {{"Concat", {}}, + {"Max", {}}, + {"Min", {}}}; } static const OpVersionsAndSelector::OpVersionsMap GetConvOpVersionsMap() { return {{"Conv", {}}}; diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc index ca18c051a9922..8abb847b20b46 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc @@ -29,26 +29,37 @@ class SimpleOpBuilder : public BaseOpBuilder { bool do_op_validation) const override ORT_MUST_USE_RESULT; private: - Status ExplictOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const; + Status ExplicitOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const; static constexpr std::array gridsample_supported_modes = {"bilinear", "nearest"}; static constexpr std::array gridsample_supported_padding_modes = {"zeros", "border", "reflection"}; }; -Status SimpleOpBuilder::ExplictOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const { - // QNN Softmax only supports an axis value equal to input_rank - 1 (i.e., same as -1). - if (node_unit.OpType() == "Softmax") { - int32_t axis = node_unit.SinceVersion() < 13 ? 1 : -1; // Default axis changed from 1 to -1 in opset 13. +static int32_t GetDefaultAxisAttribute(const std::string& op_type, int opset_version) { + if (op_type == "Softmax" || op_type == "LogSoftmax") { + // Default axis changed from 1 to -1 in opset 13. + return opset_version < 13 ? 1 : -1; + } + + return 0; +} + +Status SimpleOpBuilder::ExplicitOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const { + const std::string& op_type = node_unit.OpType(); + + // QNN Softmax and LogSoftmax only support an axis value equal to input_rank - 1 (i.e., same as -1). + if (op_type == "Softmax" || op_type == "LogSoftmax") { + int32_t axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion()); Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT; ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis)); std::vector input_shape; ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape), "QNN EP: Cannot get shape for Softmax input"); ORT_RETURN_IF(axis != static_cast(input_shape.size() - 1), - "QNN Softmax only supports an `axis` attribute equal to input_rank-1 (or -1)"); + "QNN ", op_type.c_str(), " only supports an `axis` attribute equal to input_rank-1 (or -1)"); } - if (node_unit.OpType() == "GridSample") { + if (op_type == "GridSample") { NodeAttrHelper node_helper(node_unit); std::string mode = node_helper.Get("mode", "linear"); ORT_RETURN_IF_NOT(utils::ArrayHasString(gridsample_supported_modes, mode), "GridSample does not support mode ", @@ -58,6 +69,13 @@ Status SimpleOpBuilder::ExplictOpCheck(const QnnModelWrapper& qnn_model_wrapper, padding_mode.c_str()); } + // ONNX's Min and Max operators accept a variable number of inputs (i.e., variadic). + // However, QNN's Min and Max operators must take in exactly two inputs. + if (op_type == "Min" || op_type == "Max") { + ORT_RETURN_IF_NOT(node_unit.Inputs().size() == 2, + "QNN EP only supports Min and Max operators with exactly 2 inputs."); + } + return Status::OK(); } @@ -207,7 +225,7 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w const std::string& op_type = node_unit.OpType(); if (do_op_validation) { - ORT_RETURN_IF_ERROR(ExplictOpCheck(qnn_model_wrapper, node_unit)); + ORT_RETURN_IF_ERROR(ExplicitOpCheck(qnn_model_wrapper, node_unit)); // Skip the op validation for DepthToSpace & SpaceToDepth if it's not NHWC data layout if (node_unit.Domain() != kMSInternalNHWCDomain && (op_type == "DepthToSpace" || op_type == "SpaceToDepth" || op_type == "GridSample")) { return Status::OK(); @@ -217,7 +235,7 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w std::vector param_tensor_names; // Add attribute if (op_type == "LogSoftmax" || op_type == "Softmax" || op_type == "Concat") { - int32_t default_axis = ("Softmax" == op_type) ? -1 : 0; + int32_t default_axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion()); Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT; ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, default_axis)); QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar); diff --git a/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc b/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc index e579e3274e699..eaeebba5bea5c 100644 --- a/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc +++ b/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc @@ -43,7 +43,7 @@ static GetTestQDQModelFn BuildQDQArgMxxTestCase(const std::string& op_typ return [op_type, input_def, attrs](ModelTestBuilder& builder, std::vector>& output_qparams) { ORT_UNUSED_PARAMETER(output_qparams); - QuantParams input_qparams = GetTestInputQuantParams(input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); auto* input = MakeTestInput(builder, input_def); @@ -205,7 +205,7 @@ TEST_F(QnnHTPBackendTests, ArgMaxMin_AsGraphOutputUnsupported) { auto model_builder_func = [](const std::string& op_type, const TestInputDef& input_def, const std::vector& attrs) -> GetTestModelFn { return [op_type, input_def, attrs](ModelTestBuilder& builder) { - QuantParams input_qparams = GetTestInputQuantParams(input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); auto* input = MakeTestInput(builder, input_def); auto* output = builder.MakeOutput(); diff --git a/onnxruntime/test/providers/qnn/average_pool_test.cc b/onnxruntime/test/providers/qnn/average_pool_test.cc index 114802d56cfd3..79ec07796c0e8 100644 --- a/onnxruntime/test/providers/qnn/average_pool_test.cc +++ b/onnxruntime/test/providers/qnn/average_pool_test.cc @@ -5,7 +5,9 @@ #include #include +#include +#include "core/graph/node_attr_utils.h" #include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" @@ -16,87 +18,11 @@ namespace onnxruntime { namespace test { -// Returns a function that creates a graph with a single AveragePool operator. -static GetTestModelFn BuildAveragePoolTestCase(const TestInputDef& input_def, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - int64_t count_include_pad, - const std::string& auto_pad = "NOTSET") { - return [input_def, kernel_shape, strides, pads, - count_include_pad, auto_pad](ModelTestBuilder& builder) { - auto* input = MakeTestInput(builder, input_def); - - auto* output = builder.MakeOutput(); - Node& pool_node = builder.AddNode("AveragePool", {input}, {output}); - - pool_node.AddAttribute("kernel_shape", kernel_shape); - - if (!strides.empty()) { - pool_node.AddAttribute("strides", strides); - } - - pool_node.AddAttribute("auto_pad", auto_pad); - - if (!pads.empty() && auto_pad == "NOTSET") { - pool_node.AddAttribute("pads", pads); - } - - if (count_include_pad > 0) { - pool_node.AddAttribute("count_include_pad", count_include_pad); - } - }; -} - -// Returns a function that creates a graph with a QDQ AveragePool operator. -template -GetTestQDQModelFn BuildAveragePoolQDQTestCase(const TestInputDef& input_def, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - int64_t count_include_pad, - const std::string& auto_pad = "NOTSET") { - return [input_def, kernel_shape, strides, pads, - count_include_pad, auto_pad](ModelTestBuilder& builder, - std::vector>& output_qparams) { - auto* input_arg = MakeTestInput(builder, input_def); - - // add QDQ + AveragePool - QuantParams input_qparams = GetTestInputQuantParams(input_def); - auto* dq_output = AddQDQNodePair(builder, input_arg, input_qparams.scale, input_qparams.zero_point); - auto* averagepool_output = builder.MakeIntermediate(); - Node& pool_node = builder.AddNode("AveragePool", {dq_output}, {averagepool_output}); - - pool_node.AddAttribute("kernel_shape", kernel_shape); - - if (!strides.empty()) { - pool_node.AddAttribute("strides", strides); - } - - pool_node.AddAttribute("auto_pad", auto_pad); - - if (!pads.empty() && auto_pad == "NOTSET") { - pool_node.AddAttribute("pads", pads); - } - - if (count_include_pad > 0) { - pool_node.AddAttribute("count_include_pad", count_include_pad); - } - - // op_output -> Q -> DQ -> output - AddQDQNodePairWithOutputAsGraphOutput(builder, averagepool_output, - output_qparams[0].scale, output_qparams[0].zero_point); - }; -} - // Runs an AveragePool model on the QNN CPU backend. Checks the graph node assignment, and that inference // outputs for QNN and CPU match. -static void RunAveragePoolOpTest(const TestInputDef& input_def, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - int64_t count_include_pad, - const std::string& auto_pad, +static void RunAveragePoolOpTest(const std::string& op_type, + const std::vector>& input_defs, + const std::vector& attrs, ExpectedEPNodeAssignment expected_ep_assignment, int opset = 18) { ProviderOptions provider_options; @@ -106,7 +32,7 @@ static void RunAveragePoolOpTest(const TestInputDef& input_def, provider_options["backend_path"] = "libQnnCpu.so"; #endif - RunQnnModelTest(BuildAveragePoolTestCase(input_def, kernel_shape, strides, pads, count_include_pad, auto_pad), + RunQnnModelTest(BuildOpTestCase(op_type, input_defs, attrs), provider_options, opset, expected_ep_assignment); @@ -115,14 +41,11 @@ static void RunAveragePoolOpTest(const TestInputDef& input_def, // Runs a QDQ AveragePool model on the QNN HTP backend. Checks the graph node assignment, and that accuracy // on QNN EP is at least as good as on CPU EP. template -static void RunQDQAveragePoolOpTest(const TestInputDef& input_def, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - int64_t count_include_pad, - const std::string& auto_pad, +static void RunQDQAveragePoolOpTest(const std::string& op_type, + const std::vector>& input_defs, + const std::vector& attrs, ExpectedEPNodeAssignment expected_ep_assignment, - int opset = 18, float fp32_abs_err = 1e-5f) { + int opset = 18) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -130,13 +53,11 @@ static void RunQDQAveragePoolOpTest(const TestInputDef& input_def, provider_options["backend_path"] = "libQnnHtp.so"; #endif - TestQDQModelAccuracy(BuildAveragePoolTestCase(input_def, kernel_shape, strides, pads, count_include_pad, auto_pad), - BuildAveragePoolQDQTestCase(input_def, kernel_shape, strides, pads, count_include_pad, - auto_pad), + TestQDQModelAccuracy(BuildOpTestCase(op_type, input_defs, attrs), + BuildQDQOpTestCase(op_type, input_defs, attrs), provider_options, opset, - expected_ep_assignment, - fp32_abs_err); + expected_ep_assignment); } // @@ -144,46 +65,48 @@ static void RunQDQAveragePoolOpTest(const TestInputDef& input_def, // // AveragePool with kernel size equal to the spatial dimension of input tensor. -TEST_F(QnnCPUBackendTests, AveragePool_Global) { - RunAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // random input - {3, 3}, // kernel_shape - {3, 3}, // strides - {0, 0, 0, 0}, // pads - 0, // count_include_pad - "NOTSET", +TEST_F(QnnCPUBackendTests, AveragePool_AsGlobal) { + RunAveragePoolOpTest("AveragePool", + {TestInputDef({1, 2, 3, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 18))}, + {utils::MakeAttribute("kernel_shape", std::vector{3, 3}), + utils::MakeAttribute("strides", std::vector{3, 3})}, + ExpectedEPNodeAssignment::All); +} + +// Test GlobalAveragePool on QNN CPU backend. +TEST_F(QnnCPUBackendTests, GlobalAveragePool) { + RunAveragePoolOpTest("GlobalAveragePool", + {TestInputDef({1, 2, 3, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 18))}, + {}, ExpectedEPNodeAssignment::All); } // AveragePool that counts padding. TEST_F(QnnCPUBackendTests, AveragePool_CountIncludePad) { - RunAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // random input - {1, 1}, // kernel_shape - {1, 1}, // strides - {0, 0, 0, 0}, // pads - 1, // count_include_pad - "NOTSET", + RunAveragePoolOpTest("AveragePool", + {TestInputDef({1, 2, 3, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 18))}, + {utils::MakeAttribute("kernel_shape", std::vector{1, 1}), + utils::MakeAttribute("count_include_pad", static_cast(1))}, ExpectedEPNodeAssignment::All); } // AveragePool that use auto_pad 'SAME_UPPER'. TEST_F(QnnCPUBackendTests, AveragePool_AutopadSameUpper) { - RunAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // random input - {1, 1}, // kernel_shape - {1, 1}, // strides - {}, // pads - 1, // count_include_pad - "SAME_UPPER", + RunAveragePoolOpTest("AveragePool", + {TestInputDef({1, 2, 3, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 18))}, + {utils::MakeAttribute("kernel_shape", std::vector{1, 1}), + utils::MakeAttribute("count_include_pad", static_cast(1)), + utils::MakeAttribute("auto_pad", "SAME_UPPER")}, ExpectedEPNodeAssignment::All); } // AveragePool that use auto_pad 'SAME_LOWER'. TEST_F(QnnCPUBackendTests, AveragePool_AutopadSameLower) { - RunAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // random input - {1, 1}, // kernel_shape - {1, 1}, // strides - {}, // pads - 1, // count_include_pad - "SAME_LOWER", + RunAveragePoolOpTest("AveragePool", + {TestInputDef({1, 2, 3, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 18))}, + {utils::MakeAttribute("kernel_shape", std::vector{1, 1}), + utils::MakeAttribute("count_include_pad", static_cast(1)), + utils::MakeAttribute("auto_pad", "SAME_LOWER")}, ExpectedEPNodeAssignment::All); } @@ -193,15 +116,23 @@ TEST_F(QnnCPUBackendTests, AveragePool_AutopadSameLower) { // // QDQ AveragePool with kernel size equal to the spatial dimension of input tensor. -TEST_F(QnnHTPBackendTests, AveragePool_Global_HTP) { +TEST_F(QnnHTPBackendTests, AveragePool_AsGlobal) { std::vector input = {32.1289f, -59.981f, -17.2799f, 62.7263f, 33.6205f, -19.3515f, -54.0113f, 37.5648f, 61.5357f, -52.5769f, 27.3637f, -9.01382f, -65.5612f, 19.9497f, -47.9228f, 26.9813f, 83.064f, 0.362503f}; - RunQDQAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, input), - {3, 3}, // kernel_shape - {3, 3}, // strides - {0, 0, 0, 0}, // pads - 0, // count_include_pad - "NOTSET", + RunQDQAveragePoolOpTest("AveragePool", + {TestInputDef({1, 2, 3, 3}, false, input)}, + {utils::MakeAttribute("kernel_shape", std::vector{3, 3}), + utils::MakeAttribute("strides", std::vector{3, 3})}, + ExpectedEPNodeAssignment::All); +} + +// Test accuracy for 8-bit QDQ GlobalAveragePool with input of rank 4. +TEST_F(QnnHTPBackendTests, GlobalAveragePool) { + std::vector input = GetFloatDataInRange(-32.0f, 32.0f, 18); + + RunQDQAveragePoolOpTest("GlobalAveragePool", + {TestInputDef({1, 2, 3, 3}, false, input)}, + {}, ExpectedEPNodeAssignment::All); } @@ -210,12 +141,10 @@ TEST_F(QnnHTPBackendTests, AveragePool_CountIncludePad_HTP_u8) { std::vector input = {-9.0f, -7.33f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; - RunQDQAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, input), - {1, 1}, // kernel_shape - {1, 1}, // strides - {0, 0, 0, 0}, // pads - 1, // count_include_pad - "NOTSET", + RunQDQAveragePoolOpTest("AveragePool", + {TestInputDef({1, 2, 3, 3}, false, input)}, + {utils::MakeAttribute("kernel_shape", std::vector{1, 1}), + utils::MakeAttribute("count_include_pad", static_cast(1))}, ExpectedEPNodeAssignment::All, 18); } @@ -225,12 +154,10 @@ TEST_F(QnnHTPBackendTests, AveragePool_AutopadSameUpper_HTP_u8) { std::vector input = {-9.0f, -7.33f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; - RunQDQAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, input), - {1, 1}, // kernel_shape - {1, 1}, // strides - {}, // pads - 0, // count_include_pad - "SAME_UPPER", + RunQDQAveragePoolOpTest("AveragePool", + {TestInputDef({1, 2, 3, 3}, false, input)}, + {utils::MakeAttribute("kernel_shape", std::vector{1, 1}), + utils::MakeAttribute("auto_pad", "SAME_UPPER")}, ExpectedEPNodeAssignment::All, 18); } @@ -240,12 +167,10 @@ TEST_F(QnnHTPBackendTests, AveragePool_AutopadSameLower_HTP_u8) { std::vector input = {-9.0f, -7.33f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; - RunQDQAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, input), - {1, 1}, // kernel_shape - {1, 1}, // strides - {}, // pads - 0, // count_include_pad - "SAME_LOWER", + RunQDQAveragePoolOpTest("AveragePool", + {TestInputDef({1, 2, 3, 3}, false, input)}, + {utils::MakeAttribute("kernel_shape", std::vector{1, 1}), + utils::MakeAttribute("auto_pad", "SAME_LOWER")}, ExpectedEPNodeAssignment::All, 18); } diff --git a/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc b/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc index 8e4a07e66624e..9b65ca7bda3e2 100644 --- a/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc +++ b/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc @@ -114,15 +114,15 @@ GetTestQDQModelFn BuildQDQBatchNormTestCase(const TestInputDef input_qparams = GetTestInputQuantParams(input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); NodeArg* scale = MakeTestInput(builder, scale_def); - QuantParams scale_qparams = GetTestInputQuantParams(scale_def); + QuantParams scale_qparams = GetTestInputQuantParams(scale_def); NodeArg* scale_qdq = AddQDQNodePair(builder, scale, scale_qparams.scale, scale_qparams.zero_point); NodeArg* bias = MakeTestInput(builder, bias_def); - QuantParams bias_qparams = GetTestInputQuantParams(bias_def); + QuantParams bias_qparams = GetTestInputQuantParams(bias_def); NodeArg* bias_qdq = AddQDQNodePair(builder, bias, bias_qparams.scale, bias_qparams.zero_point); std::vector mean_vals(num_channels); diff --git a/onnxruntime/test/providers/qnn/conv_test.cc b/onnxruntime/test/providers/qnn/conv_test.cc index c6ebaaf7ab7e4..b66d86f24af4e 100644 --- a/onnxruntime/test/providers/qnn/conv_test.cc +++ b/onnxruntime/test/providers/qnn/conv_test.cc @@ -156,13 +156,13 @@ static GetTestQDQModelFn BuildQDQConvTestCase(const std::string& con // input -> Q/DQ -> auto* input = MakeTestInput(builder, input_def); - QuantParams input_qparams = GetTestInputQuantParams(input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); auto* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); conv_inputs.push_back(input_qdq); // weights -> Q/DQ -> auto* weights = MakeTestInput(builder, weights_def); - QuantParams weights_qparams = GetTestInputQuantParams(weights_def); + QuantParams weights_qparams = GetTestInputQuantParams(weights_def); auto* weights_qdq = AddQDQNodePair(builder, weights, weights_qparams.scale, weights_qparams.zero_point); conv_inputs.push_back(weights_qdq); diff --git a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc index d2ca9d8ff71e0..5b05b39f34a27 100644 --- a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc +++ b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc @@ -37,7 +37,7 @@ static GetTestQDQModelFn BuildQDQGatherOpTestCase(const TestInputDef< return [input_def, indices_def, axis](ModelTestBuilder& builder, std::vector>& output_qparams) { NodeArg* input = MakeTestInput(builder, input_def); - QuantParams input_qparams = GetTestInputQuantParams(input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); NodeArg* indices = MakeTestInput(builder, indices_def); diff --git a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc index 683c4d49fa99d..594973e37ef0b 100644 --- a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc +++ b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc @@ -45,12 +45,12 @@ static GetTestQDQModelFn BuildQDQInstanceNormTestCase(const TestInput std::vector>& output_qparams) { // input => Q => DQ => NodeArg* input = MakeTestInput(builder, input_def); - QuantParams input_qparams = GetTestInputQuantParams(input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); // scale => Q => DQ => NodeArg* scale = MakeTestInput(builder, scale_def); - QuantParams scale_qparams = GetTestInputQuantParams(scale_def); + QuantParams scale_qparams = GetTestInputQuantParams(scale_def); NodeArg* scale_qdq = AddQDQNodePair(builder, scale, scale_qparams.scale, scale_qparams.zero_point); // bias (as int32) => DQ => diff --git a/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc b/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc index 772476cb0d245..a8237817c71df 100644 --- a/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc +++ b/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc @@ -33,7 +33,7 @@ static GetTestQDQModelFn BuildQDQLeakyReluOpTestCase(const TestInputD std::vector>& output_qparams) { // input => Q => DQ => NodeArg* input = MakeTestInput(builder, input_def); - QuantParams input_qparams = GetTestInputQuantParams(input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); // LeakryRelu diff --git a/onnxruntime/test/providers/qnn/lrn_op_test.cc b/onnxruntime/test/providers/qnn/lrn_op_test.cc index 82f7b246aa5e4..4f64b4a7e0d3f 100644 --- a/onnxruntime/test/providers/qnn/lrn_op_test.cc +++ b/onnxruntime/test/providers/qnn/lrn_op_test.cc @@ -39,7 +39,7 @@ static GetTestQDQModelFn BuildQDQLRNTestCase(const TestInputDef>& output_qparams) { // input -> Q -> DQ -> NodeArg* input = MakeTestInput(builder, input_def); - QuantParams input_qparams = GetTestInputQuantParams(input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); // LRN diff --git a/onnxruntime/test/providers/qnn/matmul_test.cpp b/onnxruntime/test/providers/qnn/matmul_test.cpp index 00ba7bd7858c3..6edb6ecdcfb1a 100644 --- a/onnxruntime/test/providers/qnn/matmul_test.cpp +++ b/onnxruntime/test/providers/qnn/matmul_test.cpp @@ -34,12 +34,12 @@ static GetTestQDQModelFn BuildMatMulOpQDQTestCase(const TestInputDef< std::vector>& output_qparams) { // input1 -> Q -> DQ -> NodeArg* input1 = MakeTestInput(builder, input1_def); - QuantParams input1_qparams = GetTestInputQuantParams(input1_def); + QuantParams input1_qparams = GetTestInputQuantParams(input1_def); auto* input1_qdq = AddQDQNodePair(builder, input1, input1_qparams.scale, input1_qparams.zero_point); // input2 -> Q -> DQ -> NodeArg* input2 = MakeTestInput(builder, input2_def); - QuantParams input2_qparams = GetTestInputQuantParams(input2_def); + QuantParams input2_qparams = GetTestInputQuantParams(input2_def); auto* input2_qdq = AddQDQNodePair(builder, input2, input2_qparams.scale, input2_qparams.zero_point); // MatMul @@ -108,9 +108,9 @@ TEST_F(QnnCPUBackendTests, MatMulOp) { // Test MatMul broadcasting // Note slight inaccuracy in CPU backend: // Expected: contains 896 values, where each value and its corresponding value in 16-byte object -// <80-03 00-00 00-00 00-00 40-00 34-F0 5B-01 00-00> are an almost-equal pair -// Actual: 16-byte object <80-03 00-00 00-00 00-00 40-00 23-F0 5B-01 00-00>, -// where the value pair (148.536011, 148.536255) at index #4 don't match, which is 0.000244141 from 148.536 +// <80-03 00-00 00-00 00-00 40-00 34-DD F7-01 00-00> are an almost-equal pair +// Actual: 16-byte object <80-03 00-00 00-00 00-00 40-00 23-DD F7-01 00-00>, +// where the value pair (73.68116, 73.680809) at index #80 don't match, which is -0.000350952 from 73.6812 TEST_F(QnnCPUBackendTests, MatMulOp_Broadcast) { // Create two matrices with element values in the range [-10.0, 10.0]. std::vector input_a = GetFloatDataInRange(-10.0f, 10.0f, 28 * 64); @@ -118,7 +118,7 @@ TEST_F(QnnCPUBackendTests, MatMulOp_Broadcast) { RunMatMulOpOpTest(TestInputDef({28, 1, 64}, false, input_a), TestInputDef({64, 32}, false, input_b), - ExpectedEPNodeAssignment::All, 18, 0.00026f); + ExpectedEPNodeAssignment::All, 18, 0.0004f); } #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/max_min_op_test.cc b/onnxruntime/test/providers/qnn/max_min_op_test.cc new file mode 100644 index 0000000000000..09ea71e5f03eb --- /dev/null +++ b/onnxruntime/test/providers/qnn/max_min_op_test.cc @@ -0,0 +1,135 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#if !defined(ORT_MINIMAL_BUILD) + +#include + +#include "test/providers/qnn/qnn_test_utils.h" + +#include "onnx/onnx_pb.h" +#include "gtest/gtest.h" + +namespace onnxruntime { +namespace test { + +// Runs an Max/Min model on the QNN CPU backend. Checks the graph node assignment, and that inference +// outputs for QNN EP and CPU EP match. +static void RunCPUMinOrMaxOpTest(const std::string& op_type, + const std::vector>& input_defs, + ExpectedEPNodeAssignment expected_ep_assignment, + int opset = 13) { + ProviderOptions provider_options; + +#if defined(_WIN32) + provider_options["backend_path"] = "QnnCpu.dll"; +#else + provider_options["backend_path"] = "libQnnCpu.so"; +#endif + + RunQnnModelTest(BuildOpTestCase(op_type, input_defs, {}, kOnnxDomain), + provider_options, + opset, + expected_ep_assignment); +} + +// Runs a QDQ Max/Min model on the QNN (HTP) EP and the ORT CPU EP. Checks the graph node assignment, and that inference +// running the QDQ model on QNN EP is at least as accurate as on ORT CPU EP (when compared to the baseline float32 model). +template +static void RunQDQMinOrMaxOpTest(const std::string& op_type, + const std::vector>& input_defs, + ExpectedEPNodeAssignment expected_ep_assignment, + int opset = 13) { + ProviderOptions provider_options; + +#if defined(_WIN32) + provider_options["backend_path"] = "QnnHtp.dll"; +#else + provider_options["backend_path"] = "libQnnHtp.so"; +#endif + + TestQDQModelAccuracy(BuildOpTestCase(op_type, input_defs, {}, kOnnxDomain), // baseline float32 model + BuildQDQOpTestCase(op_type, input_defs, {}, kOnnxDomain), // QDQ model + provider_options, + opset, + expected_ep_assignment, + 1e-4f); +} + +// +// CPU tests: +// + +// Test that Min with 1 input is *NOT* supported on CPU backend. +TEST_F(QnnCPUBackendTests, Min_1Input_NotSupported) { + RunCPUMinOrMaxOpTest("Min", + {TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f)}, + ExpectedEPNodeAssignment::None, 13); +} + +// Test that Max with 1 input is *NOT* supported on CPU backend. +TEST_F(QnnCPUBackendTests, Max_1Input_NotSupported) { + RunCPUMinOrMaxOpTest("Max", + {TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f)}, + ExpectedEPNodeAssignment::None, 13); +} + +// Test Min with 2 inputs on CPU backend. +TEST_F(QnnCPUBackendTests, Min_2Inputs) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 48); + RunCPUMinOrMaxOpTest("Min", + {TestInputDef({1, 3, 4, 4}, false, input_data), + TestInputDef({1, 3, 4, 4}, false, input_data)}, + ExpectedEPNodeAssignment::All, 13); +} + +// Test Max with 2 inputs on CPU backend. +TEST_F(QnnCPUBackendTests, Max_2Inputs) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 48); + RunCPUMinOrMaxOpTest("Max", + {TestInputDef({1, 3, 4, 4}, false, input_data), + TestInputDef({1, 3, 4, 4}, false, input_data)}, + ExpectedEPNodeAssignment::All, 13); +} + +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) +// +// HTP tests: +// + +// Test that Min with 1 input is *NOT* supported on HTP backend. +TEST_F(QnnHTPBackendTests, Min_1Input_NotSupported) { + RunQDQMinOrMaxOpTest("Min", + {TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f)}, + ExpectedEPNodeAssignment::None, 13); +} + +// Test that Max with 1 input is *NOT* supported on HTP backend. +TEST_F(QnnHTPBackendTests, Max_1Input_NotSupported) { + RunQDQMinOrMaxOpTest("Max", + {TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f)}, + ExpectedEPNodeAssignment::None, 13); +} + +// Test accuracy of 8-bit Q/DQ Min with 2 inputs on HTP backend. +TEST_F(QnnHTPBackendTests, Min_2Inputs) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 48); + RunQDQMinOrMaxOpTest("Min", + {TestInputDef({1, 3, 4, 4}, false, input_data), + TestInputDef({1, 3, 4, 4}, false, input_data)}, + ExpectedEPNodeAssignment::All, 13); +} + +// Test accuracy of 8-bit Q/DQ Max with 2 inputs on HTP backend. +TEST_F(QnnHTPBackendTests, Max_2Inputs) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 48); + RunQDQMinOrMaxOpTest("Max", + {TestInputDef({1, 3, 4, 4}, false, input_data), + TestInputDef({1, 3, 4, 4}, false, input_data)}, + ExpectedEPNodeAssignment::All, 13); +} + +#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) +} // namespace test +} // namespace onnxruntime +#endif // !defined(ORT_MINIMAL_BUILD) diff --git a/onnxruntime/test/providers/qnn/pool_op_test.cpp b/onnxruntime/test/providers/qnn/pool_op_test.cpp index c6e8a032ca7f4..1c73eae1468ff 100644 --- a/onnxruntime/test/providers/qnn/pool_op_test.cpp +++ b/onnxruntime/test/providers/qnn/pool_op_test.cpp @@ -41,7 +41,7 @@ GetTestQDQModelFn BuildPoolQDQTestCase(const std::string& op_type, std::vector>& output_qparams) { // input -> Q -> DQ -> NodeArg* input = MakeTestInput(builder, input_def); - QuantParams input_qparams = GetTestInputQuantParams(input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); // MaxPool diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.cc b/onnxruntime/test/providers/qnn/qnn_test_utils.cc index feacdc54226b6..548f80675a622 100644 --- a/onnxruntime/test/providers/qnn/qnn_test_utils.cc +++ b/onnxruntime/test/providers/qnn/qnn_test_utils.cc @@ -21,19 +21,21 @@ std::vector GetFloatDataInRange(float min_val, float max_val, size_t num_ return {}; } + if (num_elems == 1) { + return {min_val}; + } + std::vector data; data.reserve(num_elems); - const float step_size = (max_val - min_val) / static_cast(num_elems); + const float step_size = (max_val - min_val) / static_cast(num_elems - 1); float val = min_val; for (size_t i = 0; i < num_elems; i++) { data.push_back(val); val += step_size; } - // Try to ensure that 0.0 and max_val are also included in the array. - // If num_elems is less than 3, then not all of min_val, 0, and max_val will be present. - data[num_elems / 2] = 0.0f; + // Ensure that max_val is included exactly (due to rounding from adding step sizes). data[num_elems - 1] = max_val; return data; diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.h b/onnxruntime/test/providers/qnn/qnn_test_utils.h index dd5e6fc23670a..1b0b85319918f 100644 --- a/onnxruntime/test/providers/qnn/qnn_test_utils.h +++ b/onnxruntime/test/providers/qnn/qnn_test_utils.h @@ -199,7 +199,7 @@ struct TestInputDef { std::pair range_override_; }; -template +template inline QuantParams GetTestInputQuantParams(const TestInputDef& input_def) { const std::pair frange = input_def.GetRange(); return QuantParams::Compute(frange.first, frange.second); @@ -239,10 +239,10 @@ void InferenceModel(const std::string& model_data, const char* log_id, * \param fp32_abs_err Small tolerance used for floating-point comparisons. * \param log_severity The logger's severity setting. */ -template +template inline void TestQDQModelAccuracy(const GetTestModelFn& f32_model_fn, const GetTestQDQModelFn& qdq_model_fn, const ProviderOptions& qnn_options, int opset_version, - ExpectedEPNodeAssignment expected_ep_assignment, float fp32_abs_err, + ExpectedEPNodeAssignment expected_ep_assignment, float fp32_abs_err = 1e-4f, logging::Severity log_severity = logging::Severity::kERROR) { // Add kMSDomain to cover contrib op like Gelu const std::unordered_map domain_to_version = {{"", opset_version}, {kMSDomain, 1}}; @@ -314,7 +314,8 @@ inline void TestQDQModelAccuracy(const GetTestModelFn& f32_model_fn, const GetTe // limit the error message count in case test with large data failed size_t max_error_count = 10; - int error_count = 0; + size_t error_count = 0; + // Compare accuracy of QDQ results with float model. // QNN EP must be at least as accurate as CPU EP when running the QDQ model. for (size_t i = 0; i < num_outputs; i++) { @@ -433,6 +434,79 @@ inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef manual quantization (int32) => DQ => final float bias NodeArg* MakeTestQDQBiasInput(ModelTestBuilder& builder, const TestInputDef& bias_def, float bias_scale); +/** + * Returns a function that builds a model with a single operator with N inputs of the same element type. + * + * \param op_type The operator to instantiate. + * \param input_defs List of input definitions. + * \param attrs List of operator attributes. + * \param op_domain The operator's domain. Defaults to the ONNX domain (i.e., ""). + * \returns A model building function. + */ +template +inline GetTestModelFn BuildOpTestCase(const std::string& op_type, + const std::vector>& input_defs, + const std::vector& attrs, + const std::string& op_domain = kOnnxDomain) { + return [op_type, input_defs, attrs, op_domain](ModelTestBuilder& builder) { + std::vector op_inputs; + op_inputs.reserve(input_defs.size()); + + for (const auto& input_def : input_defs) { + NodeArg* input = MakeTestInput(builder, input_def); + op_inputs.push_back(input); + } + + auto* output = builder.MakeOutput(); + Node& onnx_node = builder.AddNode(op_type, op_inputs, {output}, op_domain); + + for (const auto& attr : attrs) { + onnx_node.AddAttributeProto(attr); + } + }; +} + +/** + * Returns a function that builds a model with a single QDQ operator with N inputs of the same element type. + * + * \param op_type The operator to instantiate. + * \param input_defs List of input definitions. + * \param attrs List of operator attributes. + * \param op_domain The operator's domain. Defaults to the ONNX domain (i.e., ""). + * \returns A model building function. + */ +template +inline GetTestQDQModelFn BuildQDQOpTestCase(const std::string& op_type, + const std::vector>& input_defs, + const std::vector& attrs, + const std::string& op_domain = kOnnxDomain) { + return [op_type, input_defs, attrs, op_domain](ModelTestBuilder& builder, + std::vector>& output_qparams) { + std::vector op_inputs; + op_inputs.reserve(input_defs.size()); + + for (const auto& input_def : input_defs) { + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_after_qdq = AddQDQNodePair(builder, input, input_qparams.scale, + input_qparams.zero_point); + op_inputs.push_back(input_after_qdq); + } + + // Op -> op_output + auto* op_output = builder.MakeIntermediate(); + Node& onnx_node = builder.AddNode(op_type, op_inputs, {op_output}, op_domain); + + for (const auto& attr : attrs) { + onnx_node.AddAttributeProto(attr); + } + + // op_output -> Q -> DQ -> output + AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, + output_qparams[0].zero_point); + }; +} + /** * Runs a test model on the QNN EP. Checks the graph node assignment, and that inference * outputs for QNN and CPU match. diff --git a/onnxruntime/test/providers/qnn/reduce_op_test.cc b/onnxruntime/test/providers/qnn/reduce_op_test.cc index 755f6b094df07..c3c2b578a1bd0 100644 --- a/onnxruntime/test/providers/qnn/reduce_op_test.cc +++ b/onnxruntime/test/providers/qnn/reduce_op_test.cc @@ -366,7 +366,7 @@ static void RunReduceOpQDQTest(const std::string& op_type, bool keepdims, int opset, ExpectedEPNodeAssignment expected_ep_assignment, - float fp32_abs_err = 1e-5f) { + float fp32_abs_err = 1e-4f) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; diff --git a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc index 4e7702bd84270..49122c9dacdb1 100644 --- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc +++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc @@ -18,149 +18,16 @@ namespace onnxruntime { namespace test { #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) -using UInt8Limits = std::numeric_limits; - -template -static GetTestModelFn BuildUnaryOpTestCase(const std::string& op_type, const TestInputDef& input0_def, - const std::vector& attrs, - const std::string& domain = kOnnxDomain) { - return [op_type, input0_def, attrs, domain](ModelTestBuilder& builder) { - NodeArg* input0 = MakeTestInput(builder, input0_def); - - auto* output = builder.MakeOutput(); - auto& op_node = builder.AddNode(op_type, {input0}, {output}, domain); - for (const auto& attr : attrs) { - op_node.AddAttributeProto(attr); - } - }; -} - -// Creates the graph: -// _______________________ -// | | -// input_u8 -> DQ -> | SimpleOp | -> Q -> output_u8 -// |_______________________| -// -// Currently used to test QNN EP. -template -GetTestQDQModelFn BuildQDQUnaryOpTestCase(const TestInputDef& input_def, - const std::string& op_type, - const std::vector& attrs, - const std::string& domain = kOnnxDomain) { - return [input_def, op_type, attrs, domain](ModelTestBuilder& builder, - std::vector>& output_qparams) { - auto* input = MakeTestInput(builder, input_def); - QuantParams input_qparams = GetTestInputQuantParams(input_def); - auto* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); - - auto* op_output = builder.MakeIntermediate(); - auto& op_node = builder.AddNode(op_type, {input_qdq}, {op_output}, domain); - - for (const auto& attr : attrs) { - op_node.AddAttributeProto(attr); - } - - // op_output -> Q -> DQ -> output - AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, output_qparams[0].zero_point); - }; -} - -/** - * Runs an Simple Op model on the QNN HTP backend. Checks the graph node assignment, and that inference - * outputs for QNN and CPU match. - * - * \param input_shape The input's shape. - * \param test_description Description of the test for error reporting. - * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None). - * \param num_modes_in_graph The number of expected nodes in the graph. - */ -template -static void RunQDQUnaryOpTest(const TestInputDef& input_def, const std::string& op_type, - const std::vector& attrs, - int opset_version, - ExpectedEPNodeAssignment expected_ep_assignment, - const std::string& domain = kOnnxDomain) { - ProviderOptions provider_options; -#if defined(_WIN32) - provider_options["backend_path"] = "QnnHtp.dll"; -#else - provider_options["backend_path"] = "libQnnHtp.so"; -#endif - - // Runs model with DQ-> Op -> Q and compares the outputs of the CPU and QNN EPs. - TestQDQModelAccuracy(BuildUnaryOpTestCase(op_type, input_def, attrs, domain), - BuildQDQUnaryOpTestCase(input_def, op_type, attrs, domain), - provider_options, - opset_version, - expected_ep_assignment, - 1e-5f); -} - -// TODO: share with other op tests -// Creates the graph with two inputs and attributes -template -static GetTestModelFn BuildOpTestCase(const std::string& op_type, - const TestInputDef& input0_def, - const TestInputDef& input1_def, - const std::vector& attrs) { - return [op_type, input0_def, input1_def, attrs](ModelTestBuilder& builder) { - NodeArg* input0 = MakeTestInput(builder, input0_def); - NodeArg* input1 = MakeTestInput(builder, input1_def); - - auto* output = builder.MakeOutput(); - Node& onnx_node = builder.AddNode(op_type, {input0, input1}, {output}); - - for (const auto& attr : attrs) { - onnx_node.AddAttributeProto(attr); - } - }; -} - -// Creates the graph with two inputs and attributes -// _______________________ -// | | -// input0_u8 -> DQ -> | SimpleOp | -> Q -> output_u8 -// input1_u8 -> DQ -> |_______________________| -// -// Currently used to test QNN EP. -template -static GetTestQDQModelFn BuildQDQOpTestCase(const std::string& op_type, - const TestInputDef& input0_def, - const TestInputDef& input1_def, - const std::vector& attrs) { - return [op_type, input0_def, input1_def, attrs](ModelTestBuilder& builder, - std::vector>& output_qparams) { - NodeArg* input0 = MakeTestInput(builder, input0_def); - NodeArg* input1 = MakeTestInput(builder, input1_def); - - // input -> Q -> DQ -> Op - QuantParams input0_qparams = GetTestInputQuantParams(input0_def); - auto* qdq0_output = AddQDQNodePair(builder, input0, input0_qparams.scale, input0_qparams.zero_point); - - QuantParams input1_qparams = GetTestInputQuantParams(input1_def); - auto* qdq1_output = AddQDQNodePair(builder, input1, input1_qparams.scale, input1_qparams.zero_point); - - // Op -> op_output - auto* op_output = builder.MakeIntermediate(); - Node& onnx_node = builder.AddNode(op_type, {qdq0_output, qdq1_output}, {op_output}); - - for (const auto& attr : attrs) { - onnx_node.AddAttributeProto(attr); - } - - // op_output -> Q -> DQ -> output - AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, - output_qparams[0].zero_point); - }; -} - +// Tests the accuracy of a QDQ model on QNN EP by comparing to CPU EP, which runs both the fp32 model +// and the QDQ model. template static void RunQDQOpTest(const std::string& op_type, - const TestInputDef& input0_def, - const TestInputDef& input1_def, + const std::vector>& input_defs, const std::vector& attrs, int opset_version, - ExpectedEPNodeAssignment expected_ep_assignment) { + ExpectedEPNodeAssignment expected_ep_assignment, + const std::string& op_domain = kOnnxDomain, + float fp32_abs_err = 1e-4f) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -168,21 +35,22 @@ static void RunQDQOpTest(const std::string& op_type, provider_options["backend_path"] = "libQnnHtp.so"; #endif - TestQDQModelAccuracy(BuildOpTestCase(op_type, input0_def, input1_def, attrs), - BuildQDQOpTestCase(op_type, input0_def, input1_def, attrs), + TestQDQModelAccuracy(BuildOpTestCase(op_type, input_defs, attrs, op_domain), + BuildQDQOpTestCase(op_type, input_defs, attrs, op_domain), provider_options, opset_version, expected_ep_assignment, - 1e-5f); + fp32_abs_err); } +// Runs a non-QDQ model on HTP and compares output to CPU EP. template static void RunOpTest(const std::string& op_type, - const TestInputDef& input0_def, - const TestInputDef& input1_def, + const std::vector>& input_defs, const std::vector& attrs, int opset_version, - ExpectedEPNodeAssignment expected_ep_assignment) { + ExpectedEPNodeAssignment expected_ep_assignment, + const std::string& op_domain = kOnnxDomain) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -191,151 +59,307 @@ static void RunOpTest(const std::string& op_type, #endif // Runs model with a Q/DQ binary op and compares the outputs of the CPU and QNN EPs. - RunQnnModelTest(BuildOpTestCase(op_type, input0_def, input1_def, attrs), + RunQnnModelTest(BuildOpTestCase(op_type, input_defs, attrs, op_domain), provider_options, opset_version, expected_ep_assignment); } +// Test the accuracy of QDQ Sigmoid. +TEST_F(QnnHTPBackendTests, UnaryOp_Sigmoid) { + RunQDQOpTest("Sigmoid", + {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))}, + {}, + 13, + ExpectedEPNodeAssignment::All); +} + +// Test the accuracy of QDQ Tanh. +TEST_F(QnnHTPBackendTests, UnaryOp_Tanh) { + RunQDQOpTest("Tanh", + {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))}, + {}, + 13, + ExpectedEPNodeAssignment::All); +} + // Check that QNN compiles DQ -> Gelu -> Q as a single unit. // Use an input of rank 3. TEST_F(QnnHTPBackendTests, UnaryOp_Gelu) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), // Input range [-10.0, 10.0f] - "Gelu", - {}, - 11, - ExpectedEPNodeAssignment::All, - kMSDomain); // GeLu is a contrib op. + RunQDQOpTest("Gelu", + {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))}, + {}, + 11, + ExpectedEPNodeAssignment::All, + kMSDomain); // GeLu is a contrib op. } // Check that QNN compiles DQ -> Elu -> Q as a single unit. // Use an input of rank 3. TEST_F(QnnHTPBackendTests, UnaryOp_Elu) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), // Input range [-10.0, 10.0f] - "Elu", - {}, - 11, - ExpectedEPNodeAssignment::All); + RunQDQOpTest("Elu", + {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))}, + {}, + 11, + ExpectedEPNodeAssignment::All); +} + +// Tests accuracy of QDQ Relu +// TODO: Relu does not set negative values to zero! +// Could be due to ORT's ReluQuantFusion! +// +// Inaccuracy detected for output 'output', element 0. +// Output quant params: scale=0.039215687662363052, zero_point=0. +// Expected val: 0 +// QNN QDQ val: -10 (err 10) +// CPU QDQ val: 0 (err 0) +TEST_F(QnnHTPBackendTests, DISABLED_UnaryOp_Relu) { + RunQDQOpTest("Relu", + {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))}, + {}, + 14, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> HardSwish -> Q as a single unit. // Use an input of rank 3. TEST_F(QnnHTPBackendTests, UnaryOp_HardSwish) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), // Input range [-10.0, 10.0f] - "HardSwish", - {}, - 14, - ExpectedEPNodeAssignment::All); + RunQDQOpTest("HardSwish", + {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))}, + {}, + 14, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Atan -> Q as a single unit. // Use an input of rank 3. TEST_F(QnnHTPBackendTests, UnaryOp_Atan) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), // Input range [-10.0, 10.0f] - "Atan", - {}, - 14, - ExpectedEPNodeAssignment::All); + RunQDQOpTest("Atan", + {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))}, + {}, + 14, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Asin -> Q as a single unit. // Use an input of rank 3. TEST_F(QnnHTPBackendTests, UnaryOp_Asin) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -0.5f, 0.5f), // input range -0.5 to 0.5 - "Asin", {}, - 13, ExpectedEPNodeAssignment::All); + RunQDQOpTest("Asin", + {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-0.5, 0.5, 6))}, + {}, + 13, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Sign -> Q as a single unit. // Use an input of rank 3. TEST_F(QnnHTPBackendTests, UnaryOp_Sign) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), - "Sign", {}, - 13, ExpectedEPNodeAssignment::All); + RunQDQOpTest("Sign", + {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))}, + {}, + 13, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Sin -> Q as a single unit. // Use an input of rank 3. TEST_F(QnnHTPBackendTests, UnaryOp_Sin) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -3.14159f, 3.14159f), - "Sin", {}, - 11, ExpectedEPNodeAssignment::All); + RunQDQOpTest("Sin", + {TestInputDef({1, 2, 3}, false, -3.14159f, 3.14159f)}, + {}, + 11, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Cos -> Q as a single unit. // Use an input of rank 3. TEST_F(QnnHTPBackendTests, UnaryOp_Cos) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, {-3.14159f, -1.5f, -0.5f, 0.0f, 1.5, 3.14159f}), - "Cos", {}, - 11, ExpectedEPNodeAssignment::All); + RunQDQOpTest("Cos", + {TestInputDef({1, 2, 3}, false, {-3.14159f, -1.5f, -0.5f, 0.0f, 1.5, 3.14159f})}, + {}, + 11, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Cos -> Q as a single unit. // Use an input of rank 3. TEST_F(QnnHTPBackendTests, UnaryOp_Cos_Inaccurate) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, {-3.14159f, -1.88436f, -0.542863f, 0.0f, 1.05622f, 3.14159f}), - "Cos", {}, - 11, ExpectedEPNodeAssignment::All); + RunQDQOpTest("Cos", + {TestInputDef({1, 2, 3}, false, {-3.14159f, -1.88436f, -0.542863f, 0.0f, 1.05622f, 3.14159f})}, + {}, + 11, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Log -> Q as a single unit. // Use an input of rank 3. TEST_F(QnnHTPBackendTests, UnaryOp_Log) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, {3.14159f, 100.88436f, 10.542863f, 9.1f, 1.05622f, 3.14159f}), - "Log", {}, - 11, ExpectedEPNodeAssignment::All); + RunQDQOpTest("Log", + {TestInputDef({1, 2, 3}, false, {3.14159f, 100.88436f, 10.542863f, 9.1f, 1.05622f, 3.14159f})}, + {}, + 11, ExpectedEPNodeAssignment::All); +} + +// Test accuracy of 8-bit QDQ Exp +TEST_F(QnnHTPBackendTests, UnaryOp_Exp) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 6); + RunQDQOpTest("Exp", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, + 13, + ExpectedEPNodeAssignment::All); +} + +// Test accuracy of 8-bit QDQ Sqrt +TEST_F(QnnHTPBackendTests, UnaryOp_Sqrt) { + std::vector input_data = GetFloatDataInRange(0.0f, 20.0f, 9); + RunQDQOpTest("Sqrt", + {TestInputDef({1, 3, 3}, false, input_data)}, + {}, + 13, + ExpectedEPNodeAssignment::All); +} + +// Test accuracy of 8-bit QDQ Neg +TEST_F(QnnHTPBackendTests, UnaryOp_Neg) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 6); + RunQDQOpTest("Neg", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, + 13, + ExpectedEPNodeAssignment::All); +} + +// Test Not operator on HTP backend. +TEST_F(QnnHTPBackendTests, UnaryOp_Not) { + RunOpTest("Not", + {TestInputDef({1, 4}, false, {false, false, true, true})}, + {}, + 17, + ExpectedEPNodeAssignment::All); +} + +// Test accuracy of 8-bit QDQ Round +TEST_F(QnnHTPBackendTests, UnaryOp_Round) { + std::vector input_data = GetFloatDataInRange(-9.0f, 9.0f, 6); + RunQDQOpTest("Round", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, + 11, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that the default axis (-1) for SoftMax opset 13 works. TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_DefaultAxis) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -5.0f, 5.0f), - "Softmax", - {}, // Uses default axis of -1 for opset 13 - 13, ExpectedEPNodeAssignment::All); + RunQDQOpTest("Softmax", + {TestInputDef({1, 2, 3}, false, -5.0f, 5.0f)}, + {}, // Uses default axis of -1 for opset 13 + 13, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that an axis != -1 is not supported. TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_UnsupportedAxis) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -5.0f, 5.0f), - "Softmax", - {utils::MakeAttribute("axis", static_cast(1))}, - 13, ExpectedEPNodeAssignment::None); + RunQDQOpTest("Softmax", + {TestInputDef({1, 2, 3}, false, -5.0f, 5.0f)}, + {utils::MakeAttribute("axis", static_cast(1))}, + 13, + ExpectedEPNodeAssignment::None); } // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that the default axis (1) for SoftMax opset < 13 does not work. TEST_F(QnnHTPBackendTests, UnaryOp_Softmax11_DefaultAxisFails) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -5.0f, 5.0f), - "Softmax", - {}, // Uses default axis of 1 for opset < 13. - 11, ExpectedEPNodeAssignment::None); + RunQDQOpTest("Softmax", + {TestInputDef({1, 2, 3}, false, -5.0f, 5.0f)}, + {}, // Uses default axis of 1 for opset < 13. + 11, + ExpectedEPNodeAssignment::None); } // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that setting an axis value of -1 works for Softmax opset < 13. TEST_F(QnnHTPBackendTests, UnaryOp_Softmax11_SetValidAxis) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -5.0f, 5.0f), - "Softmax", - {utils::MakeAttribute("axis", static_cast(-1))}, - 11, ExpectedEPNodeAssignment::All); + RunQDQOpTest("Softmax", + {TestInputDef({1, 2, 3}, false, -5.0f, 5.0f)}, + {utils::MakeAttribute("axis", static_cast(-1))}, + 11, + ExpectedEPNodeAssignment::All); +} + +// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit. +// Test that the default axis (-1) for LogSoftmax opset 13 works. +TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_DefaultAxis) { + std::vector input_data = GetFloatDataInRange(-5.0f, 5.0f, 6); + RunQDQOpTest("LogSoftmax", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, // Uses default axis of -1 for opset 13 + 13, + ExpectedEPNodeAssignment::All); +} + +// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit. +// Test that an axis != -1 is not supported. +TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_UnsupportedAxis) { + std::vector input_data = GetFloatDataInRange(-5.0f, 5.0f, 6); + RunQDQOpTest("LogSoftmax", + {TestInputDef({1, 2, 3}, false, input_data)}, + {utils::MakeAttribute("axis", static_cast(1))}, + 13, + ExpectedEPNodeAssignment::None); +} + +// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit. +// Test that the default axis (1) for LogSoftmax opset < 13 does not work. +TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax11_DefaultAxisFails) { + std::vector input_data = GetFloatDataInRange(-5.0f, 5.0f, 6); + RunQDQOpTest("LogSoftmax", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, // Uses default axis of 1 for opset < 13. + 11, + ExpectedEPNodeAssignment::None); +} + +// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit. +// Test that setting an axis value of -1 works for LogSoftmax opset < 13. +TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax11_SetValidAxis) { + std::vector input_data = GetFloatDataInRange(-5.0f, 5.0f, 6); + RunQDQOpTest("LogSoftmax", + {TestInputDef({1, 2, 3}, false, input_data)}, + {utils::MakeAttribute("axis", static_cast(-1))}, + 11, + ExpectedEPNodeAssignment::All); } // Test QDQ Abs op. TEST_F(QnnHTPBackendTests, UnaryOp_Abs) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), - "Abs", - {}, - 13, ExpectedEPNodeAssignment::All); + RunQDQOpTest("Abs", + {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))}, + {}, + 13, + ExpectedEPNodeAssignment::All); } // Test QDQ Ceil op. TEST_F(QnnHTPBackendTests, UnaryOp_Ceil) { - RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -100.0f, 100.0f), - "Ceil", - {}, - 13, ExpectedEPNodeAssignment::All); + const std::vector input_data = GetFloatDataInRange(-12.0f, 12.0f, 6); + RunQDQOpTest("Ceil", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, + 13, + ExpectedEPNodeAssignment::All); +} + +// Test QDQ Floor op. +TEST_F(QnnHTPBackendTests, UnaryOp_Floor) { + const std::vector input_data = GetFloatDataInRange(-12.0f, 12.0f, 6); + RunQDQOpTest("Floor", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, + 13, + ExpectedEPNodeAssignment::All); } // Test QDQ DepthToSpace. @@ -348,11 +372,12 @@ TEST_F(QnnHTPBackendTests, DepthToSpaceOp_CRD) { 21., 22., 23., 27., 28., 29., 30., 31., 32.}; - RunQDQUnaryOpTest(TestInputDef({1, 4, 2, 3}, false, X), - "DepthToSpace", - {utils::MakeAttribute("blocksize", static_cast(2)), - utils::MakeAttribute("mode", "CRD")}, - 11, ExpectedEPNodeAssignment::All); + RunQDQOpTest("DepthToSpace", + {TestInputDef({1, 4, 2, 3}, false, X)}, + {utils::MakeAttribute("blocksize", static_cast(2)), + utils::MakeAttribute("mode", "CRD")}, + 11, + ExpectedEPNodeAssignment::All); } // Test QDQ DepthToSpace. @@ -365,11 +390,12 @@ TEST_F(QnnHTPBackendTests, DepthToSpaceOp_DCR) { 21., 22., 23., 27., 28., 29., 30., 31., 32.}; - RunQDQUnaryOpTest(TestInputDef({1, 4, 2, 3}, false, X), - "DepthToSpace", - {utils::MakeAttribute("blocksize", static_cast(2)), - utils::MakeAttribute("mode", "DCR")}, - 11, ExpectedEPNodeAssignment::All); + RunQDQOpTest("DepthToSpace", + {TestInputDef({1, 4, 2, 3}, false, X)}, + {utils::MakeAttribute("blocksize", static_cast(2)), + utils::MakeAttribute("mode", "DCR")}, + 11, + ExpectedEPNodeAssignment::All); } // Test QDQ SpaceToDepth. @@ -379,10 +405,11 @@ TEST_F(QnnHTPBackendTests, SpaceToDepthOp) { 2.0f, 2.1f, 2.2f, 2.3f, 3.0f, 3.1f, 3.2f, 3.3f}; - RunQDQUnaryOpTest(TestInputDef({1, 2, 2, 4}, false, X), - "SpaceToDepth", - {utils::MakeAttribute("blocksize", static_cast(2))}, - 11, ExpectedEPNodeAssignment::All); + RunQDQOpTest("SpaceToDepth", + {TestInputDef({1, 2, 2, 4}, false, X)}, + {utils::MakeAttribute("blocksize", static_cast(2))}, + 11, + ExpectedEPNodeAssignment::All); } // Run QDQ model on HTP twice @@ -404,23 +431,21 @@ TEST_F(QnnHTPBackendTests, ContextBinaryCacheTest) { // Runs model with DQ-> Atan-> Q and compares the outputs of the CPU and QNN EPs. // 1st run will generate the Qnn context cache binary file - TestQDQModelAccuracy(BuildUnaryOpTestCase(op_type, input_def, {}), - BuildQDQUnaryOpTestCase(input_def, op_type, {}), + TestQDQModelAccuracy(BuildOpTestCase(op_type, {input_def}, {}), + BuildQDQOpTestCase(op_type, {input_def}, {}), provider_options, 14, - ExpectedEPNodeAssignment::All, - 1e-5f); + ExpectedEPNodeAssignment::All); // Make sure the Qnn context cache binary file is generated EXPECT_TRUE(std::filesystem::exists(context_binary_file.c_str())); // 2nd run will load and run from Qnn context cache binary file - TestQDQModelAccuracy(BuildUnaryOpTestCase(op_type, input_def, {}), - BuildQDQUnaryOpTestCase(input_def, op_type, {}), + TestQDQModelAccuracy(BuildOpTestCase(op_type, {input_def}, {}), + BuildQDQOpTestCase(op_type, {input_def}, {}), provider_options, 14, - ExpectedEPNodeAssignment::All, - 1e-5f); + ExpectedEPNodeAssignment::All); } TEST_F(QnnHTPBackendTests, QuantAccuracyTest) { @@ -439,7 +464,7 @@ TEST_F(QnnHTPBackendTests, QuantAccuracyTest) { // input -> Q -> Transpose -> DQ -> output NodeArg* input0 = MakeTestInput(builder, input0_def); - QuantParams qparams = GetTestInputQuantParams(input0_def); + QuantParams qparams = GetTestInputQuantParams(input0_def); auto* quant_input = builder.MakeIntermediate(); builder.AddQuantizeLinearNode(input0, qparams.scale, qparams.zero_point, quant_input); @@ -462,8 +487,8 @@ TEST_F(QnnHTPBackendTests, QuantAccuracyTest) { // Test QDQ Add TEST_F(QnnHTPBackendTests, BinaryOp_Add4D) { RunQDQOpTest("Add", - TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), - TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + {TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f)}, {}, 17, ExpectedEPNodeAssignment::All); @@ -472,8 +497,8 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Add4D) { // Test QDQ Sub TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D) { RunQDQOpTest("Sub", - TestInputDef({1, 3, 8, 8}, false, -10.0f, 10.0f), - TestInputDef({1, 3, 8, 8}, false, -10.0f, 10.0f), + {TestInputDef({1, 3, 8, 8}, false, -10.0f, 10.0f), + TestInputDef({1, 3, 8, 8}, false, -10.0f, 10.0f)}, {}, 17, ExpectedEPNodeAssignment::All); @@ -481,8 +506,8 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D) { TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D_LargeInputs) { RunQDQOpTest("Sub", - TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + {TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f)}, {}, 17, ExpectedEPNodeAssignment::All); @@ -490,17 +515,65 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D_LargeInputs) { TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D_Broadcast) { RunQDQOpTest("Sub", - TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - TestInputDef({3, 1, 1}, true, {1.0f, 0.5f, -0.3f}), + {TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + TestInputDef({3, 1, 1}, true, {1.0f, 0.5f, -0.3f})}, {}, 17, ExpectedEPNodeAssignment::All); } +// Test accuracy of QDQ Pow +#if defined(__linux__) +// TODO: This fails on Linux (HTP emulation). Works on Windows ARM64. +// Inaccuracy detected for output 'output', element 0. +// Output quant params: scale=0.051073111593723297, zero_point=2. +// Expected val: 0.0099999997764825821 +// QNN QDQ val: 12.921497344970703 (err 12.911497116088867) +// CPU QDQ val: -0.10214622318744659 (err 0.11214622110128403) +TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Pow) { +#else +TEST_F(QnnHTPBackendTests, BinaryOp_Pow) { +#endif + std::vector bases_input = {-10.0f, -8.0f, -6.0f, 1.0f, 2.0f, 3.0f, 5.5f, 10.0f}; + std::vector exponents_input = {-2.0f, -1.0f, 0.0f, 0.5f, 1.0f, 2.0f, 1.5f, 0.2f}; + RunQDQOpTest("Pow", + {TestInputDef({1, 2, 2, 2}, false, bases_input), + TestInputDef({1, 2, 2, 2}, false, exponents_input)}, + {}, + 15, + ExpectedEPNodeAssignment::All); +} + +// Test accuracy of QDQ PRelu with dynamic slopes. +TEST_F(QnnHTPBackendTests, BinaryOp_PRelu_DynamicSlopes) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 8); + std::vector slopes_data = GetFloatDataInRange(-1.0f, 1.0f, 8); + RunQDQOpTest("PRelu", + {TestInputDef({1, 2, 2, 2}, false, input_data), + TestInputDef({1, 2, 2, 2}, false, slopes_data)}, + {}, + 16, + ExpectedEPNodeAssignment::All); +} + +// Test accuracy of QDQ PRelu with static slope weights. +TEST_F(QnnHTPBackendTests, BinaryOp_PRelu_StaticSlopes) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 8); + std::vector slopes_data = GetFloatDataInRange(-1.0f, 1.0f, 8); + RunQDQOpTest("PRelu", + {TestInputDef({1, 2, 2, 2}, false, input_data), + TestInputDef({1, 2, 2, 2}, true, slopes_data)}, + {}, + 16, + ExpectedEPNodeAssignment::All); +} + TEST_F(QnnHTPBackendTests, BinaryOp_Div4D_SmallInputs) { + std::vector input0_data = {-10.0f, -8.0f, -1.0f, 0.0f, 1.0f, 2.1f, 8.0f, 10.0f}; + std::vector input1_data = {5.0f, 4.0f, 1.0f, 1.0f, 1.0f, 4.0f, 4.0f, 5.0f}; RunQDQOpTest("Div", - TestInputDef({1, 2, 2, 2}, false, {-10.0f, -8.0f, -1.0f, 0.0f, 1.0f, 2.1f, 8.0f, 10.0f}), - TestInputDef({1, 2, 2, 2}, false, {5.0f, 4.0f, 1.0f, 1.0f, 1.0f, 4.0f, 4.0f, 5.0f}), + {TestInputDef({1, 2, 2, 2}, false, input0_data), + TestInputDef({1, 2, 2, 2}, false, input1_data)}, {}, 17, ExpectedEPNodeAssignment::All); @@ -514,8 +587,8 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Div4D_SmallInputs) { // CPU QDQ val: -516716.71875 (err 238759.40625) TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Div4D_LargeInputs) { RunQDQOpTest("Div", - TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + {TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f)}, {}, 17, ExpectedEPNodeAssignment::All); @@ -523,8 +596,8 @@ TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Div4D_LargeInputs) { TEST_F(QnnHTPBackendTests, BinaryOp_Div4D_Broadcast) { RunQDQOpTest("Div", - TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - TestInputDef({3, 1, 1}, true, {1.0f, 0.5f, -0.3f}), + {TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + TestInputDef({3, 1, 1}, true, {1.0f, 0.5f, -0.3f})}, {}, 17, ExpectedEPNodeAssignment::All); @@ -532,29 +605,30 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Div4D_Broadcast) { // Test QDQ Mul TEST_F(QnnHTPBackendTests, BinaryOp_Mul4D) { + std::vector input_data = GetFloatDataInRange(-10.0, 10.0f, 8); RunQDQOpTest("Mul", - TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), - TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + {TestInputDef({1, 2, 2, 2}, false, input_data), + TestInputDef({1, 2, 2, 2}, false, input_data)}, {}, 17, ExpectedEPNodeAssignment::All); } // Test And -TEST_F(QnnCPUBackendTests, BinaryOp_And4D) { +TEST_F(QnnHTPBackendTests, BinaryOp_And4D) { RunOpTest("And", - TestInputDef({1, 4}, false, {false, false, true, true}), - TestInputDef({1, 4}, false, {false, true, false, true}), + {TestInputDef({1, 4}, false, {false, false, true, true}), + TestInputDef({1, 4}, false, {false, true, false, true})}, {}, 17, ExpectedEPNodeAssignment::All); } // Test that Or is not yet supported on CPU backend. -TEST_F(QnnCPUBackendTests, BinaryOp_HTP_Or_Unsupported) { +TEST_F(QnnHTPBackendTests, BinaryOp_HTP_Or_Unsupported) { RunOpTest("Or", - TestInputDef({1, 4}, false, {false, false, true, true}), - TestInputDef({1, 4}, false, {false, true, false, true}), + {TestInputDef({1, 4}, false, {false, false, true, true}), + TestInputDef({1, 4}, false, {false, true, false, true})}, {}, 17, ExpectedEPNodeAssignment::None); @@ -563,8 +637,8 @@ TEST_F(QnnCPUBackendTests, BinaryOp_HTP_Or_Unsupported) { // Test QDQ GridSample with bilinear TEST_F(QnnHTPBackendTests, GridSample_Bilinear) { RunQDQOpTest("GridSample", - TestInputDef({1, 1, 3, 2}, false, -10.0f, 10.0f), - TestInputDef({1, 2, 4, 2}, false, -10.0f, 10.0f), + {TestInputDef({1, 1, 3, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 6)), + TestInputDef({1, 2, 4, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 16))}, {utils::MakeAttribute("align_corners", static_cast(0)), utils::MakeAttribute("mode", "bilinear"), utils::MakeAttribute("padding_mode", "zeros")}, @@ -575,8 +649,8 @@ TEST_F(QnnHTPBackendTests, GridSample_Bilinear) { // Test QDQ GridSample with align corners TEST_F(QnnHTPBackendTests, GridSample_AlignCorners) { RunQDQOpTest("GridSample", - TestInputDef({1, 1, 3, 2}, false, -10.0f, 10.0f), - TestInputDef({1, 2, 4, 2}, false, -10.0f, 10.0f), + {TestInputDef({1, 1, 3, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 6)), + TestInputDef({1, 2, 4, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 16))}, {utils::MakeAttribute("align_corners", static_cast(1)), utils::MakeAttribute("mode", "bilinear"), utils::MakeAttribute("padding_mode", "zeros")}, @@ -592,8 +666,8 @@ TEST_F(QnnHTPBackendTests, GridSample_AlignCorners) { // CPU QDQ val: 3.3850328922271729 (err 0.022981882095336914) TEST_F(QnnHTPBackendTests, DISABLED_GridSample_BorderPadding) { RunQDQOpTest("GridSample", - TestInputDef({1, 1, 3, 2}, false, -10.0f, 10.0f), - TestInputDef({1, 2, 4, 2}, false, -10.0f, 10.0f), + {TestInputDef({1, 1, 3, 2}, false, -10.0f, 10.0f), + TestInputDef({1, 2, 4, 2}, false, -10.0f, 10.0f)}, {utils::MakeAttribute("mode", "bilinear"), utils::MakeAttribute("padding_mode", "border")}, 17, @@ -603,8 +677,8 @@ TEST_F(QnnHTPBackendTests, DISABLED_GridSample_BorderPadding) { // Test QDQ GridSample with nearest mode TEST_F(QnnHTPBackendTests, GridSample_Nearest) { RunQDQOpTest("GridSample", - TestInputDef({1, 1, 3, 2}, false, -10.0f, 10.0f), - TestInputDef({1, 2, 4, 2}, false, -10.0f, 10.0f), + {TestInputDef({1, 1, 3, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 6)), + TestInputDef({1, 2, 4, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 16))}, {utils::MakeAttribute("mode", "nearest")}, 17, ExpectedEPNodeAssignment::All); @@ -618,13 +692,33 @@ TEST_F(QnnHTPBackendTests, GridSample_Nearest) { // CPU QDQ val: 3.2036216259002686 (err 0.0092642307281494141) TEST_F(QnnHTPBackendTests, DISABLED_GridSample_ReflectionPaddingMode) { RunQDQOpTest("GridSample", - TestInputDef({1, 1, 3, 2}, false, -10.0f, 10.0f), - TestInputDef({1, 2, 4, 2}, false, -10.0f, 10.0f), + {TestInputDef({1, 1, 3, 2}, false, -10.0f, 10.0f), + TestInputDef({1, 2, 4, 2}, false, -10.0f, 10.0f)}, {utils::MakeAttribute("padding_mode", "reflection")}, 17, ExpectedEPNodeAssignment::All); } +// Test QDQ Concat: 3 inputs concatenated at the last axis. +TEST_F(QnnHTPBackendTests, VariadicOp_Concat_3Inputs_LastAxis) { + RunQDQOpTest("Concat", + {TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + TestInputDef({1, 2, 2, 3}, false, -1.0f, 1.0f), + TestInputDef({1, 2, 2, 1}, false, -2.0f, 2.0f)}, + {utils::MakeAttribute("axis", static_cast(-1))}, + 13, + ExpectedEPNodeAssignment::All); +} + +// Test QDQ Concat: 2 inputs concatenated at the second axis. +TEST_F(QnnHTPBackendTests, VariadicOp_Concat_2Inputs_2ndAxis) { + RunQDQOpTest("Concat", + {TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + TestInputDef({1, 3, 2, 2}, false, -2.0f, 2.0f)}, + {utils::MakeAttribute("axis", static_cast(1))}, + 13, + ExpectedEPNodeAssignment::All); +} #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) } // namespace test diff --git a/onnxruntime/test/providers/qnn/slice_htp_test.cc b/onnxruntime/test/providers/qnn/slice_htp_test.cc index 23d817a69b89b..f7163f04736a5 100644 --- a/onnxruntime/test/providers/qnn/slice_htp_test.cc +++ b/onnxruntime/test/providers/qnn/slice_htp_test.cc @@ -45,7 +45,7 @@ static GetTestQDQModelFn BuildQDQSliceTestCase(const TestInputDef>& output_qparams) { NodeArg* data = MakeTestInput(builder, data_def); - QuantParams data_qparams = GetTestInputQuantParams(data_def); + QuantParams data_qparams = GetTestInputQuantParams(data_def); NodeArg* data_qdq = AddQDQNodePair(builder, data, data_qparams.scale, data_qparams.zero_point); NodeArg* starts = MakeTestInput(builder, starts_def); diff --git a/onnxruntime/test/providers/qnn/transpose_htp_test.cc b/onnxruntime/test/providers/qnn/transpose_htp_test.cc index adc0e7104b136..8d8c1ebb0fd15 100644 --- a/onnxruntime/test/providers/qnn/transpose_htp_test.cc +++ b/onnxruntime/test/providers/qnn/transpose_htp_test.cc @@ -38,7 +38,7 @@ static GetTestQDQModelFn BuildQDQTransposeTestCase(const TestInputDef const std::vector& attrs) { return [input_def, attrs](ModelTestBuilder& builder, std::vector>& output_qparams) { NodeArg* input = MakeTestInput(builder, input_def); - QuantParams input_qparams = GetTestInputQuantParams(input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); auto* output = builder.MakeIntermediate(); diff --git a/onnxruntime/test/providers/qnn/where_htp_test.cc b/onnxruntime/test/providers/qnn/where_htp_test.cc index 02238dad1c5dd..49f3ef0fd983a 100644 --- a/onnxruntime/test/providers/qnn/where_htp_test.cc +++ b/onnxruntime/test/providers/qnn/where_htp_test.cc @@ -42,12 +42,12 @@ static GetTestQDQModelFn BuildQDQWhereTestCase(const TestInputDef Q => DQ => NodeArg* x = MakeTestInput(builder, x_def); - QuantParams x_qparams = GetTestInputQuantParams(x_def); + QuantParams x_qparams = GetTestInputQuantParams(x_def); NodeArg* x_qdq = AddQDQNodePair(builder, x, x_qparams.scale, x_qparams.zero_point); // y => Q => DQ => NodeArg* y = MakeTestInput(builder, y_def); - QuantParams y_qparams = GetTestInputQuantParams(y_def); + QuantParams y_qparams = GetTestInputQuantParams(y_def); NodeArg* y_qdq = AddQDQNodePair(builder, y, y_qparams.scale, y_qparams.zero_point); // Where operator.