diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 1c77121ba9df1..c924eeecf7248 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -786,7 +786,7 @@ void QDQTransformerGemmTests(bool has_output_q, bool has_bias, bool beta_not_one auto check_binary_op_graph = [&](InferenceSessionWrapper& session) { auto op_to_count = CountOpsInGraph(session.GetGraph()); const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); - if ((!has_output_q || std::is_same_v)&&(!has_bias || (std::is_same_v && !beta_not_one)) && + if ((!has_output_q || std::is_same_v) && (!has_bias || (std::is_same_v && !beta_not_one)) && (std::is_same_v || std::is_same_v)) { EXPECT_EQ(op_to_count["com.microsoft.QGemm"], 1); EXPECT_EQ(op_to_count["Gemm"], 0); @@ -979,6 +979,52 @@ TEST(QDQTransformerTests, ReshapeDropQDQ) { RunReshapeDropQDQTestCase({1, 3, 2, 2}, {1, 12}, false, 21); // Use int16 ONNX QDQ ops } +// Runs a test case that checks if Q/DQ nodes are *not* dropped from DQ -> MaxPool -> Q if the quantization scale is +// negative. +template +static void RunMaxPoolNegativeScaleDropQDQTestCase() { + auto build_test_case = [](ModelTestBuilder& builder) { + constexpr QuantType qmin = std::numeric_limits::min(); + constexpr QuantType qmax = std::numeric_limits::max(); + + const std::vector input_shape = {1, 17, 17, 3}; + auto* input_arg = builder.MakeInput(input_shape, qmin, qmax); + auto* output_arg = builder.MakeOutput(); + + constexpr float scale = -0.003f; + QuantType zero_point = 1 + (qmax + qmin) / 2; + + auto* input_arg_dq = builder.MakeIntermediate(); + auto* maxpool_output = builder.MakeIntermediate(); + + builder.AddDequantizeLinearNode(input_arg, scale, zero_point, input_arg_dq); + + Node& maxpool_node = builder.AddNode("MaxPool", {input_arg_dq}, {maxpool_output}); + maxpool_node.AddAttribute("auto_pad", "VALID"); + maxpool_node.AddAttribute("kernel_shape", std::vector({2, 2})); + + builder.AddQuantizeLinearNode(maxpool_output, scale, zero_point, output_arg); + }; + + auto check_graph = [](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + EXPECT_EQ(op_to_count["MaxPool"], 1); + EXPECT_EQ(op_to_count["QuantizeLinear"], 1); + EXPECT_EQ(op_to_count["DequantizeLinear"], 1); + }; + + constexpr int opset = 21; + TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2, opset); +} + +// Checks that Q/DQ nodes are *not* dropped from DQ -> MaxPool -> Q for negative scale. Uses 8-bit and 16-bit Q/DQ ops. +TEST(QDQTransformerTests, MaxpoolDontDropQDQForNegativeScale) { + RunMaxPoolNegativeScaleDropQDQTestCase(); + RunMaxPoolNegativeScaleDropQDQTestCase(); + RunMaxPoolNegativeScaleDropQDQTestCase(); + RunMaxPoolNegativeScaleDropQDQTestCase(); +} + // Runs a test case that checks if Q/DQ nodes are dropped from DQ -> (Un)Squeeze -> Q. template static void RunSqueezeUnsqueezeDropQDQTestCase(const std::string& squeeze_type,