From 7d38ea47841ebdbd5f6006abd8f574ddebcc4a6a Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Tue, 25 Jun 2024 17:20:56 -0700 Subject: [PATCH 01/32] Remove QDQ nodes around Flatten MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With matching quantization parameters: DequantizeLinear ∘ Flatten ∘ QuantizeLinear is equivalent to just the Flatten, and it saves some floating-point computations. There's already support for a similar optimization for an equivalent Reshape: this change extends the existing optimization to also recognize Flatten. https://github.com/microsoft/onnxruntime/discussions/21167 --- .../qdq_selector_action_transformer.cc | 3 +- .../test/optimizer/qdq_transformer_test.cc | 47 +++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index 80ead8f8c68d6..e27984000d6ba 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -65,7 +65,8 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { {"Reshape", {}}, {"Transpose", {}}, {"Squeeze", {}}, - {"Unsqueeze", {}}}, + {"Unsqueeze", {}}, + {"Flatten", {}}}, std::move(selector), std::move(drop_action)); #else diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 1c77121ba9df1..f71e53f3b0cd8 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -1040,6 +1040,53 @@ TEST(QDQTransformerTests, UnsqueezeDropQDQ) { RunSqueezeUnsqueezeDropQDQTestCase("Unsqueeze", {1, 3, 2, 2}, {0}, false, 21); } +// Runs a test case that checks if
Q/DQ nodes are dropped from DQ -> Flatten -> Q. +template +static void RunFlattenDropQDQTestCase(const std::vector& input_shape, + int64_t axis = 1, + bool use_contrib_qdq = false, + int opset = 21) { + auto build_test_case = [input_shape, axis, use_contrib_qdq](ModelTestBuilder& builder) { + constexpr QuantType qmin = std::numeric_limits::min(); + constexpr QuantType qmax = std::numeric_limits::max(); + + auto* input_arg = builder.MakeInput(input_shape, qmin, qmax); + auto* output_arg = builder.MakeOutput(); + QuantType zero_point = 1 + (qmax + qmin) / 2; + + auto* input_arg_dq = builder.MakeIntermediate(); + auto* flatten_output = builder.MakeIntermediate(); + builder.AddDequantizeLinearNode(input_arg, .003f, zero_point, input_arg_dq, use_contrib_qdq); + Node& flatten_node = builder.AddNode("Flatten", {input_arg_dq}, {flatten_output}); + flatten_node.AddAttribute("axis", axis); + + // add Q + builder.AddQuantizeLinearNode(flatten_output, .003f, zero_point, output_arg, use_contrib_qdq); + }; + + auto check_graph = [use_contrib_qdq](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count["Flatten"], 1); + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); + }; + + TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2, opset); +} + +// Checks that Q/DQ nodes are dropped from DQ -> Reshape -> Q. Uses 8-bit and 16-bit Q/DQ ops. 
+TEST(QDQTransformerTests, FlattenDropQDQ) { + for (int64_t axis: {0, 1, 3}) { + RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis); + RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, true, 13); // Use com.microsoft QDQ ops + RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, true, 13); // Use int16 com.microsoft QDQ ops + RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, true, 13); // Use int16 com.microsoft QDQ ops + RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, false); // Use int16 ONNX QDQ ops + RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, false); // Use int16 ONNX QDQ ops + } +} + TEST(QDQTransformerTests, DoubleQDQ) { constexpr uint8_t good_u8_1 = 80; constexpr uint8_t good_u8_2 = 40; From b3b506b7eff9a0f31eb5f0a8ce2622cf3c0b0cd9 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Wed, 26 Jun 2024 09:34:57 -0700 Subject: [PATCH 02/32] Add more operators for which QDQ can be removed --- .../selectors_actions/qdq_selector_action_transformer.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index e27984000d6ba..bdbce93fe8f38 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -66,7 +66,13 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { {"Transpose", {}}, {"Squeeze", {}}, {"Unsqueeze", {}}, - {"Flatten", {}}}, + {"Flatten", {}}, + {"Expand", {}}, + {"Tile", {}}, + {"Slice", {}}, + {"GatherElements", {}}, + {"DepthToSpace", {}}, + {"SpaceToDepth", {}}}, std::move(selector), std::move(drop_action)); #else From 2160e44a634d8835825c20c9b80306ebddb8e698 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Wed, 26 Jun 2024 11:05:02 -0700 Subject: [PATCH 03/32] Keep QDQ nodes 
w/ nonpositive scale around MaxPool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the DropQDQNodesRules optimization removes QuantizeLinear and DequantizeLinear nodes from DequantizeLinear∘MaxPool∘QuantizeLinear. However, if the x_scale/y_scale values are non-positive, this changes the ordering of the elements in the input value, so this optimization is changing the results. This change adds a check for whether the scale in the QuantizeLinear (or DequantizeLinear) is a positive scalar, and a new selector to disallow removing the QDQ around MaxPool if it is not. https://github.com/microsoft/onnxruntime/issues/21176 --- .../optimizer/qdq_transformer/qdq_util.cc | 35 +++++++++++++++++++ .../core/optimizer/qdq_transformer/qdq_util.h | 4 +++ .../qdq_selector_action_transformer.cc | 20 +++++++++-- .../selectors_actions/qdq_selectors.cc | 7 ++++ .../selectors_actions/qdq_selectors.h | 9 ++--- 5 files changed, 68 insertions(+), 7 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc index 5a6c47a8d8454..a9bed8f9a8ced 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc @@ -166,6 +166,41 @@ bool QOrDQNodeHasConstantScalarScaleAndZeroPoint( return true; } +bool IsQOrDQScalePositiveConstantScalar( + const Node& q_or_dq_node, const GetConstantInitializerFn& get_const_initializer, + const Path& model_path) { + auto q_or_dq_input_defs = q_or_dq_node.InputDefs(); + + ORT_ENFORCE(q_or_dq_input_defs.size() >= 2); + + if (!optimizer_utils::IsScalar(*q_or_dq_input_defs[InputIndex::SCALE_ID])) { + return false; + } + + const ONNX_NAMESPACE::TensorProto* q_or_dq_scale_tensor_proto = + get_const_initializer(q_or_dq_input_defs[InputIndex::SCALE_ID]->Name()); + if (nullptr == q_or_dq_scale_tensor_proto) { + return false; + } + + Initializer 
q_or_dq_scale(*q_or_dq_scale_tensor_proto, model_path); + + switch (q_or_dq_scale.data_type()) { + case ONNX_NAMESPACE::TensorProto::FLOAT: + return q_or_dq_scale.data()[0] > 0; + + case ONNX_NAMESPACE::TensorProto::FLOAT16: + return q_or_dq_scale.data()[0] > 0; + + case ONNX_NAMESPACE::TensorProto::BFLOAT16: + return q_or_dq_scale.data()[0] > 0; + + default: + assert(false); + return false; + } +} + #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) bool MatchQNode(const Node& node) { diff --git a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h index c5f7cd601a2f0..519cfdff2580e 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h +++ b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h @@ -64,6 +64,10 @@ bool QOrDQNodeHasConstantScalarScaleAndZeroPoint( const GetConstantInitializerFn& get_const_initializer, bool& zero_point_exists); +// Checks that the y_scale/x_scale input to the QuantizeLinear/DequantizeLinear node is a positive scalar. +bool IsQOrDQScalePositiveConstantScalar(const Node& q_or_dq_node, const GetConstantInitializerFn& get_const_initializer, + const Path& model_path); + #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) // Check Q node op type, version, and domain. bool MatchQNode(const Node& node); diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index 80ead8f8c68d6..74cd223f07375 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -35,6 +35,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { // 3 nodes. DQ, target, Q. Merge into target and remove DQ and Q. 
const std::string drop_action_name{"drop"}; const std::string drop_action_no_int16_name{"drop_no_int16_support"}; + const std::string drop_action_no_int16_nor_nonpositive_scale_name{"drop_no_int16_support_no_nonpositive_scale"}; NTO::NodeLocation dq{NTO::NodeType::kInput, 0}; NTO::NodeLocation q{NTO::NodeType::kOutput, 0}; @@ -46,19 +47,32 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr drop_action_no_int16 = std::make_unique( std::vector(moves)); // Copy before std::move(moves) + std::unique_ptr drop_action_no_int16_nor_nonpositive_scale = std::make_unique( + std::vector(moves)); // Copy before std::move(moves) std::unique_ptr drop_action = std::make_unique(std::move(moves)); #if !defined(ORT_MINIMAL_BUILD) - // Use a separate selector + action that disallows 16-bit types for MaxPool and Resize. + // Use a separate selectors & actions for MaxPool and Resize. + // + // They disallow 16-bit types for MaxPool and Resize: // int16 MaxPool is not supported by the ONNX specification. // int16 Resize is not supported by the ORT implementation (although allowed by ONNX). + // + // And cannot eliminate the QDQ for MaxPool if the scale is not positive, as a negative scale will change the ordering + // of the elements between quantized & de-quantized values. 
std::unique_ptr selector_disallow_16bit = std::make_unique(false); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_name, - {{"MaxPool", {12}}, - {"Resize", {}}}, + {{"Resize", {}}}, std::move(selector_disallow_16bit), std::move(drop_action_no_int16)); + std::unique_ptr selector_disallow_16bit_and_nonpositive_scale = ( + std::make_unique(false, true, false)); + qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_nor_nonpositive_scale_name, + {{"MaxPool", {12}}}, + std::move(selector_disallow_16bit_and_nonpositive_scale), + std::move(drop_action_no_int16_nor_nonpositive_scale)); + std::unique_ptr selector = std::make_unique(true); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_name, {{"Gather", {}}, diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc index 09705f61c82ce..cdf5eb2b3f1c2 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc @@ -150,6 +150,13 @@ bool DropQDQNodeGroupSelector::Check(const GraphViewer& graph_viewer, return graph_viewer.GetConstantInitializer(initializer_name, true); }; + if (!allow_nonpositive_scale_) { + // IsQDQPairSupported will check that the scale is the same between q_node and dq_node. 
+ if (!IsQOrDQScalePositiveConstantScalar(q_node, get_const_initializer, graph_viewer.ModelPath())) { + return false; + } + } + return IsQDQPairSupported(q_node, dq_node, get_const_initializer, graph_viewer.ModelPath()); } diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h index 1a2a620acb480..7ef18d292e4ae 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h @@ -48,8 +48,8 @@ class NodeGroupSelector { // Zero point and scale are constant scalars and must match class DropQDQNodeGroupSelector : public NodeGroupSelector { public: - explicit DropQDQNodeGroupSelector(bool allow_16bit = true, bool allow_4bit = true) - : allow_16bit_(allow_16bit), allow_4bit_(allow_4bit) {} + explicit DropQDQNodeGroupSelector(bool allow_16bit = true, bool allow_4bit = true, bool allow_nonpositive_scale = true) + : allow_16bit_(allow_16bit), allow_4bit_(allow_4bit), allow_nonpositive_scale_(allow_nonpositive_scale) {} private: bool Check(const GraphViewer& graph_viewer, const Node& node, @@ -58,6 +58,7 @@ class DropQDQNodeGroupSelector : public NodeGroupSelector { bool allow_16bit_; bool allow_4bit_; + bool allow_nonpositive_scale_; }; // Single DQ -> node. 
@@ -292,8 +293,8 @@ class BaseSelector : public NodeSelector { class DropQDQNodesSelector : public BaseSelector { public: - explicit DropQDQNodesSelector(bool allow_16bit = false, bool allow_4bit = false) - : BaseSelector(std::make_unique(allow_16bit, allow_4bit)) {} + explicit DropQDQNodesSelector(bool allow_16bit = false, bool allow_4bit = false, bool allow_nonpositive_scale = true) + : BaseSelector(std::make_unique(allow_16bit, allow_4bit, allow_nonpositive_scale)) {} }; class DropDQNodesSelector : public BaseSelector { From 61c5d84428b6a5b2f3fcd3af2005cbe2496a475d Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Wed, 26 Jun 2024 11:24:10 -0700 Subject: [PATCH 04/32] Unit test on QDQ w/ nonpositive scale around MaxPool --- .../test/optimizer/qdq_transformer_test.cc | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 1c77121ba9df1..bc07e72b2a479 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -979,6 +979,52 @@ TEST(QDQTransformerTests, ReshapeDropQDQ) { RunReshapeDropQDQTestCase({1, 3, 2, 2}, {1, 12}, false, 21); // Use int16 ONNX QDQ ops } +// Runs a test case that checks if Q/DQ nodes are *not* dropped from DQ -> MaxPool -> Q if the quantization scale is +// negative. 
+template +static void RunMaxPoolNegativeScaleDropQDQTestCase() { + auto build_test_case = [](ModelTestBuilder& builder) { + constexpr QuantType qmin = std::numeric_limits::min(); + constexpr QuantType qmax = std::numeric_limits::max(); + + const std::vector input_shape = {1, 17, 17, 3}; + auto* input_arg = builder.MakeInput(input_shape, qmin, qmax); + auto* output_arg = builder.MakeOutput(); + + constexpr float scale = -0.003f; + QuantType zero_point = 1 + (qmax + qmin) / 2; + + auto* input_arg_dq = builder.MakeIntermediate(); + auto* maxpool_output = builder.MakeIntermediate(); + + builder.AddDequantizeLinearNode(input_arg, scale, zero_point, input_arg_dq); + + Node& maxpool_node = builder.AddNode("MaxPool", {input_arg_dq}, {maxpool_output}); + maxpool_node.AddAttribute("auto_pad", "VALID"); + maxpool_node.AddAttribute("kernel_shape", std::vector({2, 2})); + + builder.AddQuantizeLinearNode(maxpool_output, scale, zero_point, output_arg); + }; + + auto check_graph = [](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + EXPECT_EQ(op_to_count["MaxPool"], 1); + EXPECT_EQ(op_to_count["QuantizeLinear"], 1); + EXPECT_EQ(op_to_count["DequantizeLinear"], 1); + }; + + constexpr int opset = 21; + TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2, opset); +} + +// Checks that Q/DQ nodes are *not* dropped from DQ -> MaxPool -> Q for negative scale. Uses 8-bit and 16-bit Q/DQ ops. +TEST(QDQTransformerTests, MaxpoolDontDropQDQForNegativeScale) { + RunMaxPoolNegativeScaleDropQDQTestCase(); + RunMaxPoolNegativeScaleDropQDQTestCase(); + RunMaxPoolNegativeScaleDropQDQTestCase(); + RunMaxPoolNegativeScaleDropQDQTestCase(); +} + // Runs a test case that checks if Q/DQ nodes are dropped from DQ -> (Un)Squeeze -> Q. 
template static void RunSqueezeUnsqueezeDropQDQTestCase(const std::string& squeeze_type, From ce1ee8aae97d91ddffe05a778b06fd31acdfec6c Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Fri, 28 Jun 2024 11:57:29 -0700 Subject: [PATCH 05/32] Unit test on removing QDQ around Expand --- .../test/optimizer/qdq_transformer_test.cc | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 16abba3b80276..5acf45330b53c 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -1133,6 +1133,51 @@ TEST(QDQTransformerTests, FlattenDropQDQ) { } } +// Runs a test case that checks if Q/DQ nodes are dropped from DQ -> Expand -> Q. +template +static void RunExpandDropQDQTestCase(const std::vector& input_shape, + const std::vector& expanded_shape, + bool use_contrib_qdq = false, + int opset = 21) { + auto build_test_case = [input_shape, expanded_shape, use_contrib_qdq](ModelTestBuilder& builder) { + constexpr QuantType qmin = std::numeric_limits::min(); + constexpr QuantType qmax = std::numeric_limits::max(); + + auto* input_arg = builder.MakeInput(input_shape, qmin, qmax); + auto* output_arg = builder.MakeOutput(); + QuantType zero_point = 1 + (qmax + qmin) / 2; + + auto* input_arg_dq = builder.MakeIntermediate(); + auto* expanded_shape_arg = builder.Make1DInitializer(expanded_shape); + auto* expand_output = builder.MakeIntermediate(); + builder.AddDequantizeLinearNode(input_arg, .003f, zero_point, input_arg_dq, use_contrib_qdq); + builder.AddNode("Expand", {input_arg_dq, expanded_shape_arg}, {expand_output}); + + // add Q + builder.AddQuantizeLinearNode(expand_output, .003f, zero_point, output_arg, use_contrib_qdq); + }; + + auto check_graph = [use_contrib_qdq](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = 
GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count["Expand"], 1); + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); + }; + + TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2, opset); +} + +// Checks that Q/DQ nodes are dropped from DQ -> Expand -> Q. Uses 8-bit and 16-bit Q/DQ ops. +TEST(QDQTransformerTests, ExpandDropQDQ) { + RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}); + RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, true, 13); // Use com.microsoft QDQ ops + RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, true, 13); // Use int16 com.microsoft QDQ ops + RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, true, 13); // Use int16 com.microsoft QDQ ops + RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, false); // Use int16 ONNX QDQ ops + RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, false); // Use int16 ONNX QDQ ops +} + TEST(QDQTransformerTests, DoubleQDQ) { constexpr uint8_t good_u8_1 = 80; constexpr uint8_t good_u8_2 = 40; From 73cc2122fa587c9e20ee990ca055c1ebfd5a5abc Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Fri, 28 Jun 2024 12:08:37 -0700 Subject: [PATCH 06/32] Add selector to remove QDQ nodes around Min, Max, and Abs Only does so if the scale is positive --- .../qdq_selector_action_transformer.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index bfcfd4c3c1881..eb57d32d93144 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -34,6 +34,7 @@ void SplitQDQRules(SelectorActionRegistry& qdq_selector_action_registry) { void 
DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { // 3 nodes. DQ, target, Q. Merge into target and remove DQ and Q. const std::string drop_action_name{"drop"}; + const std::string drop_action_no_nonpositive_scale_name{"drop_no_nonpositive_scale"}; const std::string drop_action_no_int16_name{"drop_no_int16_support"}; const std::string drop_action_no_int16_nor_nonpositive_scale_name{"drop_no_int16_support_no_nonpositive_scale"}; NTO::NodeLocation dq{NTO::NodeType::kInput, 0}; @@ -47,6 +48,8 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr drop_action_no_int16 = std::make_unique( std::vector(moves)); // Copy before std::move(moves) + std::unique_ptr drop_action_no_nonpositive_scale = std::make_unique( + std::vector(moves)); // Copy before std::move(moves) std::unique_ptr drop_action_no_int16_nor_nonpositive_scale = std::make_unique( std::vector(moves)); // Copy before std::move(moves) std::unique_ptr drop_action = std::make_unique(std::move(moves)); @@ -73,6 +76,15 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::move(selector_disallow_16bit_and_nonpositive_scale), std::move(drop_action_no_int16_nor_nonpositive_scale)); + std::unique_ptr selector_disallow_nonpositive_scale = ( + std::make_unique(true, true, false)); + qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_nonpositive_scale_name, + {{"Min", {}}, + {"Max", {}}, + {"Abs", {}}}, + std::move(selector_disallow_nonpositive_scale), + std::move(drop_action_no_nonpositive_scale)); + std::unique_ptr selector = std::make_unique(true); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_name, {{"Gather", {}}, From 3b94f983886aa331f6c8193250d3f741982b2bde Mon Sep 17 00:00:00 2001 From: Maxwell D Collins Date: Sun, 7 Jul 2024 22:34:36 -0700 Subject: [PATCH 07/32] Change formatting according to clangformat 
https://github.com/microsoft/onnxruntime/actions/runs/9684819243/job/26883605849 --- .../selectors_actions/qdq_selector_action_transformer.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index 74cd223f07375..ad164d1abb77d 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -66,8 +66,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::move(selector_disallow_16bit), std::move(drop_action_no_int16)); - std::unique_ptr selector_disallow_16bit_and_nonpositive_scale = ( - std::make_unique(false, true, false)); + std::unique_ptr selector_disallow_16bit_and_nonpositive_scale = std::make_unique(false, true, false); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_nor_nonpositive_scale_name, {{"MaxPool", {12}}}, std::move(selector_disallow_16bit_and_nonpositive_scale), std::move(drop_action_no_int16_nor_nonpositive_scale)); From daf808eb19bd03033786a846f5a39eaec622c906 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Mon, 8 Jul 2024 12:49:58 -0700 Subject: [PATCH 08/32] Unit test that QDQ nodes are removed around Tile --- .../test/optimizer/qdq_transformer_test.cc | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 5acf45330b53c..1ad062543fd7c 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -1178,6 +1178,51 @@ TEST(QDQTransformerTests, ExpandDropQDQ) { RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, false); // Use int16 ONNX QDQ ops } +// Runs a test case that checks if Q/DQ nodes are
dropped from DQ -> Tile -> Q. +template +static void RunTileDropQDQTestCase(const std::vector& input_shape, + const std::vector& repeats, + bool use_contrib_qdq = false, + int opset = 21) { + auto build_test_case = [input_shape, repeats, use_contrib_qdq](ModelTestBuilder& builder) { + constexpr QuantType qmin = std::numeric_limits::min(); + constexpr QuantType qmax = std::numeric_limits::max(); + + auto* input_arg = builder.MakeInput(input_shape, qmin, qmax); + auto* output_arg = builder.MakeOutput(); + QuantType zero_point = 1 + (qmax + qmin) / 2; + + auto* input_arg_dq = builder.MakeIntermediate(); + auto* repeats_arg = builder.Make1DInitializer(repeats); + auto* tile_output = builder.MakeIntermediate(); + builder.AddDequantizeLinearNode(input_arg, .003f, zero_point, input_arg_dq, use_contrib_qdq); + builder.AddNode("Tile", {input_arg_dq, repeats_arg}, {tile_output}); + + // add Q + builder.AddQuantizeLinearNode(tile_output, .003f, zero_point, output_arg, use_contrib_qdq); + }; + + auto check_graph = [use_contrib_qdq](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count["Tile"], 1); + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); + }; + + TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2, opset); +} + +// Checks that Q/DQ nodes are dropped from DQ -> Tile -> Q. Uses 8-bit and 16-bit Q/DQ ops. 
+TEST(QDQTransformerTests, TileDropQDQ) { + RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}); + RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, true, 13); // Use com.microsoft QDQ ops + RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, true, 13); // Use int16 com.microsoft QDQ ops + RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, true, 13); // Use int16 com.microsoft QDQ ops + RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, false); // Use int16 ONNX QDQ ops + RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, false); // Use int16 ONNX QDQ ops +} + TEST(QDQTransformerTests, DoubleQDQ) { constexpr uint8_t good_u8_1 = 80; constexpr uint8_t good_u8_2 = 40; From 68def1e7cf8f9f95d1f094f84e96c1fee7c39e95 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Mon, 8 Jul 2024 14:46:14 -0700 Subject: [PATCH 09/32] Switch to std::filesystem::path in IsQOrDQScalePositiveConstantScalar Updating to match https://github.com/microsoft/onnxruntime/commit/07c429191e19678b97ec8fe818ecb9a64ac6b394 --- onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc | 2 +- onnxruntime/core/optimizer/qdq_transformer/qdq_util.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc index 60879af8602cf..7ef4ced1835f0 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc @@ -168,7 +168,7 @@ bool QOrDQNodeHasConstantScalarScaleAndZeroPoint( bool IsQOrDQScalePositiveConstantScalar( const Node& q_or_dq_node, const GetConstantInitializerFn& get_const_initializer, - const Path& model_path) { + const std::filesystem::path& model_path) { auto q_or_dq_input_defs = q_or_dq_node.InputDefs(); ORT_ENFORCE(q_or_dq_input_defs.size() >= 2); diff --git a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h index 779645ec91167..008f9972a143b 100644 ---
a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h +++ b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h @@ -67,7 +67,7 @@ bool QOrDQNodeHasConstantScalarScaleAndZeroPoint( // Checks that the y_scale/x_scale input to the QuantizeLinear/DequantizeLinear node is a positive scalar. bool IsQOrDQScalePositiveConstantScalar(const Node& q_or_dq_node, const GetConstantInitializerFn& get_const_initializer, - const Path& model_path); + const std::filesystem::path& model_path); #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) // Check Q node op type, version, and domain. From cf10b50c2b6b520b176937847273219dc83c0dc5 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Mon, 8 Jul 2024 15:57:44 -0700 Subject: [PATCH 10/32] Remove DepthToSpace and SpaceToDepth from Drop QDQ optimization No integer implementations are present, so they need to stay in floating-point. https://github.com/microsoft/onnxruntime/issues/21287 --- .../selectors_actions/qdq_selector_action_transformer.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index aae90f619069f..10d4a77d167e0 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -85,6 +85,8 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::move(drop_action_no_nonpositive_scale)); std::unique_ptr selector = std::make_unique(true); + // DepthToSpace and SpaceToDepth not included because there is no integer implementations.
+ // https://github.com/microsoft/onnxruntime/issues/21287 qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_name, {{"Gather", {}}, {"Reshape", {}}, @@ -95,9 +97,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { {"Expand", {}}, {"Tile", {}}, {"Slice", {}}, - {"GatherElements", {}}, - {"DepthToSpace", {}}, - {"SpaceToDepth", {}}}, + {"GatherElements", {}}}, std::move(selector), std::move(drop_action)); #else From 939f2406fec16a512326867b15d8367857296c09 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Wed, 10 Jul 2024 13:01:34 -0700 Subject: [PATCH 11/32] Fix grammar in comment --- .../selectors_actions/qdq_selector_action_transformer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index 10d4a77d167e0..aa2073e944fcc 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -85,7 +85,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::move(drop_action_no_nonpositive_scale)); std::unique_ptr selector = std::make_unique(true); - // DepthToSpace and SpaceToDepth not included because there is no integer implementations. + // DepthToSpace and SpaceToDepth not included because there are no integer implementations. 
// https://github.com/microsoft/onnxruntime/issues/21287 qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_name, {{"Gather", {}}, From 452bdd122307e5038f72c78e1974a9e024ab10bc Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Wed, 10 Jul 2024 13:01:44 -0700 Subject: [PATCH 12/32] Unit test that QDQ is dropped around Slice --- .../test/optimizer/qdq_transformer_test.cc | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 1ad062543fd7c..047bfcd51801d 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -1223,6 +1223,53 @@ TEST(QDQTransformerTests, TileDropQDQ) { RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, false); // Use int16 ONNX QDQ ops } +// Runs a test case that checks if Q/DQ nodes are dropped from DQ -> Slice -> Q. +template +static void RunSliceDropQDQTestCase(const std::vector& input_shape, + const std::vector& starts, + const std::vector& ends, + bool use_contrib_qdq = false, + int opset = 21) { + auto build_test_case = [input_shape, starts, ends, use_contrib_qdq](ModelTestBuilder& builder) { + constexpr QuantType qmin = std::numeric_limits::min(); + constexpr QuantType qmax = std::numeric_limits::max(); + + auto* input_arg = builder.MakeInput(input_shape, qmin, qmax); + auto* output_arg = builder.MakeOutput(); + QuantType zero_point = 1 + (qmax + qmin) / 2; + + auto* input_arg_dq = builder.MakeIntermediate(); + auto* starts_arg = builder.Make1DInitializer(starts); + auto* ends_arg = builder.Make1DInitializer(ends); + auto* slice_output = builder.MakeIntermediate(); + builder.AddDequantizeLinearNode(input_arg, .003f, zero_point, input_arg_dq, use_contrib_qdq); + builder.AddNode("Slice", {input_arg_dq, starts_arg, ends_arg}, {slice_output}); + + // add Q + builder.AddQuantizeLinearNode(slice_output, .003f, zero_point, output_arg, 
use_contrib_qdq); + }; + + auto check_graph = [use_contrib_qdq](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count["Slice"], 1); + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); + }; + + TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2, opset); +} + +// Checks that Q/DQ nodes are dropped from DQ -> Slice -> Q. Uses 8-bit and 16-bit Q/DQ ops. +TEST(QDQTransformerTests, SliceDropQDQ) { + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}); + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use com.microsoft QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use int16 com.microsoft QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use int16 com.microsoft QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, false); // Use int16 ONNX QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, false); // Use int16 ONNX QDQ ops +} + TEST(QDQTransformerTests, DoubleQDQ) { constexpr uint8_t good_u8_1 = 80; constexpr uint8_t good_u8_2 = 40; From a67d3fb961e9fd53ddc31a925ed7e5b22881be0c Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Thu, 11 Jul 2024 13:33:57 -0700 Subject: [PATCH 13/32] Unit test on removing QDQ around GatherElements --- .../qdq_selector_action_transformer.cc | 4 +- .../test/optimizer/qdq_transformer_test.cc | 46 +++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index aa2073e944fcc..396202bc1ec0f 100644 --- 
a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -89,6 +89,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { // https://github.com/microsoft/onnxruntime/issues/21287 qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_name, {{"Gather", {}}, + {"GatherElements", {}}, {"Reshape", {}}, {"Transpose", {}}, {"Squeeze", {}}, @@ -96,8 +97,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { {"Flatten", {}}, {"Expand", {}}, {"Tile", {}}, - {"Slice", {}}, - {"GatherElements", {}}}, + {"Slice", {}}}, std::move(selector), std::move(drop_action)); #else diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 047bfcd51801d..c9c43514c9f87 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -1270,6 +1270,52 @@ TEST(QDQTransformerTests, SliceDropQDQ) { RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, false); // Use int16 ONNX QDQ ops } +// Runs a test case that checks if Q/DQ nodes are dropped from DQ -> GatherElements -> Q. 
+template +static void RunGatherElementsDropQDQTestCase(const std::vector& input_shape, + const std::vector& indices_shape, + const std::vector& indices_data, + bool use_contrib_qdq = false, + int opset = 21) { + auto build_test_case = [input_shape, indices_shape, indices_data, use_contrib_qdq](ModelTestBuilder& builder) { + constexpr QuantType qmin = std::numeric_limits::min(); + constexpr QuantType qmax = std::numeric_limits::max(); + + auto* input_arg = builder.MakeInput(input_shape, qmin, qmax); + auto* indices_arg = builder.MakeInitializer(indices_shape, indices_data); + auto* output_arg = builder.MakeOutput(); + QuantType zero_point = 1 + (qmax + qmin) / 2; + + auto* input_arg_dq = builder.MakeIntermediate(); + auto* gather_elements_output = builder.MakeIntermediate(); + builder.AddDequantizeLinearNode(input_arg, .003f, zero_point, input_arg_dq, use_contrib_qdq); + builder.AddNode("GatherElements", {input_arg_dq, indices_arg}, {gather_elements_output}); + + // add Q + builder.AddQuantizeLinearNode(gather_elements_output, .003f, zero_point, output_arg, use_contrib_qdq); + }; + + auto check_graph = [use_contrib_qdq](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count["GatherElements"], 1); + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); + }; + + TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2, opset); +} + +// Checks that Q/DQ nodes are dropped from DQ -> GatherElements -> Q. Uses 8-bit and 16-bit Q/DQ ops. 
+TEST(QDQTransformerTests, GatherElementsDropQDQ) { + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}); + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); // Use com.microsoft QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); // Use int16 com.microsoft QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); // Use int16 com.microsoft QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, false); // Use int16 ONNX QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, false); // Use int16 ONNX QDQ ops +} + TEST(QDQTransformerTests, DoubleQDQ) { constexpr uint8_t good_u8_1 = 80; constexpr uint8_t good_u8_2 = 40; From c7a50da3854788fa585d88b72d150d0f4c54024b Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Thu, 11 Jul 2024 13:57:11 -0700 Subject: [PATCH 14/32] Apply lintrunner --- .../qdq_selector_action_transformer.cc | 3 +- .../test/optimizer/qdq_transformer_test.cc | 52 +++++++++---------- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index 396202bc1ec0f..bfba8e5cd1112 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -75,8 +75,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::move(selector_disallow_16bit_and_nonpositive_scale), std::move(drop_action_no_int16_nor_nonpositive_scale)); - std::unique_ptr selector_disallow_nonpositive_scale = ( - std::make_unique(true, true, false)); + std::unique_ptr selector_disallow_nonpositive_scale = (std::make_unique(true, true, false)); 
qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_nonpositive_scale_name, {{"Min", {}}, {"Max", {}}, diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index c9c43514c9f87..4577eb2e74706 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -1123,13 +1123,13 @@ static void RunFlattenDropQDQTestCase(const std::vector& input_shape, // Checks that Q/DQ nodes are dropped from DQ -> Reshape -> Q. Uses 8-bit and 16-bit Q/DQ ops. TEST(QDQTransformerTests, FlattenDropQDQ) { - for (int64_t axis: {0, 1, 3}) { + for (int64_t axis : {0, 1, 3}) { RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis); - RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, true, 13); // Use com.microsoft QDQ ops - RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, true, 13); // Use int16 com.microsoft QDQ ops - RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, true, 13); // Use int16 com.microsoft QDQ ops - RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, false); // Use int16 ONNX QDQ ops - RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, false); // Use int16 ONNX QDQ ops + RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, true, 13); // Use com.microsoft QDQ ops + RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, true, 13); // Use int16 com.microsoft QDQ ops + RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, true, 13); // Use int16 com.microsoft QDQ ops + RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, false); // Use int16 ONNX QDQ ops + RunFlattenDropQDQTestCase({1, 3, 2, 2}, axis, false); // Use int16 ONNX QDQ ops } } @@ -1171,11 +1171,11 @@ static void RunExpandDropQDQTestCase(const std::vector& input_shape, // Checks that Q/DQ nodes are dropped from DQ -> Expand -> Q. Uses 8-bit and 16-bit Q/DQ ops. 
TEST(QDQTransformerTests, ExpandDropQDQ) { RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}); - RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, true, 13); // Use com.microsoft QDQ ops - RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, true, 13); // Use int16 com.microsoft QDQ ops - RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, true, 13); // Use int16 com.microsoft QDQ ops - RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, false); // Use int16 ONNX QDQ ops - RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, false); // Use int16 ONNX QDQ ops + RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, true, 13); // Use com.microsoft QDQ ops + RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, true, 13); // Use int16 com.microsoft QDQ ops + RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, true, 13); // Use int16 com.microsoft QDQ ops + RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, false); // Use int16 ONNX QDQ ops + RunExpandDropQDQTestCase({1, 3, 1, 1}, {1, 3, 7, 13}, false); // Use int16 ONNX QDQ ops } // Runs a test case that checks if Q/DQ nodes are dropped from DQ -> Tile -> Q. @@ -1216,11 +1216,11 @@ static void RunTileDropQDQTestCase(const std::vector& input_shape, // Checks that Q/DQ nodes are dropped from DQ -> Tile -> Q. Uses 8-bit and 16-bit Q/DQ ops. 
TEST(QDQTransformerTests, TileDropQDQ) { RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}); - RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, true, 13); // Use com.microsoft QDQ ops - RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, true, 13); // Use int16 com.microsoft QDQ ops - RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, true, 13); // Use int16 com.microsoft QDQ ops - RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, false); // Use int16 ONNX QDQ ops - RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, false); // Use int16 ONNX QDQ ops + RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, true, 13); // Use com.microsoft QDQ ops + RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, true, 13); // Use int16 com.microsoft QDQ ops + RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, true, 13); // Use int16 com.microsoft QDQ ops + RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, false); // Use int16 ONNX QDQ ops + RunTileDropQDQTestCase({1, 3, 2, 2}, {1, 1, 3, 3}, false); // Use int16 ONNX QDQ ops } // Runs a test case that checks if Q/DQ nodes are dropped from DQ -> Slice -> Q. @@ -1263,11 +1263,11 @@ static void RunSliceDropQDQTestCase(const std::vector& input_shape, // Checks that Q/DQ nodes are dropped from DQ -> Slice -> Q. Uses 8-bit and 16-bit Q/DQ ops. 
TEST(QDQTransformerTests, SliceDropQDQ) { RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}); - RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use com.microsoft QDQ ops - RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use int16 com.microsoft QDQ ops - RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use int16 com.microsoft QDQ ops - RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, false); // Use int16 ONNX QDQ ops - RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, false); // Use int16 ONNX QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use com.microsoft QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use int16 com.microsoft QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use int16 com.microsoft QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, false); // Use int16 ONNX QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, false); // Use int16 ONNX QDQ ops } // Runs a test case that checks if Q/DQ nodes are dropped from DQ -> GatherElements -> Q. @@ -1309,11 +1309,11 @@ static void RunGatherElementsDropQDQTestCase(const std::vector& input_s // Checks that Q/DQ nodes are dropped from DQ -> GatherElements -> Q. Uses 8-bit and 16-bit Q/DQ ops. 
TEST(QDQTransformerTests, GatherElementsDropQDQ) { RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}); - RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); // Use com.microsoft QDQ ops - RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); // Use int16 com.microsoft QDQ ops - RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); // Use int16 com.microsoft QDQ ops - RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, false); // Use int16 ONNX QDQ ops - RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, false); // Use int16 ONNX QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); // Use com.microsoft QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); // Use int16 com.microsoft QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); // Use int16 com.microsoft QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, false); // Use int16 ONNX QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, false); // Use int16 ONNX QDQ ops } TEST(QDQTransformerTests, DoubleQDQ) { From 82fdcc47ffad8078d377dacd182738a767bfe279 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Fri, 12 Jul 2024 09:54:08 -0700 Subject: [PATCH 15/32] Drop QDQ around ReduceMin & ReduceMax, not Min & Max Don't expect the drop qdq optimization to work for multiple inputs for now. 
--- .../selectors_actions/qdq_selector_action_transformer.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index bfba8e5cd1112..7d08d64eb2aba 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -77,8 +77,8 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr selector_disallow_nonpositive_scale = (std::make_unique(true, true, false)); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_nonpositive_scale_name, - {{"Min", {}}, - {"Max", {}}, + {{"ReduceMin", {}}, + {"ReduceMax", {}}, {"Abs", {}}}, std::move(selector_disallow_nonpositive_scale), std::move(drop_action_no_nonpositive_scale)); From 9b3bf09041ed4fe8bb78f93c630bc6320b77c54b Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Fri, 12 Jul 2024 11:26:08 -0700 Subject: [PATCH 16/32] Disallow 16bit for ReduceMin & ReduceMax Apparently the type constraints for these ops don't include 16-bit integers. 
--- .../selectors_actions/qdq_selector_action_transformer.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index 7d08d64eb2aba..a925ab271989e 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -71,15 +71,15 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr selector_disallow_16bit_and_nonpositive_scale = std::make_unique(false, true, false); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_nor_nonpositive_scale_name, - {{"MaxPool", {12}}}, + {{"MaxPool", {12}}, + {"ReduceMin", {}}, + {"ReduceMax", {}}}, std::move(selector_disallow_16bit_and_nonpositive_scale), std::move(drop_action_no_int16_nor_nonpositive_scale)); std::unique_ptr selector_disallow_nonpositive_scale = (std::make_unique(true, true, false)); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_nonpositive_scale_name, - {{"ReduceMin", {}}, - {"ReduceMax", {}}, - {"Abs", {}}}, + {{"Abs", {}}}, std::move(selector_disallow_nonpositive_scale), std::move(drop_action_no_nonpositive_scale)); From 872f983b4a1614c94f6929be95e0ffe8106e8bc0 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Fri, 12 Jul 2024 11:26:38 -0700 Subject: [PATCH 17/32] Unit test on dropping QDQ from around ReduceMin/Max --- .../test/optimizer/qdq_transformer_test.cc | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 4577eb2e74706..8bfcff4787257 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ 
b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -1316,6 +1316,62 @@ TEST(QDQTransformerTests, GatherElementsDropQDQ) { RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, false); // Use int16 ONNX QDQ ops } +// Runs a test case whether Q/DQ nodes are dropped from DQ -> Reduce(Min|Max) -> Q. +template +static void RunReduceExtremumDropQDQTestCase(const std::string& op_type, + const std::vector& input_shape, + float qscale, + bool expect_drop_qdq, + bool use_contrib_qdq = false, + int opset = 21) { + auto build_test_case = [op_type, input_shape, qscale, use_contrib_qdq](ModelTestBuilder& builder) { + constexpr QuantType qmin = std::numeric_limits::min(); + constexpr QuantType qmax = std::numeric_limits::max(); + + auto* input_arg = builder.MakeInput(input_shape, qmin, qmax); + auto* output_arg = builder.MakeOutput(); + QuantType zero_point = 1 + (qmax + qmin) / 2; + + auto* input_arg_dq = builder.MakeIntermediate(); + auto* reduce_output = builder.MakeIntermediate(); + builder.AddDequantizeLinearNode(input_arg, qscale, zero_point, input_arg_dq, use_contrib_qdq); + builder.AddNode(op_type, {input_arg_dq}, {reduce_output}); + + // add Q + builder.AddQuantizeLinearNode(reduce_output, qscale, zero_point, output_arg, use_contrib_qdq); + }; + + auto check_graph = [op_type, expect_drop_qdq, use_contrib_qdq](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count[op_type], 1); + if (expect_drop_qdq) { + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); + } else { + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 1); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 1); + } + }; + + TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2, opset); +} + +// Checks whether Q/DQ nodes are dropped from DQ -> Reduce(Min|Max) -> Q. 
Uses 8-bit and 16-bit Q/DQ ops. +TEST(QDQTransformerTests, ReduceExtremumDropQDQ) { + // Check that Q/DQ nodes are dropped for positive scale + RunReduceExtremumDropQDQTestCase("ReduceMin", {3, 3}, 0.003f, true); + RunReduceExtremumDropQDQTestCase("ReduceMin", {3, 3}, 0.003f, true, true, 13); // Use com.microsoft QDQ ops + RunReduceExtremumDropQDQTestCase("ReduceMax", {3, 3}, 0.003f, true); + RunReduceExtremumDropQDQTestCase("ReduceMax", {3, 3}, 0.003f, true, true, 13); // Use com.microsoft QDQ ops + + // Check that Q/DQ nodes are dropped for negative scale + RunReduceExtremumDropQDQTestCase("ReduceMin", {3, 3}, -0.003f, false); + RunReduceExtremumDropQDQTestCase("ReduceMin", {3, 3}, -0.003f, false, true, 13); // Use com.microsoft QDQ ops + RunReduceExtremumDropQDQTestCase("ReduceMax", {3, 3}, -0.003f, false); + RunReduceExtremumDropQDQTestCase("ReduceMax", {3, 3}, -0.003f, false, true, 13); // Use com.microsoft QDQ ops +} + TEST(QDQTransformerTests, DoubleQDQ) { constexpr uint8_t good_u8_1 = 80; constexpr uint8_t good_u8_2 = 40; From 5bf7d846fa3039272119f6dc74bccf89ff75b0e9 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Tue, 16 Jul 2024 12:18:32 -0700 Subject: [PATCH 18/32] Fix comment in ReduceExtremumDropQDQ test case --- onnxruntime/test/optimizer/qdq_transformer_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 8bfcff4787257..6d8041f5b0680 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -1365,7 +1365,7 @@ TEST(QDQTransformerTests, ReduceExtremumDropQDQ) { RunReduceExtremumDropQDQTestCase("ReduceMax", {3, 3}, 0.003f, true); RunReduceExtremumDropQDQTestCase("ReduceMax", {3, 3}, 0.003f, true, true, 13); // Use com.microsoft QDQ ops - // Check that Q/DQ nodes are dropped for negative scale + // Check that Q/DQ nodes are *not* dropped for negative scale 
RunReduceExtremumDropQDQTestCase("ReduceMin", {3, 3}, -0.003f, false); RunReduceExtremumDropQDQTestCase("ReduceMin", {3, 3}, -0.003f, false, true, 13); // Use com.microsoft QDQ ops RunReduceExtremumDropQDQTestCase("ReduceMax", {3, 3}, -0.003f, false); From 28824d68867483a33536984aa27b858c6cbf6788 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Tue, 16 Jul 2024 12:19:01 -0700 Subject: [PATCH 19/32] Remove selector to drop QDQ around Abs Results don't appear to match --- .../selectors_actions/qdq_selector_action_transformer.cc | 6 ------ 1 file changed, 6 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index a925ab271989e..0b5e7d32b65c2 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -77,12 +77,6 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::move(selector_disallow_16bit_and_nonpositive_scale), std::move(drop_action_no_int16_nor_nonpositive_scale)); - std::unique_ptr selector_disallow_nonpositive_scale = (std::make_unique(true, true, false)); - qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_nonpositive_scale_name, - {{"Abs", {}}}, - std::move(selector_disallow_nonpositive_scale), - std::move(drop_action_no_nonpositive_scale)); - std::unique_ptr selector = std::make_unique(true); // DepthToSpace and SpaceToDepth not included because there are no integer implementations. 
// https://github.com/microsoft/onnxruntime/issues/21287 From 71253a8a04be3e3f84ee6e391645a3499167b897 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Tue, 16 Jul 2024 12:20:19 -0700 Subject: [PATCH 20/32] Reformatting from lintrunner --- onnxruntime/test/optimizer/qdq_transformer_test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 6d8041f5b0680..954a071fcdbf5 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -1361,15 +1361,15 @@ static void RunReduceExtremumDropQDQTestCase(const std::string& op_type, TEST(QDQTransformerTests, ReduceExtremumDropQDQ) { // Check that Q/DQ nodes are dropped for positive scale RunReduceExtremumDropQDQTestCase("ReduceMin", {3, 3}, 0.003f, true); - RunReduceExtremumDropQDQTestCase("ReduceMin", {3, 3}, 0.003f, true, true, 13); // Use com.microsoft QDQ ops + RunReduceExtremumDropQDQTestCase("ReduceMin", {3, 3}, 0.003f, true, true, 13); // Use com.microsoft QDQ ops RunReduceExtremumDropQDQTestCase("ReduceMax", {3, 3}, 0.003f, true); - RunReduceExtremumDropQDQTestCase("ReduceMax", {3, 3}, 0.003f, true, true, 13); // Use com.microsoft QDQ ops + RunReduceExtremumDropQDQTestCase("ReduceMax", {3, 3}, 0.003f, true, true, 13); // Use com.microsoft QDQ ops // Check that Q/DQ nodes are *not* dropped for negative scale RunReduceExtremumDropQDQTestCase("ReduceMin", {3, 3}, -0.003f, false); - RunReduceExtremumDropQDQTestCase("ReduceMin", {3, 3}, -0.003f, false, true, 13); // Use com.microsoft QDQ ops + RunReduceExtremumDropQDQTestCase("ReduceMin", {3, 3}, -0.003f, false, true, 13); // Use com.microsoft QDQ ops RunReduceExtremumDropQDQTestCase("ReduceMax", {3, 3}, -0.003f, false); - RunReduceExtremumDropQDQTestCase("ReduceMax", {3, 3}, -0.003f, false, true, 13); // Use com.microsoft QDQ ops + RunReduceExtremumDropQDQTestCase("ReduceMax", 
{3, 3}, -0.003f, false, true, 13); // Use com.microsoft QDQ ops } TEST(QDQTransformerTests, DoubleQDQ) { From 191dc150df745e1674ea3d9c1b5acccd0d767adb Mon Sep 17 00:00:00 2001 From: mcollinswisc Date: Thu, 25 Jul 2024 12:40:33 -0700 Subject: [PATCH 21/32] Fix comment grammar according to review suggestion Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com> --- .../selectors_actions/qdq_selector_action_transformer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index ad164d1abb77d..07e806b4d4940 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -52,7 +52,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr drop_action = std::make_unique(std::move(moves)); #if !defined(ORT_MINIMAL_BUILD) - // Use a separate selectors & actions for MaxPool and Resize. + // Use separate selectors & actions for MaxPool and Resize. // // They disallow 16-bit types for MaxPool and Resize: // int16 MaxPool is not supported by the ONNX specification. 
From 023bbafa10e429af208a2c613d869ef9ee335085 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Thu, 25 Jul 2024 12:48:06 -0700 Subject: [PATCH 22/32] Register drop_action_no_int16_nor_nonpositive_scale in minimal build Per review comment: https://github.com/microsoft/onnxruntime/pull/21182#pullrequestreview-2199920043 --- .../selectors_actions/qdq_selector_action_transformer.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index 07e806b4d4940..69d76e4225b53 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -83,6 +83,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::move(drop_action)); #else qdq_selector_action_registry.RegisterAction(drop_action_no_int16_name, std::move(drop_action_no_int16)); + qdq_selector_action_registry.RegisterAction(drop_action_no_int16_nor_nonpositive_scale_name, std::move(drop_action_no_int16_nor_nonpositive_scale)); qdq_selector_action_registry.RegisterAction(drop_action_name, std::move(drop_action)); #endif } From c287525fddcff8cee98683b4bc55ff178f36adbf Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Thu, 25 Jul 2024 12:50:05 -0700 Subject: [PATCH 23/32] Continue line to keep it under 120 chars --- .../selectors_actions/qdq_selector_action_transformer.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index 69d76e4225b53..f21e848845de4 100644 --- 
a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -83,7 +83,9 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::move(drop_action)); #else qdq_selector_action_registry.RegisterAction(drop_action_no_int16_name, std::move(drop_action_no_int16)); - qdq_selector_action_registry.RegisterAction(drop_action_no_int16_nor_nonpositive_scale_name, std::move(drop_action_no_int16_nor_nonpositive_scale)); + qdq_selector_action_registry.RegisterAction( + drop_action_no_int16_nor_nonpositive_scale_name, + std::move(drop_action_no_int16_nor_nonpositive_scale)); qdq_selector_action_registry.RegisterAction(drop_action_name, std::move(drop_action)); #endif } From e34258d6779d4ae733ca97199b6cbf2af7f7d362 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Thu, 25 Jul 2024 12:53:30 -0700 Subject: [PATCH 24/32] Change name "no nonpositive scale" to "and positive scale" To avoid double negative, per review comment: https://github.com/microsoft/onnxruntime/pull/21376#discussion_r1690768042 --- .../qdq_selector_action_transformer.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index f21e848845de4..8caec4b99fc7d 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -35,7 +35,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { // 3 nodes. DQ, target, Q. Merge into target and remove DQ and Q. 
const std::string drop_action_name{"drop"}; const std::string drop_action_no_int16_name{"drop_no_int16_support"}; - const std::string drop_action_no_int16_nor_nonpositive_scale_name{"drop_no_int16_support_no_nonpositive_scale"}; + const std::string drop_action_no_int16_and_positive_scale_name{"drop_no_int16_support_and_positive_scale"}; NTO::NodeLocation dq{NTO::NodeType::kInput, 0}; NTO::NodeLocation q{NTO::NodeType::kOutput, 0}; @@ -47,7 +47,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr drop_action_no_int16 = std::make_unique( std::vector(moves)); // Copy before std::move(moves) - std::unique_ptr drop_action_no_int16_nor_nonpositive_scale = std::make_unique( + std::unique_ptr drop_action_no_int16_and_positive_scale = std::make_unique( std::vector(moves)); // Copy before std::move(moves) std::unique_ptr drop_action = std::make_unique(std::move(moves)); @@ -67,10 +67,10 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::move(drop_action_no_int16)); std::unique_ptr selector_disallow_16bit_and_nonpositive_scale = std::make_unique(false, true, false); - qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_nor_nonpositive_scale_name, + qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_and_positive_scale_name, {{"MaxPool", {12}}}, std::move(selector_disallow_16bit_and_nonpositive_scale), - std::move(drop_action_no_int16_nor_nonpositive_scale)); + std::move(drop_action_no_int16_and_positive_scale)); std::unique_ptr selector = std::make_unique(true); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_name, @@ -84,8 +84,8 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { #else qdq_selector_action_registry.RegisterAction(drop_action_no_int16_name, std::move(drop_action_no_int16)); qdq_selector_action_registry.RegisterAction( - drop_action_no_int16_nor_nonpositive_scale_name, - 
std::move(drop_action_no_int16_nor_nonpositive_scale)); + drop_action_no_int16_and_positive_scale_name, + std::move(drop_action_no_int16_and_positive_scale)); qdq_selector_action_registry.RegisterAction(drop_action_name, std::move(drop_action)); #endif } From b0753e4344702096f515bac0feee1c594673eca6 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Thu, 25 Jul 2024 16:55:31 -0700 Subject: [PATCH 25/32] More reformatting to keep lines under 120 chars https://github.com/microsoft/onnxruntime/pull/21376#discussion_r1690769470 --- .../selectors_actions/qdq_selector_action_transformer.cc | 7 ++++--- .../qdq_transformer/selectors_actions/qdq_selectors.h | 3 ++- onnxruntime/test/optimizer/qdq_transformer_test.cc | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index b3126aa576843..1223783a044a0 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -58,15 +58,16 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { // int16 MaxPool is not supported by the ONNX specification. // int16 Resize is not supported by the ORT implementation (although allowed by ONNX). // - // And cannot eliminate the QDQ for MaxPool if the scale is not positive, as a negative scale will change the ordering - // of the elements between quantized & de-quantized values. + // And cannot eliminate the QDQ for MaxPool if the scale is not positive, as a negative + // scale will change the ordering of the elements between quantized & de-quantized values. 
std::unique_ptr selector_disallow_16bit = std::make_unique(false); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_name, {{"Resize", {}}}, std::move(selector_disallow_16bit), std::move(drop_action_no_int16)); - std::unique_ptr selector_disallow_16bit_and_nonpositive_scale = std::make_unique(false, true, false); + std::unique_ptr selector_disallow_16bit_and_nonpositive_scale = + std::make_unique(false, true, false); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_and_positive_scale_name, {{"MaxPool", {12}}}, std::move(selector_disallow_16bit_and_nonpositive_scale), diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h index dfe1efaf6f214..7e009da39403b 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h @@ -48,7 +48,8 @@ class NodeGroupSelector { // Zero point and scale are constant scalars and must match class DropQDQNodeGroupSelector : public NodeGroupSelector { public: - explicit DropQDQNodeGroupSelector(bool allow_16bit = true, bool allow_4bit = true, bool allow_nonpositive_scale = true) + explicit DropQDQNodeGroupSelector(bool allow_16bit = true, bool allow_4bit = true, + bool allow_nonpositive_scale = true) : allow_16bit_(allow_16bit), allow_4bit_(allow_4bit), allow_nonpositive_scale_(allow_nonpositive_scale) {} private: diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index de549118b73d5..400a51700c97b 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -787,7 +787,7 @@ void QDQTransformerGemmTests(bool has_output_q, bool has_bias, bool beta_not_one auto check_binary_op_graph = [&](InferenceSessionWrapper& session) { auto op_to_count = 
CountOpsInGraph(session.GetGraph()); const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); - if ((!has_output_q || std::is_same_v) && (!has_bias || (std::is_same_v && !beta_not_one)) && + if ((!has_output_q || std::is_same_v)&&(!has_bias || (std::is_same_v && !beta_not_one)) && (std::is_same_v || std::is_same_v)) { EXPECT_EQ(op_to_count["com.microsoft.QGemm"], 1); EXPECT_EQ(op_to_count["Gemm"], 0); From 610a6a939d5237080e252bef80a839a5b135cffe Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Wed, 31 Jul 2024 16:31:57 -0700 Subject: [PATCH 26/32] Undo spacing change in Gemm test Don't want it included in this PR, it's an unrelated change that I think is being produced by the auto-formatter. --- onnxruntime/test/optimizer/qdq_transformer_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 1d8ea5d38bb54..a51840b6ca3dc 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -787,7 +787,7 @@ void QDQTransformerGemmTests(bool has_output_q, bool has_bias, bool beta_not_one auto check_binary_op_graph = [&](InferenceSessionWrapper& session) { auto op_to_count = CountOpsInGraph(session.GetGraph()); const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); - if ((!has_output_q || std::is_same_v)&&(!has_bias || (std::is_same_v && !beta_not_one)) && + if ((!has_output_q || std::is_same_v) && (!has_bias || (std::is_same_v && !beta_not_one)) && (std::is_same_v || std::is_same_v)) { EXPECT_EQ(op_to_count["com.microsoft.QGemm"], 1); EXPECT_EQ(op_to_count["Gemm"], 0); From d63854b7699004294d237320a43ef1f872d77ce5 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Wed, 31 Jul 2024 16:33:41 -0700 Subject: [PATCH 27/32] Delete drop_action_no_nonpositive_scale_name It's no longer used --- .../selectors_actions/qdq_selector_action_transformer.cc | 1 - 1 file changed, 1 deletion(-) 
diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index f602cd9ef9459..e6c8a86192fce 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -34,7 +34,6 @@ void SplitQDQRules(SelectorActionRegistry& qdq_selector_action_registry) { void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { // 3 nodes. DQ, target, Q. Merge into target and remove DQ and Q. const std::string drop_action_name{"drop"}; - const std::string drop_action_no_nonpositive_scale_name{"drop_no_nonpositive_scale"}; const std::string drop_action_no_int16_name{"drop_no_int16_support"}; const std::string drop_action_no_int16_and_positive_scale_name{"drop_no_int16_support_and_positive_scale"}; NTO::NodeLocation dq{NTO::NodeType::kInput, 0}; From 848f701408614a6ef99be2d0bbe856bfd671f9be Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Wed, 31 Jul 2024 16:34:51 -0700 Subject: [PATCH 28/32] Alphabetize operator names included in base selector --- .../qdq_selector_action_transformer.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index e6c8a86192fce..37ae5540ec497 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -79,16 +79,16 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { // DepthToSpace and SpaceToDepth not included because there are no integer 
implementations. // https://github.com/microsoft/onnxruntime/issues/21287 qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_name, - {{"Gather", {}}, + {{"Expand", {}}, + {"Flatten", {}}, + {"Gather", {}}, {"GatherElements", {}}, {"Reshape", {}}, - {"Transpose", {}}, + {"Slice", {}}, {"Squeeze", {}}, - {"Unsqueeze", {}}, - {"Flatten", {}}, - {"Expand", {}}, {"Tile", {}}, - {"Slice", {}}}, + {"Transpose", {}}, + {"Unsqueeze", {}}}, std::move(selector), std::move(drop_action)); #else From b1de950ef93dbc67f7c0dda2dc3b7964fc1cb9ad Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Wed, 31 Jul 2024 16:37:03 -0700 Subject: [PATCH 29/32] Alphabetize operators in no 16-bit & positive selector --- .../selectors_actions/qdq_selector_action_transformer.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index 37ae5540ec497..7c3d6ae23cc04 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -70,8 +70,8 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::make_unique(false, true, false); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_and_positive_scale_name, {{"MaxPool", {12}}, - {"ReduceMin", {}}, - {"ReduceMax", {}}}, + {"ReduceMax", {}}, + {"ReduceMin", {}}}, std::move(selector_no_16bit_and_positive_scale), std::move(drop_action_no_int16_and_positive_scale)); From fb91e4161447b880add894ef18b3a89f2293706e Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Thu, 1 Aug 2024 09:51:49 -0700 Subject: [PATCH 30/32] Move some comments to line before To keep lines under 120 chars --- .../test/optimizer/qdq_transformer_test.cc | 15 
++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index a51840b6ca3dc..146ed224f6ef6 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -1265,8 +1265,10 @@ static void RunSliceDropQDQTestCase(const std::vector& input_shape, TEST(QDQTransformerTests, SliceDropQDQ) { RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}); RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use com.microsoft QDQ ops - RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use int16 com.microsoft QDQ ops - RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use int16 com.microsoft QDQ ops + // Use int16 com.microsoft QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); + // Use int16 com.microsoft QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, false); // Use int16 ONNX QDQ ops RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, false); // Use int16 ONNX QDQ ops } @@ -1310,9 +1312,12 @@ static void RunGatherElementsDropQDQTestCase(const std::vector& input_s // Checks that Q/DQ nodes are dropped from DQ -> GatherElements -> Q. Uses 8-bit and 16-bit Q/DQ ops. 
TEST(QDQTransformerTests, GatherElementsDropQDQ) { RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}); - RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); // Use com.microsoft QDQ ops - RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); // Use int16 com.microsoft QDQ ops - RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); // Use int16 com.microsoft QDQ ops + // Use com.microsoft QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); + // Use int16 com.microsoft QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); + // Use int16 com.microsoft QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, false); // Use int16 ONNX QDQ ops RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, false); // Use int16 ONNX QDQ ops } From 85010fe2a4ed42e83fb9ae6bd503fa910b5a3c5a Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Mon, 5 Aug 2024 10:37:11 -0700 Subject: [PATCH 31/32] Change spacing around comments according to clang-format/lintrunner I guess these are no longer lined up anyway after moving some to previous line. --- onnxruntime/test/optimizer/qdq_transformer_test.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 146ed224f6ef6..d07977d4b97b8 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -1264,13 +1264,13 @@ static void RunSliceDropQDQTestCase(const std::vector& input_shape, // Checks that Q/DQ nodes are dropped from DQ -> Slice -> Q. Uses 8-bit and 16-bit Q/DQ ops. 
TEST(QDQTransformerTests, SliceDropQDQ) { RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}); - RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use com.microsoft QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use com.microsoft QDQ ops // Use int16 com.microsoft QDQ ops RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); // Use int16 com.microsoft QDQ ops RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, true, 13); - RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, false); // Use int16 ONNX QDQ ops - RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, false); // Use int16 ONNX QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, false); // Use int16 ONNX QDQ ops + RunSliceDropQDQTestCase({1, 3, 5, 5}, {0, 1, 1, 1}, {1, 3, 4, 4}, false); // Use int16 ONNX QDQ ops } // Runs a test case that checks if Q/DQ nodes are dropped from DQ -> GatherElements -> Q. @@ -1318,8 +1318,8 @@ TEST(QDQTransformerTests, GatherElementsDropQDQ) { RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); // Use int16 com.microsoft QDQ ops RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, true, 13); - RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, false); // Use int16 ONNX QDQ ops - RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, false); // Use int16 ONNX QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, false); // Use int16 ONNX QDQ ops + RunGatherElementsDropQDQTestCase({3, 3}, {2, 3}, {1, 2, 0, 2, 0, 0}, false); // Use int16 ONNX QDQ ops } // Runs a test case whether Q/DQ nodes are dropped from DQ -> Reduce(Min|Max) -> Q. 
From e5638e5ea9a51a727edfd804027bc5e40a5cdef5 Mon Sep 17 00:00:00 2001 From: Maxwell Collins Date: Thu, 15 Aug 2024 08:35:44 -0700 Subject: [PATCH 32/32] Set /bigobj for qdq_transformer_test.cc Seeing: fatal error C1128: number of sections exceeded object file format limit: compile with /bigobj so apparently these additional tests are pushing this file over the limit. Given there's already a statement setting /bigobj for sibling graph_transform_test, simply copy-pasting that for qdq_transformer_test --- cmake/onnxruntime_unittests.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index d5c3af748e528..b366c6d3f15c0 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -887,10 +887,12 @@ if (MSVC) target_compile_options(onnxruntime_test_all PRIVATE "$<$:SHELL:--compiler-options /wd4244>" "$<$>:/wd4244>") - # Avoid this compile error in graph_transform_test.cc: + # Avoid this compile error in graph_transform_test.cc and qdq_transformer_test.cc: # fatal error C1128: number of sections exceeded object file format limit: compile with /bigobj set_property(SOURCE "${TEST_SRC_DIR}/optimizer/graph_transform_test.cc" APPEND PROPERTY COMPILE_OPTIONS "/bigobj") + set_property(SOURCE "${TEST_SRC_DIR}/optimizer/qdq_transformer_test.cc" + APPEND PROPERTY COMPILE_OPTIONS "/bigobj") else() target_compile_options(onnxruntime_test_all PRIVATE "-Wno-parentheses") endif()