diff --git a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc index 5a6c47a8d8454..a9bed8f9a8ced 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc @@ -166,6 +166,41 @@ bool QOrDQNodeHasConstantScalarScaleAndZeroPoint( return true; } +bool IsQOrDQScalePositiveConstantScalar( + const Node& q_or_dq_node, const GetConstantInitializerFn& get_const_initializer, + const Path& model_path) { + auto q_or_dq_input_defs = q_or_dq_node.InputDefs(); + + ORT_ENFORCE(q_or_dq_input_defs.size() >= 2); + + if (!optimizer_utils::IsScalar(*q_or_dq_input_defs[InputIndex::SCALE_ID])) { + return false; + } + + const ONNX_NAMESPACE::TensorProto* q_or_dq_scale_tensor_proto = + get_const_initializer(q_or_dq_input_defs[InputIndex::SCALE_ID]->Name()); + if (nullptr == q_or_dq_scale_tensor_proto) { + return false; + } + + Initializer q_or_dq_scale(*q_or_dq_scale_tensor_proto, model_path); + + switch (q_or_dq_scale.data_type()) { + case ONNX_NAMESPACE::TensorProto::FLOAT: + return q_or_dq_scale.data()[0] > 0; + + case ONNX_NAMESPACE::TensorProto::FLOAT16: + return q_or_dq_scale.data()[0] > 0; + + case ONNX_NAMESPACE::TensorProto::BFLOAT16: + return q_or_dq_scale.data()[0] > 0; + + default: + assert(false); + return false; + } +} + #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) bool MatchQNode(const Node& node) { diff --git a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h index c5f7cd601a2f0..519cfdff2580e 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h +++ b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h @@ -64,6 +64,10 @@ bool QOrDQNodeHasConstantScalarScaleAndZeroPoint( const GetConstantInitializerFn& get_const_initializer, bool& zero_point_exists); +// Checks that the y_scale/x_scale input to the QuantizeLinear/DequantizeLinear node is a 
positive scalar. +bool IsQOrDQScalePositiveConstantScalar(const Node& q_or_dq_node, const GetConstantInitializerFn& get_const_initializer, + const Path& model_path); + #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) // Check Q node op type, version, and domain. bool MatchQNode(const Node& node); diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index 80ead8f8c68d6..74cd223f07375 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -35,6 +35,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { // 3 nodes. DQ, target, Q. Merge into target and remove DQ and Q. const std::string drop_action_name{"drop"}; const std::string drop_action_no_int16_name{"drop_no_int16_support"}; + const std::string drop_action_no_int16_nor_nonpositive_scale_name{"drop_no_int16_support_no_nonpositive_scale"}; NTO::NodeLocation dq{NTO::NodeType::kInput, 0}; NTO::NodeLocation q{NTO::NodeType::kOutput, 0}; @@ -46,19 +47,32 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr drop_action_no_int16 = std::make_unique( std::vector(moves)); // Copy before std::move(moves) + std::unique_ptr drop_action_no_int16_nor_nonpositive_scale = std::make_unique( + std::vector(moves)); // Copy before std::move(moves) std::unique_ptr drop_action = std::make_unique(std::move(moves)); #if !defined(ORT_MINIMAL_BUILD) - // Use a separate selector + action that disallows 16-bit types for MaxPool and Resize. + // Use separate selectors & actions for MaxPool and Resize. + // + // They disallow 16-bit types for MaxPool and Resize: // int16 MaxPool is not supported by the ONNX specification. 
+ // int16 Resize is not supported by the ORT implementation (although allowed by ONNX). + // + // Additionally, the QDQ for MaxPool cannot be eliminated if the scale is not positive, as a negative scale will change the ordering + // of the elements between quantized & de-quantized values. std::unique_ptr selector_disallow_16bit = std::make_unique(false); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_name, - {{"MaxPool", {12}}, - {"Resize", {}}}, + {{"Resize", {}}}, std::move(selector_disallow_16bit), std::move(drop_action_no_int16)); + std::unique_ptr selector_disallow_16bit_and_nonpositive_scale = ( + std::make_unique(false, true, false)); + qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_nor_nonpositive_scale_name, + {{"MaxPool", {12}}}, + std::move(selector_disallow_16bit_and_nonpositive_scale), + std::move(drop_action_no_int16_nor_nonpositive_scale)); + std::unique_ptr selector = std::make_unique(true); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_name, {{"Gather", {}}, diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc index 09705f61c82ce..cdf5eb2b3f1c2 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc @@ -150,6 +150,13 @@ bool DropQDQNodeGroupSelector::Check(const GraphViewer& graph_viewer, return graph_viewer.GetConstantInitializer(initializer_name, true); }; + if (!allow_nonpositive_scale_) { + // IsQDQPairSupported will check that the scale is the same between q_node and dq_node. 
+ if (!IsQOrDQScalePositiveConstantScalar(q_node, get_const_initializer, graph_viewer.ModelPath())) { + return false; + } + } + return IsQDQPairSupported(q_node, dq_node, get_const_initializer, graph_viewer.ModelPath()); } diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h index 1a2a620acb480..7ef18d292e4ae 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h @@ -48,8 +48,8 @@ class NodeGroupSelector { // Zero point and scale are constant scalars and must match class DropQDQNodeGroupSelector : public NodeGroupSelector { public: - explicit DropQDQNodeGroupSelector(bool allow_16bit = true, bool allow_4bit = true) - : allow_16bit_(allow_16bit), allow_4bit_(allow_4bit) {} + explicit DropQDQNodeGroupSelector(bool allow_16bit = true, bool allow_4bit = true, bool allow_nonpositive_scale = true) + : allow_16bit_(allow_16bit), allow_4bit_(allow_4bit), allow_nonpositive_scale_(allow_nonpositive_scale) {} private: bool Check(const GraphViewer& graph_viewer, const Node& node, @@ -58,6 +58,7 @@ class DropQDQNodeGroupSelector : public NodeGroupSelector { bool allow_16bit_; bool allow_4bit_; + bool allow_nonpositive_scale_; }; // Single DQ -> node. @@ -292,8 +293,8 @@ class BaseSelector : public NodeSelector { class DropQDQNodesSelector : public BaseSelector { public: - explicit DropQDQNodesSelector(bool allow_16bit = false, bool allow_4bit = false) - : BaseSelector(std::make_unique(allow_16bit, allow_4bit)) {} + explicit DropQDQNodesSelector(bool allow_16bit = false, bool allow_4bit = false, bool allow_nonpositive_scale = true) + : BaseSelector(std::make_unique(allow_16bit, allow_4bit, allow_nonpositive_scale)) {} }; class DropDQNodesSelector : public BaseSelector {