onnx · daquexian · Apr 4, 2021 · Apr 4, 2021
diff --git a/onnxoptimizer/passes/fuse_add_bias_into_conv.h b/onnxoptimizer/passes/fuse_add_bias_into_conv.h
@@ -38,11 +38,22 @@ struct FuseAddBiasIntoConv final : public PredicateBasedPass {
   }
   static Node *makeSqueezeOrUnsqueeze(Graph &graph, std::vector<int64_t> &axes,
                                       Value *input, Node *target_node,
-                                      BuiltinSymbol k) {
+                                      BuiltinSymbol k, bool is_input_qdq) {
     assert(k == kSqueeze || k == kUnsqueeze);
     Node *squeeze = graph.create(k, 1);
-    int opset_version = getOpsetVersion(graph);
+    Node *dequant_node = nullptr;
+    Node *quant_node = nullptr;
+    // For a QuantizeLinear -> DequantizeLinear input, insert the op ahead of the pair
+    if (is_input_qdq) {
+      dequant_node = input->node();
+      quant_node = dequant_node->input(0)->node();
+      target_node = quant_node;
+      input = target_node->input(0);
+      dequant_node->output()->clearMetadata();
+      quant_node->output()->clearMetadata();
+    }
     squeeze->addInput(input);
+    int opset_version = getOpsetVersion(graph);
     int version_threshold = 13;
     if (opset_version < version_threshold && opset_version != 0) {
       squeeze->is_(kaxes, std::move(axes));
@@ -54,7 +65,13 @@ struct FuseAddBiasIntoConv final : public PredicateBasedPass {
       Value *tv = graph.addInitializerAndInput(t);
       squeeze->addInput(tv);
     }
+    if (is_input_qdq) {
+      quant_node->replaceInput(0, squeeze->output());
+    }
     squeeze->insertBefore(target_node);
+    if (is_input_qdq) {
+      return dequant_node;
+    }
     return squeeze;
   }
   bool runTransform(Node *n, Graph &graph,
@@ -64,16 +81,15 @@ struct FuseAddBiasIntoConv final : public PredicateBasedPass {
     destroy_current = NodeDestroyType::DestroyZero;
     auto orig_conv = n->inputs()[0];
     auto orig_bias = n->inputs()[1];
-    // check if bias is Const or in graph's initializers
-    if (orig_bias->node()->kind() != kConstant &&
-        orig_bias->node()->kind() != kParam) {
-      return false;
-    }
     // check if conv is only used by Add
     if (orig_conv->uses().size() > 1) {
       return false;
     }
     auto conv_shape = orig_conv->sizes();
+    // The bias shape must be known for the shape checks below
+    if (!orig_bias->has_sizes()) {
+      return false;
+    }
     auto bias_shape = orig_bias->sizes();
     auto weight_shape = orig_conv->node()->inputs()[1]->sizes();
     int64_t M = -1;
@@ -116,13 +132,13 @@ struct FuseAddBiasIntoConv final : public PredicateBasedPass {
       if (bias_shape.size() > 1) {
         std::vector<int64_t> axes(bias_shape.size() - 1);
         std::iota(axes.begin(), axes.end(), 0);
-        Node *squeeze = makeSqueezeOrUnsqueeze(graph, axes, conv_3rd_input,
-                                               orig_conv->node(), kSqueeze);
+        Node *squeeze = makeSqueezeOrUnsqueeze(
+            graph, axes, conv_3rd_input, orig_conv->node(), kSqueeze, false);
         conv_3rd_input = squeeze->output();
       } else if (bias_shape.size() == 0) {
         std::vector<int64_t> axes = {0};
-        Node *unsqueeze = makeSqueezeOrUnsqueeze(graph, axes, conv_3rd_input,
-                                                 orig_conv->node(), kUnsqueeze);
+        Node *unsqueeze = makeSqueezeOrUnsqueeze(
+            graph, axes, conv_3rd_input, orig_conv->node(), kUnsqueeze, false);
         conv_3rd_input = unsqueeze->output();
       }
       if (M > 1) {
@@ -150,17 +166,25 @@ struct FuseAddBiasIntoConv final : public PredicateBasedPass {
                bias_shape[1 + bias_shape.size() - static_cast<unsigned>(rank)]
                        .dim == M) {
       ONNX_ASSERT(bias_shape.size() > 1);
+      const bool is_input_qdq =
+          orig_bias->node()->kind() == Symbol("DequantizeLinear") &&
+          orig_bias->node()->input(0)->node()->kind() ==
+              Symbol("QuantizeLinear");
       if (orig_bias->node()->kind() != kParam &&
           orig_conv->node()->isBefore(orig_bias->node())) {
+        if (is_input_qdq) {
+          orig_bias->node()->input(0)->node()->moveBefore(orig_conv->node());
+        }
         orig_bias->node()->moveBefore(orig_conv->node());
       }
       std::vector<int64_t> axes(bias_shape.size());
       std::iota(axes.begin(), axes.end(), static_cast<int64_t>(0));
       axes.erase(axes.begin() +
                  (1 + bias_shape.size() - static_cast<unsigned>(rank)));
-      Node *squeeze = makeSqueezeOrUnsqueeze(graph, axes, orig_bias,
-                                             orig_conv->node(), kSqueeze);
-      orig_conv->node()->addInput(squeeze->output());
+
+      Node *new_bias = makeSqueezeOrUnsqueeze(
+          graph, axes, orig_bias, orig_conv->node(), kSqueeze, is_input_qdq);
+      orig_conv->node()->addInput(new_bias->output());
     } else {
       return false;
     }

diff --git a/onnxoptimizer/test/optimizer_test.py b/onnxoptimizer/test/optimizer_test.py
@@ -1125,6 +1125,66 @@ def test_fuse_add_bias_into_conv_squeeze_4d_bias_no_fuse(self):
         assert optimized_model.graph.node[0].op_type == 'Conv'
         assert optimized_model.graph.node[1].op_type == 'Add'
 
+    # A bias produced by another node (Sin) instead of an initializer is still fused, via a Squeeze.
+    def test_fuse_add_bias_into_conv_with_non_constant_bias(self):  # type: () -> None
+        nodes = [helper.make_node("Conv", ["X", "Y"], ["Z"]),
+                 helper.make_node("Sin", ["A"], ["B"]),
+                 helper.make_node("Add", ["Z", "B"], ["C"])]
+        graph = helper.make_graph(
+            nodes,
+            "test",
+            [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 3, 3)),
+             helper.make_tensor_value_info(
+                 "Y", TensorProto.FLOAT, (16, 5, 3, 3)),
+             helper.make_tensor_value_info("A", TensorProto.FLOAT, (16, 1, 1))],
+            [helper.make_tensor_value_info(
+                "C", TensorProto.FLOAT, (1, 16, 1, 1))],
+            value_info=[helper.make_tensor_value_info(
+                "B", TensorProto.FLOAT, (16, 1, 1))]
+        )
+        optimized_model = self._optimized(graph, ["fuse_add_bias_into_conv"])
+
+        assert len(list(optimized_model.graph.node)) == 3  # Sin, Squeeze, Conv: no Add left
+        assert optimized_model.graph.node[0].op_type == 'Sin'
+        assert optimized_model.graph.node[1].op_type == 'Squeeze'
+        assert optimized_model.graph.node[2].op_type == 'Conv'
+        assert optimized_model.graph.output[0].name == 'C'
+
+    # A bias behind QuantizeLinear -> DequantizeLinear is fused; the Squeeze lands ahead of the pair.
+    def test_fuse_add_bias_into_conv_with_quanted_bias(self):  # type: () -> None
+        nodes = [helper.make_node("Conv", ["X", "Y"], ["Z"]),
+                 helper.make_node("QuantizeLinear", ["A", "scale", "zero_point"], ["B"], axis=0),
+                 helper.make_node("DequantizeLinear", ["B", "scale", "zero_point"], ["C"], axis=0),
+                 helper.make_node("Add", ["Z", "C"], ["D"])]
+        graph = helper.make_graph(
+            nodes,
+            "test",
+            [helper.make_tensor_value_info("X", TensorProto.FLOAT, (1, 5, 3, 3)),
+             helper.make_tensor_value_info(
+                 "Y", TensorProto.FLOAT, (16, 5, 3, 3)),
+             helper.make_tensor_value_info("A", TensorProto.FLOAT, (16, 1, 1))],
+            [helper.make_tensor_value_info(
+                "D", TensorProto.FLOAT, (1, 16, 1, 1))],
+            [helper.make_tensor("scale", TensorProto.FLOAT,
+                                dims=(16,),
+                                vals=np.random.rand(16).astype(np.float32).tobytes(),
+                                raw=True),
+             helper.make_tensor("zero_point", TensorProto.INT8,
+                                             dims=(16,),
+                                             vals=np.zeros([16]).astype(np.int8).tobytes(),
+                                             raw=True)],
+            value_info=[helper.make_tensor_value_info(
+                "C", TensorProto.FLOAT, (16, 1, 1))]
+        )
+        optimized_model = self._optimized(graph, ["fuse_add_bias_into_conv"], opset_imports=[helper.make_opsetid("", 13)])
+
+        assert len(list(optimized_model.graph.node)) == 4  # Squeeze, Q, DQ, Conv: no Add left
+        assert optimized_model.graph.node[0].op_type == 'Squeeze'
+        assert optimized_model.graph.node[1].op_type == 'QuantizeLinear'
+        assert optimized_model.graph.node[2].op_type == 'DequantizeLinear'
+        assert optimized_model.graph.node[3].op_type == 'Conv'
+        assert optimized_model.graph.output[0].name == 'D'
+
     def test_fuse_matmul_add_bias_into_gemm(self):  # type: () -> None
         matmul = helper.make_node("MatMul", ["X", "Y"], ["Z"])
         add = helper.make_node("Add", ["Z", "B"], ["A"])

diff --git a/third_party/onnx b/third_party/onnx