microsoft · jchen351 · Sep 9, 2024 · Sep 9, 2024 · Sep 9, 2024 · Sep 11, 2024
diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
@@ -384,6 +384,7 @@
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, MatMulInteger);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t, MatMulInteger);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ConvInteger);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t_int8_t, ConvInteger);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, QLinearConv);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t, QLinearConv);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, Slice);
@@ -1700,6 +1701,9 @@
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t,
                                                                   MatMulInteger)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ConvInteger)>,
+
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t_int8_t,
+                                                                  ConvInteger)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t,
                                                                   QLinearConv)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t,

diff --git a/onnxruntime/core/providers/cpu/quantization/conv_integer.cc b/onnxruntime/core/providers/cpu/quantization/conv_integer.cc
@@ -33,6 +33,18 @@ ONNX_OPERATOR_KERNEL_EX(
         .TypeConstraint("T3", DataTypeImpl::GetTensorType<int32_t>()),
     ConvInteger);
 
+ONNX_OPERATOR_TYPED_KERNEL_EX(  // Typed CPU registration of ConvInteger for the uint8 T1 / int8 T2 pairing.
+    ConvInteger,
+    kOnnxDomain,
+    10,
+    uint8_t_int8_t,  // Type tag that keeps this registration distinct from the untyped ConvInteger kernel.
+    kCpuExecutionProvider,
+    KernelDefBuilder()
+        .TypeConstraint("T1", DataTypeImpl::GetTensorType<uint8_t>())  // T1: uint8 tensors only.
+        .TypeConstraint("T2", DataTypeImpl::GetTensorType<int8_t>())   // T2: int8 tensors only.
+        .TypeConstraint("T3", DataTypeImpl::GetTensorType<int32_t>()),  // T3: int32 tensors only.
+    ConvInteger);  // Served by the same ConvInteger kernel class as the untyped registration.
+
 Status ConvInteger::Compute(OpKernelContext* context) const {
   size_t num_inputs = OpKernel::Node().InputDefs().size();
   const auto* X = context->Input<Tensor>(0);
@@ -42,12 +54,17 @@ Status ConvInteger::Compute(OpKernelContext* context) const {
   if (num_inputs >= 3) {
     const auto* X_Zero_Point = context->Input<Tensor>(2);
     ORT_ENFORCE(IsScalarOr1ElementVector(X_Zero_Point), "Must be a scalar or 1D tensor or size 1.");
-    input_offset = *(X_Zero_Point->Data<uint8_t>());
+    input_offset = *static_cast<const uint8_t*>(X_Zero_Point->DataRaw());
   }
   if (num_inputs >= 4) {
     const auto* W_Zero_Point = context->Input<Tensor>(3);
     ORT_ENFORCE(IsScalarOr1ElementVector(W_Zero_Point), "Non per-tensor quantization is not supported now.");
-    filter_offset = *(W_Zero_Point->Data<uint8_t>());
+    if (W->IsDataType<uint8_t>())
+      filter_offset = *static_cast<const uint8_t*>(W_Zero_Point->DataRaw());
+    else if (W->IsDataType<int8_t>())
+      filter_offset = *static_cast<const int8_t*>(W_Zero_Point->DataRaw());
+    else
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported data type for W_Zero_Point.");
   }
 
   const int64_t N = X->Shape()[0];
@@ -109,9 +126,9 @@ Status ConvInteger::Compute(OpKernelContext* context) const {
 
   concurrency::ThreadPool* thread_pool = context->GetOperatorThreadPool();
 
-  const auto* Xdata = X->Data<uint8_t>();
-  const auto* Wdata = W->Data<uint8_t>();
-  auto* Ydata = Y->MutableData<int32_t>();
+  const auto* Xdata = static_cast<const uint8_t*>(X->DataRaw());
+  const auto* Wdata = static_cast<const uint8_t*>(W->DataRaw());
+  auto* Ydata = Y->template MutableData<int32_t>();
 
   for (int image_id = 0; image_id < N; ++image_id) {
     for (int group_id = 0; group_id < conv_attrs_.group; ++group_id) {
@@ -155,6 +172,7 @@ Status ConvInteger::Compute(OpKernelContext* context) const {
       gemm_shape.M = static_cast<size_t>(M / conv_attrs_.group);
       gemm_shape.N = static_cast<size_t>(output_image_size);
       gemm_shape.K = static_cast<size_t>(kernel_dim);
+      gemm_shape.AIsSigned = W->IsDataType<int8_t>();
 
       MLAS_GEMM_QUANT_DATA_PARAMS gemm_params;
       gemm_params.A = Wdata + group_id * W_offset;

diff --git a/onnxruntime/test/providers/cpu/nn/conv_integer_test.cc b/onnxruntime/test/providers/cpu/nn/conv_integer_test.cc
@@ -50,6 +50,26 @@ TEST(ConvIntegerTest, WithPadding_2D_u8u8) {
   test.Run();
 }
 
+// Unpadded 2D convolution: uint8 input against int8 weights (zero points 1 and -10).
+TEST(ConvIntegerTest, WithoutPadding_2D_u8s8) {
+  const std::vector<int64_t> input_shape = {1, 1, 3, 3};
+  const std::vector<int64_t> filter_shape = {1, 1, 2, 2};
+  const std::vector<int64_t> output_shape = {1, 1, 2, 2};
+
+  OpTester tester("ConvInteger", 10);
+  tester.AddInput<uint8_t>("x", input_shape,
+                           {2, 3, 4,
+                            5, 6, 7,
+                            8, 9, 10});
+  tester.AddInput<int8_t>("w", filter_shape, {-9, -9, -9, -9});
+  tester.AddInput<uint8_t>("x_zero_point", {}, {1});
+  tester.AddInput<int8_t>("w_zero_point", {}, {-10});
+  tester.AddOutput<int32_t>("y", output_shape,
+                            {12, 16,
+                             24, 28});
+  tester.Run();
+}
+
 TEST(ConvIntegerTest, WithGroup_2D_u8u8) {
   OpTester test("ConvInteger", 10);
   std::vector<int64_t> x_dims{1, 3, 3, 3};
@@ -92,6 +112,48 @@ TEST(ConvIntegerTest, WithGroup_2D_u8u8) {
   test.Run();
 }
 
+// Grouped 2D convolution (group = 3, one 2x2 int8 filter per input channel) with
+// uint8 input and a padding of 1 on every spatial edge.
+TEST(ConvIntegerTest, WithGroup_2D_u8s8) {
+  const std::vector<int64_t> input_shape = {1, 3, 3, 3};
+  const std::vector<int64_t> filter_shape = {3, 1, 2, 2};
+  const std::vector<int64_t> output_shape = {1, 3, 4, 4};
+
+  OpTester tester("ConvInteger", 10);
+  tester.AddInput<uint8_t>("x", input_shape,
+                           {2, 3, 4,
+                            5, 6, 7,
+                            8, 9, 10,
+                            11, 12, 13,
+                            14, 15, 16,
+                            17, 18, 19,
+                            20, 21, 22,
+                            23, 24, 25,
+                            26, 27, 28});
+  tester.AddInput<int8_t>("w", filter_shape,
+                          {-2, -1, -1, -2,
+                           0, 1, 1, 0,
+                           2, 3, 3, 2});
+  tester.AddInput<uint8_t>("x_zero_point", {}, {1});
+  tester.AddInput<int8_t>("w_zero_point", {}, {-3});
+  tester.AddAttribute<std::vector<int64_t>>("pads", {1, 1, 1, 1});
+  tester.AddAttribute("group", static_cast<int64_t>(3));
+  tester.AddOutput<int32_t>("y", output_shape,
+                            {1, 4, 7, 6,
+                             6, 18, 24, 15,
+                             15, 36, 42, 24,
+                             14, 23, 26, 9,
+                             30, 73, 80, 48,
+                             79, 168, 182, 96,
+                             100, 210, 224, 117,
+                             64, 116, 123, 54,
+                             95, 214, 225, 126,
+                             224, 462, 484, 249,
+                             257, 528, 550, 282,
+                             150, 281, 292, 135});
+  tester.Run();
+}
+
 TEST(ConvIntegerTest, WithPadding_3D_u8u8) {
   // TODO: Unskip when fixed #41968513
   if (DefaultDmlExecutionProvider().get() != nullptr) {
@@ -140,6 +202,49 @@ TEST(ConvIntegerTest, WithPadding_3D_u8u8) {
   test.Run();
 }
 
+// Padded 3D convolution: uint8 input against a 2x2x2 int8 filter (zero points 1 and -10).
+TEST(ConvIntegerTest, WithPadding_3D_u8s8) {
+  const std::vector<int64_t> input_shape = {1, 1, 3, 3, 3};
+  const std::vector<int64_t> filter_shape = {1, 1, 2, 2, 2};
+  const std::vector<int64_t> output_shape = {1, 1, 4, 4, 4};
+
+  OpTester tester("ConvInteger", 10);
+  tester.AddInput<uint8_t>("x", input_shape,
+                           {2, 3, 4,
+                            5, 6, 7,
+                            8, 9, 10,
+                            11, 12, 13,
+                            14, 15, 16,
+                            17, 18, 19,
+                            20, 21, 22,
+                            23, 24, 25,
+                            26, 27, 28});
+  tester.AddInput<int8_t>("w", filter_shape,
+                          {-9, -9, -9, -9,
+                           -9, -9, -9, -9});
+  tester.AddInput<uint8_t>("x_zero_point", {}, {1});
+  tester.AddInput<int8_t>("w_zero_point", {}, {-10});
+  tester.AddAttribute<std::vector<int64_t>>("pads", {1, 1, 1, 1, 1, 1});
+  tester.AddOutput<int32_t>("y", output_shape,
+                            {1, 3, 5, 3,
+                             5, 12, 16, 9,
+                             11, 24, 28, 15,
+                             7, 15, 17, 9,
+                             11, 24, 28, 15,
+                             28, 60, 68, 36,
+                             40, 84, 92, 48,
+                             23, 48, 52, 27,
+                             29, 60, 64, 33,
+                             64, 132, 140, 72,
+                             76, 156, 164, 84,
+                             41, 84, 88, 45,
+                             19, 39, 41, 21,
+                             41, 84, 88, 45,
+                             47, 96, 100, 51,
+                             25, 51, 53, 27});
+  tester.Run();
+}
+
 TEST(ConvIntegerTest, Pointwise_2D_u8u8) {
   OpTester test("ConvInteger", 10);
   std::vector<int64_t> x_dims{1, 1, 3, 3};
@@ -194,6 +299,36 @@ TEST(ConvIntegerTest, Pointwise_3D_u8u8) {
                            100, 104, 108});
   test.Run();
 }
+// Pointwise (1x1x1 filter) 3D convolution with uint8 input and a single int8 weight.
+// The effective weight is w - w_zero_point = -16 - (-20) = 4.
+TEST(ConvIntegerTest, Pointwise_3D_u8s8) {
+  const std::vector<int64_t> input_shape = {1, 1, 3, 3, 3};
+  const std::vector<int64_t> filter_shape = {1, 1, 1, 1, 1};
+  const std::vector<int64_t> output_shape = {1, 1, 3, 3, 3};
+
+  OpTester tester("ConvInteger", 10);
+  // One row of values per depth slice.
+  tester.AddInput<uint8_t>("x", input_shape,
+                           {2, 3, 4, 5, 6, 7, 8, 9, 10,
+                            11, 12, 13, 14, 15, 16, 17, 18, 19,
+                            20, 21, 22, 23, 24, 25, 26, 27, 28});
+  tester.AddInput<int8_t>("w", filter_shape, {-16});
+  tester.AddInput<uint8_t>("x_zero_point", {}, {1});
+  tester.AddInput<int8_t>("w_zero_point", {}, {-20});
+  // Every expected element equals 4 * (x - x_zero_point).
+  tester.AddOutput<int32_t>("y", output_shape,
+                            {4, 8, 12,
+                             16, 20, 24,
+                             28, 32, 36,
+                             40, 44, 48,
+                             52, 56, 60,
+                             64, 68, 72,
+                             76, 80, 84,
+                             88, 92, 96,
+                             100, 104, 108});
+
+  tester.Run();
+}
 
 TEST(ConvIntegerTest, WithStride2_2D_u8u8) {
   OpTester test("ConvInteger", 10);
@@ -225,6 +360,36 @@ TEST(ConvIntegerTest, WithStride2_2D_u8u8) {
   test.Run();
 }
 
+// u8s8 variant that exercises the (stride_w = 2) path inside Math::Im2col.
+TEST(ConvIntegerTest, WithStride2_2D_u8s8) {
+  const std::vector<int64_t> input_shape = {1, 1, 7, 7};
+  const std::vector<int64_t> filter_shape = {1, 1, 3, 3};
+  const std::vector<int64_t> output_shape = {1, 1, 4, 4};
+  OpTester tester("ConvInteger", 10);
+  tester.AddInput<uint8_t>("x", input_shape,
+                           {10, 11, 12, 13, 14, 15, 16,
+                            20, 21, 22, 23, 24, 25, 26,
+                            30, 31, 32, 33, 34, 35, 36,
+                            40, 41, 42, 43, 44, 45, 46,
+                            50, 51, 52, 53, 54, 55, 56,
+                            60, 61, 62, 63, 64, 65, 66,
+                            70, 71, 72, 73, 74, 75, 76});
+  tester.AddInput<int8_t>("w", filter_shape,
+                          {-2, -1, -2,
+                           -1, 0, -1,
+                           -2, -1, -2});
+  tester.AddInput<uint8_t>("x_zero_point", {}, {10});
+  tester.AddInput<int8_t>("w_zero_point", {}, {-3});
+  tester.AddAttribute<std::vector<int64_t>>("pads", {1, 1, 1, 1});
+  tester.AddAttribute<std::vector<int64_t>>("strides", {2, 2});
+  tester.AddOutput<int32_t>("y", output_shape,
+                            {33, 62, 84, 75,
+                             224, 330, 360, 282,
+                             444, 630, 660, 502,
+                             453, 642, 664, 495});
+  tester.Run();
+}
+
 TEST(ConvIntegerTest, WithStride3_2D_u8u8) {
   OpTester test("ConvInteger", 10);
   std::vector<int64_t> x_dims{1, 1, 7, 7};
@@ -254,5 +419,34 @@ TEST(ConvIntegerTest, WithStride3_2D_u8u8) {
   test.Run();
 }
 
+// u8s8 variant that exercises the (stride_w > 2) path inside Math::Im2col.
+TEST(ConvIntegerTest, WithStride3_2D_u8s8) {
+  const std::vector<int64_t> input_shape = {1, 1, 7, 7};
+  const std::vector<int64_t> filter_shape = {1, 1, 3, 3};
+  const std::vector<int64_t> output_shape = {1, 1, 3, 3};
+  OpTester tester("ConvInteger", 10);
+  tester.AddInput<uint8_t>("x", input_shape,
+                           {10, 11, 12, 13, 14, 15, 16,
+                            20, 21, 22, 23, 24, 25, 26,
+                            30, 31, 32, 33, 34, 35, 36,
+                            40, 41, 42, 43, 44, 45, 46,
+                            50, 51, 52, 53, 54, 55, 56,
+                            60, 61, 62, 63, 64, 65, 66,
+                            70, 71, 72, 73, 74, 75, 76});
+  tester.AddInput<int8_t>("w", filter_shape,
+                          {-2, -1, -2,
+                           -1, 0, -1,
+                           -2, -1, -2});
+  tester.AddInput<uint8_t>("x_zero_point", {}, {10});
+  tester.AddInput<int8_t>("w_zero_point", {}, {-3});
+  tester.AddAttribute<std::vector<int64_t>>("pads", {2, 2, 1, 1});
+  tester.AddAttribute<std::vector<int64_t>>("strides", {3, 3});
+  tester.AddOutput<int32_t>("y", output_shape,
+                            {0, 8, 20,
+                             80, 330, 375,
+                             200, 780, 825});
+  tester.Run();
+}
+
 }  // namespace test
 }  // namespace onnxruntime