fix test failures in training pipeline

microsoft · Oct 9, 2023 · 1a8f99e
1 parent 64f5aaf
commit 1a8f99e
Show file tree

Hide file tree

Showing 5 changed files with 23 additions and 9 deletions.
diff --git a/onnxruntime/__init__.py b/onnxruntime/__init__.py
@@ -43,7 +43,6 @@
     from onnxruntime.capi._pybind_state import get_device  # noqa: F401
     from onnxruntime.capi._pybind_state import get_version_string  # noqa: F401
     from onnxruntime.capi._pybind_state import has_collective_ops  # noqa: F401
-    from onnxruntime.capi._pybind_state import quantize_matmul_4bits  # noqa: F401
     from onnxruntime.capi._pybind_state import set_default_logger_severity  # noqa: F401
     from onnxruntime.capi._pybind_state import set_default_logger_verbosity  # noqa: F401
     from onnxruntime.capi._pybind_state import set_seed  # noqa: F401

diff --git a/onnxruntime/contrib_ops/cpu/quantization/dequantize_blockwise.h b/onnxruntime/contrib_ops/cpu/quantization/dequantize_blockwise.h
@@ -7,6 +7,7 @@
 
 #include <vector>
 
+#include "core/common/safeint.h"
 #include "core/framework/float16.h"
 #include "core/platform/threadpool.h"
 #include <iostream>
@@ -45,10 +46,11 @@ void QuantizeBlockwise(
         int32_t k_block_idx = static_cast<int32_t>(block_idx % block_per_K);
         int32_t k = k_block_idx * block_size;
         BlockwiseQuantBlock<T, block_size, bits>* blob_ptr = dst_blob + block_idx;
+        size_t offset = SafeInt<size_t>(k) * N + n;
         if (nullptr != zero_points_tmp_ptr) {
-          blob_ptr->quant(src + k * N + n, scale[block_idx], zero_points_tmp_ptr[block_idx], k, K, N);
+          blob_ptr->quant(src + offset, scale[block_idx], zero_points_tmp_ptr[block_idx], k, K, N);
         } else {
-          blob_ptr->quant(src + k * N + n, scale[block_idx], k, K, N);
+          blob_ptr->quant(src + offset, scale[block_idx], k, K, N);
         }
       },
       0);
@@ -119,17 +121,19 @@ void DequantizeBlockwise(
         int32_t k_block_idx = static_cast<int32_t>(task_idx % block_per_K);
         int32_t k = k_block_idx * block_size;
         const BlockwiseQuantBlock<T, block_size, bits>* blob_ptr = src_blob + task_idx;
+        size_t offset = SafeInt<size_t>(n) * K + k;
         if (nullptr != zero_points) {
           // if bits >= 4
           if constexpr (bits > 4) {  // zero point is stored with a byte
-            blob_ptr->dequant(dst + n * K + k, scale[task_idx], zero_points[task_idx], k, K);
+            blob_ptr->dequant(dst + offset, scale[task_idx], zero_points[task_idx], k, K);
           } else {  // zero points is stored with 4bits
             uint8_t zp = zero_points[task_idx / 2];
             zp = (task_idx & 1) ? (zp >> 4) : (zp & 0xf);
-            blob_ptr->dequant(dst + n * K + k, scale[task_idx], zp, k, K);
+            blob_ptr->dequant(dst + offset, scale[task_idx], zp, k, K);
           }
-        } else {
-          blob_ptr->dequant(dst + n * K + k, scale[task_idx], k, K);
+        }
+        else {
+          blob_ptr->dequant(dst + offset, scale[task_idx], k, K);
         }
       },
       0);

diff --git a/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py
@@ -14,7 +14,7 @@
 import onnx
 from onnx.onnx_pb import GraphProto, ModelProto, NodeProto, TensorProto
 
-import onnxruntime as ort
+from onnxruntime.capi._pybind_state import quantize_matmul_4bits
 
 from .onnx_model import ONNXModel
 from .quant_utils import attribute_to_kwarg
@@ -62,7 +62,7 @@ def int4_block_quant(self, fp32weight: npt.ArrayLike) -> np.ndarray:
         packed = np.zeros((cols, k_blocks, blob_size), dtype="uint8")
         scales = np.zeros((cols * k_blocks), dtype=fp32weight.dtype)
         zero_point = np.zeros((cols * k_blocks + 1) // 2, dtype="uint8")
-        ort.quantize_matmul_4bits(packed, fp32weight, scales, zero_point, block_size, cols, rows, self.is_symmetric)
+        quantize_matmul_4bits(packed, fp32weight, scales, zero_point, block_size, cols, rows, self.is_symmetric)
 
         return (packed, scales, zero_point)
 

diff --git a/onnxruntime/test/python/quantization/test_op_matmul_4bits.py b/onnxruntime/test/python/quantization/test_op_matmul_4bits.py
@@ -7,6 +7,7 @@
 
 import tempfile
 import unittest
+from importlib.util import find_spec
 from pathlib import Path
 from typing import Dict, Tuple, Union
 
@@ -136,6 +137,9 @@ def quant_test(
             else:
                 raise exception
 
+    @unittest.skipIf(
+        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
+    )
     def test_quantize_matmul_int4_symmetric(self):
         np.random.seed(13)
 
@@ -144,6 +148,9 @@ def test_quantize_matmul_int4_symmetric(self):
         data_reader = self.input_feeds(1, {"input": [100, 52]})
         self.quant_test(model_fp32_path, data_reader, 32, True)
 
+    @unittest.skipIf(
+        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
+    )
     def test_quantize_matmul_int4_offsets(self):
         model_fp32_path = str(Path(self._tmp_model_dir.name).joinpath("matmul_fp32_offset.onnx").absolute())
         self.construct_model_matmul(model_fp32_path, symmetric=False)

diff --git a/onnxruntime/test/python/quantization/test_quantizeblockwise_4bits.py b/onnxruntime/test/python/quantization/test_quantizeblockwise_4bits.py
@@ -6,6 +6,7 @@
 # --------------------------------------------------------------------------
 
 import unittest
+from importlib.util import find_spec
 
 import numpy as np
 import numpy.typing as npt
@@ -96,6 +97,9 @@ def quantize_blockwise_4bits_target(matrix_float: npt.ArrayLike, block_size: int
 
 
 class TestQuantizeBlockwise4Bits(unittest.TestCase):
+    @unittest.skipIf(
+        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
+    )
     def test_quantize_blockwise_4bits(self):
         for rows, cols in [(128, 128), (32, 128), (128, 32), (52, 128), (128, 52), (73, 123)]:
             for block_size in [16, 32, 64, 128]: