From 1948c4ad45abef02fc57c641b8276aecfc3197ff Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Tue, 26 Dec 2023 01:19:19 +0100 Subject: [PATCH] fix one bug --- .../python/tools/quantization/quant_utils.py | 2 - .../python/quantization/test_quant_util.py | 46 ++++++++++++------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/onnxruntime/python/tools/quantization/quant_utils.py b/onnxruntime/python/tools/quantization/quant_utils.py index c8ace8a2b3a64..66e86e4b3839e 100644 --- a/onnxruntime/python/tools/quantization/quant_utils.py +++ b/onnxruntime/python/tools/quantization/quant_utils.py @@ -226,7 +226,6 @@ def compute_scale_zp(rmin, rmax, qmin, qmax, symmetric=False, min_real_range=Non :return: zero and scale [z, s] """ - assert rmin <= rmax, f"rmin={rmin} > rmax={rmax}" if qmin > 0 or qmax < 0: raise ValueError(f"qmin and qmax must meet requirement: qmin <= 0 <= qmax while qmin:{qmin}, qmmax:{qmax}") @@ -245,7 +244,6 @@ def compute_scale_zp(rmin, rmax, qmin, qmax, symmetric=False, min_real_range=Non rmin = -absmax rmax = +absmax - assert rmin <= rmax, f"rmin={rmin} > rmax={rmax}" assert qmin <= qmax, f"qmin={rmin} > qmax={rmax}" dr = numpy.array(rmax - rmin, dtype=numpy.float64) dq = numpy.array(qmax, dtype=numpy.float64) - numpy.array(qmin, dtype=numpy.float64) diff --git a/onnxruntime/test/python/quantization/test_quant_util.py b/onnxruntime/test/python/quantization/test_quant_util.py index 749f1a458c502..848857ceb279d 100644 --- a/onnxruntime/test/python/quantization/test_quant_util.py +++ b/onnxruntime/test/python/quantization/test_quant_util.py @@ -18,12 +18,12 @@ class TestQuantUtil(unittest.TestCase): def test_compute_scale_zp(self): - def _compute_scale_zp(rmin, rmax, qmin, qmax, symmetric=False, min_real_range=None): + def _compute_scale_zp(rmin, rmax, qmin, qmax, qtype, symmetric=False, min_real_range=None): zp, scale = compute_scale_zp( numpy.array(rmin, dtype=numpy.float32), numpy.array(rmax, dtype=numpy.float32), - qmin, - qmax, + numpy.array(qmin, dtype=qtype), + numpy.array(qmax, dtype=qtype), symmetric=symmetric, min_real_range=min_real_range, ) @@ -31,31 +31,43 @@ def _compute_scale_zp(rmin, rmax, qmin, qmax, symmetric=False, min_real_range=No assert isinstance(scale, numpy.ndarray) return [float(zp), float(scale)] - self.assertEqual(_compute_scale_zp(0.0, 0.0, -127, 127, symmetric=True), [0, 1.0]) - self.assertEqual(_compute_scale_zp(1.0, -1.0, -127, 127, symmetric=True), [0, 1.0]) - self.assertEqual(_compute_scale_zp(0.0, 0.0, 0, 255, symmetric=True), [0, 1.0]) - self.assertEqual(_compute_scale_zp(1.0, -1.0, 0, 255, symmetric=True), [0, 1.0]) + self.assertEqual(_compute_scale_zp(0.0, 0.0, -127, 127, numpy.int8, symmetric=True), [0, 1.0]) + self.assertEqual(_compute_scale_zp(1.0, -1.0, -127, 127, numpy.int8, symmetric=True), [0, 1.0]) + self.assertEqual(_compute_scale_zp(0.0, 0.0, 0, 255, numpy.uint8, symmetric=True), [0, 1.0]) + self.assertEqual(_compute_scale_zp(1.0, -1.0, 0, 255, numpy.uint8, symmetric=True), [0, 1.0]) - self.assertEqual(_compute_scale_zp(-1.0, 2.0, -127, 127, symmetric=True), [0, numpy.float32(2.0 / 127)]) - self.assertEqual(_compute_scale_zp(-1.0, 2.0, -127, 127, symmetric=False), [-42, numpy.float32(3.0 / 254)]) + self.assertEqual( + _compute_scale_zp(-1.0, 2.0, -127, 127, numpy.int8, symmetric=True), [0, numpy.float32(2.0 / 127)] + ) + self.assertEqual( + _compute_scale_zp(-1.0, 2.0, -127, 127, numpy.int8, symmetric=False), [-42, numpy.float32(3.0 / 254)] + ) - self.assertEqual(_compute_scale_zp(-1.0, 2.0, 0, 255, symmetric=True), [128, numpy.float32(4.0 / 255)]) - self.assertEqual(_compute_scale_zp(-1.0, 2.0, 0, 255, symmetric=False), [85, numpy.float32(3.0 / 255)]) + self.assertEqual( + _compute_scale_zp(-1.0, 2.0, 0, 255, numpy.uint8, symmetric=True), [128, numpy.float32(4.0 / 255)] + ) + self.assertEqual( + _compute_scale_zp(-1.0, 2.0, 0, 255, numpy.uint8, symmetric=False), [85, numpy.float32(3.0 / 255)] + ) tiny_float = numpy.float32(numpy.finfo(numpy.float32).tiny * 0.1) - self.assertEqual(_compute_scale_zp(-tiny_float, tiny_float, 0, 255, symmetric=True), [0, 1.0]) - self.assertEqual(_compute_scale_zp(-tiny_float, 0.0, 0, 255, symmetric=False), [0, 1.0]) + self.assertEqual(_compute_scale_zp(-tiny_float, tiny_float, 0, 255, numpy.uint8, symmetric=True), [0, 1.0]) + self.assertEqual(_compute_scale_zp(-tiny_float, 0.0, 0, 255, numpy.uint8, symmetric=False), [0, 1.0]) # Test enforcing a minimum floatint-point range. - self.assertEqual(_compute_scale_zp(0.0, 0.0, 0, 255, symmetric=False, min_real_range=0.0001), [0, 0.0001 / 255]) self.assertEqual( - _compute_scale_zp(0.0, 0.0, -128, 127, symmetric=True, min_real_range=0.0001), [0, 0.0002 / 255] + _compute_scale_zp(0.0, 0.0, 0, 255, numpy.uint8, symmetric=False, min_real_range=0.0001), [0, 0.0001 / 255] + ) + self.assertEqual( + _compute_scale_zp(0.0, 0.0, -128, 127, numpy.int8, symmetric=True, min_real_range=0.0001), [0, 0.0002 / 255] ) self.assertEqual( - _compute_scale_zp(0.0, 0.0, 0, 65535, symmetric=False, min_real_range=0.0001), [0, 0.0001 / 65535] + _compute_scale_zp(0.0, 0.0, 0, 65535, numpy.uint16, symmetric=False, min_real_range=0.0001), + [0, 0.0001 / 65535], ) self.assertEqual( - _compute_scale_zp(0.0, 0.0, -32768, 32767, symmetric=True, min_real_range=0.0001), [0, 0.0002 / 65535] + _compute_scale_zp(0.0, 0.0, -32768, 32767, numpy.int16, symmetric=True, min_real_range=0.0001), + [0, 0.0002 / 65535], ) def test_load_external_model(self):