diff --git a/onnxruntime/python/tools/quantization/onnx_quantizer.py b/onnxruntime/python/tools/quantization/onnx_quantizer.py index d4c1ec4ab85e7..02301b799c07f 100644 --- a/onnxruntime/python/tools/quantization/onnx_quantizer.py +++ b/onnxruntime/python/tools/quantization/onnx_quantizer.py @@ -1272,8 +1272,12 @@ def quantize_weight_per_channel( # Update packed weight, zero point, and scale initializers zero_scale_shape = [initializer.dims[channel_axis]] - scale_initializer = onnx.helper.make_tensor(scale_name, initializer.data_type, zero_scale_shape, scale_list) - zero_initializer = onnx.helper.make_tensor(zp_name, weight_qType, zero_scale_shape, zero_point_list) + scale_initializer = onnx.helper.make_tensor( + scale_name, initializer.data_type, zero_scale_shape, np.hstack(scale_list).tolist() + ) + zero_initializer = onnx.helper.make_tensor( + zp_name, weight_qType, zero_scale_shape, np.hstack(zero_point_list).tolist() + ) self.model.initializer_extend([scale_initializer, zero_initializer]) diff --git a/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py b/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py index 100ae7d8a22d1..fb2c9b477c05c 100644 --- a/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py +++ b/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py @@ -147,7 +147,8 @@ def test_qdq_default(self): self.assertEqual(bias_zp.int32_data[0], self.default_zp_scales["BIAS"][0]) self.assertEqual(bias_zp.data_type, self.default_bias_qtype) - self.assertEqual(bias_sc.float_data[0], self.default_zp_scales["BIAS"][1]) + np_array = onnx.numpy_helper.to_array(bias_sc) + self.assertEqual(np_array[0], self.default_zp_scales["BIAS"][1]) self.assertEqual(out_zp.int32_data[0], self.default_zp_scales["OUT"][0]) self.assertEqual(out_zp.data_type, self.default_act_qtype) @@ -253,7 +254,7 @@ def test_qdq_overrides2(self): """ Test overriding rmin/rmax for Sigmoid output. """ - sigmoid_rmin, sigmoid_rmax = 0.0, 0.5 + sigmoid_rmin, sigmoid_rmax = np.array(0.0, dtype=np.float32), np.array(0.5, dtype=np.float32) inp_zp, inp_sc, sig_out_zp, sig_out_sc, _, _, _, _, _, _ = self.perform_qdq_quantization( "model_quant_overrides2.onnx", tensor_quant_overrides={"SIG_OUT": [{"rmin": sigmoid_rmin, "rmax": sigmoid_rmax}]}, @@ -462,7 +463,7 @@ def test_override_validation_bad_combination(self): }, ) - self.assertIn("option 'rmin' is invalid with 'scale' and 'zero_point'", str(context.exception)) + self.assertIn("Tensor override option 'rmax' is invalid with 'scale' and 'zero_point'", str(context.exception)) with self.assertRaises(ValueError) as context: self.perform_qdq_quantization(