diff --git a/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py
index f4bcd508960a1..3090296b774aa 100644
--- a/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py
+++ b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py
@@ -440,7 +440,7 @@ def quantize(self, node: NodeProto, graph_stack: list[GraphProto]) -> NodeProto:
         kwargs["bits"] = 4
         kwargs["block_size"] = self.config.block_size
         if self.config.accuracy_level is not None:
-            kwargs["accuracy_level"] = self.accuracy_level
+            kwargs["accuracy_level"] = self.config.accuracy_level

         matmul_q4_node = onnx.helper.make_node(
             "MatMulNBits",