diff --git a/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py index 40a4a4d26dc1c..cc8bd622df9b1 100644 --- a/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py +++ b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py @@ -797,8 +797,8 @@ def parse_args(): parser.add_argument( "--quant_format", default="QOperator", - type=QuantFormat, - choices=list(QuantFormat), + type=str, + choices=["QOperator", "QDQ"], help="QuantFormat {QOperator, QDQ}" "QOperator format quantizes the model with quantized operators directly." "QDQ format quantize the model by inserting DeQuantizeLinear before the MatMul.", @@ -814,7 +814,7 @@ def parse_args(): input_model_path = args.input_model output_model_path = args.output_model - quant_format = args.quant_format + quant_format = QuantFormat[args.quant_format] if os.path.exists(output_model_path): logger.error(f"file {output_model_path} already exists")