Commit 2f867e2

update inc API usage

Signed-off-by: yuwenzho <[email protected]>
yuwenzho committed Sep 20, 2023
1 parent b232f2e commit 2f867e2
Showing 2 changed files with 21 additions and 11 deletions.
20 changes: 15 additions & 5 deletions onnxruntime/python/tools/quantization/quantize_weight_only.py
@@ -2,6 +2,7 @@
 import logging
 import importlib
 from pathlib import Path
+from packaging import version
 from .calibrate import CalibrationDataReader
 from .quant_utils import load_model_with_shape_infer

@@ -42,6 +43,7 @@ def __init__(
         self,
         group_size=32,
         scheme="sym",
+        ratios={},

[Code scanning / lintrunner, RUFF/B006 warning: Do not use mutable data structures for argument defaults. See https://beta.ruff.rs/docs/rules/]

         use_external_data_format=False,
     ):
         """
@@ -63,6 +65,7 @@ def __init__(
             scheme=scheme,
             use_external_data_format=use_external_data_format
         )
+        self.ratios = ratios

 class GPTQWeightOnlyQuantConfig(WeightOnlyQuantConfig):
     def __init__(
@@ -131,9 +134,9 @@ def _generate_weight_only_node_config(model, group_size, scheme):
         dict: weight only quant configuration for nodes.
     """
     weight_only_node_config = {}
-    template_config = {'weight': {"bits": 4, "group_size": group_size, "scheme": scheme}}
+    template_config = {"bits": 4, "group_size": group_size, "scheme": scheme}
     for node in model.graph.node:
-        if node.op_type in ["MatMul"]: # TODO: enable Gemm op support
+        if node.op_type in ["MatMul"]:
             weight_only_node_config[node.name] = template_config
     return weight_only_node_config

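A quick illustration of the change in the hunk above: the per-node template loses its nested {'weight': {...}} wrapper, so each MatMul node now maps directly to its options. A minimal sketch of the resulting structure (the node name "MatMul_1" is hypothetical, chosen to match the test model further below):

    # Sketch: the flat per-node layout _generate_weight_only_node_config now emits.
    def make_template_config(group_size=32, scheme="sym"):
        # New layout: the options dict is no longer wrapped in {"weight": {...}}.
        return {"bits": 4, "group_size": group_size, "scheme": scheme}

    weight_only_node_config = {"MatMul_1": make_template_config()}
    print(weight_only_node_config)
    # {'MatMul_1': {'bits': 4, 'group_size': 32, 'scheme': 'sym'}}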
@@ -156,11 +159,15 @@ def quantize_weight_only(
         RuntimeError: Raise RuntimeError if neural-compressor is not correctly installed.
     """
     try:
-        importlib.import_module("neural_compressor.adaptor.ox_utils.weight_only")
+        importlib.import_module("neural_compressor")
     except Exception as e:
         logging.error(f"{e}.")
         raise RuntimeError("neural-compressor is not correctly installed. Please check your environment.") from e
+
[Code scanning / lintrunner, RUFF/W293 warning: Blank line contains whitespace.]
+    import neural_compressor
+    assert version.parse(neural_compressor.__version__) >= version.parse("2.3.0"), \
+        "Require neural-compressor >= 2.3.0 to support weight only quantization!"
+
[Code scanning / lintrunner, RUFF/W293 warning: Blank line contains whitespace.]
     def inc_dataloader():
         data_reader = copy.deepcopy(weight_only_config.calibration_data_reader)
         for data in data_reader:
@@ -174,8 +181,11 @@ def inc_dataloader():
     algorithm = weight_only_config.algorithm
     if algorithm == "RTN":
         from neural_compressor.adaptor.ox_utils.weight_only import rtn_quantize
+        ratios = weight_only_config.ratios
+
         model = rtn_quantize(model=model_input,
[Code scanning / lintrunner, RUFF/W291 warning: Trailing whitespace.]
-                             tune_cfg=weight_only_node_config)
+                             weight_config=weight_only_node_config,
+                             ratios=ratios)
     elif algorithm == "GPTQ":
         from neural_compressor.adaptor.ox_utils.weight_only import gptq_quantize
         percdamp = weight_only_config.percdamp
@@ -186,7 +196,7 @@ def inc_dataloader():
         dataloader = inc_dataloader()

         model = gptq_quantize(model=model_input,
-                              tune_cfg=weight_only_node_config,
+                              weight_config=weight_only_node_config,
                               dataloader=dataloader,
                               n_samples=-1,
                               percdamp=percdamp,
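Taken together, this file's changes track the renamed neural-compressor keyword (tune_cfg becomes weight_config), add a minimum-version check, and surface the new ratios option on the RTN path. A minimal usage sketch under stated assumptions: the module path is inferred from the file's location in the tree, the model paths are placeholders, and ratios is left at its default since its semantics are defined by neural-compressor:

    # Sketch: weight-only RTN quantization through the updated wrapper.
    # Requires neural-compressor >= 2.3.0, per the version check added above.
    from onnxruntime.quantization.quantize_weight_only import (
        RTNWeightOnlyQuantConfig,
        quantize_weight_only,
    )

    config = RTNWeightOnlyQuantConfig(group_size=32, scheme="sym")
    quantize_weight_only("model_fp32.onnx", "model_int4.onnx", config)

The second changed file, the unit test covering both the RTN and GPTQ paths, follows.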
@@ -33,20 +33,20 @@ def construct_model(output_model_path):
     initializers = []

[Code scanning / lintrunner, RUFF/W293 warning: Blank line contains whitespace.]
     # make mul node
-    mul_data = np.random.normal(0, 0.1, [1, 10]).astype(np.float32)
+    mul_data = np.random.normal(0, 0.1, [1, 32]).astype(np.float32)
     initializers.append(onnx.numpy_helper.from_array(mul_data, name="mul.data"))
     mul_node = onnx.helper.make_node("Mul", ["input", "mul.data"], ["mul.output"], "Mul_0")

     # make matmul node
-    matmul_weight = np.random.normal(0, 0.1, [10, 1]).astype(np.float32)
+    matmul_weight = np.random.normal(0, 0.1, [32, 1]).astype(np.float32)
     initializers.append(onnx.numpy_helper.from_array(matmul_weight, name="matmul.weight"))
     matmul_node = onnx.helper.make_node("MatMul",
[Code scanning / lintrunner, RUFF/W291 warning: Trailing whitespace.]
                                         ["mul.output", "matmul.weight"],
[Code scanning / lintrunner, RUFF/W291 warning: Trailing whitespace.]
                                         ["output"],
                                         "MatMul_1")

     # make graph
-    input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 10])
+    input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 32])
     output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 1])
     graph_name = "weight_only_quant_test"
     graph = helper.make_graph(
@@ -91,7 +91,7 @@ def test_quantize_weight_only_rtn(self):
             self,
             self._model_fp32_path,
             self._model_weight_only_path,
-            {"input": np.random.rand(1, 10).astype(np.float32)},
+            {"input": np.random.rand(1, 32).astype(np.float32)},
         )

         model_fp32 = ONNXModel(onnx.load(self._model_fp32_path))
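The GPTQ test in the next hunk feeds calibration data through a reader built by the input_feeds_neg_one_zero_one test helper. For orientation, a minimal reader of the kind consumed here, assuming onnxruntime's CalibrationDataReader base class (the class name and shapes are illustrative, not part of this commit):

    import numpy as np
    from onnxruntime.quantization import CalibrationDataReader

    class RandomDataReader(CalibrationDataReader):
        """Hypothetical reader yielding a few random calibration batches."""
        def __init__(self, n_batches=10, shape=(1, 32)):
            self._feeds = iter(
                [{"input": np.random.rand(*shape).astype(np.float32)}
                 for _ in range(n_batches)]
            )

        def get_next(self):
            # Return the next feed dict, or None when exhausted.
            return next(self._feeds, None)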
@@ -108,14 +108,14 @@ def test_quantize_weight_only_gptq(self):
         if not find_spec("neural_compressor"):
             self.skipTest("skip test_quantize_weight_only_gptq since neural_compressor is not installed")

[Code scanning / lintrunner, RUFF/W293 warning: Blank line contains whitespace.]
-        data_reader = input_feeds_neg_one_zero_one(10, {"input": [1, 10]})
+        data_reader = input_feeds_neg_one_zero_one(10, {"input": [1, 32]})
         weight_only_config = GPTQWeightOnlyQuantConfig(data_reader)
         quantize_weight_only(self._model_fp32_path, self._model_weight_only_path, weight_only_config)
         check_model_correctness(
             self,
             self._model_fp32_path,
             self._model_weight_only_path,
-            {"input": np.random.rand(1, 10).astype(np.float32)},
+            {"input": np.random.rand(1, 32).astype(np.float32)},
         )

         model_fp32 = ONNXModel(onnx.load(self._model_fp32_path))
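For completeness, the GPTQ path mirrors the RTN usage sketched earlier but consumes a calibration reader. Another hedged sketch under the same assumptions (placeholder model paths, module path inferred from the file's location):

    # Sketch: GPTQ weight-only quantization through the updated wrapper.
    from onnxruntime.quantization.quantize_weight_only import (
        GPTQWeightOnlyQuantConfig,
        quantize_weight_only,
    )

    data_reader = RandomDataReader()  # the reader sketched above
    config = GPTQWeightOnlyQuantConfig(data_reader)
    quantize_weight_only("model_fp32.onnx", "model_int4.onnx", config)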