refactor(ir_quantizer): refactor ir quantizer and fix missing quant info in caffe reduce opr
dingshaohua960303 committed Jun 6, 2022
1 parent 3603e18 commit 9e96da5
Showing 5 changed files with 77 additions and 135 deletions.
6 changes: 4 additions & 2 deletions mgeconvert/backend/ir_to_caffe/caffe_converter.py
@@ -131,8 +131,10 @@ def __init__(
         self.quantizer = quantizer
         self.convert_backend = convert_backend

-    def update_quantize_dict(self, tensor):
-        self.quantizer.parse_quant_info(tensor)
+    def update_quantize_dict(self, tensor, name=None):
+        if tensor.q_dtype is not None:
+            tname = name if name is not None else tensor.name
+            self.quantizer.set_quant_info(tname, tensor)

     def dump(self, proto_file, caffe_file=None):
         CaffeNet = cp.NetParameter(layer=self.layers)
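For context, a minimal sketch of the new helper's contract: update_quantize_dict is now a no-op for float tensors, and a quantized tensor can be registered under an alias instead of its own name — the hook the reduce fix below relies on when it renames blobs. ToyQuantizer and ToyConverter are illustrative stand-ins, not the real mgeconvert classes:

from types import SimpleNamespace

class ToyQuantizer:
    def __init__(self):
        self.quant_params = {}

    def set_quant_info(self, name, tensor):
        # Key the record by `name`, which need not equal tensor.name.
        self.quant_params[name] = {
            "dtype": tensor.q_dtype,
            "scale": tensor.scale,
            "zero_point": tensor.zero_point,
        }

class ToyConverter:
    def __init__(self, quantizer):
        self.quantizer = quantizer

    def update_quantize_dict(self, tensor, name=None):
        if tensor.q_dtype is not None:  # float tensors are skipped entirely
            tname = name if name is not None else tensor.name
            self.quantizer.set_quant_info(tname, tensor)

conv = ToyConverter(ToyQuantizer())
t = SimpleNamespace(name="reduce_out", q_dtype="qint8", scale=0.02, zero_point=0)
conv.update_quantize_dict(t)                       # stored under "reduce_out"
conv.update_quantize_dict(t, name="reduce_out_1")  # stored under an alias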
23 changes: 8 additions & 15 deletions mgeconvert/backend/ir_to_caffe/caffe_op.py
@@ -858,6 +858,7 @@ def _reduce(opr, context):
     param = cp.ReshapeParameter(shape=cp.BlobShape(dim=opr.out_tensors[0].shape))
     bottom = top
     name = opr.out_tensors[0].name + context.gen_name
+    context.update_quantize_dict(opr.out_tensors[0], name=name)
     top = [context.reset_blob_name(opr.out_tensors[0], name)]
     context.add_layer(
         cp.LayerParameter(
@@ -882,6 +883,8 @@ def _reduce(opr, context):
             ),
         )
     )
+    for tname in top:
+        context.update_quantize_dict(opr.inp_tensors[0], name=tname)
     bottom = top
     top = [context.set_blob_name(opr.out_tensors[0], opr.out_tensors[0].name)]
     context.add_layer(
@@ -900,6 +903,7 @@ def _reduce(opr, context):
     )
     bottom = top
     name = opr.out_tensors[0].name + context.gen_name
+    context.update_quantize_dict(opr.out_tensors[0], name=name)
     top = [context.reset_blob_name(opr.out_tensors[0], name)]
     context.add_layer(
         cp.LayerParameter(
@@ -1143,15 +1147,8 @@ def silu(opr, context):
     inp = opr.inp_tensors[0]
     sigmoid_op = SigmoidOpr()
     sigmoid_op.add_inp_tensors(inp)
-    fake_sigmoid_out = IRTensor(
-        inp.name + "_sigmoid_out",
-        inp.shape,
-        inp.dtype,
-        scale=inp.scale,
-        zero_point=inp.zero_point,
-        q_type=inp.q_dtype,
-    )
-    context.update_quantize_dict(fake_sigmoid_out)
+    assert inp.scale is None, "Can not convert quantized silu"
+    fake_sigmoid_out = IRTensor(inp.name + "_sigmoid_out", inp.shape, inp.dtype,)
     sigmoid_op.add_out_tensors(fake_sigmoid_out)
     context.add_layer(_gen_layer(sigmoid_op, sigmoid_op.name, context))
     mul_op = MulOpr()
@@ -1265,10 +1262,8 @@ def _fake_repeat(opr, context):
         opr.inp_tensors[0].name + "_unsqueeze",
         unsqueeze_shape,
         opr.inp_tensors[0].dtype,
-        q_type=opr.inp_tensors[0].q_dtype,
-        scale=opr.inp_tensors[0].scale,
-        zero_point=opr.inp_tensors[0].zero_point,
     )
+    fake_unsqueeze_out.set_qparams_from_other_tensor(opr.inp_tensors[0])
     context.update_quantize_dict(fake_unsqueeze_out)
     param = cp.ReshapeParameter(shape=cp.BlobShape(dim=unsqueeze_shape))
     bottom = [context.get_blob_name(opr.inp_tensors[0])]
@@ -1288,10 +1283,8 @@ def _fake_repeat(opr, context):
         opr.inp_tensors[0].name + "_unsqueeze_tile",
         unsqueeze_shape,
         opr.inp_tensors[0].dtype,
-        q_type=opr.inp_tensors[0].q_dtype,
-        scale=opr.inp_tensors[0].scale,
-        zero_point=opr.inp_tensors[0].zero_point,
     )
+    fake_tile.set_qparams_from_other_tensor(opr.inp_tensors[0])
     context.update_quantize_dict(fake_tile)
     bottom = top
     top = [context.set_blob_name(fake_tile, fake_tile.name)]
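For intuition on the headline fix: the Caffe lowering of a reduce emits a Reshape/Reduction/Reshape chain and invents intermediate blob names via context.gen_name; before this commit those generated names never reached the quantizer, so a quantized model lost scale and zero_point at exactly those blobs. A minimal sketch of the failure mode — the blob names here are hypothetical, the real ones come from context.gen_name:

# Hypothetical names standing in for context.gen_name output.
quant_params = {"reduce_out": {"scale": 0.02, "zero_point": 0}}

blobs_emitted = ["reduce_out_reshape0", "reduce_out", "reduce_out_reshape1"]

# Before the fix: intermediate blobs have no quant info.
missing = [b for b in blobs_emitted if b not in quant_params]
assert missing == ["reduce_out_reshape0", "reduce_out_reshape1"]

# After the fix: each generated name is registered with the quant info of
# the tensor it aliases, mirroring update_quantize_dict(..., name=...).
for b in missing:
    quant_params[b] = quant_params["reduce_out"]
assert all(b in quant_params for b in blobs_emitted)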
91 changes: 14 additions & 77 deletions mgeconvert/backend/ir_to_onnx/onnx_op.py
@@ -61,7 +61,6 @@
     TrueDivOpr,
     TypeCvtOpr,
 )
-from ...converter_ir.ir_tensor import IRTensor
 from ...frontend.mge_to_ir.mge_utils import get_symvar_value

 mge2onnx_dtype_mapping = {
@@ -157,17 +156,9 @@ def _parse_out_tensor_quant_info(self):
         for out in self._opr.out_tensors:
             self.quantizer.parse_quant_info(out)

-    def _parse_fake_tensor_info(self, fake_name, relate_tensor, shape=None, dtype=None):
-        if self.quantizer is not None:
-            fake_tensor = IRTensor(
-                name=fake_name,
-                shape=shape if shape is not None else relate_tensor.shape,
-                dtype=dtype if dtype is not None else relate_tensor.dtype,
-                scale=relate_tensor.scale,
-                zero_point=relate_tensor.zero_point,
-                q_type=relate_tensor.q_dtype,
-            )
-            self.quantizer.parse_quant_info(fake_tensor)
+    def _parse_fake_tensor_info(self, fake_name, relate_tensor):
+        if self.quantizer is not None and relate_tensor.q_dtype is not None:
+            self.quantizer.set_quant_info(fake_name, relate_tensor)

     def _get_inputs(self, exclude_idx=None):
         """
@@ -294,7 +285,7 @@ def convert(self):
                 const_1, mge2onnx_dtype_mapping[opr.inp_tensors[0].dtype], [], [1.0]
             ),
         )
-        self._parse_fake_tensor_info(const_1, opr.inp_tensors[0], shape=())
+        self._parse_fake_tensor_info(const_1, opr.inp_tensors[0])
         nodes.append(const_1_node)
         add = exp + "_add_const_1"
         add_node = onnx.helper.make_node("Add", [exp, const_1], [add])
@@ -416,9 +407,7 @@ def convert(self):
         self._net_sources.extend(slice_net_sources)
         nodes.append(slice_op)
         if len(squeeze_axis) > 0:
-            self._parse_fake_tensor_info(
-                slice_outputs[0], opr.inp_tensors[0], opr.out_tensors[0].shape
-            )
+            self._parse_fake_tensor_info(slice_outputs[0], opr.inp_tensors[0])
             Squeeze = onnx.helper.make_node(
                 "Squeeze", slice_outputs, outputs, axes=squeeze_axis
             )
@@ -645,14 +634,7 @@ def convert(self):
         grad_out_tile = onnx.helper.make_node(
             "Tile", [inputs[1], grad_out_tile_in], [grad_out_tile_out]
         )
-        grad_tiled_shape = _infer_shape_by_numpy(
-            self._opr.inp_tensors[1].shape,
-            np.tile,
-            [1, opr.src_shape[1] // opr.group, 1, 1],
-        )
-        self._parse_fake_tensor_info(
-            grad_out_tile_out, self._opr.inp_tensors[1], grad_tiled_shape
-        )
+        self._parse_fake_tensor_info(grad_out_tile_out, self._opr.inp_tensors[1])
         nodes.append(grad_out_tile)

         # Reshape
@@ -683,18 +665,7 @@
         )
         nodes.append(reshape)

-        grad_reshape_shape = (
-            opr.grad_out_shape[0]
-            * opr.grad_out_shape[1]
-            * opr.src_shape[1]
-            // opr.group,
-            1,
-            opr.grad_out_shape[2],
-            opr.grad_out_shape[3],
-        )
-        self._parse_fake_tensor_info(
-            grad_out_reshape_out, self._opr.out_tensors[0], shape=grad_reshape_shape
-        )
+        self._parse_fake_tensor_info(grad_out_reshape_out, self._opr.out_tensors[0])

         # Reshape
         # src: (ni, ci, hi, wi) -> (1, ni x ci, hi, wi)
@@ -721,15 +692,7 @@
         )
         nodes.append(reshape)

-        src_shape = (
-            1,
-            opr.src_shape[0] * opr.src_shape[1],
-            opr.src_shape[2],
-            opr.src_shape[3],
-        )
-        self._parse_fake_tensor_info(
-            src_reshape_out, self._opr.inp_tensors[0], shape=src_shape
-        )
+        self._parse_fake_tensor_info(src_reshape_out, self._opr.inp_tensors[0])
         # Conv:
         # group = ni * ci
         # src(1, ni x ci, hi, wi) + grad_out(no x co x ci / group, 1, ho, wo)
@@ -815,9 +778,7 @@ def convert(self):
         self._net_sources.append(grad_weight_reshape2_source)
         self._parameters.append(grad_weight_reshape2_param)
         grad_weight_reshape2_out = outputs[0] + "_grad_weight_reshape2_out"
-        self._parse_fake_tensor_info(
-            grad_weight_reshape2_out, opr.out_tensors[0], shape=opr.out_tensors[0].shape
-        )
+        self._parse_fake_tensor_info(grad_weight_reshape2_out, opr.out_tensors[0])
         reshape = onnx.helper.make_node(
             "Reshape",
             [grad_weight_reduce_out, grad_weight_reshape2_in],
@@ -995,15 +956,7 @@ def convert(self):
             )
             out_nodes.append(nodes)
         if len(inputs) > 1:
-            reduce_shape = _infer_shape_by_numpy(
-                self._opr.inp_tensors[0].shape,
-                np.sum,
-                axis=self._opr.axis,
-                keepdims=self._opr.keep_dims,
-            )
-            self._parse_fake_tensor_info(
-                temp_node, self._opr.inp_tensors[0], shape=reduce_shape
-            )
+            self._parse_fake_tensor_info(temp_node, self._opr.inp_tensors[0])
             shape = inputs[1] + "_shape"
             shape_tensor = onnx.helper.make_tensor_value_info(
                 shape,
@@ -1053,9 +1006,7 @@ def convert(self):
             to=mge2onnx_dtype_mapping[np.int64],
         )
         inputs[1] = inputs[1] + "_int64"
-        self._parse_fake_tensor_info(
-            inputs[1], self._opr.inp_tensors[1], dtype=np.int64
-        )
+        self._parse_fake_tensor_info(inputs[1], self._opr.inp_tensors[1])
         outputs = self._get_outputs()
         broadcast_node = onnx.helper.make_node("Expand", inputs, outputs)
         return [typecvt_node, broadcast_node], self._net_sources, self._parameters
@@ -1103,15 +1054,7 @@ def convert(self):
             flatten = onnx.helper.make_node(
                 "Flatten", inputs=inputs, outputs=[tmp_name], axis=opr.start_axis,
             )
-            reshape_shape = tuple(
-                list(opr.inp_tensors[0].shape)[: opr.start_axis] + [-1]
-            )
-            flatten_shape = _infer_shape_by_numpy(
-                opr.inp_tensors[0].shape, np.reshape, reshape_shape
-            )
-            self._parse_fake_tensor_info(
-                tmp_name, opr.inp_tensors[0], shape=flatten_shape
-            )
+            self._parse_fake_tensor_info(tmp_name, opr.inp_tensors[0])
            nodes.append(flatten)
            squeeze = onnx.helper.make_node(
                "Squeeze", [tmp_name], outputs, axes=[opr.start_axis]
@@ -1298,12 +1241,7 @@ def convert(self):
         )
         nodes.append(unsqueeze)

-        unsqueeze_shape = _infer_shape_by_numpy(
-            opr.inp_tensors[0].shape, np.expand_dims, axis=(opr.axis + 1)
-        )
-        self._parse_fake_tensor_info(
-            unsqueeze_out, opr.inp_tensors[0], shape=unsqueeze_shape
-        )
+        self._parse_fake_tensor_info(unsqueeze_out, opr.inp_tensors[0])

         shape = list(opr.inp_tensors[0].shape)
         shape.insert(opr.axis, 1)
@@ -1327,8 +1265,7 @@
             "Tile", inputs=[unsqueeze_out, tile_repeats], outputs=[repeat_name],
         )

-        tile_shape = _infer_shape_by_numpy(unsqueeze_shape, np.tile, repeat_shape)
-        self._parse_fake_tensor_info(repeat_name, opr.inp_tensors[0], shape=tile_shape)
+        self._parse_fake_tensor_info(repeat_name, opr.inp_tensors[0])

         nodes.append(repeat)
         shape_tensor_name_after = repeat_name + "_reshape_after"
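All the onnx_op.py deletions follow one pattern: _parse_fake_tensor_info no longer fabricates an IRTensor, so the shape and dtype plumbing — including every _infer_shape_by_numpy call above — becomes dead code, because the stored quant record carries scale, zero point, and dtype, none of which depend on shape. A small sketch of why the shape argument bought nothing; quant_record is a hypothetical stand-in for the stored entry:

from types import SimpleNamespace

def quant_record(tensor):
    # No shape anywhere in the record, hence no shape inference needed.
    return {
        "dtype": tensor.q_dtype,
        "scale": tensor.scale,
        "zero_point": tensor.zero_point,
    }

a = SimpleNamespace(q_dtype="qint8", scale=0.5, zero_point=0, shape=(1, 3, 8, 8))
b = SimpleNamespace(q_dtype="qint8", scale=0.5, zero_point=0, shape=(4, 4))
assert quant_record(a) == quant_record(b)  # shape never mattered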
78 changes: 51 additions & 27 deletions mgeconvert/converter_ir/ir_quantizer.py
@@ -40,7 +40,8 @@ def quantize(self, tensor: IRTensor):
         if tensor.qmin is not None and tensor.qmax is not None:
             v_min = tensor.qmin
             v_max = tensor.qmax
-        elif np.issubdtype(dt, np.integer):
+        else:
+            assert np.issubdtype(dt, np.integer)
             v_min = np.iinfo(dt).min
             v_max = np.iinfo(dt).max
         value = np.clip(value, v_min, v_max)
@@ -55,47 +56,70 @@ def save_quantize_params(self, irgraph):
         for t in all_tensors:
             self.parse_quant_info(t)

-    def parse_quant_info(self, t: IRTensor):
-        if t.q_dtype is None:
-            return
+    def fake_quant(self, t: IRTensor):
+        assert t.q_dtype is not None and t.np_data is not None
+        inp = megengine.tensor(t.np_data, dtype="float32")
+        scale = megengine.tensor([float(t.scale)])
+        zp = float(t.zero_point) if t.zero_point else 0.0
+        zero_point = megengine.tensor([zp])
+        if t.qmin is not None and t.qmax is not None:
+            v_min = t.qmin
+            v_max = t.qmax
+        else:
+            dt = np.dtype(t.np_dtype)
+            assert np.issubdtype(dt, np.integer)
+            v_min = np.iinfo(dt).min
+            v_max = np.iinfo(dt).max
+        from megengine.core._imperative_rt.core2 import ( # pylint:disable=import-error
+            apply,
+        )
+        from megengine.core.ops.builtin import FakeQuant
+
+        return apply(FakeQuant(qmin=v_min, qmax=v_max), inp, scale, zero_point)[
+            0
+        ].numpy()
+
+    def get_quant_info(self, t: IRTensor):
+        assert t.q_dtype is not None
         assert isinstance(t.q_dtype, str)
         np_dtype = t.np_dtype
         q_dtype = t.q_dtype[1:] if t.q_dtype[0] == "q" else t.q_dtype
         try:
             dt = np.dtype(np_dtype)
         except TypeError:
             dt = None

         v_max, v_min = None, None
-        is_weight = bool(t.np_data is not None)
+        is_weight = t.np_data is not None
         if t.qmin is not None and t.qmax is not None:
             v_min = t.qmin
             v_max = t.qmax
         elif dt is not None and np.issubdtype(dt, np.integer):
             v_min = np.iinfo(dt).min
             v_max = np.iinfo(dt).max
-        if self.param_fake_quant and is_weight:
-            if t.scale is not None:
-                inp = megengine.tensor(t.np_data)
-                scale = megengine.tensor(t.scale)
-                zp = float(t.zero_point) if t.zero_point else 0.0
-                zero_point = megengine.tensor(zp)
-                from megengine.core._imperative_rt.core2 import ( # pylint:disable=import-error
-                    apply,
-                )
-                from megengine.core.ops.builtin import FakeQuant
+        assert v_max is not None and v_min is not None
+        return {
+            "dtype": q_dtype,
+            "qmin": v_min,
+            "qmax": v_max,
+            "scale": t.scale,
+            "zero_point": t.zero_point,
+            "is_weight": is_weight,
+        }
+
+    def set_quant_info(self, name, t: IRTensor):
+        """Set tensor named `name` the same quant info as tensor `t`.
+        """
+        self.quant_params[name] = self.get_quant_info(t)

-                t.np_data = apply(
-                    FakeQuant(qmin=v_min, qmax=v_max), inp, scale, zero_point
-                )[0].numpy()
+    def parse_quant_info(self, t: IRTensor):
+        if t.q_dtype is None:
+            return
+        is_weight = t.np_data is not None
+        if self.param_fake_quant and is_weight:
+            t.np_data = self.fake_quant(t)
         else:
-            param = {
-                "dtype": np_dtype,
-                "qmin": v_min,
-                "qmax": v_max,
-                "scale": t.scale,
-                "zero_point": t.zero_point,
-                "is_weight": is_weight,
-            }
-            self.quant_params[t.name] = param
+            self.quant_params[t.name] = self.get_quant_info(t)

     def dump_quant_param(self, path="quant_params.json"):
         if len(self.quant_params) == 0:
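Summing up the ir_quantizer.py restructuring: one monolithic parse_quant_info becomes three single-purpose pieces — fake_quant (fake-quantize weight data), get_quant_info (build the record), and set_quant_info (store the record under any name). A simplified, self-contained mimic of that structure, with a NumPy round-trip standing in for MegEngine's FakeQuant op:

import numpy as np

class MiniQuantizer:
    """Toy mirror of IRQuantizer's new layout; not the real class."""

    def __init__(self, param_fake_quant=False):
        self.param_fake_quant = param_fake_quant
        self.quant_params = {}

    def fake_quant(self, t):
        # NumPy stand-in for FakeQuant: quantize, clip, dequantize.
        dt = np.dtype(t.np_dtype)
        q = np.clip(
            np.round(t.np_data / t.scale) + t.zero_point,
            np.iinfo(dt).min,
            np.iinfo(dt).max,
        )
        return ((q - t.zero_point) * t.scale).astype("float32")

    def get_quant_info(self, t):
        dt = np.dtype(t.np_dtype)
        return {
            "dtype": t.q_dtype,
            "qmin": int(np.iinfo(dt).min),
            "qmax": int(np.iinfo(dt).max),
            "scale": t.scale,
            "zero_point": t.zero_point,
            "is_weight": t.np_data is not None,
        }

    def set_quant_info(self, name, t):
        # The piece the caffe/onnx backends now call for renamed blobs.
        self.quant_params[name] = self.get_quant_info(t)

    def parse_quant_info(self, t):
        if t.q_dtype is None:
            return
        if self.param_fake_quant and t.np_data is not None:
            t.np_data = self.fake_quant(t)
        else:
            self.quant_params[t.name] = self.get_quant_info(t)

With this split, the backends' update_quantize_dict and _parse_fake_tensor_info helpers reduce to thin guards around set_quant_info, which is what makes registering one tensor's quant info under several blob names cheap.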