diff --git a/mgeconvert/backend/ir_to_caffe/caffe_converter.py b/mgeconvert/backend/ir_to_caffe/caffe_converter.py index 99f6773..5754bd5 100644 --- a/mgeconvert/backend/ir_to_caffe/caffe_converter.py +++ b/mgeconvert/backend/ir_to_caffe/caffe_converter.py @@ -131,8 +131,10 @@ def __init__( self.quantizer = quantizer self.convert_backend = convert_backend - def update_quantize_dict(self, tensor): - self.quantizer.parse_quant_info(tensor) + def update_quantize_dict(self, tensor, name=None): + if tensor.q_dtype is not None: + tname = name if name is not None else tensor.name + self.quantizer.set_quant_info(tname, tensor) def dump(self, proto_file, caffe_file=None): CaffeNet = cp.NetParameter(layer=self.layers) diff --git a/mgeconvert/backend/ir_to_caffe/caffe_op.py b/mgeconvert/backend/ir_to_caffe/caffe_op.py index 1945b3c..23644dc 100644 --- a/mgeconvert/backend/ir_to_caffe/caffe_op.py +++ b/mgeconvert/backend/ir_to_caffe/caffe_op.py @@ -858,6 +858,7 @@ def _reduce(opr, context): param = cp.ReshapeParameter(shape=cp.BlobShape(dim=opr.out_tensors[0].shape)) bottom = top name = opr.out_tensors[0].name + context.gen_name + context.update_quantize_dict(opr.out_tensors[0], name=name) top = [context.reset_blob_name(opr.out_tensors[0], name)] context.add_layer( cp.LayerParameter( @@ -882,6 +883,8 @@ def _reduce(opr, context): ), ) ) + for tname in top: + context.update_quantize_dict(opr.inp_tensors[0], name=tname) bottom = top top = [context.set_blob_name(opr.out_tensors[0], opr.out_tensors[0].name)] context.add_layer( @@ -900,6 +903,7 @@ def _reduce(opr, context): ) bottom = top name = opr.out_tensors[0].name + context.gen_name + context.update_quantize_dict(opr.out_tensors[0], name=name) top = [context.reset_blob_name(opr.out_tensors[0], name)] context.add_layer( cp.LayerParameter( @@ -1143,15 +1147,8 @@ def silu(opr, context): inp = opr.inp_tensors[0] sigmoid_op = SigmoidOpr() sigmoid_op.add_inp_tensors(inp) - fake_sigmoid_out = IRTensor( - inp.name + "_sigmoid_out", - inp.shape, - inp.dtype, - scale=inp.scale, - zero_point=inp.zero_point, - q_type=inp.q_dtype, - ) - context.update_quantize_dict(fake_sigmoid_out) + assert inp.scale is None, "Can not convert quantized silu" + fake_sigmoid_out = IRTensor(inp.name + "_sigmoid_out", inp.shape, inp.dtype,) sigmoid_op.add_out_tensors(fake_sigmoid_out) context.add_layer(_gen_layer(sigmoid_op, sigmoid_op.name, context)) mul_op = MulOpr() @@ -1265,10 +1262,8 @@ def _fake_repeat(opr, context): opr.inp_tensors[0].name + "_unsqueeze", unsqueeze_shape, opr.inp_tensors[0].dtype, - q_type=opr.inp_tensors[0].q_dtype, - scale=opr.inp_tensors[0].scale, - zero_point=opr.inp_tensors[0].zero_point, ) + fake_unsqueeze_out.set_qparams_from_other_tensor(opr.inp_tensors[0]) context.update_quantize_dict(fake_unsqueeze_out) param = cp.ReshapeParameter(shape=cp.BlobShape(dim=unsqueeze_shape)) bottom = [context.get_blob_name(opr.inp_tensors[0])] @@ -1288,10 +1283,8 @@ def _fake_repeat(opr, context): opr.inp_tensors[0].name + "_unsqueeze_tile", unsqueeze_shape, opr.inp_tensors[0].dtype, - q_type=opr.inp_tensors[0].q_dtype, - scale=opr.inp_tensors[0].scale, - zero_point=opr.inp_tensors[0].zero_point, ) + fake_tile.set_qparams_from_other_tensor(opr.inp_tensors[0]) context.update_quantize_dict(fake_tile) bottom = top top = [context.set_blob_name(fake_tile, fake_tile.name)] diff --git a/mgeconvert/backend/ir_to_onnx/onnx_op.py b/mgeconvert/backend/ir_to_onnx/onnx_op.py index 4cad216..6d4b269 100644 --- a/mgeconvert/backend/ir_to_onnx/onnx_op.py +++ b/mgeconvert/backend/ir_to_onnx/onnx_op.py @@ -61,7 +61,6 @@ TrueDivOpr, TypeCvtOpr, ) -from ...converter_ir.ir_tensor import IRTensor from ...frontend.mge_to_ir.mge_utils import get_symvar_value mge2onnx_dtype_mapping = { @@ -157,17 +156,9 @@ def _parse_out_tensor_quant_info(self): for out in self._opr.out_tensors: self.quantizer.parse_quant_info(out) - def _parse_fake_tensor_info(self, fake_name, relate_tensor, shape=None, dtype=None): - if self.quantizer is not None: - fake_tensor = IRTensor( - name=fake_name, - shape=shape if shape is not None else relate_tensor.shape, - dtype=dtype if dtype is not None else relate_tensor.dtype, - scale=relate_tensor.scale, - zero_point=relate_tensor.zero_point, - q_type=relate_tensor.q_dtype, - ) - self.quantizer.parse_quant_info(fake_tensor) + def _parse_fake_tensor_info(self, fake_name, relate_tensor): + if self.quantizer is not None and relate_tensor.q_dtype is not None: + self.quantizer.set_quant_info(fake_name, relate_tensor) def _get_inputs(self, exclude_idx=None): """ @@ -294,7 +285,7 @@ def convert(self): const_1, mge2onnx_dtype_mapping[opr.inp_tensors[0].dtype], [], [1.0] ), ) - self._parse_fake_tensor_info(const_1, opr.inp_tensors[0], shape=()) + self._parse_fake_tensor_info(const_1, opr.inp_tensors[0]) nodes.append(const_1_node) add = exp + "_add_const_1" add_node = onnx.helper.make_node("Add", [exp, const_1], [add]) @@ -416,9 +407,7 @@ def convert(self): self._net_sources.extend(slice_net_sources) nodes.append(slice_op) if len(squeeze_axis) > 0: - self._parse_fake_tensor_info( - slice_outputs[0], opr.inp_tensors[0], opr.out_tensors[0].shape - ) + self._parse_fake_tensor_info(slice_outputs[0], opr.inp_tensors[0]) Squeeze = onnx.helper.make_node( "Squeeze", slice_outputs, outputs, axes=squeeze_axis ) @@ -645,14 +634,7 @@ def convert(self): grad_out_tile = onnx.helper.make_node( "Tile", [inputs[1], grad_out_tile_in], [grad_out_tile_out] ) - grad_tiled_shape = _infer_shape_by_numpy( - self._opr.inp_tensors[1].shape, - np.tile, - [1, opr.src_shape[1] // opr.group, 1, 1], - ) - self._parse_fake_tensor_info( - grad_out_tile_out, self._opr.inp_tensors[1], grad_tiled_shape - ) + self._parse_fake_tensor_info(grad_out_tile_out, self._opr.inp_tensors[1]) nodes.append(grad_out_tile) # Reshape @@ -683,18 +665,7 @@ def convert(self): ) nodes.append(reshape) - grad_reshape_shape = ( - opr.grad_out_shape[0] - * opr.grad_out_shape[1] - * opr.src_shape[1] - // opr.group, - 1, - opr.grad_out_shape[2], - opr.grad_out_shape[3], - ) - self._parse_fake_tensor_info( - grad_out_reshape_out, self._opr.out_tensors[0], shape=grad_reshape_shape - ) + self._parse_fake_tensor_info(grad_out_reshape_out, self._opr.out_tensors[0]) # Reshape # src: (ni, ci, hi, wi) -> (1, ni x ci, hi, wi) @@ -721,15 +692,7 @@ def convert(self): ) nodes.append(reshape) - src_shape = ( - 1, - opr.src_shape[0] * opr.src_shape[1], - opr.src_shape[2], - opr.src_shape[3], - ) - self._parse_fake_tensor_info( - src_reshape_out, self._opr.inp_tensors[0], shape=src_shape - ) + self._parse_fake_tensor_info(src_reshape_out, self._opr.inp_tensors[0]) # Conv: # group = ni * ci # src(1, ni x ci, hi, wi) + grad_out(no x co x ci / group, 1, ho, wo) @@ -815,9 +778,7 @@ def convert(self): self._net_sources.append(grad_weight_reshape2_source) self._parameters.append(grad_weight_reshape2_param) grad_weight_reshape2_out = outputs[0] + "_grad_weight_reshape2_out" - self._parse_fake_tensor_info( - grad_weight_reshape2_out, opr.out_tensors[0], shape=opr.out_tensors[0].shape - ) + self._parse_fake_tensor_info(grad_weight_reshape2_out, opr.out_tensors[0]) reshape = onnx.helper.make_node( "Reshape", [grad_weight_reduce_out, grad_weight_reshape2_in], @@ -995,15 +956,7 @@ def convert(self): ) out_nodes.append(nodes) if len(inputs) > 1: - reduce_shape = _infer_shape_by_numpy( - self._opr.inp_tensors[0].shape, - np.sum, - axis=self._opr.axis, - keepdims=self._opr.keep_dims, - ) - self._parse_fake_tensor_info( - temp_node, self._opr.inp_tensors[0], shape=reduce_shape - ) + self._parse_fake_tensor_info(temp_node, self._opr.inp_tensors[0]) shape = inputs[1] + "_shape" shape_tensor = onnx.helper.make_tensor_value_info( shape, @@ -1053,9 +1006,7 @@ def convert(self): to=mge2onnx_dtype_mapping[np.int64], ) inputs[1] = inputs[1] + "_int64" - self._parse_fake_tensor_info( - inputs[1], self._opr.inp_tensors[1], dtype=np.int64 - ) + self._parse_fake_tensor_info(inputs[1], self._opr.inp_tensors[1]) outputs = self._get_outputs() broadcast_node = onnx.helper.make_node("Expand", inputs, outputs) return [typecvt_node, broadcast_node], self._net_sources, self._parameters @@ -1103,15 +1054,7 @@ def convert(self): flatten = onnx.helper.make_node( "Flatten", inputs=inputs, outputs=[tmp_name], axis=opr.start_axis, ) - reshape_shape = tuple( - list(opr.inp_tensors[0].shape)[: opr.start_axis] + [-1] - ) - flatten_shape = _infer_shape_by_numpy( - opr.inp_tensors[0].shape, np.reshape, reshape_shape - ) - self._parse_fake_tensor_info( - tmp_name, opr.inp_tensors[0], shape=flatten_shape - ) + self._parse_fake_tensor_info(tmp_name, opr.inp_tensors[0]) nodes.append(flatten) squeeze = onnx.helper.make_node( "Squeeze", [tmp_name], outputs, axes=[opr.start_axis] @@ -1298,12 +1241,7 @@ def convert(self): ) nodes.append(unsqueeze) - unsqueeze_shape = _infer_shape_by_numpy( - opr.inp_tensors[0].shape, np.expand_dims, axis=(opr.axis + 1) - ) - self._parse_fake_tensor_info( - unsqueeze_out, opr.inp_tensors[0], shape=unsqueeze_shape - ) + self._parse_fake_tensor_info(unsqueeze_out, opr.inp_tensors[0]) shape = list(opr.inp_tensors[0].shape) shape.insert(opr.axis, 1) @@ -1327,8 +1265,7 @@ def convert(self): "Tile", inputs=[unsqueeze_out, tile_repeats], outputs=[repeat_name], ) - tile_shape = _infer_shape_by_numpy(unsqueeze_shape, np.tile, repeat_shape) - self._parse_fake_tensor_info(repeat_name, opr.inp_tensors[0], shape=tile_shape) + self._parse_fake_tensor_info(repeat_name, opr.inp_tensors[0]) nodes.append(repeat) shape_tensor_name_after = repeat_name + "_reshape_after" diff --git a/mgeconvert/converter_ir/ir_quantizer.py b/mgeconvert/converter_ir/ir_quantizer.py index 0da6af2..77bb8a7 100644 --- a/mgeconvert/converter_ir/ir_quantizer.py +++ b/mgeconvert/converter_ir/ir_quantizer.py @@ -40,7 +40,8 @@ def quantize(self, tensor: IRTensor): if tensor.qmin is not None and tensor.qmax is not None: v_min = tensor.qmin v_max = tensor.qmax - elif np.issubdtype(dt, np.integer): + else: + assert np.issubdtype(dt, np.integer) v_min = np.iinfo(dt).min v_max = np.iinfo(dt).max value = np.clip(value, v_min, v_max) @@ -55,47 +56,70 @@ def save_quantize_params(self, irgraph): for t in all_tensors: self.parse_quant_info(t) - def parse_quant_info(self, t: IRTensor): - if t.q_dtype is None: - return + def fake_quant(self, t: IRTensor): + assert t.q_dtype is not None and t.np_data is not None + inp = megengine.tensor(t.np_data, dtype="float32") + scale = megengine.tensor([float(t.scale)]) + zp = float(t.zero_point) if t.zero_point else 0.0 + zero_point = megengine.tensor([zp]) + if t.qmin is not None and t.qmax is not None: + v_min = t.qmin + v_max = t.qmax + else: + dt = np.dtype(t.np_dtype) + assert np.issubdtype(dt, np.integer) + v_min = np.iinfo(dt).min + v_max = np.iinfo(dt).max + from megengine.core._imperative_rt.core2 import ( # pylint:disable=import-error + apply, + ) + from megengine.core.ops.builtin import FakeQuant + + return apply(FakeQuant(qmin=v_min, qmax=v_max), inp, scale, zero_point)[ + 0 + ].numpy() + + def get_quant_info(self, t: IRTensor): + assert t.q_dtype is not None + assert isinstance(t.q_dtype, str) np_dtype = t.np_dtype + q_dtype = t.q_dtype[1:] if t.q_dtype[0] == "q" else t.q_dtype try: dt = np.dtype(np_dtype) except TypeError: dt = None v_max, v_min = None, None - is_weight = bool(t.np_data is not None) + is_weight = t.np_data is not None if t.qmin is not None and t.qmax is not None: v_min = t.qmin v_max = t.qmax elif dt is not None and np.issubdtype(dt, np.integer): v_min = np.iinfo(dt).min v_max = np.iinfo(dt).max - if self.param_fake_quant and is_weight: - if t.scale is not None: - inp = megengine.tensor(t.np_data) - scale = megengine.tensor(t.scale) - zp = float(t.zero_point) if t.zero_point else 0.0 - zero_point = megengine.tensor(zp) - from megengine.core._imperative_rt.core2 import ( # pylint:disable=import-error - apply, - ) - from megengine.core.ops.builtin import FakeQuant + assert v_max is not None and v_min is not None + return { + "dtype": q_dtype, + "qmin": v_min, + "qmax": v_max, + "scale": t.scale, + "zero_point": t.zero_point, + "is_weight": is_weight, + } + + def set_quant_info(self, name, t: IRTensor): + """Set tensor named `name` the same quant info as tensor `t`. + """ + self.quant_params[name] = self.get_quant_info(t) - t.np_data = apply( - FakeQuant(qmin=v_min, qmax=v_max), inp, scale, zero_point - )[0].numpy() + def parse_quant_info(self, t: IRTensor): + if t.q_dtype is None: + return + is_weight = t.np_data is not None + if self.param_fake_quant and is_weight: + t.np_data = self.fake_quant(t) else: - param = { - "dtype": np_dtype, - "qmin": v_min, - "qmax": v_max, - "scale": t.scale, - "zero_point": t.zero_point, - "is_weight": is_weight, - } - self.quant_params[t.name] = param + self.quant_params[t.name] = self.get_quant_info(t) def dump_quant_param(self, path="quant_params.json"): if len(self.quant_params) == 0: diff --git a/mgeconvert/converter_ir/ir_transform.py b/mgeconvert/converter_ir/ir_transform.py index f4b0696..1b8a827 100644 --- a/mgeconvert/converter_ir/ir_transform.py +++ b/mgeconvert/converter_ir/ir_transform.py @@ -239,8 +239,6 @@ def _transpose_pattern_as_input(net): dtype=np.int32, np_data=np.array(op.pattern, dtype=np.int32), owner_opr=op, - q_type="int32", - np_dtype="int32", axis=None, ) op.add_inp_tensors(perm_tensor) @@ -268,8 +266,6 @@ def _pad_width_as_input(net): dtype=np.int32, np_data=padddings, owner_opr=op, - q_type="int32", - np_dtype="int32", axis=None, ) op.add_inp_tensors(pad_tensor) @@ -287,8 +283,6 @@ def _reduce_axis_as_input(net): dtype=np.int32, np_data=np.array(op.axis, dtype=np.int32), owner_opr=op, - q_type="int32", - np_dtype="int32", axis=None, ) op.add_inp_tensors(axis_tensor) @@ -332,8 +326,6 @@ def have_padding(opr): dtype=np.int32, owner_opr=None, np_data=np_data, - q_type="int32", - np_dtype="int32", axis=None, ) net.add_tensor(new_tensor_id, pad_in_tensor) @@ -393,8 +385,6 @@ def _deconv_shape_as_input(net: IRGraph): dtype=np.int32, owner_opr=op, np_data=np_data, - q_type="int32", - np_dtype="int32", axis=None, ) shape_tensor = net.get_tensor(new_tensor_id, shape_symvar) @@ -427,8 +417,6 @@ def _resize_params_as_input(net): shape=(2,), dtype=np.int32, np_data=np.array(op.out_size, dtype=np.int32), - q_type="int32", - np_dtype="int32", axis=None, ) op.add_inp_tensors(out_size_tensor) @@ -578,8 +566,6 @@ def make_input(axis, param, init_value): dtype=np.int32, np_data=np.array(ret, dtype=np.int32), owner_opr=op, # pylint:disable=cell-var-from-loop - q_type="int32", - np_dtype="int32", ) return ret