From 7f3c99ef57c6451537877bd85287d68bedcc97ea Mon Sep 17 00:00:00 2001
From: Wei-Sheng Chin
Date: Sun, 8 Oct 2023 22:09:23 -0700
Subject: [PATCH] Fix Pad's quantization (#17807)

Fix #17760. The upstream exporter creates an empty string as Pad's
optional 3rd input, and the quantization tool (1) considers that empty
string a valid tensor name and (2) adds corresponding invalid
quantization nodes. This PR adds a condition check so that the
quantization tool handles the empty optional input correctly.
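For illustration, the pattern that triggers the bug looks like the
sketch below (built with the onnx helper API; the tensor names "x",
"pads", and "y" are invented for the example):

    from onnx import helper

    # The exporter emits Pad with its optional 3rd input, constant_value,
    # present but spelled as the empty string, meaning "not provided".
    pad_node = helper.make_node(
        "Pad",
        inputs=["x", "pads", ""],
        outputs=["y"],
        mode="constant",
    )

Before this change, _get_quantize_input_nodes built tensor and node
names from that empty string by concatenation, yielding a phantom
"_quantized" tensor and a node named "_QuantizeLinear".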
---
 .../tools/quantization/onnx_quantizer.py      |   1 +
 .../tools/quantization/operators/pad.py       |  14 ++-
 .../test/python/quantization/test_op_pad.py   | 118 ++++++++++++++++++
 3 files changed, 131 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/python/tools/quantization/onnx_quantizer.py b/onnxruntime/python/tools/quantization/onnx_quantizer.py
index 447ce72e2e17f..ab58143e9c2ab 100644
--- a/onnxruntime/python/tools/quantization/onnx_quantizer.py
+++ b/onnxruntime/python/tools/quantization/onnx_quantizer.py
@@ -645,6 +645,7 @@ def _get_quantize_input_nodes(self, node, input_index, qType, given_scale_name=N
         :return: List of newly created nodes in NodeProto format.
         """
         input_name = node.input[input_index]
+        assert input_name != "", "Cannot access undefined variable in graph."
         output_name = input_name + TENSOR_NAME_QUANT_SUFFIX
         ql_node_name = input_name + "_QuantizeLinear"
 
diff --git a/onnxruntime/python/tools/quantization/operators/pad.py b/onnxruntime/python/tools/quantization/operators/pad.py
index 2d1690e545263..25818de1b76bd 100644
--- a/onnxruntime/python/tools/quantization/operators/pad.py
+++ b/onnxruntime/python/tools/quantization/operators/pad.py
@@ -31,7 +31,7 @@ def quantize(self):
             kwargs.update(kv)
 
         if "mode" not in kwargs or kwargs["mode"] == b"constant":
-            if len(node.input) > 2:  # There is 3rd input 'constant_value'
+            if len(node.input) > 2 and node.input[2] != "":  # There is a 3rd input, 'constant_value'
                 zp_tensor = self.quantizer.model.get_initializer(quantized_input_value.zp_name)
                 scale_tensor = self.quantizer.model.get_initializer(quantized_input_value.scale_name)
                 if zp_tensor is None or scale_tensor is None:
@@ -72,7 +72,17 @@ def quantize(self):
                 self.quantizer.new_nodes.extend(pad_value_qnodes)
                 node.input[2] = pad_value_qnodes[0].output[0]
             else:
-                node.input.extend([quantized_input_value.zp_name])  # pad zero_point for original zero
+                # In the quantized domain, the original `zero` maps to
+                # quantized_input_value.zp_name. Thus, padding the original
+                # tensor with 0 becomes padding the quantized tensor with
+                # its zero point.
+                if len(node.input) == 2:
+                    # Feed the quantization zero point to the Pad node.
+                    node.input.append(quantized_input_value.zp_name)
+                else:
+                    # Assign the quantization zero point to the Pad node.
+                    assert node.input[2] == ""
+                    node.input[2] = quantized_input_value.zp_name
 
         # Create an entry for output quantized value
         quantized_output_value = QuantizedValue(
diff --git a/onnxruntime/test/python/quantization/test_op_pad.py b/onnxruntime/test/python/quantization/test_op_pad.py
index c413dedbef051..005f4752c16cc 100644
--- a/onnxruntime/test/python/quantization/test_op_pad.py
+++ b/onnxruntime/test/python/quantization/test_op_pad.py
@@ -5,6 +5,7 @@
 # license information.
 # --------------------------------------------------------------------------
+import itertools
 import unittest
 
 import numpy as np
 
@@ -404,6 +405,123 @@ def test_static_mode_constant_value_edge_case(self):
             "constant", constant_value=0.1, quantize_mode="static", extra_options={"dual_feed": True}
         )
 
+    @classmethod
+    def construct_model_add_pad_add(
+        cls,
+        # Name of the model input, i.e., "input" in the illustration graph below.
+        name,
+        # Name of the model output.
+        final_name,
+        # Model input shape.
+        shape,
+    ):
+        # The graph implemented below is
+        #  `name`, `name` -> Add -> "first_add_output"
+        #  "first_add_output", "pads" -> Pad -> "pad_output"
+        #  "pad_output", "pad_output" -> Add -> `final_name`
+        # where `name` is the 2nd argument of this function,
+        # `final_name` is the 3rd argument of this function,
+        # and the remaining lowercase strings are tensor names in the graph.
+
+        input_name = name
+        first_add_output_name = "first_add_output"
+        pads_name = "pads"
+        pad_output_name = "pad_output"
+        second_add_output_name = final_name
+
+        input_shape = shape
+        input_rank = len(input_shape)
+
+        input_tensor = helper.make_tensor_value_info(input_name, TensorProto.FLOAT, input_shape)
+
+        first_add_node = helper.make_node(
+            "Add",
+            [input_name, input_name],
+            [first_add_output_name],
+            name="FirstAdd",
+        )
+
+        pads = [1, 2] * input_rank
+        pads_initializer = helper.make_tensor(
+            pads_name,
+            TensorProto.INT64,
+            # 1-D tensor of shape [2 * input_rank].
+            [len(pads)],
+            pads,
+        )
+        pad_node = helper.make_node(
+            "Pad",
+            [first_add_output_name, pads_name, ""],
+            [pad_output_name],
+            name="PadNode",
+            mode="constant",
+        )
+        pad_output_shape = tuple(input_shape[i] + pads[i] + pads[i + input_rank] for i in range(input_rank))
+
+        second_add_node = helper.make_node(
+            "Add",
+            [pad_output_name, pad_output_name],
+            [second_add_output_name],
+            name="SecondAdd",
+        )
+
+        output_tensor = helper.make_tensor_value_info(second_add_output_name, TensorProto.FLOAT, pad_output_shape)
+
+        graph = helper.make_graph(
+            [first_add_node, pad_node, second_add_node],
+            "TestPadWithEmptyStringInput",
+            [input_tensor],
+            [output_tensor],
+            initializer=[pads_initializer],
+        )
+        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
+        model.ir_version = 7  # use stable onnx ir version
+
+        return model
+
+    def test_pad_with_empty_string_input_name(self):
+        np.random.seed(108)
+        model_fp32_path = "pad_with_empty_string_input_name_fp32.onnx"
+        model_i8_path = "pad_with_empty_string_input_name_i8.onnx"
+
+        shape = [
+            3,
+        ]
+        name = "input"
+        data_reader = self.input_feeds(
+            1,
+            {
+                name: shape,
+            },
+        )
+
+        model_fp32 = TestOpQuatizerPad.construct_model_add_pad_add(name=name, shape=shape, final_name="output")
+
+        onnx.save(model_fp32, model_fp32_path)
+
+        self.quantize_model(
+            model_fp32_path,
+            model_i8_path,
+            data_reader=data_reader,
+        )
+
+        model_i8 = onnx.load(model_i8_path)
+
+        # Assert quantization really happens.
+        self.assertEqual(model_i8.graph.node[0].op_type, "QuantizeLinear")
+        self.assertEqual(model_i8.graph.node[1].op_type, "QLinearAdd")
+        self.assertEqual(model_i8.graph.node[2].op_type, "Pad")
+        self.assertEqual(model_i8.graph.node[3].op_type, "QLinearAdd")
+        self.assertEqual(model_i8.graph.node[4].op_type, "DequantizeLinear")
+
+        for node in model_i8.graph.node:
+            # Check that no empty string flows into the quantization process.
+            # Previously, an optional input specified as `""` in NodeProto.input
+            # could generate a phantom `"_quantized"` tensor name during quantization.
+            for name in itertools.chain(node.input, node.output):
+                self.assertNotEqual(name, "")
+                self.assertNotEqual(name, "_quantized")
+
 
 if __name__ == "__main__":
     unittest.main()
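For reviewers who want to reproduce the failure (or verify the fix)
outside the unit-test harness, here is a minimal sketch. It assumes a
float model file containing a Pad node whose 3rd input is ""; the file
names match those used by the new test, while the calibration reader is
invented for this example:

    import numpy as np
    from onnxruntime.quantization import (
        CalibrationDataReader,
        QuantFormat,
        quantize_static,
    )

    class RandomDataReader(CalibrationDataReader):
        # Feeds a few random samples for calibration (illustration only).
        def __init__(self, input_name, shape, count=4):
            self._batches = iter(
                [{input_name: np.random.rand(*shape).astype(np.float32)} for _ in range(count)]
            )

        def get_next(self):
            return next(self._batches, None)

    quantize_static(
        "pad_with_empty_string_input_name_fp32.onnx",
        "pad_with_empty_string_input_name_i8.onnx",
        RandomDataReader("input", [3]),
        quant_format=QuantFormat.QOperator,  # exercises operators/pad.py
    )

Without this patch, the tool emits nodes wired to the phantom
"_quantized" tensor. With it, the Pad node's empty constant_value slot
is filled with the input's quantization zero point, which is the
quantized representation of the original 0 padding value.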