From ff7be37fd04fa868cc570996ab3174171abb09bf Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Fri, 23 Feb 2024 18:23:55 -0800 Subject: [PATCH 1/8] QNN quant preprocessing: ensure fused nodes have a name --- .../execution_providers/qnn/fusion_lpnorm.py | 7 ++++++- .../execution_providers/qnn/preprocess.py | 11 +++++++++++ .../python/tools/quantization/fusions/fusion.py | 15 +++++++++++++++ .../tools/quantization/fusions/fusion_gelu.py | 12 +++++++++--- .../quantization/fusions/fusion_layernorm.py | 1 + .../python/tools/quantization/onnx_model.py | 17 +++++++++++++++++ 6 files changed, 59 insertions(+), 4 deletions(-) diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/fusion_lpnorm.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/fusion_lpnorm.py index 9ebf400498e0e..fbf954febdda4 100644 --- a/onnxruntime/python/tools/quantization/execution_providers/qnn/fusion_lpnorm.py +++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/fusion_lpnorm.py @@ -122,6 +122,11 @@ def fuse( self.nodes_to_remove.extend(subgraph_nodes) fused_node = onnx.helper.make_node( - self.fused_op_type, inputs=[subgraph_input], outputs=[subgraph_output], p=2, axis=-1 + self.fused_op_type, + name=self.create_unique_node_name(), + inputs=[subgraph_input], + outputs=[subgraph_output], + p=2, + axis=-1, ) self.nodes_to_add.append(fused_node) diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/preprocess.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/preprocess.py index becbaceab184e..b1c114fe1f9fd 100644 --- a/onnxruntime/python/tools/quantization/execution_providers/qnn/preprocess.py +++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/preprocess.py @@ -44,6 +44,17 @@ def qnn_preprocess_model(model_input: Path, model_output: Path, fuse_layernorm: if fusion_layernorm.apply(): modified = True + # Make sure all nodes have a name. + unnamed_node_prefix = "qnn_preproc_node_" + available_suffix = onnx_model.get_largest_node_name_suffix(unnamed_node_prefix) + 1 + for node in onnx_model.model.graph.node: + if node.op_type != "Constant" and not node.name: + new_node_name = f"{unnamed_node_prefix}{available_suffix!s}" + available_suffix += 1 + node.name = new_node_name + modified = True + logging.warning(f"Node of type {node.op_type} does not have a name. Renamed to {new_node_name}.") + if modified: onnx_model.topological_sort() onnx.save_model(model, model_output) diff --git a/onnxruntime/python/tools/quantization/fusions/fusion.py b/onnxruntime/python/tools/quantization/fusions/fusion.py index b54b421226f1a..4bdc5c26cc946 100644 --- a/onnxruntime/python/tools/quantization/fusions/fusion.py +++ b/onnxruntime/python/tools/quantization/fusions/fusion.py @@ -24,6 +24,9 @@ def __init__(self, model: ONNXModel, fused_op_type: str, search_op_type: str): self.nodes_to_remove: list = [] self.nodes_to_add: list = [] + self._new_node_name_prefix = self.fused_op_type + "_fused_" + self.search_op_type + "_" + self._new_node_name_suffix = None # int|None used to create unique node names for the fused ops. + def fuse( self, node: onnx.NodeProto, @@ -57,6 +60,18 @@ def apply(self) -> bool: return graph_updated + def create_unique_node_name(self): + prefix = self._new_node_name_prefix + + if self._new_node_name_suffix is None: + largest_suffix: int = self.model.get_largest_node_name_suffix(prefix) + self._new_node_name_suffix = largest_suffix + 1 + + new_name = f"{prefix}{self._new_node_name_suffix!s}" + self._new_node_name_suffix += 1 + + return new_name + @staticmethod def is_safe_to_fuse_nodes( nodes_to_remove: list[onnx.NodeProto], diff --git a/onnxruntime/python/tools/quantization/fusions/fusion_gelu.py b/onnxruntime/python/tools/quantization/fusions/fusion_gelu.py index a20d6dbffd7a7..e3ca122263e80 100644 --- a/onnxruntime/python/tools/quantization/fusions/fusion_gelu.py +++ b/onnxruntime/python/tools/quantization/fusions/fusion_gelu.py @@ -112,7 +112,9 @@ def fuse_1( return False self.nodes_to_remove.extend(subgraph_nodes) - fused_node = onnx.helper.make_node("Gelu", inputs=[subgraph_input], outputs=[subgraph_output]) + fused_node = onnx.helper.make_node( + "Gelu", name=self.create_unique_node_name(), inputs=[subgraph_input], outputs=[subgraph_output] + ) fused_node.domain = "com.microsoft" self.nodes_to_add.append(fused_node) return True @@ -188,7 +190,9 @@ def fuse_2( return False self.nodes_to_remove.extend(subgraph_nodes) - fused_node = onnx.helper.make_node("Gelu", inputs=[root_node.output[0]], outputs=[mul.output[0]]) + fused_node = onnx.helper.make_node( + "Gelu", name=self.create_unique_node_name(), inputs=[root_node.output[0]], outputs=[mul.output[0]] + ) fused_node.domain = "com.microsoft" self.nodes_to_add.append(fused_node) return True @@ -263,7 +267,9 @@ def fuse_3( return False self.nodes_to_remove.extend(subgraph_nodes) - fused_node = onnx.helper.make_node("Gelu", inputs=[root_node.output[0]], outputs=[last_mul.output[0]]) + fused_node = onnx.helper.make_node( + "Gelu", name=self.create_unique_node_name(), inputs=[root_node.output[0]], outputs=[last_mul.output[0]] + ) fused_node.domain = "com.microsoft" self.nodes_to_add.append(fused_node) return True diff --git a/onnxruntime/python/tools/quantization/fusions/fusion_layernorm.py b/onnxruntime/python/tools/quantization/fusions/fusion_layernorm.py index d7fb89236d3d2..7d58c1c180822 100644 --- a/onnxruntime/python/tools/quantization/fusions/fusion_layernorm.py +++ b/onnxruntime/python/tools/quantization/fusions/fusion_layernorm.py @@ -127,6 +127,7 @@ def fuse( normalize_node = onnx.helper.make_node( "LayerNormalization", + name=self.create_unique_node_name(), inputs=[reduce_mean_node.input[0], weight_input, bias_input], outputs=[last_add_node.output[0]], ) diff --git a/onnxruntime/python/tools/quantization/onnx_model.py b/onnxruntime/python/tools/quantization/onnx_model.py index 4591c9c950e6e..46d245d353a07 100644 --- a/onnxruntime/python/tools/quantization/onnx_model.py +++ b/onnxruntime/python/tools/quantization/onnx_model.py @@ -283,6 +283,23 @@ def find_node_by_name(self, node_name, new_nodes_list, graph): node = find_by_name(node_name, graph_nodes_list) return node + def get_largest_node_name_suffix(self, node_name_prefix): + """ + Gets the largest node name (int) suffix for all node names that begin with `node_name_prefix`. + Example: for nodes my_prefix_0 and my_prefix_3, this method returns 3. + """ + suffix = -1 + + for node in self.model.graph.node: + if node.name and node.name.startswith(node_name_prefix): + try: + index = int(node.name[len(node_name_prefix) :]) + suffix = max(index, suffix) + except ValueError: + continue + + return suffix + def find_nodes_by_initializer(self, graph, initializer): """ Find all nodes with given initializer as an input. From fa3fc786d1de400a39526405c42601a3aa3f1b65 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Sun, 25 Feb 2024 23:36:26 -0800 Subject: [PATCH 2/8] Add unittest for gelu fusion --- .../tools/quantization/fusions/fusion_gelu.py | 17 +- .../test/python/quantization/test_fusions.py | 195 ++++++++++++++++++ 2 files changed, 202 insertions(+), 10 deletions(-) create mode 100644 onnxruntime/test/python/quantization/test_fusions.py diff --git a/onnxruntime/python/tools/quantization/fusions/fusion_gelu.py b/onnxruntime/python/tools/quantization/fusions/fusion_gelu.py index e3ca122263e80..42c4a11833641 100644 --- a/onnxruntime/python/tools/quantization/fusions/fusion_gelu.py +++ b/onnxruntime/python/tools/quantization/fusions/fusion_gelu.py @@ -175,11 +175,9 @@ def fuse_2( if not self.has_constant_input(sqrt_node, 2.0): return False - root_node = self.model.get_parent(div, 0, output_name_to_node) - if root_node is None: - return False + subgraph_input = div.input[0] - if root_node.output[0] not in mul.input: + if subgraph_input not in mul.input: return False subgraph_nodes = [div, erf_node, add_after_erf, mul_after_erf, mul] @@ -191,7 +189,7 @@ def fuse_2( self.nodes_to_remove.extend(subgraph_nodes) fused_node = onnx.helper.make_node( - "Gelu", name=self.create_unique_node_name(), inputs=[root_node.output[0]], outputs=[mul.output[0]] + "Gelu", name=self.create_unique_node_name(), inputs=[subgraph_input], outputs=[mul.output[0]] ) fused_node.domain = "com.microsoft" self.nodes_to_add.append(fused_node) @@ -243,9 +241,8 @@ def fuse_3( if i < 0: return False - root_node = self.model.get_parent(first_mul, 0 if i == 1 else 1, output_name_to_node) - if root_node is None: - return False + root_input_index = 1 - i + subgraph_input = first_mul.input[root_input_index] if mul_half.output[0] not in input_name_to_nodes: return False @@ -254,7 +251,7 @@ def fuse_3( return False last_mul = children[0] - if not (last_mul.input[0] == root_node.output[0] or last_mul.input[1] == root_node.output[0]): + if not (last_mul.input[0] == subgraph_input or last_mul.input[1] == subgraph_input): return False subgraph_nodes = [first_mul, erf_node, add_after_erf, mul_half, last_mul] @@ -268,7 +265,7 @@ def fuse_3( self.nodes_to_remove.extend(subgraph_nodes) fused_node = onnx.helper.make_node( - "Gelu", name=self.create_unique_node_name(), inputs=[root_node.output[0]], outputs=[last_mul.output[0]] + "Gelu", name=self.create_unique_node_name(), inputs=[subgraph_input], outputs=[last_mul.output[0]] ) fused_node.domain = "com.microsoft" self.nodes_to_add.append(fused_node) diff --git a/onnxruntime/test/python/quantization/test_fusions.py b/onnxruntime/test/python/quantization/test_fusions.py new file mode 100644 index 0000000000000..3da24724e4e32 --- /dev/null +++ b/onnxruntime/test/python/quantization/test_fusions.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- + +import unittest + +import math +import numpy as np +import onnx + +from onnxruntime.quantization.onnx_model import ONNXModel +from onnxruntime.quantization.fusions import FusionGelu + +class TestFusions(unittest.TestCase): + def build_erf_sequence_1_model(self): + """ + +-------Mul(0.5)---------------------+ + | | + | v + [root] --> Div -----> Erf --> Add --> Mul --> + (B=1.4142...) (1) + + """ + shape = (1, 2, 3) + root_inp = onnx.helper.make_tensor_value_info("root", onnx.TensorProto.FLOAT, shape) + output = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, shape) + one_const = onnx.numpy_helper.from_array(np.array(1.0, dtype=np.float32), "one_const") + half_const = onnx.numpy_helper.from_array(np.array(0.5, dtype=np.float32), "half_const") + root2_const = onnx.numpy_helper.from_array(np.array(math.sqrt(2.0), dtype=np.float32), "root2_const") + + mul0_node = onnx.helper.make_node("Mul", ["root", "half_const"], ["mul0_out"]) + div_node = onnx.helper.make_node("Div", ["root", "root2_const"], ["div_out"]) + erf_node = onnx.helper.make_node("Erf", ["div_out"], ["erf_out"]) + add_node = onnx.helper.make_node("Add", ["erf_out", "one_const"], ["add_out"]) + mul1_node = onnx.helper.make_node("Mul", ["add_out", "mul0_out"], ["output"]) + + graph = onnx.helper.make_graph( + [mul0_node, div_node, erf_node, add_node, mul1_node], + "elf_sequence_1", + [root_inp], + [output], + initializer=[one_const, half_const, root2_const], + ) + model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 18)]) + return ONNXModel(model) + + def build_erf_sequence_2_model(self): + """ + +------------------------------------+ + | | + | v + [root] --> Div -----> Erf --> Add --> Mul -->Mul --> + (B=1.4142...) (1) (0.5) + + """ + shape = (1, 2, 3) + root_inp = onnx.helper.make_tensor_value_info("root", onnx.TensorProto.FLOAT, shape) + output = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, shape) + one_const = onnx.numpy_helper.from_array(np.array(1.0, dtype=np.float32), "one_const") + half_const = onnx.numpy_helper.from_array(np.array(0.5, dtype=np.float32), "half_const") + root2_const = onnx.numpy_helper.from_array(np.array(math.sqrt(2.0), dtype=np.float32), "root2_const") + + div_node = onnx.helper.make_node("Div", ["root", "root2_const"], ["div_out"]) + erf_node = onnx.helper.make_node("Erf", ["div_out"], ["erf_out"]) + add_node = onnx.helper.make_node("Add", ["erf_out", "one_const"], ["add_out"]) + mul0_node = onnx.helper.make_node("Mul", ["add_out", "root"], ["mul0_out"]) + mul1_node = onnx.helper.make_node("Mul", ["mul0_out", "half_const"], ["output"]) + + graph = onnx.helper.make_graph( + [div_node, erf_node, add_node, mul0_node, mul1_node], + "elf_sequence_2", + [root_inp], + [output], + initializer=[one_const, half_const, root2_const], + ) + model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 18)]) + return ONNXModel(model) + + def build_erf_sequence_3_model(self): + """ + +------------------------------------------+ + | | + | v + [root] --> Div -----> Erf --> Add --> Mul -->Mul + (B=1.4142...) (A=1) (A=0.5) + + """ + shape = (1, 2, 3) + root_inp = onnx.helper.make_tensor_value_info("root", onnx.TensorProto.FLOAT, shape) + output = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, shape) + one_const = onnx.numpy_helper.from_array(np.array(1.0, dtype=np.float32), "one_const") + half_const = onnx.numpy_helper.from_array(np.array(0.5, dtype=np.float32), "half_const") + root2_const = onnx.numpy_helper.from_array(np.array(math.sqrt(2.0), dtype=np.float32), "root2_const") + + div_node = onnx.helper.make_node("Div", ["root", "root2_const"], ["div_out"]) + erf_node = onnx.helper.make_node("Erf", ["div_out"], ["erf_out"]) + add_node = onnx.helper.make_node("Add", ["erf_out", "one_const"], ["add_out"]) + mul0_node = onnx.helper.make_node("Mul", ["add_out", "half_const"], ["mul0_out"]) + mul1_node = onnx.helper.make_node("Mul", ["mul0_out", "root"], ["output"]) + + graph = onnx.helper.make_graph( + [div_node, erf_node, add_node, mul0_node, mul1_node], + "elf_sequence_3", + [root_inp], + [output], + initializer=[one_const, half_const, root2_const], + ) + model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 18)]) + return ONNXModel(model) + + def build_erf_sequence_4_model(self): + """ + +----------------------------------------------+ + | | + | v + [root] --> Mul -----> Erf --> Add --> Mul -->Mul + (A=0.7071067690849304) (B=1) (B=0.5) + + """ + shape = (1, 2, 3) + root_inp = onnx.helper.make_tensor_value_info("root", onnx.TensorProto.FLOAT, shape) + output = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, shape) + one_const = onnx.numpy_helper.from_array(np.array(1.0, dtype=np.float32), "one_const") + half_const = onnx.numpy_helper.from_array(np.array(0.5, dtype=np.float32), "half_const") + frac_const = onnx.numpy_helper.from_array(np.array(0.7071067690849304, dtype=np.float32), "frac_const") + + mul0_node = onnx.helper.make_node("Mul", ["root", "frac_const"], ["mul0_out"]) + erf_node = onnx.helper.make_node("Erf", ["mul0_out"], ["erf_out"]) + add_node = onnx.helper.make_node("Add", ["erf_out", "one_const"], ["add_out"]) + mul1_node = onnx.helper.make_node("Mul", ["add_out", "half_const"], ["mul1_out"]) + mul2_node = onnx.helper.make_node("Mul", ["mul1_out", "root"], ["output"]) + + graph = onnx.helper.make_graph( + [mul0_node, erf_node, add_node, mul1_node, mul2_node], + "elf_sequence_4", + [root_inp], + [output], + initializer=[one_const, half_const, frac_const], + ) + model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 18)]) + return ONNXModel(model) + + def test_fuse_erf_to_gelu_1(self): + model = self.build_erf_sequence_1_model() + fusion_gelu = FusionGelu(model) + modified = fusion_gelu.apply() + + self.assertTrue(modified) + self.assertEqual(len(model.model.graph.node), 1) + + gelu_node = model.model.graph.node[0] + self.assertEqual(gelu_node.op_type, "Gelu") + self.assertTrue(gelu_node.name) + + def test_fuse_erf_to_gelu_2(self): + model = self.build_erf_sequence_2_model() + fusion_gelu = FusionGelu(model) + modified = fusion_gelu.apply() + + self.assertTrue(modified) + self.assertEqual(len(model.model.graph.node), 1) + + gelu_node = model.model.graph.node[0] + self.assertEqual(gelu_node.op_type, "Gelu") + self.assertTrue(gelu_node.name) + + def test_fuse_erf_to_gelu_3(self): + model = self.build_erf_sequence_3_model() + fusion_gelu = FusionGelu(model) + modified = fusion_gelu.apply() + + self.assertTrue(modified) + self.assertEqual(len(model.model.graph.node), 1) + + gelu_node = model.model.graph.node[0] + self.assertEqual(gelu_node.op_type, "Gelu") + self.assertTrue(gelu_node.name) + + def test_fuse_erf_to_gelu_4(self): + model = self.build_erf_sequence_4_model() + fusion_gelu = FusionGelu(model) + modified = fusion_gelu.apply() + + self.assertTrue(modified) + self.assertEqual(len(model.model.graph.node), 1) + + gelu_node = model.model.graph.node[0] + self.assertEqual(gelu_node.op_type, "Gelu") + self.assertTrue(gelu_node.name) + +if __name__ == "__main__": + unittest.main() From 8f7c88f6342e8d65c3c08e2b59a27d9f252b5191 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 26 Feb 2024 09:59:48 -0800 Subject: [PATCH 3/8] Simplify fusion call --- onnxruntime/test/python/quantization/test_fusions.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/onnxruntime/test/python/quantization/test_fusions.py b/onnxruntime/test/python/quantization/test_fusions.py index 3da24724e4e32..d265dbea3d87f 100644 --- a/onnxruntime/test/python/quantization/test_fusions.py +++ b/onnxruntime/test/python/quantization/test_fusions.py @@ -145,8 +145,7 @@ def build_erf_sequence_4_model(self): def test_fuse_erf_to_gelu_1(self): model = self.build_erf_sequence_1_model() - fusion_gelu = FusionGelu(model) - modified = fusion_gelu.apply() + modified = FusionGelu(model).apply() self.assertTrue(modified) self.assertEqual(len(model.model.graph.node), 1) @@ -157,8 +156,7 @@ def test_fuse_erf_to_gelu_1(self): def test_fuse_erf_to_gelu_2(self): model = self.build_erf_sequence_2_model() - fusion_gelu = FusionGelu(model) - modified = fusion_gelu.apply() + modified = FusionGelu(model).apply() self.assertTrue(modified) self.assertEqual(len(model.model.graph.node), 1) @@ -169,8 +167,7 @@ def test_fuse_erf_to_gelu_2(self): def test_fuse_erf_to_gelu_3(self): model = self.build_erf_sequence_3_model() - fusion_gelu = FusionGelu(model) - modified = fusion_gelu.apply() + modified = FusionGelu(model).apply() self.assertTrue(modified) self.assertEqual(len(model.model.graph.node), 1) @@ -181,8 +178,7 @@ def test_fuse_erf_to_gelu_3(self): def test_fuse_erf_to_gelu_4(self): model = self.build_erf_sequence_4_model() - fusion_gelu = FusionGelu(model) - modified = fusion_gelu.apply() + modified = FusionGelu(model).apply() self.assertTrue(modified) self.assertEqual(len(model.model.graph.node), 1) From 6ef325fd9f6aa3f3c0d0abeaf5d70577e6ba8b1f Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 26 Feb 2024 10:00:52 -0800 Subject: [PATCH 4/8] Run lintrunner --- .../test/python/quantization/test_fusions.py | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/onnxruntime/test/python/quantization/test_fusions.py b/onnxruntime/test/python/quantization/test_fusions.py index d265dbea3d87f..8a4e81180e40c 100644 --- a/onnxruntime/test/python/quantization/test_fusions.py +++ b/onnxruntime/test/python/quantization/test_fusions.py @@ -5,23 +5,24 @@ # license information. # -------------------------------------------------------------------------- +import math import unittest -import math import numpy as np import onnx -from onnxruntime.quantization.onnx_model import ONNXModel from onnxruntime.quantization.fusions import FusionGelu +from onnxruntime.quantization.onnx_model import ONNXModel + class TestFusions(unittest.TestCase): def build_erf_sequence_1_model(self): """ - +-------Mul(0.5)---------------------+ - | | - | v - [root] --> Div -----> Erf --> Add --> Mul --> - (B=1.4142...) (1) + +-------Mul(0.5)---------------------+ + | | + | v + [root] --> Div -----> Erf --> Add --> Mul --> + (B=1.4142...) (1) """ shape = (1, 2, 3) @@ -49,11 +50,11 @@ def build_erf_sequence_1_model(self): def build_erf_sequence_2_model(self): """ - +------------------------------------+ - | | - | v - [root] --> Div -----> Erf --> Add --> Mul -->Mul --> - (B=1.4142...) (1) (0.5) + +------------------------------------+ + | | + | v + [root] --> Div -----> Erf --> Add --> Mul -->Mul --> + (B=1.4142...) (1) (0.5) """ shape = (1, 2, 3) @@ -81,11 +82,11 @@ def build_erf_sequence_2_model(self): def build_erf_sequence_3_model(self): """ - +------------------------------------------+ - | | - | v - [root] --> Div -----> Erf --> Add --> Mul -->Mul - (B=1.4142...) (A=1) (A=0.5) + +------------------------------------------+ + | | + | v + [root] --> Div -----> Erf --> Add --> Mul -->Mul + (B=1.4142...) (A=1) (A=0.5) """ shape = (1, 2, 3) @@ -113,11 +114,11 @@ def build_erf_sequence_3_model(self): def build_erf_sequence_4_model(self): """ - +----------------------------------------------+ - | | - | v - [root] --> Mul -----> Erf --> Add --> Mul -->Mul - (A=0.7071067690849304) (B=1) (B=0.5) + +----------------------------------------------+ + | | + | v + [root] --> Mul -----> Erf --> Add --> Mul -->Mul + (A=0.7071067690849304) (B=1) (B=0.5) """ shape = (1, 2, 3) @@ -187,5 +188,6 @@ def test_fuse_erf_to_gelu_4(self): self.assertEqual(gelu_node.op_type, "Gelu") self.assertTrue(gelu_node.name) + if __name__ == "__main__": unittest.main() From 2470cf5f6a50038ab2881983f700bd5b72b58714 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 26 Feb 2024 13:44:51 -0800 Subject: [PATCH 5/8] Test fusion correctness --- .../test/python/quantization/test_fusions.py | 105 ++++++++++++++---- 1 file changed, 85 insertions(+), 20 deletions(-) diff --git a/onnxruntime/test/python/quantization/test_fusions.py b/onnxruntime/test/python/quantization/test_fusions.py index 8a4e81180e40c..c6fbb967662b8 100644 --- a/onnxruntime/test/python/quantization/test_fusions.py +++ b/onnxruntime/test/python/quantization/test_fusions.py @@ -11,12 +11,33 @@ import numpy as np import onnx +import onnxruntime from onnxruntime.quantization.fusions import FusionGelu from onnxruntime.quantization.onnx_model import ONNXModel class TestFusions(unittest.TestCase): - def build_erf_sequence_1_model(self): + def check_fused_model_correctness(self, orig_model, fused_model, inputs, rtol=1e-7, atol=0): + orig_session = onnxruntime.InferenceSession(orig_model.SerializeToString(), providers=["CPUExecutionProvider"]) + orig_results = orig_session.run(None, inputs) + + fused_session = onnxruntime.InferenceSession( + fused_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + fused_results = fused_session.run([], inputs) + + self.assertEqual(len(orig_results), len(fused_results), "Number of outputs for fused model differs") + for idx, expected_output in enumerate(orig_results): + actual_output = fused_results[idx] + np.testing.assert_allclose( + expected_output, + actual_output, + rtol=rtol, + atol=atol, + err_msg=f"Fused model output {idx} differs", + ) + + def build_erf_sequence_1_model(self, shape): """ +-------Mul(0.5)---------------------+ | | @@ -25,7 +46,6 @@ def build_erf_sequence_1_model(self): (B=1.4142...) (1) """ - shape = (1, 2, 3) root_inp = onnx.helper.make_tensor_value_info("root", onnx.TensorProto.FLOAT, shape) output = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, shape) one_const = onnx.numpy_helper.from_array(np.array(1.0, dtype=np.float32), "one_const") @@ -45,10 +65,14 @@ def build_erf_sequence_1_model(self): [output], initializer=[one_const, half_const, root2_const], ) - model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 18)]) + opset_imports = [ + onnx.helper.make_opsetid("", 18), + onnx.helper.make_opsetid("com.microsoft", 1), + ] + model = onnx.helper.make_model(graph, opset_imports=opset_imports) return ONNXModel(model) - def build_erf_sequence_2_model(self): + def build_erf_sequence_2_model(self, shape): """ +------------------------------------+ | | @@ -57,7 +81,6 @@ def build_erf_sequence_2_model(self): (B=1.4142...) (1) (0.5) """ - shape = (1, 2, 3) root_inp = onnx.helper.make_tensor_value_info("root", onnx.TensorProto.FLOAT, shape) output = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, shape) one_const = onnx.numpy_helper.from_array(np.array(1.0, dtype=np.float32), "one_const") @@ -77,10 +100,14 @@ def build_erf_sequence_2_model(self): [output], initializer=[one_const, half_const, root2_const], ) - model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 18)]) + opset_imports = [ + onnx.helper.make_opsetid("", 18), + onnx.helper.make_opsetid("com.microsoft", 1), + ] + model = onnx.helper.make_model(graph, opset_imports=opset_imports) return ONNXModel(model) - def build_erf_sequence_3_model(self): + def build_erf_sequence_3_model(self, shape): """ +------------------------------------------+ | | @@ -89,7 +116,6 @@ def build_erf_sequence_3_model(self): (B=1.4142...) (A=1) (A=0.5) """ - shape = (1, 2, 3) root_inp = onnx.helper.make_tensor_value_info("root", onnx.TensorProto.FLOAT, shape) output = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, shape) one_const = onnx.numpy_helper.from_array(np.array(1.0, dtype=np.float32), "one_const") @@ -109,10 +135,14 @@ def build_erf_sequence_3_model(self): [output], initializer=[one_const, half_const, root2_const], ) - model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 18)]) + opset_imports = [ + onnx.helper.make_opsetid("", 18), + onnx.helper.make_opsetid("com.microsoft", 1), + ] + model = onnx.helper.make_model(graph, opset_imports=opset_imports) return ONNXModel(model) - def build_erf_sequence_4_model(self): + def build_erf_sequence_4_model(self, shape): """ +----------------------------------------------+ | | @@ -121,7 +151,6 @@ def build_erf_sequence_4_model(self): (A=0.7071067690849304) (B=1) (B=0.5) """ - shape = (1, 2, 3) root_inp = onnx.helper.make_tensor_value_info("root", onnx.TensorProto.FLOAT, shape) output = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, shape) one_const = onnx.numpy_helper.from_array(np.array(1.0, dtype=np.float32), "one_const") @@ -141,13 +170,21 @@ def build_erf_sequence_4_model(self): [output], initializer=[one_const, half_const, frac_const], ) - model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 18)]) + opset_imports = [ + onnx.helper.make_opsetid("", 18), + onnx.helper.make_opsetid("com.microsoft", 1), + ] + model = onnx.helper.make_model(graph, opset_imports=opset_imports) return ONNXModel(model) def test_fuse_erf_to_gelu_1(self): - model = self.build_erf_sequence_1_model() - modified = FusionGelu(model).apply() + shape = (1, 2, 3) + model = self.build_erf_sequence_1_model(shape) + orig_model = onnx.ModelProto() + orig_model.CopyFrom(model.model) + # Check that fusion simplified model to 1 Gelu node. + modified = FusionGelu(model).apply() self.assertTrue(modified) self.assertEqual(len(model.model.graph.node), 1) @@ -155,10 +192,18 @@ def test_fuse_erf_to_gelu_1(self): self.assertEqual(gelu_node.op_type, "Gelu") self.assertTrue(gelu_node.name) + # Check that fusion is equivalent to original Erf model. + inputs = {"root": np.ones(shape, dtype=np.float32)} + self.check_fused_model_correctness(orig_model, model.model, inputs) + def test_fuse_erf_to_gelu_2(self): - model = self.build_erf_sequence_2_model() - modified = FusionGelu(model).apply() + shape = (1, 2, 3) + model = self.build_erf_sequence_2_model(shape) + orig_model = onnx.ModelProto() + orig_model.CopyFrom(model.model) + # Check that fusion simplified model to 1 Gelu node. + modified = FusionGelu(model).apply() self.assertTrue(modified) self.assertEqual(len(model.model.graph.node), 1) @@ -166,10 +211,18 @@ def test_fuse_erf_to_gelu_2(self): self.assertEqual(gelu_node.op_type, "Gelu") self.assertTrue(gelu_node.name) + # Check that fusion is equivalent to original Erf model. + inputs = {"root": np.ones(shape, dtype=np.float32)} + self.check_fused_model_correctness(orig_model, model.model, inputs) + def test_fuse_erf_to_gelu_3(self): - model = self.build_erf_sequence_3_model() - modified = FusionGelu(model).apply() + shape = (1, 2, 3) + model = self.build_erf_sequence_3_model(shape) + orig_model = onnx.ModelProto() + orig_model.CopyFrom(model.model) + # Check that fusion simplified model to 1 Gelu node. + modified = FusionGelu(model).apply() self.assertTrue(modified) self.assertEqual(len(model.model.graph.node), 1) @@ -177,10 +230,18 @@ def test_fuse_erf_to_gelu_3(self): self.assertEqual(gelu_node.op_type, "Gelu") self.assertTrue(gelu_node.name) + # Check that fusion is equivalent to original Erf model. + inputs = {"root": np.ones(shape, dtype=np.float32)} + self.check_fused_model_correctness(orig_model, model.model, inputs) + def test_fuse_erf_to_gelu_4(self): - model = self.build_erf_sequence_4_model() - modified = FusionGelu(model).apply() + shape = (1, 2, 3) + model = self.build_erf_sequence_4_model(shape) + orig_model = onnx.ModelProto() + orig_model.CopyFrom(model.model) + # Check that fusion simplified model to 1 Gelu node. + modified = FusionGelu(model).apply() self.assertTrue(modified) self.assertEqual(len(model.model.graph.node), 1) @@ -188,6 +249,10 @@ def test_fuse_erf_to_gelu_4(self): self.assertEqual(gelu_node.op_type, "Gelu") self.assertTrue(gelu_node.name) + # Check that fusion is equivalent to original Erf model. + inputs = {"root": np.ones(shape, dtype=np.float32)} + self.check_fused_model_correctness(orig_model, model.model, inputs) + if __name__ == "__main__": unittest.main() From a3d0c8c936ce771e42d7f8195b179432686af128 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 26 Feb 2024 15:27:14 -0800 Subject: [PATCH 6/8] Test other fusions --- .../test/python/quantization/test_fusions.py | 114 +++++++++++++++++- 1 file changed, 113 insertions(+), 1 deletion(-) diff --git a/onnxruntime/test/python/quantization/test_fusions.py b/onnxruntime/test/python/quantization/test_fusions.py index c6fbb967662b8..c244d74abb6ed 100644 --- a/onnxruntime/test/python/quantization/test_fusions.py +++ b/onnxruntime/test/python/quantization/test_fusions.py @@ -12,7 +12,8 @@ import onnx import onnxruntime -from onnxruntime.quantization.fusions import FusionGelu +from onnxruntime.quantization.execution_providers.qnn.fusion_lpnorm import FusionLpNormalization +from onnxruntime.quantization.fusions import FusionGelu, FusionLayerNormalization from onnxruntime.quantization.onnx_model import ONNXModel @@ -177,6 +178,79 @@ def build_erf_sequence_4_model(self, shape): model = onnx.helper.make_model(graph, opset_imports=opset_imports) return ONNXModel(model) + def build_reduce_mean_sequence_model(self, shape, scale_val, bias_val, axis=-1): + """ + +----------------------+ + | | + | v + [Root] --> ReduceMean --> Sub --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Add + (axis=2 or -1) | (Y=2) (axis=2 or -1) (E-6 or E-12 or 0) ^ ^ ^ + | | | | + +-------------------------------------------------+ [Scale] [Bias] + """ + root_inp = onnx.helper.make_tensor_value_info("root", onnx.TensorProto.FLOAT, shape) + output = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, shape) + scale_const = onnx.numpy_helper.from_array(np.array(scale_val, dtype=np.float32), "scale_const") + bias_const = onnx.numpy_helper.from_array(np.array(bias_val, dtype=np.float32), "bias_const") + axes_const = onnx.numpy_helper.from_array(np.array([axis], dtype=np.int64), "axes_const") + two_const = onnx.numpy_helper.from_array(np.array(2.0, dtype=np.float32), "two_const") + eps_const = onnx.numpy_helper.from_array(np.array(1.0e-8, dtype=np.float32), "eps_const") + + rm0_node = onnx.helper.make_node("ReduceMean", ["root", "axes_const"], ["rm0_out"]) + sub_node = onnx.helper.make_node("Sub", ["root", "rm0_out"], ["sub_out"]) + pow_node = onnx.helper.make_node("Pow", ["sub_out", "two_const"], ["pow_out"]) + rm1_node = onnx.helper.make_node("ReduceMean", ["pow_out", "axes_const"], ["rm1_out"]) + add0_node = onnx.helper.make_node("Add", ["rm1_out", "eps_const"], ["add0_out"]) + sqrt_node = onnx.helper.make_node("Sqrt", ["add0_out"], ["sqrt_out"]) + div_node = onnx.helper.make_node("Div", ["sub_out", "sqrt_out"], ["div_out"]) + mul_node = onnx.helper.make_node("Mul", ["div_out", "scale_const"], ["mul_out"]) + add1_node = onnx.helper.make_node("Add", ["mul_out", "bias_const"], ["output"]) + + graph = onnx.helper.make_graph( + [rm0_node, sub_node, pow_node, rm1_node, add0_node, sqrt_node, div_node, mul_node, add1_node], + "reduce_mean_sequence", + [root_inp], + [output], + initializer=[scale_const, bias_const, axes_const, two_const, eps_const], + ) + opset_imports = [ + onnx.helper.make_opsetid("", 18), + ] + model = onnx.helper.make_model(graph, opset_imports=opset_imports) + return ONNXModel(model) + + def build_reduce_l2_sequence_model(self, shape, epsilon_val, axis=-1): + """ + [root] --> ReduceL2 -----> Clip --> Expand ----> Div --> + | (axis=-1) (min=epsilon) (shape=root) ^ + | (keepdims=True) | + | | + +-----------------------------------------------+ + """ + root_inp = onnx.helper.make_tensor_value_info("root", onnx.TensorProto.FLOAT, shape) + output = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, shape) + axes_const = onnx.numpy_helper.from_array(np.array([axis], dtype=np.int64), "axes_const") + eps_const = onnx.numpy_helper.from_array(np.array(epsilon_val, dtype=np.float32), "eps_const") + shape_const = onnx.numpy_helper.from_array(np.array(list(shape), dtype=np.int64), "shape_const") + + rl2_node = onnx.helper.make_node("ReduceL2", ["root", "axes_const"], ["rl2_out"], keepdims=1) + clip_node = onnx.helper.make_node("Clip", ["rl2_out", "eps_const"], ["clip_out"]) + expand_node = onnx.helper.make_node("Expand", ["clip_out", "shape_const"], ["expand_out"]) + div_node = onnx.helper.make_node("Div", ["root", "expand_out"], ["output"]) + + graph = onnx.helper.make_graph( + [rl2_node, clip_node, expand_node, div_node], + "reducel2_sequence", + [root_inp], + [output], + initializer=[axes_const, eps_const, shape_const], + ) + opset_imports = [ + onnx.helper.make_opsetid("", 18), + ] + model = onnx.helper.make_model(graph, opset_imports=opset_imports) + return ONNXModel(model) + def test_fuse_erf_to_gelu_1(self): shape = (1, 2, 3) model = self.build_erf_sequence_1_model(shape) @@ -253,6 +327,44 @@ def test_fuse_erf_to_gelu_4(self): inputs = {"root": np.ones(shape, dtype=np.float32)} self.check_fused_model_correctness(orig_model, model.model, inputs) + def test_fuse_reduce_l2_to_lpnorm(self): + shape = (1, 2, 3) + model = self.build_reduce_l2_sequence_model(shape, 1e-12, axis=-1) + orig_model = onnx.ModelProto() + orig_model.CopyFrom(model.model) + + # Check that fusion simplified model to 1 LpNormalization node. + modified = FusionLpNormalization(model).apply() + self.assertTrue(modified) + self.assertEqual(len(model.model.graph.node), 1) + + lpnorm_node = model.model.graph.node[0] + self.assertEqual(lpnorm_node.op_type, "LpNormalization") + self.assertTrue(lpnorm_node.name) + + # LpNorm's p attribute should be set to 2 + p_attr = next(attr for attr in lpnorm_node.attribute if attr.name == "p") + self.assertEqual(p_attr.i, 2) + + def test_fuse_reduce_mean_to_layer_norm(self): + shape = (1, 2, 3) + model = self.build_reduce_mean_sequence_model(shape, [2.0, 2.0, 2.0], [1.0, 1.0, 1.0], axis=-1) + orig_model = onnx.ModelProto() + orig_model.CopyFrom(model.model) + + # Check that fusion simplified model to 1 LayerNormalization node. + modified = FusionLayerNormalization(model).apply() + self.assertTrue(modified) + self.assertEqual(len(model.model.graph.node), 1) + + layer_norm_node = model.model.graph.node[0] + self.assertEqual(layer_norm_node.op_type, "LayerNormalization") + self.assertTrue(layer_norm_node.name) + + # Check that fused model is equivalent to original model. + inputs = {"root": np.ones(shape, dtype=np.float32)} + self.check_fused_model_correctness(orig_model, model.model, inputs) + if __name__ == "__main__": unittest.main() From 747b56b93f10cfe21fa09e595b6f0c28b8e51513 Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 26 Feb 2024 15:32:53 -0800 Subject: [PATCH 7/8] Add method comment --- onnxruntime/test/python/quantization/test_fusions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/onnxruntime/test/python/quantization/test_fusions.py b/onnxruntime/test/python/quantization/test_fusions.py index c244d74abb6ed..485bdfe5d62ec 100644 --- a/onnxruntime/test/python/quantization/test_fusions.py +++ b/onnxruntime/test/python/quantization/test_fusions.py @@ -19,6 +19,9 @@ class TestFusions(unittest.TestCase): def check_fused_model_correctness(self, orig_model, fused_model, inputs, rtol=1e-7, atol=0): + """ + Checks that the output of the fused model matches the output of the original model. + """ orig_session = onnxruntime.InferenceSession(orig_model.SerializeToString(), providers=["CPUExecutionProvider"]) orig_results = orig_session.run(None, inputs) From 8048dfb4c77373651df2ee4ac04f79a29e31571a Mon Sep 17 00:00:00 2001 From: adrianlizarraga Date: Mon, 26 Feb 2024 15:53:25 -0800 Subject: [PATCH 8/8] Test multiple fused nodes in one graph --- .../test/python/quantization/test_fusions.py | 50 +++++++++++++++---- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/onnxruntime/test/python/quantization/test_fusions.py b/onnxruntime/test/python/quantization/test_fusions.py index 485bdfe5d62ec..bea110e566fb9 100644 --- a/onnxruntime/test/python/quantization/test_fusions.py +++ b/onnxruntime/test/python/quantization/test_fusions.py @@ -43,12 +43,16 @@ def check_fused_model_correctness(self, orig_model, fused_model, inputs, rtol=1e def build_erf_sequence_1_model(self, shape): """ + Erf sequence that fuses into Gelu: +-------Mul(0.5)---------------------+ | | | v [root] --> Div -----> Erf --> Add --> Mul --> (B=1.4142...) (1) + This method builds 2 of these Erf sequences: + + [root] -> ERF_SEQUENCE1 -> ERF_SEQUENCE2 -> output """ root_inp = onnx.helper.make_tensor_value_info("root", onnx.TensorProto.FLOAT, shape) output = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, shape) @@ -56,15 +60,34 @@ def build_erf_sequence_1_model(self, shape): half_const = onnx.numpy_helper.from_array(np.array(0.5, dtype=np.float32), "half_const") root2_const = onnx.numpy_helper.from_array(np.array(math.sqrt(2.0), dtype=np.float32), "root2_const") + # First Erf sequence mul0_node = onnx.helper.make_node("Mul", ["root", "half_const"], ["mul0_out"]) div_node = onnx.helper.make_node("Div", ["root", "root2_const"], ["div_out"]) erf_node = onnx.helper.make_node("Erf", ["div_out"], ["erf_out"]) add_node = onnx.helper.make_node("Add", ["erf_out", "one_const"], ["add_out"]) - mul1_node = onnx.helper.make_node("Mul", ["add_out", "mul0_out"], ["output"]) + mul1_node = onnx.helper.make_node("Mul", ["add_out", "mul0_out"], ["seq1_output"]) + + # Second Erf sequence + mul0_node_dup = onnx.helper.make_node("Mul", ["seq1_output", "half_const"], ["mul0_out_dup"]) + div_node_dup = onnx.helper.make_node("Div", ["seq1_output", "root2_const"], ["div_out_dup"]) + erf_node_dup = onnx.helper.make_node("Erf", ["div_out_dup"], ["erf_out_dup"]) + add_node_dup = onnx.helper.make_node("Add", ["erf_out_dup", "one_const"], ["add_out_dup"]) + mul1_node_dup = onnx.helper.make_node("Mul", ["add_out_dup", "mul0_out_dup"], ["output"]) graph = onnx.helper.make_graph( - [mul0_node, div_node, erf_node, add_node, mul1_node], - "elf_sequence_1", + [ + mul0_node, + div_node, + erf_node, + add_node, + mul1_node, + mul0_node_dup, + div_node_dup, + erf_node_dup, + add_node_dup, + mul1_node_dup, + ], + "two_erf_sequences", [root_inp], [output], initializer=[one_const, half_const, root2_const], @@ -99,7 +122,7 @@ def build_erf_sequence_2_model(self, shape): graph = onnx.helper.make_graph( [div_node, erf_node, add_node, mul0_node, mul1_node], - "elf_sequence_2", + "erf_sequence_2", [root_inp], [output], initializer=[one_const, half_const, root2_const], @@ -134,7 +157,7 @@ def build_erf_sequence_3_model(self, shape): graph = onnx.helper.make_graph( [div_node, erf_node, add_node, mul0_node, mul1_node], - "elf_sequence_3", + "erf_sequence_3", [root_inp], [output], initializer=[one_const, half_const, root2_const], @@ -169,7 +192,7 @@ def build_erf_sequence_4_model(self, shape): graph = onnx.helper.make_graph( [mul0_node, erf_node, add_node, mul1_node, mul2_node], - "elf_sequence_4", + "erf_sequence_4", [root_inp], [output], initializer=[one_const, half_const, frac_const], @@ -260,14 +283,19 @@ def test_fuse_erf_to_gelu_1(self): orig_model = onnx.ModelProto() orig_model.CopyFrom(model.model) - # Check that fusion simplified model to 1 Gelu node. + # Check that fusion simplified model to 2 Gelu nodes. modified = FusionGelu(model).apply() self.assertTrue(modified) - self.assertEqual(len(model.model.graph.node), 1) + self.assertEqual(len(model.model.graph.node), 2) - gelu_node = model.model.graph.node[0] - self.assertEqual(gelu_node.op_type, "Gelu") - self.assertTrue(gelu_node.name) + gelu_node_0 = model.model.graph.node[0] + gelu_node_1 = model.model.graph.node[1] + self.assertEqual(gelu_node_0.op_type, "Gelu") + self.assertEqual(gelu_node_1.op_type, "Gelu") + + self.assertTrue(gelu_node_0.name) + self.assertTrue(gelu_node_1.name) + self.assertNotEqual(gelu_node_0.name, gelu_node_1.name) # Generated names should not be equal # Check that fusion is equivalent to original Erf model. inputs = {"root": np.ones(shape, dtype=np.float32)}