def reset_shape_inference(onnx_graph:onnx.GraphProto):
    '''
    Clear all value_info entries that hold shape inference information

    :param onnx_graph: graph whose ``value_info`` container is emptied in place
    :return: the same ``onnx_graph`` object that was passed in
    '''
    # Slice deletion empties the container in a single step (instead of
    # popping one entry at a time) and keeps the container object itself.
    del onnx_graph.value_info[:]
    return onnx_graph
def tidl_convert_conv_even_filter_to_odd(graph: gs.Graph, onnx_graph: onnx.GraphProto, zero_points=None):
    '''
    Even-sized convolution kernels are not supported in TIDL
    Replace even-sized square kernels with the next-size-up odd kernel. All padding is moved
    out of the Conv node into an explicit Pad node inserted in front of it.

    :param graph: graph that is modified in place. A Pad node is appended per converted Conv;
                  presumably the caller runs graph.cleanup().toposort() afterwards -- confirm
    :param onnx_graph: not used by this function
    :param zero_points: On a per-layer basis, the zero-point for asymmetric quantization. This is
                        a dictionary where key is the Conv node name and value is the zero-point
                        for that layer. Layers without an entry (or ``None`` for the whole
                        argument) use 0. The value fills both the added filter weights and the
                        Pad node's constant value.

    Some tricks are required here due to Conv layer implementation in TIDL being 'SAME' only.
    This requires padding be handled outside the layer itself (due to asymmetric pads).
    Asymmetric quantization is not well supported for these layers, since the zero-point is
    unknown until calibration.

    Nodes are skipped (left untouched) when the weights are not a constant 4-D tensor, when the
    kernel is not square/even/below the supported maximum, or when ``auto_pad`` is in use
    (explicit pads cannot be derived without the input shape).
    '''
    MAX_SUPPORTED_CONV_KERNEL = 7 #7x7 is largest validated layer size

    if zero_points is None:
        zero_points = {}

    # Snapshot of the Conv nodes: Pad nodes are appended to graph.nodes inside the loop
    conv_nodes = [node for node in graph.nodes if node.op == "Conv"]

    for conv in conv_nodes:
        conv_input = conv.inputs[0]
        weight_tensor = conv.inputs[1]

        # Only constant weights expose `.values`; a computed weight tensor cannot be rewritten here
        weight_values = getattr(weight_tensor, 'values', None)
        if weight_values is None:
            logging.debug('Conv node (%s) has non-constant weights; skipping', conv.name)
            continue
        weight_values = np.asarray(weight_values)
        if weight_values.ndim != 4:
            continue  # only 2D convolutions (weights laid out as [M, C/group, kH, kW])

        # kernel_shape is optional in ONNX; when absent it is implied by the weight tensor
        kernel_shape = [int(k) for k in conv.attrs.get('kernel_shape', weight_values.shape[2:])]
        if len(kernel_shape) != 2:
            continue
        size = kernel_shape[0]
        if size % 2 != 0 or size >= MAX_SUPPORTED_CONV_KERNEL or kernel_shape[1] != size:
            continue

        auto_pad = conv.attrs.get('auto_pad', 'NOTSET')
        if auto_pad != 'NOTSET':
            logging.warning('Conv node (%s) uses auto_pad=%s; skipping even-to-odd kernel promotion',
                            conv.name, auto_pad)
            continue

        # pads is optional as well; the ONNX default is zero padding on every side
        pads = [int(p) for p in conv.attrs.get('pads', [0, 0, 0, 0])]
        if len(pads) != 4:
            continue
        dilations = [int(d) for d in conv.attrs.get('dilations', [1, 1])]

        logging.debug('Promoting conv node (%s) size (%d x %d) to next size up',
                      conv.name, kernel_shape[0], kernel_shape[1])

        new_size = size + 1
        new_shape = [new_size, new_size]

        zero_p = zero_points.get(conv.name, 0)

        new_weights_shape = [*weight_values.shape[:2], *new_shape]

        # NOTE(review): original author's open question -- is it correct to put the zero point
        # in the added weights, or only in the layer padding? A non-zero fill here multiplies
        # real input values, not just padding.
        # Keep the dtype of the original weights so Conv inputs stay type-consistent
        new_weights = np.full(new_weights_shape, zero_p, dtype=weight_values.dtype)
        # We pad the left and top side of the filter weights with the fill value as we
        # increase the spatial dimensions by 1
        new_weights[:, :, 1:, 1:] = weight_values

        # Use a fresh tensor name: the original constant may still feed other nodes
        new_weights_tensor = gs.Constant(f"{weight_tensor.name}_odd", new_weights)
        conv.inputs[1] = new_weights_tensor

        conv.attrs['kernel_shape'] = new_shape
        logging.debug('  New conv kernel shape: (%d x %d)', new_shape[0], new_shape[1])

        pad_name = 'Pad/' + conv.name

        # The added filter row/column sits at index 0, which moves every original tap by one
        # dilation step; the extra leading padding must therefore equal the dilation (1 by default)
        pads[0] += dilations[0] # x1 (height) begin
        pads[1] += dilations[1] # x2 (width) begin
        #incorporate all dimensions: depending on opset, may not support axis specification
        all_pads = np.asarray([0, 0, pads[0], pads[1], 0, 0, pads[2], pads[3]], dtype=np.int64)
        pads_tensor = gs.Constant(pad_name + '_pads', all_pads)
        # Pad's constant value must have the same element type as the data being padded;
        # Conv requires data and weights to share a type, so the weight dtype is used
        fill_value_tensor = gs.Constant(pad_name + '_fill', np.asarray([zero_p], dtype=weight_values.dtype))

        conv.attrs['pads'] = [0, 0, 0, 0]

        pad_attrs = {
            'mode' : 'constant'
        }
        # pads / constant value are passed as inputs (the Pad form used from opset 11 onwards)
        pad_inputs = [conv_input, pads_tensor, fill_value_tensor]
        pad_outputs = [gs.Variable(pad_name + '_output', dtype=conv_input.dtype)]

        logging.debug('  Adding Pad layer with dimensions (%d,%d,%d,%d) and resetting conv pads to 0\'s',
                      pads[0], pads[1], pads[2], pads[3])

        pad_node = gs.Node('Pad', pad_name, pad_attrs, pad_inputs, pad_outputs)

        #make Conv consume the Pad output instead of the original input
        conv.inputs[0] = pad_outputs[0]
        graph.nodes.append(pad_node)
+# No hardware patent is licensed hereunder. +# +# Redistributions must preserve existing copyright notices and reproduce this license +# (including the above copyright notice and the disclaimer and (if applicable) source +# code license limitations below) in the documentation and/or other materials provided +# with the distribution +# +# Redistribution and use in binary form, without modification, are permitted provided +# that the following conditions are met: +# +# * No reverse engineering, decompilation, or disassembly of this software is +# permitted with respect to any software provided in binary form. +# +# * any redistribution and use are licensed by TI for use only with TI Devices. +# +# * Nothing shall obligate TI to provide you with source code for the software +# licensed and provided to you in object code. +# +# If software source code is provided to you, modification and redistribution of the +# source code are permitted provided that the following conditions are met: +# +# * any redistribution and use of the source code, including any resulting derivative +# works, are licensed by TI for use only with TI Devices. +# +# * any redistribution and use of any object code compiled from the source code +# and any resulting derivative works, are licensed by TI for use only with TI Devices. +# +# Neither the name of Texas Instruments Incorporated nor the names of its suppliers +# +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# DISCLAIMER. +# +# THIS SOFTWARE IS PROVIDED BY TI AND TI'S LICENSORS "AS IS" AND ANY EXPRESS +# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
def tidl_replace_sub_with_neg_add(graph: gs.Graph,
                                  onnx_graph: onnx.GraphProto):
    '''
    Sub node is not supported, but this can be replaced (less efficiently) with negation and add

    Every ``Sub`` node computing C = A - B whose inputs have equal shapes is replaced by
    ``Mul(B, -1)`` followed by ``Add(A, -B)`` writing to the original output tensor C.

    :param graph: graph that is modified in place. The old Sub node is left without outputs so
                  that graph.cleanup() removes it; the new nodes require graph.toposort()
    :param onnx_graph: not used by this function
    '''
    # Iterate over a snapshot: new nodes are appended to graph.nodes inside the loop
    for node in list(graph.nodes):
        if node.op != "Sub":
            continue

        #Sub -> C = A-B. inputs=[A,B]
        A, B = node.inputs
        C = node.outputs[0]
        if A.shape != B.shape:
            logging.warning('This is a broadcasted node; not yet supported for Sub replacment')
            continue

        # Decide the constant's dtype before creating any node, so that skipping leaves no
        # half-built nodes attached to A / B. An unknown dtype falls back to float32 rather
        # than numpy's float64 default (assumption: float32 is the model's compute type).
        neg_dtype = B.dtype if B.dtype is not None else A.dtype
        neg_dtype = np.dtype(np.float32 if neg_dtype is None else neg_dtype)
        if np.issubdtype(neg_dtype, np.unsignedinteger):
            logging.warning('Sub node %s has unsigned dtype %s; -1 is not representable, skipping',
                            node.name, neg_dtype)
            continue

        logging.debug('Replacing Sub node %s with Multiply-Add', node.name)
        #Create Mul node, and use B as one input and -1 (constant; broadcasted) as the other
        base_name = node.name
        mul_name = base_name + '_Mul'
        #We will broadcast -1 multiplication across the whole input B
        neg_values = np.array([-1], dtype=neg_dtype)
        negation_tensor = gs.Constant(mul_name + '/neg', neg_values)
        negation_output = gs.Variable(mul_name + '/negative', dtype=B.dtype, shape=B.shape)
        mul_node = gs.Node('Mul', mul_name, {}, [B, negation_tensor], [negation_output])

        # Detach C from the Sub node before handing it to the Add node
        node.outputs.clear()

        #Create add node for A + (-B)
        add_name = base_name + '_Add'
        add_node = gs.Node('Add', add_name, {}, [A, negation_output], outputs=[C])

        graph.nodes.append(mul_node)
        graph.nodes.append(add_node)
        #old Sub node will be removed with graph.cleanup.
        #The new nodes will require graph.toposort()


def tidl_replace_mean_with_eltwise(graph: gs.Graph,
                                   onnx_graph: onnx.GraphProto):
    '''
    Elementwise Mean node is not supported, but we can emulate with add -> multiply

    Currently only supports Mean between two input tensors, but should be trivial to extend
    Note that quantization may impact this layer, especially for many inputs

    :param graph: graph that is modified in place. The old Mean node is left without outputs;
                  cleanup and toposort of the graph are needed to fully apply the changes
    :param onnx_graph: not used by this function
    '''
    # Iterate over a snapshot: new nodes are appended to graph.nodes inside the loop
    for node in list(graph.nodes):
        if node.op != "Mean":
            continue
        if len(node.inputs) != 2:
            logging.warning('Mean between arbitrary number of inputs is not supported; only 2. Skip node %s',
                            node.name)
            continue

        base_name = node.name
        A, B = node.inputs[0:2]
        output = node.outputs[0]

        if A.shape != B.shape:
            logging.warning('Detected non-elementwise operation / broadcasting -- this is not supported')
            continue

        if 'float' not in str(A.dtype):
            logging.warning('potential issue with dtype %s; this may cause problems with quantization',
                            str(A.dtype))

        # All skip decisions happen before any node is created, so skipping leaves nothing
        # attached to A / B. An unknown dtype falls back to float32 (assumption: the model's
        # compute type); an integer dtype cannot hold 0.5 and would zero the output.
        half_dtype = np.dtype(np.float32 if A.dtype is None else A.dtype)
        if np.issubdtype(half_dtype, np.integer):
            logging.warning('Mean node %s has integer dtype %s; 0.5 is not representable, skipping',
                            node.name, half_dtype)
            continue

        logging.debug('Replacing Mean (%s) of two inputs with representative Add->Multiply elementwise layers',
                      node.name)

        add_name = base_name + '_Add'
        sum_tensor = gs.Variable(base_name + '/Sum', dtype=A.dtype, shape=A.shape)
        add_node = gs.Node('Add', add_name, {}, [A, B], outputs=[sum_tensor])

        div_name = base_name + '_Mul_by_half'
        divisor_tensor_name = div_name + '/divisor'
        div_by_two_values = np.array([1 / 2], dtype=half_dtype)
        divisor_tensor = gs.Constant(divisor_tensor_name, div_by_two_values)

        # Detach the output from the Mean node before handing it to the Mul node
        node.outputs.clear()

        mul_node = gs.Node('Mul', div_name, {}, [sum_tensor, divisor_tensor], [output])

        graph.nodes.append(add_node)
        graph.nodes.append(mul_node)

    #cleanup and toposort graph to fully apply changes
def tidl_remove_duplicates(graph:gs.Graph, onnx_graph:onnx.GraphProto, do_cleanup=True):
    '''
    Some nodes are simply duplicates of each other. There is no need to process these, and we
    can reuse the outputs of one for all of them

    Two nodes count as duplicates when they have the same op, the same inputs, the same
    attributes and the same number of outputs. Consumers of the duplicate are rewired to the
    outputs of the node that is kept, and the duplicate's outputs are cleared so that
    graph.cleanup() drops it.

    Not treated as removable duplicates:
      * nodes without inputs (their results are not tied to shared data, e.g. random generators)
      * nodes producing a graph output (removing them would leave that output without producer)

    :param graph: graph that is modified in place
    :param onnx_graph: not used by this function
    :param do_cleanup: when True, run graph.cleanup().toposort() after rewiring
    '''
    # Function-scope import: the visible module-level imports of this file do not include logging
    import logging

    graph_output_ids = {id(tensor) for tensor in graph.outputs}
    nodes = list(graph.nodes)
    removed_ids = set()          # ids of nodes already scheduled for removal
    replacement_node_pairs = []  # (node to keep as replacement, node to remove)

    for keep_node in nodes:
        if id(keep_node) in removed_ids or not keep_node.inputs:
            continue

        for candidate in nodes:
            if candidate is keep_node or id(candidate) in removed_ids:
                continue
            if candidate.op != keep_node.op:
                continue
            if candidate.inputs != keep_node.inputs or candidate.attrs != keep_node.attrs:
                continue
            if len(candidate.outputs) != len(keep_node.outputs):
                continue  # outputs are matched by position below
            if any(id(tensor) in graph_output_ids for tensor in candidate.outputs):
                continue

            # hang onto the nodes we will remove/replace. We should not remove them while iterating
            removed_ids.add(id(candidate))
            replacement_node_pairs.append((keep_node, candidate))

    for keep_node, removal_node in replacement_node_pairs:
        logging.debug('Removing duplicate node %s; reusing outputs of %s',
                      removal_node.name, keep_node.name)
        removal_outputs = list(removal_node.outputs)
        keep_outputs = keep_node.outputs

        # for each output in the node to remove, find the consuming nodes and change their
        # input to use the output that we will keep
        for layer in list(find_out_layers(removal_node)):
            for in_idx, in_tensor in enumerate(layer.inputs):
                for out_idx, out_tensor in enumerate(removal_outputs):
                    if in_tensor == out_tensor:
                        layer.inputs[in_idx] = keep_outputs[out_idx]
                        break

        #clear the outputs so that graph.cleanup() will remove the node
        removal_node.outputs.clear()

    if do_cleanup:
        graph.cleanup().toposort()
def tidl_remove_unity_resize(graph: gs.Graph,
                             onnx_graph: onnx.GraphProto):
    '''
    Some models have an effectively null resize node that scales by a factor of 1 in all dimensions
    Such a node is often an export artifact -- a layer added by a model format converter
    This node is effectively unity, but it will be processed nonetheless. It should therefore be removed

    A Resize node is bypassed only when its ``scales`` input is a constant, non-empty tensor
    whose entries are all 1, input and output shapes agree, and it is not in
    ``tf_crop_and_resize`` mode. Consumers of its output are rewired to its input.

    :param graph: graph that is modified in place. The bypassed node is left in the graph and
                  is expected to be removed by graph.cleanup() once its output is unused
    :param onnx_graph: not used by this function
    '''
    for node in graph.nodes:
        if node.op != "Resize":
            continue

        inputs = node.inputs
        if len(inputs) < 3:
            continue
        X, _roi, scales = inputs[0:3]
        Y = node.outputs[0]

        # Only constant tensors expose `.values`; scales computed at runtime cannot be judged here
        scale_values = getattr(scales, 'values', None)
        if scale_values is None:
            continue
        scale_values = np.asarray(scale_values)
        # An empty `scales` means the node is driven by `sizes`; all() over an empty sequence
        # would be vacuously true, so it has to be rejected explicitly
        if scale_values.size == 0 or not np.all(scale_values == 1):
            continue
        if X.shape != Y.shape:
            continue

        #ensure it's not using ROI, which is only with crop-and-resize mode
        # (attribute is optional; 'half_pixel' is the ONNX default)
        if node.attrs.get('coordinate_transformation_mode', 'half_pixel') == 'tf_crop_and_resize':
            logging.warning("Detected Resize node as using ROI... skipping")
            continue

        logging.debug("Removing unity Resize node %s", node.name)

        for o_node in find_out_layers(node):
            for i, net in enumerate(o_node.inputs):
                if net == Y:
                    o_node.inputs[i] = X

        #node will be removed by cleanup since it has only unused outputs