Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Model Optimizer] Add conv even -> larger odd kernel #78

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
"""
from typing import List
import onnx_graphsurgeon as gs
import onnx

class UniqueIdGenerator:
"""
Expand Down Expand Up @@ -196,3 +197,57 @@ def bordered(text):
res.append('│' + (s + ' ' * width)[:width] + '│')
res.append('└' + '─' * width + '┘')
return '\n'.join(res)

def reset_shape_inference(onnx_graph:onnx.GraphProto):
    '''
    Clear all value_info entries that hold shape inference information

    Returns the same GraphProto instance so calls can be chained.
    '''
    # repeated protobuf fields cannot be reassigned wholesale, so drain in place
    while onnx_graph.value_info:
        onnx_graph.value_info.pop()
    return onnx_graph

def tidl_remove_duplicates(graph:gs.Graph, onnx_graph:onnx.GraphProto, do_cleanup=True):
    '''
    Some nodes are simply duplicates of each other. There is no need to process these, and we can reuse the outputs of one for all of them

    :param graph: graphsurgeon graph to deduplicate (modified in place)
    :param onnx_graph: backing onnx GraphProto (unused here; kept for a consistent optimizer signature)
    :param do_cleanup: when True, run graph.cleanup().toposort() so the orphaned duplicate nodes are dropped
    '''
    import logging

    replacement_node_pairs = [] #(node to keep as replacement, node to remove)
    # maintained incrementally -- previously this list was rebuilt from
    # replacement_node_pairs on every inner-loop iteration, which was O(n^3)
    nodes_to_remove = []

    for node_i in graph.nodes:
        # a node already marked for removal must not become a replacement itself
        if node_i in nodes_to_remove:
            continue

        for node_j in graph.nodes:
            if node_i.op != node_j.op: continue

            if node_i == node_j or node_j in nodes_to_remove:
                continue # skip itself and anything already paired for removal
            elif node_i.inputs != node_j.inputs or node_i.attrs != node_j.attrs:
                continue

            # hang onto the nodes we will remove/replace. We should not remove them while iterating
            replacement_node_pairs.append((node_i, node_j))
            nodes_to_remove.append(node_j)

    # debug aid: nodes kept as replacements (was a stray print)
    logging.debug(str([pair[0] for pair in replacement_node_pairs]))

    for keep_node, removal_node in replacement_node_pairs:
        removal_outputs = removal_node.outputs
        keep_outputs = keep_node.outputs

        # for each output in the node to remove, find the consuming nodes and change their input to use the output that we will keep
        out_layers = find_out_layers(removal_node)

        for layer in out_layers:
            for i, in_tensor in enumerate(layer.inputs):
                for j, out_tensor in enumerate(removal_outputs):
                    if in_tensor == out_tensor:
                        # outputs are matched positionally: the keeper's j-th
                        # output replaces the duplicate's j-th output
                        layer.inputs[i] = keep_outputs[j]

        #clear the outputs so that graph.cleanup() will remove them
        removal_node.outputs.clear()

    if do_cleanup:
        graph.cleanup().toposort()
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,15 @@
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.

"""
Module containing Conv layer specific functions and optimizations
"""
import logging
import numpy as np
import onnx_graphsurgeon as gs
import onnx
import numpy as np
import copy


def tidl_convert_conv_large_pad_to_smaller_kernel (graph: gs.Graph, onnx_graph: onnx.GraphProto):
Expand Down Expand Up @@ -148,3 +150,79 @@ def tidl_convert_conv_large_pad_to_smaller_kernel (graph: gs.Graph, onnx_graph:
conv.inputs[1] = gs.Constant(name= f"{weights.name}_reduced",
values=reduced_weight_tensor)
# bias need not change


def tidl_convert_conv_even_filter_to_odd(graph: gs.Graph, onnx_graph: onnx.GraphProto, zero_points=None):
    '''
    Even-sized convolution kernels are not supported in TIDL
    Replace even-sized kernels with next-size up odd kernels, with padding handled appropriately. Additional filter weights are the zero_points

    :param zero_points: On a per-layer basis, the zero-point for asymmetric quantization. This is a dictionary where key is the layer name, and value is the zero-point for that layer (assumed same for all layers, i.e. no grouping). Defaults to an empty mapping, i.e. zero-point 0 for every layer.

    Some tricks are required here due to Conv layer implementation in TIDL being 'SAME' only. This requires padding be handled outside the layer itself (due to asymmetric pads). Asymmetric quantization is not well supported for these layers, since the zero-point is unknown until calibration. The zero-point fills the additional convolution weights
    '''
    #identify conv nodes
    #find conv nodes w/ even sized kernels
    #replace even sized kernel with odd, and move values into appropriate shape Constant tensor
    #reset pad values in conv node
    #create Pad node that handles all padding, include 'zero point' values?
    #make Conv input the Pad input, Pad output the Conv input

    # avoid a mutable (and previously fake-example-filled) default argument
    if zero_points is None:
        zero_points = {}

    MAX_SUPPORTED_CONV_KERNEL = 7 #7x7 is largest validated layer size

    conv_nodes = [node for node in graph.nodes if node.op == "Conv"]

    for conv in conv_nodes:
        kernel_shape = conv.attrs['kernel_shape']
        pads = conv.attrs['pads']
        weight_tensor = conv.inputs[1]

        conv_input = conv.inputs[0]

        # only handle square, even kernels below the validated maximum size
        if kernel_shape[0] % 2 == 0 and kernel_shape[0] < MAX_SUPPORTED_CONV_KERNEL and kernel_shape[1] == kernel_shape[0]:
            logging.debug('Promoting conv node (%s) size (%d x %d) to next size up' % (conv.name, kernel_shape[0], kernel_shape[1]))

            new_size = kernel_shape[0] + 1
            new_shape = [new_size, new_size]

            zero_p = zero_points.get(conv.name, 0)

            # weights are OIHW: keep out/in channel counts, grow the spatial dims
            new_weights_shape = [*weight_tensor.shape[:2], *new_shape]

            # NOTE(review): zero-point is used both as filter fill and pad fill -- confirm whether only one is strictly required
            new_weights = np.full(new_weights_shape, zero_p, dtype=np.float32)
            # We will pad left and top side of the filter weights with the fill_value / zero-point as we increase the spatial dimensions by 1
            new_weights[:,:,1:,1:] = weight_tensor.values

            new_weights_tensor = gs.Constant(weight_tensor.name, new_weights)
            conv.inputs[1] = new_weights_tensor

            conv.attrs['kernel_shape'] = new_shape
            # previously this debug line logged no value at all
            logging.debug(' New conv kernel shape: %d x %d' % (new_size, new_size))

            pad_name = 'Pad/' + conv.name

            # copy so the list still referenced by the conv attrs is not mutated
            pads = copy.copy(pads)
            pads[0] += 1 # x1 (height) +1 to account for larger filter
            pads[1] += 1 # x2 (width) +1 to account for larger filter
            # incorporate all dimensions: depending on opset, may not support axis specification
            # Pad layout for NCHW is [N,C,H,W begins, N,C,H,W ends]
            all_pads = np.asarray([0,0, pads[0], pads[1], 0, 0, pads[2], pads[3] ])
            pads_tensor = gs.Constant(pad_name + '_pads', np.asarray(all_pads, np.int64))
            fill_value_tensor = gs.Constant(pad_name + '_fill', np.asarray([zero_p], dtype=np.float32))

            # all spatial padding is now performed by the explicit Pad node
            conv.attrs['pads'] = [0,0,0,0]

            pad_attrs = {
                'mode' : 'constant'
            }
            pad_inputs = [conv_input, pads_tensor, fill_value_tensor]
            pad_outputs = [gs.Variable(pad_name+'_output', dtype=conv_input.dtype)]

            logging.debug(' Adding Pad layer with dimensions (%d,%d,%d,%d) and resetting conv pads to 0\'s' % (pads[0], pads[1], pads[2], pads[3]))

            pad_node = gs.Node('Pad', pad_name, pad_attrs, pad_inputs, pad_outputs)

            # splice: original input -> Pad -> Conv
            conv.inputs[0] = pad_outputs[0]
            graph.nodes.append(pad_node)

Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# Copyright (c) {2023 - 2024} Texas Instruments Incorporated
#
# All rights reserved not granted herein.
#
# Limited License.
#
# Texas Instruments Incorporated grants a world-wide, royalty-free, non-exclusive
# license under copyrights and patents it now or hereafter owns or controls to make,
# have made, use, import, offer to sell and sell ("Utilize") this software subject to the
# terms herein. With respect to the foregoing patent license, such license is granted
# solely to the extent that any such patent is necessary to Utilize the software alone.
# The patent license shall not apply to any combinations which include this software,
# other than combinations with devices manufactured by or for TI ("TI Devices").
# No hardware patent is licensed hereunder.
#
# Redistributions must preserve existing copyright notices and reproduce this license
# (including the above copyright notice and the disclaimer and (if applicable) source
# code license limitations below) in the documentation and/or other materials provided
# with the distribution
#
# Redistribution and use in binary form, without modification, are permitted provided
# that the following conditions are met:
#
# * No reverse engineering, decompilation, or disassembly of this software is
# permitted with respect to any software provided in binary form.
#
# * any redistribution and use are licensed by TI for use only with TI Devices.
#
# * Nothing shall obligate TI to provide you with source code for the software
# licensed and provided to you in object code.
#
# If software source code is provided to you, modification and redistribution of the
# source code are permitted provided that the following conditions are met:
#
# * any redistribution and use of the source code, including any resulting derivative
# works, are licensed by TI for use only with TI Devices.
#
# * any redistribution and use of any object code compiled from the source code
# and any resulting derivative works, are licensed by TI for use only with TI Devices.
#
# Neither the name of Texas Instruments Incorporated nor the names of its suppliers
#
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# DISCLAIMER.
#
# THIS SOFTWARE IS PROVIDED BY TI AND TI'S LICENSORS "AS IS" AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL TI AND TI'S LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Module containing Global average pooling layer specific functions and optimizations
"""
import logging
import onnx_graphsurgeon as gs
import onnx
import numpy as np


def tidl_replace_sub_with_neg_add(graph: gs.Graph,
                                  onnx_graph: onnx.GraphProto):
    '''
    Sub node is not supported, but this can be replaced (less efficiently) with negation and add

    Rewrites every non-broadcast Sub (C = A - B) as Mul(B, -1) followed by Add(A, -B).
    The rewritten Sub nodes are left output-less so a later graph.cleanup() removes
    them, and the newly appended nodes require a graph.toposort() to be ordered.
    '''
    # NOTE: a stray print() here previously announced the wrong pass name
    # (tidl_replace_mean_with_eltwise); the per-node debug log below suffices.

    for node in graph.nodes:

        if node.op == "Sub":
            #Sub -> C = A-B. inputs=[A,B]
            A, B = node.inputs
            C = node.outputs[0]
            if A.shape != B.shape:
                logging.warning('This is a broadcasted node; not yet supported for Sub replacement')
                continue

            logging.debug(f'Replacing Sub node {node.name} with Multiply-Add')
            #Create Mul node, and use B as one input and -1 (constant; broadcasted) as the other
            base_name = node.name
            mul_name = base_name + '_Mul'
            #We will broadcast -1 multiplication across the whole input B
            neg_values = np.ndarray((1), dtype=B.dtype)
            neg_values[0] = -1
            negation_tensor = gs.Constant(mul_name + '/neg', neg_values)
            negation_output = gs.Variable(mul_name + '/negative', dtype=B.dtype, shape=B.shape)
            mul_node = gs.Node('Mul', mul_name, {}, [B, negation_tensor], [negation_output])

            #Create add node for A + (-B)
            add_name = base_name + '_Add'
            add_node = gs.Node('Add', add_name, {} , [A, negation_output], outputs=[C])

            # detach the Sub output so cleanup drops the node
            # (clear() for consistency with tidl_replace_mean_with_eltwise)
            node.outputs.clear()

            graph.nodes.append(mul_node)
            graph.nodes.append(add_node)
            #old Sub node will be removed with graph.cleanup.
            #The new nodes will require graph.toposort()

def tidl_replace_mean_with_eltwise(graph: gs.Graph,
                                   onnx_graph: onnx.GraphProto):
    '''
    Elementwise Mean node is not supported, but we can emulate with add -> multiply

    Currently only supports Mean between two input tensors, but should be trivial to extend
    Note that quantization may impact this layer, especially for many inputs
    '''

    for node in graph.nodes:
        if node.op != "Mean":
            continue
        if len(node.inputs) != 2:
            logging.warning(f'Mean between arbitrary number of inputs is not supported; only 2. Skip node {node.name}')
            continue
        logging.debug(f'Replacing Mean ({node.name}) of two inputs with representative Add->Multiply elementwise layers')

        base_name = node.name
        lhs, rhs = node.inputs[0:2]
        mean_output = node.outputs[0]

        # broadcasting is not handled: both operands must share a shape
        if lhs.shape != rhs.shape:
            logging.warning('Detected non-elementwise operation / broadcasting -- this is not supported')
            continue

        # first stage: elementwise sum of the two Mean operands
        sum_tensor = gs.Variable(base_name + '/Sum', dtype=lhs.dtype, shape=lhs.shape)
        add_node = gs.Node('Add', base_name + '_Add', {}, [lhs, rhs], outputs=[sum_tensor])

        halve_name = base_name + '_Mul_by_half'

        # a non-float dtype truncates the 1/2 multiplier below
        if 'float' not in str(lhs.dtype):
            logging.warning(f'potential issue with dtype {str(lhs.dtype)}; this may cause problems with quantization')

        # second stage: broadcast a scalar 1/2 across the sum to form the mean
        halving_constant = gs.Constant(halve_name + '/divisor', np.full((1,), 1/2, dtype=lhs.dtype))
        mul_node = gs.Node('Mul', halve_name, {}, [sum_tensor, halving_constant], [mean_output])

        # detach the Mean node's output so a later graph.cleanup() drops it
        node.outputs.clear()

        graph.nodes.append(mul_node)
        graph.nodes.append(add_node)

        #cleanup and toposort graph to fully apply changes

Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
import onnx_graphsurgeon as gs
import onnx
import numpy as np
from tidl_onnx_model_optimizer.src.common import find_out_layers, remove_node


def tidl_convert_resize_params_size_to_scale(graph: gs.Graph,
Expand Down Expand Up @@ -102,3 +103,42 @@ def tidl_convert_resize_params_size_to_scale(graph: gs.Graph,
# endif
# endif
# endfor


def tidl_remove_unity_resize(graph: gs.Graph,
onnx_graph: onnx.GraphProto):
'''
Some models have an effectively null resize node that scales by a factor of 1 in all dimensions
Such a node is often an export artifact -- a layer added by a model format converter
This is node effectively unity, but it will be processed nonetheless. It should therefore be removed
'''

tensors = graph.tensors()
nodes_to_remove = []
for node in graph.nodes:

if node.op == "Resize":
inputs = node.inputs
if len(inputs) >= 3:
X, roi, scales = inputs[0:3]
else:
continue
Y = node.outputs[0]
attrs = node.attrs

if X.shape == Y.shape and all(map(lambda x: x==1, scales.values)):
#ensure it's not using ROI, which is only with crop-and-resize mode
if node.attrs['coordinate_transformation_mode'] == 'tf_crop_and_resize':
logging.warning("Detected Resize node as using ROI... skipping")
continue

logging.debug("Removing unity Resize node %s" % node.name)

out_nodes = find_out_layers(node)

for o_node in out_nodes:
for i, net in enumerate(o_node.inputs):
if net == Y:
o_node.inputs[i] = X

#node will be removed by cleanup since it has only unused outputs