Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Model Optimizer] Add conv even -> larger odd kernel #78

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
"""
from typing import List
import onnx_graphsurgeon as gs
import onnx

class UniqueIdGenerator:
"""
Expand Down Expand Up @@ -196,3 +197,57 @@ def bordered(text):
res.append('│' + (s + ' ' * width)[:width] + '│')
res.append('└' + '─' * width + '┘')
return '\n'.join(res)

def reset_shape_inference(onnx_graph:onnx.GraphProto):
    '''
    Clear all value_info entries that hold shape inference information

    Returns the same GraphProto instance so calls can be chained.
    '''
    # repeated protobuf fields cannot be reassigned wholesale, so drain in place
    while onnx_graph.value_info:
        onnx_graph.value_info.pop()
    return onnx_graph

def tidl_remove_duplicates(graph:gs.Graph, onnx_graph:onnx.GraphProto, do_cleanup=True):
    '''
    Some nodes are simply duplicates of each other. There is no need to process these, and we can reuse the outputs of one for all of them

    :param graph: graphsurgeon graph to deduplicate (modified in place)
    :param onnx_graph: backing onnx GraphProto (unused here; kept for a consistent optimizer signature)
    :param do_cleanup: when True, run graph.cleanup().toposort() so the orphaned duplicate nodes are dropped
    '''
    import logging

    replacement_node_pairs = [] #(node to keep as replacement, node to remove)
    # maintained incrementally -- previously this list was rebuilt from
    # replacement_node_pairs on every inner-loop iteration, which was O(n^3)
    nodes_to_remove = []

    for node_i in graph.nodes:
        # a node already marked for removal must not become a replacement itself
        if node_i in nodes_to_remove:
            continue

        for node_j in graph.nodes:
            if node_i.op != node_j.op: continue

            if node_i == node_j or node_j in nodes_to_remove:
                continue # skip itself and anything already paired for removal
            elif node_i.inputs != node_j.inputs or node_i.attrs != node_j.attrs:
                continue

            # hang onto the nodes we will remove/replace. We should not remove them while iterating
            replacement_node_pairs.append((node_i, node_j))
            nodes_to_remove.append(node_j)

    # debug aid: nodes kept as replacements (was a stray print)
    logging.debug(str([pair[0] for pair in replacement_node_pairs]))

    for keep_node, removal_node in replacement_node_pairs:
        removal_outputs = removal_node.outputs
        keep_outputs = keep_node.outputs

        # for each output in the node to remove, find the consuming nodes and change their input to use the output that we will keep
        out_layers = find_out_layers(removal_node)

        for layer in out_layers:
            for i, in_tensor in enumerate(layer.inputs):
                for j, out_tensor in enumerate(removal_outputs):
                    if in_tensor == out_tensor:
                        # outputs are matched positionally: the keeper's j-th
                        # output replaces the duplicate's j-th output
                        layer.inputs[i] = keep_outputs[j]

        #clear the outputs so that graph.cleanup() will remove them
        removal_node.outputs.clear()

    if do_cleanup:
        graph.cleanup().toposort()
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,15 @@
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.

"""
Module containing Conv layer specific functions and optimizations
"""
import logging
import numpy as np
import onnx_graphsurgeon as gs
import onnx
import numpy as np
import copy


def tidl_convert_conv_large_pad_to_smaller_kernel (graph: gs.Graph, onnx_graph: onnx.GraphProto):
Expand Down Expand Up @@ -148,3 +150,79 @@ def tidl_convert_conv_large_pad_to_smaller_kernel (graph: gs.Graph, onnx_graph:
conv.inputs[1] = gs.Constant(name= f"{weights.name}_reduced",
values=reduced_weight_tensor)
# bias need not change


def tidl_convert_conv_even_filter_to_odd(graph: gs.Graph, onnx_graph: onnx.GraphProto, zero_points=None):
    '''
    Even-sized convolution kernels are not supported in TIDL
    Replace even-sized kernels with next-size up odd kernels, with padding handled appropriately. Additional filter weights are the zero_points

    :param zero_points: On a per-layer basis, the zero-point for asymmetric quantization. This is a dictionary where key is the layer name, and value is the zero-point for that layer (assumed same for all layers, i.e. no grouping). Defaults to an empty mapping, i.e. zero-point 0 for every layer.

    Some tricks are required here due to Conv layer implementation in TIDL being 'SAME' only. This requires padding be handled outside the layer itself (due to asymmetric pads). Asymmetric quantization is not well supported for these layers, since the zero-point is unknown until calibration. The zero-point fills the additional convolution weights
    '''
    #identify conv nodes
    #find conv nodes w/ even sized kernels
    #replace even sized kernel with odd, and move values into appropriate shape Constant tensor
    #reset pad values in conv node
    #create Pad node that handles all padding, include 'zero point' values?
    #make Conv input the Pad input, Pad output the Conv input

    # avoid a mutable (and previously fake-example-filled) default argument
    if zero_points is None:
        zero_points = {}

    MAX_SUPPORTED_CONV_KERNEL = 7 #7x7 is largest validated layer size

    conv_nodes = [node for node in graph.nodes if node.op == "Conv"]

    for conv in conv_nodes:
        kernel_shape = conv.attrs['kernel_shape']
        pads = conv.attrs['pads']
        weight_tensor = conv.inputs[1]

        conv_input = conv.inputs[0]

        # only handle square, even kernels below the validated maximum size
        if kernel_shape[0] % 2 == 0 and kernel_shape[0] < MAX_SUPPORTED_CONV_KERNEL and kernel_shape[1] == kernel_shape[0]:
            logging.debug('Promoting conv node (%s) size (%d x %d) to next size up' % (conv.name, kernel_shape[0], kernel_shape[1]))

            new_size = kernel_shape[0] + 1
            new_shape = [new_size, new_size]

            zero_p = zero_points.get(conv.name, 0)

            # weights are OIHW: keep out/in channel counts, grow the spatial dims
            new_weights_shape = [*weight_tensor.shape[:2], *new_shape]

            # NOTE(review): zero-point is used both as filter fill and pad fill -- confirm whether only one is strictly required
            new_weights = np.full(new_weights_shape, zero_p, dtype=np.float32)
            # We will pad left and top side of the filter weights with the fill_value / zero-point as we increase the spatial dimensions by 1
            new_weights[:,:,1:,1:] = weight_tensor.values

            new_weights_tensor = gs.Constant(weight_tensor.name, new_weights)
            conv.inputs[1] = new_weights_tensor

            conv.attrs['kernel_shape'] = new_shape
            # previously this debug line logged no value at all
            logging.debug(' New conv kernel shape: %d x %d' % (new_size, new_size))

            pad_name = 'Pad/' + conv.name

            # copy so the list still referenced by the conv attrs is not mutated
            pads = copy.copy(pads)
            pads[0] += 1 # x1 (height) +1 to account for larger filter
            pads[1] += 1 # x2 (width) +1 to account for larger filter
            # incorporate all dimensions: depending on opset, may not support axis specification
            # Pad layout for NCHW is [N,C,H,W begins, N,C,H,W ends]
            all_pads = np.asarray([0,0, pads[0], pads[1], 0, 0, pads[2], pads[3] ])
            pads_tensor = gs.Constant(pad_name + '_pads', np.asarray(all_pads, np.int64))
            fill_value_tensor = gs.Constant(pad_name + '_fill', np.asarray([zero_p], dtype=np.float32))

            # all spatial padding is now performed by the explicit Pad node
            conv.attrs['pads'] = [0,0,0,0]

            pad_attrs = {
                'mode' : 'constant'
            }
            pad_inputs = [conv_input, pads_tensor, fill_value_tensor]
            pad_outputs = [gs.Variable(pad_name+'_output', dtype=conv_input.dtype)]

            logging.debug(' Adding Pad layer with dimensions (%d,%d,%d,%d) and resetting conv pads to 0\'s' % (pads[0], pads[1], pads[2], pads[3]))

            pad_node = gs.Node('Pad', pad_name, pad_attrs, pad_inputs, pad_outputs)

            # splice: original input -> Pad -> Conv
            conv.inputs[0] = pad_outputs[0]
            graph.nodes.append(pad_node)

Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# Copyright (c) {2023 - 2024} Texas Instruments Incorporated
#
# All rights reserved not granted herein.
#
# Limited License.
#
# Texas Instruments Incorporated grants a world-wide, royalty-free, non-exclusive
# license under copyrights and patents it now or hereafter owns or controls to make,
# have made, use, import, offer to sell and sell ("Utilize") this software subject to the
# terms herein. With respect to the foregoing patent license, such license is granted
# solely to the extent that any such patent is necessary to Utilize the software alone.
# The patent license shall not apply to any combinations which include this software,
# other than combinations with devices manufactured by or for TI ("TI Devices").
# No hardware patent is licensed hereunder.
#
# Redistributions must preserve existing copyright notices and reproduce this license
# (including the above copyright notice and the disclaimer and (if applicable) source
# code license limitations below) in the documentation and/or other materials provided
# with the distribution
#
# Redistribution and use in binary form, without modification, are permitted provided
# that the following conditions are met:
#
# * No reverse engineering, decompilation, or disassembly of this software is
# permitted with respect to any software provided in binary form.
#
# * any redistribution and use are licensed by TI for use only with TI Devices.
#
# * Nothing shall obligate TI to provide you with source code for the software
# licensed and provided to you in object code.
#
# If software source code is provided to you, modification and redistribution of the
# source code are permitted provided that the following conditions are met:
#
# * any redistribution and use of the source code, including any resulting derivative
# works, are licensed by TI for use only with TI Devices.
#
# * any redistribution and use of any object code compiled from the source code
# and any resulting derivative works, are licensed by TI for use only with TI Devices.
#
# Neither the name of Texas Instruments Incorporated nor the names of its suppliers
#
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# DISCLAIMER.
#
# THIS SOFTWARE IS PROVIDED BY TI AND TI'S LICENSORS "AS IS" AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL TI AND TI'S LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Module containing Global average pooling layer specific functions and optimizations
"""
import logging
import onnx_graphsurgeon as gs
import onnx
import numpy as np


def tidl_replace_sub_with_neg_add(graph: gs.Graph,
                                  onnx_graph: onnx.GraphProto):
    '''
    Sub node is not supported, but this can be replaced (less efficiently) with negation and add

    Rewrites every non-broadcast Sub (C = A - B) as Mul(B, -1) followed by Add(A, -B).
    The rewritten Sub nodes are left output-less so a later graph.cleanup() removes
    them, and the newly appended nodes require a graph.toposort() to be ordered.
    '''
    # NOTE: a stray print() here previously announced the wrong pass name
    # (tidl_replace_mean_with_eltwise); the per-node debug log below suffices.

    for node in graph.nodes:

        if node.op == "Sub":
            #Sub -> C = A-B. inputs=[A,B]
            A, B = node.inputs
            C = node.outputs[0]
            if A.shape != B.shape:
                logging.warning('This is a broadcasted node; not yet supported for Sub replacement')
                continue

            logging.debug(f'Replacing Sub node {node.name} with Multiply-Add')
            #Create Mul node, and use B as one input and -1 (constant; broadcasted) as the other
            base_name = node.name
            mul_name = base_name + '_Mul'
            #We will broadcast -1 multiplication across the whole input B
            neg_values = np.ndarray((1), dtype=B.dtype)
            neg_values[0] = -1
            negation_tensor = gs.Constant(mul_name + '/neg', neg_values)
            negation_output = gs.Variable(mul_name + '/negative', dtype=B.dtype, shape=B.shape)
            mul_node = gs.Node('Mul', mul_name, {}, [B, negation_tensor], [negation_output])

            #Create add node for A + (-B)
            add_name = base_name + '_Add'
            add_node = gs.Node('Add', add_name, {} , [A, negation_output], outputs=[C])

            # detach the Sub output so cleanup drops the node
            # (clear() for consistency with tidl_replace_mean_with_eltwise)
            node.outputs.clear()

            graph.nodes.append(mul_node)
            graph.nodes.append(add_node)
            #old Sub node will be removed with graph.cleanup.
            #The new nodes will require graph.toposort()

def tidl_replace_mean_with_eltwise(graph: gs.Graph,
                                   onnx_graph: onnx.GraphProto):
    '''
    Elementwise Mean node is not supported, but we can emulate with add -> multiply

    Currently only supports Mean between two input tensors, but should be trivial to extend
    Note that quantization may impact this layer, especially for many inputs
    '''

    for node in graph.nodes:
        if node.op != "Mean":
            continue
        if len(node.inputs) != 2:
            logging.warning(f'Mean between arbitrary number of inputs is not supported; only 2. Skip node {node.name}')
            continue
        logging.debug(f'Replacing Mean ({node.name}) of two inputs with representative Add->Multiply elementwise layers')

        base_name = node.name
        lhs, rhs = node.inputs[0:2]
        mean_output = node.outputs[0]

        # broadcasting is not handled: both operands must share a shape
        if lhs.shape != rhs.shape:
            logging.warning('Detected non-elementwise operation / broadcasting -- this is not supported')
            continue

        # first stage: elementwise sum of the two Mean operands
        sum_tensor = gs.Variable(base_name + '/Sum', dtype=lhs.dtype, shape=lhs.shape)
        add_node = gs.Node('Add', base_name + '_Add', {}, [lhs, rhs], outputs=[sum_tensor])

        halve_name = base_name + '_Mul_by_half'

        # a non-float dtype truncates the 1/2 multiplier below
        if 'float' not in str(lhs.dtype):
            logging.warning(f'potential issue with dtype {str(lhs.dtype)}; this may cause problems with quantization')

        # second stage: broadcast a scalar 1/2 across the sum to form the mean
        halving_constant = gs.Constant(halve_name + '/divisor', np.full((1,), 1/2, dtype=lhs.dtype))
        mul_node = gs.Node('Mul', halve_name, {}, [sum_tensor, halving_constant], [mean_output])

        # detach the Mean node's output so a later graph.cleanup() drops it
        node.outputs.clear()

        graph.nodes.append(mul_node)
        graph.nodes.append(add_node)

        #cleanup and toposort graph to fully apply changes

Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
import onnx_graphsurgeon as gs
import onnx
import numpy as np
from tidl_onnx_model_optimizer.src.common import find_out_layers, remove_node


def tidl_convert_resize_params_size_to_scale(graph: gs.Graph,
Expand Down Expand Up @@ -102,3 +103,42 @@ def tidl_convert_resize_params_size_to_scale(graph: gs.Graph,
# endif
# endif
# endfor


def tidl_remove_unity_resize(graph: gs.Graph,
onnx_graph: onnx.GraphProto):
'''
Some models have an effectively null resize node that scales by a factor of 1 in all dimensions
Such a node is often an export artifact -- a layer added by a model format converter
This is node effectively unity, but it will be processed nonetheless. It should therefore be removed
'''

tensors = graph.tensors()
nodes_to_remove = []
for node in graph.nodes:

if node.op == "Resize":
inputs = node.inputs
if len(inputs) >= 3:
X, roi, scales = inputs[0:3]
else:
continue
Y = node.outputs[0]
attrs = node.attrs

if X.shape == Y.shape and all(map(lambda x: x==1, scales.values)):
#ensure it's not using ROI, which is only with crop-and-resize mode
if node.attrs['coordinate_transformation_mode'] == 'tf_crop_and_resize':
logging.warning("Detected Resize node as using ROI... skipping")
continue

logging.debug("Removing unity Resize node %s" % node.name)

out_nodes = find_out_layers(node)

for o_node in out_nodes:
for i, net in enumerate(o_node.inputs):
if net == Y:
o_node.inputs[i] = X

#node will be removed by cleanup since it has only unused outputs