/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "ShapedWeights.hpp"
#include "trt_utils.hpp"
#include "OnnxAttrs.hpp"
#include <onnx/onnx_pb.h>
#include <onnx/onnxifi.h>
#include <NvInfer.h>
#include <cmath>
#include <cstring>
#include <numeric>
#include <iostream>
using std::cerr;
using std::endl;
class CeilingPoolDim : public nvinfer1::IOutputDimensionsFormula
{
public:
    nvinfer1::DimsHW compute(nvinfer1::DimsHW inputDims, nvinfer1::DimsHW kernelSize, nvinfer1::DimsHW stride,
        nvinfer1::DimsHW padding, nvinfer1::DimsHW dilation, const char* layerName) const override
    {
        nvinfer1::DimsHW outputDims;
        for (int dimension = 0; dimension < inputDims.nbDims; dimension++)
        {
            outputDims.d[dimension] = static_cast<int>(
                std::ceil((inputDims.d[dimension] + padding.d[dimension] * 2.0 - kernelSize.d[dimension]) / stride.d[dimension] + 1.0));
        }
        return outputDims;
    }
};
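// Worked example of the ceil-mode formula above: for an input extent of 7, kernel 2,
// stride 2 and no padding, ceil((7 + 0 - 2) / 2.0 + 1.0) = ceil(3.5) = 4, whereas the
// default floor behavior would produce 3.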
std::ostream& operator<<(std::ostream& stream, nvinfer1::Dims const& shape);
std::ostream& operator<<(std::ostream& stream, nvinfer1::DataType const& dtype);
std::ostream& operator<<(std::ostream& stream, nvinfer1::Permutation const& perm);
namespace onnx2trt
{
// Helper function to calculate the volume of a Dims object
int64_t volume(const nvinfer1::Dims& dims);
// Adds a constant scalar to the network in the form of a constant layer.
template <typename ScalarType>
nvinfer1::IConstantLayer* addConstantScalar(IImporterContext* ctx, ScalarType scalar, ShapedWeights::DataType type, nvinfer1::Dims shape = nvinfer1::Dims{1,{1}})
{
ShapedWeights scalarWeights = ctx->createTempWeights(type, shape);
static_cast<ScalarType*>(scalarWeights.values)[0] = static_cast<ScalarType>(scalar);
return ctx->network()->addConstant(scalarWeights.shape, scalarWeights);
}
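// A minimal usage sketch (assuming the ONNX TensorProto dtype enum is what
// ShapedWeights::DataType expects, as elsewhere in this codebase):
//   addConstantScalar(ctx, 1.0f, ::ONNX_NAMESPACE::TensorProto::FLOAT);
// yields a constant layer whose output is a single-element FLOAT tensor.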
// Helper function to create a tensor given a vector of values and a shape.
template <typename ScalarType>
inline nvinfer1::IConstantLayer* addConstant(IImporterContext* ctx, const std::vector<ScalarType>& values, ShapedWeights::DataType type, nvinfer1::Dims shape)
{
ShapedWeights weights = ctx->createTempWeights(type, shape);
std::memcpy(weights.values, values.data(), values.size() * sizeof(ScalarType));
return ctx->network()->addConstant(weights.shape, weights);
}
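// A minimal usage sketch (hypothetical values): build a 1D INT32 constant of length 3:
//   addConstant(ctx, std::vector<int32_t>{1, 2, 3}, ::ONNX_NAMESPACE::TensorProto::INT32,
//       nvinfer1::Dims{1, {3}});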
enum ScaleOp
{
kSHIFT,
kSCALE,
kPOWER,
};
// Helper function to import ONNX activation nodes into TRT
NodeImportResult activationHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node,
std::vector<TensorOrWeights>& inputs, nvinfer1::ActivationType op, float* alpha = nullptr, float* beta = nullptr);
// Helper function to add a Scale layer into TRT
NodeImportResult addScale(IImporterContext* ctx, nvinfer1::ITensor& tensor_, nvinfer1::ScaleMode mode,
nvinfer1::Weights shift, nvinfer1::Weights scale, nvinfer1::Weights power);
// Helper function to auto-generate the output padding given the attributes for certain ONNX nodes
void auto_gen_input_output_padding(nvinfer1::Dims input_dims, nvinfer1::Dims output_shape, nvinfer1::Dims kernel_size,
nvinfer1::Dims strides, nvinfer1::Dims dilations, const int nbSpatialDims, nvinfer1::Dims& beg_padding,
nvinfer1::Dims& end_padding, nvinfer1::Dims& output_padding, nvinfer1::PaddingMode paddingMode);
// Helper function for handling tensor broadcasting for opsets < 7
Status applyLegacyBinaryOpBroadcasting(IImporterContext* ctx,
::ONNX_NAMESPACE::NodeProto const& node,
TensorOrWeights& lhs,
TensorOrWeights& rhs);
// Helper function to import ArgMin and ArgMax nodes into TRT
NodeImportResult argMinMaxHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node,
std::vector<TensorOrWeights>& inputs, nvinfer1::TopKOperation op);
// Helper function to broadcast one tensor to a given number of dimensions
void broadcastTensors(IImporterContext* ctx, nvinfer1::ITensor*& t1, const int nbDims);
// Helper function to broadcast two tensors to the larger of their two ranks
void broadcastTensors(IImporterContext* ctx, nvinfer1::ITensor*& t1, nvinfer1::ITensor*& t2);
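// Illustration (assumed behavior, inferred from the signatures above): given tensors of
// shapes (3, 4) and (2, 3, 4), the rank-2 tensor would be reshaped to (1, 3, 4) so that
// both operands have the same rank for elementwise layers.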
// Helper function for parsing broadcasting attributes
Status check_broadcast_attrs(IImporterContext* ctx, OnnxAttrs const& attrs, nvinfer1::Dims const& dims);
// Helper function to check if a node is connected to a graph input
bool check_for_input(::ONNX_NAMESPACE::NodeProto const& node, std::string const& input_node);
// Helper function to check if node inputs are INT32 type
bool check_for_int32(std::vector<TensorOrWeights> const& inputs);
// Helper function to check if a scale layer can be used in place of an elementwise layer
bool check_for_scale(std::vector<TensorOrWeights> const& inputs);
// Helper function to convert an ONNX axis into a TRT axis (supports negative indexing)
Status convert_axis(int& axis, int nbDims);
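// Example: with nbDims = 4, an ONNX axis of -1 maps to TRT axis 3 (negative axes count
// from the end, i.e. axis += nbDims when axis < 0).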
// Helper function to convert an ONNX datatype to a TRT datatype with INT64 downcasting
bool convert_dtype(int32_t onnx_dtype, nvinfer1::DataType* trt_dtype);
// Helper function to convert an ONNX datatype to a TRT datatype without INT64 downcasting
bool convert_input_dtype(int32_t onnx_dtype, nvinfer1::DataType* trt_dtype);
// Helper function to convert ONNX weights to TRT weights
bool convert_onnx_weights(::ONNX_NAMESPACE::TensorProto const& onnx_tensor, onnx2trt::ShapedWeights* weights);
// Helper function to convert a weight graph output to a tensor
nvinfer1::ITensor& convert_output_weight_to_tensor(TensorOrWeights& input, IImporterContext* ctx);
// Helper function to squeeze a tensor into two dimensions
nvinfer1::ITensor* convert_tensor_to_2d(IImporterContext* ctx, nvinfer1::ITensor& tensor, int axis);
// Helper function to convert ONNX weight descriptors to TRT weights
bool convert_weight_descriptor(onnxTensorDescriptorV1 const& desc, onnx2trt::ShapedWeights* weights);
// Helper function to convert weights to tensors
nvinfer1::ITensor& convertToTensor(TensorOrWeights& input, IImporterContext* ctx);
// Helper function to convert a 1D shape into a tensor of the same size
nvinfer1::ITensor& dimension_to_tensor(IImporterContext* ctx, nvinfer1::Dims dims);
// Helper function to calculate the ceil division of two integers.
int div_ceil(int n, int d);
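// For positive operands this is commonly implemented as (n + d - 1) / d,
// e.g. div_ceil(7, 2) == 4.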
// Helper function to import elementwise nodes into TRT
NodeImportResult elementwiseHelper(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node,
std::vector<TensorOrWeights>& inputs, nvinfer1::ElementWiseOperation binary_op);
// Helper function to flatten a tensor on a specified axis
nvinfer1::ITensor* flattenTensor(IImporterContext* ctx, nvinfer1::ITensor& tensor, int axis);
// Helper function to check if any input dimensions are dynamic
bool isDynamic(nvinfer1::Dims const& dims);
// Helper function to import a plugin from TensorRT's plugin registry given the name and version.
nvinfer1::IPluginV2* importPluginFromRegistry(IImporterContext* ctx, const std::string& pluginName,
const std::string& pluginVersion, const std::string& nodeName, const std::vector<nvinfer1::PluginField>& pluginFields);
// Helper function to check whether a transpose requires data movement; returns false if it is equivalent to a reshape
inline bool is_transpose_required(nvinfer1::Dims const& shape, nvinfer1::Permutation const& perm);
// Helper function to get the length of the specified axis
nvinfer1::ITensor* getAxisLength(IImporterContext* ctx, nvinfer1::ITensor* inpTensor, int axis, nvinfer1::Dims shape = nvinfer1::Dims{0});
// Helper function to calculate the output size of a convolution operation
int get_conv_output_size(int input_size, int filter_size,
int stride, int dilation_rate,
int total_padding);
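// A sketch of the standard convolution arithmetic this presumably implements:
//   output = (input_size + total_padding - dilation_rate * (filter_size - 1) - 1) / stride + 1
// e.g. input_size = 7, filter_size = 3, stride = 2, dilation_rate = 1, total_padding = 0
// gives (7 + 0 - 2 - 1) / 2 + 1 = 3.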
// Helper function to get the name of an ONNX data type
const char* get_dtype_name(int32_t onnx_dtype);
// Helper function to get the size of an ONNX data type
int get_dtype_size(int32_t onnx_dtype);
// Helper function to extract the index of a potential -1 dimension in a Reshape node's shape
Status get_infer_dim(int& infer_dim, nvinfer1::Dims const& new_shape);
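// Example: for new_shape = (2, -1, 4), infer_dim would be set to 1; the caller can then
// compute that dimension from the tensor's volume (ONNX Reshape permits at most one -1).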
// Helper function to extract kernel parameters given a node's attributes
void get_kernel_params(::ONNX_NAMESPACE::NodeProto const& onnx_node,
nvinfer1::Dims* kernel_size,
nvinfer1::Dims* strides,
nvinfer1::Dims* beg_padding,
nvinfer1::Dims* end_padding,
nvinfer1::PaddingMode& paddingMode,
bool& count_exclude_padding,
nvinfer1::Dims* dilations=nullptr,
nvinfer1::Dims* output_padding=nullptr);
// Helper function to get the scale mode for TRT's scale layer given the shapes of the inputs
nvinfer1::ScaleMode get_scale_mode(nvinfer1::Dims const& weights_shape, nvinfer1::Dims const& tensor_shape);
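// A sketch of the likely mapping, based on TensorRT's ScaleMode enum (an assumption, not
// read from the implementation): a single-element weights_shape maps to kUNIFORM, a shape
// matching the channel dimension to kCHANNEL, and a shape matching tensor_shape to
// kELEMENTWISE.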
// Helper function to create a Dims object with the specified number of dims and value
nvinfer1::Dims makeDims(int nbDims, int val);
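// e.g. makeDims(3, 1) is expected to yield a Dims with nbDims = 3 and d = {1, 1, 1}.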
// Helper function to create a shape tensor from a Dims object for dynamic reshape
nvinfer1::ITensor& makeShapeTensor(IImporterContext* ctx, nvinfer1::Dims dims);
// Helper function to map various ONNX pooling ops into TensorRT.
NodeImportResult poolingHelper(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector<TensorOrWeights>& inputs, nvinfer1::PoolingType type);
// Helper function to reshape a tensor to a specified size
nvinfer1::ITensor* reshape_tensor(IImporterContext* ctx, nvinfer1::ITensor& tensor, nvinfer1::Dims shape);
// Helper function to convert ONNX node to a scale layer
NodeImportResult scaleHelper(IImporterContext* ctx,
::ONNX_NAMESPACE::NodeProto const& node,
std::vector<TensorOrWeights>& inputs,
ScaleOp op);
// Helper function to set a TRT Dims attribute from an ONNX node attribute, falling back to a default value
void setAttr(nvinfer1::Dims* trtAttr, ::ONNX_NAMESPACE::AttributeProto const* onnxAttr, int nbSpatialDims, int defaultVal);
// Helper function to squeeze a tensor on a given set of axes
nvinfer1::ITensor* squeezeTensor(IImporterContext* ctx, nvinfer1::ITensor& tensor, const std::vector<int>& axes);
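// Example: squeezing a tensor of shape (1, 3, 1, 5) on axes {0, 2} yields shape (3, 5).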
// Helper function to transpose a tensor given a permutation
nvinfer1::ITensor* transpose_tensor(IImporterContext* ctx, nvinfer1::ITensor& tensor, nvinfer1::Permutation const& perm,
bool permute_dim_types);
// Helper function to convert slice input weights to a vector of int32 values
Status slice_array(TensorOrWeights weights, std::vector<int32_t>& weight_vector);
// Helper function to import unary operations into TRT
NodeImportResult unaryHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node,
std::vector<TensorOrWeights>& inputs, nvinfer1::UnaryOperation op);
// Helper function to unsqueeze tensors on a given set of axes
nvinfer1::ITensor* unsqueezeTensor(IImporterContext* ctx, nvinfer1::ITensor& tensor, const std::vector<int>& axes);
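// Example: unsqueezing a tensor of shape (3, 5) on axes {0} yields shape (1, 3, 5).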
// Helper function to update padding values.
void update_padded_values(std::vector<float>& pad_values, const nvinfer1::DimsHW beg_padding,
const nvinfer1::DimsHW end_padding, const nvinfer1::Dims padded_shape, const float pad_value);
// Helper function to convert weights to a vector.
Status weightsToVector(TensorOrWeights weights, std::vector<int64_t>* weightVector);
} // namespace onnx2trt