From 078f20b8fbcbb802aa5ffcd2134519721a61b88c Mon Sep 17 00:00:00 2001 From: Kevin Chen <45886021+kevinch-nv@users.noreply.github.com> Date: Mon, 4 Oct 2021 15:05:15 -0700 Subject: [PATCH] TensorRT 8.2 EA ONNX Parser Release (#754) Signed-off-by: Kevin Chen --- CMakeLists.txt | 43 +-- ConditionalHelpers.cpp | 330 +++++++++++++++++++ ConditionalHelpers.hpp | 51 +++ ImporterContext.hpp | 26 +- ModelImporter.cpp | 97 +++++- ModelImporter.hpp | 8 - README.md | 14 +- ShapedWeights.cpp | 1 + Status.hpp | 8 + builtin_op_importers.cpp | 665 +++++++++++++++++++++++--------------- docs/Changelog.md | 17 + docs/contributing.md | 1 - docs/operators.md | 26 +- onnx2trt.hpp | 5 +- onnx2trt_utils.cpp | 315 ++++++++---------- onnx2trt_utils.hpp | 20 +- onnx_tensorrt/__init__.py | 2 +- onnx_utils.hpp | 28 +- 18 files changed, 1138 insertions(+), 519 deletions(-) create mode 100644 ConditionalHelpers.cpp create mode 100644 ConditionalHelpers.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ba7f0308..c3692aee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,8 +4,8 @@ cmake_minimum_required(VERSION 3.13) project(onnx2trt LANGUAGES CXX C) set(ONNX2TRT_ROOT ${PROJECT_SOURCE_DIR}) -# Set C++11 as standard for the whole project -set(CMAKE_CXX_STANDARD 11) +# Set C++14 as standard for the whole project +set(CMAKE_CXX_STANDARD 14) # Enable compiler warnings if (CMAKE_COMPILER_IS_GNUCC) @@ -20,12 +20,16 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(PARSER_LINKER_SCRIPT ${ONNX2TRT_ROOT}/libnvonnxparser.version) +# Find length of source directory used to pad filename in Status.hpp +string(LENGTH "${CMAKE_SOURCE_DIR}/" SOURCE_LENGTH) +add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}") + #-------------------------------------------------- # Version information #-------------------------------------------------- set(ONNX2TRT_MAJOR 8) -set(ONNX2TRT_MINOR 0) -set(ONNX2TRT_PATCH 1) +set(ONNX2TRT_MINOR 2) +set(ONNX2TRT_PATCH 0) set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version") #-------------------------------------------------- @@ -43,26 +47,13 @@ set(IMPORTER_SOURCES LoopHelpers.cpp RNNHelpers.cpp OnnxAttrs.cpp + ConditionalHelpers.cpp ) -# Do not build ONNXIFI by default. 
-if(BUILD_ONNXIFI) - if (NOT CUDA_TOOLKIT_ROOT_DIR) - set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda) - endif() - message(debug "CUDA_TOOLKIT_ROOT_DIR: ${CUDA_TOOLKIT_ROOT_DIR}") - find_path(CUDA_INCLUDE_DIR cuda_runtime.h - HINTS ${CUDA_TOOLKIT_ROOT_DIR} - PATH_SUFFIXES include - ) +if (BUILD_ONNXIFI) set(ONNXIFI_SOURCES onnx_trt_backend.cpp) endif() -# Build with negative indices support for Gather: -if (DEFINED SUPPORT_NEGATIVE_GATHER) - add_definitions("-DSUPPORT_NEGATIVE_GATHER=1") -endif() - # Build executables if BUILD_LIBRARY_ONLY flag is not set if (NOT DEFINED BUILD_LIBRARY_ONLY) set(EXECUTABLE_SOURCES @@ -89,6 +80,16 @@ if(NOT TARGET onnx_proto) add_subdirectory(third_party/onnx EXCLUDE_FROM_ALL) endif() +# CUDA +if (NOT CUDA_TOOLKIT_ROOT_DIR) + set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda) +endif() +find_path(CUDA_INCLUDE_DIR cuda_runtime.h + HINTS ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES include +) +MESSAGE(STATUS "Found CUDA headers at ${CUDA_INCLUDE_DIR}") + # TensorRT find_path(TENSORRT_INCLUDE_DIR NvInfer.h HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR} @@ -112,7 +113,7 @@ endif() # Importer library # -------------------------------- add_library(nvonnxparser SHARED ${IMPORTER_SOURCES}) -target_include_directories(nvonnxparser PUBLIC ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR}) +target_include_directories(nvonnxparser PUBLIC ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${CUDA_INCLUDE_DIR}) target_link_libraries(nvonnxparser PUBLIC onnx_proto ${PROTOBUF_LIBRARY} ${TENSORRT_LIBRARY}) set_target_properties(nvonnxparser PROPERTIES VERSION ${ONNX2TRT_VERSION} @@ -121,7 +122,7 @@ set_target_properties(nvonnxparser PROPERTIES LINK_FLAGS "-Wl,--version-script=${PARSER_LINKER_SCRIPT}" ) add_library(nvonnxparser_static STATIC ${IMPORTER_SOURCES}) -target_include_directories(nvonnxparser_static PUBLIC ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR}) +target_include_directories(nvonnxparser_static PUBLIC ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${CUDA_INCLUDE_DIR}) target_link_libraries(nvonnxparser_static PUBLIC onnx_proto ${PROTOBUF_LIBRARY} ${TENSORRT_LIBRARY}) # -------------------------------- diff --git a/ConditionalHelpers.cpp b/ConditionalHelpers.cpp new file mode 100644 index 00000000..e0f13f8e --- /dev/null +++ b/ConditionalHelpers.cpp @@ -0,0 +1,330 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "ConditionalHelpers.hpp" +#include "ModelImporter.hpp" +#include "onnx2trt_utils.hpp" +#include "toposort.hpp" + +namespace onnx2trt +{ + +using NodeName = std::string; +using LayerName = std::string; +using InputIndex = int32_t; + +// A SubgraphPortsMap maps either the inputs or outputs ports of each node in an ONNX graph. +using SubgraphPortsMap = std::unordered_map>; + +// An InputsMap tracks which IIfConditionalInputLayer we've added to a layer's inputs, +// so that we can reuse them if needed. +using InputsMap = std::unordered_map; + +// Search for a network Layer name in a SubgraphPortsMap using partial (prefix) name matching. +// ONNX nodes are matched to network layers using prefix-matching because an ONNX node may have +// several network layers associcated with it. +SubgraphPortsMap::const_iterator findLayer(const SubgraphPortsMap& inputs, const std::string layerName) +{ + return std::find_if(inputs.begin(), inputs.end(), [&](const auto& item) { + const auto& key = item.first; + return layerName.compare(0, key.size(), key) == 0; + }); +} + +// Add an ConditionalInputLayer between `layer` and its inputs. +// I.e. 
input[inIdx] -> layer ==> input[inIdx] -> ConditionalInputLayer -> layer. +Status addConditionalInputLayer(IImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap, + nvinfer1::ILayer& layer, int32_t inIdx) +{ + auto input = layer.getInput(inIdx); + if (input == nullptr) + { + // Phantom input (an input that is really constant weights). + return Status::success(); + } + + if (layer.getType() == nvinfer1::LayerType::kCONDITIONAL_OUTPUT) + { + return Status::success(); + } + + auto const name = input->getName(); + auto it = inputsMap.find(name); + nvinfer1::IIfConditionalInputLayer* inputLayer = nullptr; + if (it == inputsMap.end()) + { + inputLayer = conditional->addInput(*input); + inputsMap[name] = inputLayer; + const std::string inputLayerName(name); + ctx->registerLayer(inputLayer, inputLayerName + "_InputLayer"); + ctx->registerTensor(TensorOrWeights{inputLayer->getOutput(0)}, inputLayerName + "_InputLayer_output"); + } + else + { + // An InputLayer may in the inputsMap if it has several consumers. + inputLayer = it->second; + } + layer.setInput(inIdx, *(inputLayer->getOutput(0))); + return Status::success(); +}; + +// Take a snapshot of the network before and after parsing the subgraph and return a list +// of newly added network layers. +Status importSubgraph( + IImporterContext* ctx, const ::ONNX_NAMESPACE::GraphProto& subgraph, std::vector& newLayers) +{ + auto net = ctx->network(); + int32_t beforeSubgraph = net->getNbLayers(); + CHECK(onnx2trt::parseGraph(ctx, subgraph)); + + for (int32_t i = beforeSubgraph; i < net->getNbLayers(); i++) + { + newLayers.push_back(net->getLayer(i)); + } + + return Status::success(); +} + +// Add an IConditionalInputLayer to `layer`'s inputs, if they don't already exist. +Status addConditionalInputIfNeeded(IImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap, + nvinfer1::ILayer& layer, SubgraphPortsMap subgraphInputsMap) +{ + // Return all of the layer's inputs that are external to the subgraph that + // that the layer belongs to. + auto getLayerExternalInputs = [&](std::string const& layerName) { + std::set inIndices; + auto iter = findLayer(subgraphInputsMap, layerName); + if (iter != subgraphInputsMap.end()) + { + const auto& indicesSet = iter->second; + inIndices.insert(indicesSet.begin(), indicesSet.end()); + } + + return inIndices; + }; + + const auto inIndices = getLayerExternalInputs(layer.getName()); + for (auto inIdx : inIndices) + { + LOG_VERBOSE("Adding Input layer for " << layer.getName()); + addConditionalInputLayer(ctx, conditional, inputsMap, layer, inIdx); + } + return Status::success(); +} + +// Add IConditionalInputLayers to `layer`'s inputs. +Status addIfInputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap, + const ::ONNX_NAMESPACE::GraphProto& subgraph, const std::vector& newLayers) +{ + // Find all of the tensors entering the subgraph. + // The node-names are from the ONNX context. + using NodeName = std::string; + using InputIndex = int32_t; + std::unordered_map> subgraphInputsMap; + getSubgraphInputs(subgraph, subgraphInputsMap); + + // Add a ConditionalInputLayer in front of each input that is external to the subgraph. + for (const auto& layer : newLayers) + { + addConditionalInputIfNeeded(ctx, conditional, inputsMap, *layer, subgraphInputsMap); + } + + return Status::success(); +} + +// Add an IConditionalOutputLayer to `layer`'s outputs. 
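+// For each pair of corresponding then/else subgraph outputs, an IIfConditionalOutputLayer is added
+// (two inputs, one output per pair) and its output tensor is appended to `graphOutputs`.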
+Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* conditional, + const ::ONNX_NAMESPACE::GraphProto& thenGraph, const std::vector& thenLayers, + const ::ONNX_NAMESPACE::GraphProto& elseGraph, const std::vector& elseLayers, + std::vector& graphOutputs) +{ + // Reported outputs are outputs that the ONNX model reports as subgraph outputs. This list is + // not sufficient because it may produce names that are not fully compatible with TensorRT's naming. + // We use this list to help find the subgraph (SG) output tensors. + auto getReportedOutputs + = [&ctx](const ::ONNX_NAMESPACE::GraphProto& body, std::vector& reportedOutputs) { + // Assuming that the subgraph was imported already, we can iterate on its output tensors. + const auto nbOutputs = body.output_size(); + for (auto i = 0; i < nbOutputs; i++) + { + reportedOutputs.emplace_back(body.output(i).name()); + } + }; + + using NodeName = std::string; + std::unordered_map> thenOutputs; + std::unordered_map> elseOutputs; + + std::vector thenReportedOutputs; + getReportedOutputs(thenGraph, thenReportedOutputs); + getSubgraphOutputs(thenGraph, thenOutputs, thenReportedOutputs); + std::vector elseReportedOutputs; + getReportedOutputs(thenGraph, elseReportedOutputs); + getSubgraphOutputs(elseGraph, elseOutputs, elseReportedOutputs); + + // Retrieve the output tensors of a subgraph (tensors exiting the subgraph). + auto getSubgraphOutputTensors + = [](IImporterContext* ctx, std::vector& sgOutputs, SubgraphPortsMap& subgraphOutputs, + const ::ONNX_NAMESPACE::GraphProto& subgraph, std::vector subgraphLayers) { + for (const auto& layer : subgraphLayers) + { + const auto layerName = layer->getName(); + auto iter = findLayer(subgraphOutputs, layerName); + if (iter != subgraphOutputs.end()) + { + sgOutputs.push_back(layer->getOutput(0)); + } + } + + if (sgOutputs.empty()) + { + // No new layers, so we can't deduce the outputs and have to use what ONNX tells us. + const int32_t nbOutputs = subgraph.output_size(); + for (int32_t outIdx = 0; outIdx < nbOutputs; outIdx++) + { + const auto thenName = subgraph.output(outIdx).name(); + auto* thenTensor = &convertToTensor(ctx->tensors().at(thenName), ctx); + sgOutputs.push_back(thenTensor); + } + } + }; + + std::vector thenOutputTensors; + getSubgraphOutputTensors(ctx, thenOutputTensors, thenOutputs, thenGraph, thenLayers); + + std::vector elseSGOutputTensors; + getSubgraphOutputTensors(ctx, elseSGOutputTensors, elseOutputs, elseGraph, elseLayers); + + ASSERT(thenOutputTensors.size() == elseSGOutputTensors.size() + && "The then/else branches of an If operator must have the same number of outputs.", + ErrorCode::kINVALID_NODE); + + // Add an ConditionalOutputLayer with one output and two inputs + // (one from the thenGraph and another from the elseGraph). + for (size_t i = 0; i < elseSGOutputTensors.size(); i++) + { + auto* outputLayer = conditional->addOutput(*thenOutputTensors[i], *elseSGOutputTensors[i]); + ctx->registerLayer(outputLayer, std::string(conditional->getName()) + "_OutputLayer"); + graphOutputs.emplace_back(outputLayer->getOutput(0)); + } + return Status::success(); +} + +// Given a subgraph, find all of its external inputs/outputs (tensors entering/exiting the subgraph). 
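+// Tensors entering the subgraph are node inputs that no node inside the subgraph produces; tensors
+// exiting the subgraph are node outputs that no node inside the subgraph consumes. `extractOutputs`
+// selects which of the two sets is collected, and `reportedOutputs` (when provided) keeps only the
+// exiting tensors that the ONNX subgraph reports as outputs (matched by name prefix).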
+Status getSubgraphTensors(const ::ONNX_NAMESPACE::GraphProto& graph, + std::unordered_map>& externalOutputs, bool extractOutputs, + const std::vector* reportedOutputs = nullptr) +{ + std::vector topoOrder; + ASSERT(toposort(graph.node(), &topoOrder) && "Failed to sort the model topologically.", ErrorCode::kINVALID_GRAPH); + using NodeName = std::string; + using TensorName = std::string; + using PortIndex = int32_t; + using Port = std::pair; + std::unordered_set outputTensors; + std::unordered_set inputTensors; + + // To determine which tensors are entering or exiting the given graph, we first collect the sets of all input and + // output tensors. Then we categorize the tensors according to this logic: + // Entering tensors := {inputs} - {outputs} + // Exiting tensors := {outputs} - {inputs} + + // Collect all input and output tensors belonging to nodes in the graph. + for (const auto& nodeIndex : topoOrder) + { + const auto& node = graph.node(nodeIndex); + for (const auto& outputName : node.output()) + { + outputTensors.insert(outputName); + } + for (const auto& inputName : node.input()) + { + inputTensors.insert(inputName); + } + } + + using NodeProto = const ::ONNX_NAMESPACE::NodeProto; + auto getOutputs = [](NodeProto& node) { return node.output(); }; + auto getInputs = [](NodeProto& node) { return node.input(); }; + + // Retrieve the list of tensors either exiting or entering the subgraph. + std::unordered_map> externalPortsMap; + auto filterTensors = [&](std::unordered_set tensors, auto nodeAccessor) { + for (const auto& nodeIndex : topoOrder) + { + const auto& node = graph.node(nodeIndex); + const auto& nodeName = getNodeName(node); + PortIndex i = 0; + + for (const auto& tensorName : nodeAccessor(node)) + { + if (tensorName.empty()) + { + continue; + } + if (tensors.count(tensorName) == 0) + { + auto prefixFound = false; + if (reportedOutputs) + { + // reportedOutputs are the names of the outputs as reported by the ONNX parser and help + // us further filter the output tensors. + // Exiting tensors := {outputs} - {inputs} - {unreported tensors} + // An example: a Split node is internal to a subgraph and has 4 outputs, but only two are + // connected to the rest of the graph. To prevent mistaking the 2 unused outputs as subgraph + // outputs, we look for them in reportedOutputs which leads us to ignore the 2 tensors. + const auto iter = std::find_if( + reportedOutputs->begin(), reportedOutputs->end(), [&](const auto& outputName) { + // Prefix name matching. + return tensorName.compare(0, outputName.size(), outputName) == 0; + }); + prefixFound = iter != reportedOutputs->end(); + } + if (!reportedOutputs || prefixFound) + { + externalPortsMap[tensorName].push_back(std::make_pair(nodeName, i)); + } + } + i++; + } + } + }; + + if (extractOutputs) + { + filterTensors(inputTensors, getOutputs); + } + else + { + filterTensors(outputTensors, getInputs); + } + + // Create the user's view of the external inputs, which uses the node-name as the key for + // looking up input/output port index. 
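+    // Flatten externalPortsMap (tensor-name -> list of (node-name, port)) into externalOutputs (node-name -> set of port indices).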
+ for (auto const& input : externalPortsMap) + { + for (const Port& inPort : input.second) + { + auto const nodeName = inPort.first; + auto const portIndex = inPort.second; + externalOutputs[nodeName].insert(portIndex); + } + } + return Status::success(); +} + +Status getSubgraphOutputs(const ::ONNX_NAMESPACE::GraphProto& graph, + std::unordered_map>& externalOutputs, + const std::vector& reportedOutputs) +{ + return getSubgraphTensors(graph, externalOutputs, true, &reportedOutputs); +} + +Status getSubgraphInputs( + const ::ONNX_NAMESPACE::GraphProto& graph, std::unordered_map>& externalInputs) +{ + return getSubgraphTensors(graph, externalInputs, false); +} + +} // namespace onnx2trt diff --git a/ConditionalHelpers.hpp b/ConditionalHelpers.hpp new file mode 100644 index 00000000..5260e0fb --- /dev/null +++ b/ConditionalHelpers.hpp @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * Helper functions used for importing the ONNX If-operator follow below. + * + */ + +#pragma once + +#include "ImporterContext.hpp" +#include "Status.hpp" +#include +#include +#include +#include +#include + +namespace onnx2trt +{ + +// Given a subgraph, find all of its external inputs (tensors entering the subgraph). +// The result is returned in `subgraphInputs`, which is a map indexed by layer-name and with values indicating a set +// of external input indices. +Status getSubgraphInputs( + const ::ONNX_NAMESPACE::GraphProto& graph, std::unordered_map>& subgraphInputs); + +// Given a subgraph, find all of its external outputs (tensors exiting the subgraph). +// The result is returned in `subgraphInputs`, which is a map indexed by layer-name and with values indicating a set +// of external outputs indices. +Status getSubgraphOutputs(const ::ONNX_NAMESPACE::GraphProto& graph, + std::unordered_map>& subgraphOutputs, + const std::vector& reportedOutputs); + +// Take a snapshot of the network before and after parsing the subgraph and return a list +// of newly added network layers. +Status importSubgraph( + IImporterContext* ctx, const ::ONNX_NAMESPACE::GraphProto& subgraph, std::vector& newLayers); + +using InputsMap = std::unordered_map; + +// Add IIfConditionalInputLayers to the inputs of the subgraph indicated by `subgraph`. +onnx2trt::Status addIfInputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap, + const ::ONNX_NAMESPACE::GraphProto& subgraph, const std::vector& newLayers); + +// Add IIfConditionalOutputLayers to the outputs of the subgraph indicated by `subgraph`. +onnx2trt::Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* conditional, + const ::ONNX_NAMESPACE::GraphProto& thenGraph, const std::vector& thenLayers, + const ::ONNX_NAMESPACE::GraphProto& elseGraph, const std::vector& elseLayers, + std::vector& graphOutputs); + +} // namespace onnx2trt diff --git a/ImporterContext.hpp b/ImporterContext.hpp index 88273607..af45e1ee 100644 --- a/ImporterContext.hpp +++ b/ImporterContext.hpp @@ -84,8 +84,9 @@ class ImporterContext final : public IImporterContext int64_t mSuffixCounter{0}; // increasing suffix counter used to uniquify layer names. std::unordered_set mUnsupportedShapeTensors; // Container to hold output tensor names of layers that produce shape tensor outputs but do not natively support them. StringMap mLoopTensors; // Container to map subgraph tensors to their original outer graph names. 
- std::string mOnnxFileLocation; // Keep track of the directory of the parsed ONNX file + std::string mOnnxFileLocation; // Keep track of the directory of the parsed ONNX file std::unique_ptr mErrorWrapper; // error recorder to control TRT errors + StringMap mConstantLayers; public: ImporterContext(nvinfer1::INetworkDefinition* network, nvinfer1::ILogger* logger) @@ -178,6 +179,15 @@ class ImporterContext final : public IImporterContext LOG_VERBOSE("Registering layer: " << uniqueName << " for ONNX node: " << basename); layer->setName(uniqueName.c_str()); + if (layer->getType() == nvinfer1::LayerType::kCONSTANT) + { + if (basename != uniqueName) + { + LOG_ERROR("Constant layer: " << uniqueName << " can be a duplicate of: " << basename); + assert(!"Internal error: duplicate constant layers for the same weights"); + } + mConstantLayers.insert({uniqueName, static_cast(layer)}); + } } } @@ -271,6 +281,20 @@ class ImporterContext final : public IImporterContext { return mErrorWrapper ? mErrorWrapper->getErrorRecorder() : nullptr; } + nvinfer1::IConstantLayer* getConstantLayer(const char* name) const final + { + if (name == nullptr) + { + return nullptr; + } + auto const iter = mConstantLayers.find(name); + if (iter == mConstantLayers.end()) + { + return nullptr; + } + return iter->second; + } + private: std::string generateUniqueName(std::set& namesSet, const std::string& basename) { diff --git a/ModelImporter.cpp b/ModelImporter.cpp index efe15396..8080373d 100644 --- a/ModelImporter.cpp +++ b/ModelImporter.cpp @@ -223,27 +223,96 @@ Status parseGraph(IImporterContext* ctx, const ::ONNX_NAMESPACE::GraphProto& gra return Status::success(); } -Status importInput(ImporterContext* ctx, ::ONNX_NAMESPACE::ValueInfoProto const& input, nvinfer1::ITensor** tensor) +Status importInput(ImporterContext* ctx, ::ONNX_NAMESPACE::ValueInfoProto const& input, nvinfer1::ITensor** tensor, + std::vector& namedDims) { auto const& onnxDtype = input.type().tensor_type(); nvinfer1::DataType trtDtype; ASSERT_INPUT(convertDtype(onnxDtype.elem_type(), &trtDtype) && "Failed to convert ONNX date type to TensorRT data type.", ErrorCode::kUNSUPPORTED_NODE, input.name()); nvinfer1::Dims trt_dims; - ASSERT_INPUT(convertOnnxDims(onnxDtype.shape().dim(), trt_dims) && "Failed to convert ONNX dimensions to TensorRT dimensions.", ErrorCode::kUNSUPPORTED_GRAPH, input.name()); + size_t const oldNbNamedDimensions = namedDims.size(); + ASSERT_INPUT(convertOnnxDims(onnxDtype.shape().dim(), trt_dims, namedDims) && "Failed to convert ONNX dimensions to TensorRT dimensions.", ErrorCode::kUNSUPPORTED_GRAPH, input.name()); nvinfer1::ITensor* userInput = ctx->getUserInput(input.name().c_str()); if (userInput) { ASSERT_INPUT(userInput && "User input is missing.", ErrorCode::kINVALID_VALUE, input.name()); - // Note: We intentionally don't check dimensions/dtype here so that users can change the input shape/type if - // they want to. + // Intentionally don't check dimensions/dtype here so that users can change the input shape/type if + // they want to. However, equalities implied by dimension names are nonetheless respected. 
*tensor = userInput; - return Status::success(); + } + else + { + LOG_VERBOSE( + "Adding network input: " << input.name() << " with dtype: " << trtDtype << ", dimensions: " << trt_dims); + ASSERT_INPUT( (*tensor = ctx->network()->addInput(input.name().c_str(), trtDtype, trt_dims)) && "Failed to add input to the network.", + ErrorCode::kUNSUPPORTED_NODE, input.name()); + } + + // Fill in field `tensor` for any dimensions that had names in the ONNX. + for (auto i = oldNbNamedDimensions; i < namedDims.size(); ++i) + { + namedDims[i].tensor = *tensor; + } + return Status::success(); +} + +//! Add equality assertions for dimensions with the same name. +static Status assertDimsWithSameNameAreEqual(ImporterContext* ctx, std::vector& namedDims) +{ + // Cache for IShapeLayer + std::unordered_map shapeMap; + + // Sort records by name of dimension, using stable_sort for reproducibility. + std::stable_sort(namedDims.begin(), namedDims.end(), + [](const NamedDimension& x, const NamedDimension& y) { return x.dimParam < y.dimParam; }); + + // Each loop iteration covers a sequence of named dimensions with the same name. + // For each sequence, add IAssertionLayers that assert that the values are equal. + // TensorRT knows about transitive closure of equality, so just add the assertions + // for adjacent records. + decltype(namedDims.begin()) j; + for (auto i = namedDims.begin(); i < namedDims.end(); i = j) + { + // Walk j forward so that [i,j) is indices of named dimensions with the same name. + j = i; + do + { + ++j; + } while (j != namedDims.end() && j->dimParam == i->dimParam); + + if (j - i < 2) + { + // Single occurrence of name is uninteresting. + continue; + } + + std::ostringstream message; + message << "input dimensions named " << i->tensor->getName() << " must be equal"; + + // prev is the current end of the daisy chain. + nvinfer1::ITensor* prev = nullptr; + for (auto k = i; k < j; ++k) + { + // Create ITensor "next" with dimension length for record k. + auto& shape = shapeMap[k->tensor]; + if (shape == nullptr) + { + shape = ctx->network()->addShape(*k->tensor); + } + auto* slice = ctx->network()->addSlice(*shape->getOutput(0), {1, {k->index}}, {1, {1}}, {1, {1}}); + nvinfer1::ITensor* next = slice->getOutput(0); + + if (prev) + { + // Add a link to the chain. 
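+                // Assert prev == next: an IElementWiseLayer (kEQUAL) feeds an IAssertionLayer so that
+                // TensorRT can reject inputs whose same-named dimensions have different lengths.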
+ auto* equal = ctx->network()->addElementWise(*prev, *next, nvinfer1::ElementWiseOperation::kEQUAL); + auto* assertion = ctx->network()->addAssertion(*equal->getOutput(0), message.str().c_str()); + ASSERT(assertion != nullptr && "addAssertion failed", ErrorCode::kMODEL_DESERIALIZE_FAILED); + } + prev = next; + } } - LOG_VERBOSE( - "Adding network input: " << input.name() << " with dtype: " << trtDtype << ", dimensions: " << trt_dims); - ASSERT_INPUT( (*tensor = ctx->network()->addInput(input.name().c_str(), trtDtype, trt_dims)) && "Failed to add input to the network.", - ErrorCode::kUNSUPPORTED_NODE, input.name()); return Status::success(); } @@ -258,19 +327,20 @@ Status importInputs(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& gr initializers.emplace(initializer.name()); } + std::vector namedDims; for (const ::ONNX_NAMESPACE::ValueInfoProto& input : graph.input()) { TensorOrWeights tensor; if (!initializers.count(input.name())) { - nvinfer1::ITensor* tensor_ptr; - CHECK(importInput(ctx, input, &tensor_ptr)); + nvinfer1::ITensor* tensor_ptr{nullptr}; + CHECK(importInput(ctx, input, &tensor_ptr, namedDims)); tensor = tensor_ptr; } ctx->registerTensor(std::move(tensor), input.name()); } - return Status::success(); + return assertDimsWithSameNameAreEqual(ctx, namedDims); } Status deserialize_onnx_model(void const* serialized_onnx_model, size_t serialized_onnx_model_size, @@ -419,7 +489,6 @@ bool ModelImporter::supportsModel( } return allSupported; } - // Mark experimental ops as unsupported bool ModelImporter::supportsOperator(const char* op_name) const { @@ -508,8 +577,10 @@ Status ModelImporter::importModel( ASSERT(!_importer_ctx.network()->hasImplicitBatchDimension() && "This version of the ONNX parser only supports TensorRT INetworkDefinitions with an explicit batch dimension. Please ensure the network was created using the EXPLICIT_BATCH NetworkDefinitionCreationFlag.", ErrorCode::kINVALID_VALUE); auto* ctx = &_importer_ctx; _importer_ctx.clearOpsets(); +#if ENABLE_STD_PLUGIN // Initialize plugin registry initLibNvInferPlugins(static_cast(&ctx->logger()), ""); +#endif // ENABLE_STD_PLUGIN for (int i = 0; i < model.opset_import().size(); ++i) { std::string domain = model.opset_import(i).domain(); diff --git a/ModelImporter.hpp b/ModelImporter.hpp index 673602e9..e4896451 100644 --- a/ModelImporter.hpp +++ b/ModelImporter.hpp @@ -43,14 +43,6 @@ class ModelImporter : public nvonnxparser::IParser { delete this; } - // virtual void registerOpImporter(std::string op, - // NodeImporter const &node_importer) override { - // // Note: This allows existing importers to be replaced - // _op_importers[op] = node_importer; - //} - // virtual Status const &setInput(const char *name, - // nvinfer1::ITensor *input) override; - // virtual Status const& setOutput(const char* name, nvinfer1::ITensor** output) override; int getNbErrors() const override { return _errors.size(); diff --git a/README.md b/README.md index c03b78c1..0317069f 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia. ## Supported TensorRT Versions -Development on the Master branch is for the latest version of [TensorRT 8.0.1.6](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support. +Development on the Master branch is for the latest version of [TensorRT 8.2.0.6](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support. 
For previous versions of TensorRT, refer to their respective branches. @@ -48,12 +48,12 @@ Current supported ONNX operators are found in the [operator support matrix](docs ### Dependencies - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases) - - [TensorRT 8.0.1.6](https://developer.nvidia.com/tensorrt) - - [TensorRT 8.0.1.6 open source libaries (master branch)](https://github.com/NVIDIA/TensorRT/) + - [TensorRT 8.2.0.6](https://developer.nvidia.com/tensorrt) + - [TensorRT 8.2.0.6 open source libraries (master branch)](https://github.com/NVIDIA/TensorRT/) ### Building -For building within docker, we recommend using and setting up the docker containers as instructed in the main [TensorRT repository](https://github.com/NVIDIA/TensorRT#setting-up-the-build-environment) to build the onnx-tensorrt library. +For building within docker, we recommend using and setting up the docker containers as instructed in the main [TensorRT repository](https://github.com/NVIDIA/TensorRT#setting-up-the-build-environment) to build the onnx-tensorrt library. Once you have cloned the repository, you can build the parser libraries and executables by running: @@ -63,7 +63,9 @@ Once you have cloned the repository, you can build the parser libraries and exec // Ensure that you update your LD_LIBRARY_PATH to pick up the location of the newly built library: export LD_LIBRARY_PATH=$PWD:$LD_LIBRARY_PATH -For building only the libraries, append `-DBUILD_LIBRARY_ONLY=1` to the CMake build command. If your model has Gather or GatherElements operations with negative indices, add `-DSUPPORT_NEGATIVE_GATHER` to the build command. Note that enabling negative-indices gather will have a performance impact on gathers with non-negative indices. +Note that this project has a dependency on CUDA. By default the build will look in `/usr/local/cuda` for the CUDA toolkit installation. If your CUDA path is different, overwrite the default path by providing `-DCUDA_TOOLKIT_ROOT_DIR=` in the CMake command. + +For building only the libraries, append `-DBUILD_LIBRARY_ONLY=1` to the CMake build command. ### Experimental Ops All experimental operators will be considered unsupported by the ONNX-TRT's `supportsModel()` function. @@ -99,7 +101,7 @@ Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` python3 -m pip install /python/tensorrt-8.x.x.x-cp-none-linux_x86_64.whl -TensorRT 8.0.1.6 supports ONNX release 1.6.0. Install it with: +TensorRT 8.2.0.6 supports ONNX release 1.6.0.
Install it with: python3 -m pip install onnx==1.6.0 diff --git a/ShapedWeights.cpp b/ShapedWeights.cpp index 760bac19..d42e4631 100644 --- a/ShapedWeights.cpp +++ b/ShapedWeights.cpp @@ -143,6 +143,7 @@ bool transposeWeights(ShapedWeights const& weights, nvinfer1::Permutation const& new_shape.d[d] = shape.d[perm.order[d]]; result->shape.d[d] = new_shape.d[d]; } + if (shape.nbDims <= 4) { diff --git a/Status.hpp b/Status.hpp index f56c6e93..ba25de71 100644 --- a/Status.hpp +++ b/Status.hpp @@ -9,6 +9,14 @@ #include #include +#ifndef ENABLE_STD_PLUGIN +#define ENABLE_STD_PLUGIN 1 +#endif // ENABLE_STD_PLUGIN + +#ifndef ENABLE_SAFE_PLUGIN +#define ENABLE_SAFE_PLUGIN 0 +#endif // ENABLE_SAFE_PLUGIN + // Used to strip out build path information from debug prints #if defined(SOURCE_LENGTH) #define __FILENAME__ (__FILE__ + SOURCE_LENGTH) diff --git a/builtin_op_importers.cpp b/builtin_op_importers.cpp index 937eaece..624fc5bb 100644 --- a/builtin_op_importers.cpp +++ b/builtin_op_importers.cpp @@ -3,6 +3,7 @@ */ #include "builtin_op_importers.hpp" +#include "ConditionalHelpers.hpp" #include "LoopHelpers.hpp" #include "ModelImporter.hpp" #include "NvInferPlugin.h" @@ -16,12 +17,11 @@ #include #include #include // For std::memcpy, std::memset +#include #include #include // For std::iota #include #include -#include -#include namespace onnx2trt { @@ -470,7 +470,6 @@ DEFINE_BUILTIN_OP_IMPORTER(Clip) if (ctx->getOpsetVersion() >= 11) { - int numInputs = inputs.size(); // Handle "min" node input. if (numInputs == 2) { @@ -612,7 +611,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Conv) ErrorCode::kUNSUPPORTED_NODE); } // Handle Multi-input convolution - return convDeconvMultiInput(ctx, node, inputs, true /*isConv*/); + return convMultiInput(ctx, node, inputs); } nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); @@ -726,46 +725,46 @@ DEFINE_BUILTIN_OP_IMPORTER(Conv) // When input.nbDims = 3, we expand it to 4D DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) { - if (inputs.at(1).is_tensor()) - { - if (inputs.size() == 3) + // Expand spatial dims from 1D to 2D, return true if reshaped activation + const auto NCWtoNCHW = [&ctx, &node](nvinfer1::ITensor*& tensor, nvinfer1::Dims& tensorShape) { + if (tensor && tensor->getDimensions().nbDims == 3) { - ASSERT(inputs.at(2).is_weights() - && "The bias tensor is required to be an initializer for the Deconvolution operator", - ErrorCode::kUNSUPPORTED_NODE); + const std::vector axes{3}; + tensor = unsqueezeTensor(ctx, node, *tensor, axes); + tensorShape = tensor->getDimensions(); + return true; } - // Handle Multi-input deconvolution - return convDeconvMultiInput(ctx, node, inputs, false /*isConv*/); - } + // for initializer, just change the shape by appending 1 + if (tensorShape.nbDims == 3) + { + tensorShape.nbDims = 4; + tensorShape.d[3] = 1; + } + return false; + }; + + ASSERT((inputs.size() < 3 || inputs.at(2).is_weights()) + && "The bias tensor is required to be an initializer for the Deconvolution operator", + ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); + nvinfer1::ITensor* kernelTensorPtr = inputs.at(1).is_tensor() ? &convertToTensor(inputs.at(1), ctx) : nullptr; nvinfer1::Dims dims = tensorPtr->getDimensions(); // Deconvolution input must be at least 3D and at most 5D. 
ASSERT(dims.nbDims >= 3 && dims.nbDims <= 5 && "TensorRT only supports 1D, 2D or 3D deconvolutions!", ErrorCode::kUNSUPPORTED_NODE); - // Deconvolution weights must be an initializer - ASSERT( (inputs.at(1).is_weights()) && "ConvTranspose weights must be an initializer", ErrorCode::kUNSUPPORTED_NODE); // Kernel weights have layout [C, M/group, k1, k2, (k3)] - auto kernelWeights = inputs.at(1).weights(); + auto kernelShape = inputs.at(1).shape(); - bool needToExpandDims = (dims.nbDims == 3); - if (needToExpandDims) - { - std::vector axes{3}; - tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); - ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); - dims = tensorPtr->getDimensions(); - } - if (kernelWeights.shape.nbDims == 3) - { - kernelWeights.shape.nbDims = 4; - kernelWeights.shape.d[3] = 1; - } + bool needReshapeBack = NCWtoNCHW(tensorPtr, dims); + NCWtoNCHW(kernelTensorPtr, kernelShape); const int nbSpatialDims = dims.nbDims - 2; // Check that the number of spatial dimensions and the kernel shape matches up. - ASSERT( (nbSpatialDims == kernelWeights.shape.nbDims - 2) && "The number of spatial dimensions and the kernel shape doesn't match up", ErrorCode::kUNSUPPORTED_NODE); + ASSERT((nbSpatialDims == kernelShape.nbDims - 2) + && "The number of spatial dimensions and the kernel shape doesn't match up", + ErrorCode::kUNSUPPORTED_NODE); // Get all attributes OnnxAttrs attrs(node, ctx); @@ -780,29 +779,31 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) bool exclude_padding = false; int ngroup = attrs.get("group", 1); - int noutput = kernelWeights.shape.d[1] * ngroup; // Note: Weights order is CKRS + int noutput = kernelShape.d[1] * ngroup; // Note: Weights order is CKRS // Check for bias_weights nvinfer1::Weights biasWeights; if (inputs.size() == 3) { - ASSERT(inputs.at(2).is_weights() && "The bias tensor is required to be an initializer.", ErrorCode::kUNSUPPORTED_NODE); auto shapedBiasWeights = inputs.at(2).weights(); // ONNX requires shapedBiasWeights to be 1D - ASSERT(shapedBiasWeights.shape.nbDims == 1 && "The bias tensor is required to be 1D.", ErrorCode::kINVALID_NODE); - ASSERT( (shapedBiasWeights.shape.d[0] == noutput) && "The shape of the bias tensor does not align with the shape of the output.", ErrorCode::kINVALID_NODE); + ASSERT( + shapedBiasWeights.shape.nbDims == 1 && "The bias tensor is required to be 1D.", ErrorCode::kINVALID_NODE); + ASSERT((shapedBiasWeights.shape.d[0] == noutput) + && "The shape of the bias tensor does not align with the shape of the output.", + ErrorCode::kINVALID_NODE); biasWeights = shapedBiasWeights; } else { - biasWeights = ShapedWeights::empty(kernelWeights.type); + biasWeights = ShapedWeights::empty(::ONNX_NAMESPACE::TensorProto::FLOAT); } // Kernel shape either comes from the attributes or extracted from the kernel weights shape kernelSize.nbDims = nbSpatialDims; for (int i = 1; i <= nbSpatialDims; ++i) { - kernelSize.d[nbSpatialDims - i] = kernelWeights.shape.d[kernelWeights.shape.nbDims - i]; + kernelSize.d[nbSpatialDims - i] = kernelShape.d[kernelShape.nbDims - i]; } getKernelParams(ctx, node, &kernelSize, &strides, &begPadding, &endPadding, paddingMode, exclude_padding, @@ -810,8 +811,8 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) for (int i = 1; i <= nbSpatialDims; ++i) { - ASSERT( (kernelSize.d[nbSpatialDims - i] == kernelWeights.shape.d[kernelWeights.shape.nbDims - i]) - && "Attribute kernel_shape misaligns with the dimensions of the weight tensor.", + ASSERT((kernelSize.d[nbSpatialDims - i] == 
kernelShape.d[kernelShape.nbDims - i]) + && "Attribute kernel_shape misaligns with the dimensions of the weight tensor.", ErrorCode::kUNSUPPORTED_NODE); } @@ -869,7 +870,9 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) } } - nvinfer1::Weights emptyBiasWeights = ShapedWeights::empty(kernelWeights.type); + const auto emptyBiasWeights = ShapedWeights::empty(::ONNX_NAMESPACE::TensorProto::FLOAT); + const auto kernelWeights + = kernelTensorPtr ? nvinfer1::Weights{nvinfer1::DataType::kFLOAT, nullptr, 0} : inputs.at(1).weights(); // Create a deconvolution layer and set known attributes - strides,ngroups, and dilations // If there is still output padding, remove the bias weights. Bias will be added below. @@ -878,6 +881,14 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) layer->setStrideNd(strides); layer->setNbGroups(ngroup); layer->setDilationNd(dilations); + if (kernelTensorPtr) + { + layer->setInput(1, *kernelTensorPtr); + } + else + { + ctx->network()->setWeightsName(kernelWeights, inputs.at(1).weights().getName()); + } // Check that 3D deconvolution paddings is valid if (nbSpatialDims == 3) @@ -898,28 +909,38 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) // Register layer, along with refittable kernel weights and bias weights (if any) ctx->registerLayer(layer, getNodeName(node)); - ctx->network()->setWeightsName(kernelWeights, inputs.at(1).weights().getName()); - if (inputs.size() == 3) - { - ctx->network()->setWeightsName(biasWeights, inputs.at(2).weights().getName()); - } tensorPtr = layer->getOutput(0); dims = tensorPtr->getDimensions(); // There is still output padding. Add a padding layer to handle it. if (hasOutputPadding) { - // TRT only support 2D padding on the outermost dimensions - ASSERT( (outputPadding.nbDims == 2 || (outputPadding.nbDims == 3 && outputPadding.d[0] == 0)) - && "TensorRT only supports 2D padding on the outermost dimensions.", - ErrorCode::kUNSUPPORTED_NODE); - // Convert 3D padding to 2d padding - if (nbSpatialDims == 3) + LOG_VERBOSE("Padding output deconvolution tensor with: " << outputPadding); + + // Add padding layer + nvinfer1::ITensor* start{}; + nvinfer1::ITensor* totalPadding{}; + std::vector combinePadding{}; + for (int32_t i = 0; i < outputPadding.nbDims; ++i) { - outputPadding = {2, {outputPadding.d[1], outputPadding.d[2]}}; + combinePadding.insert(combinePadding.begin(), 0); + combinePadding.push_back(outputPadding.d[i]); } - LOG_VERBOSE("Padding output deconvolution tensor with: " << outputPadding); - tensorPtr = ctx->network()->addPaddingNd(*tensorPtr, makeDims(2, 0), outputPadding)->getOutput(0); + ASSERT( + convertOnnxPadding(ctx, dims.nbDims, combinePadding, start, totalPadding) && "Failed to convert padding!", + ErrorCode::kUNSUPPORTED_NODE); + const auto size + = ctx->network() + ->addElementWise(shapeOf(*tensorPtr).tensor(ctx), *totalPadding, nvinfer1::ElementWiseOperation::kSUM) + ->getOutput(0); + const auto stride = makeDims(dims.nbDims, 1); + const auto& dummy = stride; + auto* sliceLayer = ctx->network()->addSlice(*tensorPtr, dummy, dummy, stride); + ASSERT(sliceLayer && "Could not create padding layer", ErrorCode::kUNSUPPORTED_NODE); + sliceLayer->setInput(1, *start); + sliceLayer->setInput(2, *size); + sliceLayer->setMode(nvinfer1::SliceMode::kFILL); + tensorPtr = sliceLayer->getOutput(0); // This bias is not handled by deconv. Use an elementwise to handle it. 
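        // The deconvolution above was created with empty bias weights when output padding is present;
        // the bias is added back here with an elementwise sum after the fill-mode slice.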
if (biasWeights.count != 0) @@ -935,7 +956,12 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) } } - if (needToExpandDims) + if (inputs.size() == 3) + { + ctx->network()->setWeightsName(biasWeights, inputs.at(2).weights().getName()); + } + + if (needReshapeBack) { std::vector axes{3}; tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes); @@ -1156,17 +1182,18 @@ NodeImportResult QuantDequantLinearHelper( ASSERT(zeroPointSize == scaleSize && "The scale and zero-point must have the same size", nvonnxparser::ErrorCode::kINVALID_NODE); - // Read the optional quantization axis attribute. + // Read the optional quantization axis attribute. Set it to the rank of the input tensor if not provided OnnxAttrs attrs(node, ctx); - const int32_t INVALID_AXIS = dataInput.getDimensions().nbDims; - int32_t axis = attrs.get("axis", INVALID_AXIS); + const int32_t nbDims = dataInput.getDimensions().nbDims; + int32_t axis = attrs.get("axis", nbDims); + CHECK(convertAxis(axis, nbDims)); if (scaleSize != 1) { // Per-Channel Quantization. // We assume this is weight-quantization with dimensions KCRS (K is # output channels). // Activations-quantization does not support per-axis quantization. - if (axis == INVALID_AXIS) + if (axis == nbDims) { axis = 0; } @@ -1178,8 +1205,7 @@ NodeImportResult QuantDequantLinearHelper( else { // Per-Tensor Quantization. - ASSERT((axis == 1 || axis == INVALID_AXIS) && "Found quantization axis not compatible with a single quantization scale", nvonnxparser::ErrorCode::kINVALID_NODE); - // Currently this is ignored by TRT, but it is required by addScaleNd (for computing nbSpatialDims). + // Currently axis is ignored by TRT, but it is required here by addScaleNd (for computing nbSpatialDims). axis = 1; } @@ -1190,8 +1216,7 @@ NodeImportResult QuantDequantLinearHelper( nvinfer1::IDequantizeLayer* dq = ctx->network()->addDequantize(dataInput, *scaleInput); ASSERT(dq && "Failed to create Dequantize layer.", ErrorCode::kUNSUPPORTED_NODE); dq->setAxis(axis); - nodeName += std::string("_quantize_scale_node"); - dq->setName(nodeName.c_str()); + nodeName += std::string("_dequantize_scale_node"); layer = dq; } else @@ -1274,6 +1299,43 @@ DEFINE_BUILTIN_OP_IMPORTER(Dropout) } } +DEFINE_BUILTIN_OP_IMPORTER(Einsum) +{ + OnnxAttrs attrs(node, ctx); + const std::string equation = attrs.get("equation"); + + std::string invalidCharacters; + for (char c : equation) + { + if ((c < 'a' || c > 'z') && c != '-' && c != '>' && c != '.' && c != ',' && c != ' ') + { + invalidCharacters.push_back(c); + invalidCharacters.push_back(','); + } + } + + if (!invalidCharacters.empty()) + { + invalidCharacters.pop_back(); + return MAKE_ERROR("Invalid character(s) in Einsum equation: " + invalidCharacters, ErrorCode::kINVALID_NODE); + } + + ASSERT((!inputs.empty()) && "Inputs vector is empty.", ErrorCode::kINVALID_NODE); + + std::vector inputTensors; + + for (auto input : inputs) + { + auto* tensor_ptr = &convertToTensor(input, ctx); + inputTensors.push_back(tensor_ptr); + } + auto nbInputs = static_cast(inputTensors.size()); + nvinfer1::IEinsumLayer* einsumLayer = ctx->network()->addEinsum(inputTensors.data(), nbInputs, equation.c_str()); + ctx->registerLayer(einsumLayer, getNodeName(node)); + + RETURN_FIRST_OUTPUT(einsumLayer); +} + DEFINE_BUILTIN_OP_IMPORTER(Elu) { OnnxAttrs attrs(node, ctx); @@ -1319,9 +1381,12 @@ DEFINE_BUILTIN_OP_IMPORTER(Expand) const ShapeTensor starts = similar(ctx, newDims, 0); // Do the broadcast rule. const ShapeTensor sizes = broadcast(ctx, newDims, newShape); - // Compute (x > 1 ? 
1 : 0) for x in newDims, assuming positive x, using only TensorRT operations. + + const ShapeTensor delta = sub(ctx, sizes, newDims); const ShapeTensor one = shapeVector(1); - const ShapeTensor strides = min(ctx, one, sub(ctx, newDims, one)); + // stride 1 for dims where sizes same as Slice input, 0 for not the same. + // delta is non-negative for Expand here + const ShapeTensor strides = sub(ctx, one, min(ctx, one, delta)); nvinfer1::ISliceLayer* sliceLayer = addSlice(ctx, newInputTensor, starts, sizes, strides); ctx->registerLayer(sliceLayer, getNodeName(node)); @@ -1401,105 +1466,54 @@ DEFINE_BUILTIN_OP_IMPORTER(Floor) DEFINE_BUILTIN_OP_IMPORTER(Gather) { - nvinfer1::ITensor* data = &convertToTensor(inputs.at(0), ctx); + nvinfer1::ITensor& data = convertToTensor(inputs.at(0), ctx); // TRT does not support BOOL input types for this node - ASSERT( (data->getType() != nvinfer1::DataType::kBOOL) && "This version of TensorRT does not support BOOL input type for the Gather operator.", ErrorCode::kUNSUPPORTED_NODE); - - nvinfer1::ITensor* indices = &convertToTensor(inputs.at(1), ctx); + ASSERT( (data.getType() != nvinfer1::DataType::kBOOL) && "This version of TensorRT does not support BOOL input type for the Gather operator.", ErrorCode::kUNSUPPORTED_NODE); + nvinfer1::ITensor& indices = convertToTensor(inputs.at(1), ctx); OnnxAttrs attrs(node, ctx); - int32_t axis = attrs.get("axis", 0); - int32_t nbDims = inputs.at(0).shape().nbDims; + int axis = attrs.get("axis", 0); + int nbDims = inputs.at(0).shape().nbDims; CHECK(convertAxis(axis, nbDims)); LOG_VERBOSE("Using Gather axis: " << axis); - - // Support for negative indices can be enabled through adding -DSUPPORT_NEGATIVE_GATHER=1 in the CMake build command. - // This will unnecessarily reduce performance of networks that use only non-negative Gather indices. -#if SUPPORT_NEGATIVE_GATHER - indices = convertGatherIndices(ctx, data, indices, axis); -#endif // SUPPORT_NEGATIVE_GATHER - - auto* layer = ctx->network()->addGather(*data, *indices, axis); + auto* layer = ctx->network()->addGather(data, indices, axis); ctx->registerLayer(layer, getNodeName(node)); RETURN_FIRST_OUTPUT(layer); } DEFINE_BUILTIN_OP_IMPORTER(GatherElements) { - - // We can treat GatherElements as a regular Gather operation with transformed input and indices tensors. - // Consider a simple example of a 3D tensor with axis = 1. - // The regular forumla of out[i][j][k] = in[i][idx[i][j][k]][k] can be rewritten as out[i][j][k] = in'[idx'[i,j,k]] - // Where in' is a squeezed down 1D representation of the data and idx' is calculated from the following formula: - // idx' = idx[i,j,k] * pitch[1] + bias. The bias is calculated as i*pitch[0] + k*pitch[2]. - - // clang-format off - /* Example: Data is 3D tensor of shape [2,2,2] with values [[[1,2], [3,4]], [[5,6], [7,8]]] - Indices is a 3D tensor of shape [2,2,1] with values [[[0], [1]], [[0], [1]]] - From the original formula, the output is [[[1], [3]], [[5], [7]]], - - Pitch vector of data is [4,2,1]. 
- - idx` calculation: - idx`[0, 0, 0] = [idx[0,0,0]](0) * [pitch[axis]](2) + [i(0)*pitch[0](4)](0) + [k(0)*pitch[2](1)](0) = 0 - idx`[0, 1, 0] = [idx[0,1,0]](1) * [pitch[axis]](2) + [i(0)*pitch[0](4)](0) + [k(0)*pitch[2](1)](0) = 2 - idx`[1, 0, 0] = [idx[1,0,0]](0) * [pitch[axis]](2) + [i(1)*pitch[0](4)](4) + [k(0)*pitch[2](1)](0) = 4 - idx`[1, 1, 0] = [idx[1,1,0]](1) * [pitch[axis]](2) + [i(1)*pitch[0](4)](4) + [k(0)*pitch[2](1)](0) = 6 - = [[[0], [2]], [[4], [6]]] - - After linearizing data to 1D: [1,2,3,4,5,6,7,8], gathering on axis 0 with the new indices gives the same results. - */ - // clang-format on - - nvinfer1::ITensor* data = &convertToTensor(inputs.at(0), ctx); - nvinfer1::ITensor* index = &convertToTensor(inputs.at(1), ctx); - - const nvinfer1::Dims& idxDims = index->getDimensions(); - const nvinfer1::Dims& daDims = data->getDimensions(); - - ASSERT((data->getType() != nvinfer1::DataType::kBOOL) && "This version of TensorRT does not support BOOL input type for the GatherElements operator.", ErrorCode::kUNSUPPORTED_NODE); - - // Note the above tranformation requires dimensions to be known at parse time, so check for dynamic shapes - ASSERT(!isDynamic(daDims) && !isDynamic(idxDims) - && "This version of TenosrRT does not support GatherElements on dynamic shapes!", + nvinfer1::ITensor& data = convertToTensor(inputs.at(0), ctx); + nvinfer1::ITensor& indices = convertToTensor(inputs.at(1), ctx); + const nvinfer1::Dims& dataDims = data.getDimensions(); + ASSERT((data.getType() != nvinfer1::DataType::kBOOL) + && "This version of TensorRT does not support BOOL input type for the GatherElements operator.", ErrorCode::kUNSUPPORTED_NODE); OnnxAttrs attrs(node, ctx); int32_t axis = attrs.get("axis", 0); - int32_t dataNbDims = daDims.nbDims; - - // Support for negative indices can be enabled through adding -DSUPPORT_NEGATIVE_GATHER=1 in the CMake build command. - // This will unnecessarily reduce performance of networks that use only non-negative Gather indices. -#if SUPPORT_NEGATIVE_GATHER - index = convertGatherIndices(ctx, data, index, axis); -#endif // SUPPORT_NEGATIVE_GATHER - + const int32_t dataNbDims = dataDims.nbDims; CHECK(convertAxis(axis, dataNbDims)); LOG_VERBOSE("Using Gather axis: " << axis); - // Calculate data pitches vector, and create axisPitch vector - int64_t nIndx = volume(idxDims); - std::vector pitches = calculatePitches(daDims); - std::vector axisPitch(nIndx, pitches[axis]); - - // Calculate bias vector - std::vector biasVector = calculateBias(daDims, idxDims, pitches, axis); - - // Perform idx` = idx * pitch[axis] + bias calculation. 
- auto* axisPitchTensor = addConstant(ctx, axisPitch, ::ONNX_NAMESPACE::TensorProto::INT32, idxDims)->getOutput(0); - auto* biasTensor = addConstant(ctx, biasVector, ::ONNX_NAMESPACE::TensorProto::INT32, idxDims)->getOutput(0); + auto* layer = ctx->network()->addGatherV2(data, indices, nvinfer1::GatherMode::kELEMENT); + layer->setGatherAxis(axis); + ctx->registerLayer(layer, getNodeName(node)); + RETURN_FIRST_OUTPUT(layer); +} - auto* mul - = ctx->network()->addElementWise(*index, *axisPitchTensor, nvinfer1::ElementWiseOperation::kPROD)->getOutput(0); - auto* newIndices - = ctx->network()->addElementWise(*mul, *biasTensor, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0); +DEFINE_BUILTIN_OP_IMPORTER(GatherND) +{ + nvinfer1::ITensor& data = convertToTensor(inputs.at(0), ctx); + nvinfer1::ITensor& indices = convertToTensor(inputs.at(1), ctx); + ASSERT((data.getType() != nvinfer1::DataType::kBOOL) + && "This version of TensorRT does not support BOOL input type for the GatherND operator.", + ErrorCode::kUNSUPPORTED_NODE); - nvinfer1::Dims flattenDataDims{1, {static_cast(volume(daDims))}}; - auto* reshape = ctx->network()->addShuffle(*data); - reshape->setReshapeDimensions(flattenDataDims); - reshape->setZeroIsPlaceholder(false); + OnnxAttrs attrs(node, ctx); + auto const nbElementWiseDims = attrs.get("batch_dims", 0); - nvinfer1::ITensor* flattenData = reshape->getOutput(0); - auto* layer = ctx->network()->addGather(*flattenData, *newIndices, 0); + auto* layer = ctx->network()->addGatherV2(data, indices, nvinfer1::GatherMode::kND); + layer->setNbElementWiseDims(nbElementWiseDims); ctx->registerLayer(layer, getNodeName(node)); RETURN_FIRST_OUTPUT(layer); } @@ -2114,6 +2128,9 @@ DEFINE_BUILTIN_OP_IMPORTER(If) const ::ONNX_NAMESPACE::GraphProto& elseGraph = attrs.get("else_branch"); // Number of outputs are the same between the two branches. + ASSERT(thenGraph.output_size() == elseGraph.output_size() + && "then/else subgraphs should have the same number of outputs.", + ErrorCode::kINVALID_NODE); const int32_t nbOutputs = thenGraph.output_size(); std::vector graphOutputs; @@ -2127,43 +2144,39 @@ DEFINE_BUILTIN_OP_IMPORTER(If) { graphOutputs.emplace_back(ctx->tensors().at(body.output(i).name())); } + return {graphOutputs}; } - // For tensor conditionals, we need to parse both branches - else + + // + // The condition is not a build-time constant. Construct an if-conditional construct. + // + + // The `condition` tensor must be a scalar boolean. 
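+    // convertToScalar squeezes the single-element condition tensor down to a scalar before it is wired to setCondition() below.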
+ auto* condTensor = convertToScalar(ctx, &convertToTensor(cond, ctx)); + ASSERT(condTensor && "Failed to convert the input cond to a scalar.", ErrorCode::kINVALID_NODE); + + auto conditional = ctx->network()->addIfConditional(); + conditional->setName(getNodeName(node).c_str()); + conditional->setCondition(*condTensor); + + std::vector thenLayers, elseLayers; + CHECK(importSubgraph(ctx, thenGraph, thenLayers)); + CHECK(importSubgraph(ctx, elseGraph, elseLayers)); + + // Names must be unique + for (auto i = 0; i < nbOutputs; i++) { - CHECK(onnx2trt::parseGraph(ctx, thenGraph)); - CHECK(onnx2trt::parseGraph(ctx, elseGraph)); - for (auto i = 0; i < nbOutputs; i++) - { - const auto thenName = thenGraph.output(i).name(); - const auto elseName = elseGraph.output(i).name(); - ASSERT(thenName != elseName && "TensorRT requires conditional subgraphs to have different output tensor names!", ErrorCode::kUNSUPPORTED_NODE); - auto* thenTensor = &convertToTensor(ctx->tensors().at(thenName), ctx); - auto* elseTensor = &convertToTensor(ctx->tensors().at(elseName), ctx); - auto* condTensor = &convertToTensor(cond, ctx); - // While the number and datatypes of the outputs of each branch are equal, the shapes may be different - // TRT only supports dynamic branch selection if the output shapes are equal and if their shapes are broadcastable - CHECK(isBroadcastValid(ctx, thenTensor->getDimensions(), elseTensor->getDimensions())); - // Broadcast the condition tensor to the size of the output tensor for usage with the ISelect layer - CHECK(broadcastTensors(ctx, condTensor, thenTensor)); - const bool needsCast = thenTensor->getType() == nvinfer1::DataType::kBOOL; - if (needsCast) - { - thenTensor = castHelper(ctx, thenTensor, nvinfer1::DataType::kINT32); - elseTensor = castHelper(ctx, elseTensor, nvinfer1::DataType::kINT32); - } - auto* layer = ctx->network()->addSelect(*condTensor, *thenTensor, *elseTensor); - ctx->registerLayer(layer, getNodeName(node)); - if (needsCast) - { - graphOutputs.emplace_back(castHelper(ctx, layer->getOutput(0), nvinfer1::DataType::kBOOL)); - } - else - { - graphOutputs.emplace_back(layer->getOutput(0)); - } - } + const auto thenName = thenGraph.output(i).name(); + const auto elseName = elseGraph.output(i).name(); + ASSERT(thenName != elseName && "TensorRT requires conditional subgraphs to have different output tensor names!", ErrorCode::kUNSUPPORTED_NODE); } + + using InputsMap = std::unordered_map; + InputsMap inputsMap; + CHECK(addIfInputLayers(ctx, conditional, inputsMap, thenGraph, thenLayers)); + CHECK(addIfInputLayers(ctx, conditional, inputsMap, elseGraph, elseLayers)); + CHECK(addIfOutputLayers(ctx, conditional, thenGraph, thenLayers, elseGraph, elseLayers, graphOutputs)); + return {graphOutputs}; } @@ -2197,6 +2210,15 @@ DEFINE_BUILTIN_OP_IMPORTER(InstanceNormalization) int nbDims = tensorPtr->getDimensions().nbDims; ASSERT(nbDims >= 3 && nbDims <= 5 && "TensorRT only supports InstanceNormalization on 3D, 4D, or 5D tensors!", ErrorCode::kUNSUPPORTED_NODE); + + const bool needToExpandDims = (nbDims == 3); + if (needToExpandDims) + { + // Expand spatial dims from 1D to 2D + const std::vector axes{3}; + tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); + ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + } auto scale_weights = inputs.at(1).weights(); auto bias_weights = inputs.at(2).weights(); OnnxAttrs attrs(node, ctx); @@ -2204,7 +2226,6 @@ DEFINE_BUILTIN_OP_IMPORTER(InstanceNormalization) const int32_t relu {0}; // the ONNX instance 
norm op does not use the relu parameter const float alpha {0.f}; // the ONNX instance norm op does not use the alpha parameter - // Populate instanceNormalization plugin properties. const std::string pluginName = "InstanceNormalization_TRT"; const std::string pluginVersion = "1"; @@ -2223,7 +2244,30 @@ DEFINE_BUILTIN_OP_IMPORTER(InstanceNormalization) auto* layer = ctx->network()->addPluginV2(&tensorPtr, 1, *plugin); ctx->registerLayer(layer, getNodeName(node)); - RETURN_FIRST_OUTPUT(layer); + tensorPtr = layer->getOutput(0); + + if (needToExpandDims) + { + // Un-expand spatial dims back to 1D + const std::vector axes{3}; + tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes); + ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + } + + return {{tensorPtr}}; +} + +DEFINE_BUILTIN_OP_IMPORTER(IsNaN) +{ + // IEEE arithmetic guarantees that x == x is false if x is a NaN, and true otherwise. + const std::vector newInputs{inputs[0], inputs[0]}; + auto equalResult = elementwiseHelper(ctx, node, newInputs, nvinfer1::ElementWiseOperation::kEQUAL); + if (equalResult.is_error()) + { + return equalResult; + } + auto equalRet = equalResult.value().at(0); + return unaryHelper(ctx, node, equalRet, nvinfer1::UnaryOperation::kNOT); } DEFINE_BUILTIN_OP_IMPORTER(LeakyRelu) @@ -3033,97 +3077,153 @@ DEFINE_BUILTIN_OP_IMPORTER(Or) DEFINE_BUILTIN_OP_IMPORTER(Pad) { nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); - int nbDims = tensorPtr->getDimensions().nbDims; - std::vector axes; - // TensorRT only supports 2D padding on the outermost dimensions of an input tensor that is - // at least 4D. Unsqueeze leading dimensions to convert input tensor to 4D if necessary - bool needToExpandDims = (nbDims < 4); - if (needToExpandDims) - { - int diff = 4 - nbDims; - axes.resize(diff); - std::iota(axes.begin(), axes.end(), 0); - tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); - ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); - nbDims = tensorPtr->getDimensions().nbDims; - } + const int32_t nbDims = tensorPtr->getDimensions().nbDims; - nvinfer1::Dims2 begPadding{0,0}; - nvinfer1::Dims2 endPadding{0,0}; OnnxAttrs attrs(node, ctx); - auto mode = attrs.get("mode", "constant"); - float value{0.f}; - std::vector onnxPadding; + const auto mode = attrs.get("mode", "constant"); + float value{0.F}; + nvinfer1::ITensor* valuePtr = nullptr; + std::vector onnxPadding; if (ctx->getOpsetVersion() < 11) { - value = attrs.get("value", 0.f); - auto padding = attrs.get>("pads"); - onnxPadding = std::vector(padding.begin(), padding.end()); + value = attrs.get("value", 0.F); + auto padding = attrs.get>("pads"); + onnxPadding = std::vector(padding.begin(), padding.end()); + if (onnxPadding.empty()) + { + LOG_VERBOSE("Found no-op pad in node: " + getNodeName(node)); + RETURN_IDENTITY(inputs.at(0)); + } } - // In opset >= 11, padding indicies and values moved from attributes to inputs else { - ASSERT(inputs.at(1).is_weights() && "The input pads is required to be an initializer.", - ErrorCode::kUNSUPPORTED_NODE); - weightsToVector(inputs.at(1).weights(), &onnxPadding); + // In opset >= 11, padding indicies and values moved from attributes to inputs + if (inputs.at(1).is_weights()) + { + weightsToVector(inputs.at(1).weights(), &onnxPadding); + } if (inputs.size() == 3) { - ASSERT(inputs.at(2).is_weights() && "The input constant_value is required to be an initializer.", - ErrorCode::kUNSUPPORTED_NODE); - auto padWeight = inputs.at(2).weights(); - ASSERT( 
(padWeight.count() == 1) && "The input constant_value is required to be a scalar.", ErrorCode::kINVALID_NODE); - value = static_cast(padWeight.values)[0]; + if (inputs.at(2).is_weights()) + { + const auto padWeight = inputs.at(2).weights(); + ASSERT((padWeight.count() == 1) && "The input constant_value is required to be a scalar.", + ErrorCode::kINVALID_NODE); + value = static_cast(padWeight.values)[0]; + } + else + { + valuePtr = &convertToTensor(inputs.at(2), ctx); + } } } - // Passthrough path for no-op padding - if (std::all_of(onnxPadding.begin(), onnxPadding.end(), [](int i){ return i == 0; })) { - LOG_VERBOSE("Found no-op pad in node: " + getNodeName(node)); - RETURN_IDENTITY(inputs.at(0)); - } - - ASSERT(mode == "constant" && value == 0.f && "This version of TensorRT only supports constant 0 padding!", - ErrorCode::kUNSUPPORTED_NODE); - - // Variables to help with padding on NHWC tensors - nvinfer1::Permutation firstPerm; - nvinfer1::Permutation secondPerm; - for (int32_t i = 0; i < nbDims; i++) + nvinfer1::ITensor* start{}; + nvinfer1::ITensor* size{}; + if (onnxPadding.empty()) { - firstPerm.order[i] = i; - secondPerm.order[i] = i; + // the pads is from activation instead of initializer or attributes + nvinfer1::ITensor* onnxPaddingPtr = &convertToTensor(inputs.at(1), ctx); + ASSERT((onnxPaddingPtr->getDimensions().nbDims == 1) && "The pads input must be 1D.", + ErrorCode::kUNSUPPORTED_NODE); + ASSERT(onnxPaddingPtr->getDimensions().d[0] == nbDims * 2 + && "pads should be a 1D tensor of shape [2 * input_rank]", + ErrorCode::kUNSUPPORTED_NODE); + + auto pre = ctx->network() + ->addSlice( + *onnxPaddingPtr, nvinfer1::Dims{1, {0}}, nvinfer1::Dims{1, {nbDims}}, nvinfer1::Dims{1, {1}}) + ->getOutput(0); + auto post = ctx->network() + ->addSlice(*onnxPaddingPtr, nvinfer1::Dims{1, {nbDims}}, nvinfer1::Dims{1, {nbDims}}, + nvinfer1::Dims{1, {1}}) + ->getOutput(0); + + const std::vector zerosVal(nbDims, 0); + const auto zeros + = addConstant(ctx, zerosVal, ::ONNX_NAMESPACE::TensorProto::INT32, nvinfer1::Dims{1, {nbDims}})->getOutput(0); + start = ctx->network()->addElementWise(*zeros, *pre, nvinfer1::ElementWiseOperation::kSUB)->getOutput(0); + const auto totalPadding + = ctx->network()->addElementWise(*pre, *post, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0); + size + = ctx->network() + ->addElementWise(shapeOf(*tensorPtr).tensor(ctx), *totalPadding, nvinfer1::ElementWiseOperation::kSUM) + ->getOutput(0); } - ASSERT(convertOnnxPadding(onnxPadding, begPadding, endPadding, firstPerm, secondPerm) && "TensorRT only supports 2D padding!", ErrorCode::kUNSUPPORTED_NODE); - // TODO: Remove this once TRT's padding layer supports non-activation types. 
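// Worked example (hypothetical shapes, for illustration only) of the slice-based padding this hunk introduces:
// for an input of shape [1, 3, 5] with ONNX pads = [0, 0, 1, 0, 0, 2] (pre = [0, 0, 1], post = [0, 0, 2]),
// the importer builds
//     start = 0 - pre            = [0, 0, -1]
//     size  = shape + pre + post = [1, 3, 8]
// and selects the out-of-bounds policy from the ONNX mode:
//     "constant" -> nvinfer1::SliceMode::kFILL    (fill value taken from `value` / `constant_value`)
//     "reflect"  -> nvinfer1::SliceMode::kREFLECT
//     "edge"     -> nvinfer1::SliceMode::kCLAMP
// A minimal standalone sketch of the same idea, assuming an existing INetworkDefinition* net and ITensor* in:
//     auto* pad = net->addSlice(*in, nvinfer1::Dims3{0, 0, -1}, nvinfer1::Dims3{1, 3, 8}, nvinfer1::Dims3{1, 1, 1});
//     pad->setMode(nvinfer1::SliceMode::kFILL);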
- const nvinfer1::DataType originalDtype = tensorPtr->getType(); - const bool needsCast = originalDtype != nvinfer1::DataType::kFLOAT; - if (needsCast) + else { - tensorPtr = castHelper(ctx, tensorPtr, nvinfer1::DataType::kFLOAT); + // passthrough path for no-op padding + if (std::all_of(onnxPadding.begin(), onnxPadding.end(), [](int32_t i) { return i == 0; })) + { + LOG_VERBOSE("Found no-op pad in node: " + getNodeName(node)); + RETURN_IDENTITY(inputs.at(0)); + } + + // the pads is from initializer or attributes + nvinfer1::ITensor* totalPadding = nullptr; + ASSERT(convertOnnxPadding(ctx, nbDims, onnxPadding, start, totalPadding) && "Failed to convert padding!", + ErrorCode::kUNSUPPORTED_NODE); + size + = ctx->network() + ->addElementWise(shapeOf(*tensorPtr).tensor(ctx), *totalPadding, nvinfer1::ElementWiseOperation::kSUM) + ->getOutput(0); } - // Transpose tensor if necessary to support generic 2D padding - tensorPtr = transposeTensor(ctx, node, *tensorPtr, firstPerm); - auto* layer = ctx->network()->addPaddingNd(*tensorPtr, begPadding, endPadding); + // add slice node + const auto stride = makeDims(nbDims, 1); + const auto& dummy = stride; + auto* layer = ctx->network()->addSlice(*tensorPtr, dummy, dummy, stride); ASSERT(layer && "Could not create padding layer", ErrorCode::kUNSUPPORTED_NODE); - ctx->registerLayer(layer, getNodeName(node)); - tensorPtr = layer->getOutput(0); - - tensorPtr = transposeTensor(ctx, node, *tensorPtr, secondPerm); + layer->setInput(1, *start); + layer->setInput(2, *size); + if (mode == "constant") + { + layer->setMode(nvinfer1::SliceMode::kFILL); - if (needsCast) + if (valuePtr) + { + layer->setInput(4, *valuePtr); + } + else if (value != 0.F) + { + // constant_value must have the same data type as the input tensor + nvinfer1::ITensor* fillValue = nullptr; + switch (tensorPtr->getType()) + { + case nvinfer1::DataType::kFLOAT: + case nvinfer1::DataType::kHALF: + case nvinfer1::DataType::kINT8: + fillValue = addConstant(ctx, std::vector{value}, ::ONNX_NAMESPACE::TensorProto::FLOAT, + nvinfer1::Dims{ + 0, {0}})->getOutput(0); + break; + default: + fillValue = addConstant(ctx, std::vector{static_cast(value)}, + ::ONNX_NAMESPACE::TensorProto::INT32, + nvinfer1::Dims{ + 0, {0}})->getOutput(0); + break; + } + ASSERT(fillValue && "Could not create layer for constant_value", ErrorCode::kUNSUPPORTED_NODE); + layer->setInput(4, *fillValue); + } + } + else if (mode == "reflect") { - tensorPtr = castHelper(ctx, tensorPtr, originalDtype); + layer->setMode(nvinfer1::SliceMode::kREFLECT); } - - // Squeeze back to original rank if necessary - if (needToExpandDims) + else if (mode == "edge") { - tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes); - ASSERT(tensorPtr && "Failed the squeeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + layer->setMode(nvinfer1::SliceMode::kCLAMP); } - return {{tensorPtr}}; + else + { + return MAKE_ERROR("Unsupported pad mode", ErrorCode::kUNSUPPORTED_NODE); + } + + ctx->registerLayer(layer, getNodeName(node)); + return {{layer->getOutput(0)}}; } DEFINE_BUILTIN_OP_IMPORTER(ParametricSoftplus) @@ -3382,6 +3482,16 @@ DEFINE_BUILTIN_OP_IMPORTER(Relu) return activationHelper(ctx, node, inputs, nvinfer1::ActivationType::kRELU); } +DEFINE_BUILTIN_OP_IMPORTER(Sign) +{ + return unaryHelper(ctx, node, inputs.at(0), nvinfer1::UnaryOperation::kSIGN); +} + +DEFINE_BUILTIN_OP_IMPORTER(Round) +{ + return unaryHelper(ctx, node, inputs.at(0), nvinfer1::UnaryOperation::kROUND); +} + DEFINE_BUILTIN_OP_IMPORTER(Resize) { nvinfer1::ITensor& input = 
convertToTensor(inputs.at(0), ctx); @@ -3958,6 +4068,36 @@ DEFINE_BUILTIN_OP_IMPORTER(Scan) return {nodeOutputs}; } +DEFINE_BUILTIN_OP_IMPORTER(ScatterND) +{ + auto* layer = addScatterLayer(ctx, inputs, nvinfer1::ScatterMode::kND); + ctx->registerLayer(layer, getNodeName(node)); + RETURN_FIRST_OUTPUT(layer); +} + +DEFINE_BUILTIN_OP_IMPORTER(ScatterElements) +{ + auto* layer = addScatterLayer(ctx, inputs, nvinfer1::ScatterMode::kELEMENT); + OnnxAttrs attrs(node, ctx); + int32_t axis = attrs.get("axis", 0); + int32_t nbDims = inputs.at(0).shape().nbDims; + CHECK(convertAxis(axis, nbDims)); + layer->setAxis(axis); + ctx->registerLayer(layer, getNodeName(node)); + RETURN_FIRST_OUTPUT(layer); +} + +DEFINE_BUILTIN_OP_IMPORTER(Scatter) +{ + // Scatter was deprecated in Opset 11 and replaced by ScatterElements + if (ctx->getOpsetVersion() >= 11) + { + LOG_WARNING("Scatter was deprecated in Opset 11. Node: \"" << getNodeName(node) << "\" will be converted to ScatterElements."); + } + + return importScatterElements(ctx, node, inputs); +} + DEFINE_BUILTIN_OP_IMPORTER(Selu) { OnnxAttrs attrs(node, ctx); @@ -4187,6 +4327,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Split) std::vector splitList; ShapeTensor sizes; ShapeTensor sizeSliceAxis; + ShapeTensor splitSizesTensor; const bool hasSplitList = (ctx->getOpsetVersion() >= 13) ? (inputs.size() == 2) : attrs.count("split"); if (hasSplitList) { @@ -4194,12 +4335,18 @@ DEFINE_BUILTIN_OP_IMPORTER(Split) // In opset >= 13, split lengths are an optional input if (ctx->getOpsetVersion() >= 13) { - ASSERT(inputs.at(1).is_weights() && "Split input 'split', if specified, must be an initializer!", ErrorCode::kUNSUPPORTED_NODE); - auto splitWeights = inputs.at(1).weights(); - int32_t* splitValues = static_cast(splitWeights.values); - for (size_t i = 0; i < splitWeights.count(); i++) + if (inputs.at(1).is_weights()) { - splitList.push_back(splitValues[i]); + const auto splitWeights = inputs.at(1).weights(); + const int32_t* splitValues = static_cast(splitWeights.values); + for (size_t i = 0; i < splitWeights.count(); i++) + { + splitList.push_back(splitValues[i]); + } + } + else + { + splitSizesTensor = {ctx, inputs.at(1)}; } } // Pre-opset 13 split lengths are provided as an attribute @@ -4207,7 +4354,9 @@ DEFINE_BUILTIN_OP_IMPORTER(Split) { splitList = attrs.get>("split"); } - ASSERT( (static_cast(splitList.size()) == numOutputs) && "The shape of the split attribute misaligns with the number of outputs.", ErrorCode::kINVALID_NODE); + ASSERT((splitList.empty() || (static_cast(splitList.size()) == numOutputs)) + && "The shape of the split attribute misaligns with the number of outputs.", + ErrorCode::kINVALID_NODE); } else { @@ -4233,7 +4382,14 @@ DEFINE_BUILTIN_OP_IMPORTER(Split) } if (hasSplitList) { - sizeSliceAxis = shapeVector(splitList[i]); + if (splitList.empty()) + { + sizeSliceAxis = gather(ctx, splitSizesTensor, ShapeTensor(1, {i})); + } + else + { + sizeSliceAxis = shapeVector(splitList[i]); + } sizes = interlace(ctx, inputDims, sizeSliceAxis, subscripts); } @@ -4342,7 +4498,6 @@ DEFINE_BUILTIN_OP_IMPORTER(Tile) // "input : T // Input tensor of any shape." nvinfer1::ITensor& input = convertToTensor(inputs.at(0), ctx); - ASSERT((input.getType() != nvinfer1::DataType::kBOOL) && "This version of TensorRT does not support BOOL input for the Tile operator." 
, ErrorCode::kUNSUPPORTED_NODE);
     const auto inputDims = shapeOf(input);
 
     // "repeats : T1
@@ -5021,6 +5176,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_MaxAverageBlendPool)
     RETURN_FIRST_OUTPUT(layer);
 }
 
+#if ENABLE_STD_PLUGIN
 DEFINE_BUILTIN_OP_IMPORTER(TRT_PluginV2)
 {
     std::vector tensors;
@@ -5048,6 +5204,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_PluginV2)
     ctx->registerLayer(layer, getNodeName(node));
     RETURN_ALL_OUTPUTS(layer);
 }
+#endif // ENABLE_STD_PLUGIN
 
 DEFINE_BUILTIN_OP_IMPORTER(TRT_Gather)
 {
diff --git a/docs/Changelog.md b/docs/Changelog.md
index d586e09b..1fab021d 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -2,6 +2,23 @@
 
 # ONNX-TensorRT Changelog
 
+## TensorRT 8.2 EA Release - 2021-10-04
+### Added
+- Added support for the following ONNX operators:
+  - Einsum
+  - IsNaN
+  - GatherND
+  - Scatter
+  - ScatterElements
+  - ScatterND
+  - Sign
+  - Round
+
+### Updated
+- Updated `Gather` and `GatherElements` implementations to natively support negative indices
+- Updated `Pad` layer to support ND padding, along with `edge` and `reflect` padding mode support
+- Updated `If` layer with general performance improvements
+
 ## TensorRT 8.0 Release - 2021-07-02
 ### Added
 - Rehauled resize operator, now fully supporting the following modes:
diff --git a/docs/contributing.md b/docs/contributing.md
index 99c15119..ce78f59d 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -1,5 +1,4 @@
-
 # Contributing
 
 Contributions are always welcome to improve the onnx-tensorrt parser. For those looking to contribute, please follow the PR process as outlined in the [TensorRT Open Source Software repository](https://github.com/NVIDIA/TensorRT/blob/master/CONTRIBUTING.md).
diff --git a/docs/operators.md b/docs/operators.md
index 416264a0..5972ca37 100644
--- a/docs/operators.md
+++ b/docs/operators.md
@@ -2,7 +2,7 @@
 
 # Supported ONNX Operators
 
-TensorRT 8.0 supports operators up to Opset 13. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/master/docs/Operators.md)
+TensorRT 8.2 supports operators up to Opset 13. The latest information on ONNX operators can be found [here](https://github.com/onnx/onnx/blob/master/docs/Operators.md)
 
 TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, INT8, and BOOL
 
@@ -49,7 +49,7 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT.
 | Div | Y | FP32, FP16, INT32 |
 | Dropout | Y | FP32, FP16 |
 | DynamicQuantizeLinear | N |
-| Einsum | N |
+| Einsum | Y | FP32, FP16 | Ellipsis and diagonal operations are not supported.
 | Elu | Y | FP32, FP16, INT8 |
 | Equal | Y | FP32, FP16, INT32 |
 | Erf | Y | FP32, FP16 |
@@ -58,9 +58,9 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT.
 | EyeLike | Y | FP32, FP16, INT32, BOOL |
 | Flatten | Y | FP32, FP16, INT32, BOOL |
 | Floor | Y | FP32, FP16 |
-| Gather | Y | FP32, FP16, INT32, BOOL | Only positive indices (>=0) are supported
Compile with `-DSUPPORT_NEGATIVE_GATHER=1` to enable support for negative indices -| GatherElements | Y | FP32, FP16, INT32, BOOL | Only positive indices (>=0) are supported
Compile with `-DSUPPORT_NEGATIVE_GATHER=1` to enable support for negative indices -| GatherND | N | +| Gather | Y | FP32, FP16, INT8, INT32 | +| GatherElements | Y | FP32, FP16, INT8, INT32 | +| GatherND | Y | FP32, FP16, INT8, INT32 | | Gemm | Y | FP32, FP16, INT8 | | GlobalAveragePool | Y | FP32, FP16, INT8 | | GlobalLpPool | Y | FP32, FP16, INT8 | @@ -75,7 +75,7 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | ImageScaler | Y | FP32, FP16 | | InstanceNormalization | Y | FP32, FP16 | Scales `scale` and biases `B` must be initializers. Input rank must be >=3 & <=5 | | IsInf | N | -| IsNaN | N | +| IsNaN | Y | FP32, FP16, INT32 | | LeakyRelu | Y | FP32, FP16, INT8 | | Less | Y | FP32, FP16, INT32 | | LessOrEqual | Y | FP32, FP16, INT32 | @@ -100,12 +100,12 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | Multinomial | N | | Neg | Y | FP32, FP16, INT32 | | NegativeLogLikelihoodLoss | N | -| NonMaxSuppression | Y [EXPERIMENTAL] | FP32, FP16 | Inputs `max_output_boxes_per_class`, `iou_threshold`, and `score_threshold` must be initializers.
Output has fixed shape and is padded to [`max_output_boxes_per_class`, 3]. +| NonMaxSuppression | Y [EXPERIMENTAL] | FP32, FP16 | Inputs `max_output_boxes_per_class`, `iou_threshold`, and `score_threshold` must be initializers. Output has fixed shape and is padded to [`max_output_boxes_per_class`, 3]. | NonZero | N | | Not | Y | BOOL | | OneHot | N | | Or | Y | BOOL | -| Pad | Y | FP32, FP16, INT8, INT32 | Zero-constant padding on two dimensions only | +| Pad | Y | FP32, FP16, INT8, INT32 | | ParametricSoftplus | Y | FP32, FP16, INT8 | | Pow | Y | FP32, FP16 | | PRelu | Y | FP32, FP16, INT8 | @@ -134,12 +134,12 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | ReverseSequence | Y | FP32, FP16 | Dynamic input shapes are unsupported | RNN | Y | FP32, FP16 | For bidirectional RNNs, activation functions must be the same for both the forward and reverse pass | RoiAlign | N | -| Round | N | +| Round | Y | FP32, FP16, INT8 | | ScaledTanh | Y | FP32, FP16, INT8 | | Scan | Y | FP32, FP16 | -| Scatter | N | -| ScatterElements | N | -| ScatterND | N | +| Scatter | Y | FP32, FP16, INT8, INT32 | +| ScatterElements | Y | FP32, FP16, INT8, INT32 | +| ScatterND | Y | FP32, FP16, INT8, INT32 | | Selu | Y | FP32, FP16, INT8| | SequenceAt | N | | SequenceConstruct | N | @@ -150,7 +150,7 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | Shape | Y | FP32, FP16, INT32, INT8, BOOL | | Shrink | N | | Sigmoid | Y | FP32, FP16, INT8 | -| Sign | N | +| Sign | Y | FP32, FP16, INT8, INT32 | | Sin | Y | FP32, FP16 | | Sinh | Y | FP32, FP16 | | Size | Y | FP32, FP16, INT32, INT8, BOOL | diff --git a/onnx2trt.hpp b/onnx2trt.hpp index 680ef900..4ee38e04 100644 --- a/onnx2trt.hpp +++ b/onnx2trt.hpp @@ -54,11 +54,10 @@ class IImporterContext virtual nvinfer1::ILogger& logger() = 0; virtual bool hasError() const = 0; virtual nvinfer1::IErrorRecorder* getErrorRecorder() const = 0; + virtual nvinfer1::IConstantLayer* getConstantLayer(const char* name) const = 0; protected: - virtual ~IImporterContext() - { - } + virtual ~IImporterContext() {} }; } // namespace onnx2trt diff --git a/onnx2trt_utils.cpp b/onnx2trt_utils.cpp index cc5ca367..cf50bb9f 100644 --- a/onnx2trt_utils.cpp +++ b/onnx2trt_utils.cpp @@ -3,6 +3,7 @@ */ #include "onnx2trt_utils.hpp" +#include "NvInferSafeRuntime.h" #include "OnnxAttrs.hpp" #include @@ -330,7 +331,8 @@ Status convertAxis(int& axis, int nbDims) { axis += nbDims; } - ASSERT((axis >= 0 && axis < nbDims) && "Axis must be in the range [0, nbDims).", ErrorCode::kUNSUPPORTED_NODE); + // Support nbDims as a valid axis for QuantDequantLinearHelper + ASSERT((axis >= 0 && axis <= nbDims) && "Axis must be in the range [0, nbDims].", ErrorCode::kUNSUPPORTED_NODE); return Status::success(); } @@ -393,112 +395,43 @@ int32_t* convertINT64(const int64_t* weightValues, nvinfer1::Dims shape, IImport return int32Weights; } -nvinfer1::ITensor* convertGatherIndices(IImporterContext* ctx, nvinfer1::ITensor* data, nvinfer1::ITensor* indices, int32_t axis) +bool convertOnnxPadding(IImporterContext* ctx, int32_t nbInputDims, const std::vector& onnxPadding, + nvinfer1::ITensor*& startTensor, nvinfer1::ITensor*& totalPaddingTensor) { - const int32_t n = indices->getDimensions().nbDims; - auto axisLength = getAxisLength(ctx, data, axis); - broadcastTensor(ctx, axisLength, n); - - // The formula here implements "indices < 0 ? indices + axisLength : indices" - // via the formula "indices - axisLength * max(-1, min(0, indices))". 
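// A quick numeric check of the formula above (hypothetical values, for illustration only): with axisLength = 5,
//     indices = -2:  min(0, -2) = -2,  max(-1, -2) = -1,  -2 - 5 * (-1) = 3   (the negative index wraps to 3)
//     indices =  2:  min(0,  2) =  0,  max(-1,  0) =  0,   2 - 5 * 0    = 2   (non-negative indices pass through unchanged)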
- // Think of the "max(-1, min(0, indices))" as extracting the sign bit from the indices. - const nvinfer1::Dims d = makeDims(n, 1); - auto zero = addConstantScalar(ctx, 0, ::ONNX_NAMESPACE::TensorProto::INT32, d)->getOutput(0); - auto minusOne = addConstantScalar(ctx, -1, ::ONNX_NAMESPACE::TensorProto::INT32, d)->getOutput(0); - auto min = ctx->network()->addElementWise(*zero, *indices, nvinfer1::ElementWiseOperation::kMIN)->getOutput(0); - auto max = ctx->network()->addElementWise(*minusOne, *min, nvinfer1::ElementWiseOperation::kMAX)->getOutput(0); - auto prod = ctx->network()->addElementWise(*max, *axisLength, nvinfer1::ElementWiseOperation::kPROD)->getOutput(0); - auto sub = ctx->network()->addElementWise(*indices, *prod, nvinfer1::ElementWiseOperation::kSUB)->getOutput(0); - return sub; -} - -bool convertOnnxPadding(std::vector& onnxPadding, nvinfer1::Dims2& begPadding, nvinfer1::Dims2& endPadding, - nvinfer1::Permutation& firstPerm, nvinfer1::Permutation& secondPerm) -{ - // Input tensor may have been unsqueezed to 4D. Insert no-op pads for all unsqueezed dimensions - const size_t minimumSize = 8; - while (onnxPadding.size() < minimumSize) - { - onnxPadding.insert(onnxPadding.begin() + onnxPadding.size() / 2, 0); - onnxPadding.insert(onnxPadding.begin(), 0); - } - - const auto size = onnxPadding.size(); - const auto half = size / 2; - std::set pads; - for (size_t i = 0; i < onnxPadding.size(); i++) - { - if (onnxPadding[i] != 0) - { - pads.emplace(i); - } - } - // For all present paddings, ensure that their corresponding beg/end index is also present - for (const auto& pad : pads) - { - if (pad < half) - { - pads.emplace(pad + half); - } - else - { - pads.emplace(pad - half); - } - } - // For no-op paddings, simply return - if (pads.size() == 0) + std::vector start; + std::vector totalPadding; + if (onnxPadding.size() % 2U != 0) { - return true; - } - // For 1D padding, set the second dimension to either the last or second last dimension - if (pads.size() == 2) - { - if (pads.find(half - 1) == pads.end()) - { - pads.emplace(size - 1); - pads.emplace(half - 1); - } - else - { - pads.emplace(size - 2); - pads.emplace(half - 2); - } + return false; } - // Fail on > 2D padding. - if (pads.size() > 4) + const auto diff = nbInputDims - static_cast(onnxPadding.size() / 2U); + if (diff < 0) { return false; } + start.resize(nbInputDims, 0); + totalPadding.resize(nbInputDims, 0); - // Pads should now be populated with 4 indices. 
Set beg and end padding values - std::vector finalIndices(pads.begin(), pads.end()); - begPadding.d[0] = onnxPadding[finalIndices[0]]; - begPadding.d[1] = onnxPadding[finalIndices[1]]; - endPadding.d[0] = onnxPadding[finalIndices[2]]; - endPadding.d[1] = onnxPadding[finalIndices[3]]; - - // For the first permutation, swap the last two dimensions with the first two indices - std::swap(firstPerm.order[half - 1], firstPerm.order[finalIndices[1]]); - std::swap(firstPerm.order[half - 2], firstPerm.order[finalIndices[0]]); - - // For the second (reverse) permutation - it is a mapping of the original in-order indices from the first - // permutation - secondPerm = firstPerm; - for (size_t i = 0; i < half; i++) + for (int32_t i = diff; i < nbInputDims; i++) { - if (secondPerm.order[i] != static_cast(i)) + const auto idx = i - diff; + const auto pre = onnxPadding[idx]; + const auto post = onnxPadding[onnxPadding.size() / 2U + idx]; + if (pre < 0 || post < 0) { - for (size_t j = 0; j < half; j++) - { - if (firstPerm.order[j] == static_cast(i)) - { - secondPerm.order[i] = static_cast(j); - continue; - } - } + return false; } + + start[i] = -pre; + totalPadding[i] = pre + post; } - return true; + + startTensor + = addConstant(ctx, start, ::ONNX_NAMESPACE::TensorProto::INT32, nvinfer1::Dims{1, {nbInputDims}})->getOutput(0); + totalPaddingTensor + = addConstant(ctx, totalPadding, ::ONNX_NAMESPACE::TensorProto::INT32, nvinfer1::Dims{1, {nbInputDims}}) + ->getOutput(0); + return startTensor && totalPaddingTensor; } bool shiftIsAllZeros(const ShapedWeights& shiftInt8) @@ -527,17 +460,9 @@ onnx2trt::ShapedWeights createZeroShifts(const onnx2trt::ShapedWeights& shiftInt nvinfer1::ITensor* createZeroTensor(IImporterContext* ctx, nvinfer1::ITensor* data) { - nvinfer1::ITensor* zero; - if (data->getType() == nvinfer1::DataType::kFLOAT) - { - zero - = addConstant(ctx, std::vector{0.f}, ::ONNX_NAMESPACE::TensorProto::FLOAT, {0, {1}})->getOutput(0); - } - else - { - zero - = addConstant(ctx, std::vector{0}, ::ONNX_NAMESPACE::TensorProto::INT32, {0, {1}})->getOutput(0); - } + nvinfer1::ITensor* zero + = addConstant(ctx, std::vector{0.f}, ::ONNX_NAMESPACE::TensorProto::FLOAT, {0, {1}})->getOutput(0); + zero = castHelper(ctx, zero, data->getType()); broadcastTensors(ctx, zero, data); zero = ctx->network()->addElementWise(*data, *zero, nvinfer1::ElementWiseOperation::kPROD)->getOutput(0); return zero; @@ -851,6 +776,12 @@ nvinfer1::ITensor& convertToTensor(TensorOrWeights& input, IImporterContext* ctx } // Handle non-tensor indices input by adding a new constant layer to the network. ShapedWeights& weights = input.weights(); + + auto const existingConstantLayer = ctx->getConstantLayer(weights.getName()); + if (existingConstantLayer != nullptr) + { + return *(existingConstantLayer->getOutput(0)); + } // Note the TRT doesn't natively handle boolean weights. First create an INT32 weights copy of the boolean weights, // then cast it back to bool within TRT. 
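// (Sketch of that flow with hypothetical values: a BOOL initializer {true, false, true} is first materialized
// as an INT32 constant {1, 0, 1} and then cast back to nvinfer1::DataType::kBOOL inside the network, since
// TensorRT does not natively handle boolean weights.)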
if (weights.type == ::ONNX_NAMESPACE::TensorProto::BOOL) @@ -939,7 +870,7 @@ bool elementwiseCheck(const std::vector& inputs, const nvinfer1 } NodeImportResult elementwiseHelper(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, - std::vector& inputs, nvinfer1::ElementWiseOperation binary_op) + const std::vector& inputs, nvinfer1::ElementWiseOperation binary_op) { ASSERT((!inputs.empty()) && "Inputs vector is empty.", ErrorCode::kINVALID_NODE); @@ -1243,7 +1174,21 @@ nvinfer1::ITensor* globalPoolingHelper(IImporterContext* ctx, ::ONNX_NAMESPACE:: nvinfer1::IPluginCreator* importPluginCreator( const std::string& pluginName, const std::string& pluginVersion, const std::string& pluginNamespace) { - return getPluginRegistry()->getPluginCreator(pluginName.c_str(), pluginVersion.c_str(), pluginNamespace.c_str()); + nvinfer1::IPluginCreator* creator = nullptr; + +#if ENABLE_STD_PLUGIN + creator = getPluginRegistry()->getPluginCreator(pluginName.c_str(), pluginVersion.c_str(), pluginNamespace.c_str()); +#endif // ENABLE_STD_PLUGIN + +#if ENABLE_SAFE_PLUGIN + if (creator == nullptr && nvinfer1::safe::getSafePluginRegistry() != nullptr) + { + creator = nvinfer1::safe::getSafePluginRegistry()->getPluginCreator( + pluginName.c_str(), pluginVersion.c_str(), pluginNamespace.c_str()); + } +#endif // ENABLE_SAFE_PLUGIN + + return creator; } std::unique_ptr createPlugin(const std::string& name, @@ -1897,19 +1842,25 @@ bool supportsShapeTensor(nvinfer1::LayerType type, nvinfer1::ElementWiseOperatio { // Layers that allow shape tensor output case nvinfer1::LayerType::kCONCATENATION: + case nvinfer1::LayerType::kCONDITION: + case nvinfer1::LayerType::kCONDITIONAL_INPUT: + case nvinfer1::LayerType::kCONDITIONAL_OUTPUT: case nvinfer1::LayerType::kCONSTANT: case nvinfer1::LayerType::kGATHER: case nvinfer1::LayerType::kIDENTITY: case nvinfer1::LayerType::kPADDING: + case nvinfer1::LayerType::kSCATTER: case nvinfer1::LayerType::kSELECT: case nvinfer1::LayerType::kSHAPE: case nvinfer1::LayerType::kSHUFFLE: case nvinfer1::LayerType::kSLICE: return true; // Layers that do not allow shape tensor output case nvinfer1::LayerType::kACTIVATION: + case nvinfer1::LayerType::kASSERTION: case nvinfer1::LayerType::kCONVOLUTION: case nvinfer1::LayerType::kDECONVOLUTION: case nvinfer1::LayerType::kDEQUANTIZE: + case nvinfer1::LayerType::kEINSUM: case nvinfer1::LayerType::kFULLY_CONNECTED: case nvinfer1::LayerType::kITERATOR: case nvinfer1::LayerType::kLOOP_OUTPUT: @@ -2043,19 +1994,21 @@ NodeImportResult unaryHelper( IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, TensorOrWeights& input, nvinfer1::UnaryOperation op) { nvinfer1::ITensor* tensorPtr = &convertToTensor(input, ctx); - auto inputType = tensorPtr->getType(); + const auto rank = tensorPtr->getDimensions().nbDims; + const auto inputType = tensorPtr->getType(); + bool validUnaryType = true; switch (op) { case nvinfer1::UnaryOperation::kNOT: { // TRT only supports BOOL types for the NOT operation - validUnaryType = (inputType == nvinfer1::DataType::kBOOL); + validUnaryType = inputType == nvinfer1::DataType::kBOOL; break; } case nvinfer1::UnaryOperation::kABS: { - // ABS can work with INT32 types via temporary cast to FLOAT. + // WAR: Special operators like ABS can work with INT32 types via temporary cast to FLOAT. 
if (inputType == nvinfer1::DataType::kINT32) { tensorPtr = castHelper(ctx, tensorPtr, nvinfer1::DataType::kFLOAT); @@ -2064,7 +2017,7 @@ NodeImportResult unaryHelper( } case nvinfer1::UnaryOperation::kNEG: { - // NEG can work with INT32 types via ElementWise Layer: (0 - x) + // WAR: NEG can work with INT32 types via ElementWise Layer: (0 - x) if (inputType == nvinfer1::DataType::kINT32) { // Calculate the rank of the input, and set all size to one and rely on broadcasting @@ -2081,11 +2034,11 @@ NodeImportResult unaryHelper( validUnaryType = (inputType != nvinfer1::DataType::kBOOL && inputType != nvinfer1::DataType::kINT32); } } + ASSERT(validUnaryType && "This version of TensorRT does not support the given operator with the given input data type.", ErrorCode::kUNSUPPORTED_NODE); - int rank = tensorPtr->getDimensions().nbDims; // Support scalar inputs by unsqueezing to 1D if (rank == 0) { @@ -2121,126 +2074,103 @@ NodeImportResult unaryHelper( return {{tensorPtr}}; } -NodeImportResult convDeconvMultiInput( - IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, std::vector& inputs, bool isConv) +NodeImportResult convMultiInput( + IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, std::vector& inputs) { - nvinfer1::ITensor* inputTensor = &convertToTensor(inputs.at(0), ctx); - nvinfer1::ITensor* weightsTensor = &convertToTensor(inputs.at(1), ctx); + nvinfer1::ITensor* input_tensor_ptr = &convertToTensor(inputs.at(0), ctx); + nvinfer1::ITensor* kernel_tensor_ptr = &convertToTensor(inputs.at(1), ctx); - nvinfer1::Dims inputDims = inputTensor->getDimensions(); - nvinfer1::Dims weightsDims = weightsTensor->getDimensions(); - const std::string layerType = isConv ? "Convolution " : "Deconvolution"; - LOG_VERBOSE(layerType << " input dimensions: " << inputDims); - LOG_VERBOSE(layerType << " kernel dimensions: " << weightsDims); + nvinfer1::Dims dims = input_tensor_ptr->getDimensions(); + LOG_VERBOSE("Convolution input dimensions: " << dims); - bool needToExpandDims = (inputDims.nbDims == 3); + bool needToExpandDims = (dims.nbDims == 3); if (needToExpandDims) { // Expand spatial dims from 1D to 2D - const std::vector axes{3}; - inputTensor = unsqueezeTensor(ctx, node, *inputTensor, axes); - ASSERT(inputTensor && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); - inputDims = inputTensor->getDimensions(); + const std::vector axes{3}; + input_tensor_ptr = unsqueezeTensor(ctx, node, *input_tensor_ptr, axes); + ASSERT(input_tensor_ptr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + dims = input_tensor_ptr->getDimensions(); } - if (weightsDims.nbDims == 3) + if (kernel_tensor_ptr->getDimensions().nbDims == 3) { // Expand spatial dims from 1D to 2D - const std::vector axes{3}; - weightsTensor = unsqueezeTensor(ctx, node, *weightsTensor, axes); - ASSERT(weightsTensor && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); - weightsDims = weightsTensor->getDimensions(); + const std::vector axes{3}; + kernel_tensor_ptr = unsqueezeTensor(ctx, node, *kernel_tensor_ptr, axes); + ASSERT(kernel_tensor_ptr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); } + nvinfer1::Dims kernel_size = inputs.at(1).shape(); - auto kernelWeights = nvinfer1::Weights{nvinfer1::DataType::kFLOAT, nullptr, 0}; - auto biasWeights = nvinfer1::Weights{nvinfer1::DataType::kFLOAT, nullptr, 0}; + auto kernel_weights = nvinfer1::Weights{nvinfer1::DataType::kFLOAT, nullptr, 0}; + auto bias_weights = nvinfer1::Weights{nvinfer1::DataType::kFLOAT, nullptr, 0}; - const 
int32_t nbSpatialDims = inputDims.nbDims - 2; + nvinfer1::Dims input_dims = input_tensor_ptr->getDimensions(); + const int nbSpatialDims = input_dims.nbDims - 2; // Check that the number of spatial dimensions and the kernel shape matches up. - ASSERT((nbSpatialDims == weightsDims.nbDims - 2) + ASSERT((nbSpatialDims == kernel_tensor_ptr->getDimensions().nbDims - 2) && "The input tensor shape misaligns with the input kernel shape.", ErrorCode::kUNSUPPORTED_NODE); - nvinfer1::Dims filterDims; - filterDims.nbDims = nbSpatialDims; + nvinfer1::Dims filter_dim; + filter_dim.nbDims = nbSpatialDims; nvinfer1::Dims strides = makeDims(nbSpatialDims, 1); - nvinfer1::Dims begPadding = makeDims(nbSpatialDims, 0); - nvinfer1::Dims endPadding = makeDims(nbSpatialDims, 0); + nvinfer1::Dims beg_padding = makeDims(nbSpatialDims, 0); + nvinfer1::Dims end_padding = makeDims(nbSpatialDims, 0); nvinfer1::Dims dilations = makeDims(nbSpatialDims, 1); nvinfer1::PaddingMode paddingMode; bool exclude_padding; getKernelParams( - ctx, node, &filterDims, &strides, &begPadding, &endPadding, paddingMode, exclude_padding, &dilations); + ctx, node, &filter_dim, &strides, &beg_padding, &end_padding, paddingMode, exclude_padding, &dilations); - for (int32_t i = 1; i <= nbSpatialDims; ++i) + for (int i = 1; i <= nbSpatialDims; ++i) { - ASSERT((filterDims.d[nbSpatialDims - i] - == weightsDims.d[weightsDims.nbDims - i]) + ASSERT((filter_dim.d[nbSpatialDims - i] + == kernel_tensor_ptr->getDimensions().d[kernel_tensor_ptr->getDimensions().nbDims - i]) && "The attribute kernel_shape misalgins with the shape of the input kernel.", ErrorCode::kUNSUPPORTED_NODE); } - OnnxAttrs attrs(node, ctx); - int32_t numGroups = attrs.get("group", 1); - int32_t nChannel = inputDims.d[1]; - // Conv weights shape is provided as [M,C/G,H1,H2], while deconv weights shape is provied as [C,M/G,H1,H2] - int32_t M = isConv ? weightsDims.d[0] : weightsDims.d[1] * numGroups; - int32_t C = isConv ? weightsDims.d[1] * numGroups : weightsDims.d[0]; + int nChannel = input_dims.d[1]; + int K = kernel_size.d[0]; + int C = kernel_size.d[1]; if (inputs.size() == 3) { // TRT-9875 - fix how bias tensor is handled - biasWeights = inputs.at(2).weights(); + bias_weights = inputs.at(2).weights(); } - ASSERT((nChannel == -1 || C == nChannel) + OnnxAttrs attrs(node, ctx); + int ngroup = attrs.get("group", 1); + ASSERT((nChannel == -1 || C * ngroup == nChannel) && "The attribute group and the kernel shape misalign with the channel size of the input tensor. 
", ErrorCode::kINVALID_NODE); - nvinfer1::ILayer* layer = nullptr; - if (isConv) - { - nvinfer1::IConvolutionLayer* convLayer - = ctx->network()->addConvolutionNd(*inputTensor, M, filterDims, kernelWeights, biasWeights); - layer = convLayer; - ASSERT(convLayer && "Failed to add the Convolution layer.", ErrorCode::kUNSUPPORTED_NODE); - - convLayer->setStrideNd(strides); - convLayer->setPaddingMode(paddingMode); - convLayer->setPrePadding(begPadding); - convLayer->setPostPadding(endPadding); - convLayer->setDilationNd(dilations); - convLayer->setNbGroups(numGroups); - } - else - { - nvinfer1::IDeconvolutionLayer* deconvLayer - = ctx->network()->addDeconvolutionNd(*inputTensor, M, filterDims, kernelWeights, biasWeights); - layer = deconvLayer; - ASSERT(deconvLayer && "Failed to add the Deconvolution layer.", ErrorCode::kUNSUPPORTED_NODE); - - deconvLayer->setStrideNd(strides); - deconvLayer->setPaddingMode(paddingMode); - deconvLayer->setPrePadding(begPadding); - deconvLayer->setPostPadding(endPadding); - deconvLayer->setDilationNd(dilations); - deconvLayer->setNbGroups(numGroups); - } + nvinfer1::IConvolutionLayer* layer + = ctx->network()->addConvolutionNd(*input_tensor_ptr, K, filter_dim, kernel_weights, bias_weights); + ASSERT(layer && "Failed to add the Convolution layer.", ErrorCode::kUNSUPPORTED_NODE); + layer->setStrideNd(strides); + layer->setPaddingMode(paddingMode); + layer->setPrePadding(beg_padding); + layer->setPostPadding(end_padding); + layer->setDilationNd(dilations); + layer->setNbGroups(ngroup); // Set kernel weights tensor as second convolution input. - layer->setInput(1, *weightsTensor); + layer->setInput(1, *kernel_tensor_ptr); ctx->registerLayer(layer, getNodeName(node)); - nvinfer1::ITensor* outputTensor = layer->getOutput(0); + nvinfer1::ITensor* output_tensor_ptr = layer->getOutput(0); if (needToExpandDims) { // Un-expand spatial dims back to 1D - const std::vector axes{3}; - outputTensor = squeezeTensor(ctx, node, *outputTensor, axes); - ASSERT(outputTensor && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + const std::vector axes{3}; + output_tensor_ptr = squeezeTensor(ctx, node, *output_tensor_ptr, axes); + ASSERT(output_tensor_ptr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); } - return {{outputTensor}}; + return {{output_tensor_ptr}}; } nvinfer1::ITensor* unsqueezeTensor(IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, @@ -2384,4 +2314,13 @@ nvinfer1::ITensor* addSoftmax(IImporterContext* ctx, const ::ONNX_NAMESPACE::Nod return softMax->getOutput(0); } +nvinfer1::IScatterLayer* addScatterLayer( + IImporterContext* ctx, std::vector& inputs, nvinfer1::ScatterMode mode) +{ + nvinfer1::ITensor& data = convertToTensor(inputs.at(0), ctx); + nvinfer1::ITensor& indices = convertToTensor(inputs.at(1), ctx); + nvinfer1::ITensor& updates = convertToTensor(inputs.at(2), ctx); + return ctx->network()->addScatter(data, indices, updates, mode); +} + } // namespace onnx2trt diff --git a/onnx2trt_utils.hpp b/onnx2trt_utils.hpp index 3156bbe8..db2c0cf6 100644 --- a/onnx2trt_utils.hpp +++ b/onnx2trt_utils.hpp @@ -170,11 +170,9 @@ bool convertDtype(int32_t onnx_dtype, nvinfer1::DataType* trt_dtype); // Helper function to convert INT64 weight values into INT32 int32_t* convertINT64(const int64_t* weightValues, nvinfer1::Dims shape, IImporterContext* ctx); -// Helper function to convert negative gather indices into non-negative indices. 
-nvinfer1::ITensor* convertGatherIndices(IImporterContext* ctx, nvinfer1::ITensor* data, nvinfer1::ITensor* indices, int32_t axis); - -// Helper function to convert ONNX padding into TRT padding. Will update begPadding, endPadding, firstPerm, and secondPerm by reference -bool convertOnnxPadding(std::vector& onnxPadding, nvinfer1::Dims2& begPadding, nvinfer1::Dims2& endPadding, nvinfer1::Permutation& firstPerm, nvinfer1::Permutation& secondPerm); +// Helper function to convert ONNX padding into TRT padding. Will update startTensor and totalPaddingTensor by reference +bool convertOnnxPadding(IImporterContext* ctx, int32_t nbInputDims, const std::vector& onnxPadding, + nvinfer1::ITensor*& startTensor, nvinfer1::ITensor*& totalPaddingTensor); // Helper function to check if all of the values in the shift tensor are zeros bool shiftIsAllZeros(const ShapedWeights& shiftInt8); @@ -189,9 +187,9 @@ nvinfer1::ITensor* createZeroTensor(IImporterContext* ctx, nvinfer1::ITensor* da bool convertOnnxWeights( const ::ONNX_NAMESPACE::TensorProto& onnxTensor, onnx2trt::ShapedWeights* weights, IImporterContext* ctx); -// Helper function to convert multi input convolution/deconvolution -NodeImportResult convDeconvMultiInput( - IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, std::vector& inputs, bool isConv); +// Helper function to convert multi input convolution +NodeImportResult convMultiInput( + IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, std::vector& inputs); // Helper function to convert a 1D tensor into a scalar nvinfer1::ITensor* convertToScalar(IImporterContext* ctx, nvinfer1::ITensor* inpTensor); @@ -210,7 +208,7 @@ bool elementwiseCheck(const std::vector& inputs, const nvinfer1 // Helper function to import an ONNX elementwise op into TRT NodeImportResult elementwiseHelper(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, - std::vector& inputs, nvinfer1::ElementWiseOperation binary_op); + const std::vector& inputs, nvinfer1::ElementWiseOperation binary_op); // Helper function to flatten a tensor on a given axis nvinfer1::ITensor* flattenTensor(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, nvinfer1::ITensor& tensor, int axis = 0, bool regLayer = false); @@ -374,4 +372,8 @@ ShapeTensor axesToInterlaceSubscripts(const ShapeTensor& axes, int nbDims); //! Helper function to add SoftMax layer. nvinfer1::ITensor* addSoftmax(IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, nvinfer1::ITensor& input); +// Helper function to import ONNX scatter nodes into TRT +nvinfer1::IScatterLayer* addScatterLayer( + IImporterContext* ctx, std::vector& inputs, nvinfer1::ScatterMode mode); + } // namespace onnx2trt diff --git a/onnx_tensorrt/__init__.py b/onnx_tensorrt/__init__.py index ccd23709..2a3f701a 100644 --- a/onnx_tensorrt/__init__.py +++ b/onnx_tensorrt/__init__.py @@ -4,4 +4,4 @@ from . import backend -__version__ = "8.0.1" +__version__ = "8.2.0" diff --git a/onnx_utils.hpp b/onnx_utils.hpp index 3e149b69..c2c147bb 100644 --- a/onnx_utils.hpp +++ b/onnx_utils.hpp @@ -15,8 +15,30 @@ namespace { +//! Describes occurrence of a named dimension. +class NamedDimension +{ +public: + //! TensorRT tensor. + nvinfer1::ITensor* tensor; + + //! Index of tensor dimension to be named. + int32_t index; + + //! ONNX "dim param" that is the name of the dimension. + std::string dimParam; + + //! Construct a NamedDimension where the tensor will be filled in later. 
+ NamedDimension(int32_t index_, const std::string& dimParam_) + : tensor(nullptr) + , index(index_) + , dimParam(dimParam_) + { + } +}; + template -bool convertOnnxDims(OnnxDims const& onnxDims, nvinfer1::Dims& trtDims) +bool convertOnnxDims(OnnxDims const& onnxDims, nvinfer1::Dims& trtDims, std::vector& namedDims) { std::vector onnxDimsVec; for (const auto& onnxDim : onnxDims) @@ -28,6 +50,10 @@ bool convertOnnxDims(OnnxDims const& onnxDims, nvinfer1::Dims& trtDims) } else { + if (!onnxDim.dim_param().empty()) + { + namedDims.emplace_back(static_cast(onnxDimsVec.size()), onnxDim.dim_param()); + } const int32_t dim = onnxDim.dim_param() == "" ? (onnxDim.dim_value() >= 0 ? onnxDim.dim_value() : -1) : -1; onnxDimsVec.emplace_back(dim); }
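// Illustration (hypothetical ONNX value_info, for exposition only) of what the added bookkeeping records:
// for dims
//     dim { dim_param: "batch" }   dim { dim_value: 3 }   dim { dim_param: "seq" }
// onnxDimsVec ends up as {-1, 3, -1} and namedDims receives entries for "batch" at index 0 and "seq" at
// index 2, presumably so the importer can later attach each name to the corresponding network tensor
// dimension and treat equally named dynamic dimensions consistently.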