[TensorRT EP] Update TRT10.0 deprecated api (#20989)
### Description

Note:
* This PR removes the C4996 suppression in tensorrt_execution_provider.cc only (according to Nvidia, files that include nvinfer.h still need C4996 suppression when /Zc:__cplusplus is enabled in the ORT Windows build; see the sketch below this list).
* A follow-up PR will update the deprecated TRT plugin API usage.
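For reference, this is the kind of suppression the note above refers to — a minimal sketch only; the exact placement and wording of the pragma in ORT may differ:

```cpp
// Minimal sketch (not the exact ORT code): on MSVC with /Zc:__cplusplus enabled,
// including nvinfer.h can emit C4996 for deprecated TensorRT declarations,
// so the include is wrapped in a push/disable/pop of that warning.
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4996)  // "... was declared deprecated"
#endif
#include <NvInfer.h>
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
```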

Here are the deprecated APIs updated in this PR:

| Deprecated API | Update |
| -------------- | ------ |
| [kCUBLAS](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/namespacenvinfer1.html#a9e1d81e5a8bfeb38b86e22a66d5f836a) | / |
| [kCUBLAS_LT](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/namespacenvinfer1.html#a9e1d81e5a8bfeb38b86e22a66d5f836a) | / |
| [kCUDNN](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/namespacenvinfer1.html#a9e1d81e5a8bfeb38b86e22a66d5f836a) | / |
| [reallocateOutput](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1v__1__0_1_1_i_output_allocator.html#acae6441d4029584cc1c6550917518691) | Superseded by [reallocateOutputAsync](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1v__1__0_1_1_i_output_allocator.html#aa40eeb891c1dfe4c1bbf1eabe8c705ab) with a cudaStream_t argument |
| [createExecutionContextWithoutDeviceMemory](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_cuda_engine.html#adc86bcc42b098204997396ef2b1093fb) | Superseded by [createExecutionContext()](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_cuda_engine.html#a35de29aa6134165a5b14a537e6d99e82) taking an allocation-strategy parameter.<br />See [ExecutionContextAllocationStrategy::kUSER_MANAGED](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/namespacenvinfer1.html#ac6251a050df629edfc0ce037fa366503) for more detail |
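As a rough illustration of the two supersessions in the table (a sketch only, not the ORT code; `MyOutputAllocator` and `MakeContext` are illustrative names):

```cpp
#include <memory>
#include <NvInfer.h>
#include <cuda_runtime_api.h>

// Illustrative allocator: TRT >= 10 overrides reallocateOutputAsync (which adds a
// cudaStream_t), older TRT overrides the deprecated reallocateOutput.
class MyOutputAllocator : public nvinfer1::IOutputAllocator {
 public:
#if NV_TENSORRT_MAJOR >= 10
  void* reallocateOutputAsync(char const* /*tensorName*/, void* currentMemory, uint64_t /*size*/,
                              uint64_t /*alignment*/, cudaStream_t /*stream*/) noexcept override {
    return currentMemory;  // a real implementation would (re)allocate here
  }
#else
  void* reallocateOutput(char const* /*tensorName*/, void* currentMemory, uint64_t /*size*/,
                         uint64_t /*alignment*/) noexcept override {
    return currentMemory;
  }
#endif
  void notifyShape(char const* /*tensorName*/, nvinfer1::Dims const& /*dims*/) noexcept override {}
};

// Illustrative context creation: TRT 10 replaces createExecutionContextWithoutDeviceMemory
// with createExecutionContext(kUSER_MANAGED), keeping device-memory ownership with the caller.
std::unique_ptr<nvinfer1::IExecutionContext> MakeContext(nvinfer1::ICudaEngine& engine) {
#if NV_TENSORRT_MAJOR >= 10
  return std::unique_ptr<nvinfer1::IExecutionContext>(
      engine.createExecutionContext(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED));
#else
  return std::unique_ptr<nvinfer1::IExecutionContext>(
      engine.createExecutionContextWithoutDeviceMemory());
#endif
}
```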




### Motivation and Context
TRT deprecated API list:
https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/deprecated.html
yf711 authored Jul 2, 2024
1 parent beb2496 commit 7be1d4a
Showing 2 changed files with 43 additions and 20 deletions.
58 changes: 39 additions & 19 deletions onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -169,11 +169,20 @@ nvinfer1::TacticSources GetTacticSourceFromString(std::string& tactic_string) {
nvinfer1::TacticSource source{};
t = toUpper(t);
if (t == "CUBLAS") {
LOGS_DEFAULT(WARNING) << "[TensorRT EP] Tactic kCUBLAS is deprecated in TensorRT 10.0";
#if NV_TENSORRT_MAJOR < 10
source = nvinfer1::TacticSource::kCUBLAS;
#endif
} else if (t == "CUBLASLT" || t == "CUBLAS_LT") {
LOGS_DEFAULT(WARNING) << "[TensorRT EP] Tactic kCUBLAS_LT is deprecated in TensorRT 9.0";
#if NV_TENSORRT_MAJOR < 9
source = nvinfer1::TacticSource::kCUBLAS_LT;
#endif
} else if (t == "CUDNN") {
LOGS_DEFAULT(WARNING) << "[TensorRT EP] Tactic kCUDNN is deprecated in TensorRT 10.0";
#if NV_TENSORRT_MAJOR < 10
source = nvinfer1::TacticSource::kCUDNN;
#endif
} else if (t == "EDGE_MASK_CONVOLUTIONS") {
source = nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS;
} else if (t == "JIT_CONVOLUTIONS") {
@@ -298,6 +307,25 @@ void CudaCall<cudnnStatus_t, true>(cudnnStatus_t retCode, const char* exprString
return g_host->CudaCall_true(retCode, exprString, libName, successCode, msg, file, line);
}

#if NV_TENSORRT_MAJOR >= 10
void* OutputAllocator::reallocateOutputAsync(char const* /*tensorName*/, void* /*currentMemory*/, uint64_t size,
uint64_t /*alignment*/, cudaStream_t /*stream*/) noexcept {
// Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr
// even for empty tensors, so allocate a dummy byte.
size = std::max(size, static_cast<uint64_t>(1));
if (size > allocated_size) {
cudaFree(outputPtr);
outputPtr = nullptr;
allocated_size = 0;
if (cudaMalloc(&outputPtr, size) == cudaSuccess) {
allocated_size = size;
}
}
// if cudaMalloc fails, returns nullptr.
return outputPtr;
}
#else
// Only override this method when TensorRT <= 8.6
void* OutputAllocator::reallocateOutput(char const* /*tensorName*/, void* /*currentMemory*/, uint64_t size,
uint64_t /*alignment*/) noexcept {
// Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr
@@ -314,6 +342,7 @@ void* OutputAllocator::reallocateOutput(char const* /*tensorName*/, void* /*curr
// if cudaMalloc fails, returns nullptr.
return outputPtr;
}
#endif

void OutputAllocator::notifyShape(char const* /*tensorName*/, nvinfer1::Dims const& dims) noexcept {
output_shapes.clear();
@@ -3152,14 +3181,10 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
if (mem_size > max_ctx_mem_size_) {
max_ctx_mem_size_ = mem_size;
}

#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4996) // nvinfer1::ICudaEngine::createExecutionContextWithoutDeviceMemory was deprecated
#endif
#if NV_TENSORRT_MAJOR < 10
trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContextWithoutDeviceMemory());
#if defined(_MSC_VER)
#pragma warning(pop)
#else
trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED));
#endif
} else {
trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext());
@@ -3606,14 +3631,12 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView

if (context_update) {
if (trt_state->context_memory_sharing_enable) {
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4996) // nvinfer1::ICudaEngine::createExecutionContextWithoutDeviceMemory was deprecated
#endif
#if NV_TENSORRT_MAJOR < 10
*(trt_state->context) = std::unique_ptr<nvinfer1::IExecutionContext>(
trt_state->engine->get()->createExecutionContextWithoutDeviceMemory());
#if defined(_MSC_VER)
#pragma warning(pop)
#else
*(trt_state->context) = std::unique_ptr<nvinfer1::IExecutionContext>(
trt_state->engine->get()->createExecutionContext(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED));
#endif
} else {
*(trt_state->context) = std::unique_ptr<nvinfer1::IExecutionContext>(
@@ -3830,13 +3853,10 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con
if (mem_size > max_ctx_mem_size_) {
max_ctx_mem_size_ = mem_size;
}
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4996) // nvinfer1::ICudaEngine::createExecutionContextWithoutDeviceMemory was deprecated
#endif
#if NV_TENSORRT_MAJOR < 10
trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContextWithoutDeviceMemory());
#if defined(_MSC_VER)
#pragma warning(pop)
#else
trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED));
#endif
} else {
trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext());
@@ -116,8 +116,11 @@ using unique_pointer = std::unique_ptr<T, TensorrtInferDeleter>;
//
class OutputAllocator : public nvinfer1::IOutputAllocator {
public:
#if NV_TENSORRT_MAJOR >= 10
void* reallocateOutputAsync(char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t stream) noexcept override;
#else
void* reallocateOutput(char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept override;

#endif
void notifyShape(char const* tensorName, nvinfer1::Dims const& dims) noexcept override;

void* getBuffer() {
