Skip to content

Commit

Permalink
Adding CUDNN Frontend to CUDA EP and use it for Convolution
Browse files Browse the repository at this point in the history
  • Loading branch information
JTischbein committed Jul 23, 2024
1 parent 0f1f3b7 commit 168e6ff
Show file tree
Hide file tree
Showing 33 changed files with 1,178 additions and 514 deletions.
10 changes: 10 additions & 0 deletions cgmanifests/generated/cgmanifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,16 @@
},
"comments": "directx_headers"
}
},
{
"component": {
"type": "git",
"git": {
"commitHash": "1b0b5eac540b7f8fd19b18f1e6b8427c95503348",
"repositoryUrl": "https://github.com/NVIDIA/cudnn-frontend.git"
},
"comments": "cudnn_frontend"
}
}
]
}
1 change: 1 addition & 0 deletions cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,4 @@ utf8_range;https://github.com/protocolbuffers/utf8_range/archive/72c943dea2b9240
extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c
composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/204da9c522cebec5220bba52cd3542ebcaf99e7a.zip;1827348efd47831c13074245274d41b7cae8a557
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.3.0.zip;dfccd5ff821cb2b2fa832bef26f39154358e6e13
12 changes: 12 additions & 0 deletions cmake/external/cudnn_frontend.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
include(FetchContent)
FetchContent_Declare(
cudnn_frontend
URL ${DEP_URL_cudnn_frontend}
URL_HASH SHA1=${DEP_SHA1_cudnn_frontend}
)

set(CUDNN_FRONTEND_BUILD_SAMPLES OFF)
set(CUDNN_FRONTEND_BUILD_UNIT_TESTS OFF)
set(CUDNN_FRONTEND_BUILD_PYTHON_BINDINGS OFF)
set(CUDNN_PATH ${onnxruntime_CUDNN_HOME})
FetchContent_MakeAvailable(cudnn_frontend)
3 changes: 2 additions & 1 deletion cmake/onnxruntime_providers_cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@
target_compile_definitions(${target} PRIVATE USE_CUDA_MINIMAL)
target_link_libraries(${target} PRIVATE ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface CUDA::cudart)
else()
target_link_libraries(${target} PRIVATE CUDA::cublasLt CUDA::cublas cudnn CUDA::curand CUDA::cufft CUDA::cudart
include(cudnn_frontend)
target_link_libraries(${target} PRIVATE CUDA::cublasLt CUDA::cublas cudnn cudnn_frontend CUDA::curand CUDA::cufft CUDA::cudart
${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface)
if(onnxruntime_CUDNN_HOME)
target_include_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/include)
Expand Down
2 changes: 2 additions & 0 deletions cmake/onnxruntime_rocm_hipify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ set(provider_excluded_files
"rnn/rnn_impl.cu"
"rnn/rnn_impl.h"
"shared_inc/cuda_call.h"
"shared_inc/cudnn_fe_call.h"
"shared_inc/fpgeneric.h"
"cuda_allocator.cc"
"cuda_allocator.h"
Expand All @@ -171,6 +172,7 @@ set(provider_excluded_files
"cuda_utils.cu"
"cudnn_common.cc"
"cudnn_common.h"
"cudnn_fe_call.cc"
"cupti_manager.cc"
"cupti_manager.h"
"fpgeneric.cu"
Expand Down
2 changes: 1 addition & 1 deletion cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ function(AddTest)
if(onnxruntime_USE_CUDA)
#XXX: we should not need to do this. onnxruntime_test_all.exe should not have direct dependency on CUDA DLLs,
# otherwise it will impact when CUDA DLLs can be unloaded.
target_link_libraries(${_UT_TARGET} PRIVATE CUDA::cudart)
target_link_libraries(${_UT_TARGET} PRIVATE CUDA::cudart cudnn_frontend)
endif()
target_link_libraries(${_UT_TARGET} PRIVATE ${_UT_LIBS} GTest::gtest GTest::gmock ${onnxruntime_EXTERNAL_LIBRARIES})
endif()
Expand Down
2 changes: 2 additions & 0 deletions include/onnxruntime/core/providers/cuda/cuda_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ struct CudaContext : public CustomOpContext {
bool enable_skip_layer_norm_strict_mode = false;
bool prefer_nhwc = false;
bool use_tf32 = true;
bool fuse_conv_bias = true;

void Init(const OrtKernelContext& kernel_ctx) {
cuda_stream = FetchResource<cudaStream_t>(kernel_ctx, CudaResource::cuda_stream_t);
Expand All @@ -57,6 +58,7 @@ struct CudaContext : public CustomOpContext {
kernel_ctx, CudaResource::enable_skip_layer_norm_strict_mode_t);
prefer_nhwc = FetchResource<bool>(kernel_ctx, CudaResource::prefer_nhwc_t);
use_tf32 = FetchResource<bool>(kernel_ctx, CudaResource::use_tf32_t);
fuse_conv_bias = FetchResource<bool>(kernel_ctx, CudaResource::fuse_conv_bias_t);
}

template <typename T>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,6 @@ struct OrtCUDAProviderOptionsV2 {
int prefer_nhwc = 0; // make the CUDA EP NHWC preferred
int use_ep_level_unified_stream = 0; // flag specifying if ep level stream is used or not
int use_tf32 = 1; // use TF32
int fuse_conv_bias = 0; // Enable CUDNN Frontend kernel fusing, results in JIT compiles

Check warning on line 41 in include/onnxruntime/core/providers/cuda/cuda_provider_options.h

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: include/onnxruntime/core/providers/cuda/cuda_provider_options.h:41: Lines should be <= 120 characters long [whitespace/line_length] [2]
int sdpa_kernel = 0; // Scaled Dot Product Attention kernel option
};
1 change: 1 addition & 0 deletions include/onnxruntime/core/providers/cuda/cuda_resource.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ enum CudaResource : int {
enable_skip_layer_norm_strict_mode_t,
prefer_nhwc_t,
use_tf32_t,
fuse_conv_bias_t
};
2 changes: 1 addition & 1 deletion onnxruntime/contrib_ops/cuda/diffusion/nhwc_conv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace cuda {
T, \
kCudaExecutionProvider, \
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
Conv<T, true>);
onnxruntime::cuda::Conv<T, true>);

REGISTER_KERNEL_TYPED(float)
REGISTER_KERNEL_TYPED(MLFloat16)
Expand Down
Loading

0 comments on commit 168e6ff

Please sign in to comment.