[CUDA] Fix cuda provider fallback inconsistency (#21425)
* Fix the fallback setting: previously a session that requested only CUDA could "fall back" to CUDA itself when the CUDA provider failed to load; CUDA is now kept as a fallback only when TensorRT is explicitly requested alongside it.
* Fix the CUDA provider fallback behaving inconsistently depending on whether the CUDA_PATH environment variable is set (previously it threw instead of falling back when CUDA_PATH was set).
* Add the required CUDA and cuDNN major versions to the error message.

Example result on Windows:
```
>>> import onnxruntime
>>> ort_session = onnxruntime.InferenceSession("model.onnx", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
2024-07-19 17:43:44.2260019 [E:onnxruntime:Default, provider_bridge_ort.cc:1972 onnxruntime::TryGetProviderInfo_CUDA] D:\onnxruntime\onnxruntime\core\session\provider_bridge_ort.cc:1636 onnxruntime::ProviderLibrary::Get [ONNXRuntimeError] : 1 : FAIL : LoadLibrary failed with error 126 "" when trying to load "C:\Users\.conda\envs\py310\lib\site-packages\onnxruntime\capi\onnxruntime_providers_cuda.dll"

2024-07-19 17:43:44.2312351 [W:onnxruntime:Default, onnxruntime_pybind_state.cc:970 onnxruntime::python::CreateExecutionProviderInstance] Failed to create CUDAExecutionProvider. Require cuDNN 9.* and CUDA 12.*, and the latest MSVC runtime. Please install all dependencies as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported.
>>> ort_session
<onnxruntime.capi.onnxruntime_inference_collection.InferenceSession object at 0x0000016BB2DF7D60>
>>> ort_session.get_providers()
['CPUExecutionProvider']
```

Example result on Linux:
```
>>> import onnxruntime
>>> ort_session = onnxruntime.InferenceSession("resnet50-v2-7.onnx", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
2024-07-20 20:33:26.486974543 [E:onnxruntime:Default, provider_bridge_ort.cc:1972 TryGetProviderInfo_CUDA] /work/onnxruntime/onnxruntime/core/session/provider_bridge_ort.cc:1636 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory

2024-07-20 20:33:26.487034646 [W:onnxruntime:Default, onnxruntime_pybind_state.cc:961 CreateExecutionProviderInstance] Failed to create CUDAExecutionProvider. Require cuDNN 9.* and CUDA 12.*. Please install all dependencies as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported.
>>> ort_session.get_providers()
['CPUExecutionProvider']
```
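
In both transcripts the session is still created and silently falls back to CPU; the warning is the only signal. A caller that requires GPU execution can detect the fallback explicitly through the public API, e.g. (a minimal sketch; the model path is illustrative):
```
import onnxruntime

session = onnxruntime.InferenceSession(
    "model.onnx",  # illustrative model path
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)

# Session creation succeeds even when the CUDA EP fails to load, so check
# which providers actually got attached to the session.
if "CUDAExecutionProvider" not in session.get_providers():
    raise RuntimeError("CUDAExecutionProvider unavailable; running on CPU only.")
```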
### Motivation and Context
#21424
tianleiwu authored Jul 23, 2024
1 parent 7af39c6 commit 2b7e2a5
Showing 3 changed files with 31 additions and 17 deletions.
8 changes: 6 additions & 2 deletions cmake/onnxruntime_python.cmake
```
@@ -97,8 +97,12 @@ endif()
 
 onnxruntime_add_include_to_target(onnxruntime_pybind11_state Python::Module Python::NumPy)
 target_include_directories(onnxruntime_pybind11_state PRIVATE ${ONNXRUNTIME_ROOT} ${pybind11_INCLUDE_DIRS})
-if(onnxruntime_USE_CUDA AND onnxruntime_CUDNN_HOME)
-  target_include_directories(onnxruntime_pybind11_state PRIVATE ${onnxruntime_CUDNN_HOME}/include)
+if(onnxruntime_USE_CUDA)
+  target_include_directories(onnxruntime_pybind11_state PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+  # cudnn_home is optional on Windows when cuda and cudnn are installed in the same directory.
+  if(onnxruntime_CUDNN_HOME)
+    target_include_directories(onnxruntime_pybind11_state PRIVATE ${onnxruntime_CUDNN_HOME}/include)
+  endif()
 endif()
 if(onnxruntime_USE_CANN)
   target_include_directories(onnxruntime_pybind11_state PRIVATE ${onnxruntime_CANN_HOME}/include)
```
16 changes: 12 additions & 4 deletions onnxruntime/python/onnxruntime_inference_collection.py
```
@@ -438,10 +438,18 @@ def _create_inference_session(self, providers, provider_options, disabled_optimi
 
         # Tensorrt can fall back to CUDA if it's explicitly assigned. All others fall back to CPU.
         if "TensorrtExecutionProvider" in available_providers:
-            if providers and any(
-                provider == "CUDAExecutionProvider"
-                or (isinstance(provider, tuple) and provider[0] == "CUDAExecutionProvider")
-                for provider in providers
+            if (
+                providers
+                and any(
+                    provider == "CUDAExecutionProvider"
+                    or (isinstance(provider, tuple) and provider[0] == "CUDAExecutionProvider")
+                    for provider in providers
+                )
+                and any(
+                    provider == "TensorrtExecutionProvider"
+                    or (isinstance(provider, tuple) and provider[0] == "TensorrtExecutionProvider")
+                    for provider in providers
+                )
             ):
                 self._fallback_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
             else:
```
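
For clarity, the new condition can be read as a standalone predicate. The sketch below is illustrative only: the helper names are not from the codebase, and it assumes the `else` branch falls back to CPU alone.
```
def _requests_provider(providers, name):
    # Entries in `providers` are either "Name" strings or ("Name", options) tuples.
    return any(
        p == name or (isinstance(p, tuple) and p[0] == name)
        for p in providers or []
    )


def fallback_providers(available_providers, requested_providers):
    # CUDA stays in the fallback list only when TensorRT is available in the
    # build AND both TensorRT and CUDA were explicitly requested.
    if (
        "TensorrtExecutionProvider" in available_providers
        and _requests_provider(requested_providers, "CUDAExecutionProvider")
        and _requests_provider(requested_providers, "TensorrtExecutionProvider")
    ):
        return ["CUDAExecutionProvider", "CPUExecutionProvider"]
    return ["CPUExecutionProvider"]


# Requesting only CUDA on a TensorRT-enabled build no longer produces a CUDA
# "fallback" for a CUDA provider that just failed to load.
assert fallback_providers(
    ["TensorrtExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"],
    ["CUDAExecutionProvider", "CPUExecutionProvider"],
) == ["CPUExecutionProvider"]
```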
24 changes: 13 additions & 11 deletions onnxruntime/python/onnxruntime_pybind_state.cc
```
@@ -35,6 +35,11 @@
 #include "contrib_ops/cpu/aten_ops/aten_op_executor.h"
 #endif
 
+#ifdef USE_CUDA
+#include <cuda.h>   // for CUDA_VERSION
+#include <cudnn.h>  // for CUDNN_MAJOR
+#endif
+
 #include <pybind11/functional.h>
 
 // Explicitly provide a definition for the static const var 'GPU' in the OrtDevice struct,
@@ -951,21 +956,18 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
         // external CUDA allocator.
         external_allocator_info = info.external_allocator_info;
         return cuda_provider_info->CreateExecutionProviderFactory(info)->CreateProvider();
-      } else {
-        if (!Env::Default().GetEnvironmentVar("CUDA_PATH").empty()) {
-          ORT_THROW(
-              "CUDA_PATH is set but CUDA wasn't able to be loaded. Please install the correct version of CUDA and"
-              "cuDNN as mentioned in the GPU requirements page "
-              " (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), "
-              " make sure they're in the PATH, and that your GPU is supported.");
-        }
       }
     }
     LOGS_DEFAULT(WARNING) << "Failed to create "
                           << type
-                          << ". Please reference "
-                          << "https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements"
-                          << "to ensure all dependencies are met.";
+                          << ". Require cuDNN " << CUDNN_MAJOR << ".* and "
+                          << "CUDA " << (CUDA_VERSION / 1000) << ".*"
+#if defined(_MSC_VER)
+                          << ", and the latest MSVC runtime"
+#endif
+                          << ". Please install all dependencies as mentioned in the GPU requirements page"
+                             " (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), "
+                             "make sure they're in the PATH, and that your GPU is supported.";
 #endif
   } else if (type == kRocmExecutionProvider) {
 #ifdef USE_ROCM
```
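
For reference, `CUDNN_MAJOR` is the cuDNN major version itself, while `CUDA_VERSION` from cuda.h encodes the release as 1000 * major + 10 * minor, so the `CUDA_VERSION / 1000` in the warning recovers the CUDA major version. A quick check of that arithmetic:
```
def cuda_major(cuda_version: int) -> int:
    # cuda.h defines CUDA_VERSION as 1000 * major + 10 * minor,
    # mirroring the (CUDA_VERSION / 1000) integer division in the C++ diff.
    return cuda_version // 1000

assert cuda_major(12020) == 12  # CUDA 12.2
assert cuda_major(11080) == 11  # CUDA 11.8
```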
