diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index d979d53347c4f..1b432dad44263 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1726,8 +1726,10 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv } trt_version_ = getInferLibVersion(); + CUDA_CALL_THROW(cudaRuntimeGetVersion(&cuda_version_)); LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT version is " << trt_version_; + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] CUDA version is " << cuda_version_; LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT provider options: " << "device_id: " << device_id_ @@ -2466,13 +2468,13 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph, // So, simply return the ComputeCapability here. if (graph.NumberOfNodes() == 1 && GraphHasCtxNode(graph)) { SubGraph_t supported_node_vector = {{0}, true}; - std::unique_ptr sub_graph = GetSubGraph(supported_node_vector, graph, TRTGenerateId(graph), 0); + std::unique_ptr sub_graph = GetSubGraph(supported_node_vector, graph, TRTGenerateId(graph, std::to_string(trt_version_), std::to_string(cuda_version_)), 0); result.push_back(ComputeCapability::Create(std::move(sub_graph))); return result; } // Generate unique kernel name for TRT graph - HashValue model_hash = TRTGenerateId(graph); + HashValue model_hash = TRTGenerateId(graph, std::to_string(trt_version_), std::to_string(cuda_version_)); // Get supported node list from TensorRT parser const int number_of_ort_nodes = graph.NumberOfNodes(); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 9e3a03417d917..d3e0b0fba8891 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -333,6 +333,7 @@ class TensorrtExecutionProvider : public IExecutionProvider { // The format is as for TENSORRT_VERSION: (MAJOR * 100 + MINOR) * 100 + PATCH int32_t trt_version_; + int32_t cuda_version_; // The OrtAllocator object will be get during ep compute time // and should be kept for the lifetime of TRT EP object. diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h index 95abcd1bad2b8..5a7b135fd92cd 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h @@ -520,7 +520,7 @@ void RemoveCachesByType(const std::string& root, std::string file_extension) { * compiled kernels, so the name must be unique and deterministic across models and sessions. * */ -HashValue TRTGenerateId(const GraphViewer& graph_viewer) { +HashValue TRTGenerateId(const GraphViewer& graph_viewer, std::string trt_version, std::string cuda_version) { HashValue model_hash = 0; // find the top level graph @@ -583,12 +583,11 @@ HashValue TRTGenerateId(const GraphViewer& graph_viewer) { #endif #ifdef CUDA_VERSION - hash_str(std::to_string(CUDA_VERSION)); + hash_str(cuda_version); #endif #if defined(NV_TENSORRT_MAJOR) && defined(NV_TENSORRT_MINOR) - std::string TRT_VERSION = std::to_string(NV_TENSORRT_MAJOR) + "." + std::to_string(NV_TENSORRT_MINOR); - hash_str(TRT_VERSION); + hash_str(trt_version); #endif model_hash = hash[0] | (uint64_t(hash[1]) << 32); diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 63327a028c6f4..0022d7fc0e184 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -342,8 +342,12 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { Graph& graph = model->MainGraph(); GraphViewer viewer(graph); + std::string trt_version = std::to_string(NV_TENSORRT_MAJOR) + "." + std::to_string(NV_TENSORRT_MINOR); + std::string cuda_version = std::to_string(CUDA_VERSION); + std::string ort_version = ORT_VERSION; + // get the hash for the model when loaded from file - HashValue model_hash = TRTGenerateId(viewer); + HashValue model_hash = TRTGenerateId(viewer, trt_version, cuda_version); ASSERT_NE(model_hash, 0); // now load the model from bytes and check the hash differs @@ -358,7 +362,7 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { // Test loading same model from file and byte steam. Hash values should be different Graph& graph2 = model2->MainGraph(); GraphViewer viewer2(graph2); - HashValue model_hash2 = TRTGenerateId(viewer2); + HashValue model_hash2 = TRTGenerateId(viewer2, trt_version, cuda_version); ASSERT_NE(model_hash, model_hash2); // Test loading same model from different path, see if hash values are same as well @@ -367,7 +371,7 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { ASSERT_TRUE(Model::Load(model_path, model3, nullptr, DefaultLoggingManager().DefaultLogger()).IsOK()); Graph& graph3 = model3->MainGraph(); GraphViewer viewer3(graph3); - HashValue model_hash3 = TRTGenerateId(viewer3); + HashValue model_hash3 = TRTGenerateId(viewer3, trt_version, cuda_version); ASSERT_EQ(model_hash, model_hash3) << "model 1&3 are same models and they have same hash, no matter where they are loaded"; }