[TensorRT EP] Use TRT/CUDA/ORT version from runtime instead of build time to generate hash value (#22921)

Use the TensorRT and CUDA versions fetched at **runtime** to generate the hash
value that determines the engine cache name.

The old approach took the versions at compile/build time, which can cause
problems in some cases. For example, TRT EP used the TRT version it was built
against, but users can switch to a different TRT version at run time; because
TRT EP kept checking the "fixed" build-time version rather than the version
actually in use, it could end up loading an incompatible TRT engine cache.

See the GitHub issue here:

#22382 (comment)
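
For context, a minimal sketch of the runtime query this change relies on (illustrative only; RuntimeVersionTag is a hypothetical helper, while getInferLibVersion() and cudaRuntimeGetVersion() are the actual TensorRT and CUDA runtime APIs the EP calls):

// Sketch: query the TensorRT and CUDA versions of the libraries actually
// loaded at EP/session creation, instead of baking in build-time macros.
#include <cuda_runtime_api.h>  // cudaRuntimeGetVersion
#include <NvInferRuntime.h>    // getInferLibVersion
#include <cstdint>
#include <string>

std::string RuntimeVersionTag() {
  // TensorRT packs its version as (MAJOR * 100 + MINOR) * 100 + PATCH
  // (see the header comment in the diff below).
  int32_t trt_version = getInferLibVersion();
  int cuda_version = 0;
  // CUDA packs its version as 1000 * major + 10 * minor, e.g. 12060 for CUDA 12.6.
  cudaRuntimeGetVersion(&cuda_version);
  return std::to_string(trt_version) + "_" + std::to_string(cuda_version);
}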
chilo-ms authored Dec 4, 2024
1 parent bd701e4 commit 9b9f881
Showing 4 changed files with 15 additions and 9 deletions.
@@ -1726,8 +1726,10 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
   }

   trt_version_ = getInferLibVersion();
+  CUDA_CALL_THROW(cudaRuntimeGetVersion(&cuda_version_));

   LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT version is " << trt_version_;
+  LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] CUDA version is " << cuda_version_;

   LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT provider options: "
                         << "device_id: " << device_id_
@@ -2466,13 +2468,13 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
   // So, simply return the ComputeCapability here.
   if (graph.NumberOfNodes() == 1 && GraphHasCtxNode(graph)) {
     SubGraph_t supported_node_vector = {{0}, true};
-    std::unique_ptr<IndexedSubGraph> sub_graph = GetSubGraph(supported_node_vector, graph, TRTGenerateId(graph), 0);
+    std::unique_ptr<IndexedSubGraph> sub_graph = GetSubGraph(supported_node_vector, graph, TRTGenerateId(graph, std::to_string(trt_version_), std::to_string(cuda_version_)), 0);
     result.push_back(ComputeCapability::Create(std::move(sub_graph)));
     return result;
   }

   // Generate unique kernel name for TRT graph
-  HashValue model_hash = TRTGenerateId(graph);
+  HashValue model_hash = TRTGenerateId(graph, std::to_string(trt_version_), std::to_string(cuda_version_));

   // Get supported node list from TensorRT parser
   const int number_of_ort_nodes = graph.NumberOfNodes();
@@ -333,6 +333,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {

   // The format is as for TENSORRT_VERSION: (MAJOR * 100 + MINOR) * 100 + PATCH
   int32_t trt_version_;
+  int32_t cuda_version_;

   // The OrtAllocator object will be get during ep compute time
   // and should be kept for the lifetime of TRT EP object.
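
As an aside, the packed integer format noted in the comment above can be decoded like this (a hypothetical helper for illustration, not part of this commit):

#include <cstdint>
#include <cstdio>

// Decode (MAJOR * 100 + MINOR) * 100 + PATCH, e.g. 100300 -> 10.3.0.
void PrintTrtVersion(int32_t v) {
  int major = v / 10000;
  int minor = (v / 100) % 100;
  int patch = v % 100;
  std::printf("TensorRT %d.%d.%d\n", major, minor, patch);
}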
@@ -520,7 +520,7 @@ void RemoveCachesByType(const std::string& root, std::string file_extension) {
  * compiled kernels, so the name must be unique and deterministic across models and sessions.
  * </remarks>
  */
-HashValue TRTGenerateId(const GraphViewer& graph_viewer) {
+HashValue TRTGenerateId(const GraphViewer& graph_viewer, std::string trt_version, std::string cuda_version) {
   HashValue model_hash = 0;

   // find the top level graph
@@ -583,12 +583,11 @@ HashValue TRTGenerateId(const GraphViewer& graph_viewer) {
 #endif

 #ifdef CUDA_VERSION
-  hash_str(std::to_string(CUDA_VERSION));
+  hash_str(cuda_version);
 #endif

 #if defined(NV_TENSORRT_MAJOR) && defined(NV_TENSORRT_MINOR)
-  std::string TRT_VERSION = std::to_string(NV_TENSORRT_MAJOR) + "." + std::to_string(NV_TENSORRT_MINOR);
-  hash_str(TRT_VERSION);
+  hash_str(trt_version);
 #endif

   model_hash = hash[0] | (uint64_t(hash[1]) << 32);
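The effect of the hunk above, in simplified form: the caller-supplied version strings now feed the same hash that names the engine cache, so the cache name changes whenever the runtime-reported TRT or CUDA version changes. A minimal sketch with assumed helper names (the real TRTGenerateId also hashes the graph itself using ORT's hashing utilities, not this exact scheme):

#include <cstdint>
#include <functional>
#include <string>

// Illustrative only: fold a string into a running 64-bit hash
// (boost-style hash_combine).
uint64_t CombineHash(uint64_t seed, const std::string& s) {
  return seed ^ (std::hash<std::string>{}(s) + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2));
}

uint64_t GenerateCacheId(const std::string& model_key,
                         const std::string& trt_version,
                         const std::string& cuda_version) {
  uint64_t h = CombineHash(0, model_key);
  h = CombineHash(h, trt_version);   // e.g. "100300" from getInferLibVersion()
  h = CombineHash(h, cuda_version);  // e.g. "12060" from cudaRuntimeGetVersion()
  return h;
}
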
10 changes: 7 additions & 3 deletions onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -342,8 +342,12 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) {
   Graph& graph = model->MainGraph();
   GraphViewer viewer(graph);

+  std::string trt_version = std::to_string(NV_TENSORRT_MAJOR) + "." + std::to_string(NV_TENSORRT_MINOR);
+  std::string cuda_version = std::to_string(CUDA_VERSION);
+  std::string ort_version = ORT_VERSION;
+
   // get the hash for the model when loaded from file
-  HashValue model_hash = TRTGenerateId(viewer);
+  HashValue model_hash = TRTGenerateId(viewer, trt_version, cuda_version);
   ASSERT_NE(model_hash, 0);

   // now load the model from bytes and check the hash differs
@@ -358,7 +362,7 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) {
   // Test loading same model from file and byte steam. Hash values should be different
   Graph& graph2 = model2->MainGraph();
   GraphViewer viewer2(graph2);
-  HashValue model_hash2 = TRTGenerateId(viewer2);
+  HashValue model_hash2 = TRTGenerateId(viewer2, trt_version, cuda_version);
   ASSERT_NE(model_hash, model_hash2);

   // Test loading same model from different path, see if hash values are same as well
@@ -367,7 +371,7 @@
   ASSERT_TRUE(Model::Load(model_path, model3, nullptr, DefaultLoggingManager().DefaultLogger()).IsOK());
   Graph& graph3 = model3->MainGraph();
   GraphViewer viewer3(graph3);
-  HashValue model_hash3 = TRTGenerateId(viewer3);
+  HashValue model_hash3 = TRTGenerateId(viewer3, trt_version, cuda_version);
   ASSERT_EQ(model_hash, model_hash3) << "model 1&3 are same models and they have same hash, no matter where they are loaded";
 }
