From a0775d74a15361d392f6c4dc540f6d56a45fab00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= <44298237+gedoensmax@users.noreply.github.com> Date: Sat, 27 Apr 2024 05:25:24 +0200 Subject: [PATCH] Fix: Shared lib tests fail during build for CUDA,TRT,DML (#20453) The order of defines for these tests has to be the same everywhere. If we check for TRT -> CUDA -> DML, we cannot reverse that order in later defines, as we might want to build for multiple EPs. +@PatriceVignola --- onnxruntime/test/shared_lib/test_inference.cc | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 2ccd3c69ab818..051a93ac8458f 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -2234,10 +2234,10 @@ TEST(CApiTest, basic_cuda_graph) { #define cudaMemcpyHostToDevice hipMemcpyHostToDevice #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost Ort::MemoryInfo info_mem("Hip", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault); +#elif defined(USE_CUDA) || defined(USE_TENSORRT) + Ort::MemoryInfo info_mem("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault); #elif defined(USE_DML) Ort::MemoryInfo info_mem("DML", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemTypeDefault); -#else - Ort::MemoryInfo info_mem("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault); #endif Ort::Allocator allocator(session, info_mem); @@ -2250,12 +2250,12 @@ TEST(CApiTest, basic_cuda_graph) { ASSERT_NE(input_data.get(), nullptr); -#ifdef USE_DML +#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM) + (void)cudaMemcpy(input_data.get(), x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice); +#elif defined(USE_DML) ComPtr input_resource; Ort::ThrowOnError(ort_dml_api->GetD3D12ResourceFromAllocation(allocator, input_data.get(), &input_resource)); 
UploadDataToDml(dml_objects, input_resource.Get(), gsl::make_span(reinterpret_cast(x_values.data()), sizeof(float) * x_values.size())); -#else - (void)cudaMemcpy(input_data.get(), x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice); #endif // Create an OrtValue tensor backed by data on CUDA memory @@ -2283,13 +2283,13 @@ TEST(CApiTest, basic_cuda_graph) { // Check the values against the bound raw memory (needs copying from device to host first) std::array y_values; -#ifdef USE_DML +#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM) + (void)cudaMemcpy(y_values.data(), output_data.get(), sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost); +#elif defined(USE_DML) ComPtr output_resource; Ort::ThrowOnError(ort_dml_api->GetD3D12ResourceFromAllocation(allocator, output_data.get(), &output_resource)); auto output_cpu_bytes = reinterpret_cast(y_values.data()); DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * y_values.size())); -#else - (void)cudaMemcpy(y_values.data(), output_data.get(), sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost); #endif ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y)); @@ -2297,10 +2297,10 @@ TEST(CApiTest, basic_cuda_graph) { // Replay the captured CUDA graph session.Run(Ort::RunOptions(), binding); -#ifdef USE_DML - DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * y_values.size())); -#else +#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM) (void)cudaMemcpy(y_values.data(), output_data.get(), sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost); +#elif defined(USE_DML) + DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * y_values.size())); #endif ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y)); @@ -2308,20 +2308,20 @@ TEST(CApiTest, basic_cuda_graph) { // Change the input and replay the CUDA 
graph again. x_values = {10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f}; -#ifdef USE_DML - UploadDataToDml(dml_objects, input_resource.Get(), gsl::make_span(reinterpret_cast(x_values.data()), sizeof(float) * x_values.size())); -#else +#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM) (void)cudaMemcpy(input_data.get(), x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice); +#elif defined(USE_DML) + UploadDataToDml(dml_objects, input_resource.Get(), gsl::make_span(reinterpret_cast(x_values.data()), sizeof(float) * x_values.size())); #endif binding.SynchronizeInputs(); session.Run(Ort::RunOptions(), binding); -#ifdef USE_DML - DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * y_values.size())); -#else +#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM) (void)cudaMemcpy(y_values.data(), output_data.get(), sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost); +#elif defined(USE_DML) + DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * y_values.size())); #endif expected_y = {10.0f, 40.0f, 90.0f, 160.0f, 250.0f, 360.0f};