Skip to content

Commit

Permalink
Fix: Shared lib tests fail during build for CUDA,TRT,DML (#20453)
Browse files Browse the repository at this point in the history
The order of defines for these test have to be in the same order. If we
check for TRT -> CUDA ->DML wen cannot reverse that order in later
defines as we might want to build for multiple EPs.

+@PatriceVignola
  • Loading branch information
gedoensmax authored Apr 27, 2024
1 parent 2f5fe45 commit a0775d7
Showing 1 changed file with 17 additions and 17 deletions.
34 changes: 17 additions & 17 deletions onnxruntime/test/shared_lib/test_inference.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2234,10 +2234,10 @@ TEST(CApiTest, basic_cuda_graph) {
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
Ort::MemoryInfo info_mem("Hip", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
#elif defined(USE_CUDA) || defined(USE_TENSORRT)
Ort::MemoryInfo info_mem("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
#elif defined(USE_DML)
Ort::MemoryInfo info_mem("DML", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemTypeDefault);
#else
Ort::MemoryInfo info_mem("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
#endif

Ort::Allocator allocator(session, info_mem);
Expand All @@ -2250,12 +2250,12 @@ TEST(CApiTest, basic_cuda_graph) {

ASSERT_NE(input_data.get(), nullptr);

#ifdef USE_DML
#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM)
(void)cudaMemcpy(input_data.get(), x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
#elif defined(USE_DML)
ComPtr<ID3D12Resource> input_resource;
Ort::ThrowOnError(ort_dml_api->GetD3D12ResourceFromAllocation(allocator, input_data.get(), &input_resource));
UploadDataToDml(dml_objects, input_resource.Get(), gsl::make_span(reinterpret_cast<const std::byte*>(x_values.data()), sizeof(float) * x_values.size()));
#else
(void)cudaMemcpy(input_data.get(), x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
#endif

// Create an OrtValue tensor backed by data on CUDA memory
Expand Down Expand Up @@ -2283,45 +2283,45 @@ TEST(CApiTest, basic_cuda_graph) {
// Check the values against the bound raw memory (needs copying from device to host first)
std::array<float, 3 * 2> y_values;

#ifdef USE_DML
#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM)
(void)cudaMemcpy(y_values.data(), output_data.get(), sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
#elif defined(USE_DML)
ComPtr<ID3D12Resource> output_resource;
Ort::ThrowOnError(ort_dml_api->GetD3D12ResourceFromAllocation(allocator, output_data.get(), &output_resource));
auto output_cpu_bytes = reinterpret_cast<std::byte*>(y_values.data());
DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * y_values.size()));
#else
(void)cudaMemcpy(y_values.data(), output_data.get(), sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
#endif

ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y));

// Replay the captured CUDA graph
session.Run(Ort::RunOptions(), binding);

#ifdef USE_DML
DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * y_values.size()));
#else
#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM)
(void)cudaMemcpy(y_values.data(), output_data.get(), sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
#elif defined(USE_DML)
DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * y_values.size()));
#endif

ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y));

// Change the input and replay the CUDA graph again.
x_values = {10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f};

#ifdef USE_DML
UploadDataToDml(dml_objects, input_resource.Get(), gsl::make_span(reinterpret_cast<const std::byte*>(x_values.data()), sizeof(float) * x_values.size()));
#else
#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM)
(void)cudaMemcpy(input_data.get(), x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
#elif defined(USE_DML)
UploadDataToDml(dml_objects, input_resource.Get(), gsl::make_span(reinterpret_cast<const std::byte*>(x_values.data()), sizeof(float) * x_values.size()));
#endif

binding.SynchronizeInputs();

session.Run(Ort::RunOptions(), binding);

#ifdef USE_DML
DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * y_values.size()));
#else
#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM)
(void)cudaMemcpy(y_values.data(), output_data.get(), sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
#elif defined(USE_DML)
DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * y_values.size()));
#endif

expected_y = {10.0f, 40.0f, 90.0f, 160.0f, 250.0f, 360.0f};
Expand Down

0 comments on commit a0775d7

Please sign in to comment.