From fdaf8bca619a40c5b4628502b7bee20ccadd715b Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 27 Sep 2023 18:42:20 -0400 Subject: [PATCH] convert librett unit tests to use default stream provided by TA runtime, not CUDA runtime --- tests/librett.cpp | 120 +++++++++++++++++++++++++++------------------- 1 file changed, 72 insertions(+), 48 deletions(-) diff --git a/tests/librett.cpp b/tests/librett.cpp index 3b76e1106e..58093d0e06 100644 --- a/tests/librett.cpp +++ b/tests/librett.cpp @@ -55,13 +55,18 @@ BOOST_AUTO_TEST_CASE(librett_gpu_mem) { iter++; } } + + auto q = TiledArray::deviceEnv::instance()->stream(0); + DeviceSafeCall(TiledArray::device::setDevice(q.device)); + int* a_device; TiledArray::device::malloc(&a_device, A * A * sizeof(int)); int* b_device; TiledArray::device::malloc(&b_device, A * A * sizeof(int)); - TiledArray::device::memcpy(a_device, a_host, A * A * sizeof(int), - TiledArray::device::MemcpyHostToDevice); + TiledArray::device::memcpyAsync(a_device, a_host, A * A * sizeof(int), + TiledArray::device::MemcpyHostToDevice, + q.stream); std::vector extent({A, A}); TiledArray::extent_to_col_major(extent); @@ -70,22 +75,23 @@ BOOST_AUTO_TEST_CASE(librett_gpu_mem) { TiledArray::permutation_to_col_major(perm); librettHandle plan; - auto stream = TiledArray::deviceEnv::instance()->stream(0); - DeviceSafeCall(TiledArray::device::setDevice(stream.device)); librettResult status; - status = librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), - stream.stream); + status = + librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), q.stream); BOOST_CHECK(status == LIBRETT_SUCCESS); status = librettExecute(plan, a_device, b_device); BOOST_CHECK(status == LIBRETT_SUCCESS); - librettDestroy(plan); - TiledArray::device::memcpy(b_host, b_device, A * A * sizeof(int), - TiledArray::device::MemcpyDeviceToHost); + TiledArray::device::memcpyAsync(b_host, b_device, A * A * sizeof(int), + TiledArray::device::MemcpyDeviceToHost, + q.stream); + TiledArray::device::streamSynchronize(q.stream); + + librettDestroy(plan); iter = 0; for (std::size_t i = 0; i < A; i++) { @@ -113,17 +119,19 @@ BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym) { } } + auto q = TiledArray::deviceEnv::instance()->stream(0); + DeviceSafeCall(TiledArray::device::setDevice(q.device)); + int* a_device; TiledArray::device::malloc(&a_device, A * B * sizeof(int)); int* b_device; TiledArray::device::malloc(&b_device, A * B * sizeof(int)); - TiledArray::device::memcpy(a_device, a_host, A * B * sizeof(int), - TiledArray::device::MemcpyHostToDevice); + TiledArray::device::memcpyAsync(a_device, a_host, A * B * sizeof(int), + TiledArray::device::MemcpyHostToDevice, + q.stream); librettHandle plan; - auto stream = TiledArray::deviceEnv::instance()->stream(0); - DeviceSafeCall(TiledArray::device::setDevice(stream.device)); librettResult status; std::vector extent({B, A}); @@ -132,18 +140,21 @@ BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym) { std::vector perm({1, 0}); TiledArray::permutation_to_col_major(perm); - status = librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), - stream.stream); + status = + librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), q.stream); BOOST_CHECK(status == LIBRETT_SUCCESS); status = librettExecute(plan, a_device, b_device); BOOST_CHECK(status == LIBRETT_SUCCESS); - librettDestroy(plan); - TiledArray::device::memcpy(b_host, b_device, A * B * sizeof(int), - TiledArray::device::MemcpyDeviceToHost); + TiledArray::device::memcpyAsync(b_host, b_device, A * B * sizeof(int), + TiledArray::device::MemcpyDeviceToHost, + q.stream); + TiledArray::device::streamSynchronize(q.stream); + + librettDestroy(plan); iter = 0; for (std::size_t i = 0; i < B; i++) { @@ -173,19 +184,21 @@ BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym_rank_three_column_major) { } } + auto q = TiledArray::deviceEnv::instance()->stream(0); + DeviceSafeCall(TiledArray::device::setDevice(q.device)); + int* a_device; TiledArray::device::malloc(&a_device, A * B * C * sizeof(int)); int* b_device; TiledArray::device::malloc(&b_device, A * B * C * sizeof(int)); - TiledArray::device::memcpy(a_device, a_host, A * B * C * sizeof(int), - TiledArray::device::MemcpyHostToDevice); + TiledArray::device::memcpyAsync(a_device, a_host, A * B * C * sizeof(int), + TiledArray::device::MemcpyHostToDevice, + q.stream); // b(j,i,k) = a(i,j,k) librettHandle plan; - auto stream = TiledArray::deviceEnv::instance()->stream(0); - DeviceSafeCall(TiledArray::device::setDevice(stream.device)); librettResult status; std::vector extent3{int(A), int(B), int(C)}; @@ -194,7 +207,7 @@ BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym_rank_three_column_major) { // std::vector perm3{0, 2, 1}; status = librettPlanMeasure(&plan, 3, extent3.data(), perm3.data(), - sizeof(int), stream.stream, a_device, b_device); + sizeof(int), q.stream, a_device, b_device); BOOST_CHECK(status == LIBRETT_SUCCESS); @@ -202,8 +215,10 @@ BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym_rank_three_column_major) { BOOST_CHECK(status == LIBRETT_SUCCESS); - TiledArray::device::memcpy(b_host, b_device, A * B * C * sizeof(int), - TiledArray::device::MemcpyDeviceToHost); + TiledArray::device::memcpyAsync(b_host, b_device, A * B * C * sizeof(int), + TiledArray::device::MemcpyDeviceToHost, + q.stream); + TiledArray::device::streamSynchronize(q.stream); status = librettDestroy(plan); @@ -239,19 +254,21 @@ BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym_rank_three_row_major) { } } + auto q = TiledArray::deviceEnv::instance()->stream(0); + DeviceSafeCall(TiledArray::device::setDevice(q.device)); + int* a_device; TiledArray::device::malloc(&a_device, A * B * C * sizeof(int)); int* b_device; TiledArray::device::malloc(&b_device, A * B * C * sizeof(int)); - TiledArray::device::memcpy(a_device, a_host, A * B * C * sizeof(int), - TiledArray::device::MemcpyHostToDevice); + TiledArray::device::memcpyAsync(a_device, a_host, A * B * C * sizeof(int), + TiledArray::device::MemcpyHostToDevice, + q.stream); // b(j,i,k) = a(i,j,k) librettHandle plan; - auto stream = TiledArray::deviceEnv::instance()->stream(0); - DeviceSafeCall(TiledArray::device::setDevice(stream.device)); librettResult status; std::vector extent({A, B, C}); @@ -261,7 +278,7 @@ BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym_rank_three_row_major) { TiledArray::permutation_to_col_major(perm); status = librettPlanMeasure(&plan, 3, extent.data(), perm.data(), sizeof(int), - stream.stream, a_device, b_device); + q.stream, a_device, b_device); BOOST_CHECK(status == LIBRETT_SUCCESS); @@ -269,8 +286,10 @@ BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym_rank_three_row_major) { BOOST_CHECK(status == LIBRETT_SUCCESS); - TiledArray::device::memcpy(b_host, b_device, A * B * C * sizeof(int), - TiledArray::device::MemcpyDeviceToHost); + TiledArray::device::memcpyAsync(b_host, b_device, A * B * C * sizeof(int), + TiledArray::device::MemcpyDeviceToHost, + q.stream); + TiledArray::device::streamSynchronize(q.stream); status = librettDestroy(plan); @@ -308,9 +327,10 @@ BOOST_AUTO_TEST_CASE(librett_unified_mem) { } } + auto q = TiledArray::deviceEnv::instance()->stream(0); + DeviceSafeCall(TiledArray::device::setDevice(q.device)); + librettHandle plan; - auto stream = TiledArray::deviceEnv::instance()->stream(0); - DeviceSafeCall(TiledArray::device::setDevice(stream.device)); librettResult status; std::vector extent({A, A}); @@ -319,8 +339,8 @@ BOOST_AUTO_TEST_CASE(librett_unified_mem) { std::vector perm({1, 0}); TiledArray::permutation_to_col_major(perm); - status = librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), - stream.stream); + status = + librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), q.stream); BOOST_CHECK(status == LIBRETT_SUCCESS); @@ -328,9 +348,9 @@ BOOST_AUTO_TEST_CASE(librett_unified_mem) { BOOST_CHECK(status == LIBRETT_SUCCESS); - librettDestroy(plan); + TiledArray::device::streamSynchronize(q.stream); - TiledArray::device::deviceSynchronize(); + librettDestroy(plan); iter = 0; for (std::size_t i = 0; i < A; i++) { @@ -359,9 +379,10 @@ BOOST_AUTO_TEST_CASE(librett_unified_mem_nonsym) { } } + auto q = TiledArray::deviceEnv::instance()->stream(0); + DeviceSafeCall(TiledArray::device::setDevice(q.device)); + librettHandle plan; - auto stream = TiledArray::deviceEnv::instance()->stream(0); - DeviceSafeCall(TiledArray::device::setDevice(stream.device)); librettResult status; std::vector extent({B, A}); @@ -370,8 +391,8 @@ BOOST_AUTO_TEST_CASE(librett_unified_mem_nonsym) { std::vector perm({1, 0}); TiledArray::permutation_to_col_major(perm); - status = librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), - stream.stream); + status = + librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), q.stream); BOOST_CHECK(status == LIBRETT_SUCCESS); @@ -379,8 +400,9 @@ BOOST_AUTO_TEST_CASE(librett_unified_mem_nonsym) { BOOST_CHECK(status == LIBRETT_SUCCESS); + TiledArray::device::streamSynchronize(q.stream); + librettDestroy(plan); - TiledArray::device::deviceSynchronize(); iter = 0; for (std::size_t i = 0; i < B; i++) { @@ -410,9 +432,10 @@ BOOST_AUTO_TEST_CASE(librett_unified_mem_rank_three) { } } + auto q = TiledArray::deviceEnv::instance()->stream(0); + DeviceSafeCall(TiledArray::device::setDevice(q.device)); + librettHandle plan; - auto stream = TiledArray::deviceEnv::instance()->stream(0); - DeviceSafeCall(TiledArray::device::setDevice(stream.device)); librettResult status; // b(k,i,j) = a(i,j,k) @@ -423,8 +446,8 @@ BOOST_AUTO_TEST_CASE(librett_unified_mem_rank_three) { std::vector perm({2, 0, 1}); TiledArray::permutation_to_col_major(perm); - status = librettPlan(&plan, 3, extent.data(), perm.data(), sizeof(int), - stream.stream); + status = + librettPlan(&plan, 3, extent.data(), perm.data(), sizeof(int), q.stream); BOOST_CHECK(status == LIBRETT_SUCCESS); @@ -432,8 +455,9 @@ BOOST_AUTO_TEST_CASE(librett_unified_mem_rank_three) { BOOST_CHECK(status == LIBRETT_SUCCESS); + TiledArray::device::streamSynchronize(q.stream); + librettDestroy(plan); - TiledArray::device::deviceSynchronize(); iter = 0; for (std::size_t i = 0; i < A; i++) {