From 66c1e69d974acd90f896c3f9fee727c397902bc0 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 12 Jul 2023 21:24:55 -0700 Subject: [PATCH] Make ReduceData::value safer Although ReduceData::value is meant to be called only once, the user might call it multiple times. This will result in the wrong result for sum reduction because of how the value function is implemented. In this PR, we fix this defect by allowing the function be called multiple times. --- Src/Base/AMReX_Reduce.H | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/Src/Base/AMReX_Reduce.H b/Src/Base/AMReX_Reduce.H index b04879f70ed..34f1c6e74bb 100644 --- a/Src/Base/AMReX_Reduce.H +++ b/Src/Base/AMReX_Reduce.H @@ -666,9 +666,14 @@ public: template typename D::Type value (D & reduce_data) { + auto hp = reduce_data.hostPtr(); + + if (m_result_is_ready) { + return *hp; + } + using ReduceTuple = typename D::Type; auto const& stream = Gpu::gpuStream(); - auto hp = reduce_data.hostPtr(); auto dp = reduce_data.devicePtr(); auto const& nblocks = reduce_data.nBlocks(); #if defined(AMREX_USE_SYCL) @@ -676,7 +681,6 @@ public: const int N = nblocks[0]; if (N == 0) { Reduce::detail::for_each_init<0, ReduceTuple, Ps...>(*hp); - return *hp; } else { Gpu::PinnedVector tmp(N); Gpu::dtoh_memcpy_async(tmp.data(), dp, sizeof(ReduceTuple)*N); @@ -684,7 +688,7 @@ public: for (int i = 1; i < N; ++i) { Reduce::detail::for_each_local<0, ReduceTuple, Ps...>(tmp[0], tmp[i]); } - return tmp[0]; + *hp = tmp[0]; } } else #endif @@ -738,9 +742,14 @@ public: }); #endif Gpu::streamSynchronize(); - return *hp; } + + m_result_is_ready = true; + return *hp; } + +private: + bool m_result_is_ready = false; }; namespace Reduce { @@ -1135,15 +1144,20 @@ public: template typename D::Type value (D & reduce_data) { - using ReduceTuple = typename D::Type; auto& rrv = reduce_data.reference(); - if (rrv.size() > 1) { - for (int i = 1, N = rrv.size(); i < N; ++i) { - Reduce::detail::for_each_local<0, ReduceTuple, Ps...>(rrv[0], rrv[i]); + if (! m_result_is_ready) { + using ReduceTuple = typename D::Type; + if (rrv.size() > 1) { + for (int i = 1, N = rrv.size(); i < N; ++i) { + Reduce::detail::for_each_local<0, ReduceTuple, Ps...>(rrv[0], rrv[i]); + } } + m_result_is_ready = true; } return rrv[0]; } + + bool m_result_is_ready = false; }; namespace Reduce {