From 5471bc75e5733a0db664d2742e556362b08c43e9 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sun, 15 Oct 2023 12:06:20 -0400 Subject: [PATCH] fix __CUDA_ARCH__ ifdefs --- Src/Base/AMReX_GpuAtomic.H | 65 +++++++++++++++++++++++++++---------- Src/Base/AMReX_GpuRange.H | 7 +++- Src/Base/AMReX_GpuUtility.H | 10 +++--- Src/Base/AMReX_Random.H | 39 ++++++++++++++++++---- 4 files changed, 92 insertions(+), 29 deletions(-) diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H index b0e0f0a9a51..5fe2315e94e 100644 --- a/Src/Base/AMReX_GpuAtomic.H +++ b/Src/Base/AMReX_GpuAtomic.H @@ -141,8 +141,11 @@ namespace detail { sycl::atomic_ref a{*sum}; return a.fetch_add(value); #else - amrex::ignore_unused(sum, value); - return T(); // should never get here, but have to return something + AMREX_IF_ON_DEVICE(( return atomicAdd(sum, value); )) + AMREX_IF_ON_HOST(( + amrex::ignore_unused(sum, value); + return T(); // should never get here, but have to return something + )) #endif } @@ -318,8 +321,11 @@ namespace detail { sycl::atomic_ref a{*m}; return a.fetch_min(value); #else - amrex::ignore_unused(m,value); - return T(); // should never get here, but have to return something + AMREX_IF_ON_DEVICE(( return atomicMin(m, value); )) + AMREX_IF_ON_HOST(( + amrex::ignore_unused(m,value); + return T(); // should never get here, but have to return something + )) #endif } @@ -381,8 +387,11 @@ namespace detail { sycl::atomic_ref a{*m}; return a.fetch_max(value); #else - amrex::ignore_unused(m,value); - return T(); // should never get here, but have to return something + AMREX_IF_ON_DEVICE(( return atomixMax(m, value); )) + AMREX_IF_ON_HOST(( + amrex::ignore_unused(m,value); + return T(); // should never get here, but have to return something + )) #endif } @@ -441,9 +450,14 @@ namespace detail { sycl::atomic_ref a{*m}; return a.fetch_or(value); #else - int const old = *m; - *m = (*m) || value; - return old; + AMREX_IF_ON_DEVICE(( + return atomicOr(m, value); + )) + AMREX_IF_ON_HOST(( + int const old = *m; + *m = (*m) || value; + return old; + )) #endif } @@ -464,9 +478,14 @@ namespace detail { sycl::atomic_ref a{*m}; return a.fetch_and(value ? ~0x0 : 0); #else - int const old = *m; - *m = (*m) && value; - return old; + AMREX_IF_ON_DEVICE(( + return atomicAnd(m, value ? ~0x0 : 0); + )) + AMREX_IF_ON_HOST(( + int const old = *m; + *m = (*m) && value; + return old; + )) #endif } @@ -488,9 +507,14 @@ namespace detail { sycl::atomic_ref a{*address}; return a.exchange(val); #else - auto const old = *address; - *address = val; - return old; + AMREX_IF_ON_DEVICE(( + return atomicExch(address, val); + )) + AMREX_IF_ON_HOST(( + auto const old = *address; + *address = val; + return old; + )) #endif } @@ -513,9 +537,14 @@ namespace detail { a.compare_exchange_strong(compare, val); return compare; #else - auto const old = *address; - *address = (old == compare ? val : old); - return old; + AMREX_IF_ON_DEVICE(( + return atomicCAS(address, compare, val); + )) + AMREX_IF_ON_HOST(( + auto const old = *address; + *address = (old == compare ? val : old); + return old; + )) #endif } } diff --git a/Src/Base/AMReX_GpuRange.H b/Src/Base/AMReX_GpuRange.H index a9e1e80ef09..409074fd304 100644 --- a/Src/Base/AMReX_GpuRange.H +++ b/Src/Base/AMReX_GpuRange.H @@ -98,7 +98,12 @@ struct range_impl #elif defined (__SYCL_DEVICE_ONLY__) return iterator(m_b, m_gid, m_grange); #else - return iterator(m_b,0,1); + AMREX_IF_ON_DEVICE(( + return iterator(m_b, blockDim.x*blockIdx.x+threadIdx.x, blockDim.x*gridDim.x); + )) + AMREX_IF_ON_HOST(( + return iterator(m_b,0,1); + )) #endif } diff --git a/Src/Base/AMReX_GpuUtility.H b/Src/Base/AMReX_GpuUtility.H index a1fa3cdd9dc..f78aab91249 100644 --- a/Src/Base/AMReX_GpuUtility.H +++ b/Src/Base/AMReX_GpuUtility.H @@ -26,8 +26,9 @@ namespace Gpu { template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T LDG (Array4 const& a, int i, int j, int k) noexcept { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) - return __ldg(a.ptr(i,j,k)); +#if defined(AMREX_USE_CUDA) + AMREX_IF_ON_DEVICE((return __ldg(a.ptr(i,j,k));)) + AMREX_IF_ON_HOST((return a(i,j,k);)) #else return a(i,j,k); #endif @@ -36,8 +37,9 @@ namespace Gpu { template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T LDG (Array4 const& a, int i, int j, int k, int n) noexcept { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) - return __ldg(a.ptr(i,j,k,n)); +#if defined(AMREX_USE_CUDA) + AMREX_IF_ON_DEVICE((return __ldg(a.ptr(i,j,k,n));)) + AMREX_IF_ON_HOST((return a(i,j,k,n);)) #else return a(i,j,k,n); #endif diff --git a/Src/Base/AMReX_Random.H b/Src/Base/AMReX_Random.H index 77b4f8193e4..f69b8d30b0f 100644 --- a/Src/Base/AMReX_Random.H +++ b/Src/Base/AMReX_Random.H @@ -39,8 +39,19 @@ namespace amrex mkl::rng::device::uniform distr; return mkl::rng::device::generate(distr, *random_engine.engine); #else - amrex::ignore_unused(random_engine); - return Random(); +#ifdef BL_USE_FLOAT + AMREX_IF_ON_DEVICE(( + return 1.0f - curand_uniform(random_engine.rand_state); + )) +#else + AMREX_IF_ON_DEVICE(( + return 1.0 - curand_uniform_double(random_engine.rand_state); + )) +#endif + AMREX_IF_ON_HOST(( + amrex::ignore_unused(random_engine); + return Random(); + )) #endif } @@ -72,8 +83,19 @@ namespace amrex mkl::rng::device::gaussian distr(mean, stddev); return mkl::rng::device::generate(distr, *random_engine.engine); #else - amrex::ignore_unused(random_engine); - return RandomNormal(mean, stddev); +#ifdef BL_USE_FLOAT + AMREX_IF_ON_DEVICE(( + return stddev * curand_normal(random_engine.rand_state) + mean; + )) +#else + AMREX_IF_ON_DEVICE(( + return stddev * curand_normal_double(random_engine.rand_state) + mean; + )) +#endif + AMREX_IF_ON_HOST(( + amrex::ignore_unused(random_engine); + return RandomNormal(mean, stddev); + )) #endif } @@ -99,8 +121,13 @@ namespace amrex mkl::rng::device::poisson distr(lambda); return mkl::rng::device::generate(distr, *random_engine.engine); #else - amrex::ignore_unused(random_engine); - return RandomPoisson(lambda); + AMREX_IF_ON_DEVICE(( + return curand_poisson(random_engine.rand_state, lambda); + )) + AMREX_IF_ON_HOST(( + amrex::ignore_unused(random_engine); + return RandomPoisson(lambda); + )) #endif }