Skip to content

Commit

Permalink
fix __CUDA_ARCH__ ifdefs
Browse files Browse the repository at this point in the history
  • Loading branch information
BenWibking committed Oct 15, 2023
1 parent 615bffc commit 5471bc7
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 29 deletions.
65 changes: 47 additions & 18 deletions Src/Base/AMReX_GpuAtomic.H
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,11 @@ namespace detail {
sycl::atomic_ref<T,mo,ms,AS> a{*sum};
return a.fetch_add(value);
#else
amrex::ignore_unused(sum, value);
return T(); // should never get here, but have to return something
AMREX_IF_ON_DEVICE(( return atomicAdd(sum, value); ))
AMREX_IF_ON_HOST((
amrex::ignore_unused(sum, value);
return T(); // should never get here, but have to return something
))
#endif
}

Expand Down Expand Up @@ -318,8 +321,11 @@ namespace detail {
sycl::atomic_ref<T,mo,ms,as> a{*m};
return a.fetch_min(value);
#else
amrex::ignore_unused(m,value);
return T(); // should never get here, but have to return something
AMREX_IF_ON_DEVICE(( return atomicMin(m, value); ))
AMREX_IF_ON_HOST((
amrex::ignore_unused(m,value);
return T(); // should never get here, but have to return something
))
#endif
}

Expand Down Expand Up @@ -381,8 +387,11 @@ namespace detail {
sycl::atomic_ref<T,mo,ms,as> a{*m};
return a.fetch_max(value);
#else
amrex::ignore_unused(m,value);
return T(); // should never get here, but have to return something
AMREX_IF_ON_DEVICE(( return atomixMax(m, value); ))
AMREX_IF_ON_HOST((
amrex::ignore_unused(m,value);
return T(); // should never get here, but have to return something
))
#endif
}

Expand Down Expand Up @@ -441,9 +450,14 @@ namespace detail {
sycl::atomic_ref<int,mo,ms,as> a{*m};
return a.fetch_or(value);
#else
int const old = *m;
*m = (*m) || value;
return old;
AMREX_IF_ON_DEVICE((
return atomicOr(m, value);
))
AMREX_IF_ON_HOST((
int const old = *m;
*m = (*m) || value;
return old;
))
#endif
}

Expand All @@ -464,9 +478,14 @@ namespace detail {
sycl::atomic_ref<int,mo,ms,as> a{*m};
return a.fetch_and(value ? ~0x0 : 0);
#else
int const old = *m;
*m = (*m) && value;
return old;
AMREX_IF_ON_DEVICE((
return atomicAnd(m, value ? ~0x0 : 0);
))
AMREX_IF_ON_HOST((
int const old = *m;
*m = (*m) && value;
return old;
))
#endif
}

Expand All @@ -488,9 +507,14 @@ namespace detail {
sycl::atomic_ref<T,mo,ms,as> a{*address};
return a.exchange(val);
#else
auto const old = *address;
*address = val;
return old;
AMREX_IF_ON_DEVICE((
return atomicExch(address, val);
))
AMREX_IF_ON_HOST((
auto const old = *address;
*address = val;
return old;
))
#endif
}

Expand All @@ -513,9 +537,14 @@ namespace detail {
a.compare_exchange_strong(compare, val);
return compare;
#else
auto const old = *address;
*address = (old == compare ? val : old);
return old;
AMREX_IF_ON_DEVICE((
return atomicCAS(address, compare, val);
))
AMREX_IF_ON_HOST((
auto const old = *address;
*address = (old == compare ? val : old);
return old;
))
#endif
}
}
Expand Down
7 changes: 6 additions & 1 deletion Src/Base/AMReX_GpuRange.H
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,12 @@ struct range_impl
#elif defined (__SYCL_DEVICE_ONLY__)
return iterator(m_b, m_gid, m_grange);
#else
return iterator(m_b,0,1);
AMREX_IF_ON_DEVICE((
return iterator(m_b, blockDim.x*blockIdx.x+threadIdx.x, blockDim.x*gridDim.x);
))
AMREX_IF_ON_HOST((
return iterator(m_b,0,1);
))
#endif
}

Expand Down
10 changes: 6 additions & 4 deletions Src/Base/AMReX_GpuUtility.H
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ namespace Gpu {
template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T LDG (Array4<T> const& a, int i, int j, int k) noexcept {
#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA)
return __ldg(a.ptr(i,j,k));
#if defined(AMREX_USE_CUDA)
AMREX_IF_ON_DEVICE((return __ldg(a.ptr(i,j,k));))
AMREX_IF_ON_HOST((return a(i,j,k);))
#else
return a(i,j,k);
#endif
Expand All @@ -36,8 +37,9 @@ namespace Gpu {
template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T LDG (Array4<T> const& a, int i, int j, int k, int n) noexcept {
#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA)
return __ldg(a.ptr(i,j,k,n));
#if defined(AMREX_USE_CUDA)
AMREX_IF_ON_DEVICE((return __ldg(a.ptr(i,j,k,n));))
AMREX_IF_ON_HOST((return a(i,j,k,n);))
#else
return a(i,j,k,n);
#endif
Expand Down
39 changes: 33 additions & 6 deletions Src/Base/AMReX_Random.H
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,19 @@ namespace amrex
mkl::rng::device::uniform<Real> distr;
return mkl::rng::device::generate(distr, *random_engine.engine);
#else
amrex::ignore_unused(random_engine);
return Random();
#ifdef BL_USE_FLOAT
AMREX_IF_ON_DEVICE((
return 1.0f - curand_uniform(random_engine.rand_state);
))
#else
AMREX_IF_ON_DEVICE((
return 1.0 - curand_uniform_double(random_engine.rand_state);
))
#endif
AMREX_IF_ON_HOST((
amrex::ignore_unused(random_engine);
return Random();
))
#endif
}

Expand Down Expand Up @@ -72,8 +83,19 @@ namespace amrex
mkl::rng::device::gaussian<Real> distr(mean, stddev);
return mkl::rng::device::generate(distr, *random_engine.engine);
#else
amrex::ignore_unused(random_engine);
return RandomNormal(mean, stddev);
#ifdef BL_USE_FLOAT
AMREX_IF_ON_DEVICE((
return stddev * curand_normal(random_engine.rand_state) + mean;
))
#else
AMREX_IF_ON_DEVICE((
return stddev * curand_normal_double(random_engine.rand_state) + mean;
))
#endif
AMREX_IF_ON_HOST((
amrex::ignore_unused(random_engine);
return RandomNormal(mean, stddev);
))
#endif
}

Expand All @@ -99,8 +121,13 @@ namespace amrex
mkl::rng::device::poisson<unsigned int> distr(lambda);
return mkl::rng::device::generate(distr, *random_engine.engine);
#else
amrex::ignore_unused(random_engine);
return RandomPoisson(lambda);
AMREX_IF_ON_DEVICE((
return curand_poisson(random_engine.rand_state, lambda);
))
AMREX_IF_ON_HOST((
amrex::ignore_unused(random_engine);
return RandomPoisson(lambda);
))
#endif
}

Expand Down

0 comments on commit 5471bc7

Please sign in to comment.