Skip to content

Commit

Permalink
Merge branch 'AMReX-Codes:development' into development
Browse files Browse the repository at this point in the history
  • Loading branch information
ruohai0925 authored Nov 7, 2023
2 parents 6be3be5 + d364631 commit e5f8355
Show file tree
Hide file tree
Showing 17 changed files with 518 additions and 471 deletions.
9 changes: 4 additions & 5 deletions Docs/sphinx_documentation/source/GPU.rst
Original file line number Diff line number Diff line change
Expand Up @@ -489,11 +489,10 @@ GPU support.
When AMReX is compiled with ``USE_OMP_OFFLOAD=TRUE``,
``AMREX_USE_OMP_OFFLOAD`` is defined.

In addition to AMReX's preprocessor macros, CUDA provides the
``__CUDA_ARCH__`` macro which is only defined when in device code.
HIP and SYCL provide similar macros.
``AMREX_DEVICE_COMPILE`` should be used when a ``__host__ __device__``
function requires separate code for the CPU and GPU implementations.
The macros ``AMREX_IF_ON_DEVICE((code_for_device))`` and
``AMREX_IF_ON_HOST((code_for_host))`` should be used when a
``__host__ __device__`` function requires separate code for the
CPU and GPU implementations.

.. ===================================================================
Expand Down
60 changes: 27 additions & 33 deletions Src/Base/AMReX.H
Original file line number Diff line number Diff line change
Expand Up @@ -113,16 +113,15 @@ namespace amrex

//! Report an error from a function that can be compiled for both host and device.
//!
//! \param msg optional message; may be nullptr (the default).
//!
//! Device path: when NDEBUG is defined, msg is only passed to amrex::ignore_unused.
//! Otherwise a non-null msg is printed with AMREX_DEVICE_PRINTF using the format
//! "Error %s\n", and AMREX_DEVICE_ASSERT(0) is then evaluated.
//! Host path: forwards to Error_host("Error", msg).
//!
//! NOTE(review): this span is a rendered diff. The lines guarded by
//! `#if AMREX_DEVICE_COMPILE` and the AMREX_IF_ON_DEVICE / AMREX_IF_ON_HOST lines
//! appear to be the old and new spelling of the same logic -- confirm against the
//! real header before relying on the exact line order shown here.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void Error (const char* msg = nullptr) {
#if AMREX_DEVICE_COMPILE
#if defined(NDEBUG)
amrex::ignore_unused(msg);
AMREX_IF_ON_DEVICE((amrex::ignore_unused(msg);))
#else
if (msg) { AMREX_DEVICE_PRINTF("Error %s\n", msg); }
AMREX_DEVICE_ASSERT(0);
#endif
#else
Error_host("Error", msg);
AMREX_IF_ON_DEVICE((
if (msg) { AMREX_DEVICE_PRINTF("Error %s\n", msg); }
AMREX_DEVICE_ASSERT(0);
))
#endif
AMREX_IF_ON_HOST((Error_host("Error", msg);))
}

//! Print out warning message to cerr.
Expand All @@ -132,32 +131,28 @@ namespace amrex

//! Emit a warning from a function that can be compiled for both host and device.
//!
//! \param msg message to report; checked against nullptr before printing on device.
//!
//! Device path: when NDEBUG is defined, msg is only passed to amrex::ignore_unused.
//! Otherwise a non-null msg is printed with AMREX_DEVICE_PRINTF using the format
//! "Warning %s\n". Unlike Error and Abort, no device assertion is evaluated here.
//! Host path: forwards to Warning_host(msg).
//!
//! NOTE(review): this span is a rendered diff; the `#if AMREX_DEVICE_COMPILE` lines
//! and the AMREX_IF_ON_DEVICE / AMREX_IF_ON_HOST lines appear to be the old and new
//! form of the same logic -- confirm against the real header.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void Warning (const char * msg) {
#if AMREX_DEVICE_COMPILE
#if defined(NDEBUG)
amrex::ignore_unused(msg);
#else
if (msg) { AMREX_DEVICE_PRINTF("Warning %s\n", msg); }
#endif
AMREX_IF_ON_DEVICE((amrex::ignore_unused(msg);))
#else
Warning_host(msg);
AMREX_IF_ON_DEVICE((if (msg) { AMREX_DEVICE_PRINTF("Warning %s\n", msg); }))
#endif
AMREX_IF_ON_HOST((Warning_host(msg);))
}

//! Print out message to cerr and exit via abort().
void Abort (const std::string& msg);

//! Abort overload taking a C string, usable from host and device code.
//!
//! \param msg optional message; may be nullptr (the default).
//!
//! Device path: when NDEBUG is defined, msg is only passed to amrex::ignore_unused.
//! Otherwise a non-null msg is printed with AMREX_DEVICE_PRINTF using the format
//! "Abort %s\n", and AMREX_DEVICE_ASSERT(0) is then evaluated.
//! Host path: forwards to Error_host("Abort", msg) -- the same host helper that
//! Error uses, with a different first argument.
//!
//! NOTE(review): this span is a rendered diff; the `#if AMREX_DEVICE_COMPILE` lines
//! and the AMREX_IF_ON_DEVICE / AMREX_IF_ON_HOST lines appear to be the old and new
//! form of the same logic -- confirm against the real header.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void Abort (const char * msg = nullptr) {
#if AMREX_DEVICE_COMPILE
#if defined(NDEBUG)
amrex::ignore_unused(msg);
AMREX_IF_ON_DEVICE((amrex::ignore_unused(msg);))
#else
if (msg) { AMREX_DEVICE_PRINTF("Abort %s\n", msg); }
AMREX_DEVICE_ASSERT(0);
#endif
#else
Error_host("Abort", msg);
AMREX_IF_ON_DEVICE((
if (msg) { AMREX_DEVICE_PRINTF("Abort %s\n", msg); }
AMREX_DEVICE_ASSERT(0);
))
#endif
AMREX_IF_ON_HOST((Error_host("Abort", msg);))
}

/**
Expand All @@ -170,22 +165,21 @@ namespace amrex

//! Report a failed assertion from a function that can be compiled for host and device.
//!
//! \param EX   text of the expression that failed
//! \param file source file name to report
//! \param line source line number to report
//! \param msg  optional extra message; may be nullptr (the default)
//!
//! Device path: when NDEBUG is defined, all four arguments are only passed to
//! amrex::ignore_unused. Otherwise the expression, file and line are printed with
//! AMREX_DEVICE_PRINTF (with a trailing "Msg: ..." part only when msg is non-null)
//! and AMREX_DEVICE_ASSERT(0) is then evaluated.
//! Host path: forwards to Assert_host(EX,file,line,msg).
//!
//! NOTE(review): this span is a rendered diff; the `#if AMREX_DEVICE_COMPILE` lines
//! and the AMREX_IF_ON_DEVICE / AMREX_IF_ON_HOST lines appear to be the old and new
//! form of the same logic -- confirm against the real header.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void Assert (const char* EX, const char* file, int line, const char* msg = nullptr) {
#if AMREX_DEVICE_COMPILE
#if defined(NDEBUG)
amrex::ignore_unused(EX,file,line,msg);
#else
if (msg) {
AMREX_DEVICE_PRINTF("Assertion `%s' failed, file \"%s\", line %d, Msg: %s",
EX, file, line, msg);
} else {
AMREX_DEVICE_PRINTF("Assertion `%s' failed, file \"%s\", line %d",
EX, file, line);
}
AMREX_DEVICE_ASSERT(0);
#endif
AMREX_IF_ON_DEVICE((amrex::ignore_unused(EX,file,line,msg);))
#else
Assert_host(EX,file,line,msg);
AMREX_IF_ON_DEVICE((
if (msg) {
AMREX_DEVICE_PRINTF("Assertion `%s' failed, file \"%s\", line %d, Msg: %s",
EX, file, line, msg);
} else {
AMREX_DEVICE_PRINTF("Assertion `%s' failed, file \"%s\", line %d",
EX, file, line);
}
AMREX_DEVICE_ASSERT(0);
))
#endif
AMREX_IF_ON_HOST((Assert_host(EX,file,line,msg);))
}

/**
Expand Down
178 changes: 98 additions & 80 deletions Src/Base/AMReX_Algorithm.H
Original file line number Diff line number Diff line change
Expand Up @@ -161,51 +161,52 @@ namespace amrex
// Host/device replacement for std::upper_bound.
//
// first, last : iterators delimiting the range to search; the device path computes
//               `last-first` and uses `it += step`, so it needs iterators that
//               support those operations.
// val         : value to search for; only `val < *it` is evaluated.
//
// Device path: a hand-written bisection. Each step probes the middle element and
// moves `first` past it when `!(val < *it)`, otherwise it halves the remaining
// count; the final `first` is returned. Presumably the range must be ordered the
// way std::upper_bound requires -- verify against callers.
// Host path: returns std::upper_bound(first, last, val).
//
// NOTE(review): this span is a rendered diff. The body guarded by
// `#if AMREX_DEVICE_COMPILE` and the body wrapped in AMREX_IF_ON_DEVICE(( ... )) /
// AMREX_IF_ON_HOST(( ... )) appear to be the old and new spelling of the same code,
// interleaved line by line. The template header is not part of the visible hunk.
AMREX_GPU_HOST_DEVICE
ItType upper_bound (ItType first, ItType last, const ValType& val)
{
#if AMREX_DEVICE_COMPILE
std::ptrdiff_t count = last-first;
while(count>0){
auto it = first;
const auto step = count/2;
it += step;
if (!(val < *it)){
first = ++it;
count -= step + 1;
AMREX_IF_ON_DEVICE((
std::ptrdiff_t count = last-first;
while(count>0){
auto it = first;
const auto step = count/2;
it += step;
if (!(val < *it)){
first = ++it;
count -= step + 1;
}
else{
count = step;
}
}
else{
count = step;
}
}

return first;
#else
return std::upper_bound(first, last, val);
#endif
return first;
))
AMREX_IF_ON_HOST((
return std::upper_bound(first, last, val);
))
}

// Host/device replacement for std::lower_bound.
//
// first, last : iterators delimiting the range to search; the device path computes
//               `last-first` and uses `it += step`, so it needs iterators that
//               support those operations.
// val         : value to search for; only `*it < val` is evaluated.
//
// Device path: a hand-written bisection. Each step probes the middle element and
// moves `first` past it when `*it < val`, otherwise it halves the remaining count;
// the final `first` is returned. Presumably the range must be ordered the way
// std::lower_bound requires -- verify against callers.
// Host path: returns std::lower_bound(first, last, val).
//
// NOTE(review): this span is a rendered diff. The body guarded by the preprocessor
// conditional and the body wrapped in AMREX_IF_ON_DEVICE(( ... )) /
// AMREX_IF_ON_HOST(( ... )) appear to be the old and new spelling of the same code,
// interleaved line by line.
// NOTE(review): the guard below is `#ifdef AMREX_DEVICE_COMPILE` (tests whether the
// macro is defined), whereas upper_bound uses `#if AMREX_DEVICE_COMPILE` (tests its
// value). If the macro is always defined, the `#ifdef` form would always select the
// hand-written loop -- confirm how AMREX_DEVICE_COMPILE is defined.
template<typename ItType, typename ValType>
AMREX_GPU_HOST_DEVICE
ItType lower_bound (ItType first, ItType last, const ValType& val)
{
#ifdef AMREX_DEVICE_COMPILE
std::ptrdiff_t count = last-first;
while(count>0)
{
auto it = first;
const auto step = count/2;
it += step;
if (*it < val){
first = ++it;
count -= step + 1;
}
else{
count = step;
AMREX_IF_ON_DEVICE((
std::ptrdiff_t count = last-first;
while(count>0)
{
auto it = first;
const auto step = count/2;
it += step;
if (*it < val){
first = ++it;
count -= step + 1;
}
else{
count = step;
}
}
}

return first;
#else
return std::lower_bound(first, last, val);
#endif
return first;
))
AMREX_IF_ON_HOST((
return std::lower_bound(first, last, val);
))
}

namespace detail {
Expand Down Expand Up @@ -239,83 +240,100 @@ int builtin_clz_wrapper (clzll_tag, T x) noexcept
return static_cast<int>(__builtin_clzll(x) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
}

// CUDA-only device wrappers around the __clz / __clzll intrinsics.
// NOTE(review): in this rendered diff the same two overloads appear again further
// down inside a separate `namespace detail` block; this copy looks like the
// pre-move location -- confirm against the real header.
#ifdef AMREX_USE_CUDA

// likewise with CUDA, there are __clz functions that take (signed) int and long long int
// Selected (via the enable_if default argument) only when T is no larger than int.
// x is cast to int before calling __clz, and the difference in bit width between
// int and T is subtracted from the result.
template <typename T, typename = typename std::enable_if<sizeof(T) <= sizeof(int)>::type>
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
int clz_wrapper (clz_tag, T x) noexcept
{
return __clz((int) x) - (sizeof(int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
}

// Selected only when T is no larger than long long int. x is cast to long long int
// before calling __clzll, and the difference in bit width between long long int
// and T is subtracted from the result.
template <typename T, typename = typename std::enable_if<sizeof(T) <= sizeof(long long int)>::type>
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
int clz_wrapper (clzll_tag, T x) noexcept
{
return __clzll((long long int) x) - (sizeof(long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
}
#endif

}
// Forward declaration of clz. The enable_if_t condition restricts T (after
// std::decay_t) to exactly std::uint8_t, std::uint16_t, std::uint32_t or
// std::uint64_t. Declared ahead of the clz_generic overloads, which call clz
// on narrower halves of their argument.
template <class T, typename std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
std::is_same_v<std::decay_t<T>,std::uint16_t> ||
std::is_same_v<std::decay_t<T>,std::uint32_t> ||
std::is_same_v<std::decay_t<T>,std::uint64_t>, int> = 0>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
int clz (T x) noexcept;

// Portable count-leading-zeros for an 8-bit value.
//
// The fallback branch splits x into its upper and lower 4-bit halves and looks
// the answer up in clz_lookup, whose entry i is the number of leading zero bits
// of i viewed as a 4-bit number. If the upper half is non-zero its entry is the
// result; otherwise the result is 4 plus the entry for the lower half, so x == 0
// yields 8.
//
// NOTE(review): this span is a rendered diff. Two signature lines (`clz` and
// `clz_generic`) and an `#if/#elif/#else` ladder are shown together; they appear to
// be the old form (intrinsic dispatch inside `clz`) and the new form (table-only
// `clz_generic`) interleaved -- confirm against the real header.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
int clz (std::uint8_t x) noexcept
int clz_generic (std::uint8_t x) noexcept
{
#if (AMREX_DEVICE_COMPILE && defined(AMREX_USE_CUDA)) // all supported cuda versions have __clz
return detail::clz_wrapper(detail::clz_tag{}, x);
#elif (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
#else
static constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
auto upper = x >> 4;
auto lower = x & 0xF;
return upper ? clz_lookup[upper] : 4 + clz_lookup[lower];
#endif
}

// Portable count-leading-zeros for a 16-bit value.
//
// The fallback branch splits x into two 8-bit halves. If the upper half is
// non-zero the result is clz(upper); otherwise it is 8 + clz(lower).
//
// NOTE(review): this span is a rendered diff. Two signature lines (`clz` and
// `clz_generic`) and an `#if/#elif/#else` ladder are shown together; they appear to
// be the old and new form of the function interleaved -- confirm against the real
// header.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
int clz (std::uint16_t x) noexcept
int clz_generic (std::uint16_t x) noexcept
{
#if (AMREX_DEVICE_COMPILE && defined(AMREX_USE_CUDA)) // all supported cuda versions have __clz
return detail::clz_wrapper(detail::clz_tag{}, x);
#elif (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
#else
auto upper = std::uint8_t(x >> 8);
auto lower = std::uint8_t(x & 0xFF);
return upper ? clz(upper) : 8 + clz(lower);
#endif
}

// Portable count-leading-zeros for a 32-bit value.
//
// The fallback branch splits x into two 16-bit halves. If the upper half is
// non-zero the result is clz(upper); otherwise it is 16 + clz(lower).
//
// NOTE(review): this span is a rendered diff. Two signature lines (`clz` and
// `clz_generic`) and an `#if/#elif/#else` ladder are shown together; they appear to
// be the old and new form of the function interleaved -- confirm against the real
// header.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
int clz (std::uint32_t x) noexcept
int clz_generic (std::uint32_t x) noexcept
{
#if (AMREX_DEVICE_COMPILE && defined(AMREX_USE_CUDA)) // all supported cuda versions have __clz
return detail::clz_wrapper(detail::clz_tag{}, x);
#elif (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
#else
auto upper = std::uint16_t(x >> 16);
auto lower = std::uint16_t(x & 0xFFFF);
return upper ? clz(upper) : 16 + clz(lower);
#endif
}

// Portable count-leading-zeros for a 64-bit value.
//
// The fallback branch splits x into two 32-bit halves. If the upper half is
// non-zero the result is clz(upper); otherwise it is 32 + clz(lower).
//
// NOTE(review): this span is a rendered diff. Two signature lines (`clz` and
// `clz_generic`) and an `#if/#elif/#else` ladder are shown together; they appear to
// be the old and new form of the function interleaved, and the `#endif` closing
// the ladder is not visible in this hunk -- confirm against the real header.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
int clz (std::uint64_t x) noexcept
int clz_generic (std::uint64_t x) noexcept
{
#if (AMREX_DEVICE_COMPILE && defined(AMREX_USE_CUDA)) // all supported cuda versions have __clz
return detail::clz_wrapper(detail::clz_tag{}, x);
#elif (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
#else
auto upper = std::uint32_t(x >> 32);
auto lower = std::uint32_t(x & 0xFFFFFFFF);
return upper ? clz(upper) : 32 + clz(lower);
}

#if defined AMREX_USE_CUDA

namespace detail {
// CUDA exposes __clz for (signed) int and __clzll for long long int only.
// Each wrapper converts x to the intrinsic's argument type and then subtracts the
// difference in bit width between that type and T, so the result is expressed in
// terms of a value that is sizeof(T) bytes wide.
// NOTE(review): the subtraction assumes the extra high bits are zero after the
// conversion, i.e. that T is unsigned -- the visible callers pass std::uintN_t.

// Overload for types no wider than int; dispatches to __clz.
template <typename T, typename = std::enable_if_t<(sizeof(T) <= sizeof(int))>>
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
int clz_wrapper (clz_tag, T x) noexcept
{
    constexpr auto extra_bits = sizeof(int) * CHAR_BIT - sizeof(T) * CHAR_BIT;
    const auto widened = static_cast<int>(x);
    return __clz(widened) - extra_bits;
}

// Overload for types no wider than long long int; dispatches to __clzll.
template <typename T, typename = std::enable_if_t<(sizeof(T) <= sizeof(long long int))>>
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
int clz_wrapper (clzll_tag, T x) noexcept
{
    constexpr auto extra_bits = sizeof(long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT;
    const auto widened = static_cast<long long int>(x);
    return __clzll(widened) - extra_bits;
}
}

// clz for builds where AMREX_USE_CUDA is defined.
//
// T is restricted (after std::decay_t) to std::uint8_t, std::uint16_t,
// std::uint32_t or std::uint64_t by the enable_if_t condition.
//
// Device code returns detail::clz_wrapper(detail::clz_tag{}, x).
// Host code returns detail::builtin_clz_wrapper(detail::clz_tag{}, x) when
// AMREX_HAS_BUILTIN_CLZ is true, and clz_generic(x) otherwise.
template <class T, typename std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
std::is_same_v<std::decay_t<T>,std::uint16_t> ||
std::is_same_v<std::decay_t<T>,std::uint32_t> ||
std::is_same_v<std::decay_t<T>,std::uint64_t>, int> >
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
int clz (T x) noexcept
{
AMREX_IF_ON_DEVICE((return detail::clz_wrapper(detail::clz_tag{}, x);))
#if AMREX_HAS_BUILTIN_CLZ
AMREX_IF_ON_HOST((return detail::builtin_clz_wrapper(detail::clz_tag{}, x);))
#else
AMREX_IF_ON_HOST((return clz_generic(x);))
#endif
}

#else // !defined AMREX_USE_CUDA

// clz for builds where AMREX_USE_CUDA is not defined.
//
// T is restricted (after std::decay_t) to std::uint8_t, std::uint16_t,
// std::uint32_t or std::uint64_t by the enable_if_t condition.
//
// Returns detail::builtin_clz_wrapper(detail::clz_tag{}, x) when
// AMREX_DEVICE_COMPILE is false and AMREX_HAS_BUILTIN_CLZ is true; in every other
// configuration it returns clz_generic(x).
// NOTE(review): this branch still tests AMREX_DEVICE_COMPILE directly rather than
// using AMREX_IF_ON_DEVICE / AMREX_IF_ON_HOST -- presumably intentional for
// non-CUDA backends; confirm.
template <class T, typename std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
std::is_same_v<std::decay_t<T>,std::uint16_t> ||
std::is_same_v<std::decay_t<T>,std::uint32_t> ||
std::is_same_v<std::decay_t<T>,std::uint64_t>, int> >
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
int clz (T x) noexcept
{
#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
#else
return clz_generic(x);
#endif
}

#endif // defined AMREX_USE_CUDA

}

#endif
Loading

0 comments on commit e5f8355

Please sign in to comment.