PODVector Updates
Remove the deprecated and unused PolymorphicAllocator. It has been replaced by
PolymorphicArenaAllocator.

Restrict PODVector's Allocator to std::allocator and AMReX's various Arena-based
allocators. This simplifies the implementation of PODVector, because
std::allocator is stateless and the Arena-based allocators stay simple even
when they are polymorphic.

Fix a few issues in PODVector when it is used with a PolymorphicArenaAllocator.
For example, the copy assignment operator should copy the Allocator, and the
copy constructor should account for the possibility that the other PODVector
uses a different type of Arena. A sketch of the intended semantics follows.
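
A hedged illustration of those copy semantics (a sketch only: the PODVector
constructor taking an allocator is assumed here and is not part of the diff
shown on this page):

    // Sketch: copy semantics of PODVector with PolymorphicArenaAllocator.
    // Assumes PODVector is constructible from its allocator.
    using PolyVec = amrex::PODVector<int, amrex::PolymorphicArenaAllocator<int>>;

    PolyVec a{amrex::PolymorphicArenaAllocator<int>{amrex::The_Pinned_Arena()}};
    PolyVec b{amrex::PolymorphicArenaAllocator<int>{amrex::The_Managed_Arena()}};

    b = a;        // Copy assignment copies the allocator too, so b now
                  // allocates from the pinned Arena, like a.
    PolyVec c(a); // Copy construction must cope with a source whose
                  // allocator points to a different type of Arena.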

Add placeholders for growing and shrinking memory allocations in-place; these
will be implemented in a follow-up PR.

Update PODVector's growth strategy. Hopefully this helps reduce memory
consumption.

  * Always try to grow in-place.

  * For assign, operator=, resize & reserve, allocate exactly the specified
    size, with no extra capacity.

  * For push_back & emplace_back, grow the capacity by a factor that is 1.5
    by default.

  * For insert, the capacity grows either by the growth factor (1.5 by
    default) or to the new size, whichever is greater (see the sketch below).
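
A minimal sketch of the push_back/insert capacity rule (illustrative only;
the actual logic lives in PODVector.H, which is not shown on this page):

    // Illustrative capacity rule with the default growth factor of 1.5.
    // Needs <algorithm> and <cstddef>.
    std::size_t new_capacity (std::size_t capacity, std::size_t new_size)
    {
        // push_back/emplace_back effectively use `grown` alone (there
        // new_size is capacity+1); insert takes whichever is greater.
        std::size_t grown = capacity + capacity/2; // grow by 1.5x
        return std::max(grown, new_size);
    }

assign, operator=, resize & reserve bypass this rule and allocate exactly the
requested size.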
WeiqunZhang committed Jul 17, 2023
1 parent 01b750d commit b73a396
Showing 7 changed files with 581 additions and 619 deletions.
2 changes: 1 addition & 1 deletion Src/AmrCore/AMReX_TagBox.cpp
@@ -492,7 +492,7 @@ TagBoxArray::local_collate_gpu (Gpu::PinnedVector<IntVect>& v) const
     Gpu::dtoh_memcpy(hv_ntags.data(), dv_ntags.data(), ntotblocks*sizeof(int));

     Gpu::PinnedVector<int> hv_tags_offset(ntotblocks+1);
-    hv_tags_offset[0] = 0;
+    if (! hv_tags_offset.empty()) { hv_tags_offset[0] = 0; }
     std::partial_sum(hv_ntags.begin(), hv_ntags.end(), hv_tags_offset.begin()+1);
     int ntotaltags = hv_tags_offset.back();

21 changes: 21 additions & 0 deletions Src/Base/AMReX_Arena.H
@@ -7,6 +7,7 @@
 #include <cstddef>
 #include <cstdlib>
 #include <limits>
+#include <utility>

 namespace amrex {

@@ -100,6 +101,26 @@ public:
      * \return a pointer to the allocated memory
      */
     [[nodiscard]] virtual void* alloc (std::size_t sz) = 0;
+
+    /**
+     * Try to allocate in-place by extending the capacity of given pointer.
+     */
+    [[nodiscard]] virtual std::pair<void*,std::size_t>
+    alloc_in_place (void* /*pt*/, std::size_t /*szmin*/, std::size_t szmax)
+    {
+        auto* p = alloc(szmax);
+        return std::make_pair(p, szmax);
+    }
+
+    /**
+     * Try to shrink in-place
+     */
+    [[nodiscard]] virtual void*
+    shrink_in_place (void* /*pt*/, std::size_t sz)
+    {
+        return alloc(sz);
+    }
+
     /**
      * \brief A pure virtual function for deleting the arena pointed to by pt
      */
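
Both defaults fall back to a fresh allocation, so callers cannot assume the
returned pointer equals the old one. A hedged sketch of the expected call
pattern (the helper itself is hypothetical, not part of this commit):

    // Hypothetical caller of Arena::alloc_in_place (needs <cstring>).
    // If the block could not be extended in place, relocate the live
    // bytes and free the old block.
    void* grow (amrex::Arena* ar, void* p, std::size_t oldsz,
                std::size_t szmin, std::size_t szmax)
    {
        auto [pnew, sznew] = ar->alloc_in_place(p, szmin, szmax);
        if (pnew != p) {
            std::memcpy(pnew, p, oldsz);
            ar->free(p);
        }
        amrex::ignore_unused(sznew); // actual new capacity in bytes
        return pnew;
    }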
263 changes: 113 additions & 150 deletions Src/Base/AMReX_GpuAllocators.H
@@ -20,205 +20,152 @@
 namespace amrex {

 template <typename T>
-struct RunOnGpu : std::false_type {};
-
-template <typename T>
-struct IsPolymorphicArenaAllocator : std::false_type {};
-
-struct ArenaAllocatorTraits {
-    typedef std::true_type propagate_on_container_copy_assignment;
-    typedef std::true_type propagate_on_container_move_assignment;
-    typedef std::true_type propagate_on_container_swap;
-    typedef std::true_type is_always_equal;
+struct FatPtr
+{
+    T* m_ptr = nullptr;
+    std::size_t m_size = 0;
+    [[nodiscard]] constexpr T* ptr () const noexcept { return m_ptr; }
+    [[nodiscard]] constexpr std::size_t size () const noexcept { return m_size; }
 };

-template<typename T>
-class ArenaAllocator
-    : public ArenaAllocatorTraits
+template <class T, class AR>
+struct ArenaAllocatorBase
 {
 public :

     using value_type = T;
+    using arena_wrapper_type = AR;

-    inline value_type* allocate(std::size_t n)
-    {
-        value_type* result = nullptr;
-        result = (value_type*) The_Arena()->alloc(n * sizeof(T));
-        return result;
-    }
+    constexpr ArenaAllocatorBase () = default;
+    explicit constexpr ArenaAllocatorBase (AR a_ar) : ar(a_ar) {}

-    inline void deallocate(value_type* ptr, std::size_t)
+    [[nodiscard]] T* allocate (std::size_t n)
     {
-        if (ptr != nullptr) { The_Arena()->free(ptr); }
+        return (T*) arena()->alloc(n * sizeof(T));
     }
-};

-template<typename T>
-class DeviceArenaAllocator
-    : public ArenaAllocatorTraits
-{
-public :
-
-    using value_type = T;
-
-    inline value_type* allocate(std::size_t n)
+    [[nodiscard]] FatPtr<T>
+    allocate_in_place (T* p, std::size_t nmin, std::size_t nmax)
     {
-        value_type* result = nullptr;
-        result = (value_type*) The_Device_Arena()->alloc(n * sizeof(T));
-        return result;
+        auto pn = arena()->alloc_in_place(p, nmin*sizeof(T), nmax*sizeof(T));
+        return FatPtr<T>{(T*)pn.first, pn.second/sizeof(T)};
     }

-    inline void deallocate(value_type* ptr, std::size_t)
+    [[nodiscard]] T*
+    shrink_in_place (T* p, std::size_t n)
     {
-        if (ptr != nullptr) { The_Device_Arena()->free(ptr); }
+        return (T*) arena()->shrink_in_place(p,n*sizeof(T));
     }
-};

-template<typename T>
-class PinnedArenaAllocator
-    : public ArenaAllocatorTraits
-{
-public :
-
-    using value_type = T;
-
-    inline value_type* allocate(std::size_t n)
+    void deallocate (T* ptr, std::size_t)
     {
-        value_type* result = nullptr;
-        result = (value_type*) The_Pinned_Arena()->alloc(n * sizeof(T));
-        return result;
+        if (ptr != nullptr) { arena()->free(ptr); }
     }

-    inline void deallocate(value_type* ptr, std::size_t)
-    {
-        if (ptr != nullptr) { The_Pinned_Arena()->free(ptr); }
+    [[nodiscard]] Arena* arena () const noexcept {
+        return ar.arena();
     }
-};

-template<typename T>
-class ManagedArenaAllocator
-    : public ArenaAllocatorTraits
-{
-public :
-
-    using value_type = T;
-
-    inline value_type* allocate(std::size_t n)
-    {
-        value_type* result = nullptr;
-        result = (value_type*) The_Managed_Arena()->alloc(n * sizeof(T));
-        return result;
-    }
+private:
+    AR ar{};
 };

-    inline void deallocate(value_type* ptr, std::size_t)
-    {
-        if (ptr != nullptr) { The_Managed_Arena()->free(ptr); }
+struct ArenaWrapper {
+    [[nodiscard]] Arena* arena () const noexcept {
+        return The_Arena();
     }
 };

-template<typename T>
-class AsyncArenaAllocator
-    : public ArenaAllocatorTraits
-{
-public :
-
-    using value_type = T;
-
-    inline value_type* allocate(std::size_t n)
-    {
-        value_type* result = nullptr;
-        result = (value_type*) The_Async_Arena()->alloc(n * sizeof(T));
-        return result;
+struct DeviceArenaWrapper {
+    [[nodiscard]] Arena* arena () const noexcept {
+        return The_Device_Arena();
     }
 };

-    inline void deallocate(value_type* ptr, std::size_t)
-    {
-        if (ptr != nullptr) { The_Async_Arena()->free(ptr); }
+struct PinnedArenaWrapper {
+    [[nodiscard]] Arena* arena () const noexcept {
+        return The_Pinned_Arena();
     }
 };

-template<typename T>
-class PolymorphicArenaAllocator
-    : public ArenaAllocatorTraits
-{
-public :
-
-    using value_type = T;
-
-    inline value_type* allocate(std::size_t n)
-    {
-        value_type* result = nullptr;
-        result = (value_type*) arena()->alloc(n * sizeof(T));
-        return result;
+struct ManagedArenaWrapper {
+    [[nodiscard]] Arena* arena () const noexcept {
+        return The_Managed_Arena();
     }
 };

-    inline void deallocate(value_type* ptr, std::size_t)
-    {
-        if (ptr != nullptr) { arena()->free(ptr); }
+struct AsyncArenaWrapper {
+    [[nodiscard]] Arena* arena () const noexcept {
+        return The_Async_Arena();
     }
 };

+struct PolymorphicArenaWrapper {
+    constexpr PolymorphicArenaWrapper () = default;
+    explicit constexpr PolymorphicArenaWrapper (Arena* a_arena)
+        : m_arena(a_arena) {}
     [[nodiscard]] Arena* arena () const noexcept {
         return (m_arena) ? m_arena : The_Arena();
     }

     Arena* m_arena = nullptr;
 };

 template<typename T>
-class PolymorphicAllocator
+class ArenaAllocator
+    : public ArenaAllocatorBase<T,ArenaWrapper>
 {
-public :
+};

-    using value_type = T;
+template<typename T>
+class DeviceArenaAllocator
+    : public ArenaAllocatorBase<T,DeviceArenaWrapper>
+{
+};

-    PolymorphicAllocator () : m_use_gpu_aware_mpi(ParallelDescriptor::UseGpuAwareMpi()) {}
+template<typename T>
+class PinnedArenaAllocator
+    : public ArenaAllocatorBase<T,PinnedArenaWrapper>
+{
+};

-    inline value_type* allocate(std::size_t n)
-    {
-        value_type* result = nullptr;
-        if (m_use_gpu_aware_mpi)
-        {
-            result = (value_type*) The_Arena()->alloc(n * sizeof(T));
-        }
-        else
-        {
-            result = (value_type*) The_Pinned_Arena()->alloc(n * sizeof(T));
-        }
-        return result;
-    }
+template<typename T>
+class ManagedArenaAllocator
+    : public ArenaAllocatorBase<T,ManagedArenaWrapper>
+{
+};

-    inline void deallocate(value_type* ptr, std::size_t)
-    {
-        if (ptr != nullptr)
-        {
-            if (m_use_gpu_aware_mpi)
-            {
-                The_Arena()->free(ptr);
-            }
-            else
-            {
-                The_Pinned_Arena()->free(ptr);
-            }
-        }
-    }
+template<typename T>
+class AsyncArenaAllocator
+    : public ArenaAllocatorBase<T,AsyncArenaWrapper>
+{
+};

-    bool m_use_gpu_aware_mpi;
+template<typename T>
+class PolymorphicArenaAllocator
+    : public ArenaAllocatorBase<T,PolymorphicArenaWrapper>
+{
+public :
+    constexpr PolymorphicArenaAllocator () = default;
+    explicit constexpr PolymorphicArenaAllocator (Arena* a_arena)
+        : ArenaAllocatorBase<T,PolymorphicArenaWrapper>
+            (PolymorphicArenaWrapper(a_arena))
+    {}
+};

-    template <class U, class V>
-    friend bool
-    operator== (PolymorphicAllocator<U> const& a, PolymorphicAllocator<V> const& b) noexcept
-    {
-        return a.m_use_gpu_aware_mpi == b.m_use_gpu_aware_mpi;
-    }
+template <typename T>
+struct RunOnGpu : std::false_type {};

-    template <class U, class V>
-    friend bool
-    operator!= (PolymorphicAllocator<U> const& a, PolymorphicAllocator<V> const& b) noexcept
-    {
-        return a.m_use_gpu_aware_mpi != b.m_use_gpu_aware_mpi;
-    }
+template <class T, class Enable = void>
+struct IsArenaAllocator : std::false_type {};
+//
+template <class T>
+struct IsArenaAllocator
+    <T,std::enable_if_t<std::is_base_of
+        <ArenaAllocatorBase<typename T::value_type,
+                            typename T::arena_wrapper_type>,
+         T>::value>>
+    : std::true_type {};

-};
+template <typename T>
+struct IsPolymorphicArenaAllocator : std::false_type {};

 #ifdef AMREX_USE_GPU
 template <typename T>
@@ -246,6 +193,22 @@ namespace amrex {
     using DefaultAllocator = std::allocator<T>;
 #endif // AMREX_USE_GPU

+template <typename A1, typename A2,
+          std::enable_if_t<IsArenaAllocator<A1>::value &&
+                           IsArenaAllocator<A2>::value, int> = 0>
+bool operator== (A1 const& a1, A2 const& a2)
+{
+    return a1.arena() == a2.arena();
+}
+
+template <typename A1, typename A2,
+          std::enable_if_t<IsArenaAllocator<A1>::value &&
+                           IsArenaAllocator<A2>::value, int> = 0>
+bool operator!= (A1 const& a1, A2 const& a2)
+{
+    return a1.arena() != a2.arena();
+}
+
 } // namespace amrex

 #endif // AMREX_GPUALLOCATORS_H_
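
For illustration, a short sketch of how the new trait and the comparison
operators interact (the static_asserts and variables here are illustrative,
not part of the commit):

    static_assert(amrex::IsArenaAllocator<amrex::ArenaAllocator<double>>::value);
    static_assert(!amrex::IsArenaAllocator<std::allocator<double>>::value);

    // Allocators of different static types compare equal exactly when
    // they resolve to the same Arena.
    amrex::PolymorphicArenaAllocator<int> p{amrex::The_Pinned_Arena()};
    amrex::PinnedArenaAllocator<int> q;
    bool same = (p == q); // expected true: both use The_Pinned_Arena()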
13 changes: 0 additions & 13 deletions Src/Base/AMReX_GpuContainers.H
@@ -61,16 +61,6 @@ namespace amrex::Gpu {
     template <class T>
     using HostVector = PinnedVector<T>;

-    /**
-     * \brief The behavior of PolymorphicVector changes depending on
-     * the amrex.use_gpu_aware_mpi runtime flag. If the flag is true,
-     * this vector will use device memory. If it is false, this Vector
-     * will use pinned memory.
-     *
-     */
-    template <class T>
-    using PolymorphicVector = PODVector<T, PolymorphicAllocator<T> >;
-
     /**
      * \brief This is identical to ManagedVector<T>. The ManagedDeviceVector
      * form is deprecated and will be removed in a future release.
@@ -101,9 +91,6 @@ namespace amrex::Gpu {

     template <class T>
     using AsyncVector = PODVector<T>;
-
-    template <class T>
-    using PolymorphicVector = PODVector<T>;
 #endif

     struct HostToDevice {};
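
With the alias removed, code that needs a runtime-selected Arena can spell
the type out directly; a hedged sketch of the assumed replacement pattern:

    // Assumed replacement for the removed PolymorphicVector alias.
    template <class T>
    using PolyArenaVector =
        amrex::PODVector<T, amrex::PolymorphicArenaAllocator<T>>;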