PODVector Updates
Remove the deprecated and unused PolymorphicAllocator. It has been replaced by
PolymorphicArenaAllocator.

Restrict PODVector's Allocator to std::allocator and AMReX's various Arena-based
allocators. This simplifies the implementation of PODVector, because
std::allocator is stateless and the Arena-based allocators stay simple even
when they are polymorphic.

Fix a few issues in PODVector when it is used with a PolymorphicArenaAllocator.
For example, the copy assignment operator should copy the Allocator, and the
copy constructor should account for the possibility that the other PODVector
uses a different type of Arena. A sketch of the intended semantics follows.
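
A hedged illustration of those copy semantics (a sketch only: the PODVector
constructor taking an allocator is assumed here and is not part of the diff
shown on this page):

    // Sketch: copy semantics of PODVector with PolymorphicArenaAllocator.
    // Assumes PODVector is constructible from its allocator.
    using PolyVec = amrex::PODVector<int, amrex::PolymorphicArenaAllocator<int>>;

    PolyVec a{amrex::PolymorphicArenaAllocator<int>{amrex::The_Pinned_Arena()}};
    PolyVec b{amrex::PolymorphicArenaAllocator<int>{amrex::The_Managed_Arena()}};

    b = a;        // Copy assignment copies the allocator too, so b now
                  // allocates from the pinned Arena, like a.
    PolyVec c(a); // Copy construction must cope with a source whose
                  // allocator points to a different type of Arena.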

Add placeholders for growing and shrinking memory allocations in-place; these
will be implemented in a follow-up PR.

Update PODVector's growth strategy. Hopefully this helps reduce memory
consumption.

  * Always try to grow in-place.

  * For assign, operator=, resize & reserve, allocate exactly the specified
    size, with no extra capacity.

  * For push_back & emplace_back, grow the capacity by a factor that is 1.5
    by default.

  * For insert, the capacity grows either by the growth factor (1.5 by
    default) or to the new size, whichever is greater (see the sketch below).
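
A minimal sketch of the push_back/insert capacity rule (illustrative only;
the actual logic lives in PODVector.H, which is not shown on this page):

    // Illustrative capacity rule with the default growth factor of 1.5.
    // Needs <algorithm> and <cstddef>.
    std::size_t new_capacity (std::size_t capacity, std::size_t new_size)
    {
        // push_back/emplace_back effectively use `grown` alone (there
        // new_size is capacity+1); insert takes whichever is greater.
        std::size_t grown = capacity + capacity/2; // grow by 1.5x
        return std::max(grown, new_size);
    }

assign, operator=, resize & reserve bypass this rule and allocate exactly the
requested size.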
WeiqunZhang committed Jul 17, 2023
1 parent 01b750d commit b73a396
Showing 7 changed files with 581 additions and 619 deletions.
2 changes: 1 addition & 1 deletion Src/AmrCore/AMReX_TagBox.cpp
@@ -492,7 +492,7 @@ TagBoxArray::local_collate_gpu (Gpu::PinnedVector<IntVect>& v) const
     Gpu::dtoh_memcpy(hv_ntags.data(), dv_ntags.data(), ntotblocks*sizeof(int));

     Gpu::PinnedVector<int> hv_tags_offset(ntotblocks+1);
-    hv_tags_offset[0] = 0;
+    if (! hv_tags_offset.empty()) { hv_tags_offset[0] = 0; }
     std::partial_sum(hv_ntags.begin(), hv_ntags.end(), hv_tags_offset.begin()+1);
     int ntotaltags = hv_tags_offset.back();

21 changes: 21 additions & 0 deletions Src/Base/AMReX_Arena.H
@@ -7,6 +7,7 @@
 #include <cstddef>
 #include <cstdlib>
 #include <limits>
+#include <utility>

 namespace amrex {

@@ -100,6 +101,26 @@ public:
      * \return a pointer to the allocated memory
      */
     [[nodiscard]] virtual void* alloc (std::size_t sz) = 0;
+
+    /**
+     * Try to allocate in-place by extending the capacity of given pointer.
+     */
+    [[nodiscard]] virtual std::pair<void*,std::size_t>
+    alloc_in_place (void* /*pt*/, std::size_t /*szmin*/, std::size_t szmax)
+    {
+        auto* p = alloc(szmax);
+        return std::make_pair(p, szmax);
+    }
+
+    /**
+     * Try to shrink in-place
+     */
+    [[nodiscard]] virtual void*
+    shrink_in_place (void* /*pt*/, std::size_t sz)
+    {
+        return alloc(sz);
+    }
+
     /**
      * \brief A pure virtual function for deleting the arena pointed to by pt
      */
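
Both defaults fall back to a fresh allocation, so callers cannot assume the
returned pointer equals the old one. A hedged sketch of the expected call
pattern (the helper itself is hypothetical, not part of this commit):

    // Hypothetical caller of Arena::alloc_in_place (needs <cstring>).
    // If the block could not be extended in place, relocate the live
    // bytes and free the old block.
    void* grow (amrex::Arena* ar, void* p, std::size_t oldsz,
                std::size_t szmin, std::size_t szmax)
    {
        auto [pnew, sznew] = ar->alloc_in_place(p, szmin, szmax);
        if (pnew != p) {
            std::memcpy(pnew, p, oldsz);
            ar->free(p);
        }
        amrex::ignore_unused(sznew); // actual new capacity in bytes
        return pnew;
    }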
263 changes: 113 additions & 150 deletions Src/Base/AMReX_GpuAllocators.H
@@ -20,205 +20,152 @@
 namespace amrex {

 template <typename T>
-struct RunOnGpu : std::false_type {};
-
-template <typename T>
-struct IsPolymorphicArenaAllocator : std::false_type {};
-
-struct ArenaAllocatorTraits {
-    typedef std::true_type propagate_on_container_copy_assignment;
-    typedef std::true_type propagate_on_container_move_assignment;
-    typedef std::true_type propagate_on_container_swap;
-    typedef std::true_type is_always_equal;
+struct FatPtr
+{
+    T* m_ptr = nullptr;
+    std::size_t m_size = 0;
+    [[nodiscard]] constexpr T* ptr () const noexcept { return m_ptr; }
+    [[nodiscard]] constexpr std::size_t size () const noexcept { return m_size; }
 };

-template<typename T>
-class ArenaAllocator
-    : public ArenaAllocatorTraits
+template <class T, class AR>
+struct ArenaAllocatorBase
 {
 public :

     using value_type = T;
+    using arena_wrapper_type = AR;

-    inline value_type* allocate(std::size_t n)
-    {
-        value_type* result = nullptr;
-        result = (value_type*) The_Arena()->alloc(n * sizeof(T));
-        return result;
-    }
+    constexpr ArenaAllocatorBase () = default;
+    explicit constexpr ArenaAllocatorBase (AR a_ar) : ar(a_ar) {}

-    inline void deallocate(value_type* ptr, std::size_t)
+    [[nodiscard]] T* allocate (std::size_t n)
     {
-        if (ptr != nullptr) { The_Arena()->free(ptr); }
+        return (T*) arena()->alloc(n * sizeof(T));
     }
-};

-template<typename T>
-class DeviceArenaAllocator
-    : public ArenaAllocatorTraits
-{
-public :
-
-    using value_type = T;
-
-    inline value_type* allocate(std::size_t n)
+    [[nodiscard]] FatPtr<T>
+    allocate_in_place (T* p, std::size_t nmin, std::size_t nmax)
     {
-        value_type* result = nullptr;
-        result = (value_type*) The_Device_Arena()->alloc(n * sizeof(T));
-        return result;
+        auto pn = arena()->alloc_in_place(p, nmin*sizeof(T), nmax*sizeof(T));
+        return FatPtr<T>{(T*)pn.first, pn.second/sizeof(T)};
     }

-    inline void deallocate(value_type* ptr, std::size_t)
+    [[nodiscard]] T*
+    shrink_in_place (T* p, std::size_t n)
     {
-        if (ptr != nullptr) { The_Device_Arena()->free(ptr); }
+        return (T*) arena()->shrink_in_place(p,n*sizeof(T));
     }
-};

-template<typename T>
-class PinnedArenaAllocator
-    : public ArenaAllocatorTraits
-{
-public :
-
-    using value_type = T;
-
-    inline value_type* allocate(std::size_t n)
+    void deallocate (T* ptr, std::size_t)
     {
-        value_type* result = nullptr;
-        result = (value_type*) The_Pinned_Arena()->alloc(n * sizeof(T));
-        return result;
+        if (ptr != nullptr) { arena()->free(ptr); }
     }

-    inline void deallocate(value_type* ptr, std::size_t)
-    {
-        if (ptr != nullptr) { The_Pinned_Arena()->free(ptr); }
+    [[nodiscard]] Arena* arena () const noexcept {
+        return ar.arena();
     }
-};

-template<typename T>
-class ManagedArenaAllocator
-    : public ArenaAllocatorTraits
-{
-public :
-
-    using value_type = T;
-
-    inline value_type* allocate(std::size_t n)
-    {
-        value_type* result = nullptr;
-        result = (value_type*) The_Managed_Arena()->alloc(n * sizeof(T));
-        return result;
-    }
+private:
+    AR ar{};
 };

-    inline void deallocate(value_type* ptr, std::size_t)
-    {
-        if (ptr != nullptr) { The_Managed_Arena()->free(ptr); }
+struct ArenaWrapper {
+    [[nodiscard]] Arena* arena () const noexcept {
+        return The_Arena();
     }
 };

-template<typename T>
-class AsyncArenaAllocator
-    : public ArenaAllocatorTraits
-{
-public :
-
-    using value_type = T;
-
-    inline value_type* allocate(std::size_t n)
-    {
-        value_type* result = nullptr;
-        result = (value_type*) The_Async_Arena()->alloc(n * sizeof(T));
-        return result;
+struct DeviceArenaWrapper {
+    [[nodiscard]] Arena* arena () const noexcept {
+        return The_Device_Arena();
     }
 };

-    inline void deallocate(value_type* ptr, std::size_t)
-    {
-        if (ptr != nullptr) { The_Async_Arena()->free(ptr); }
+struct PinnedArenaWrapper {
+    [[nodiscard]] Arena* arena () const noexcept {
+        return The_Pinned_Arena();
     }
 };

-template<typename T>
-class PolymorphicArenaAllocator
-    : public ArenaAllocatorTraits
-{
-public :
-
-    using value_type = T;
-
-    inline value_type* allocate(std::size_t n)
-    {
-        value_type* result = nullptr;
-        result = (value_type*) arena()->alloc(n * sizeof(T));
-        return result;
+struct ManagedArenaWrapper {
+    [[nodiscard]] Arena* arena () const noexcept {
+        return The_Managed_Arena();
     }
 };

-    inline void deallocate(value_type* ptr, std::size_t)
-    {
-        if (ptr != nullptr) { arena()->free(ptr); }
+struct AsyncArenaWrapper {
+    [[nodiscard]] Arena* arena () const noexcept {
+        return The_Async_Arena();
     }
 };

+struct PolymorphicArenaWrapper {
+    constexpr PolymorphicArenaWrapper () = default;
+    explicit constexpr PolymorphicArenaWrapper (Arena* a_arena)
+        : m_arena(a_arena) {}
     [[nodiscard]] Arena* arena () const noexcept {
         return (m_arena) ? m_arena : The_Arena();
     }

     Arena* m_arena = nullptr;
 };

 template<typename T>
-class PolymorphicAllocator
+class ArenaAllocator
+    : public ArenaAllocatorBase<T,ArenaWrapper>
 {
-public :
+};

-    using value_type = T;
+template<typename T>
+class DeviceArenaAllocator
+    : public ArenaAllocatorBase<T,DeviceArenaWrapper>
+{
+};

-    PolymorphicAllocator () : m_use_gpu_aware_mpi(ParallelDescriptor::UseGpuAwareMpi()) {}
+template<typename T>
+class PinnedArenaAllocator
+    : public ArenaAllocatorBase<T,PinnedArenaWrapper>
+{
+};

-    inline value_type* allocate(std::size_t n)
-    {
-        value_type* result = nullptr;
-        if (m_use_gpu_aware_mpi)
-        {
-            result = (value_type*) The_Arena()->alloc(n * sizeof(T));
-        }
-        else
-        {
-            result = (value_type*) The_Pinned_Arena()->alloc(n * sizeof(T));
-        }
-        return result;
-    }
+template<typename T>
+class ManagedArenaAllocator
+    : public ArenaAllocatorBase<T,ManagedArenaWrapper>
+{
+};

-    inline void deallocate(value_type* ptr, std::size_t)
-    {
-        if (ptr != nullptr)
-        {
-            if (m_use_gpu_aware_mpi)
-            {
-                The_Arena()->free(ptr);
-            }
-            else
-            {
-                The_Pinned_Arena()->free(ptr);
-            }
-        }
-    }
+template<typename T>
+class AsyncArenaAllocator
+    : public ArenaAllocatorBase<T,AsyncArenaWrapper>
+{
+};

-    bool m_use_gpu_aware_mpi;
+template<typename T>
+class PolymorphicArenaAllocator
+    : public ArenaAllocatorBase<T,PolymorphicArenaWrapper>
+{
+public :
+    constexpr PolymorphicArenaAllocator () = default;
+    explicit constexpr PolymorphicArenaAllocator (Arena* a_arena)
+        : ArenaAllocatorBase<T,PolymorphicArenaWrapper>
+            (PolymorphicArenaWrapper(a_arena))
+    {}
+};

-    template <class U, class V>
-    friend bool
-    operator== (PolymorphicAllocator<U> const& a, PolymorphicAllocator<V> const& b) noexcept
-    {
-        return a.m_use_gpu_aware_mpi == b.m_use_gpu_aware_mpi;
-    }
+template <typename T>
+struct RunOnGpu : std::false_type {};

-    template <class U, class V>
-    friend bool
-    operator!= (PolymorphicAllocator<U> const& a, PolymorphicAllocator<V> const& b) noexcept
-    {
-        return a.m_use_gpu_aware_mpi != b.m_use_gpu_aware_mpi;
-    }
+template <class T, class Enable = void>
+struct IsArenaAllocator : std::false_type {};
+//
+template <class T>
+struct IsArenaAllocator
+    <T,std::enable_if_t<std::is_base_of
+        <ArenaAllocatorBase<typename T::value_type,
+                            typename T::arena_wrapper_type>,
+         T>::value>>
+    : std::true_type {};

-};
+template <typename T>
+struct IsPolymorphicArenaAllocator : std::false_type {};

 #ifdef AMREX_USE_GPU
 template <typename T>
@@ -246,6 +193,22 @@ namespace amrex {
     using DefaultAllocator = std::allocator<T>;
 #endif // AMREX_USE_GPU

+template <typename A1, typename A2,
+          std::enable_if_t<IsArenaAllocator<A1>::value &&
+                           IsArenaAllocator<A2>::value, int> = 0>
+bool operator== (A1 const& a1, A2 const& a2)
+{
+    return a1.arena() == a2.arena();
+}
+
+template <typename A1, typename A2,
+          std::enable_if_t<IsArenaAllocator<A1>::value &&
+                           IsArenaAllocator<A2>::value, int> = 0>
+bool operator!= (A1 const& a1, A2 const& a2)
+{
+    return a1.arena() != a2.arena();
+}
+
 } // namespace amrex

 #endif // AMREX_GPUALLOCATORS_H_
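
For illustration, a short sketch of how the new trait and the comparison
operators interact (the static_asserts and variables here are illustrative,
not part of the commit):

    static_assert(amrex::IsArenaAllocator<amrex::ArenaAllocator<double>>::value);
    static_assert(!amrex::IsArenaAllocator<std::allocator<double>>::value);

    // Allocators of different static types compare equal exactly when
    // they resolve to the same Arena.
    amrex::PolymorphicArenaAllocator<int> p{amrex::The_Pinned_Arena()};
    amrex::PinnedArenaAllocator<int> q;
    bool same = (p == q); // expected true: both use The_Pinned_Arena()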
13 changes: 0 additions & 13 deletions Src/Base/AMReX_GpuContainers.H
@@ -61,16 +61,6 @@ namespace amrex::Gpu {
     template <class T>
     using HostVector = PinnedVector<T>;

-    /**
-     * \brief The behavior of PolymorphicVector changes depending on
-     * the amrex.use_gpu_aware_mpi runtime flag. If the flag is true,
-     * this vector will use device memory. If it is false, this Vector
-     * will use pinned memory.
-     *
-     */
-    template <class T>
-    using PolymorphicVector = PODVector<T, PolymorphicAllocator<T> >;
-
     /**
      * \brief This is identical to ManagedVector<T>. The ManagedDeviceVector
      * form is deprecated and will be removed in a future release.
@@ -101,9 +91,6 @@ namespace amrex::Gpu {

     template <class T>
     using AsyncVector = PODVector<T>;
-
-    template <class T>
-    using PolymorphicVector = PODVector<T>;
 #endif

     struct HostToDevice {};
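
With the alias removed, code that needs a runtime-selected Arena can spell
the type out directly; a hedged sketch of the assumed replacement pattern:

    // Assumed replacement for the removed PolymorphicVector alias.
    template <class T>
    using PolyArenaVector =
        amrex::PODVector<T, amrex::PolymorphicArenaAllocator<T>>;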