Skip to content

Commit

Permalink
TinyProfiler with BArena and PArena (AMReX-Codes#4113)
Browse files Browse the repository at this point in the history
This PR adds the capability to profile BArena and PArena with
TinyProfiler. Previously, only CArena was profiled. Note that some
allocations are still not profiled when running on CPU because
`amrex::DefaultAllocator` and `amrex::PODVector` use `std::allocator<T>`
instead of `amrex::ArenaAllocator<T>`.
  • Loading branch information
AlexanderSinn authored Sep 2, 2024
1 parent 778e782 commit a31abb5
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 50 deletions.
37 changes: 36 additions & 1 deletion Src/Base/AMReX_Arena.H
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,21 @@

#include <AMReX_BLassert.H>
#include <AMReX_INT.H>

#ifdef AMREX_TINY_PROFILING
#include <AMReX_TinyProfiler.H>
#else
namespace amrex {
//! Empty placeholder used when AMREX_TINY_PROFILING is not defined, so that
//! declarations mentioning MemStat (such as the profiling containers in Arena)
//! still compile without including AMReX_TinyProfiler.H.
struct MemStat {};
}
#endif

#include <cstddef>
#include <cstdlib>
#include <limits>
#include <map>
#include <mutex>
#include <unordered_map>
#include <utility>

namespace amrex {
Expand Down Expand Up @@ -156,7 +168,7 @@ public:
* \brief Add this Arena to the list of Arenas that are profiled by TinyProfiler.
* \param memory_name The name of this arena in the TinyProfiler output.
*/
virtual void registerForProfiling (const std::string& memory_name);
void registerForProfiling (const std::string& memory_name);

#ifdef AMREX_USE_GPU
//! Is this GPU stream ordered memory allocator?
Expand Down Expand Up @@ -199,6 +211,29 @@ protected:
virtual std::size_t freeUnused_protected () { return 0; }
void* allocate_system (std::size_t nbytes);
void deallocate_system (void* p, std::size_t nbytes);

//! Per-arena bookkeeping used to report allocations to TinyProfiler.
//! The type is neither copyable nor movable (all four operations are deleted).
struct ArenaProfiler {
//! If this arena is profiled by TinyProfiler
bool m_do_profiling = false;
//! Mutex for the profiling
std::mutex m_arena_profiler_mutex;
//! Data structure used for profiling with TinyProfiler
std::map<std::string, MemStat> m_profiling_stats;
//! Track the currently allocated memory, not used by CArena
//! Key: allocated pointer. Value: the MemStat entry it was counted in and its size in bytes.
std::unordered_map<void*, std::pair<MemStat*, std::size_t>> m_currently_allocated;

//! Deregisters m_profiling_stats from TinyProfiler if m_do_profiling is set
//! (only when built with AMREX_TINY_PROFILING).
~ArenaProfiler ();
ArenaProfiler () noexcept = default;
ArenaProfiler (const ArenaProfiler& rhs) = delete;
ArenaProfiler (ArenaProfiler&& rhs) = delete;
ArenaProfiler& operator= (const ArenaProfiler& rhs) = delete;
ArenaProfiler& operator= (ArenaProfiler&& rhs) = delete;

//! Record an allocation of nbytes at ptr. Does nothing unless built with
//! AMREX_TINY_PROFILING and m_do_profiling is set.
void profile_alloc (void* ptr, std::size_t nbytes);

//! Record that ptr is being freed. Pointers that were not recorded by
//! profile_alloc are ignored.
void profile_free (void* ptr);

} m_profiler;
};

}
Expand Down
45 changes: 43 additions & 2 deletions Src/Base/AMReX_Arena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,13 @@ Arena::hasFreeDeviceMemory (std::size_t)
}

void
Arena::registerForProfiling (const std::string&)
Arena::registerForProfiling ([[maybe_unused]] const std::string& memory_name)
{
amrex::Abort("Profiling is not implemented for this type of Arena");
#ifdef AMREX_TINY_PROFILING
AMREX_ALWAYS_ASSERT(m_profiler.m_do_profiling == false);
m_profiler.m_do_profiling =
TinyProfiler::RegisterArena(memory_name, m_profiler.m_profiling_stats);
#endif
}

std::size_t
Expand Down Expand Up @@ -330,6 +334,7 @@ Arena::Initialize ()
}

the_async_arena = new PArena(the_async_arena_release_threshold);
the_async_arena->registerForProfiling("Async Memory");

#ifdef AMREX_USE_GPU
if (the_arena->isDevice()) {
Expand Down Expand Up @@ -403,6 +408,7 @@ Arena::Initialize ()
}

the_cpu_arena = The_BArena();
the_cpu_arena->registerForProfiling("Cpu Memory");

// Initialize the null arena
auto* null_arena = The_Null_Arena();
Expand Down Expand Up @@ -654,4 +660,39 @@ The_Comms_Arena ()
}
}

//! Hand the statistics map back to TinyProfiler, but only if this arena was
//! successfully registered for profiling.
Arena::ArenaProfiler::~ArenaProfiler ()
{
#ifdef AMREX_TINY_PROFILING
    if (!m_do_profiling) { return; }
    TinyProfiler::DeregisterArena(m_profiling_stats);
#endif
}

/**
 * \brief Record a new allocation in this arena's TinyProfiler statistics.
 * Does nothing unless AMREX_TINY_PROFILING is defined and profiling is enabled.
 * \param ptr    address of the allocation
 * \param nbytes size of the allocation in bytes
 */
void Arena::ArenaProfiler::profile_alloc ([[maybe_unused]] void* ptr,
                                          [[maybe_unused]] std::size_t nbytes)
{
#ifdef AMREX_TINY_PROFILING
    if (!m_do_profiling) { return; }

    // The lock covers both the statistics update and the bookkeeping map.
    std::lock_guard<std::mutex> guard(m_arena_profiler_mutex);

    MemStat* const mem_stat = TinyProfiler::memory_alloc(nbytes, m_profiling_stats);
    if (mem_stat == nullptr) { return; }

    // Remember which stat entry and size belong to ptr so profile_free can undo it.
    // Like insert, emplace leaves an already present entry untouched.
    m_currently_allocated.emplace(ptr, std::pair<MemStat*, std::size_t>{mem_stat, nbytes});
#endif
}

/**
 * \brief Record the release of an allocation previously seen by profile_alloc.
 * Does nothing unless AMREX_TINY_PROFILING is defined and profiling is enabled.
 * Pointers without a bookkeeping entry are silently ignored.
 * \param ptr address of the allocation being freed
 */
void Arena::ArenaProfiler::profile_free ([[maybe_unused]] void* ptr)
{
#ifdef AMREX_TINY_PROFILING
    if (!m_do_profiling) { return; }

    std::lock_guard<std::mutex> guard(m_arena_profiler_mutex);

    const auto entry = m_currently_allocated.find(ptr);
    if (entry == m_currently_allocated.end()) { return; }

    // Read both fields before the entry is erased.
    MemStat* const mem_stat = entry->second.first;
    const std::size_t num_bytes = entry->second.second;
    TinyProfiler::memory_free(num_bytes, mem_stat);
    m_currently_allocated.erase(entry);
#endif
}

}
5 changes: 4 additions & 1 deletion Src/Base/AMReX_BArena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
void*
amrex::BArena::alloc (std::size_t sz_)
{
return std::malloc(sz_);
void* pt = std::malloc(sz_);
m_profiler.profile_alloc(pt, sz_);
return pt;
}

//! Report pt to the arena profiler, then release it with std::free.
void
amrex::BArena::free (void* pt)
{
// The profiler is notified first, while pt is still a live allocation.
m_profiler.profile_free(pt);
std::free(pt);
}

Expand Down
12 changes: 0 additions & 12 deletions Src/Base/AMReX_CArena.H
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

namespace amrex {

struct MemStat;

/**
* \brief A Concrete Class for Dynamic Memory Management using first fit.
* This is a coalescing memory manager. It allocates (possibly) large
Expand Down Expand Up @@ -75,12 +73,6 @@ public:
*/
[[nodiscard]] bool hasFreeDeviceMemory (std::size_t sz) final;

/**
* \brief Add this Arena to the list of Arenas that are profiled by TinyProfiler.
* \param memory_name The name of this arena in the TinyProfiler output.
*/
void registerForProfiling (const std::string& memory_name) final;

//! The current amount of heap space used by the CArena object.
std::size_t heap_space_used () const noexcept;

Expand Down Expand Up @@ -191,10 +183,6 @@ protected:
std::size_t m_used{0};
//! The amount of memory given out via alloc().
std::size_t m_actually_used{0};
//! If this arena is profiled by TinyProfiler
bool m_do_profiling = false;
//! Data structure used for profiling with TinyProfiler
std::map<std::string, MemStat> m_profiling_stats;


std::mutex carena_mutex;
Expand Down
39 changes: 8 additions & 31 deletions Src/Base/AMReX_CArena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,6 @@
#include <AMReX_MFIter.H>
#include <AMReX_ParallelReduce.H>

#ifdef AMREX_TINY_PROFILING
#include <AMReX_TinyProfiler.H>
#else
namespace amrex {
struct MemStat {};
}
#endif

#include <utility>
#include <cstring>
#include <iostream>
Expand All @@ -32,12 +24,6 @@ CArena::~CArena ()
for (auto const& a : m_alloc) {
deallocate_system(a.first, a.second);
}

#ifdef AMREX_TINY_PROFILING
if (m_do_profiling) {
TinyProfiler::DeregisterArena(m_profiling_stats);
}
#endif
}

void*
Expand All @@ -53,8 +39,8 @@ CArena::alloc_protected (std::size_t nbytes)
{
MemStat* stat = nullptr;
#ifdef AMREX_TINY_PROFILING
if (m_do_profiling) {
stat = TinyProfiler::memory_alloc(nbytes, m_profiling_stats);
if (m_profiler.m_do_profiling) {
stat = TinyProfiler::memory_alloc(nbytes, m_profiler.m_profiling_stats);
}
#endif

Expand Down Expand Up @@ -173,10 +159,10 @@ CArena::alloc_in_place (void* pt, std::size_t szmin, std::size_t szmax)
free_node.size(left_size);
}
#ifdef AMREX_TINY_PROFILING
if (m_do_profiling) {
if (m_profiler.m_do_profiling) {
TinyProfiler::memory_free(busy_it->size(), busy_it->mem_stat());
auto* stat = TinyProfiler::memory_alloc(new_size,
m_profiling_stats);
m_profiler.m_profiling_stats);
const_cast<Node&>(*busy_it).mem_stat(stat);
}
#endif
Expand All @@ -186,10 +172,10 @@ CArena::alloc_in_place (void* pt, std::size_t szmin, std::size_t szmax)
} else if (total_size >= szmin) {
m_freelist.erase(next_it);
#ifdef AMREX_TINY_PROFILING
if (m_do_profiling) {
if (m_profiler.m_do_profiling) {
TinyProfiler::memory_free(busy_it->size(), busy_it->mem_stat());
auto* stat = TinyProfiler::memory_alloc(total_size,
m_profiling_stats);
m_profiler.m_profiling_stats);
const_cast<Node&>(*busy_it).mem_stat(stat);
}
#endif
Expand Down Expand Up @@ -255,9 +241,9 @@ CArena::shrink_in_place (void* pt, std::size_t new_size)
m_actually_used -= leftover_size;

#ifdef AMREX_TINY_PROFILING
if (m_do_profiling) {
if (m_profiler.m_do_profiling) {
TinyProfiler::memory_free(old_size, busy_it->mem_stat());
auto* stat = TinyProfiler::memory_alloc(new_size, m_profiling_stats);
auto* stat = TinyProfiler::memory_alloc(new_size, m_profiler.m_profiling_stats);
const_cast<Node&>(*busy_it).mem_stat(stat);
}
#endif
Expand Down Expand Up @@ -431,15 +417,6 @@ CArena::hasFreeDeviceMemory (std::size_t sz)
}
}

void
CArena::registerForProfiling ([[maybe_unused]] const std::string& memory_name)
{
#ifdef AMREX_TINY_PROFILING
m_do_profiling = true;
TinyProfiler::RegisterArena(memory_name, m_profiling_stats);
#endif
}

std::size_t
CArena::heap_space_used () const noexcept
{
Expand Down
2 changes: 2 additions & 0 deletions Src/Base/AMReX_PArena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ PArena::alloc (std::size_t nbytes)
AMREX_HIP_SAFE_CALL(hipMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream()));,
AMREX_CUDA_SAFE_CALL(cudaMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream()));
)
m_profiler.profile_alloc(p, nbytes);
return p;
} else
#endif
Expand Down Expand Up @@ -93,6 +94,7 @@ PArena::free (void* p)

#if defined (AMREX_GPU_STREAM_ALLOC_SUPPORT)
if (Gpu::Device::memoryPoolsSupported()) {
m_profiler.profile_free(p);
AMREX_HIP_OR_CUDA(
AMREX_HIP_SAFE_CALL(hipFreeAsync(p, Gpu::gpuStream()));,
AMREX_CUDA_SAFE_CALL(cudaFreeAsync(p, Gpu::gpuStream()));
Expand Down
2 changes: 1 addition & 1 deletion Src/Base/AMReX_TinyProfiler.H
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public:
static void MemoryInitialize () noexcept;
static void MemoryFinalize (bool bFlushing = false) noexcept;

static void RegisterArena (const std::string& memory_name,
static bool RegisterArena (const std::string& memory_name,
std::map<std::string, MemStat>& memstats) noexcept;

static void DeregisterArena (std::map<std::string, MemStat>& memstats) noexcept;
Expand Down
5 changes: 3 additions & 2 deletions Src/Base/AMReX_TinyProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,14 +490,15 @@ TinyProfiler::MemoryFinalize (bool bFlushing) noexcept
if(os) { os->precision(oldprec); }
}

void
bool
TinyProfiler::RegisterArena (const std::string& memory_name,
std::map<std::string, MemStat>& memstats) noexcept
{
if (!memprof_enabled) { return; }
if (!memprof_enabled) { return false; }

all_memstats.push_back(&memstats);
all_memnames.push_back(memory_name);
return true;
}

void
Expand Down

0 comments on commit a31abb5

Please sign in to comment.