diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index bc716bd2..a76ebf1e 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -24,7 +24,25 @@ add_executable(timestamp timestamp.cpp)
 target_include_directories(timestamp PUBLIC ../catkit_core)
 target_link_libraries(timestamp PUBLIC catkit_core)
 
+# Free list allocator benchmark
+add_executable(free_list_allocator free_list_allocator.cpp)
+target_include_directories(free_list_allocator PUBLIC ../catkit_core)
+target_link_libraries(free_list_allocator PUBLIC catkit_core)
+
+# Pool allocator benchmark
+add_executable(pool_allocator pool_allocator.cpp)
+target_include_directories(pool_allocator PUBLIC ../catkit_core)
+target_link_libraries(pool_allocator PUBLIC catkit_core)
+
+# Hash map benchmark
+add_executable(hash_map hash_map.cpp)
+target_include_directories(hash_map PUBLIC ../catkit_core)
+target_link_libraries(hash_map PUBLIC catkit_core)
+
 # Add install files
 install(TARGETS datastream_latency DESTINATION bin)
 install(TARGETS datastream_submit DESTINATION bin)
 install(TARGETS timestamp DESTINATION bin)
+install(TARGETS free_list_allocator DESTINATION bin)
+install(TARGETS pool_allocator DESTINATION bin)
+install(TARGETS hash_map DESTINATION bin)
diff --git a/benchmarks/free_list_allocator.cpp b/benchmarks/free_list_allocator.cpp
new file mode 100644
index 00000000..a66d733a
--- /dev/null
+++ b/benchmarks/free_list_allocator.cpp
@@ -0,0 +1,141 @@
+#include "FreeListAllocator.h"
+#include "Timing.h"
+
+#include <iostream>
+
+void benchmark_linux_scalability()
+{
+	const size_t N = 10000000;
+
+	const size_t NUM_BLOCKS = N * 2;
+	const size_t ALIGNMENT = 32;
+
+	auto *handles = new FreeListAllocator::BlockHandle[N];
+
+	size_t buffer_size = FreeListAllocator::ComputeMetadataBufferSize(NUM_BLOCKS);
+	char *buffer = new char[buffer_size];
+
+	auto allocator = FreeListAllocator::Create(buffer, NUM_BLOCKS, ALIGNMENT, NUM_BLOCKS * ALIGNMENT);
+
+	auto start = GetTimeStamp();
+
+	for (size_t i = 0; i < N; ++i)
+	{
+		handles[i] = allocator->Allocate(16);
+	}
+
+	for (size_t i = 0; i < N; ++i)
+	{
+		allocator->Deallocate(handles[i]);
+	}
+
+	auto end = GetTimeStamp();
+
+	std::cout << "Linux Scalability:" << std::endl;
+	std::cout << "Time: " << (end - start) / 1e9 << " sec" << std::endl;
+	std::cout << "Throughput: " << 2 * N / ((end - start) / 1e9) << " ops/s" << std::endl;
+	std::cout << "Time per operation: " << (end - start) / (2 * N) << " ns" << std::endl;
+
+	delete[] handles;
+	delete[] buffer;
+}
+
+void benchmark_threadtest()
+{
+	const size_t N = 100;
+	const size_t M = 100000;
+	const size_t NUM_BLOCKS = N * 2;
+	const size_t ALIGNMENT = 32;
+
+	// Only N handles are live at any time.
+	auto *handles = new FreeListAllocator::BlockHandle[N];
+
+	size_t buffer_size = FreeListAllocator::ComputeMetadataBufferSize(NUM_BLOCKS);
+	char *buffer = new char[buffer_size];
+
+	auto allocator = FreeListAllocator::Create(buffer, NUM_BLOCKS, ALIGNMENT, NUM_BLOCKS * ALIGNMENT);
+
+	auto start = GetTimeStamp();
+
+	for (size_t i = 0; i < M; ++i)
+	{
+		for (size_t j = 0; j < N; ++j)
+		{
+			handles[j] = allocator->Allocate(16);
+		}
+
+		for (size_t j = 0; j < N; ++j)
+		{
+			allocator->Deallocate(handles[j]);
+		}
+	}
+
+	auto end = GetTimeStamp();
+
+	std::cout << "Threadtest:" << std::endl;
+	std::cout << "Time: " << (end - start) / 1e9 << " sec" << std::endl;
+	std::cout << "Throughput: " << 2 * N * M / ((end - start) / 1e9) << " ops/s" << std::endl;
+	std::cout << "Time per operation: " << (end - start) / (2 * N * M) << " ns" << std::endl;
+
+	delete[] handles;
+	delete[] buffer;
+}
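+
+// The allocator hands out offsets into a caller-provided arena; it never
+// touches the data itself. A minimal sketch of how a caller would turn a
+// handle into usable memory (the arena pointer here is hypothetical and not
+// part of these benchmarks):
+[[maybe_unused]] static void *ResolveHandle(FreeListAllocator &allocator, FreeListAllocator::BlockHandle handle, char *arena)
+{
+	if (handle == FreeListAllocator::INVALID_HANDLE)
+		return nullptr;
+
+	return arena + allocator.GetOffset(handle);
+}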
+
+void benchmark_larson()
+{
+	const size_t ALIGNMENT = 32;
+
+	const size_t N = 10000000;
+	const size_t M = 1000;
+	const size_t MIN_SIZE = 16;
+	const size_t MAX_SIZE = 128;
+	const size_t NUM_BLOCKS = M * 2;
+
+	auto *handles = new FreeListAllocator::BlockHandle[M];
+	for (size_t i = 0; i < M; ++i)
+	{
+		handles[i] = FreeListAllocator::INVALID_HANDLE;
+	}
+
+	size_t buffer_size = FreeListAllocator::ComputeMetadataBufferSize(NUM_BLOCKS);
+	char *buffer = new char[buffer_size];
+
+	// The requested sizes below go up to (MAX_SIZE - 1) * ALIGNMENT bytes, so
+	// the managed arena has to be sized accordingly or most allocations fail.
+	auto allocator = FreeListAllocator::Create(buffer, NUM_BLOCKS, ALIGNMENT, MAX_SIZE * ALIGNMENT * NUM_BLOCKS);
+
+	auto *indices = new size_t[N];
+	auto *sizes = new size_t[N];
+	for (size_t i = 0; i < N; ++i)
+	{
+		indices[i] = rand() % M;
+		sizes[i] = (MIN_SIZE + (rand() % (MAX_SIZE - MIN_SIZE))) * ALIGNMENT;
+	}
+
+	auto start = GetTimeStamp();
+
+	for (size_t i = 0; i < N; ++i)
+	{
+		size_t index = indices[i];
+		size_t size = sizes[i];
+
+		if (handles[index] != FreeListAllocator::INVALID_HANDLE)
+		{
+			allocator->Deallocate(handles[index]);
+		}
+
+		handles[index] = allocator->Allocate(size);
+	}
+
+	auto end = GetTimeStamp();
+
+	std::cout << "Larson benchmark:" << std::endl;
+	std::cout << "Time: " << (end - start) / 1e9 << " sec" << std::endl;
+	std::cout << "Throughput: " << (2 * N - M) / ((end - start) / 1e9) << " ops/s" << std::endl;
+	std::cout << "Time per operation: " << (end - start) / (2 * N - M) << " ns" << std::endl;
+
+	delete[] handles;
+	delete[] indices;
+	delete[] sizes;
+	delete[] buffer;
+}
+
+int main(int argc, char **argv)
+{
+	benchmark_linux_scalability();
+	benchmark_threadtest();
+	benchmark_larson();
+
+	return 0;
+}
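+
+// The original Larson, threadtest and Linux-scalability benchmarks are
+// multi-threaded; the runs above are single-threaded adaptations. Since the
+// allocator is lock-free, a threaded variant only needs to partition the
+// handle array. A hedged sketch (the thread count is arbitrary):
+//
+//	std::vector<std::thread> workers;
+//	for (size_t t = 0; t < 4; ++t)
+//		workers.emplace_back([&, t]() {
+//			for (size_t i = t; i < N; i += 4)
+//				handles[i] = allocator->Allocate(16);
+//		});
+//	for (auto &worker : workers)
+//		worker.join();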
diff --git a/benchmarks/hash_map.cpp b/benchmarks/hash_map.cpp
new file mode 100644
index 00000000..d4a29c30
--- /dev/null
+++ b/benchmarks/hash_map.cpp
@@ -0,0 +1,61 @@
+#include "HashMap.h"
+#include "Timing.h"
+
+#include <cstdint>
+#include <iostream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+	// uint16_t values, keys of up to 15 characters plus terminator, and a
+	// capacity comfortably above the 5000 insertions (assumed parameters).
+	typedef HashMap<uint16_t, 16, 16384> MyHashMap;
+
+	std::size_t buffer_size = MyHashMap::CalculateBufferSize();
+	std::cout << "Buffer size: " << buffer_size << " bytes" << std::endl;
+	char *buffer = new char[buffer_size];
+
+	MyHashMap map(buffer);
+	map.Initialize();
+
+	std::uint64_t total_time = 0;
+	std::size_t N = 5000;
+
+	for (size_t i = 0; i < N; ++i)
+	{
+		std::string key = "key" + std::to_string(i);
+
+		auto start = GetTimeStamp();
+		bool success = map.Insert(key, uint16_t(i));
+		auto end = GetTimeStamp();
+
+		if (!success)
+		{
+			std::cout << "Insertion failed." << std::endl;
+		}
+
+		total_time += end - start;
+	}
+
+	std::cout << "Insertion time: " << total_time / N << " ns" << std::endl;
+
+	total_time = 0;
+
+	for (size_t i = 0; i < N; ++i)
+	{
+		std::string key = "key" + std::to_string(i);
+
+		auto start = GetTimeStamp();
+		auto *value = map.Find(key);
+		auto end = GetTimeStamp();
+
+		if (value == nullptr || *value != uint16_t(i))
+		{
+			std::cout << "Key not found or value mismatch." << std::endl;
+		}
+
+		total_time += end - start;
+	}
+
+	std::cout << "Lookup time: " << total_time / N << " ns" << std::endl;
+
+	delete[] buffer;
+
+	return 0;
+}
diff --git a/benchmarks/pool_allocator.cpp b/benchmarks/pool_allocator.cpp
new file mode 100644
index 00000000..d9357a16
--- /dev/null
+++ b/benchmarks/pool_allocator.cpp
@@ -0,0 +1,44 @@
+#include "PoolAllocator.h"
+#include "Timing.h"
+
+#include <iostream>
+
+void benchmark_linux_scalability()
+{
+	const size_t N = 10000000;
+	const size_t CAPACITY = 2 * N;
+
+	char *buffer = new char[PoolAllocator::CalculateMetadataBufferSize(CAPACITY)];
+
+	auto allocator = PoolAllocator::Create(buffer, CAPACITY);
+
+	auto *handles = new PoolAllocator::BlockHandle[N];
+
+	auto start = GetTimeStamp();
+
+	for (size_t i = 0; i < N; ++i)
+	{
+		handles[i] = allocator->Allocate();
+	}
+
+	for (size_t i = 0; i < N; ++i)
+	{
+		allocator->Deallocate(handles[i]);
+	}
+
+	auto end = GetTimeStamp();
+
+	std::cout << "Linux Scalability:" << std::endl;
+	std::cout << "Time: " << (end - start) / 1e9 << " sec" << std::endl;
+	std::cout << "Throughput: " << 2 * N / ((end - start) / 1e9) << " ops/s" << std::endl;
+	std::cout << "Time per operation: " << (end - start) / (2 * N) << " ns" << std::endl;
+
+	delete[] handles;
+	delete[] buffer;
+}
+
+int main(int argc, char **argv)
+{
+	benchmark_linux_scalability();
+
+	return 0;
+}
diff --git a/catkit_core/CMakeLists.txt b/catkit_core/CMakeLists.txt
index af141bdd..6258d432 100644
--- a/catkit_core/CMakeLists.txt
+++ b/catkit_core/CMakeLists.txt
@@ -33,6 +33,8 @@ add_library(catkit_core STATIC
 	Tracing.cpp
 	Types.cpp
 	Util.cpp
+	PoolAllocator.cpp
+	FreeListAllocator.cpp
 	proto/core.pb.cc
 	proto/logging.pb.cc
 	proto/testbed.pb.cc
diff --git a/catkit_core/FreeListAllocator.cpp b/catkit_core/FreeListAllocator.cpp
new file mode 100644
index 00000000..cfc8ec10
--- /dev/null
+++ b/catkit_core/FreeListAllocator.cpp
@@ -0,0 +1,551 @@
+#include "FreeListAllocator.h"
+#include "Timing.h"
+
+#include <algorithm>
+#include <iostream>
+
+//#define DEBUG_PRINT(a) std::cout << a << std::endl
+#define DEBUG_PRINT(a)
+
+const std::size_t MAX_ATTEMPTS = 5;
+const std::uint8_t VERSION[4] = {0, 0, 0, 0};
+
+// Zero-initialize so that a default-constructed descriptor has a defined bit
+// pattern; descriptors are compared bitwise by the atomic operations below.
+FreeListAllocator::BlockDescriptor::BlockDescriptor()
+	: m_Offset(0),
+	m_SizeAndFreeFlag(0)
+{
+}
+
+FreeListAllocator::BlockDescriptor::BlockDescriptor(Offset offset, Size size, bool is_free)
+{
+	Set(offset, size, is_free);
+}
+
+void FreeListAllocator::BlockDescriptor::Set(const Offset &offset, const Size &size, const bool &is_free)
+{
+	m_Offset = offset;
+	m_SizeAndFreeFlag = (size & ~_FREE_FLAG) | (_FREE_FLAG * is_free);
+}
+
+FreeListAllocator::Offset FreeListAllocator::BlockDescriptor::GetOffset() const
+{
+	return m_Offset;
+}
+
+void FreeListAllocator::BlockDescriptor::SetOffset(const Offset &new_offset)
+{
+	m_Offset = new_offset;
+}
+
+FreeListAllocator::Size FreeListAllocator::BlockDescriptor::GetSize() const
+{
+	return m_SizeAndFreeFlag & ~_FREE_FLAG;
+}
+
+void FreeListAllocator::BlockDescriptor::SetSize(const Size &new_size)
+{
+	m_SizeAndFreeFlag = (new_size & ~_FREE_FLAG) | (m_SizeAndFreeFlag & _FREE_FLAG);
+}
+
+bool FreeListAllocator::BlockDescriptor::IsFree() const
+{
+	return m_SizeAndFreeFlag & _FREE_FLAG;
+}
+
+void FreeListAllocator::BlockDescriptor::SetFree(const bool &is_free)
+{
+	m_SizeAndFreeFlag = (m_SizeAndFreeFlag & ~_FREE_FLAG) | (_FREE_FLAG * is_free);
+}
+
+// Initializer order matches the declaration order in the header.
+FreeListAllocator::FreeListAllocator(Header *header, std::shared_ptr<PoolAllocator> block_allocator, Block *blocks)
+	: m_Header(*header),
+	m_MaxNumBlocks(m_Header.max_num_blocks),
+	m_Alignment(m_Header.alignment),
+	m_Head(m_Header.head),
+	m_BlockAllocator(block_allocator),
+	m_Blocks(blocks)
+{
+}
+
+std::size_t FreeListAllocator::ComputeMetadataBufferSize(std::size_t max_num_blocks)
+{
+	std::size_t size = sizeof(Header);
+	size += PoolAllocator::CalculateMetadataBufferSize(max_num_blocks);
+	size += sizeof(Block) * max_num_blocks;
+
+	return size;
+}
+
+void FreeListAllocator::GetMemoryLayout(void *metadata_buffer, std::size_t max_num_blocks, void **block_allocator_memory, Block **blocks)
+{
+	std::size_t offset = sizeof(Header);
+	*block_allocator_memory = static_cast<void *>(static_cast<char *>(metadata_buffer) + offset);
+
+	offset += PoolAllocator::CalculateMetadataBufferSize(max_num_blocks);
+	*blocks = reinterpret_cast<Block *>(static_cast<char *>(metadata_buffer) + offset);
+}
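+
+// The three pieces computed above live back to back in the one caller-provided
+// metadata buffer:
+//
+//	[ Header | PoolAllocator metadata | Block[max_num_blocks] ]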
+
+std::shared_ptr<FreeListAllocator> FreeListAllocator::Open(void *metadata_buffer)
+{
+	Header *header = static_cast<Header *>(metadata_buffer);
+
+	void *block_allocator_memory;
+	Block *blocks;
+	GetMemoryLayout(metadata_buffer, header->max_num_blocks, &block_allocator_memory, &blocks);
+
+	auto block_allocator = PoolAllocator::Open(block_allocator_memory);
+
+	return std::shared_ptr<FreeListAllocator>(new FreeListAllocator(header, block_allocator, blocks));
+}
+
+std::shared_ptr<FreeListAllocator> FreeListAllocator::Create(void *metadata_buffer, std::size_t max_num_blocks, std::size_t alignment, std::size_t buffer_size)
+{
+	Header *header = static_cast<Header *>(metadata_buffer);
+
+	void *block_allocator_memory;
+	Block *blocks;
+	GetMemoryLayout(metadata_buffer, max_num_blocks, &block_allocator_memory, &blocks);
+
+	// Fill in the header information.
+	std::copy(VERSION, VERSION + sizeof(VERSION), header->version);
+	header->max_num_blocks = max_num_blocks;
+	header->alignment = alignment;
+	header->total_buffer_size = buffer_size;
+
+	// Create the block allocator.
+	auto block_allocator = PoolAllocator::Create(block_allocator_memory, max_num_blocks);
+
+	// Initialize the free list with a single free block spanning the whole buffer.
+	header->head = block_allocator->Allocate();
+
+	blocks[header->head].descriptor = BlockDescriptor(0, buffer_size, true);
+	blocks[header->head].next = INVALID_HANDLE;
+
+	return std::shared_ptr<FreeListAllocator>(new FreeListAllocator(header, block_allocator, blocks));
+}
+
+typename FreeListAllocator::BlockHandle FreeListAllocator::Allocate(std::size_t size)
+{
+	// Round up the size to the nearest multiple of the alignment.
+	size = (size + m_Alignment - 1) & ~(m_Alignment - 1);
+
+	DEBUG_PRINT("Allocating " << size);
+
+	for (size_t i = 0; i < MAX_ATTEMPTS; ++i)
+	{
+		BlockHandle index = FindFirstFreeBlock(size);
+
+		if (index == INVALID_HANDLE)
+		{
+			DEBUG_PRINT("No free block found.");
+			return INVALID_HANDLE;
+		}
+
+		Block &free_block = m_Blocks[index];
+
+		BlockDescriptor old_descriptor;
+		BlockDescriptor new_descriptor;
+		bool retry = false;
+
+		// Reduce the size of the free block.
+		do
+		{
+			old_descriptor = free_block.descriptor.load();
+
+			// If the block is too small or not free, we need to try again.
+			if (old_descriptor.GetSize() < size || !old_descriptor.IsFree())
+			{
+				DEBUG_PRINT("Block is too small or not free. Size of block is " << old_descriptor.GetSize());
+				retry = true;
+				break;
+			}
+
+			if (old_descriptor.GetSize() == size)
+			{
+				// The block is exactly the right size.
+				DEBUG_PRINT("Block is exactly the right size.");
+
+				// Mark the block as allocated.
+				if (MarkBlockAsFree(index, false))
+				{
+					// Remove the block from the free list.
+					// This is guaranteed to succeed since we own the block.
+					RemoveBlock(index);
+
+					// Return the block.
+					return index;
+				}
+
+				// Someone else changed the block in the meantime; start over.
+				retry = true;
+				break;
+			}
+
+			// Keep the tail of the block on the free list; the head becomes the allocation.
+			new_descriptor = old_descriptor;
+			new_descriptor.SetSize(old_descriptor.GetSize() - size);
+			new_descriptor.SetOffset(old_descriptor.GetOffset() + size);
+		} while (!free_block.descriptor.compare_exchange_weak(old_descriptor, new_descriptor));
+
+		if (retry)
+		{
+			// Try again with a fresh search.
+			continue;
+		}
+
+		DEBUG_PRINT("Reduced the size of the free block: " << old_descriptor.GetSize() << ", " << new_descriptor.GetSize());
+		DEBUG_PRINT("Old descriptor offset: " << old_descriptor.GetOffset());
+		DEBUG_PRINT("Old descriptor size: " << old_descriptor.GetSize());
+		DEBUG_PRINT("New size: " << size);
+
+		// We have now carved enough space off the free block. Create a new
+		// block descriptor for the allocated space at its old front.
+		PoolAllocator::BlockHandle allocated_block_handle = m_BlockAllocator->Allocate();
+		DEBUG_PRINT("Allocated block handle is " << allocated_block_handle);
+
+		if (allocated_block_handle == INVALID_HANDLE)
+		{
+			// The metadata pool is exhausted; the carved-off space cannot be
+			// recorded, so give up.
+			return INVALID_HANDLE;
+		}
+
+		Block &allocated_block = m_Blocks[allocated_block_handle];
+
+		allocated_block.descriptor = BlockDescriptor(old_descriptor.GetOffset(), size, false);
+		allocated_block.next = INVALID_HANDLE;
+
+		DEBUG_PRINT("Done setting the descriptor.");
+
+		BlockDescriptor descriptor = allocated_block.descriptor.load();
+
+		DEBUG_PRINT("Allocated block is " << descriptor.GetOffset() << ", " << descriptor.GetSize());
+
+		// Return the allocated block.
+		return allocated_block_handle;
+	}
+
+	return INVALID_HANDLE;
+}
+
+void FreeListAllocator::Deallocate(BlockHandle index)
+{
+	if (index == INVALID_HANDLE)
+		return;
+
+	DEBUG_PRINT("Deallocating block " << index);
+	Block &block = m_Blocks[index];
+
+	bool owns_index = true;
+
+	// Try to coalesce the block with its neighbors.
+	while (true)
+	{
+		BlockHandle prev = INVALID_HANDLE;
+		BlockHandle next = m_Head.load();
+
+		DEBUG_PRINT("Finding the prev and next blocks.");
+
+		while (next != INVALID_HANDLE && m_Blocks[next].descriptor.load().GetOffset() < block.descriptor.load().GetOffset())
+		{
+			prev = next;
+			next = m_Blocks[next].next.load();
+		}
+
+		// Prev and next are the free blocks adjacent to the block we are
+		// deallocating. Try to coalesce the block with each of them in turn.
+
+		if (TryCoalesceBlocks(index, prev, owns_index))
+		{
+			// The coalescence attempt was successful: prev absorbed our block.
+			// The index block is no longer valid; deallocate it and continue with prev.
+
+			if (!owns_index)
+				RemoveBlock(index);
+
+			m_BlockAllocator->Deallocate(index);
+
+			index = prev;
+			owns_index = false;
+
+			continue;
+		}
+
+		if (TryCoalesceBlocks(index, next, owns_index))
+		{
+			// The coalescence attempt was successful: next absorbed our block.
+			// The index block is no longer valid; deallocate it and continue with next.
+
+			RemoveBlock(index);
+			m_BlockAllocator->Deallocate(index);
+
+			index = next;
+			owns_index = false;
+
+			continue;
+		}
+
+		break;
+	}
+
+	// If we didn't coalesce the block with its neighbors, add it to the free list.
+	if (owns_index)
+	{
+		InsertBlockSorted(index);
+		MarkBlockAsFree(index, true);
+	}
+}
+
+// Try to coalesce two blocks, one of which is owned by us.
+// Returns whether the coalescing was successful.
+bool FreeListAllocator::TryCoalesceBlocks(BlockHandle a, BlockHandle b, bool owner_of_a)
+{
+	DEBUG_PRINT("Attempting to coalesce blocks " << a << " and " << b);
+
+	if (a == INVALID_HANDLE || b == INVALID_HANDLE)
+		return false;
+
+	BlockDescriptor descriptor_a = m_Blocks[a].descriptor.load();
+	BlockDescriptor descriptor_b = m_Blocks[b].descriptor.load();
+
+	// Perform a pre-check on the blocks: they have to be adjacent in memory.
+	if (descriptor_a.GetOffset() < descriptor_b.GetOffset())
+	{
+		if (descriptor_a.GetOffset() + descriptor_a.GetSize() != descriptor_b.GetOffset())
+		{
+			// The blocks are not adjacent.
+			DEBUG_PRINT("The blocks are not adjacent.");
+			return false;
+		}
+	}
+	else
+	{
+		if (descriptor_b.GetOffset() + descriptor_b.GetSize() != descriptor_a.GetOffset())
+		{
+			// The blocks are not adjacent.
+			DEBUG_PRINT("The blocks are not adjacent.");
+			return false;
+		}
+	}
+
+	if (!descriptor_b.IsFree())
+	{
+		// The B block is not free and as such cannot be coalesced.
+		DEBUG_PRINT("The B block was not free.");
+
+		return false;
+	}
+
+	// We are in principle good to coalesce the blocks.
+	// Start by owning the A block if we don't already.
+	if (!owner_of_a)
+	{
+		BlockDescriptor descriptor_a_old = descriptor_a;
+		descriptor_a.SetFree(false);
+
+		// Try to own block A.
+		if (!m_Blocks[a].descriptor.compare_exchange_strong(descriptor_a_old, descriptor_a))
+		{
+			// The block was changed by someone else. We cannot own it.
+			DEBUG_PRINT("Starting to own block A failed.");
+			return false;
+		}
+	}
+
+	BlockDescriptor new_descriptor = descriptor_b;
+
+	if (descriptor_a.GetOffset() < descriptor_b.GetOffset())
+	{
+		// The B block is after the A block.
+		new_descriptor.SetOffset(descriptor_a.GetOffset());
+		new_descriptor.SetSize(descriptor_a.GetSize() + descriptor_b.GetSize());
+	}
+	else
+	{
+		// The B block is before the A block.
+		new_descriptor.SetSize(descriptor_a.GetSize() + descriptor_b.GetSize());
+	}
+
+	DEBUG_PRINT("Trying to set the new descriptor of the B block, with " << new_descriptor.GetOffset() << " and " << new_descriptor.GetSize());
+
+	// Try to set the new descriptor of the B block.
+	if (!m_Blocks[b].descriptor.compare_exchange_weak(descriptor_b, new_descriptor))
+	{
+		// The B block was changed by someone else. Return the A block to its
+		// original state and give up.
+		// Note: since we own block A at this point, this cannot fail.
+		if (!owner_of_a)
+			MarkBlockAsFree(a, true);
+
+		return false;
+	}
+
+	DEBUG_PRINT("Successfully coalesced blocks " << a << " and " << b);
+
+	return true;
+}
+
+FreeListAllocator::BlockHandle FreeListAllocator::FindFirstFreeBlock(Size size)
+{
+	BlockHandle current = m_Head.load();
+
+	while (current != INVALID_HANDLE)
+	{
+		Block &block = m_Blocks[current];
+		BlockDescriptor descriptor = block.descriptor.load();
+
+		// Also check the free flag. The block might be on the free list but temporarily reserved.
+		if (descriptor.GetSize() >= size && descriptor.IsFree())
+		{
+			return current;
+		}
+
+		current = block.next.load();
+	}
+
+	return INVALID_HANDLE;
+}
+
+std::size_t FreeListAllocator::GetOffset(BlockHandle index)
+{
+	return m_Blocks[index].descriptor.load().GetOffset();
+}
+
+void FreeListAllocator::InsertBlockSorted(BlockHandle index)
+{
+	BlockHandle previous;
+	BlockHandle current;
+
+	do
+	{
+		// Restart the scan from the head on every retry.
+		previous = INVALID_HANDLE;
+		current = m_Head.load();
+
+		while (current != INVALID_HANDLE && m_Blocks[current].descriptor.load().GetOffset() < m_Blocks[index].descriptor.load().GetOffset())
+		{
+			previous = current;
+			current = m_Blocks[current].next;
+		}
+
+		if (current == index)
+		{
+			// The block is already on the free list.
+			DEBUG_PRINT("Block " << index << " is already on the free list.");
+			return;
+		}
+
+		m_Blocks[index].next = current;
+
+		if (previous == INVALID_HANDLE)
+		{
+			DEBUG_PRINT("Attempting to insert the block at the head.");
+
+			if (m_Head.compare_exchange_weak(current, index))
+			{
+				// Successfully inserted the block.
+				DEBUG_PRINT("Successfully inserted the block.");
+				return;
+			}
+		}
+		else
+		{
+			DEBUG_PRINT("Attempting to insert the block in the middle.");
+
+			if (m_Blocks[previous].next.compare_exchange_weak(current, index))
+			{
+				// Successfully inserted the block.
+				DEBUG_PRINT("Successfully inserted the block.");
+				return;
+			}
+		}
+	} while (true);
+}
+
+bool FreeListAllocator::RemoveBlock(BlockHandle index)
+{
+	BlockHandle previous;
+	BlockHandle current;
+
+	DEBUG_PRINT("Removing block " << index);
+
+	do
+	{
+		// Restart the scan from the head on every retry.
+		previous = INVALID_HANDLE;
+		current = m_Head.load();
+
+		// Find the previous block.
+		while (current != index && current != INVALID_HANDLE)
+		{
+			previous = current;
+			current = m_Blocks[current].next;
+		}
+
+		if (current == INVALID_HANDLE)
+		{
+			// The block was not on the free list, even though it was supposed to be free.
+			DEBUG_PRINT("Block was not on the free list.");
+			return false;
+		}
+
+		if (previous == INVALID_HANDLE)
+		{
+			if (m_Head.compare_exchange_weak(current, m_Blocks[index].next))
+			{
+				// Successfully removed the block.
+				return true;
+			}
+		}
+		else
+		{
+			if (m_Blocks[previous].next.compare_exchange_weak(current, m_Blocks[index].next))
+			{
+				// Successfully removed the block.
+				return true;
+			}
+		}
+	} while (true);
+}
+
+bool FreeListAllocator::MarkBlockAsFree(BlockHandle handle, bool mark_free)
+{
+	DEBUG_PRINT("Marking block " << handle << " as " << (mark_free ? "free" : "allocated"));
+
+	BlockDescriptor descriptor = m_Blocks[handle].descriptor.load();
+
+	if (descriptor.IsFree() == mark_free)
+	{
+		// The block is already in the desired state.
+		DEBUG_PRINT("The block is already in the desired state.");
+		return false;
+	}
+
+	BlockDescriptor new_descriptor = descriptor;
+	new_descriptor.SetFree(mark_free);
+
+	if (!m_Blocks[handle].descriptor.compare_exchange_strong(descriptor, new_descriptor))
+	{
+		// The block was changed in the meantime and we were unsuccessful.
+		DEBUG_PRINT("The block was changed in the meantime.");
+		return false;
+	}
+
+	DEBUG_PRINT("Successfully marked the block.");
+
+	return true;
+}
+
+void FreeListAllocator::PrintState()
+{
+	BlockHandle current = m_Head;
+
+	while (current != INVALID_HANDLE)
+	{
+		Block &block = m_Blocks[current];
+		BlockDescriptor descriptor = block.descriptor.load();
+
+		std::cout << "Free block " << current << " has (offset, size) = (" << descriptor.GetOffset() << ", " << descriptor.GetSize() << ")." << std::endl;
+
+		current = block.next;
+	}
+}
+
+size_t FreeListAllocator::GetNumFreeBlocks() const
+{
+	size_t count = 0;
+	BlockHandle current = m_Head;
+
+	while (current != INVALID_HANDLE)
+	{
+		++count;
+		current = m_Blocks[current].next;
+	}
+
+	return count;
+}
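+
+// All bookkeeping lives in the caller-provided metadata buffer and blocks are
+// identified by offsets rather than pointers, so the allocator can be placed
+// in shared memory and used from several processes. A hedged sketch (POSIX
+// shared memory; the name and sizes are illustrative):
+//
+//	size_t size = FreeListAllocator::ComputeMetadataBufferSize(1024);
+//	int fd = shm_open("/catkit_heap", O_CREAT | O_RDWR, 0600);
+//	ftruncate(fd, size);
+//	void *buffer = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+//
+//	auto allocator = FreeListAllocator::Create(buffer, 1024, 32, 1 << 20);	// first process
+//	auto view = FreeListAllocator::Open(buffer);				// other processes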
diff --git a/catkit_core/FreeListAllocator.h b/catkit_core/FreeListAllocator.h
new file mode 100644
index 00000000..b32c261e
--- /dev/null
+++ b/catkit_core/FreeListAllocator.h
@@ -0,0 +1,108 @@
+#ifndef FREE_LIST_ALLOCATOR_H
+#define FREE_LIST_ALLOCATOR_H
+
+#include "PoolAllocator.h"
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+// A simple lock-free free list allocator.
+class FreeListAllocator
+{
+public:
+	using BlockHandle = PoolAllocator::BlockHandle;
+	using Offset = std::uint32_t;
+	using Size = std::uint32_t;
+
+	static const BlockHandle INVALID_HANDLE = PoolAllocator::INVALID_HANDLE;
+
+	static std::size_t ComputeMetadataBufferSize(std::size_t max_num_blocks);
+
+	static std::shared_ptr<FreeListAllocator> Create(void *metadata_buffer, std::size_t max_num_blocks, std::size_t alignment, std::size_t buffer_size);
+	static std::shared_ptr<FreeListAllocator> Open(void *metadata_buffer);
+
+	BlockHandle Allocate(std::size_t size);
+	void Deallocate(BlockHandle index);
+
+	std::size_t GetOffset(BlockHandle index);
+
+	void PrintState();
+	size_t GetNumFreeBlocks() const;
+
+private:
+	// A unique descriptor of a block: its offset, size and free flag, packed
+	// so that it can be swapped atomically.
+	class BlockDescriptor
+	{
+	public:
+		BlockDescriptor();
+		BlockDescriptor(Offset offset, Size size, bool is_free);
+
+		void Set(const Offset &offset, const Size &size, const bool &is_free);
+
+		Offset GetOffset() const;
+		void SetOffset(const Offset &new_offset);
+
+		Size GetSize() const;
+		void SetSize(const Size &new_size);
+
+		bool IsFree() const;
+		void SetFree(const bool &is_free);
+
+	private:
+		Offset m_Offset;
+		Size m_SizeAndFreeFlag;
+
+		static constexpr Offset _FREE_FLAG = Offset(1) << 31;
+	};
+
+	// Check that the BlockDescriptor is lock-free atomic.
+	static_assert(std::atomic<BlockDescriptor>::is_always_lock_free);
+
+	struct Block
+	{
+		std::atomic<BlockDescriptor> descriptor;
+		std::atomic<BlockHandle> next;
+	};
+
+	struct Header
+	{
+		std::uint8_t version[4];
+		std::uint32_t max_num_blocks;
+		Size alignment;
+		Size total_buffer_size;
+		std::atomic<BlockHandle> head;
+	};
+
+	// Ensure a specific memory layout.
+	static_assert(offsetof(Header, version) == 0);
+	static_assert(offsetof(Header, max_num_blocks) == 4);
+	static_assert(offsetof(Header, alignment) == 8);
+	static_assert(offsetof(Header, total_buffer_size) == 12);
+	static_assert(offsetof(Header, head) == 16);
+	static_assert(sizeof(Header) == 20);
+
+	FreeListAllocator(Header *header, std::shared_ptr<PoolAllocator> block_allocator, Block *blocks);
+
+	static void GetMemoryLayout(void *metadata_buffer, std::size_t max_num_blocks, void **block_allocator_memory, Block **blocks);
+
+	Header &m_Header;
+
+	std::uint32_t &m_MaxNumBlocks;
+	std::uint32_t &m_Alignment;
+	std::atomic<BlockHandle> &m_Head;
+
+	std::shared_ptr<PoolAllocator> m_BlockAllocator;
+	Block *m_Blocks;
+
+	BlockHandle FindFirstFreeBlock(Size size);
+
+	void InsertBlockSorted(BlockHandle index);
+	bool RemoveBlock(BlockHandle index);
+
+	bool MarkBlockAsFree(BlockHandle index, bool mark_free);
+
+	bool TryCoalesceBlocks(BlockHandle a, BlockHandle b, bool owner_of_a);
+};
+
+#endif // FREE_LIST_ALLOCATOR_H
diff --git a/catkit_core/HashMap.h b/catkit_core/HashMap.h
new file mode 100644
index 00000000..30b88ee9
--- /dev/null
+++ b/catkit_core/HashMap.h
@@ -0,0 +1,207 @@
+#ifndef HASH_MAP_H
+#define HASH_MAP_H
+
+#include <algorithm>
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <string_view>
+
+// MurmurHash3, 32-bit version. Defined inline since this header may be
+// included in multiple translation units.
+inline uint32_t murmurhash3(const std::string_view &key, uint32_t seed = 0)
+{
+	const uint8_t *data = reinterpret_cast<const uint8_t *>(key.data());
+	size_t len = key.size();
+
+	uint32_t h = seed;
+	const uint32_t c1 = 0xcc9e2d51;
+	const uint32_t c2 = 0x1b873593;
+
+	// Partition in blocks of 4 bytes.
+	const size_t nblocks = len / 4;
+	const uint32_t *blocks = reinterpret_cast<const uint32_t *>(data);
+	for (size_t i = 0; i < nblocks; i++)
+	{
+		uint32_t k = blocks[i];
+		k *= c1;
+		k = (k << 15) | (k >> 17);
+		k *= c2;
+
+		h ^= k;
+		h = (h << 13) | (h >> 19);
+		h = h * 5 + 0xe6546b64;
+	}
+
+	// Process leftover bytes.
+	const uint8_t *tail = data + nblocks * 4;
+	uint32_t k1 = 0;
+
+	switch (len & 3)
+	{
+	case 3:
+		k1 ^= tail[2] << 16;
+		[[fallthrough]];
+	case 2:
+		k1 ^= tail[1] << 8;
+		[[fallthrough]];
+	case 1:
+		k1 ^= tail[0];
+		k1 *= c1;
+		k1 = (k1 << 15) | (k1 >> 17);
+		k1 *= c2;
+		h ^= k1;
+		[[fallthrough]];
+	case 0:
+		; // Do nothing.
+	}
+
+	h ^= static_cast<uint32_t>(len);
+	h ^= (h >> 16);
+	h *= 0x85ebca6b;
+	h ^= (h >> 13);
+	h *= 0xc2b2ae35;
+	h ^= (h >> 16);
+
+	return h;
+}
+
+// A hash map with the following limitations:
+// * entries cannot be removed;
+// * keys are strings of at most MaxKeyLength - 1 characters.
+template <typename Value, size_t MaxKeyLength, size_t Size>
+class HashMap
+{
+private:
+	enum EntryFlags : uint8_t
+	{
+		UNOCCUPIED = 0,
+		INITIALIZING = 1,
+		OCCUPIED = 2
+	};
+
+	struct Entry
+	{
+		Value value;
+
+		std::atomic<EntryFlags> flags = EntryFlags::UNOCCUPIED;
+		char key[MaxKeyLength];
+	};
+
+	Entry *m_Data;
+
+	size_t GetIndex(std::string_view key) const
+	{
+		return murmurhash3(key) % Size;
+	}
+
+public:
+	HashMap(void *buffer)
+		: m_Data(reinterpret_cast<Entry *>(buffer))
+	{
+	}
+
+	static std::size_t CalculateBufferSize()
+	{
+		return sizeof(Entry) * Size;
+	}
+
+	void Initialize()
+	{
+		for (size_t i = 0; i < Size; ++i)
+		{
+			m_Data[i].flags = EntryFlags::UNOCCUPIED;
+
+			std::fill(m_Data[i].key, m_Data[i].key + MaxKeyLength, '\0');
+		}
+	}
+
+	bool Insert(std::string_view key, const Value &value)
+	{
+		if (key.size() >= MaxKeyLength)
+		{
+			// Key is too long to fit in the fixed-size buffer.
+			return false;
+		}
+
+		size_t index = GetIndex(key);
+
+		// Linear probing.
+		for (size_t i = 0; i < Size; ++i)
+		{
+			size_t probe = (index + i) % Size;
+
+			// Try to claim this entry.
+			EntryFlags flags = EntryFlags::UNOCCUPIED;
+
+			bool success = m_Data[probe].flags.compare_exchange_strong(flags, EntryFlags::INITIALIZING, std::memory_order_acq_rel);
+
+			if (!success)
+			{
+				// The entry is either occupied or still initializing.
+
+				// If this entry is still initializing, spin-wait until it's occupied.
+				// This should almost never be necessary and should only last a short while if it is.
+				while (flags == EntryFlags::INITIALIZING)
+				{
+					flags = m_Data[probe].flags.load(std::memory_order_acquire);
+				}
+
+				if (flags == EntryFlags::OCCUPIED)
+				{
+					// Check if the stored key is our key.
+					if (key == m_Data[probe].key)
+					{
+						// Key already exists.
+						return false;
+					}
+				}
+			}
+			else
+			{
+				// Copy the key, ensuring null-termination.
+				key.copy(m_Data[probe].key, key.size());
+				m_Data[probe].key[key.size()] = '\0';
+
+				// Copy the value.
+				m_Data[probe].value = value;
+
+				// Mark the entry as occupied.
+				m_Data[probe].flags.store(EntryFlags::OCCUPIED, std::memory_order_release);
+
+				return true;
+			}
+		}
+
+		// Map is full.
+		return false;
+	}
+
+	Value *Find(std::string_view key) const
+	{
+		if (key.size() >= MaxKeyLength)
+		{
+			// Key is too long to fit in the fixed-size buffer.
+			return nullptr;
+		}
+
+		size_t index = GetIndex(key);
+
+		for (size_t i = 0; i < Size; ++i)
+		{
+			size_t probe = (index + i) % Size;
+
+			EntryFlags flags = m_Data[probe].flags.load(std::memory_order_acquire);
+
+			if (flags == EntryFlags::OCCUPIED && key == m_Data[probe].key)
+			{
+				return &m_Data[probe].value;
+			}
+
+			if (flags != EntryFlags::OCCUPIED)
+			{
+				// An unoccupied entry means the key was never inserted.
+				break;
+			}
+		}
+
+		// Key not found.
+		return nullptr;
+	}
+};
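+
+// A hedged usage sketch (the map parameters are illustrative):
+//
+//	using ExampleMap = HashMap<int, 32, 1024>;
+//
+//	std::vector<char> storage(ExampleMap::CalculateBufferSize());
+//	ExampleMap map(storage.data());
+//	map.Initialize();
+//
+//	map.Insert("answer", 42);
+//	if (int *value = map.Find("answer"))
+//		std::cout << *value << std::endl;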
+
+#endif // HASH_MAP_H
diff --git a/catkit_core/PoolAllocator.cpp b/catkit_core/PoolAllocator.cpp
new file mode 100644
index 00000000..715751d4
--- /dev/null
+++ b/catkit_core/PoolAllocator.cpp
@@ -0,0 +1,103 @@
+#include "PoolAllocator.h"
+
+#include <algorithm>
+
+const std::uint8_t VERSION[4] = {0, 0, 0, 0};
+
+// Initializer order matches the declaration order in the header.
+PoolAllocator::PoolAllocator(Header *header, std::atomic<BlockHandle> *next)
+	: m_Header(*header),
+	m_Capacity(m_Header.capacity),
+	m_Head(m_Header.head),
+	m_Next(next)
+{
+}
+
+void PoolAllocator::GetMemoryLayout(void *metadata_buffer, std::atomic<BlockHandle> **next)
+{
+	*next = reinterpret_cast<std::atomic<BlockHandle> *>(static_cast<char *>(metadata_buffer) + sizeof(Header));
+}
+
+std::shared_ptr<PoolAllocator> PoolAllocator::Create(void *metadata_buffer, std::uint32_t capacity)
+{
+	Header *header = static_cast<Header *>(metadata_buffer);
+
+	std::atomic<BlockHandle> *next;
+	GetMemoryLayout(metadata_buffer, &next);
+
+	// Set version and capacity.
+	std::copy(VERSION, VERSION + sizeof(VERSION), header->version);
+	header->capacity = capacity;
+
+	// Initialize the linked list: each block points to the next one, and the
+	// last block terminates the list.
+	header->head.store(0, std::memory_order_relaxed);
+
+	for (std::size_t i = 0; i < capacity; ++i)
+	{
+		if (i == capacity - 1)
+		{
+			next[i] = INVALID_HANDLE;
+		}
+		else
+		{
+			next[i] = i + 1;
+		}
+	}
+
+	return std::shared_ptr<PoolAllocator>(new PoolAllocator(header, next));
+}
+
+std::shared_ptr<PoolAllocator> PoolAllocator::Open(void *metadata_buffer)
+{
+	Header *header = static_cast<Header *>(metadata_buffer);
+
+	std::atomic<BlockHandle> *next;
+	GetMemoryLayout(metadata_buffer, &next);
+
+	return std::shared_ptr<PoolAllocator>(new PoolAllocator(header, next));
+}
+
+std::size_t PoolAllocator::CalculateMetadataBufferSize(std::uint32_t capacity)
+{
+	std::size_t size = sizeof(Header);
+	size += capacity * sizeof(std::atomic<BlockHandle>);
+
+	return size;
+}
+
+PoolAllocator::BlockHandle PoolAllocator::Allocate()
+{
+	BlockHandle head = m_Head.load(std::memory_order_relaxed);
+	BlockHandle next;
+
+	// Pop the first element from the linked list.
+	// Note: this pop is susceptible to the classic ABA problem if another
+	// thread pops and re-pushes the same handle between our load and the
+	// exchange; a tagged head (generation counter) would avoid this.
+	do
+	{
+		// Check if the pool is empty.
+		if (head == INVALID_HANDLE)
+		{
+			return INVALID_HANDLE;
+		}
+
+		next = m_Next[head].load(std::memory_order_relaxed);
+	} while (!m_Head.compare_exchange_weak(head, next));
+
+	// Return the popped element.
+	return head;
+}
+
+void PoolAllocator::Deallocate(BlockHandle index)
+{
+	// Check if the element is within the pool bounds.
+	if (index >= m_Capacity)
+	{
+		return;
+	}
+
+	BlockHandle head = m_Head.load(std::memory_order_relaxed);
+
+	// Push the element back on the front of the linked list.
+	do
+	{
+		m_Next[index] = head;
+	} while (!m_Head.compare_exchange_weak(head, index));
+}
diff --git a/catkit_core/PoolAllocator.h b/catkit_core/PoolAllocator.h
new file mode 100644
index 00000000..33ffa267
--- /dev/null
+++ b/catkit_core/PoolAllocator.h
@@ -0,0 +1,50 @@
+#ifndef POOL_ALLOCATOR_H
+#define POOL_ALLOCATOR_H
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <memory>
+
+// A simple lock-free pool allocator.
+class PoolAllocator
+{
+public:
+	using BlockHandle = std::uint32_t;
+	static const BlockHandle INVALID_HANDLE = std::numeric_limits<BlockHandle>::max();
+
+	static std::size_t CalculateMetadataBufferSize(std::uint32_t capacity);
+
+	static std::shared_ptr<PoolAllocator> Create(void *metadata_buffer, std::uint32_t capacity);
+	static std::shared_ptr<PoolAllocator> Open(void *metadata_buffer);
+
+	BlockHandle Allocate();
+	void Deallocate(BlockHandle index);
+
+private:
+	struct Header
+	{
+		std::uint8_t version[4];
+		std::uint32_t capacity;
+		std::atomic<BlockHandle> head;
+	};
+
+	// Ensure a specific memory layout.
+	static_assert(offsetof(PoolAllocator::Header, version) == 0);
+	static_assert(offsetof(PoolAllocator::Header, capacity) == 4);
+	static_assert(offsetof(PoolAllocator::Header, head) == 8);
+	static_assert(sizeof(PoolAllocator::Header) == 12);
+
+	PoolAllocator(Header *header, std::atomic<BlockHandle> *next);
+
+	static void GetMemoryLayout(void *metadata_buffer, std::atomic<BlockHandle> **next);
+
+	Header &m_Header;
+
+	std::uint32_t &m_Capacity;
+	std::atomic<BlockHandle> &m_Head;
+	std::atomic<BlockHandle> *m_Next;
+};
+
+#endif // POOL_ALLOCATOR_H