From 2400e5ae96400c1631ecd6ddfcbdf5f6eae6cc91 Mon Sep 17 00:00:00 2001 From: kstppd Date: Thu, 11 Apr 2024 09:52:46 +0300 Subject: [PATCH 1/2] modify fill in-kernel to reduce paging back and forth --- include/hashinator/defaults.h | 13 +++++++++++++ include/hashinator/hashers.h | 8 ++++---- include/hashinator/hashinator.h | 19 ++++--------------- include/hashinator/kernels_AMD.h | 17 ++++++++++++++--- include/hashinator/kernels_NVIDIA.h | 17 ++++++++++++++--- 5 files changed, 49 insertions(+), 25 deletions(-) diff --git a/include/hashinator/defaults.h b/include/hashinator/defaults.h index 5e0fb52..173bfcf 100644 --- a/include/hashinator/defaults.h +++ b/include/hashinator/defaults.h @@ -40,4 +40,17 @@ constexpr int MAX_BLOCKSIZE = 1024; template using DefaultHashFunction = HashFunctions::Fibonacci; } // namespace defaults + +struct Info { + Info(){}; + Info(int sz) + : sizePower(sz), fill(0), currentMaxBucketOverflow(defaults::BUCKET_OVERFLOW), tombstoneCounter(0), + err(status::invalid) {} + int sizePower; + size_t fill; + size_t currentMaxBucketOverflow; + size_t tombstoneCounter; + status err; +}; + } // namespace Hashinator diff --git a/include/hashinator/hashers.h b/include/hashinator/hashers.h index f7552be..f28dc78 100644 --- a/include/hashinator/hashers.h +++ b/include/hashinator/hashers.h @@ -143,20 +143,20 @@ class Hasher { // Reset wrapper static void reset(hash_pair* src, hash_pair* dst, const int sizePower, - size_t maxoverflow, size_t len, split_gpuStream_t s = 0) { + size_t maxoverflow, Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) { size_t blocks, blockSize; launchParams(len, blocks, blockSize); Hashinator::Hashers::reset_to_empty - <<>>(src, dst, sizePower, maxoverflow, len); + <<>>(src, dst, sizePower, maxoverflow, info, len); SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s)); } // Reset wrapper for all elements - static void reset_all(hash_pair* dst, size_t len, split_gpuStream_t s = 0) { + static void reset_all(hash_pair* dst, Hashinator::Info* info,size_t len, split_gpuStream_t s = 0) { size_t blocksNeeded = len / defaults::MAX_BLOCKSIZE; blocksNeeded = blocksNeeded + (blocksNeeded == 0); - reset_all_to_empty<<>>(dst, len); + reset_all_to_empty<<>>(dst,info, len); SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s)); } diff --git a/include/hashinator/hashinator.h b/include/hashinator/hashinator.h index f65e71f..d432390 100644 --- a/include/hashinator/hashinator.h +++ b/include/hashinator/hashinator.h @@ -54,18 +54,7 @@ using DefaultMetaAllocator = split::split_host_allocator; #define DefaultHasher void #endif -typedef struct Info { - Info(){}; - Info(int sz) - : sizePower(sz), fill(0), currentMaxBucketOverflow(defaults::BUCKET_OVERFLOW), tombstoneCounter(0), - err(status::invalid) {} - int sizePower; - size_t fill; - size_t currentMaxBucketOverflow; - size_t tombstoneCounter; - status err; -} MapInfo; - +using MapInfo = Hashinator::Info; template ::max(), KEY_TYPE TOMBSTONE = EMPTYBUCKET - 1, class HashFunction = HashFunctions::Fibonacci, class DeviceHasher = DefaultHasher, class Meta_Allocator = DefaultMetaAllocator> @@ -453,7 +442,7 @@ class Hashmap { if (prefetches) { buckets.optimizeGPU(s); } - DeviceHasher::reset_all(buckets.data(), buckets.size(), s); + DeviceHasher::reset_all(buckets.data(),_mapInfo, buckets.size(), s); _mapInfo->fill = 0; set_status((_mapInfo->fill == 0) ? success : fail); break; @@ -1239,9 +1228,9 @@ class Hashmap { } // If we do have overflown elements we put them back in the buckets SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s)); - DeviceHasher::reset(overflownElements, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow, + DeviceHasher::reset(overflownElements, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,_mapInfo, nOverflownElements, s); - _mapInfo->fill -= nOverflownElements; + DeviceHasher::insert(overflownElements, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow, &_mapInfo->currentMaxBucketOverflow, &_mapInfo->fill, nOverflownElements, &_mapInfo->err, s); diff --git a/include/hashinator/kernels_AMD.h b/include/hashinator/kernels_AMD.h index fc46534..b55dc5f 100644 --- a/include/hashinator/kernels_AMD.h +++ b/include/hashinator/kernels_AMD.h @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * */ #pragma once - +#include "include/hashinator/defaults.h" namespace Hashinator { namespace Hashers { @@ -24,7 +24,7 @@ namespace Hashers { * Resets all elements in dst to EMPTY, VAL_TYPE() * */ template ::max()> -__global__ void reset_all_to_empty(hash_pair* dst, const size_t len) { +__global__ void reset_all_to_empty(hash_pair* dst, Hashinator::Info* info,const size_t len) { const size_t tid = threadIdx.x + blockIdx.x * blockDim.x; // Early exit here if (tid >= len) { @@ -34,6 +34,11 @@ __global__ void reset_all_to_empty(hash_pair* dst, const siz if (dst[tid].first != EMPTYBUCKET) { dst[tid].first = EMPTYBUCKET; } + + //Thread 0 resets fill + if (tid==0){ + info->fill=0; + } return; } @@ -61,7 +66,7 @@ template , int WARPSIZE = defaults::WARPSIZE, int elementsPerWarp> __global__ void reset_to_empty(hash_pair* src, hash_pair* dst, - const int sizePower, size_t maxoverflow, size_t len) + const int sizePower, size_t maxoverflow, Hashinator::Info* info ,size_t len) { const int VIRTUALWARP = WARPSIZE / elementsPerWarp; @@ -74,6 +79,12 @@ __global__ void reset_to_empty(hash_pair* src, hash_pairfill -= len; + } + uint64_t subwarp_relative_index = (wid) % (WARPSIZE / VIRTUALWARP); uint64_t submask; if constexpr (elementsPerWarp == 1) { diff --git a/include/hashinator/kernels_NVIDIA.h b/include/hashinator/kernels_NVIDIA.h index 4f009c5..2be6e2e 100644 --- a/include/hashinator/kernels_NVIDIA.h +++ b/include/hashinator/kernels_NVIDIA.h @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * */ #pragma once - +#include "defaults.h" namespace Hashinator { namespace Hashers { @@ -24,7 +24,7 @@ namespace Hashers { * Resets all elements in dst to EMPTY, VAL_TYPE() * */ template ::max()> -__global__ void reset_all_to_empty(hash_pair* dst, const size_t len) { +__global__ void reset_all_to_empty(hash_pair* dst, Hashinator::Info* info,const size_t len) { const size_t tid = threadIdx.x + blockIdx.x * blockDim.x; // Early exit here if (tid >= len) { @@ -34,6 +34,11 @@ __global__ void reset_all_to_empty(hash_pair* dst, const siz if (dst[tid].first != EMPTYBUCKET) { dst[tid].first = EMPTYBUCKET; } + + //Thread 0 resets fill + if (tid==0){ + info->fill=0; + } return; } @@ -61,7 +66,7 @@ template , int WARPSIZE = defaults::WARPSIZE, int elementsPerWarp> __global__ void reset_to_empty(hash_pair* src, hash_pair* dst, - const int sizePower, size_t maxoverflow, size_t len) + const int sizePower, size_t maxoverflow,Hashinator::Info* info, size_t len) { const int VIRTUALWARP = WARPSIZE / elementsPerWarp; @@ -74,6 +79,12 @@ __global__ void reset_to_empty(hash_pair* src, hash_pairfill -= len; + } + uint32_t subwarp_relative_index = (wid) % (WARPSIZE / VIRTUALWARP); uint32_t submask; if constexpr (elementsPerWarp == 1) { From aa8daa59f5b93690ace9654a66302c7a35384959 Mon Sep 17 00:00:00 2001 From: kstppd Date: Thu, 11 Apr 2024 10:24:01 +0300 Subject: [PATCH 2/2] delete forgotten fill reset, keep check only if in debug mode --- include/hashinator/hashinator.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/hashinator/hashinator.h b/include/hashinator/hashinator.h index d432390..85f1044 100644 --- a/include/hashinator/hashinator.h +++ b/include/hashinator/hashinator.h @@ -443,8 +443,9 @@ class Hashmap { buckets.optimizeGPU(s); } DeviceHasher::reset_all(buckets.data(),_mapInfo, buckets.size(), s); - _mapInfo->fill = 0; + #ifdef HASHINATOR_DEBUG set_status((_mapInfo->fill == 0) ? success : fail); + #endif break; default: