Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

modify fill in-kernel to reduce paging back and forth #51

Merged
merged 2 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions include/hashinator/defaults.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,17 @@ constexpr int MAX_BLOCKSIZE = 1024;
template <typename T>
using DefaultHashFunction = HashFunctions::Fibonacci<T>;
} // namespace defaults

struct Info {
Info(){};
Info(int sz)
: sizePower(sz), fill(0), currentMaxBucketOverflow(defaults::BUCKET_OVERFLOW), tombstoneCounter(0),
err(status::invalid) {}
int sizePower;
size_t fill;
size_t currentMaxBucketOverflow;
size_t tombstoneCounter;
status err;
};

} // namespace Hashinator
8 changes: 4 additions & 4 deletions include/hashinator/hashers.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,20 +143,20 @@ class Hasher {

// Reset wrapper
static void reset(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* dst, const int sizePower,
size_t maxoverflow, size_t len, split_gpuStream_t s = 0) {
size_t maxoverflow, Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) {
size_t blocks, blockSize;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::reset_to_empty<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(src, dst, sizePower, maxoverflow, len);
<<<blocks, blockSize, 0, s>>>(src, dst, sizePower, maxoverflow, info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}

// Reset wrapper for all elements
static void reset_all(hash_pair<KEY_TYPE, VAL_TYPE>* dst, size_t len, split_gpuStream_t s = 0) {
static void reset_all(hash_pair<KEY_TYPE, VAL_TYPE>* dst, Hashinator::Info* info,size_t len, split_gpuStream_t s = 0) {
size_t blocksNeeded = len / defaults::MAX_BLOCKSIZE;
blocksNeeded = blocksNeeded + (blocksNeeded == 0);
reset_all_to_empty<KEY_TYPE, VAL_TYPE, EMPTYBUCKET><<<blocksNeeded, defaults::MAX_BLOCKSIZE, 0, s>>>(dst, len);
reset_all_to_empty<KEY_TYPE, VAL_TYPE, EMPTYBUCKET><<<blocksNeeded, defaults::MAX_BLOCKSIZE, 0, s>>>(dst,info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}

Expand Down
22 changes: 6 additions & 16 deletions include/hashinator/hashinator.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,7 @@ using DefaultMetaAllocator = split::split_host_allocator<T>;
#define DefaultHasher void
#endif

typedef struct Info {
Info(){};
Info(int sz)
: sizePower(sz), fill(0), currentMaxBucketOverflow(defaults::BUCKET_OVERFLOW), tombstoneCounter(0),
err(status::invalid) {}
int sizePower;
size_t fill;
size_t currentMaxBucketOverflow;
size_t tombstoneCounter;
status err;
} MapInfo;

using MapInfo = Hashinator::Info;
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max(),
KEY_TYPE TOMBSTONE = EMPTYBUCKET - 1, class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>,
class DeviceHasher = DefaultHasher, class Meta_Allocator = DefaultMetaAllocator<MapInfo>>
Expand Down Expand Up @@ -453,9 +442,10 @@ class Hashmap {
if (prefetches) {
buckets.optimizeGPU(s);
}
DeviceHasher::reset_all(buckets.data(), buckets.size(), s);
_mapInfo->fill = 0;
DeviceHasher::reset_all(buckets.data(),_mapInfo, buckets.size(), s);
#ifdef HASHINATOR_DEBUG
set_status((_mapInfo->fill == 0) ? success : fail);
#endif
break;

default:
Expand Down Expand Up @@ -1239,9 +1229,9 @@ class Hashmap {
}
// If we do have overflown elements we put them back in the buckets
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
DeviceHasher::reset(overflownElements, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,
DeviceHasher::reset(overflownElements, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,_mapInfo,
nOverflownElements, s);
_mapInfo->fill -= nOverflownElements;

DeviceHasher::insert(overflownElements, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,
&_mapInfo->currentMaxBucketOverflow, &_mapInfo->fill, nOverflownElements, &_mapInfo->err, s);

Expand Down
17 changes: 14 additions & 3 deletions include/hashinator/kernels_AMD.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* */
#pragma once

#include "include/hashinator/defaults.h"
namespace Hashinator {
namespace Hashers {

/*
* Resets all elements in dst to EMPTY, VAL_TYPE()
* */
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max()>
__global__ void reset_all_to_empty(hash_pair<KEY_TYPE, VAL_TYPE>* dst, const size_t len) {
__global__ void reset_all_to_empty(hash_pair<KEY_TYPE, VAL_TYPE>* dst, Hashinator::Info* info,const size_t len) {
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
// Early exit here
if (tid >= len) {
Expand All @@ -34,6 +34,11 @@ __global__ void reset_all_to_empty(hash_pair<KEY_TYPE, VAL_TYPE>* dst, const siz
if (dst[tid].first != EMPTYBUCKET) {
dst[tid].first = EMPTYBUCKET;
}

//Thread 0 resets fill
if (tid==0){
info->fill=0;
}
return;
}

Expand Down Expand Up @@ -61,7 +66,7 @@ template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::nume
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void reset_to_empty(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* dst,
const int sizePower, size_t maxoverflow, size_t len)
const int sizePower, size_t maxoverflow, Hashinator::Info* info ,size_t len)

{
const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
Expand All @@ -74,6 +79,12 @@ __global__ void reset_to_empty(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY
return;
}

//Thread 0 decrememnts fill by the total number of elements this kernel
//will remove
if (tid==0){
info->fill -= len;
}

uint64_t subwarp_relative_index = (wid) % (WARPSIZE / VIRTUALWARP);
uint64_t submask;
if constexpr (elementsPerWarp == 1) {
Expand Down
17 changes: 14 additions & 3 deletions include/hashinator/kernels_NVIDIA.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* */
#pragma once

#include "defaults.h"
namespace Hashinator {
namespace Hashers {

/*
* Resets all elements in dst to EMPTY, VAL_TYPE()
* */
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max()>
__global__ void reset_all_to_empty(hash_pair<KEY_TYPE, VAL_TYPE>* dst, const size_t len) {
__global__ void reset_all_to_empty(hash_pair<KEY_TYPE, VAL_TYPE>* dst, Hashinator::Info* info,const size_t len) {
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
// Early exit here
if (tid >= len) {
Expand All @@ -34,6 +34,11 @@ __global__ void reset_all_to_empty(hash_pair<KEY_TYPE, VAL_TYPE>* dst, const siz
if (dst[tid].first != EMPTYBUCKET) {
dst[tid].first = EMPTYBUCKET;
}

//Thread 0 resets fill
if (tid==0){
info->fill=0;
}
return;
}

Expand Down Expand Up @@ -61,7 +66,7 @@ template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::nume
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void reset_to_empty(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* dst,
const int sizePower, size_t maxoverflow, size_t len)
const int sizePower, size_t maxoverflow,Hashinator::Info* info, size_t len)

{
const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
Expand All @@ -74,6 +79,12 @@ __global__ void reset_to_empty(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY
return;
}

//Thread 0 decrememnts fill by the total number of elements this kernel
//will remove
if (tid==0){
info->fill -= len;
}

uint32_t subwarp_relative_index = (wid) % (WARPSIZE / VIRTUALWARP);
uint32_t submask;
if constexpr (elementsPerWarp == 1) {
Expand Down
Loading