Pass info #55

Merged · 9 commits · May 30, 2024
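
This PR threads a single Hashinator::Info pointer through the device hasher wrappers and kernels, replacing the separate sizePower, maxoverflow, d_overflow, d_fill, tombstone-counter and error-status arguments. The sketch below is an assumption reconstructed only from the members this diff dereferences (info->sizePower, info->currentMaxBucketOverflow, info->fill, info->tombstoneCounter, info->err); the real definition in the repository may differ or carry more fields.

#include <cstddef>

// Hedged sketch of the state consolidated behind Hashinator::Info.
// Field names come from the diff; the layout and the enum's exact shape are guesses.
namespace Hashinator {
enum class status { success, fail };  // only these two values appear in the diff
struct Info {
    int sizePower;                    // log2 of the bucket count
    size_t currentMaxBucketOverflow;  // current probing/overflow limit
    size_t fill;                      // number of elements currently stored
    size_t tombstoneCounter;          // tombstones produced by device-side erase
    status err;                       // last device-side status
};
} // namespace Hashinator
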
61 changes: 30 additions & 31 deletions include/hashinator/hashers.h
@@ -43,18 +43,18 @@ class Hasher {

public:
// Overload with separate input for keys and values.
static void insert(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t* d_overflow, size_t* d_fill, size_t len, status* err,
static void insert(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len,
split_gpuStream_t s = 0) {
size_t blocks, blockSize;
*err = status::success;
info->err = status::success;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::insert_kernel<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(keys, vals, buckets, sizePower, maxoverflow, d_overflow, d_fill, len, err);
<<<blocks, blockSize, 0, s>>>(keys, vals, buckets, info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
#ifndef NDEBUG
if (*err == status::fail) {
if (info->err == status::fail) {
std::cerr << "***** Hashinator Runtime Warning ********" << std::endl;
std::cerr << "Warning: Hashmap completely overflown in Device Insert.\nNot all ellements were "
"inserted!\nConsider resizing before calling insert"
@@ -65,17 +65,17 @@
}

// Overload with input for keys only, using the index as the value
static void insertIndex(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower, size_t maxoverflow,
size_t* d_overflow, size_t* d_fill, size_t len, status* err, split_gpuStream_t s = 0) {
static void insertIndex(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, Hashinator::Info* info,
size_t len, split_gpuStream_t s = 0) {
size_t blocks, blockSize;
*err = status::success;
info->err = status::success;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::insert_index_kernel<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(keys, buckets, sizePower, maxoverflow, d_overflow, d_fill, len, err);
<<<blocks, blockSize, 0, s>>>(keys, buckets, info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
#ifndef NDEBUG
if (*err == status::fail) {
if (info->err == status::fail) {
std::cerr << "***** Hashinator Runtime Warning ********" << std::endl;
std::cerr << "Warning: Hashmap completely overflown in Device InsertIndex.\nNot all elements were "
"inserted!\nConsider resizing before calling insert"
@@ -87,18 +87,17 @@

// Overload with hash_pair<key,val> (k,v) inputs
// Used by the tombstone cleaning method.
static void insert(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t* d_overflow, size_t* d_fill, size_t len, status* err,
split_gpuStream_t s = 0) {
static void insert(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) {
size_t blocks, blockSize;
*err = status::success;
info->err = status::success;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::insert_kernel<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(src, buckets, sizePower, maxoverflow, d_overflow, d_fill, len, err);
<<<blocks, blockSize, 0, s>>>(src, buckets, info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
#ifndef NDEBUG
if (*err == status::fail) {
if (info->err == status::fail) {
std::cerr << "***** Hashinator Runtime Warning ********" << std::endl;
std::cerr << "Warning: Hashmap completely overflown in Device Insert.\nNot all ellements were "
"inserted!\nConsider resizing before calling insert"
@@ -109,56 +108,56 @@
}

// Retrieve wrapper
static void retrieve(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t len, split_gpuStream_t s = 0) {
static void retrieve(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) {

size_t blocks, blockSize;
launchParams(len, blocks, blockSize);
retrieve_kernel<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE, elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(keys, vals, buckets, sizePower, maxoverflow);
<<<blocks, blockSize, 0, s>>>(keys, vals, buckets, info);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}

static void retrieve(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t len, split_gpuStream_t s = 0) {
static void retrieve(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) {

size_t blocks, blockSize;
launchParams(len, blocks, blockSize);
retrieve_kernel<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE, elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(src, buckets, sizePower, maxoverflow);
<<<blocks, blockSize, 0, s>>>(src, buckets, info);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}

// Delete wrapper
static void erase(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, size_t* d_tombstoneCounter, int sizePower,
size_t maxoverflow, size_t len, split_gpuStream_t s = 0) {
static void erase(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) {

size_t blocks, blockSize;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::delete_kernel<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, TOMBSTONE, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(keys, buckets, d_tombstoneCounter, sizePower, maxoverflow, len);
<<<blocks, blockSize, 0, s>>>(keys, buckets, info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}

// Reset wrapper
static void reset(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* dst, const int sizePower,
size_t maxoverflow, Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) {
static void reset(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* dst,
Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) {
size_t blocks, blockSize;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::reset_to_empty<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(src, dst, sizePower, maxoverflow, info, len);
<<<blocks, blockSize, 0, s>>>(src, dst, info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}

// Reset wrapper for all elements
static void reset_all(hash_pair<KEY_TYPE, VAL_TYPE>* dst, Hashinator::Info* info, size_t len,
split_gpuStream_t s = 0) {
size_t blocksNeeded = len / defaults::MAX_BLOCKSIZE;
blocksNeeded = blocksNeeded + (blocksNeeded == 0);
// fast ceil for positive ints
size_t blocksNeeded = len / defaults::MAX_BLOCKSIZE + (len % defaults::MAX_BLOCKSIZE != 0);
reset_all_to_empty<KEY_TYPE, VAL_TYPE, EMPTYBUCKET>
<<<blocksNeeded, defaults::MAX_BLOCKSIZE, 0, s>>>(dst, info, len);
<<<blocksNeeded, defaults::MAX_BLOCKSIZE, 0, s>>>(dst,info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}
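
The reworked reset_all above swaps the old floor-then-minimum block count for the integer ceiling-division idiom flagged by the "fast ceil for positive ints" comment. A standalone illustration of that idiom follows; the names are local to this example and not part of Hashinator.

#include <cassert>
#include <cstddef>

// ceil(len / blockSize) for positive integers without floating point:
// the remainder test adds one extra block whenever len is not an exact multiple.
constexpr std::size_t ceil_div(std::size_t len, std::size_t blockSize) {
    return len / blockSize + (len % blockSize != 0);
}

int main() {
    assert(ceil_div(1024, 1024) == 1);  // exact multiple: one block
    assert(ceil_div(1025, 1024) == 2);  // one leftover element gets its own block
    assert(ceil_div(1, 1024) == 1);     // small inputs still launch one block
    // The old computation (len / MAX_BLOCKSIZE, bumped to 1 only when zero)
    // rounded down, giving one block too few for non-multiples of the block size.
    return 0;
}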

23 changes: 8 additions & 15 deletions include/hashinator/hashinator.h
@@ -1281,11 +1281,9 @@ class Hashmap {
}
// If we do have overflown elements we put them back in the buckets
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
DeviceHasher::reset(overflownElements, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,
_mapInfo, nOverflownElements, s);
DeviceHasher::reset(overflownElements, buckets.data(), _mapInfo, nOverflownElements, s);

DeviceHasher::insert(overflownElements, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,
&_mapInfo->currentMaxBucketOverflow, &_mapInfo->fill, nOverflownElements, &_mapInfo->err, s);
DeviceHasher::insert(overflownElements, buckets.data(), _mapInfo, nOverflownElements, s);

SPLIT_CHECK_ERR(split_gpuFreeAsync(overflownElements, s));
return;
@@ -1308,8 +1306,7 @@
resize(neededPowerSize, targets::device, s);
}
_mapInfo->currentMaxBucketOverflow = _mapInfo->currentMaxBucketOverflow;
DeviceHasher::insert(keys, vals, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,
&_mapInfo->currentMaxBucketOverflow, &_mapInfo->fill, len, &_mapInfo->err, s);
DeviceHasher::insert(keys, vals, buckets.data(), _mapInfo, len, s);
return;
}

@@ -1330,8 +1327,7 @@
resize(neededPowerSize, targets::device, s);
}
_mapInfo->currentMaxBucketOverflow = _mapInfo->currentMaxBucketOverflow;
DeviceHasher::insertIndex(keys, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,
&_mapInfo->currentMaxBucketOverflow, &_mapInfo->fill, len, &_mapInfo->err, s);
DeviceHasher::insertIndex(keys, buckets.data(), _mapInfo, len, s);
return;
}

@@ -1350,8 +1346,7 @@
if (neededPowerSize > _mapInfo->sizePower) {
resize(neededPowerSize, targets::device, s);
}
DeviceHasher::insert(src, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,
&_mapInfo->currentMaxBucketOverflow, &_mapInfo->fill, len, &_mapInfo->err, s);
DeviceHasher::insert(src, buckets.data(), _mapInfo, len, s);
return;
}

@@ -1361,8 +1356,7 @@
if constexpr (prefetches) {
buckets.optimizeGPU(s);
}
DeviceHasher::retrieve(keys, vals, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow, len,
s);
DeviceHasher::retrieve(keys, vals, buckets.data(), _mapInfo, len, s);
return;
}

@@ -1372,7 +1366,7 @@
if constexpr (prefetches) {
buckets.optimizeGPU(s);
}
DeviceHasher::retrieve(src, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow, len, s);
DeviceHasher::retrieve(src, buckets.data(), _mapInfo, len, s);
return;
}

@@ -1384,8 +1378,7 @@
}
// Remember the last number of tombstones
size_t tbStore = tombstone_count();
DeviceHasher::erase(keys, buckets.data(), &_mapInfo->tombstoneCounter, _mapInfo->sizePower,
_mapInfo->currentMaxBucketOverflow, len, s);
DeviceHasher::erase(keys, buckets.data(), _mapInfo, len, s);
size_t tombstonesAdded = tombstone_count() - tbStore;
// Fill should be decremented by the number of tombstones added;
_mapInfo->fill -= tombstonesAdded;
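
The erase path above brackets the DeviceHasher::erase call with tombstone accounting: fill is reduced by however many tombstones the erase pass created. A toy, host-only restatement of that bookkeeping; MockInfo is an illustrative stand-in, not Hashinator::Info.

#include <cassert>
#include <cstddef>

struct MockInfo {                      // illustrative stand-in for Hashinator::Info
    std::size_t fill = 100;            // elements currently stored
    std::size_t tombstoneCounter = 4;  // tombstones from earlier erases
};

int main() {
    MockInfo info;
    std::size_t tbStore = info.tombstoneCounter;  // remember the count before erase
    info.tombstoneCounter += 3;                   // pretend the erase kernel tombstoned 3 keys
    std::size_t tombstonesAdded = info.tombstoneCounter - tbStore;
    info.fill -= tombstonesAdded;                 // erased keys no longer count toward fill
    assert(tombstonesAdded == 3 && info.fill == 97);
    return 0;
}
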
52 changes: 38 additions & 14 deletions include/hashinator/kernels_AMD.h
@@ -66,13 +66,15 @@ template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::nume
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void reset_to_empty(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* dst,
const int sizePower, size_t maxoverflow, Hashinator::Info* info ,size_t len)
Hashinator::Info* info ,size_t len)

{
const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const size_t wid = tid / VIRTUALWARP;
const size_t w_tid = tid % VIRTUALWARP;
const int sizePower = info->sizePower;
//const size_t maxoverflow = info->currentMaxBucketOverflow;

// Early quit if we have more warps than elements to insert
if (wid >= len) {
@@ -134,9 +136,14 @@ __global__ void reset_to_empty(hash_pair<KEY
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max(),
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void insert_kernel(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t* d_overflow, size_t* d_fill, size_t len, status* err) {

__global__ void insert_kernel(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len) {

size_t* d_overflow = &(info->currentMaxBucketOverflow);
size_t* d_fill = &(info->fill);
//status* err = &(info->err);
const int sizePower = info->sizePower;
//const size_t maxoverflow = info->currentMaxBucketOverflow;
const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const size_t wid = tid / VIRTUALWARP;
@@ -265,9 +272,13 @@ __global__ void insert_kernel(hash_pair<KEY_
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max(),
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void insert_kernel(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t* d_overflow, size_t* d_fill, size_t len, status* err) {
__global__ void insert_kernel(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len) {

const int sizePower = info->sizePower;
size_t* d_overflow = &(info->currentMaxBucketOverflow);
size_t* d_fill = &(info->fill);
//status* err = &(info->err);
const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const size_t wid = tid / VIRTUALWARP;
@@ -401,8 +412,8 @@ __global__ void insert_kernel(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max(),
KEY_TYPE TOMBSTONE = EMPTYBUCKET - 1, class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>,
int WARPSIZE = defaults::WARPSIZE, int elementsPerWarp>
__global__ void delete_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, size_t* d_tombstoneCounter,
int sizePower, size_t maxoverflow, size_t len) {
__global__ void delete_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len) {

const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
@@ -411,6 +422,10 @@ __global__ void delete_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buc
const size_t proper_w_tid = tid % WARPSIZE; // the proper WID as if we had no Virtual warps
const size_t proper_wid = tid / WARPSIZE;
const size_t blockWid = proper_wid % (WARPSIZE / 4); // we have twice the warpsize and half the warps per block
const int sizePower = info->sizePower;
const size_t maxoverflow = info->currentMaxBucketOverflow;
size_t* d_tombstoneCounter = &(info->tombstoneCounter);
//status* err = &(info->err);

__shared__ uint32_t deleteMask[WARPSIZE / 2];

@@ -492,9 +507,14 @@ __global__ void delete_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buc
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max(),
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void insert_index_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t* d_overflow, size_t* d_fill, size_t len, status* err) {

__global__ void insert_index_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len) {

size_t* d_overflow = &(info->currentMaxBucketOverflow);
size_t* d_fill = &(info->fill);
//status* err = &(info->err);
const int sizePower = info->sizePower;
//const size_t maxoverflow = info->currentMaxBucketOverflow;
const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const size_t wid = tid / VIRTUALWARP;
@@ -627,13 +647,15 @@ __global__ void insert_index_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max(),
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void retrieve_kernel(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow) {
__global__ void retrieve_kernel(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info) {

const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const size_t wid = tid / VIRTUALWARP;
const size_t w_tid = tid % VIRTUALWARP;
const int sizePower = info->sizePower;
const size_t maxoverflow = info->currentMaxBucketOverflow;

uint64_t subwarp_relative_index = (wid) % (WARPSIZE / VIRTUALWARP);
uint64_t submask;
@@ -682,12 +704,14 @@ template <typename KEY_TYPE, VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::nume
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void retrieve_kernel(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
int sizePower, size_t maxoverflow) {
Hashinator::Info* info) {

const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const size_t wid = tid / VIRTUALWARP;
const size_t w_tid = tid % VIRTUALWARP;
const int sizePower = info->sizePower;
const size_t maxoverflow = info->currentMaxBucketOverflow;

uint64_t subwarp_relative_index = (wid) % (WARPSIZE / VIRTUALWARP);
uint64_t submask;
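
Every kernel in kernels_AMD.h now opens with the same prologue: dereference the Info pointer once, keep plain locals for read-only fields and raw pointers for the counters it updates. Below is a minimal standalone CUDA sketch of that pattern; the struct, names, managed allocation, and plain CUDA calls are illustrative assumptions, since Hashinator itself goes through its split_gpu wrappers and also targets AMD.

#include <cstdio>
#include <cuda_runtime.h>

struct Info {                // illustrative stand-in for Hashinator::Info
    int sizePower;
    unsigned long long fill;
};

__global__ void prologue_demo(Info* info, size_t len) {
    // Same shape as the kernels above: read scalars out of Info once...
    const int sizePower = info->sizePower;
    unsigned long long* d_fill = &(info->fill);   // ...and keep pointers to mutable counters
    const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
    if (tid >= len) {
        return;
    }
    atomicAdd(d_fill, 1ULL);                      // counters are updated through Info
    if (tid == 0) {
        printf("sizePower=%d\n", sizePower);
    }
}

int main() {
    Info* info = nullptr;
    // Info must be dereferenceable on the device, hence managed (or device) memory.
    cudaMallocManaged(&info, sizeof(Info));
    info->sizePower = 10;
    info->fill = 0;
    prologue_demo<<<1, 32>>>(info, 32);
    cudaDeviceSynchronize();
    std::printf("fill=%llu\n", info->fill);       // expect 32
    cudaFree(info);
    return 0;
}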