Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Device Vector and Unordered Set Implementations #38

Closed
wants to merge 48 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
dbf4758
First commit of SplitDeviceVector + some unit tests for that
kstppd Oct 13, 2023
858be51
Equality operators
kstppd Oct 13, 2023
7032171
Account for host only modes
kstppd Oct 13, 2023
99799d1
Add device iterators
kstppd Oct 15, 2023
264da71
Modify copy_if signature to support T* instead of splitvectors for input
kstppd Oct 16, 2023
91671d9
Formatter pass
kstppd Oct 16, 2023
3668c0d
Enable stream compactions for SplitDeviceVector
kstppd Oct 16, 2023
e51066b
Rename test
kstppd Oct 16, 2023
ba3fb3c
Add host read only iterator...
kstppd Oct 17, 2023
ec853fc
Fix a small memory leak in the unit tests
kstppd Oct 17, 2023
0b57317
WIP more iterator functionality from SplitDeviceVector
kstppd Oct 18, 2023
453e993
WIP more functionallities
kstppd Oct 19, 2023
8751d54
WIP more functionallities...
kstppd Oct 19, 2023
31ab957
WIP more functionallities...
kstppd Oct 19, 2023
bd6f266
Bug fix. Insert range was using capacity and not size for resizing
kstppd Oct 19, 2023
7aed7c0
Range Insertion
kstppd Oct 20, 2023
6139ffd
Merge fixes from dev
kstppd Oct 24, 2023
dbeb97c
Add more overloads for compacting deviceVectors
kstppd Oct 24, 2023
043d0c0
And some more
kstppd Oct 24, 2023
6155653
Move SplitDeviceVector to its own header file and rename the class
kstppd Oct 30, 2023
b95ac83
Add non thread safe at() for device
kstppd Oct 31, 2023
3fbc99e
Merge branch 'dev' into deviceVector
kstppd Oct 31, 2023
3dc9bd3
Add [] operator for device code
kstppd Oct 31, 2023
c26f5b6
Make deviceVector allocator aware
kstppd Nov 2, 2023
e0f3a28
Merge branch 'dev' into deviceVector
kstppd Jan 5, 2024
e51fe66
Add device vector unit tests to meson
kstppd Jan 5, 2024
56ffab3
Turn memset to async variant in split tools compactions
kstppd Jan 6, 2024
1ffd8c4
Fix to warp wide erase
kstppd Jan 17, 2024
ce0ccc5
setStream method for deviceVector
kstppd Jan 21, 2024
d45af37
Renaming frenzy
kstppd Jan 29, 2024
6ea7745
Add unordered set file
kstppd Jan 29, 2024
2378156
formatter pass
kstppd Jan 29, 2024
b3e267c
another renaming and moving frenzy
kstppd Jan 29, 2024
b557725
Initial commit
kstppd Jan 29, 2024
72fae89
WIP
kstppd Jan 29, 2024
102b868
WIP 2 unordered set
kstppd Jan 30, 2024
41196d1
WIP 3 unordered set
kstppd Jan 31, 2024
4373f8e
WIP 4 unordered set
kstppd Jan 31, 2024
23be2b4
WIP 5 unordered set
kstppd Jan 31, 2024
eeed696
Oopss
kstppd Jan 31, 2024
6d77b0c
Merge dev updates to stream compaction
kstppd Feb 1, 2024
996a385
Fix splitvec's .data() method to not dereference with no reason
kstppd Feb 1, 2024
0646a5b
Add warp accessors to unordered set
kstppd Feb 5, 2024
8f37799
Add AMD backend for hasher kernels
kstppd Feb 5, 2024
ffeb724
Fix signature of reset_all_to_emtpy_set
kstppd Feb 5, 2024
8bf7650
Merge dev
kstppd Feb 5, 2024
19e8996
Merge device vector
kstppd Feb 5, 2024
247fad0
Fix split tools
kstppd Feb 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions include/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,9 @@ constexpr inline size_t nextPow2(size_t v) noexcept {
* @brief Computes the next optimal overflow for the hasher kernels
*/
HASHINATOR_HOSTDEVICE
[[nodiscard]]
constexpr inline size_t nextOverflow(size_t currentOverflow, size_t virtualWarp) noexcept {
size_t remainder = currentOverflow % virtualWarp;
return ((remainder)==0)?currentOverflow: currentOverflow + (virtualWarp - remainder);
[[nodiscard]] constexpr inline size_t nextOverflow(size_t currentOverflow, size_t virtualWarp) noexcept {
   // Round currentOverflow up to the next multiple of virtualWarp; already-aligned values pass through unchanged.
   const size_t excess = currentOverflow % virtualWarp;
   if (excess == 0) {
      return currentOverflow;
   }
   return currentOverflow + (virtualWarp - excess);
}

/**
Expand Down
63 changes: 63 additions & 0 deletions include/hashinator/hashers.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "../splitvector/gpu_wrappers.h"
#include "defaults.h"
#include "hashfunctions.h"
#include "hash_pair.h"
#ifdef __NVCC__
#include "kernels_NVIDIA.h"
#endif
Expand Down Expand Up @@ -160,6 +161,68 @@ class Hasher {
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}


/* ----------------------------------- Members used by Hashinator::Unordered_Set -----------------------------------*/


static void insert_set(KEY_TYPE* keys, KEY_TYPE* buckets, int sizePower,size_t maxoverflow, size_t* d_overflow,
size_t* d_fill, size_t len, status* err,split_gpuStream_t s = 0) {
//Make sure this is being used by Unordered_Set
static_assert(std::is_same<void,VAL_TYPE>::value);
size_t blocks, blockSize;
*err = status::success;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::insert_set_kernel<KEY_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE,elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(keys, buckets, sizePower, maxoverflow, d_overflow, d_fill, len, err);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
#ifndef NDEBUG
if (*err == status::fail) {
std::cerr << "***** Hashinator Runtime Warning ********" << std::endl;
std::cerr << "Warning: Hashmap completely overflown in Device Insert.\nNot all ellements were "
"inserted!\nConsider resizing before calling insert"
<< std::endl;
std::cerr << "******************************" << std::endl;
}
#endif
}

// Delete wrapper
static void erase_set(KEY_TYPE* keys, KEY_TYPE* buckets, size_t* d_tombstoneCounter, int sizePower,
size_t maxoverflow, size_t len, split_gpuStream_t s = 0) {

//Make sure this is being used by Unordered_Set
static_assert(std::is_same<void,VAL_TYPE>::value);
size_t blocks, blockSize;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::delete_set_kernel<KEY_TYPE, EMPTYBUCKET, TOMBSTONE, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(keys, buckets, d_tombstoneCounter, sizePower, maxoverflow, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}

// Reset wrapper
static void reset_set(KEY_TYPE* src, KEY_TYPE* dst, const int sizePower,size_t maxoverflow,
size_t len, split_gpuStream_t s = 0) {
//Make sure this is being used by Unordered_Set
static_assert(std::is_same<void,VAL_TYPE>::value);
size_t blocks, blockSize;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::reset_to_empty_set<KEY_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(src, dst, sizePower, maxoverflow, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}
// Reset wrapper for all elements
static void reset_all_set(KEY_TYPE* dst, size_t len, split_gpuStream_t s = 0) {
//Make sure this is being used by Unordered_Set
static_assert(std::is_same<void,VAL_TYPE>::value);
size_t blocksNeeded = len / defaults::MAX_BLOCKSIZE;
blocksNeeded = blocksNeeded + (blocksNeeded == 0);
reset_all_to_empty_set<KEY_TYPE, EMPTYBUCKET><<<blocksNeeded, defaults::MAX_BLOCKSIZE, 0, s>>>(dst, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}


private:
static void launchParams(size_t N, size_t& blocks, size_t& blockSize) {
// fast ceil for positive ints
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* File: hashinator.h
/* File: hashmap.h
* Authors: Kostis Papadakis, Urs Ganse and Markus Battarbee (2023)
* Description: A hybrid hashmap that can operate on both
* CPUs and GPUs using CUDA unified memory.
Expand All @@ -24,20 +24,20 @@
#ifdef HASHINATOR_CPU_ONLY_MODE
#define SPLIT_CPU_ONLY_MODE
#endif
#include "../common.h"
#include "../splitvector/gpu_wrappers.h"
#include "../splitvector/split_allocators.h"
#include "../splitvector/splitvec.h"
#include "defaults.h"
#include "hash_pair.h"
#include "hashfunctions.h"
#include "../../common.h"
#include "../../splitvector/gpu_wrappers.h"
#include "../../splitvector/split_allocators.h"
#include "../../splitvector/splitvec.h"
#include "../defaults.h"
#include "../hash_pair.h"
#include "../hashfunctions.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <stdexcept>
#ifndef HASHINATOR_CPU_ONLY_MODE
#include "../splitvector/split_tools.h"
#include "hashers.h"
#include "../../splitvector/split_tools.h"
#include "../hashers.h"
#endif

namespace Hashinator {
Expand Down Expand Up @@ -99,7 +99,7 @@ class Hashmap {

// Deallocates the bookeepping info and the device pointer
void deallocate_device_handles() {
if (device_map==nullptr){
if (device_map == nullptr) {
return;
}
#ifndef HASHINATOR_CPU_ONLY_MODE
Expand Down Expand Up @@ -138,11 +138,11 @@ class Hashmap {
Hashmap(Hashmap<KEY_TYPE, VAL_TYPE>&& other) {
preallocate_device_handles();
_mapInfo = other._mapInfo;
other._mapInfo=nullptr;
other._mapInfo = nullptr;
buckets = std::move(other.buckets);
};

Hashmap& operator=(const Hashmap<KEY_TYPE,VAL_TYPE>& other) {
Hashmap& operator=(const Hashmap<KEY_TYPE, VAL_TYPE>& other) {
if (this == &other) {
return *this;
}
Expand All @@ -167,8 +167,8 @@ class Hashmap {
}
_metaAllocator.deallocate(_mapInfo, 1);
_mapInfo = other._mapInfo;
other._mapInfo=nullptr;
buckets =std::move(other.buckets);
other._mapInfo = nullptr;
buckets = std::move(other.buckets);
return *this;
}

Expand Down Expand Up @@ -847,13 +847,13 @@ class Hashmap {
if (w_tid == winner) {
KEY_TYPE old = split::s_atomicCAS(&buckets[probingindex].first, EMPTYBUCKET, candidateKey);
if (old == EMPTYBUCKET) {
threadOverflow =(probingindex < optimalindex) ? (1 << sizePower) : (probingindex - optimalindex+1);
threadOverflow = (probingindex < optimalindex) ? (1 << sizePower) : (probingindex - optimalindex + 1);
split::s_atomicExch(&buckets[probingindex].second, candidateVal);
warpDone = 1;
split::s_atomicAdd(&_mapInfo->fill, 1);
if (threadOverflow > _mapInfo->currentMaxBucketOverflow) {
split::s_atomicExch((unsigned long long*)(&_mapInfo->currentMaxBucketOverflow),
(unsigned long long)nextOverflow(threadOverflow,defaults::WARPSIZE));
(unsigned long long)nextOverflow(threadOverflow, defaults::WARPSIZE));
}
} else if (old == candidateKey) {
// Parallel stuff are fun. Major edge case!
Expand Down Expand Up @@ -931,14 +931,14 @@ class Hashmap {
if (w_tid == winner) {
KEY_TYPE old = split::s_atomicCAS(&buckets[probingindex].first, EMPTYBUCKET, candidateKey);
if (old == EMPTYBUCKET) {
threadOverflow = (probingindex < optimalindex) ? (1 << sizePower) : (probingindex - optimalindex+1);
threadOverflow = (probingindex < optimalindex) ? (1 << sizePower) : (probingindex - optimalindex + 1);
split::s_atomicExch(&buckets[probingindex].second, candidateVal);
warpDone = 1;
localCount = 1;
split::s_atomicAdd(&_mapInfo->fill, 1);
if (threadOverflow > _mapInfo->currentMaxBucketOverflow) {
split::s_atomicExch((unsigned long long*)(&_mapInfo->currentMaxBucketOverflow),
(unsigned long long)nextOverflow(threadOverflow,defaults::WARPSIZE));
(unsigned long long)nextOverflow(threadOverflow, defaults::WARPSIZE));
}
} else if (old == candidateKey) {
// Parallel stuff are fun. Major edge case!
Expand Down Expand Up @@ -1113,7 +1113,7 @@ class Hashmap {
split::tools::Cuda_mempool mPool(memory_for_pool, s);
size_t retval =
split::tools::copy_if_raw<hash_pair<KEY_TYPE, VAL_TYPE>, Rule, defaults::MAX_BLOCKSIZE, defaults::WARPSIZE>(
buckets, elements, rule, nBlocks, mPool, s);
buckets.data(), elements, buckets.size(), rule, nBlocks, mPool, s);
return retval;
}

Expand All @@ -1133,8 +1133,8 @@ class Hashmap {
return elements.size();
}
template <typename Rule>
size_t extractKeysByPattern(split::SplitVector<KEY_TYPE>& elements, Rule rule, void *stack, size_t max_size, split_gpuStream_t s = 0,
bool prefetches = true) {
size_t extractKeysByPattern(split::SplitVector<KEY_TYPE>& elements, Rule rule, void* stack, size_t max_size,
split_gpuStream_t s = 0, bool prefetches = true) {
elements.resize(_mapInfo->fill + 1, true);
if (prefetches) {
elements.optimizeGPU(s);
Expand All @@ -1152,7 +1152,8 @@ class Hashmap {
};
return extractKeysByPattern(elements, rule, s, prefetches);
}
size_t extractAllKeys(split::SplitVector<KEY_TYPE>& elements, void *stack, size_t max_size, split_gpuStream_t s = 0, bool prefetches = true) {
size_t extractAllKeys(split::SplitVector<KEY_TYPE>& elements, void* stack, size_t max_size, split_gpuStream_t s = 0,
bool prefetches = true) {
// Extract all keys
auto rule = [] __host__ __device__(const hash_pair<KEY_TYPE, VAL_TYPE>& kval) -> bool {
return kval.first != EMPTYBUCKET && kval.first != TOMBSTONE;
Expand Down Expand Up @@ -1371,7 +1372,7 @@ class Hashmap {

public:
HASHINATOR_DEVICEONLY
device_iterator(Hashmap<KEY_TYPE, VAL_TYPE>& hashtable, size_t index) : index(index),hashtable(&hashtable) {}
device_iterator(Hashmap<KEY_TYPE, VAL_TYPE>& hashtable, size_t index) : index(index), hashtable(&hashtable) {}

HASHINATOR_DEVICEONLY
size_t getIndex() { return index; }
Expand Down Expand Up @@ -1418,7 +1419,7 @@ class Hashmap {
public:
HASHINATOR_DEVICEONLY
explicit const_device_iterator(const Hashmap<KEY_TYPE, VAL_TYPE>& hashtable, size_t index)
: index(index), hashtable(&hashtable){}
: index(index), hashtable(&hashtable) {}

HASHINATOR_DEVICEONLY
size_t getIndex() { return index; }
Expand Down Expand Up @@ -1629,7 +1630,8 @@ class Hashmap {
void set_element(const KEY_TYPE& key, VAL_TYPE val) {
size_t thread_overflowLookup = 0;
insert_element(key, val, thread_overflowLookup);
atomicMax((unsigned long long*)&(_mapInfo->currentMaxBucketOverflow), nextOverflow(thread_overflowLookup,defaults::WARPSIZE/defaults::elementsPerWarp));
atomicMax((unsigned long long*)&(_mapInfo->currentMaxBucketOverflow),
nextOverflow(thread_overflowLookup, defaults::WARPSIZE / defaults::elementsPerWarp));
}

HASHINATOR_DEVICEONLY
Expand Down
Loading
Loading