Device vector and unordered set functionalities #50

Open
wants to merge 58 commits into base: dev

Changes from all commits (58 commits)
dbf4758
First commit of SplitDeviceVector + some unit tests for that
kstppd Oct 13, 2023
858be51
Equality operators
kstppd Oct 13, 2023
7032171
Account for host only modes
kstppd Oct 13, 2023
99799d1
Add device iterators
kstppd Oct 15, 2023
264da71
Modify copy_if signature to support T* instead of splitvectors for input
kstppd Oct 16, 2023
91671d9
Formatter pass
kstppd Oct 16, 2023
3668c0d
Enable stream compactions for SplitDeviceVector
kstppd Oct 16, 2023
e51066b
Rename test
kstppd Oct 16, 2023
ba3fb3c
Add host read only iterator...
kstppd Oct 17, 2023
ec853fc
Fix a small memory leak in the unit tests
kstppd Oct 17, 2023
0b57317
WIP more iterator functionality from SplitDeviceVector
kstppd Oct 18, 2023
453e993
WIP more functionalities
kstppd Oct 19, 2023
8751d54
WIP more functionalities...
kstppd Oct 19, 2023
31ab957
WIP more functionalities...
kstppd Oct 19, 2023
bd6f266
Bug fix. Insert range was using capacity and not size for resizing
kstppd Oct 19, 2023
7aed7c0
Range Insertion
kstppd Oct 20, 2023
6139ffd
Merge fixes from dev
kstppd Oct 24, 2023
dbeb97c
Add more overloads for compacting deviceVectors
kstppd Oct 24, 2023
043d0c0
And some more
kstppd Oct 24, 2023
6155653
Move SplitDeviceVector to its own header file and rename the class
kstppd Oct 30, 2023
b95ac83
Add non thread safe at() for device
kstppd Oct 31, 2023
3fbc99e
Merge branch 'dev' into deviceVector
kstppd Oct 31, 2023
3dc9bd3
Add [] operator for device code
kstppd Oct 31, 2023
c26f5b6
Make deviceVector allocator aware
kstppd Nov 2, 2023
e0f3a28
Merge branch 'dev' into deviceVector
kstppd Jan 5, 2024
e51fe66
Add device vector unit tests to meson
kstppd Jan 5, 2024
56ffab3
Turn memset to async variant in split tools compactions
kstppd Jan 6, 2024
1ffd8c4
Fix to warp wide erase
kstppd Jan 17, 2024
ce0ccc5
setStream method for deviceVector
kstppd Jan 21, 2024
d45af37
Renaming frenzy
kstppd Jan 29, 2024
6ea7745
Add unordered set file
kstppd Jan 29, 2024
2378156
formatter pass
kstppd Jan 29, 2024
b3e267c
another renaming and moving frenzy
kstppd Jan 29, 2024
b557725
Initial commit
kstppd Jan 29, 2024
72fae89
WIP
kstppd Jan 29, 2024
102b868
WIP 2 unordered set
kstppd Jan 30, 2024
41196d1
WIP 3 unordered set
kstppd Jan 31, 2024
4373f8e
WIP 4 unordered set
kstppd Jan 31, 2024
23be2b4
WIP 5 unordered set
kstppd Jan 31, 2024
eeed696
Oops
kstppd Jan 31, 2024
6d77b0c
Merge dev updates to stream compaction
kstppd Feb 1, 2024
996a385
Fix splitvec's .data() method to not dereference for no reason
kstppd Feb 1, 2024
0646a5b
Add warp accessors to unordered set
kstppd Feb 5, 2024
8f37799
Add AMD backend for hasher kernels
kstppd Feb 5, 2024
ffeb724
Fix signature of reset_all_to_empty_set
kstppd Feb 5, 2024
760e63a
Merge unordered_set
kstppd Feb 5, 2024
daddb6d
Add warp accessor retrieval to benchmark script
kstppd Feb 6, 2024
359ed52
Cherry pick nodiscard fixes and fix it a bit more
markusbattarbee Feb 6, 2024
eedaf21
AMD backend for unordered set. So far tested only on Instinct.
kstppd Feb 20, 2024
b93708e
Make device vector accept stream as argument in methods rather than …
kstppd Mar 1, 2024
bfadb03
Formatting pass
kstppd Mar 1, 2024
8438852
Fix signed-unsigned error which was causing erroneous outputs from wa…
markusbattarbee Mar 28, 2024
29cd8c3
Fix signed-unsigned error which was causing erroneous outputs from wa…
markusbattarbee Mar 28, 2024
8725a4b
Merge device_vector and unordered_set
kstppd Apr 9, 2024
cd1ea1c
Merge dev
kstppd Apr 9, 2024
45dff96
Merge dev
kstppd Apr 9, 2024
ea5f152
Move hashmap and unordered_set one dir up
kstppd Apr 9, 2024
58b0eb5
nudge CI
kstppd Apr 9, 2024
1 change: 1 addition & 0 deletions README.md
@@ -157,3 +157,4 @@ Hashinator and SplitVector include a suite of unit tests using [googletest](http
## Credits for people who contributed but do not appear in the contribution list.
+ Special thanks to [Urs Ganse](https://github.com/ursg) for the initial CPU version of Hashinator.
+ Thanks to [Jaro Hokkanen](https://github.com/hokkanen). Hashinator's arch agnostic approach was inspired by his work.

2 changes: 2 additions & 0 deletions include/common.h
@@ -76,6 +76,8 @@ inline bool isDeviceAccessible(void* ptr){
}
return true;
#endif
+(void)ptr;
+return false;
}

/**
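The two `+` lines give the host-only build a defined result: with no device backend compiled in, the pointer is deliberately unused and the query reports that nothing is device accessible (previously this path apparently fell off the end of the function without returning). A minimal sketch of the resulting behavior; the include path is an assumption:

#include <cassert>
#include <cstdlib>
#include "common.h" // assumed include path for isDeviceAccessible

int main() {
   void* p = std::malloc(64);
   // CPU-only build: the new fallthrough returns false for any host pointer.
   assert(!isDeviceAccessible(p));
   std::free(p);
   return 0;
}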
63 changes: 63 additions & 0 deletions include/hashinator/hashers.h
@@ -23,6 +23,7 @@
#include "../splitvector/gpu_wrappers.h"
#include "defaults.h"
#include "hashfunctions.h"
#include "hash_pair.h"
#ifdef __NVCC__
#include "kernels_NVIDIA.h"
#endif
@@ -160,6 +161,68 @@ class Hasher {
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}


/* ----------------------------------- Members used by Hashinator::Unordered_Set -----------------------------------*/


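// Insert wrapper: launches insert_set_kernel over len keys; a completely full
// table is reported through *err (checked after the stream sync in debug builds).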
static void insert_set(KEY_TYPE* keys, KEY_TYPE* buckets, int sizePower, size_t maxoverflow, size_t* d_overflow,
size_t* d_fill, size_t len, status* err, split_gpuStream_t s = 0) {
// Make sure this is being used by Unordered_Set
static_assert(std::is_same<void, VAL_TYPE>::value);
size_t blocks, blockSize;
*err = status::success;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::insert_set_kernel<KEY_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE,elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(keys, buckets, sizePower, maxoverflow, d_overflow, d_fill, len, err);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
#ifndef NDEBUG
if (*err == status::fail) {
std::cerr << "***** Hashinator Runtime Warning ********" << std::endl;
std::cerr << "Warning: Hashmap completely overflown in Device Insert.\nNot all ellements were "
"inserted!\nConsider resizing before calling insert"
<< std::endl;
std::cerr << "******************************" << std::endl;
}
#endif
}

// Delete wrapper
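// (matching keys are flipped to TOMBSTONE and tallied in d_tombstoneCounter)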
static void erase_set(KEY_TYPE* keys, KEY_TYPE* buckets, size_t* d_tombstoneCounter, int sizePower,
size_t maxoverflow, size_t len, split_gpuStream_t s = 0) {

// Make sure this is being used by Unordered_Set
static_assert(std::is_same<void, VAL_TYPE>::value);
size_t blocks, blockSize;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::delete_set_kernel<KEY_TYPE, EMPTYBUCKET, TOMBSTONE, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(keys, buckets, d_tombstoneCounter, sizePower, maxoverflow, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}

// Reset wrapper
static void reset_set(KEY_TYPE* src, KEY_TYPE* dst, const int sizePower, size_t maxoverflow,
size_t len, split_gpuStream_t s = 0) {
// Make sure this is being used by Unordered_Set
static_assert(std::is_same<void, VAL_TYPE>::value);
size_t blocks, blockSize;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::reset_to_empty_set<KEY_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(src, dst, sizePower, maxoverflow, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}
// Reset wrapper for all elements
static void reset_all_set(KEY_TYPE* dst, size_t len, split_gpuStream_t s = 0) {
// Make sure this is being used by Unordered_Set
static_assert(std::is_same<void, VAL_TYPE>::value);
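// Grid sizing below: the integer division floors, and the second line raises
// a zero block count to one so that small inputs still launch a block.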
size_t blocksNeeded = len / defaults::MAX_BLOCKSIZE;
blocksNeeded = blocksNeeded + (blocksNeeded == 0);
reset_all_to_empty_set<KEY_TYPE, EMPTYBUCKET><<<blocksNeeded, defaults::MAX_BLOCKSIZE, 0, s>>>(dst, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}


private:
static void launchParams(size_t N, size_t& blocks, size_t& blockSize) {
// fast ceil for positive ints
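Taken together, these wrappers give Hashinator::Unordered_Set the same host-side launch pattern the Hashmap already uses. Below is a hedged sketch of the intended call sequence: only the wrapper signatures come from this diff, while SetHasher (standing for any Hasher instantiation with VAL_TYPE = void, which the static_asserts enforce), the Hashinator::status spelling, the include path, and the CUDA-backend allocation calls are assumptions.

#include <cstdint>
#include <cuda_runtime.h>
#include "hashinator/hashers.h" // assumed include path

using key_t = uint32_t;

void bulk_set_roundtrip(key_t* keys, size_t len, int sizePower, split_gpuStream_t s) {
   const size_t nBuckets = size_t(1) << sizePower;
   key_t* buckets = nullptr;
   size_t *d_overflow = nullptr, *d_fill = nullptr, *d_tombstones = nullptr;
   Hashinator::status* err = nullptr; // namespace assumed
   // Unified memory: the wrappers sync the stream and then read these host-side.
   cudaMallocManaged(&buckets, nBuckets * sizeof(key_t));
   cudaMallocManaged(&d_overflow, sizeof(size_t));
   cudaMallocManaged(&d_fill, sizeof(size_t));
   cudaMallocManaged(&d_tombstones, sizeof(size_t));
   cudaMallocManaged(&err, sizeof(Hashinator::status));
   *d_overflow = 32; // illustrative initial probe window
   *d_fill = 0;
   *d_tombstones = 0;

   // Buckets must start out as EMPTYBUCKET; reset_all_set does that in one launch.
   SetHasher::reset_all_set(buckets, nBuckets, s);
   // Bulk insert; insert_set synchronizes s, so *err and *d_fill are valid here.
   SetHasher::insert_set(keys, buckets, sizePower, *d_overflow, d_overflow, d_fill, len, err, s);
   // Bulk erase of the same keys: hits become TOMBSTONE and are tallied.
   SetHasher::erase_set(keys, buckets, d_tombstones, sizePower, *d_overflow, len, s);
}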
36 changes: 19 additions & 17 deletions include/hashinator/hashinator.h → include/hashinator/hashmap.h
@@ -1,4 +1,4 @@
-/* File: hashinator.h
+/* File: hashmap.h
* Authors: Kostis Papadakis, Urs Ganse and Markus Battarbee (2023)
* Description: A hybrid hashmap that can operate on both
* CPUs and GPUs using CUDA unified memory.
@@ -99,7 +99,7 @@ class Hashmap {

// Deallocates the bookkeeping info and the device pointer
void deallocate_device_handles() {
-if (device_map==nullptr){
+if (device_map == nullptr) {
return;
}
#ifndef HASHINATOR_CPU_ONLY_MODE
@@ -138,11 +138,11 @@ class Hashmap {
Hashmap(Hashmap<KEY_TYPE, VAL_TYPE>&& other) {
preallocate_device_handles();
_mapInfo = other._mapInfo;
-other._mapInfo=nullptr;
+other._mapInfo = nullptr;
buckets = std::move(other.buckets);
};

-Hashmap& operator=(const Hashmap<KEY_TYPE,VAL_TYPE>& other) {
+Hashmap& operator=(const Hashmap<KEY_TYPE, VAL_TYPE>& other) {
if (this == &other) {
return *this;
}
@@ -167,8 +167,8 @@
}
_metaAllocator.deallocate(_mapInfo, 1);
_mapInfo = other._mapInfo;
-other._mapInfo=nullptr;
-buckets =std::move(other.buckets);
+other._mapInfo = nullptr;
+buckets = std::move(other.buckets);
return *this;
}

@@ -853,13 +853,13 @@ class Hashmap {
if (w_tid == winner) {
KEY_TYPE old = split::s_atomicCAS(&buckets[probingindex].first, EMPTYBUCKET, candidateKey);
if (old == EMPTYBUCKET) {
-threadOverflow =(probingindex < optimalindex) ? (1 << sizePower) : (probingindex - optimalindex+1);
+threadOverflow = (probingindex < optimalindex) ? (1 << sizePower) : (probingindex - optimalindex + 1);
split::s_atomicExch(&buckets[probingindex].second, candidateVal);
warpDone = 1;
split::s_atomicAdd(&_mapInfo->fill, 1);
if (threadOverflow > _mapInfo->currentMaxBucketOverflow) {
split::s_atomicExch((unsigned long long*)(&_mapInfo->currentMaxBucketOverflow),
-(unsigned long long)nextOverflow(threadOverflow,defaults::WARPSIZE));
+(unsigned long long)nextOverflow(threadOverflow, defaults::WARPSIZE));
}
} else if (old == candidateKey) {
// Parallel stuff is fun. Major edge case!
@@ -937,14 +937,14 @@ class Hashmap {
if (w_tid == winner) {
KEY_TYPE old = split::s_atomicCAS(&buckets[probingindex].first, EMPTYBUCKET, candidateKey);
if (old == EMPTYBUCKET) {
-threadOverflow = (probingindex < optimalindex) ? (1 << sizePower) : (probingindex - optimalindex+1);
+threadOverflow = (probingindex < optimalindex) ? (1 << sizePower) : (probingindex - optimalindex + 1);
split::s_atomicExch(&buckets[probingindex].second, candidateVal);
warpDone = 1;
localCount = 1;
split::s_atomicAdd(&_mapInfo->fill, 1);
if (threadOverflow > _mapInfo->currentMaxBucketOverflow) {
split::s_atomicExch((unsigned long long*)(&_mapInfo->currentMaxBucketOverflow),
-(unsigned long long)nextOverflow(threadOverflow,defaults::WARPSIZE));
+(unsigned long long)nextOverflow(threadOverflow, defaults::WARPSIZE));
}
} else if (old == candidateKey) {
// Parallel stuff is fun. Major edge case!
@@ -1119,7 +1119,7 @@ class Hashmap {
split::tools::splitStackArena mPool(memory_for_pool, s);
size_t retval =
split::tools::copy_if_raw<hash_pair<KEY_TYPE, VAL_TYPE>, Rule, defaults::MAX_BLOCKSIZE, defaults::WARPSIZE>(
-buckets, elements, rule, nBlocks, mPool, s);
+buckets.data(), elements, buckets.size(), rule, nBlocks, mPool, s);
return retval;
}
template <typename Rule>
@@ -1152,8 +1152,8 @@ class Hashmap {
return elements.size();
}
template <typename Rule>
-size_t extractKeysByPattern(split::SplitVector<KEY_TYPE>& elements, Rule rule, void *stack, size_t max_size, split_gpuStream_t s = 0,
-bool prefetches = true) {
+size_t extractKeysByPattern(split::SplitVector<KEY_TYPE>& elements, Rule rule, void* stack, size_t max_size,
+split_gpuStream_t s = 0, bool prefetches = true) {
elements.resize(_mapInfo->fill + 1, true);
if (prefetches) {
elements.optimizeGPU(s);
@@ -1177,7 +1177,8 @@
};
return extractKeysByPattern(elements, rule, s, prefetches);
}
-size_t extractAllKeys(split::SplitVector<KEY_TYPE>& elements, void *stack, size_t max_size, split_gpuStream_t s = 0, bool prefetches = true) {
+size_t extractAllKeys(split::SplitVector<KEY_TYPE>& elements, void* stack, size_t max_size, split_gpuStream_t s = 0,
+bool prefetches = true) {
// Extract all keys
auto rule = [] __host__ __device__(const hash_pair<KEY_TYPE, VAL_TYPE>& kval) -> bool {
return kval.first != EMPTYBUCKET && kval.first != TOMBSTONE;
@@ -1409,7 +1410,7 @@ class Hashmap {

public:
HASHINATOR_DEVICEONLY
-device_iterator(Hashmap<KEY_TYPE, VAL_TYPE>& hashtable, size_t index) : index(index),hashtable(&hashtable) {}
+device_iterator(Hashmap<KEY_TYPE, VAL_TYPE>& hashtable, size_t index) : index(index), hashtable(&hashtable) {}

HASHINATOR_DEVICEONLY
size_t getIndex() { return index; }
@@ -1456,7 +1457,7 @@ class Hashmap {
public:
HASHINATOR_DEVICEONLY
explicit const_device_iterator(const Hashmap<KEY_TYPE, VAL_TYPE>& hashtable, size_t index)
-: index(index), hashtable(&hashtable){}
+: index(index), hashtable(&hashtable) {}

HASHINATOR_DEVICEONLY
size_t getIndex() { return index; }
@@ -1667,7 +1668,8 @@ class Hashmap {
void set_element(const KEY_TYPE& key, VAL_TYPE val) {
size_t thread_overflowLookup = 0;
insert_element(key, val, thread_overflowLookup);
-atomicMax((unsigned long long*)&(_mapInfo->currentMaxBucketOverflow), nextOverflow(thread_overflowLookup,defaults::WARPSIZE/defaults::elementsPerWarp));
+atomicMax((unsigned long long*)&(_mapInfo->currentMaxBucketOverflow),
+nextOverflow(thread_overflowLookup, defaults::WARPSIZE / defaults::elementsPerWarp));
}

HASHINATOR_DEVICEONLY
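The other user-visible change in this file is the pair of extraction overloads that take a caller-provided scratch buffer (stack, max_size) instead of allocating one internally, so hot loops can reuse a single allocation across calls. A hedged sketch of a call site follows; the overload signatures and the rule-lambda shape match the diff above, while the instantiation, sizing, and managed allocation are illustrative assumptions:

Hashmap<uint32_t, uint32_t> hmap;                 // assumed instantiation
split::SplitVector<uint32_t> keys;                // resized internally by the call

size_t max_size = size_t(1) << 20;                // illustrative scratch size in bytes
void* stack = nullptr;
cudaMallocManaged(&stack, max_size);              // CUDA backend assumed

// Keep the keys of entries whose value passes a predicate.
auto rule = [] __host__ __device__(const hash_pair<uint32_t, uint32_t>& kval) -> bool {
   return kval.second > 42;
};
size_t nMatched = hmap.extractKeysByPattern(keys, rule, stack, max_size);

// Or take every live key (EMPTYBUCKET and TOMBSTONE are skipped internally):
size_t nAll = hmap.extractAllKeys(keys, stack, max_size);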