Skip to content

Commit

Permalink
Merge pull request #26 from psychocoderHPC/fix-hipWarpSize
Browse files Browse the repository at this point in the history
support for any HIP compiler
  • Loading branch information
kstppd authored Oct 4, 2023
2 parents 078890d + 5a5a160 commit b7f3f5a
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 17 deletions.
9 changes: 6 additions & 3 deletions include/hashinator/defaults.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,16 @@ namespace defaults {
#ifdef __NVCC__
constexpr int WARPSIZE = 32;
constexpr int BUCKET_OVERFLOW = 32;
#elif (__HIP__ && __AMDGCN_WAVEFRONT_SIZE)
// AMDs GPU warp size depends on the GPU architecture
constexpr int WARPSIZE = __AMDGCN_WAVEFRONT_SIZE;
constexpr int BUCKET_OVERFLOW = __AMDGCN_WAVEFRONT_SIZE;
#else
constexpr int WARPSIZE = 64;
constexpr int BUCKET_OVERFLOW = 64;
#error "Warp size not known, please use a CUDA or HIP compiler."
#endif
constexpr int elementsPerWarp = 1;
constexpr int MAX_BLOCKSIZE = 1024;
template <typename T>
using DefaultHashFunction = HashFunctions::Fibonacci<T>;
} // namespace defaults
} // namespace Hashinator
} // namespace Hashinator
8 changes: 4 additions & 4 deletions include/hashinator/hashers.h
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ __global__ void insert_index_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE
}

#endif
#ifdef __HIP_PLATFORM_HCC___
#ifdef __HIP__

/*
* Resets all elements pointed by src to EMPTY in dst
Expand Down Expand Up @@ -1103,7 +1103,7 @@ __global__ void retrieve_kernel(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TY
VIRTUALWARP * subwarp_relative_index + VIRTUALWARP);
}
#endif
#ifdef __HIP_PLATFORM_HCC___
#ifdef __HIP__
uint64_t subwarp_relative_index = (wid) % (WARPSIZE / VIRTUALWARP);
uint64_t submask;
if constexpr (elementsPerWarp == 1) {
Expand Down Expand Up @@ -1168,7 +1168,7 @@ __global__ void retrieve_kernel(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KE
VIRTUALWARP * subwarp_relative_index + VIRTUALWARP);
}
#endif
#ifdef __HIP_PLATFORM_HCC___
#ifdef __HIP__
uint64_t subwarp_relative_index = (wid) % (WARPSIZE / VIRTUALWARP);
uint64_t submask;
if constexpr (elementsPerWarp == 1) {
Expand Down Expand Up @@ -1239,7 +1239,7 @@ __global__ void delete_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buc
VIRTUALWARP * subwarp_relative_index + VIRTUALWARP);
}
#endif
#ifdef __HIP_PLATFORM_HCC___
#ifdef __HIP__
uint64_t subwarp_relative_index = (wid) % (WARPSIZE / VIRTUALWARP);
uint64_t submask;
if constexpr (elementsPerWarp == 1) {
Expand Down
2 changes: 1 addition & 1 deletion include/splitvector/archMacros.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
#define split_gpuMemoryAdvise cudaMemoryAdvise
#define split_gpuMemAdvise cudaMemAdvise

#elif __HIP_PLATFORM_HCC___
#elif __HIP__

#define split_gpuGetLastError hipGetLastError
#define split_gpuGetErrorString hipGetErrorString
Expand Down
14 changes: 7 additions & 7 deletions include/splitvector/gpu_wrappers.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

#ifdef __NVCC__
#include <cuda_runtime_api.h>
#else
#elifdef __HIP__
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>
#endif
Expand Down Expand Up @@ -154,7 +154,7 @@ __device__ __forceinline__ T s_warpVote(bool predicate, T votingMask = T(-1)) no
return __ballot_sync(votingMask, predicate);
#endif

#ifdef __HIP_PLATFORM_HCC___
#ifdef __HIP__
return __ballot(predicate);
#endif
}
Expand All @@ -172,7 +172,7 @@ __device__ __forceinline__ int s_findFirstSig(T mask) noexcept {
return __ffs(mask);
#endif

#ifdef __HIP_PLATFORM_HCC___
#ifdef __HIP__
return __ffsll((unsigned long long)mask);
#endif
}
Expand All @@ -191,7 +191,7 @@ __device__ __forceinline__ int s_warpVoteAny(bool predicate, T votingMask = T(-1
return __any_sync(votingMask, predicate);
#endif

#ifdef __HIP_PLATFORM_HCC___
#ifdef __HIP__
return __any(predicate);
#endif
}
Expand All @@ -208,7 +208,7 @@ __device__ __forceinline__ uint32_t s_pop_count(T mask) noexcept {
#ifdef __NVCC__
return __popc(mask);
#endif
#ifdef __HIP_PLATFORM_HCC___
#ifdef __HIP__
if constexpr (sizeof(T) == 4) {
return __popc(mask);
} else if constexpr (sizeof(mask) == 8) {
Expand Down Expand Up @@ -236,7 +236,7 @@ __device__ __forceinline__ T s_shuffle(T variable, unsigned int source, U mask =
#ifdef __NVCC__
return __shfl_sync(mask, variable, source);
#endif
#ifdef __HIP_PLATFORM_HCC___
#ifdef __HIP__
return __shfl(variable, source);
#endif
}
Expand All @@ -257,7 +257,7 @@ __device__ __forceinline__ T s_shuffle_down(T variable, unsigned int delta, U ma
#ifdef __NVCC__
return __shfl_down_sync(mask, variable, delta);
#endif
#ifdef __HIP_PLATFORM_HCC___
#ifdef __HIP__
return __shfl_down(variable, delta);
#endif
}
Expand Down
3 changes: 2 additions & 1 deletion include/splitvector/split_allocators.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
* */
#pragma once
#include "archMacros.h"
#include "gpu_wrappers.h"
#include <cassert>
namespace split {

Expand All @@ -37,7 +38,7 @@ static void cuda_error(cudaError_t err, const char* file, int line) {
}
}
#endif
#ifdef __HIP_PLATFORM_HCC___
#ifdef __HIP__
/* Define the HIP error checking macro */
#define SPLIT_CHECK_ERR(err) (split::hip_error(err, __FILE__, __LINE__))
static void hip_error(hipError_t err, const char* file, int line) {
Expand Down
2 changes: 1 addition & 1 deletion include/splitvector/split_tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
#define SPLIT_VOTING_MASK 0xFFFFFFFF // 32-bit wide for split_gpu warps
#define WARPLENGTH 32
#endif
#ifdef __HIP_PLATFORM_HCC___
#ifdef __HIP__
#define SPLIT_VOTING_MASK 0xFFFFFFFFFFFFFFFFull // 64-bit wide for amd warps
#define WARPLENGTH 64

Expand Down

0 comments on commit b7f3f5a

Please sign in to comment.