Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deadlock when using int64 keys converted from int32 #147

Open
rhdong opened this issue Jun 26, 2023 · 1 comment
Open

Deadlock when using int64 keys converted from int32 #147

rhdong opened this issue Jun 26, 2023 · 1 comment
Assignees
Labels
bug Something isn't working

Comments

@rhdong
Copy link
Member

rhdong commented Jun 26, 2023

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>

#include "random"
#include "hierarchical_kv/merlin_hashtable.cuh"

using K = uint64_t;
using S = uint64_t;
using V = float;
using MerlinHashTable = nv::merlin::HashTable<K, V, S>;
using TableOption = nv::merlin::HashTableOptions;

using namespace nv::merlin;
using namespace std;

// Fallback for CUCO_STRINGIFY, which the error message below relies on to
// turn __LINE__ into a string literal. The guard keeps this definition from
// clashing with one supplied by another header.
#ifndef CUCO_STRINGIFY
#define CUCO_STRINGIFY_DETAIL_(x) #x
#define CUCO_STRINGIFY(x) CUCO_STRINGIFY_DETAIL_(x)
#endif

// CUCO_CUDA_TRY(call) / CUCO_CUDA_TRY(call, exception_type)
//
// Evaluates `call` (an expression yielding cudaError_t) exactly once. If the
// result is not cudaSuccess, the macro calls cudaGetLastError() (which reads
// and clears the runtime's last-error state) and throws `exception_type`
// constructed from a std::string of the form
//   "ERROR at <file>:<line>: <error name> <error string>".
//
// The one-argument form throws cuco::cuda_error.
// NOTE(review): cuco::cuda_error is not declared anywhere in this snippet;
// it presumably comes from the cuCollections headers - confirm it is
// reachable, or use the two-argument form with another exception type.
//
// Dispatch works by argument counting: GET_CUCO_CUDA_TRY_MACRO selects its
// third argument, which is CUCO_CUDA_TRY_1 when one argument is passed and
// CUCO_CUDA_TRY_2 when two are passed.
//
// The expansion is a `do { ... } while (0)` WITHOUT a trailing semicolon, so
// the caller supplies the semicolon and the macro behaves as a single
// statement even in an unbraced if/else.
#define CUCO_CUDA_TRY(...)                                               \
  GET_CUCO_CUDA_TRY_MACRO(__VA_ARGS__, CUCO_CUDA_TRY_2, CUCO_CUDA_TRY_1) \
  (__VA_ARGS__)
#define GET_CUCO_CUDA_TRY_MACRO(_1, _2, NAME, ...) NAME
#define CUCO_CUDA_TRY_2(_call, _exception_type)                         \
  do {                                                                  \
    cudaError_t const error = (_call);                                  \
    if (cudaSuccess != error) {                                         \
      cudaGetLastError();                                               \
      throw _exception_type{std::string{"ERROR at "} + __FILE__ + ":" + \
                            CUCO_STRINGIFY(__LINE__) + ": " +           \
                            cudaGetErrorName(error) + " " +             \
                            cudaGetErrorString(error)};                 \
    }                                                                   \
  } while (0)
#define CUCO_CUDA_TRY_1(_call) CUCO_CUDA_TRY_2(_call, cuco::cuda_error)


// Placeholder device function: it has an empty body and nothing in this
// snippet calls it.
__device__ void select() {}


namespace repro {

// Deleter for memory obtained from cudaMalloc.
struct DeviceFree {
  void operator()(void* ptr) const noexcept {
    // The return code is intentionally dropped: this runs from destructors,
    // which must not throw.
    cudaFree(ptr);
  }
};

// Owning handle for a cudaMalloc'ed array of T.
template <typename T>
using DeviceBuffer = std::unique_ptr<T[], DeviceFree>;

// Allocates `count` elements of T with cudaMalloc and, when `zero_fill` is
// set, clears them with cudaMemset. Throws via CUCO_CUDA_TRY on failure.
template <typename T>
DeviceBuffer<T> make_device_buffer(size_t count, bool zero_fill = false) {
  T* raw = nullptr;
  CUCO_CUDA_TRY(cudaMalloc(&raw, count * sizeof(T)));
  DeviceBuffer<T> buffer(raw);  // take ownership before anything else can throw
  if (zero_fill) {
    CUCO_CUDA_TRY(cudaMemset(raw, 0, count * sizeof(T)));
  }
  return buffer;
}

// Owns a CUDA stream and destroys it on scope exit.
struct StreamGuard {
  cudaStream_t stream = nullptr;
  StreamGuard() { CUCO_CUDA_TRY(cudaStreamCreate(&stream)); }
  ~StreamGuard() { cudaStreamDestroy(stream); }
  StreamGuard(const StreamGuard&) = delete;
  StreamGuard& operator=(const StreamGuard&) = delete;
};

}  // namespace repro

// Reproduction driver for the reported hang.
//
// Builds a MerlinHashTable (capacity 128Mi, dim 8, LRU eviction), fills the
// device with 2^27 "hot" keys drawn from [0, 2^27) and 2^27 "cold" keys drawn
// from [0, 2^50], and then loops forever: each iteration picks a random
// window of 2^20 hot keys, overwrites the first tenth of the window with cold
// keys, and runs `find`; it then does the same with a fresh window and runs
// `insert_and_evict`. The iteration count is printed after every iteration so
// the last line of output shows where the program stopped making progress.
//
// This function never returns normally; it only leaves via an exception
// thrown by CUCO_CUDA_TRY or by a failed host allocation (std::bad_alloc).
//
// @param block  Unused. Kept so existing callers still compile.
void test_merlin(bool block = true) {
  (void)block;

  constexpr size_t dim_size = 8;
  constexpr size_t total_key_length = size_t{1} << 27;
  constexpr size_t total_key_range = size_t{1} << 27;
  constexpr size_t total_key_per_op = size_t{1} << 20;
  // Number of leading keys in each batch that are replaced by cold keys.
  constexpr size_t cold_key_per_op = total_key_per_op / 10;

  TableOption options;
  options.init_capacity = 128 * 1024 * 1024UL;
  options.max_capacity = 128 * 1024 * 1024UL;
  options.dim = dim_size;
  options.max_hbm_for_vectors = nv::merlin::GB(30);
  options.evict_strategy = EvictStrategy::kLru;

  std::shared_ptr<MerlinHashTable> table = std::make_shared<MerlinHashTable>();
  table->init(options);

  // Full key/value pools on the device.
  auto d_keys = repro::make_device_buffer<K>(total_key_length);
  auto d_values = repro::make_device_buffer<V>(total_key_length * dim_size);
  auto d_cold_keys = repro::make_device_buffer<K>(total_key_length);

  // Per-operation scratch buffers, zero-initialised as in the original.
  auto d_input_keys = repro::make_device_buffer<K>(total_key_per_op);
  auto d_output_values =
      repro::make_device_buffer<V>(total_key_per_op * dim_size, true);
  auto d_founds = repro::make_device_buffer<bool>(total_key_per_op, true);

  auto d_evicted_keys = repro::make_device_buffer<K>(total_key_per_op, true);
  auto d_evicted_values =
      repro::make_device_buffer<V>(total_key_per_op * dim_size, true);
  auto d_evicted_scores = repro::make_device_buffer<S>(total_key_per_op, true);

  std::random_device dev;
  std::mt19937 mt(dev());
  // 1ull (not 1ul): unsigned long is only 32 bits wide on some platforms.
  std::uniform_int_distribution<uint64_t> dist(0, 1ull << 50);

  {
    // Host staging buffers live only until the uploads below complete.
    // new[] throws std::bad_alloc on failure instead of returning null.
    std::unique_ptr<K[]> h_keys(new K[total_key_length]);
    std::unique_ptr<V[]> h_values(new V[total_key_length * dim_size]);
    std::unique_ptr<K[]> h_cold_keys(new K[total_key_length]);

    for (size_t i = 0; i < total_key_length; ++i) {
      h_keys[i] = dist(mt) % total_key_range;
      h_cold_keys[i] = dist(mt);
      for (size_t j = 0; j < dim_size; ++j) {
        h_values[i * dim_size + j] =
            static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
      }
    }

    CUCO_CUDA_TRY(cudaMemcpy(d_keys.get(), h_keys.get(),
                             total_key_length * sizeof(K),
                             cudaMemcpyHostToDevice));
    CUCO_CUDA_TRY(cudaMemcpy(d_cold_keys.get(), h_cold_keys.get(),
                             total_key_length * sizeof(K),
                             cudaMemcpyHostToDevice));
    CUCO_CUDA_TRY(cudaMemcpy(d_values.get(), h_values.get(),
                             total_key_length * dim_size * sizeof(V),
                             cudaMemcpyHostToDevice));
  }

  repro::StreamGuard guard;
  cudaStream_t stream = guard.stream;

  cout << "start" << endl;
  // Unsigned 64-bit so the counter cannot hit signed overflow in this
  // endless loop.
  uint64_t count = 0;
  for (;;) {
    // --- find: random hot window with its head replaced by cold keys ---
    size_t start_index = dist(mt) % (total_key_length - total_key_per_op);
    CUCO_CUDA_TRY(cudaMemcpyAsync(d_input_keys.get(),
                                  d_keys.get() + start_index,
                                  total_key_per_op * sizeof(K),
                                  cudaMemcpyDeviceToDevice, stream));
    CUCO_CUDA_TRY(cudaMemcpyAsync(d_input_keys.get(),
                                  d_cold_keys.get() + start_index,
                                  cold_key_per_op * sizeof(K),
                                  cudaMemcpyDeviceToDevice, stream));
    table->find(total_key_per_op, d_input_keys.get(), d_output_values.get(),
                d_founds.get(), nullptr, stream);

    // --- insert_and_evict: a fresh window built the same way ---
    start_index = dist(mt) % (total_key_length - total_key_per_op);
    CUCO_CUDA_TRY(cudaMemcpyAsync(d_input_keys.get(),
                                  d_keys.get() + start_index,
                                  total_key_per_op * sizeof(K),
                                  cudaMemcpyDeviceToDevice, stream));
    CUCO_CUDA_TRY(cudaMemcpyAsync(d_input_keys.get(),
                                  d_cold_keys.get() + start_index,
                                  cold_key_per_op * sizeof(K),
                                  cudaMemcpyDeviceToDevice, stream));
    table->insert_and_evict(total_key_per_op, d_input_keys.get(),
                            d_values.get() + start_index * dim_size, nullptr,
                            d_evicted_keys.get(), d_evicted_values.get(),
                            d_evicted_scores.get(), stream);

    // Asynchronous failures from the work above surface at this sync, so
    // its return code has to be checked.
    CUCO_CUDA_TRY(cudaStreamSynchronize(stream));
    ++count;
    cout << "find and insert_and_evict: " << count << endl;
  }
}

@rhdong rhdong self-assigned this Jun 26, 2023
@rhdong rhdong added the bug Something isn't working label Jun 26, 2023
@jiashuy
Copy link
Collaborator

jiashuy commented Jun 27, 2023

I think it may be related to the signed integer conversion.
For example, when an int32_t is converted to int64_t, the sign bit is extended as well, so the resulting 64-bit key can end up being one of the reserved keys.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

2 participants