Skip to content

Commit

Permalink
5.7 cherry pick - Benchmark perf. improvements for discrete distribut…
Browse files Browse the repository at this point in the history
…ions (#379)

* Remove workaround with hipGridDim_x * hipBlockDim_x

It makes no difference anymore on ROCm >= 5.5, probably after
switching to code object v5.

* Use restrict with tables of discrete distributions

This change improves performance in device API benchamrks
for most engines.

---------

Co-authored-by: Anton Gorenko <[email protected]>
  • Loading branch information
RobsonRLemos and ex-rzr authored Aug 21, 2023
1 parent 116073c commit f647dae
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 22 deletions.
11 changes: 1 addition & 10 deletions benchmark/benchmark_rocrand_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,7 @@ void generate_kernel(GeneratorState * states,
const Extra extra)
{
const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x;

// Using gridDim.x * blockDim.x should actually a performance improvement, however, this kernel
// just so happen to use an unfortunate amount of registers that the changes introduced in
// https://github.com/llvm/llvm-project/commit/ba0d079c7aa52bc0ae860d16dd4a33b0dc5cfff7,
// cause adverse code generation that degrades performance.
#ifdef USE_HIP_CPU
const unsigned int stride = gridDim.x * blockDim.x;
#else
const unsigned int stride = hipGridDim_x * hipBlockDim_x;
#endif
const unsigned int stride = gridDim.x * blockDim.x;

GeneratorState state = states[state_id];
unsigned int index = state_id;
Expand Down
41 changes: 29 additions & 12 deletions library/include/rocrand/rocrand_discrete.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -57,16 +57,25 @@
namespace rocrand_device {
namespace detail {

FQUALIFIERS unsigned int discrete_alias(const double x, const rocrand_discrete_distribution_st& dis)
FQUALIFIERS unsigned int discrete_alias(const double x,
const unsigned int size,
const unsigned int offset,
const unsigned int* __restrict__ alias,
const double* __restrict__ probability)
{
// Calculate value using Alias table

// x is [0, 1)
const double nx = dis.size * x;
const double fnx = floor(nx);
const double y = nx - fnx;
const double nx = size * x;
const double fnx = floor(nx);
const double y = nx - fnx;
const unsigned int i = static_cast<unsigned int>(fnx);
return dis.offset + (y < dis.probability[i] ? i : dis.alias[i]);
return offset + (y < probability[i] ? i : alias[i]);
}

FQUALIFIERS unsigned int discrete_alias(const double x, const rocrand_discrete_distribution_st& dis)
{
return discrete_alias(x, dis.size, dis.offset, dis.alias, dis.probability);
}

FQUALIFIERS
Expand Down Expand Up @@ -94,17 +103,20 @@ FQUALIFIERS unsigned int discrete_alias(const unsigned long long int
return discrete_alias(x, dis);
}

FQUALIFIERS unsigned int discrete_cdf(const double x, const rocrand_discrete_distribution_st& dis)
FQUALIFIERS unsigned int discrete_cdf(const double x,
const unsigned int size,
const unsigned int offset,
const double* __restrict__ cdf)
{
// Calculate value using binary search in CDF

unsigned int min = 0;
unsigned int max = dis.size - 1;
unsigned int max = size - 1;
do
{
const unsigned int center = (min + max) / 2;
const double p = dis.cdf[center];
if (x > p)
const double p = cdf[center];
if(x > p)
{
min = center + 1;
}
Expand All @@ -113,9 +125,14 @@ FQUALIFIERS unsigned int discrete_cdf(const double x, const rocrand_discrete_dis
max = center;
}
}
while (min != max);
while(min != max);

return offset + min;
}

return dis.offset + min;
FQUALIFIERS unsigned int discrete_cdf(const double x, const rocrand_discrete_distribution_st& dis)
{
return discrete_cdf(x, dis.size, dis.offset, dis.cdf);
}

FQUALIFIERS
Expand Down

0 comments on commit f647dae

Please sign in to comment.