From f647daef597db7ca0322ea592054338cb47099cc Mon Sep 17 00:00:00 2001 From: Robson Lemos <31936621+RobsonRLemos@users.noreply.github.com> Date: Mon, 21 Aug 2023 13:18:46 -0600 Subject: [PATCH] 5.7 cherry pick - Benchmark perf. improvements for discrete distributions (#379) * Remove workaround with hipGridDim_x * hipBlockDim_x It makes no difference anymore on ROCm >= 5.5, probably after switching to code object v5. * Use restrict with tables of discrete distributions This change improves performance in device API benchamrks for most engines. --------- Co-authored-by: Anton Gorenko --- benchmark/benchmark_rocrand_kernel.cpp | 11 +----- library/include/rocrand/rocrand_discrete.h | 41 +++++++++++++++------- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/benchmark/benchmark_rocrand_kernel.cpp b/benchmark/benchmark_rocrand_kernel.cpp index 3875ca73..640cf098 100644 --- a/benchmark/benchmark_rocrand_kernel.cpp +++ b/benchmark/benchmark_rocrand_kernel.cpp @@ -94,16 +94,7 @@ void generate_kernel(GeneratorState * states, const Extra extra) { const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x; - - // Using gridDim.x * blockDim.x should actually a performance improvement, however, this kernel - // just so happen to use an unfortunate amount of registers that the changes introduced in - // https://github.com/llvm/llvm-project/commit/ba0d079c7aa52bc0ae860d16dd4a33b0dc5cfff7, - // cause adverse code generation that degrades performance. -#ifdef USE_HIP_CPU - const unsigned int stride = gridDim.x * blockDim.x; -#else - const unsigned int stride = hipGridDim_x * hipBlockDim_x; -#endif + const unsigned int stride = gridDim.x * blockDim.x; GeneratorState state = states[state_id]; unsigned int index = state_id; diff --git a/library/include/rocrand/rocrand_discrete.h b/library/include/rocrand/rocrand_discrete.h index df632437..1472c013 100644 --- a/library/include/rocrand/rocrand_discrete.h +++ b/library/include/rocrand/rocrand_discrete.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -57,16 +57,25 @@ namespace rocrand_device { namespace detail { -FQUALIFIERS unsigned int discrete_alias(const double x, const rocrand_discrete_distribution_st& dis) +FQUALIFIERS unsigned int discrete_alias(const double x, + const unsigned int size, + const unsigned int offset, + const unsigned int* __restrict__ alias, + const double* __restrict__ probability) { // Calculate value using Alias table // x is [0, 1) - const double nx = dis.size * x; - const double fnx = floor(nx); - const double y = nx - fnx; + const double nx = size * x; + const double fnx = floor(nx); + const double y = nx - fnx; const unsigned int i = static_cast(fnx); - return dis.offset + (y < dis.probability[i] ? i : dis.alias[i]); + return offset + (y < probability[i] ? i : alias[i]); +} + +FQUALIFIERS unsigned int discrete_alias(const double x, const rocrand_discrete_distribution_st& dis) +{ + return discrete_alias(x, dis.size, dis.offset, dis.alias, dis.probability); } FQUALIFIERS @@ -94,17 +103,20 @@ FQUALIFIERS unsigned int discrete_alias(const unsigned long long int return discrete_alias(x, dis); } -FQUALIFIERS unsigned int discrete_cdf(const double x, const rocrand_discrete_distribution_st& dis) +FQUALIFIERS unsigned int discrete_cdf(const double x, + const unsigned int size, + const unsigned int offset, + const double* __restrict__ cdf) { // Calculate value using binary search in CDF unsigned int min = 0; - unsigned int max = dis.size - 1; + unsigned int max = size - 1; do { const unsigned int center = (min + max) / 2; - const double p = dis.cdf[center]; - if (x > p) + const double p = cdf[center]; + if(x > p) { min = center + 1; } @@ -113,9 +125,14 @@ FQUALIFIERS unsigned int discrete_cdf(const double x, const rocrand_discrete_dis max = center; } } - while (min != max); + while(min != max); + + return offset + min; +} - return dis.offset + min; +FQUALIFIERS unsigned int discrete_cdf(const double x, const rocrand_discrete_distribution_st& dis) +{ + return discrete_cdf(x, dis.size, dis.offset, dis.cdf); } FQUALIFIERS