Skip to content

Commit

Permalink
Apply suggestions from code review
Browse files (browse the repository at this point in the history)
Co-authored-by: Alexander Sinn <[email protected]>
  • Loading branch information
WeiqunZhang and AlexanderSinn authored Oct 16, 2024
1 parent a2c2201 commit bca9c1b
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 9 deletions.
8 changes: 5 additions & 3 deletions Src/Base/AMReX_GpuLaunch.H
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ namespace Gpu {

// Launch parameters for one kernel launch; a vector of these is built so
// that a large range can be covered by several consecutive launches.
// NOTE(review): this listing comes from a diff view, so both the removed
// member (ntotalthreads) and its replacement (start_idx) appear here --
// confirm against the actual header which one is present.
struct ExecConfig
{
// Total # of threads in this launch (assigned as nblocks * MT).
Long ntotalthreads;
// Element index at which this launch should start (the running sum of
// nblocks * MT over all earlier launches).
Long start_idx;
// # of blocks in this launch.
int nblocks;
};

Expand All @@ -195,6 +195,7 @@ namespace Gpu {
// loops inside GPU kernels.
auto nlaunches = int((N+nmax-1)/nmax);
Vector<ExecConfig> r(nlaunches);
Long ndone = 0;
for (int i = 0; i < nlaunches; ++i) {
int nblocks;
if (N > nmax) {
Expand All @@ -203,8 +204,9 @@ namespace Gpu {
} else {
nblocks = int((N+MT-1)/MT);
}
// Total # of threads in this launch
r[i].ntotalthreads = Long(nblocks) * MT;
// At which element ID the kernel should start
r[i].start_idx = ndone;
ndone += Long(nblocks) * MT;
// # of blocks in this launch
r[i].nblocks = nblocks;
}
Expand Down
9 changes: 3 additions & 6 deletions Src/Base/AMReX_GpuLaunchFunctsG.H
Original file line number Diff line number Diff line change
Expand Up @@ -800,22 +800,19 @@ ParallelFor (Gpu::KernelInfo const&, T n, L const& f) noexcept
static_assert(sizeof(T) >= 2);
if (amrex::isEmpty(n)) { return; }
const auto& nec = Gpu::makeNExecutionConfigs<MT>(n);
T ndone = 0;
for (auto const& ec : nec) {
T nleft = n - ndone;
const T start_idx = T(ec.start_idx);
const T nleft = n - start_idx;
AMREX_LAUNCH_KERNEL(MT, ec.nblocks, MT, 0, Gpu::gpuStream(),
[=] AMREX_GPU_DEVICE () noexcept {
// This will not overflow, even though nblocks*MT might.
auto tid = T(MT)*T(blockIdx.x)+T(threadIdx.x);
if (tid < nleft) {
detail::call_f_scalar_handler(f, tid+ndone,
detail::call_f_scalar_handler(f, tid+start_idx,
Gpu::Handler(amrex::min((std::uint64_t(nleft-tid)+(std::uint64_t)threadIdx.x),
(std::uint64_t)blockDim.x)));
}
});
if (Long(nleft) > ec.ntotalthreads) {
ndone += ec.ntotalthreads;
}
}
AMREX_GPU_ERROR_CHECK();
}
Expand Down

0 comments on commit bca9c1b

Please sign in to comment.