From 9220578840024b296843dc595432471086037a25 Mon Sep 17 00:00:00 2001 From: Qianqian Fang Date: Mon, 30 Oct 2023 22:55:31 -0400 Subject: [PATCH] [bug] apply #41 like 2xfloat-buffer for dref accumulation, fix #195 --- src/mcx_core.cu | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/mcx_core.cu b/src/mcx_core.cu index ea17a919..4c7cb50b 100644 --- a/src/mcx_core.cu +++ b/src/mcx_core.cu @@ -1044,7 +1044,17 @@ __device__ inline int launchnewphoton(MCXpos* p, MCXdir* v, Stokes* s, MCXtime* if (gcfg->srctype != MCX_SRC_PATTERN && gcfg->srctype != MCX_SRC_PATTERN3D) { #ifdef USE_ATOMIC +#ifdef USE_DOUBLE atomicAdd(& field[*idx1d + tshift * gcfg->dimlen.z], -p->w); +#else + float oldval = atomicAdd(& field[*idx1d + tshift * gcfg->dimlen.z], -p->w); + + if (fabsf(oldval) > MAX_ACCUM) { + atomicadd(& field[*idx1d + tshift * gcfg->dimlen.z], ((oldval > 0.f) ? -MAX_ACCUM : MAX_ACCUM)); + atomicadd(& field[*idx1d + tshift * gcfg->dimlen.z + gcfg->dimlen.w], ((oldval > 0.f) ? MAX_ACCUM : -MAX_ACCUM)); + } + +#endif #else field[*idx1d + tshift * gcfg->dimlen.z] += -p->w; #endif @@ -1052,7 +1062,17 @@ __device__ inline int launchnewphoton(MCXpos* p, MCXdir* v, Stokes* s, MCXtime* for (int i = 0; i < gcfg->srcnum; i++) { if (fabsf(ppath[gcfg->w0offset + i]) > 0.f) { #ifdef USE_ATOMIC +#ifdef USE_DOUBLE atomicAdd(& field[(*idx1d + tshift * gcfg->dimlen.z)*gcfg->srcnum + i], -((gcfg->srcnum == 1) ? p->w : p->w * ppath[gcfg->w0offset + i])); +#else + float oldval = atomicAdd(& field[(*idx1d + tshift * gcfg->dimlen.z) * gcfg->srcnum + i], -((gcfg->srcnum == 1) ? p->w : p->w * ppath[gcfg->w0offset + i])); + + if (fabsf(oldval) > MAX_ACCUM) { + atomicadd(& field[(*idx1d + tshift * gcfg->dimlen.z)*gcfg->srcnum + i], ((oldval > 0.f) ? -MAX_ACCUM : MAX_ACCUM)); + atomicadd(& field[(*idx1d + tshift * gcfg->dimlen.z)*gcfg->srcnum + i + gcfg->dimlen.w], ((oldval > 0.f) ? MAX_ACCUM : -MAX_ACCUM)); + } + +#endif #else field[(*idx1d + tshift * gcfg->dimlen.z)*gcfg->srcnum + i] += -((gcfg->srcnum == 1) ? p->w : p->w * ppath[gcfg->w0offset + i]); #endif