Merge pull request #235 from insertinterestingnamehere/syncvar
Explicit Atomics in Syncvar Implementation
insertinterestingnamehere authored Mar 26, 2024
2 parents 0e2fcf0 + 999643b commit 1f86571
Showing 1 changed file with 30 additions and 41 deletions.
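In short: the commit replaces qthreads' fence-plus-plain-store unlock idiom (and the matching plain 64-bit loads) with C11 stdatomic.h operations, and extends the lock-free fast-path arch lists to ARM and ARMv8/AArch64. Below is a minimal standalone sketch of the before/after store pattern; it is illustrative only, not qthreads code, and uses atomic_thread_fence as a stand-in for the arch-specific MACHINE_FENCE:

    #include <stdatomic.h>
    #include <stdint.h>

    static uint64_t word; /* stands in for a syncvar's 64-bit u.w */

    /* before: arch-specific fence, then a plain store, which the compiler
     * is in principle free to reorder or tear (it is a data race in C11) */
    void unlock_before(uint64_t unlocked) {
        atomic_thread_fence(memory_order_seq_cst); /* stand-in for MACHINE_FENCE */
        word = unlocked;
    }

    /* after: an explicit relaxed atomic store; C11 guarantees the 64-bit
     * write is indivisible. The cast mirrors what the commit does and
     * assumes _Atomic uint64_t shares uint64_t's representation. */
    void unlock_after(uint64_t unlocked) {
        atomic_store_explicit((_Atomic uint64_t *)&word, unlocked,
                              memory_order_relaxed);
    }

The relaxed ordering drops the explicit fence, presumably on the expectation that the surrounding lock protocol supplies whatever cross-thread ordering is needed.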
71 changes: 30 additions & 41 deletions src/syncvar.c
@@ -84,13 +84,13 @@ extern unsigned int QTHREAD_LOCKING_STRIPES;
#define BUILD_UNLOCKED_SYNCVAR(data, state) (((data) << 4) | ((state) << 1))
#define QTHREAD_CHOOSE_STRIPE(addr) (((size_t)addr >> 4) & (QTHREAD_LOCKING_STRIPES - 1))

-#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64)
+#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64 || QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM || QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64)
# define UNLOCK_THIS_UNMODIFIED_SYNCVAR(addr, unlocked) do { \
-(addr)->u.s.lock = 0; \
+atomic_store_explicit((_Atomic uint64_t*)&(addr)->u.w, (unlocked), memory_order_relaxed);\
} while (0)
# define UNLOCK_THIS_MODIFIED_SYNCVAR(addr, val, state) do { \
-MACHINE_FENCE; \
-(addr)->u.w = BUILD_UNLOCKED_SYNCVAR(val, state); \
+atomic_store_explicit((_Atomic uint64_t*)&(addr)->u.w, BUILD_UNLOCKED_SYNCVAR(val, state), memory_order_relaxed); \
} while (0)
#elif ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \
@@ -99,13 +99,13 @@ extern unsigned int QTHREAD_LOCKING_STRIPES;
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) || \
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO))
# define UNLOCK_THIS_UNMODIFIED_SYNCVAR(addr, unlocked) do { \
-(addr)->u.w = (unlocked); \
+atomic_store_explicit((_Atomic uint64_t*)&(addr)->u.w, (unlocked), memory_order_relaxed); \
} while (0)
# define UNLOCK_THIS_MODIFIED_SYNCVAR(addr, val, state) do { \
-MACHINE_FENCE; \
-(addr)->u.w = BUILD_UNLOCKED_SYNCVAR(val, state); \
+atomic_store_explicit((_Atomic uint64_t*)&(addr)->u.w, BUILD_UNLOCKED_SYNCVAR(val, state), memory_order_relaxed); \
} while (0)
-#else /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) */
+#else /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64 || QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM || QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64) */
# define UNLOCK_THIS_UNMODIFIED_SYNCVAR(addr, unlocked) do { \
/* this has its own pthread mutex, so does not need memory synch */ \
qthread_cas64(&((addr)->u.w), (addr)->u.w, (unlocked)); \
@@ -114,7 +114,7 @@ extern unsigned int QTHREAD_LOCKING_STRIPES;
/* this has its own pthread mutex, so does not need memory synch */ \
qthread_cas64(&((addr)->u.w), (addr)->u.w, BUILD_UNLOCKED_SYNCVAR(val, state)); \
} while (0)
-#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) */
+#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64 || QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM || QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64) */

static uint64_t qthread_mwaitc(syncvar_t *const restrict addr,
unsigned char const statemask,
@@ -172,7 +172,7 @@ static uint64_t qthread_mwaitc(syncvar_t *const restrict addr,
{
syncvar_t tmp;
loop_start:
-tmp = *addr;
+tmp.u.w = atomic_load_explicit((_Atomic uint64_t*)addr, memory_order_relaxed);
do {
unlocked = tmp; // may be locked or unlocked, we don't know
if (unlocked.u.s.lock == 1) {
@@ -246,38 +246,22 @@ int qthread_syncvar_status(syncvar_t *const v)
eflags_t e = { 0, 0, 0, 0, 0 };
unsigned int realret;

-#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO)
-uint64_t ret = qthread_mwaitc(v, 0xff, INT_MAX, &e);
-qassert_ret(e.cf == 0, QTHREAD_TIMEOUT); /* there better not have been a timeout */
-realret = (e.of << 2) | (e.pf << 1) | e.sf;
-MACHINE_FENCE;
-v->u.w = BUILD_UNLOCKED_SYNCVAR(ret, realret);
-return (realret & 0x2) ? 0 : 1;
-
-#else
-# if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \
-(QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || \
-(QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \
-(QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64))
-{
-/* I'm being optimistic here; this only works if a basic 64-bit load is
-* atomic (on most platforms it is). Thus, if I've done an atomic read
-* and the syncvar is unlocked, then I figure I can trust
-* that state and do not need to do a locked atomic operation of any
-* kind (e.g. cas) */
-syncvar_t local_copy_of_v = *v;
-if (local_copy_of_v.u.s.lock == 0) {
-/* short-circuit */
-return (local_copy_of_v.u.s.state & 0x2) ? 0 : 1;
-}
+/* If I've done an atomic read
+* and the syncvar is unlocked, then I figure I can trust
+* that state and do not need to do a locked atomic operation of any
+* kind (e.g. cas) */
+syncvar_t local_copy_of_v;
+local_copy_of_v.u.w = atomic_load_explicit((_Atomic uint64_t*)v, memory_order_relaxed);
+if (local_copy_of_v.u.s.lock == 0) {
+/* short-circuit */
+return (local_copy_of_v.u.s.state & 0x2) ? 0 : 1;
+}
-# endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)) */
(void)qthread_mwaitc(v, 0xff, INT_MAX, &e);
qassert_ret(e.cf == 0, QTHREAD_TIMEOUT); /* there better not have been a timeout */
-realret = v->u.s.state;
-UNLOCK_THIS_UNMODIFIED_SYNCVAR(v, BUILD_UNLOCKED_SYNCVAR(v->u.s.data, v->u.s.state));
+local_copy_of_v.u.w = atomic_load_explicit((_Atomic uint64_t*)v, memory_order_relaxed);
+realret = local_copy_of_v.u.s.state;
+UNLOCK_THIS_UNMODIFIED_SYNCVAR(v, BUILD_UNLOCKED_SYNCVAR(local_copy_of_v.u.s.data, local_copy_of_v.u.s.state));
return (realret & 0x2) ? 0 : 1;
-#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) */
} /*}}} */

static aligned_t qthread_syncvar_nonblocker_thread(void *arg)
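
The rewritten fast path above hinges on taking one untorn 64-bit snapshot and trusting its fields only when the lock bit is clear. A self-contained sketch of that optimistic-read pattern follows; the bitfield layout is an assumption inferred from BUILD_UNLOCKED_SYNCVAR's shifts (syncvar_t's real definition lives in the qthreads headers), and bitfield ordering is implementation-defined in C:

    #include <stdatomic.h>
    #include <stdint.h>

    typedef union {
        uint64_t w;
        struct {               /* assumed layout: lock bit 0, state bits 1-3 */
            uint64_t lock  : 1;
            uint64_t state : 3;
            uint64_t data  : 60;
        } s;
    } sv_t; /* hypothetical stand-in for qthreads' syncvar_t */

    /* Returns 1 if full, 0 if empty, -1 if locked (caller takes the slow path). */
    int sv_status_fast(sv_t *v) {
        sv_t local;
        /* a single relaxed 64-bit atomic load: the snapshot cannot be torn */
        local.w = atomic_load_explicit((_Atomic uint64_t *)&v->w,
                                       memory_order_relaxed);
        if (local.s.lock == 0) {
            /* unlocked, so lock/state/data are mutually consistent */
            return (local.s.state & 0x2) ? 0 : 1;
        }
        return -1;
    }
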
@@ -379,14 +363,17 @@ int API_FUNC qthread_syncvar_readFF(uint64_t *restrict dest,
#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || \
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \
-(QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64))
+(QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)) || \
+(QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || \
+(QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64)
{
/* I'm being optimistic here; this only works if a basic 64-bit load is
* atomic (on most platforms it is). Thus, if I've done an atomic read
* and the syncvar is both unlocked and full, then I figure I can trust
* that state and do not need to do a locked atomic operation of any
* kind (e.g. cas) */
-syncvar_t local_copy_of_src = *src;
+syncvar_t local_copy_of_src;
+local_copy_of_src.u.w = atomic_load_explicit((_Atomic uint64_t*)src, memory_order_relaxed);
if ((local_copy_of_src.u.s.lock == 0) && ((local_copy_of_src.u.s.state & 2) == 0)) { /* full and unlocked */
/* short-circuit */
if (dest) {
@@ -395,7 +382,7 @@ int API_FUNC qthread_syncvar_readFF(uint64_t *restrict dest,
return QTHREAD_SUCCESS;
}
}
-#endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)) */
+#endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64)) */
ret = qthread_mwaitc(src, SYNCFEB_FULL, INITIAL_TIMEOUT, &e);
qthread_debug(SYNCVAR_DETAILS, "2 src(%p) = %x, ret = %x\n", src,
(uintptr_t)src->u.w, ret);
@@ -503,7 +490,9 @@ int API_FUNC qthread_syncvar_readFF_nb(uint64_t *restrict dest,
#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || \
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \
-(QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64))
+(QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) || \
+(QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || \
+(QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64))
{
/* I'm being optimistic here; this only works if a basic 64-bit load is
* atomic (on most platforms it is). Thus, if I've done an atomic read
@@ -519,7 +508,7 @@ int API_FUNC qthread_syncvar_readFF_nb(uint64_t *restrict dest,
return QTHREAD_SUCCESS;
}
}
-#endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)) */
+#endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64)) */
ret = qthread_mwaitc(src, SYNCFEB_FULL, 1, &e);
qthread_debug(SYNCVAR_DETAILS, "2 src(%p) = %x, ret = %x\n", src,
(uintptr_t)src->u.w, ret);
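As a footnote to the diff, a worked example of the word layout that BUILD_UNLOCKED_SYNCVAR (top of the diff) encodes; the field meanings are inferred from the macro's shifts, and the macro leaves the lock bit (bit 0) clear:

    #include <assert.h>
    #include <stdint.h>

    #define BUILD_UNLOCKED_SYNCVAR(data, state) (((data) << 4) | ((state) << 1))

    int main(void) {
        uint64_t w = BUILD_UNLOCKED_SYNCVAR(UINT64_C(5), UINT64_C(2));
        assert(w == 0x54);             /* (5 << 4) | (2 << 1) */
        assert((w & 1) == 0);          /* lock bit clear: stored word is unlocked */
        assert(((w >> 1) & 0x7) == 2); /* state bits; 0x2 set reads as "empty" above */
        assert((w >> 4) == 5);         /* 60-bit data payload */
        return 0;
    }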
