Skip to content

Commit

Permalink
boss only wakes 1 worker (defers thief wake); sleeping workers chain wakes
Browse files Browse the repository at this point in the history
  • Loading branch information
Kyle Singer committed Nov 25, 2024
1 parent 8b950f2 commit b3a5709
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 25 deletions.
2 changes: 1 addition & 1 deletion runtime/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ void __cilkrts_internal_invoke_cilkified_root(__cilkrts_stack_frame *sf) {
// occur, rather than all at once. Initial testing of this approach did not
// seem to perform well, however. One possible reason why could be because
// of the extra kernel interactions involved in waking workers gradually.
wake_thieves(g);
async_wake_thieves(g);
/* request_more_thieves(g, g->nworkers); */

// Start the workers if necessary
Expand Down
16 changes: 13 additions & 3 deletions runtime/scheduler.c
Original file line number Diff line number Diff line change
Expand Up @@ -1605,7 +1605,10 @@ void worker_scheduler(__cilkrts_worker *w) {
atomic_load_explicit(&rts->done, memory_order_relaxed)) {
busy_pause();
}
if (thief_should_wait(rts)) {
const uint32_t local_wake = take_current_wake_value(rts);
/*if (local_wake == (nworkers - 1u)) {
deferred_wake_thieves(rts);
} else */if (thief_should_wait(local_wake)) {
break;
}
}
Expand Down Expand Up @@ -1652,11 +1655,18 @@ void *scheduler_thread_proc(void *arg) {
// Wait for g->start == 1 to start executing the work-stealing loop. We
// use a condition variable to wait on g->start, because this approach
// seems to result in better performance.
if (thief_should_wait(rts)) {
uint32_t local_wake = take_current_wake_value(rts);
if (thief_should_wait(local_wake)) {
disengage_worker(rts, nworkers, self);
l->wake_val = thief_wait(rts);
local_wake = thief_wait(rts);
l->wake_val = local_wake;
reengage_worker(rts, nworkers, self);
deferred_wake_thieves(rts);
}

//if (local_wake == (rts->nworkers - 1u)) {
// deferred_wake_thieves(rts);
//}
CILK_STOP_TIMING(w, INTERVAL_SLEEP_UNCILK);

// Check if we should exit this scheduling function.
Expand Down
56 changes: 35 additions & 21 deletions runtime/worker_coord.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#include "global.h"

#define USER_USE_FUTEX 1
#define USER_USE_FUTEX 0
#ifdef __linux__
#define USE_FUTEX USER_USE_FUTEX
#else
Expand Down Expand Up @@ -299,54 +299,68 @@ static inline uint32_t thief_wait(global_state *g) {
return thief_disengage(g);
}

// Called by a thief thread. Check if the thief should start waiting for the
// start of a cilkified region. If a new cilkified region has been started
// already, update the global state to indicate that this worker is engaged in
// work stealing.
static inline bool thief_should_wait(global_state *g) {
// Called by a thief thread. Atomically claim one "wake token" from
// g->disengaged_thieves_futex, decrementing the counter if it is nonzero.
//
// Returns the counter value observed at the moment of the claim:
//   - 0 means no tokens were available (the thief should wait; see
//     thief_should_wait()).
//   - nonzero means a token was consumed; the returned value is the
//     pre-decrement count, so callers can tell how many tokens remained.
//
// NOTE(review): the diff this was reconstructed from had its +/- markers
// stripped; old-version lines (early `return true/false`) were removed here.
static inline uint32_t take_current_wake_value(global_state *const g) {
    _Atomic uint32_t *futexp = &g->disengaged_thieves_futex;
    uint32_t val = atomic_load_explicit(futexp, memory_order_relaxed);
#if USE_FUTEX
    // Lock-free path: CAS the counter down by one.  On CAS failure, pause
    // briefly and re-read; exit the loop once we either claimed a token or
    // observed the counter at zero.
    while (val > 0) {
        if (atomic_compare_exchange_weak_explicit(futexp, &val, val - 1,
                                                  memory_order_release,
                                                  memory_order_relaxed))
            break;
        busy_loop_pause();
        val = atomic_load_explicit(futexp, memory_order_relaxed);
    }
#else
    // Mutex path: re-check the counter under the lock before decrementing,
    // since it may have been drained between the relaxed load and lock
    // acquisition.
    if (val != 0) {
        pthread_mutex_t *lock = &g->disengaged_lock;
        pthread_mutex_lock(lock);
        val = atomic_load_explicit(futexp, memory_order_relaxed);
        if (val > 0) {
            atomic_store_explicit(futexp, val - 1, memory_order_release);
        }
        pthread_mutex_unlock(lock);
    }
#endif

    return val;
}

// Called by a thief thread after take_current_wake_value(). A wake value of
// zero means no wake token was claimed -- no cilkified region needs this
// worker yet -- so the thief should (re)enter its wait state. A nonzero
// value means the thief claimed a token and should engage in work stealing.
static inline bool thief_should_wait(const uint32_t wake_value) {
    return !wake_value;
}

// Signal the thief threads to start work-stealing (or terminate, if
// g->terminate == 1).  Publishes nworkers-1 wake tokens but wakes only ONE
// sleeping thief; awakened thieves are expected to chain-wake their peers
// (via deferred_wake_thieves), keeping the boss's kernel interaction cheap.
//
// NOTE(review): the diff this was reconstructed from had its +/- markers
// stripped; the stale `wake_thieves` signature and the INT_MAX futex-wake
// line were removed here, per the commit title ("boss only wakes 1 worker").
static inline void async_wake_thieves(global_state *const g) {
#if USE_FUTEX
    atomic_store_explicit(&g->disengaged_thieves_futex, g->nworkers - 1,
                          memory_order_release);
    // Wake at most one waiter; the remainder are woken in a chain.
    long s = futex(&g->disengaged_thieves_futex, FUTEX_WAKE_PRIVATE, 1,
                   NULL, NULL, 0);
    if (s == -1)
        errExit("futex-FUTEX_WAKE");
#else
    pthread_mutex_lock(&g->disengaged_lock);
    atomic_store_explicit(&g->disengaged_thieves_futex, g->nworkers - 1,
                          memory_order_release);
    // Signal (not broadcast): wake a single thief, matching the futex path.
    pthread_cond_signal(&g->disengaged_cond_var);
    pthread_mutex_unlock(&g->disengaged_lock);
#endif
}

static inline void deferred_wake_thieves(global_state *const g) {
#if USE_FUTEX
long s = futex(&g->disengaged_thieves_futex, FUTEX_WAKE_PRIVATE, 1,
NULL, NULL, 0);
if (s == -1)
errExit("futex-FUTEX_WAKE");
#else
pthread_mutex_lock(&g->disengaged_lock);
pthread_cond_broadcast(&g->disengaged_cond_var);
pthread_mutex_unlock(&g->disengaged_lock);
#endif
Expand Down

0 comments on commit b3a5709

Please sign in to comment.