From e0bd8118d04b55b7adf3d9ba256ad4bb53e66512 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 13 Feb 2024 14:15:16 -0500 Subject: [PATCH] Linux: Cleanup taskq threads spawn/exit This changes taskq_thread_should_stop() to limit maximum exit rate for idle threads to one per 5 seconds. I believe the previous one was broken, not allowing any thread exits for tasks arriving more than one at a time and so completing while others are running. Also while there: - Remove taskq_thread_spawn() calls on task allocation errors. - Remove extra taskq_thread_should_stop() call. Reviewed-by: Brian Behlendorf Reviewed-by: Rich Ercolani Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15873 --- include/os/linux/spl/sys/taskq.h | 2 +- man/man4/spl.4 | 18 ++----- module/os/linux/spl/spl-taskq.c | 85 +++++++++++--------------------- 3 files changed, 34 insertions(+), 71 deletions(-) diff --git a/include/os/linux/spl/sys/taskq.h b/include/os/linux/spl/sys/taskq.h index aa5860c56e83..b73dab631e04 100644 --- a/include/os/linux/spl/sys/taskq.h +++ b/include/os/linux/spl/sys/taskq.h @@ -104,7 +104,7 @@ typedef struct taskq { /* list node for the cpu hotplug callback */ struct hlist_node tq_hp_cb_node; boolean_t tq_hp_support; - unsigned long lastshouldstop; /* when to purge dynamic */ + unsigned long lastspawnstop; /* when to purge dynamic */ } taskq_t; typedef struct taskq_ent { diff --git a/man/man4/spl.4 b/man/man4/spl.4 index 414a92394858..5cc12764e18c 100644 --- a/man/man4/spl.4 +++ b/man/man4/spl.4 @@ -186,18 +186,8 @@ reading it could cause a lock-up if the list grow too large without limiting the output. "(truncated)" will be shown if the list is larger than the limit. . -.It Sy spl_taskq_thread_timeout_ms Ns = Ns Sy 10000 Pq uint -(Linux-only) -How long a taskq has to have had no work before we tear it down. -Previously, we would tear down a dynamic taskq worker as soon -as we noticed it had no work, but it was observed that this led -to a lot of churn in tearing down things we then immediately -spawned anew. -In practice, it seems any nonzero value will remove the vast -majority of this churn, while the nontrivially larger value -was chosen to help filter out the little remaining churn on -a mostly idle system. -Setting this value to -.Sy 0 -will revert to the previous behavior. +.It Sy spl_taskq_thread_timeout_ms Ns = Ns Sy 5000 Pq uint +Minimum idle threads exit interval for dynamic taskqs. +Smaller values allow idle threads exit more often and potentially be +respawned again on demand, causing more churn. .El diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c index 79a1a8e5a5aa..c384b7b378c3 100644 --- a/module/os/linux/spl/spl-taskq.c +++ b/module/os/linux/spl/spl-taskq.c @@ -36,12 +36,12 @@ static int spl_taskq_thread_bind = 0; module_param(spl_taskq_thread_bind, int, 0644); MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default"); -static uint_t spl_taskq_thread_timeout_ms = 10000; +static uint_t spl_taskq_thread_timeout_ms = 5000; /* BEGIN CSTYLED */ module_param(spl_taskq_thread_timeout_ms, uint, 0644); /* END CSTYLED */ MODULE_PARM_DESC(spl_taskq_thread_timeout_ms, - "Time to require a dynamic thread be idle before it gets cleaned up"); + "Minimum idle threads exit interval for dynamic taskqs"); static int spl_taskq_thread_dynamic = 1; module_param(spl_taskq_thread_dynamic, int, 0444); @@ -594,8 +594,7 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) ASSERT(tq->tq_nactive <= tq->tq_nthreads); if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) { /* Dynamic taskq may be able to spawn another thread */ - if (!(tq->tq_flags & TASKQ_DYNAMIC) || - taskq_thread_spawn(tq) == 0) + if (taskq_thread_spawn(tq) == 0) goto out; } @@ -629,11 +628,11 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) spin_unlock(&t->tqent_lock); wake_up(&tq->tq_work_waitq); -out: + /* Spawn additional taskq threads if required. */ if (!(flags & TQ_NOQUEUE) && tq->tq_nactive == tq->tq_nthreads) (void) taskq_thread_spawn(tq); - +out: spin_unlock_irqrestore(&tq->tq_lock, irqflags); return (rc); } @@ -676,10 +675,11 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg, ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC)); spin_unlock(&t->tqent_lock); -out: + /* Spawn additional taskq threads if required. */ if (tq->tq_nactive == tq->tq_nthreads) (void) taskq_thread_spawn(tq); +out: spin_unlock_irqrestore(&tq->tq_lock, irqflags); return (rc); } @@ -704,9 +704,8 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) { /* Dynamic taskq may be able to spawn another thread */ - if (!(tq->tq_flags & TASKQ_DYNAMIC) || - taskq_thread_spawn(tq) == 0) - goto out2; + if (taskq_thread_spawn(tq) == 0) + goto out; flags |= TQ_FRONT; } @@ -742,11 +741,11 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, spin_unlock(&t->tqent_lock); wake_up(&tq->tq_work_waitq); -out: + /* Spawn additional taskq threads if required. */ if (tq->tq_nactive == tq->tq_nthreads) (void) taskq_thread_spawn(tq); -out2: +out: spin_unlock_irqrestore(&tq->tq_lock, irqflags); } EXPORT_SYMBOL(taskq_dispatch_ent); @@ -825,6 +824,7 @@ taskq_thread_spawn(taskq_t *tq) if (!(tq->tq_flags & TASKQ_DYNAMIC)) return (0); + tq->lastspawnstop = jiffies; if ((tq->tq_nthreads + tq->tq_nspawn < tq->tq_maxthreads) && (tq->tq_flags & TASKQ_ACTIVE)) { spawning = (++tq->tq_nspawn); @@ -836,9 +836,9 @@ taskq_thread_spawn(taskq_t *tq) } /* - * Threads in a dynamic taskq should only exit once it has been completely - * drained and no other threads are actively servicing tasks. This prevents - * threads from being created and destroyed more than is required. + * Threads in a dynamic taskq may exit once there is no more work to do. + * To prevent threads from being created and destroyed too often limit + * the exit rate to one per spl_taskq_thread_timeout_ms. * * The first thread is the thread list is treated as the primary thread. * There is nothing special about the primary thread but in order to avoid @@ -847,44 +847,22 @@ taskq_thread_spawn(taskq_t *tq) static int taskq_thread_should_stop(taskq_t *tq, taskq_thread_t *tqt) { - if (!(tq->tq_flags & TASKQ_DYNAMIC)) + ASSERT(!taskq_next_ent(tq)); + if (!(tq->tq_flags & TASKQ_DYNAMIC) || !spl_taskq_thread_dynamic) return (0); - + if (!(tq->tq_flags & TASKQ_ACTIVE)) + return (1); if (list_first_entry(&(tq->tq_thread_list), taskq_thread_t, tqt_thread_list) == tqt) return (0); - - int no_work = - ((tq->tq_nspawn == 0) && /* No threads are being spawned */ - (tq->tq_nactive == 0) && /* No threads are handling tasks */ - (tq->tq_nthreads > 1) && /* More than 1 thread is running */ - (!taskq_next_ent(tq)) && /* There are no pending tasks */ - (spl_taskq_thread_dynamic)); /* Dynamic taskqs are allowed */ - - /* - * If we would have said stop before, let's instead wait a bit, maybe - * we'll see more work come our way soon... - */ - if (no_work) { - /* if it's 0, we want the old behavior. */ - /* if the taskq is being torn down, we also want to go away. */ - if (spl_taskq_thread_timeout_ms == 0 || - !(tq->tq_flags & TASKQ_ACTIVE)) - return (1); - unsigned long lasttime = tq->lastshouldstop; - if (lasttime > 0) { - if (time_after(jiffies, lasttime + - msecs_to_jiffies(spl_taskq_thread_timeout_ms))) - return (1); - else - return (0); - } else { - tq->lastshouldstop = jiffies; - } - } else { - tq->lastshouldstop = 0; - } - return (0); + ASSERT3U(tq->tq_nthreads, >, 1); + if (tq->tq_nspawn != 0) + return (0); + if (time_before(jiffies, tq->lastspawnstop + + msecs_to_jiffies(spl_taskq_thread_timeout_ms))) + return (0); + tq->lastspawnstop = jiffies; + return (1); } static int @@ -935,10 +913,8 @@ taskq_thread(void *args) if (list_empty(&tq->tq_pend_list) && list_empty(&tq->tq_prio_list)) { - if (taskq_thread_should_stop(tq, tqt)) { - wake_up_all(&tq->tq_wait_waitq); + if (taskq_thread_should_stop(tq, tqt)) break; - } add_wait_queue_exclusive(&tq->tq_work_waitq, &wait); spin_unlock_irqrestore(&tq->tq_lock, flags); @@ -1013,9 +989,6 @@ taskq_thread(void *args) tqt->tqt_id = TASKQID_INVALID; tqt->tqt_flags = 0; wake_up_all(&tq->tq_wait_waitq); - } else { - if (taskq_thread_should_stop(tq, tqt)) - break; } set_current_state(TASK_INTERRUPTIBLE); @@ -1122,7 +1095,7 @@ taskq_create(const char *name, int threads_arg, pri_t pri, tq->tq_flags = (flags | TASKQ_ACTIVE); tq->tq_next_id = TASKQID_INITIAL; tq->tq_lowest_id = TASKQID_INITIAL; - tq->lastshouldstop = 0; + tq->lastspawnstop = jiffies; INIT_LIST_HEAD(&tq->tq_free_list); INIT_LIST_HEAD(&tq->tq_pend_list); INIT_LIST_HEAD(&tq->tq_prio_list);