From 7610f523b4f4dde65e30d246d2a37cc7709d755b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 19 Mar 2024 14:39:14 +0000 Subject: [PATCH] core/uwsgi: stop using pthread_cancel() When working to reproduce #2615 I saw many strange "defunct" (zombie) workers. The master called waitpid(-1, ...) but it return 0 even there are some zombies. Finally, master sends KILL signal (MERCY) and worker is restarted. I believe this strange zombie was born from pthread_cancel. Subthreads calls pthread_cancel() for main thread and it cause strange process. pthread_cancel() is very hard to use and debug. I can not even attach the strange zombie with gdb --pid. I think it is not maintainable. In the end we can remove six_feet_under_lock and make wait_for_threads() static. --- core/loop.c | 3 --- core/utils.c | 21 --------------------- core/uwsgi.c | 46 ++++++---------------------------------------- uwsgi.h | 1 - 4 files changed, 6 insertions(+), 65 deletions(-) diff --git a/core/loop.c b/core/loop.c index cd3cf6126..8c64279bc 100644 --- a/core/loop.c +++ b/core/loop.c @@ -81,9 +81,6 @@ void uwsgi_setup_thread_req(long core_id, struct wsgi_request *wsgi_req) { int i; sigset_t smask; - - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &i); - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &i); pthread_setspecific(uwsgi.tur_key, (void *) wsgi_req); if (core_id > 0) { diff --git a/core/utils.c b/core/utils.c index 44c7a52f3..c944f6478 100644 --- a/core/utils.c +++ b/core/utils.c @@ -1034,12 +1034,6 @@ void uwsgi_destroy_request(struct wsgi_request *wsgi_req) { close_and_free_request(wsgi_req); - int foo; - if (uwsgi.threads > 1) { - // now the thread can die... - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &foo); - } - // reset for avoiding following requests to fail on non-uwsgi protocols // thanks Marko Tiikkaja for catching it wsgi_req->uh->_pktsize = 0; @@ -1131,11 +1125,6 @@ void uwsgi_close_request(struct wsgi_request *wsgi_req) { func(wsgi_req); } - if (uwsgi.threads > 1) { - // now the thread can die... - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &tmp_id); - } - // leave harakiri mode if (uwsgi.workers[uwsgi.mywid].cores[wsgi_req->async_id].harakiri > 0) { set_harakiri(wsgi_req, 0); @@ -1583,18 +1572,12 @@ int wsgi_req_accept(int queue, struct wsgi_request *wsgi_req) { } } - // kill the thread after the request completion - if (uwsgi.threads > 1) - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &ret); - if (uwsgi.signal_socket > -1 && (interesting_fd == uwsgi.signal_socket || interesting_fd == uwsgi.my_signal_socket)) { thunder_unlock; uwsgi_receive_signal(wsgi_req, interesting_fd, "worker", uwsgi.mywid); - if (uwsgi.threads > 1) - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &ret); return -1; } @@ -1605,8 +1588,6 @@ int wsgi_req_accept(int queue, struct wsgi_request *wsgi_req) { wsgi_req->fd = wsgi_req->socket->proto_accept(wsgi_req, interesting_fd); thunder_unlock; if (wsgi_req->fd < 0) { - if (uwsgi.threads > 1) - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &ret); return -1; } @@ -1621,8 +1602,6 @@ int wsgi_req_accept(int queue, struct wsgi_request *wsgi_req) { } thunder_unlock; - if (uwsgi.threads > 1) - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &ret); return -1; } diff --git a/core/uwsgi.c b/core/uwsgi.c index dca9c6896..1bf257f4e 100755 --- a/core/uwsgi.c +++ b/core/uwsgi.c @@ -1226,30 +1226,17 @@ void warn_pipe() { } } -// in threading mode we need to use the cancel pthread subsystem -void wait_for_threads() { +// This function is called from signal handler or main thread to wait worker threads. +// `uwsgi.workers[uwsgi.mywid].manage_next_request` should be set to 0 to stop worker threads. +static void wait_for_threads() { int i, ret; - // on some platform thread cancellation is REALLY flaky + // This option was added because we used pthread_cancel(). + // thread cancellation is REALLY flaky if (uwsgi.no_threads_wait) return; - int sudden_death = 0; - - pthread_mutex_lock(&uwsgi.six_feet_under_lock); - for (i = 1; i < uwsgi.threads; i++) { - if (!pthread_equal(uwsgi.workers[uwsgi.mywid].cores[i].thread_id, pthread_self())) { - if (pthread_cancel(uwsgi.workers[uwsgi.mywid].cores[i].thread_id)) { - uwsgi_error("pthread_cancel()\n"); - sudden_death = 1; - } - } - } - - if (sudden_death) - goto end; - // wait for thread termination - for (i = 1; i < uwsgi.threads; i++) { + for (i = 0; i < uwsgi.threads; i++) { if (!pthread_equal(uwsgi.workers[uwsgi.mywid].cores[i].thread_id, pthread_self())) { ret = pthread_join(uwsgi.workers[uwsgi.mywid].cores[i].thread_id, NULL); if (ret) { @@ -1261,26 +1248,6 @@ void wait_for_threads() { } } } - - // cancel initial thread last since after pthread_cancel() and - // pthread_join() is called on it, the whole process will appear to be - // a zombie. although it won't eliminate process zombie time, but it - // should minimize it. - if (!pthread_equal(uwsgi.workers[uwsgi.mywid].cores[0].thread_id, pthread_self())) { - if (pthread_cancel(uwsgi.workers[uwsgi.mywid].cores[0].thread_id)) { - uwsgi_error("pthread_cancel() on initial thread\n"); - goto end; - } - - ret = pthread_join(uwsgi.workers[uwsgi.mywid].cores[0].thread_id, NULL); - if (ret) { - uwsgi_log("pthread_join() = %d on initial thread\n", ret); - } - } - -end: - - pthread_mutex_unlock(&uwsgi.six_feet_under_lock); } @@ -3641,7 +3608,6 @@ void uwsgi_worker_run() { if (uwsgi.cores > 1) { uwsgi.workers[uwsgi.mywid].cores[0].thread_id = pthread_self(); - pthread_mutex_init(&uwsgi.six_feet_under_lock, NULL); } uwsgi_ignition(); diff --git a/uwsgi.h b/uwsgi.h index c512cc74e..39c3282db 100755 --- a/uwsgi.h +++ b/uwsgi.h @@ -2526,7 +2526,6 @@ struct uwsgi_server { // avoid thundering herd in threaded modes pthread_mutex_t thunder_mutex; - pthread_mutex_t six_feet_under_lock; pthread_mutex_t lock_static; int use_thunder_lock;