From 3f1f7f4d9bc7364a5e7051df03a9a438c2e3b6dd Mon Sep 17 00:00:00 2001 From: Tiago Vale Date: Thu, 27 Dec 2018 08:19:46 +0000 Subject: [PATCH] Bug #28936888 XCOM'S ACCEPTOR_LEARNER_TASK USES-AFTER-FREE A REFERENCE TO A SERVER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem ======================================================================== Running the GCS tests with ASAN seldomly reports a user-after-free of the server reference that the acceptor_learner_task uses. Here is an excerpt of ASAN's output: ==43936==ERROR: AddressSanitizer: heap-use-after-free on address 0x63100021c840 at pc 0x000000530ff8 bp 0x7fc0427e8530 sp 0x7fc0427e8520 WRITE of size 8 at 0x63100021c840 thread T3 #0 0x530ff7 in server_detected /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:962 #1 0x533814 in buffered_read_bytes /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:1249 #2 0x5481af in buffered_read_msg /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:1399 #3 0x51e171 in acceptor_learner_task /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:4690 #4 0x562357 in task_loop /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task.c:1140 #5 0x5003b2 in xcom_taskmain2 /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:1324 #6 0x6a278a in Gcs_xcom_proxy_impl::xcom_init(unsigned short, node_address*) /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_proxy.cc:164 #7 0x59b3c1 in xcom_taskmain_startup /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_control_interface.cc:107 #8 0x7fc04a2e4dd4 in start_thread (/lib64/libpthread.so.0+0x7dd4) #9 0x7fc047ff2bfc in __clone (/lib64/libc.so.6+0xfebfc) 0x63100021c840 is located 64 bytes inside of 65688-byte region [0x63100021c800,0x63100022c898) freed by thread T3 here: #0 0x7fc04a5d7508 in __interceptor_free (/lib64/libasan.so.4+0xde508) #1 0x52cf86 in freesrv /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:836 #2 0x52ea78 in srv_unref /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:868 #3 0x524c30 in reply_handler_task /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:4914 #4 0x562357 in task_loop /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task.c:1140 #5 0x5003b2 in xcom_taskmain2 /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:1324 #6 0x6a278a in Gcs_xcom_proxy_impl::xcom_init(unsigned short, node_address*) /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_proxy.cc:164 #7 0x59b3c1 in xcom_taskmain_startup /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_control_interface.cc:107 #8 0x7fc04a2e4dd4 in start_thread (/lib64/libpthread.so.0+0x7dd4) previously allocated by thread T3 here: #0 0x7fc04a5d7a88 in __interceptor_calloc (/lib64/libasan.so.4+0xdea88) #1 0x543604 in mksrv /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:721 #2 0x543b4c in addsrv /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:755 #3 0x54af61 in update_servers /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:1747 #4 0x501082 in site_install_action /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:1572 #5 0x55447c in import_config /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/site_def.c:486 #6 0x506dfc in handle_x_snapshot /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:5257 #7 0x50c444 in xcom_fsm /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:5325 #8 0x516c36 in dispatch_op /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:4510 #9 0x521997 in acceptor_learner_task /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:4772 #10 0x562357 in task_loop /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task.c:1140 #11 0x5003b2 in xcom_taskmain2 /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:1324 #12 0x6a278a in Gcs_xcom_proxy_impl::xcom_init(unsigned short, node_address*) /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_proxy.cc:164 #13 0x59b3c1 in xcom_taskmain_startup /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_control_interface.cc:107 #14 0x7fc04a2e4dd4 in start_thread (/lib64/libpthread.so.0+0x7dd4) Analysis ======================================================================== The server structure is reference counted by the associated sender_task and reply_handler_task. When they finish, they unreference the server, which leads to its memory being freed. However, the acceptor_learner_task keeps a "naked" reference to the server structure. Under the right ordering of operations, i.e. the sender_task and reply_handler_task terminating after the acceptor_learner_task acquires, but before it uses, the reference to the server structure, leads to the acceptor_learner_task accessing the server structure after it has been freed. Solution ======================================================================== Let the acceptor_learner_task also reference count the server structure so it is not freed while still in use. Reviewed-by: André Negrão Reviewed-by: Venkatesh Venugopal RB: 21209 --- .../libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c | 6 ++++++ .../libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c b/rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c index a05b9f38508e..34124e940c99 100644 --- a/rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c +++ b/rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c @@ -4006,7 +4006,11 @@ int acceptor_learner_task(task_arg arg) common to both the sender_task, reply_handler_task, and the ac‐ ceptor_learner_task. */ + // Allow the previous server reference to be freed. + if (ep->srv) srv_unref(ep->srv); ep->srv = get_server(site, ep->p->from); + // Prevent the new server reference from being freed. + if (ep->srv) srv_ref(ep->srv); ep->p->refcnt = 1; /* Refcnt from other end is void here */ MAY_DBG(FN; NDBG(ep->rfd.fd, d); NDBG(task_now(), f); @@ -4120,6 +4124,8 @@ int acceptor_learner_task(task_arg arg) if (ep->buf) X_FREE(ep->buf); free(ep->in_buf); + // Allow the server reference to be freed. + if (ep->srv) srv_unref(ep->srv); TASK_END; } diff --git a/rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c b/rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c index a29b024c6a33..3e324ed65999 100644 --- a/rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c +++ b/rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c @@ -747,6 +747,12 @@ mksrv(char *srv, xcom_port port) s->reply_handler = task_new(reply_handler_task, void_arg(s), "reply_handler_task", XCOM_THREAD_DEBUG); } reset_srv_buf(&s->out_buf); + /* + Keep the server from being freed if the acceptor_learner_task calls + srv_unref on the server before the {local_,}server_task and + reply_handler_task begin. + */ + srv_ref(s); return s; } @@ -872,6 +878,9 @@ static void shut_srv(server *s) task_terminate(s->sender); if (s->reply_handler) task_terminate(s->reply_handler); + + // Allow the server to be freed. This unref pairs with the ref from mksrv. + srv_unref(s); }