From 9ed80696da8e02ba76a7be112201593ba85d19a0 Mon Sep 17 00:00:00 2001 From: "winters.zc" Date: Tue, 14 Mar 2023 15:25:40 +0800 Subject: [PATCH] Fix high cpu usage caused by fd leak We found a high CPU usage problem in supervisor. It is caused by the supervisor main loop continuously polling an invalid fd. This busy polling drives CPU usage close to 100%. (This can be confirmed with the strace tool.) The issue can be reproduced as follows: 1. Continuously send arbitrary requests to supervisor through supervisorctl. 2. After the socket fd is closed, trigger the supervisor's subprocess to rotate the log (or reopen the file). 3. If the above steps complete within a single iteration of the supervisor main loop, the problem is triggered. The root cause is that supervisor relies on _ignore_invalid() in the main loop to close fds. This approach has a flaw: if an fd is reused before _ignore_invalid() is called, the fd may remain in the poll fd list forever. This commit fixes the problem by checking the validity of each fd in the event list in the main loop: if an fd is not in combined_map, it is considered invalid and is removed from the poll list. --- supervisor/supervisord.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/supervisor/supervisord.py b/supervisor/supervisord.py index 0a4f3e697..2265db9c7 100755 --- a/supervisor/supervisord.py +++ b/supervisor/supervisord.py @@ -222,6 +222,14 @@ def runforever(self): raise except: combined_map[fd].handle_error() + else: + # if the fd is not in combined_map, we should unregister it. 
otherwise, + # it will be polled every time, which may cause 100% cpu usage + self.options.logger.warn('unexpected read event from fd %r' % fd) + try: + self.options.poller.unregister_readable(fd) + except: + pass for fd in w: if fd in combined_map: @@ -237,6 +245,12 @@ def runforever(self): raise except: combined_map[fd].handle_error() + else: + self.options.logger.warn('unexpected write event from fd %r' % fd) + try: + self.options.poller.unregister_writable(fd) + except: + pass for group in pgroups: group.transition()