From a82d36743a0b58f8ab4b8d17c612440f3814d3d0 Mon Sep 17 00:00:00 2001 From: Douglas Kerr Date: Sun, 8 Mar 2020 09:21:14 -0700 Subject: [PATCH 1/2] Implement kswapd0hack service - This service defends against the problem of kswapd0 eventually taking all of the CPU when the system is under heavy load. It reboots the machine when kswapd0 CPU consumption rises to 10%. Web search for "kswapd0 taking a lot of cpu" for many writeups on this problem. --- CommunityView/confcvserver/confcvserver.sh | 21 ++++- CommunityView/kswapd0hack/kswapd0hack.service | 35 ++++++++ CommunityView/kswapd0hack/kswapd0hack.sh | 63 ++++++++++++++ .../kswapd0hack/test/testKswapd0hack.sh | 86 +++++++++++++++++++ 4 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 CommunityView/kswapd0hack/kswapd0hack.service create mode 100644 CommunityView/kswapd0hack/kswapd0hack.sh create mode 100644 CommunityView/kswapd0hack/test/testKswapd0hack.sh diff --git a/CommunityView/confcvserver/confcvserver.sh b/CommunityView/confcvserver/confcvserver.sh index 208ba79..f44a316 100644 --- a/CommunityView/confcvserver/confcvserver.sh +++ b/CommunityView/confcvserver/confcvserver.sh @@ -52,6 +52,7 @@ config_dir=/etc/opt/communityview # XXX future use var_dir=/var/opt/communityview log_dir=$var_dir/log systemd_dir=/lib/systemd/system +kswapd0hack_code_dir=/opt/communityview # log file for this script scriptlog=confcvserver.log @@ -229,7 +230,7 @@ configure() { # and remove the service file if it's there systemctl stop communityview || true systemctl disable communityview || true - tgt=$systemd_dir/communityview.service + local tgt=$systemd_dir/communityview.service rm -f "$tgt" task="creating the upload user account" @@ -455,6 +456,24 @@ configure() { chmod 755 $code_dir/$name editcrontab $name "7 8,14,20 * * * $code_dir/$name" + task="installing hack to defend against kswapd0 bug" + echo "***** $task" | tee /dev/tty + local name=kswapd0hack + systemctl stop $name.service || true + systemctl disable 
$name.service || true + tgt=$systemd_dir/$name.service + rm -f "$tgt" + cp $our_dir/../$name/$name.service "$tgt" + chmod 644 "$tgt" + chown root:root "$tgt" + tgt="$kswapd0hack_code_dir"/$name + mk_dir "$kswapd0hack_code_dir" + cp $our_dir/../$name/$name.sh "$tgt" + chmod 755 "$tgt" + chown root:root "$tgt" + systemctl enable $name.service + systemctl start $name.service + # accounts-daemon seems to have a bug wherein it frequently goes crazy # and sucks up all the CPU task="permanently disabling accounts-daemon" diff --git a/CommunityView/kswapd0hack/kswapd0hack.service b/CommunityView/kswapd0hack/kswapd0hack.service new file mode 100644 index 0000000..c467a4d --- /dev/null +++ b/CommunityView/kswapd0hack/kswapd0hack.service @@ -0,0 +1,35 @@ +################################################################################ +# +# Copyright (C) 2019 Neighborhood Guard, Inc. All rights reserved. +# Original author: Douglas Kerr +# +# This file is part of CommunityView. +# +# CommunityView is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CommunityView is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with FTP_Upload. If not, see <http://www.gnu.org/licenses/>. 
+#
+################################################################################
+
+[Unit]
+Description=Monitor kswapd0 CPU and reboot if too high
+After=network.target
+
+[Service]
+Type=simple
+User=root
+WorkingDirectory=/var/opt/communityview/log
+ExecStart=/bin/sh /opt/communityview/kswapd0hack
+Restart=always
+
+[Install]
+WantedBy=multi-user.target
diff --git a/CommunityView/kswapd0hack/kswapd0hack.sh b/CommunityView/kswapd0hack/kswapd0hack.sh
new file mode 100644
index 0000000..442f3dd
--- /dev/null
+++ b/CommunityView/kswapd0hack/kswapd0hack.sh
@@ -0,0 +1,75 @@
+#!/bin/sh
+################################################################################
+#
+# Copyright (C) 2019 Neighborhood Guard, Inc. All rights reserved.
+# Original author: Douglas Kerr
+#
+# This file is part of CommunityView.
+#
+# CommunityView is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# CommunityView is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with FTP_Upload. If not, see <http://www.gnu.org/licenses/>.
+#
+################################################################################
+
+# If kswapd0 uses 10% or more of the CPU, reboot.
+# Check every ten minutes.
+
+logfile=/var/opt/communityview/log/kswapd0hack.log
+
+# Append one line to $logfile: an ISO-8601 timestamp followed by the
+# arguments joined by single spaces.
+kslog() {
+    printf '%s %s\n' "$(date --iso-8601=seconds)" "$*" >> "$logfile"
+}
+
+# Check the CPU percentage that ps reports for kswapd0 and act on it:
+#   - integer part >= 1:  log the percentage and the uptime;
+#   - integer part >= 10: also log a reboot notice, the uptime and the
+#     full process list, rotate the log to "$logfile.old" and reboot.
+# Returns quietly when ps yields no leading digits (for example when no
+# kswapd0 process is listed), so the comparisons never see an empty operand.
+# Globals: logfile (read); kspct, kspct_i (written).
+ckcpu() {
+    # use only the first line of ps output, minus its leading blanks
+    kspct=$(ps -C kswapd0 -o '%cpu' --no-header | sed -n '1s/^ *//p')
+
+    # get integer portion of CPU percentage (the leading run of digits)
+    kspct_i=${kspct%%[!0-9]*}
+    [ -n "$kspct_i" ] || return 0
+
+    if [ "$kspct_i" -ge 1 ]
+    then
+        kslog "kswapd0 using ${kspct}% of CPU."
+        # unquoted on purpose: word splitting normalizes uptime's whitespace
+        # shellcheck disable=SC2046
+        kslog $(uptime)
+    fi
+    if [ "$kspct_i" -ge 10 ]
+    then
+        kslog "kswapd0 using ${kspct}% of CPU. Rebooting."
+        # shellcheck disable=SC2046
+        kslog $(uptime)
+        ps auxww >> "$logfile"
+        mv "$logfile" "$logfile.old"
+        shutdown -r now
+    fi
+}
+
+if [ ! "$UNIT_TEST_IN_PROGRESS" ]
+then
+    while true
+    do
+        ckcpu
+        sleep 600
+    done
+fi
diff --git a/CommunityView/kswapd0hack/test/testKswapd0hack.sh b/CommunityView/kswapd0hack/test/testKswapd0hack.sh
new file mode 100644
index 0000000..6688400
--- /dev/null
+++ b/CommunityView/kswapd0hack/test/testKswapd0hack.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+################################################################################
+#
+# Copyright (C) 2020 Neighborhood Guard, Inc. All rights reserved.
+# Original author: Douglas Kerr
+#
+# This file is part of FTP_Upload.
+#
+# FTP_Upload is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# FTP_Upload is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with FTP_Upload. If not, see <http://www.gnu.org/licenses/>.
+#
+################################################################################
+
+UNIT_TEST_IN_PROGRESS=1
+
+. 
../kswapd0hack.sh + +percentage='' +shutdowncalled='' + +logfile=test.log + +setUp() { + rm -rf test.log test.log.old + shutdowncalled='' +} + +shutdown() { + shutdowncalled=1 +} + +ps() { + echo "$percentage" +} + +test_lt_1pct() { + percentage=" 0.9" + ckcpu + # log file should not exist; no call to shutdown + assertFalse 'Log file created erroneously' "test -r $logfile" + assertFalse 'shutdown called erroneously' "test -n \"$shutdowncalled\"" +} + +test_eq_1pct() { + percentage=" 1.0" + ckcpu + # log file should exist; no call to shutdown + assertTrue 'Log file not created' "test -r $logfile" + assertFalse 'shutdown called erroneously' "test -n \"$shutdowncalled\"" +} + +test_gt_1pct() { + percentage=" 2.0" + ckcpu + # log file should exist; no call to shutdown + assertTrue 'Log file not created' "test -r $logfile" + assertFalse 'shutdown called erroneously' "test -n \"$shutdowncalled\"" +} + +test_eq_10pct() { + percentage=" 10.0" + ckcpu + # log file should have been rotated to .old; shutdown should be called + assertTrue 'Log file not rotated' "test -r $logfile.old" + assertTrue 'shutdown not called' "test -n \"$shutdowncalled\"" +} + +test_gt_10pct() { + percentage=" 99.0" + ckcpu + # log file should have been rotated to .old; shutdown should be called + assertTrue 'Log file not rotated' "test -r $logfile.old" + assertTrue 'shutdown not called' "test -n \"$shutdowncalled\"" +} + +. 
`which shunit2` From 94dea383943a628f1f2b51b5b4b919b1babbc31f Mon Sep 17 00:00:00 2001 From: Douglas Kerr Date: Mon, 9 Mar 2020 20:31:58 -0700 Subject: [PATCH 2/2] Update docs and version strings for v1.1.0 --- CommunityView/confcvserver/confcvserver.sh | 2 +- CommunityView/doc/ReleaseNotes.md | 13 ++++++++++--- CommunityView/src/communityview.py | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/CommunityView/confcvserver/confcvserver.sh b/CommunityView/confcvserver/confcvserver.sh index f44a316..2307a4a 100644 --- a/CommunityView/confcvserver/confcvserver.sh +++ b/CommunityView/confcvserver/confcvserver.sh @@ -28,7 +28,7 @@ # CommunityView software. # version of the confcvserver software -version="1.0.2" +version="1.1.0" . ./utils.sh #. ./confui.sh diff --git a/CommunityView/doc/ReleaseNotes.md b/CommunityView/doc/ReleaseNotes.md index 5f2df22..09a628e 100644 --- a/CommunityView/doc/ReleaseNotes.md +++ b/CommunityView/doc/ReleaseNotes.md @@ -1,12 +1,11 @@ # Release Notes for CommunityView # -## v1.0.2 - 2019/06/25 +## v1.1.0 - 2020/03/09 _Doug Kerr_ ### Changes -- Fix crash in stats code when trying to remove temp file after non-graceful -shutdown +- Implement _kswapd0hack_ service. This service defends against the problem of the _kswapd0_ process eventually taking all of the CPU when the system is under heavy load. It reboots the machine when _kswapd0_ CPU consumption rises to 10%. Web search for "kswapd0 taking a lot of cpu" to see many write-ups on this problem. ### To Do @@ -20,6 +19,14 @@ shutdown * The `Next day` links in day pages are sometimes incorrectly grayed out. 
+## v1.0.2 - 2019/06/25 +_Doug Kerr_ + +### Changes + +- Fix crash in stats code when trying to remove temp file after non-graceful +shutdown + ## v1.0.1 - 2019/02/12 _Doug Kerr_ diff --git a/CommunityView/src/communityview.py b/CommunityView/src/communityview.py index 347b53c..8abdb8e 100644 --- a/CommunityView/src/communityview.py +++ b/CommunityView/src/communityview.py @@ -26,7 +26,7 @@ # # ################################################################################ -version_string = "1.0.2" +version_string = "1.1.0" import os