Skip to content

Commit

Permalink
Integrate systemd watchdog
Browse files Browse the repository at this point in the history
To allow restarting the service when it gets stuck. The default service
file sets the watchdog timeout to 60 seconds; keepalives are sent at half
that interval, as recommended by the systemd docs.

Implements #1250
  • Loading branch information
askmeaboutlo0m committed Oct 14, 2024
1 parent bd03f18 commit e46dc9a
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 0 deletions.
14 changes: 14 additions & 0 deletions src/thinsrv/initsys.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,20 @@ void notifyStatus(const QString &status);
*/
QList<int> getListenFds();

/**
 * Queries the init system's watchdog configuration. If a watchdog is enabled,
 * this returns a positive interval in milliseconds at which watchdog() should
 * be called to let the init system know that the process is doing fine. If no
 * watchdog is enabled, or no usable interval could be determined, it returns
 * a value <= 0. Can only be called once, since it may unset environment
 * variables.
 */
int getWatchdogMsec();

/**
 * Signal the init system watchdog that we're doing fine. Should be called
 * periodically, at the interval that getWatchdogMsec() returned.
 */
void watchdog();

}

#endif // INITSYS_H
10 changes: 10 additions & 0 deletions src/thinsrv/initsys_dummy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,14 @@ QList<int> getListenFds()
return QList<int>();
}

// Dummy implementation: always reports that no watchdog is enabled.
int getWatchdogMsec()
{
	// A value <= 0 tells the caller not to set up a keepalive timer.
	constexpr int watchdogDisabled = 0;
	return watchdogDisabled;
}

void watchdog()
{
	// Intentionally empty: the dummy implementation has no init system
	// watchdog to notify.
}

}
35 changes: 35 additions & 0 deletions src/thinsrv/initsys_systemd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,39 @@ QList<int> getListenFds()
return fds;
}

int getWatchdogMsec()
{
uint64_t usec;
int result = sd_watchdog_enabled(1, &usec);
if(result < 0) {
qWarning("sd_watchdog_enabled: error %d", result);
return -1;
} else if(result == 0) {
return 0;
} else {
// The systemd docs recommend sending keepalives every half interval.
uint64_t msec = usec / uint64_t(2000);
if(msec == uint64_t(0)) {
qWarning(
"Watchdog timeout %" PRIu64
"usec results in keepalive interval of 0",
usec);
return -2;
} else if(msec > uint64_t(60000)) {
qWarning(
"Excessive watchdog timeout of %" PRIu64
"usec, using 1 minute keepalive interval instead",
usec);
return 60000;
} else {
return int(msec);
}
}
}

void watchdog()
{
sd_notify(0, "WATCHDOG=1");
}

}
18 changes: 18 additions & 0 deletions src/thinsrv/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "libserver/jsonapi.h" // for datatype registration
#include "thinsrv/headless/headless.h"
#include "thinsrv/initsys.h"
#include <QTimer>
#include <cstdio>
#include <cstring>
#ifdef HAVE_SERVERGUI
Expand Down Expand Up @@ -74,6 +75,23 @@ int main(int argc, char *argv[])
}

initsys::notifyReady();
// Ask the init system for the watchdog keepalive interval. Only a positive
// value means a watchdog is active and needs to be pinged periodically.
int watchdogMsec = initsys::getWatchdogMsec();
if(watchdogMsec > 0) {
// NOTE(review): the timer is created without a parent and is not deleted
// in the visible code; presumably it is meant to live until the process
// exits - confirm.
QTimer *watchdogTimer = new QTimer;
watchdogTimer->setInterval(watchdogMsec);
// Pick the cheapest timer type that is still precise enough: according to
// the Qt documentation very coarse timers only keep full-second accuracy,
// so they are reserved for intervals of 5 seconds or more.
if(watchdogMsec < 5000) {
qInfo("Setting coarse watchdog timer every %dms", watchdogMsec);
watchdogTimer->setTimerType(Qt::CoarseTimer);
} else {
qInfo(
"Setting very coarse watchdog timer every %dms", watchdogMsec);
watchdogTimer->setTimerType(Qt::VeryCoarseTimer);
}
// Every timeout sends one keepalive to the init system.
QObject::connect(watchdogTimer, &QTimer::timeout, &initsys::watchdog);
watchdogTimer->start();
} else {
// Zero or negative: no watchdog is enabled or no usable interval was
// reported, so no keepalive timer is set up.
qInfo("Watchdog timer not enabled");
}

return app->exec();
}
2 changes: 2 additions & 0 deletions src/thinsrv/systemd/service.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ After=network.target
Type=notify
NotifyAccess=main
ExecStart=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_BINDIR@/@srvname@@CMAKE_EXECUTABLE_SUFFIX@
Restart=always
WatchdogSec=60s
User=nobody

[Install]
Expand Down

0 comments on commit e46dc9a

Please sign in to comment.