Skip to content

Commit

Permalink
Run all checks with a signal (#44)
Browse files Browse the repository at this point in the history
* run all checks with signal

Sending a SIGURG triggers an immediate, additional run of all checks.
  • Loading branch information
corubba authored Jan 26, 2024
1 parent d16bfde commit d422db2
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 3 deletions.
2 changes: 2 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,8 @@ You can launch it by supplying a configuration file and a directory with configu

At the root of the project there is System V init and a Systemd unit file for proper integration with OS startup tools.

Sending a ``SIGURG`` signal to a running anycast-healthchecker process triggers an immediate, additional execution of all active checks; the regular check interval is not changed. For services with ``check_rise`` and/or ``check_fail`` set to ``1``, this allows external events to advertise and/or withdraw the services' prefixes faster.

Systemd and SysVinit integration
################################

Expand Down
12 changes: 11 additions & 1 deletion anycast_healthchecker/healthchecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import sys
import logging
from queue import Queue
import threading

from anycast_healthchecker import PROGRAM_NAME, METRIC_PREFIX
from anycast_healthchecker.servicecheck import ServiceCheck
Expand Down Expand Up @@ -76,6 +77,8 @@ def __init__(self, config, bird_configuration):
)
self.ip_prefixes[ip_version] = _ip_prefixes

self._urgent_event = threading.Event()

self.log.info('initialize healthchecker')

def _update_bird_conf_file(self, operation):
Expand Down Expand Up @@ -247,7 +250,8 @@ def run(self):
metric_state,
metric_check_duration,
metric_check_ip_assignment,
metric_check_timeout)
metric_check_timeout,
self._urgent_event)
_thread.start()

# Stay running until we are stopped
Expand Down Expand Up @@ -277,3 +281,9 @@ def run(self):
self.bird_configuration[ip_version]['reconfigure_cmd'])
else:
run_custom_bird_reconfigure(operation)

def run_all_checks_now(self):
    """Trigger an immediate, additional run of all checks.

    Pulses the urgent event: ``set()`` wakes every thread currently blocked
    in ``wait()`` on it, and the immediate ``clear()`` resets the flag so
    that later waits block again. The checks' usual interval is not changed.
    """
    self.log.info("Immediately running all checks")
    # Pulse rather than latch: leaving the flag set would make every
    # subsequent wait() on the event return at once.
    self._urgent_event.set()
    self._urgent_event.clear()
5 changes: 5 additions & 0 deletions anycast_healthchecker/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ def main():

# Create our master process.
checker = healthchecker.HealthChecker(config, bird_configuration)

# Register our SIGURG handler to immediately trigger all checks.
signal.signal(signal.SIGURG, lambda signum, frame: checker.run_all_checks_now())

# and start working
logger.info("starting %s version %s", PROGRAM_NAME, __version__)
checker.run()

Expand Down
5 changes: 3 additions & 2 deletions anycast_healthchecker/servicecheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class ServiceCheck(Thread):

def __init__(self, service, config, action, splay_startup, metric_state,
metric_check_duration, metric_check_ip_assignment,
metric_check_timeout):
metric_check_timeout, urgent_event):
"""Set the name of thread to be the name of the service."""
super(ServiceCheck, self).__init__()
self.name = service # Used by Thread()
Expand Down Expand Up @@ -85,6 +85,7 @@ def __init__(self, service, config, action, splay_startup, metric_state,
bird_reconfigure_cmd=config.get('custom_bird_reconfigure_cmd',
None)
)
self.urgent_event = urgent_event
self.log.info("loading check for %s", self.name, extra=self.extra)

self.metric_state = metric_state
Expand Down Expand Up @@ -365,4 +366,4 @@ def _run(self):
if sleep < 0:
sleep += interval
self.log.debug("sleeping for %.3fsecs", sleep, extra=self.extra)
time.sleep(sleep)
self.urgent_event.wait(timeout=sleep)

0 comments on commit d422db2

Please sign in to comment.