From 943c4fac92964f4164680a9245d46a1adf620d1b Mon Sep 17 00:00:00 2001 From: Ilya Mashchenko Date: Fri, 5 Jul 2024 11:33:18 +0300 Subject: [PATCH] health add alarm docker container down (#18075) --- src/health/health.d/docker.conf | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/health/health.d/docker.conf b/src/health/health.d/docker.conf index 668614d4dcd29e..edb63a08ca06f0 100644 --- a/src/health/health.d/docker.conf +++ b/src/health/health.d/docker.conf @@ -1,4 +1,6 @@ - template: docker_container_unhealthy +# you can disable an alarm notification by setting the 'to' line to: silent + +template: docker_container_unhealthy on: docker.container_health_status class: Errors type: Containers @@ -10,3 +12,22 @@ component: Docker summary: Docker container ${label:container_name} health info: ${label:container_name} docker container health status is unhealthy to: sysadmin + +# This alert monitors the status of Docker containers and triggers if any container is exited (down). +# To enable this alert for specific containers, you need to modify the "chart labels" filter. +# This filter uses Netdata's simple pattern matching syntax. + + template: docker_container_down + on: docker.container_state + class: Errors + type: Containers + component: Docker +chart labels: container_name=!* + units: status + every: 10s + lookup: average -10s of exited + warn: $this > 0 + delay: down 1m multiplier 1.5 max 2h + summary: Docker container ${label:container_name} down + info: Docker container ${label:container_name} is currently not running + to: sysadmin