From b4bae45307c2f048924e73ca08cceb2b48dfb328 Mon Sep 17 00:00:00 2001
From: Tiina Turban
Date: Mon, 12 Feb 2024 15:07:51 +0100
Subject: [PATCH] chore: Make Celery Queue Depth page less

companion to https://github.com/PostHog/charts/pull/773

We've been paging for this daily for more than 10 days. In all of those
cases there hasn't been any action and it resolves itself. This creates
alert fatigue, so let's make the alert less sensitive. We can
additionally add a non-paging alert, if this is deemed useful.
---
 charts/posthog/values.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/charts/posthog/values.yaml b/charts/posthog/values.yaml
index 8e226b51..35dd7c48 100644
--- a/charts/posthog/values.yaml
+++ b/charts/posthog/values.yaml
@@ -2528,12 +2528,12 @@ prometheus:
 
             - alert: CeleryQueueDepth
               expr: (max (posthog_celery_queue_depth)) > 1000
-              for: 10m
+              for: 60m
               labels:
                 rotation: common
                 severity: critical
               annotations:
-                summary: Celery job execution delayed for more than 10 minutes.
+                summary: Celery job execution delayed for more than 60 minutes.
                 description: |
                   The Celery jobs queue (stored in Redis) is filling up faster than it is consumed.
                   This impacts our monitoring, as some paging monitors depend on metrics exported by Celery jobs.