From cbb59f7a40520618d50dfa52b1dafc3afb233c73 Mon Sep 17 00:00:00 2001
From: Anirudh Pillai
Date: Thu, 17 Oct 2024 16:46:10 +0100
Subject: [PATCH] fix(alerts): alert monitoring + trendlines only for absolute

---
 .../components/Alerts/insightAlertsLogic.ts |  3 +-
 posthog/tasks/alerts/checks.py              | 33 +++++++++++++++++--
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/frontend/src/lib/components/Alerts/insightAlertsLogic.ts b/frontend/src/lib/components/Alerts/insightAlertsLogic.ts
index 6bca4dc317fa1..5ef92b84b8de2 100644
--- a/frontend/src/lib/components/Alerts/insightAlertsLogic.ts
+++ b/frontend/src/lib/components/Alerts/insightAlertsLogic.ts
@@ -3,7 +3,7 @@ import { loaders } from 'kea-loaders'
 import api from 'lib/api'
 import { insightVizDataLogic } from 'scenes/insights/insightVizDataLogic'
 
-import { GoalLine, InsightThresholdType } from '~/queries/schema'
+import { AlertConditionType, GoalLine, InsightThresholdType } from '~/queries/schema'
 import { getBreakdown, isInsightVizNode, isTrendsQuery } from '~/queries/utils'
 
 import { InsightLogicProps } from '~/types'
@@ -67,6 +67,7 @@ export const insightAlertsLogic = kea<insightAlertsLogicType>([
             alerts.flatMap((alert) => {
                 if (
                     alert.threshold.configuration.type !== InsightThresholdType.ABSOLUTE ||
+                    alert.condition.type !== AlertConditionType.ABSOLUTE_VALUE ||
                     !alert.threshold.configuration.bounds
                 ) {
                     return []
diff --git a/posthog/tasks/alerts/checks.py b/posthog/tasks/alerts/checks.py
index 4986899047faa..9ef41fbf7d3cf 100644
--- a/posthog/tasks/alerts/checks.py
+++ b/posthog/tasks/alerts/checks.py
@@ -34,12 +34,22 @@
     alert_calculation_interval_to_relativedelta,
 )
 from posthog.tasks.alerts.trends import check_trends_alert
+import time
+import math
 
 
 logger = structlog.get_logger(__name__)
 
 
-class AlertCheckException(Exception): ...
+class AlertCheckException(Exception):
+    """
+    Required for custom exceptions to pass stack trace to sentry.
+    Subclassing through other ways doesn't transfer the traceback.
+    https://stackoverflow.com/a/69963663/5540417
+    """
+
+    def __init__(self, err):
+        self.__traceback__ = err.__traceback__
 
 
 HOURLY_ALERTS_BACKLOG_GAUGE = Gauge(
@@ -102,6 +112,9 @@ def alerts_backlog_task() -> None:
 
     DAILY_ALERTS_BACKLOG_GAUGE.set(daily_alerts_breaching_sla)
 
+    # sleeping 30s for prometheus to pick up the metrics sent during task
+    time.sleep(30)
+
 
 @shared_task(
     ignore_result=True,
@@ -158,6 +171,8 @@ def check_alert_task(alert_id: str) -> None:
 
 
 def check_alert(alert_id: str) -> None:
+    task_start_time = time.time()
+
     try:
         alert = AlertConfiguration.objects.get(id=alert_id, enabled=True)
     except AlertConfiguration.DoesNotExist:
@@ -199,8 +214,15 @@ def check_alert(alert_id: str) -> None:
         check_alert_and_notify_atomically(alert)
     except Exception as err:
         ALERT_CHECK_ERROR_COUNTER.inc()
+        logger.exception(AlertCheckException(err))
 
-        capture_exception(AlertCheckException(err))
+        capture_exception(
+            AlertCheckException(err),
+            tags={
+                "alert_configuration_id": alert_id,
+            },
+        )
+
         # raise again so alert check is retried depending on error type
         raise
     finally:
@@ -209,6 +231,13 @@ def check_alert(alert_id: str) -> None:
         alert.is_calculating = False
         alert.save()
 
+    task_duration = time.time() - task_start_time
+
+    # Ensure task runs at least 40s
+    # for prometheus to pick up the metrics sent during task
+    time_left_to_run = 40 - math.floor(task_duration)
+    time.sleep(time_left_to_run)
+
 
 @transaction.atomic
 def check_alert_and_notify_atomically(alert: AlertConfiguration) -> None:
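
Review note (outside the patch itself): the sketch below isolates the "pad the task runtime so Prometheus can scrape metrics emitted during it" pattern that `check_alert` uses at the end of the diff. It is a minimal standalone illustration, not PostHog code: `MIN_RUNTIME_SECONDS` and `run_with_min_runtime` are hypothetical names, and the `max(0, ...)` clamp is an addition here (the patch sleeps for `40 - floor(duration)` directly, and `time.sleep` raises `ValueError` for negative values if a check runs longer than 40s).

```python
import math
import time

MIN_RUNTIME_SECONDS = 40  # assumed to mirror the 40s floor used in the patch


def run_with_min_runtime(check) -> None:
    """Run `check`, then sleep until at least MIN_RUNTIME_SECONDS have elapsed."""
    task_start_time = time.time()
    try:
        check()
    finally:
        task_duration = time.time() - task_start_time
        # Clamp at 0: time.sleep() rejects negative arguments, which would
        # otherwise happen whenever the check itself exceeds the minimum runtime.
        time_left_to_run = MIN_RUNTIME_SECONDS - math.floor(task_duration)
        time.sleep(max(0, time_left_to_run))


if __name__ == "__main__":
    run_with_min_runtime(lambda: print("alert check ran"))
```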