From 4fc35bd77b2904983f9a750a6e7a71280fadd048 Mon Sep 17 00:00:00 2001 From: Anirudh Pillai Date: Wed, 27 Nov 2024 16:46:16 +0000 Subject: [PATCH] fix: send alert metrics to correct ph instance (#26467) --- posthog/tasks/alerts/checks.py | 42 ++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/posthog/tasks/alerts/checks.py b/posthog/tasks/alerts/checks.py index 0431a8cb610cb..83d1fc7bbd1cf 100644 --- a/posthog/tasks/alerts/checks.py +++ b/posthog/tasks/alerts/checks.py @@ -2,7 +2,7 @@ import traceback from datetime import datetime, timedelta, UTC -from typing import cast +from typing import Any, cast from dateutil.relativedelta import relativedelta from celery import shared_task @@ -36,7 +36,8 @@ alert_calculation_interval_to_relativedelta, ) from posthog.tasks.alerts.trends import check_trends_alert -import posthoganalytics +from posthog.ph_client import get_ph_client +from posthoganalytics import Posthog logger = structlog.get_logger(__name__) @@ -76,6 +77,13 @@ def __init__(self, err: Exception): ANIRUDH_DISTINCT_ID = "wcPbDRs08GtNzrNIXfzHvYAkwUaekW7UrAo4y3coznT" +def _capture_ph_event(ph_client: Posthog | None, *args: Any, **kwargs: Any) -> None: + if ph_client: + ph_client.capture(*args, **kwargs) + + return None + + @shared_task(ignore_result=True) def checks_cleanup_task() -> None: AlertCheck.clean_up_old_checks() @@ -91,6 +99,7 @@ def alerts_backlog_task() -> None: - hourly alerts - alerts that haven't been checked in the last hour + 5min - daily alerts - alerts that haven't been checked in the last hour + 15min """ + ph_client = get_ph_client() now = datetime.now(UTC) hourly_alerts_breaching_sla = AlertConfiguration.objects.filter( @@ -103,7 +112,8 @@ def alerts_backlog_task() -> None: HOURLY_ALERTS_BACKLOG_GAUGE.set(hourly_alerts_breaching_sla) - posthoganalytics.capture( + _capture_ph_event( + ph_client, ANIRUDH_DISTINCT_ID, "alert check backlog", properties={ @@ -124,7 +134,8 @@ def alerts_backlog_task() -> None: DAILY_ALERTS_BACKLOG_GAUGE.set(daily_alerts_breaching_sla) - posthoganalytics.capture( + _capture_ph_event( + ph_client, ANIRUDH_DISTINCT_ID, "alert check backlog", properties={ @@ -135,6 +146,8 @@ def alerts_backlog_task() -> None: # sleeping 30s for prometheus to pick up the metrics sent during task time.sleep(30) + if ph_client: + ph_client.shutdown() @shared_task( @@ -219,6 +232,8 @@ def check_alert_task(alert_id: str) -> None: def check_alert(alert_id: str) -> None: + ph_client = get_ph_client() + try: alert = AlertConfiguration.objects.get(id=alert_id, enabled=True) except AlertConfiguration.DoesNotExist: @@ -261,12 +276,13 @@ def check_alert(alert_id: str) -> None: alert.save() try: - check_alert_and_notify_atomically(alert) + check_alert_and_notify_atomically(alert, ph_client) except Exception as err: ALERT_CHECK_ERROR_COUNTER.inc() user = cast(User, alert.created_by) - posthoganalytics.capture( + _capture_ph_event( + ph_client, cast(str, user.distinct_id), "alert check failed", properties={ @@ -296,9 +312,12 @@ def check_alert(alert_id: str) -> None: alert.is_calculating = False alert.save() + if ph_client: + ph_client.shutdown() + @transaction.atomic -def check_alert_and_notify_atomically(alert: AlertConfiguration) -> None: +def check_alert_and_notify_atomically(alert: AlertConfiguration, ph_client: Posthog | None) -> None: """ Computes insight results, checks alert for breaches and notifies user. Only commits updates to alert state if all of the above complete successfully. @@ -312,7 +331,8 @@ def check_alert_and_notify_atomically(alert: AlertConfiguration) -> None: user = cast(User, alert.created_by) # Event to count alert checks - posthoganalytics.capture( + _capture_ph_event( + ph_client, cast(str, user.distinct_id), "alert check", properties={ @@ -334,7 +354,8 @@ def check_alert_and_notify_atomically(alert: AlertConfiguration) -> None: except Exception as err: error_message = f"Alert id = {alert.id}, failed to evaluate" - posthoganalytics.capture( + _capture_ph_event( + ph_client, cast(str, user.distinct_id), "alert check failed", properties={ @@ -372,7 +393,8 @@ def check_alert_and_notify_atomically(alert: AlertConfiguration) -> None: except Exception as err: error_message = f"AlertCheckError: error sending notifications for alert_id = {alert.id}" - posthoganalytics.capture( + _capture_ph_event( + ph_client, cast(str, user.distinct_id), "alert check failed", properties={