Skip to content

Commit

Permalink
fix: send alert metrics to correct ph instance (#26467)
Browse files Browse the repository at this point in the history
  • Loading branch information
anirudhpillai authored Nov 27, 2024
1 parent dd56af5 commit 4fc35bd
Showing 1 changed file with 32 additions and 10 deletions.
42 changes: 32 additions & 10 deletions posthog/tasks/alerts/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import traceback

from datetime import datetime, timedelta, UTC
from typing import cast
from typing import Any, cast
from dateutil.relativedelta import relativedelta

from celery import shared_task
Expand Down Expand Up @@ -36,7 +36,8 @@
alert_calculation_interval_to_relativedelta,
)
from posthog.tasks.alerts.trends import check_trends_alert
import posthoganalytics
from posthog.ph_client import get_ph_client
from posthoganalytics import Posthog


logger = structlog.get_logger(__name__)
Expand Down Expand Up @@ -76,6 +77,13 @@ def __init__(self, err: Exception):
ANIRUDH_DISTINCT_ID = "wcPbDRs08GtNzrNIXfzHvYAkwUaekW7UrAo4y3coznT"


def _capture_ph_event(ph_client: Posthog | None, *args: Any, **kwargs: Any) -> None:
    """Forward a capture call to ``ph_client`` when a client is available.

    Every positional and keyword argument after ``ph_client`` is handed to
    ``ph_client.capture`` unchanged. When ``ph_client`` is falsy (e.g. ``None``)
    the call is a no-op, so callers do not need to guard each capture site.
    """
    if not ph_client:
        # No usable client: silently drop the event.
        return

    ph_client.capture(*args, **kwargs)


@shared_task(ignore_result=True)
def checks_cleanup_task() -> None:
AlertCheck.clean_up_old_checks()
Expand All @@ -91,6 +99,7 @@ def alerts_backlog_task() -> None:
- hourly alerts - alerts that haven't been checked in the last hour + 5min
- daily alerts - alerts that haven't been checked in the last hour + 15min
"""
ph_client = get_ph_client()
now = datetime.now(UTC)

hourly_alerts_breaching_sla = AlertConfiguration.objects.filter(
Expand All @@ -103,7 +112,8 @@ def alerts_backlog_task() -> None:

HOURLY_ALERTS_BACKLOG_GAUGE.set(hourly_alerts_breaching_sla)

posthoganalytics.capture(
_capture_ph_event(
ph_client,
ANIRUDH_DISTINCT_ID,
"alert check backlog",
properties={
Expand All @@ -124,7 +134,8 @@ def alerts_backlog_task() -> None:

DAILY_ALERTS_BACKLOG_GAUGE.set(daily_alerts_breaching_sla)

posthoganalytics.capture(
_capture_ph_event(
ph_client,
ANIRUDH_DISTINCT_ID,
"alert check backlog",
properties={
Expand All @@ -135,6 +146,8 @@ def alerts_backlog_task() -> None:

# sleeping 30s for prometheus to pick up the metrics sent during task
time.sleep(30)
if ph_client:
ph_client.shutdown()


@shared_task(
Expand Down Expand Up @@ -219,6 +232,8 @@ def check_alert_task(alert_id: str) -> None:


def check_alert(alert_id: str) -> None:
ph_client = get_ph_client()

try:
alert = AlertConfiguration.objects.get(id=alert_id, enabled=True)
except AlertConfiguration.DoesNotExist:
Expand Down Expand Up @@ -261,12 +276,13 @@ def check_alert(alert_id: str) -> None:
alert.save()

try:
check_alert_and_notify_atomically(alert)
check_alert_and_notify_atomically(alert, ph_client)
except Exception as err:
ALERT_CHECK_ERROR_COUNTER.inc()
user = cast(User, alert.created_by)

posthoganalytics.capture(
_capture_ph_event(
ph_client,
cast(str, user.distinct_id),
"alert check failed",
properties={
Expand Down Expand Up @@ -296,9 +312,12 @@ def check_alert(alert_id: str) -> None:
alert.is_calculating = False
alert.save()

if ph_client:
ph_client.shutdown()


@transaction.atomic
def check_alert_and_notify_atomically(alert: AlertConfiguration) -> None:
def check_alert_and_notify_atomically(alert: AlertConfiguration, ph_client: Posthog | None) -> None:
"""
Computes insight results, checks alert for breaches and notifies user.
Only commits updates to alert state if all of the above complete successfully.
Expand All @@ -312,7 +331,8 @@ def check_alert_and_notify_atomically(alert: AlertConfiguration) -> None:
user = cast(User, alert.created_by)

# Event to count alert checks
posthoganalytics.capture(
_capture_ph_event(
ph_client,
cast(str, user.distinct_id),
"alert check",
properties={
Expand All @@ -334,7 +354,8 @@ def check_alert_and_notify_atomically(alert: AlertConfiguration) -> None:
except Exception as err:
error_message = f"Alert id = {alert.id}, failed to evaluate"

posthoganalytics.capture(
_capture_ph_event(
ph_client,
cast(str, user.distinct_id),
"alert check failed",
properties={
Expand Down Expand Up @@ -372,7 +393,8 @@ def check_alert_and_notify_atomically(alert: AlertConfiguration) -> None:
except Exception as err:
error_message = f"AlertCheckError: error sending notifications for alert_id = {alert.id}"

posthoganalytics.capture(
_capture_ph_event(
ph_client,
cast(str, user.distinct_id),
"alert check failed",
properties={
Expand Down

0 comments on commit 4fc35bd

Please sign in to comment.