Skip to content

Commit

Permalink
fix(alerts): alert monitoring + trendlines only for absolute
Browse files Browse the repository at this point in the history
  • Loading branch information
anirudhpillai committed Oct 17, 2024
1 parent cb1b316 commit cbb59f7
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 3 deletions.
3 changes: 2 additions & 1 deletion frontend/src/lib/components/Alerts/insightAlertsLogic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { loaders } from 'kea-loaders'
import api from 'lib/api'
import { insightVizDataLogic } from 'scenes/insights/insightVizDataLogic'

import { GoalLine, InsightThresholdType } from '~/queries/schema'
import { AlertConditionType, GoalLine, InsightThresholdType } from '~/queries/schema'
import { getBreakdown, isInsightVizNode, isTrendsQuery } from '~/queries/utils'
import { InsightLogicProps } from '~/types'

Expand Down Expand Up @@ -67,6 +67,7 @@ export const insightAlertsLogic = kea<insightAlertsLogicType>([
alerts.flatMap((alert) => {
if (
alert.threshold.configuration.type !== InsightThresholdType.ABSOLUTE ||
alert.condition.type !== AlertConditionType.ABSOLUTE_VALUE ||
!alert.threshold.configuration.bounds
) {
return []
Expand Down
33 changes: 31 additions & 2 deletions posthog/tasks/alerts/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,22 @@
alert_calculation_interval_to_relativedelta,
)
from posthog.tasks.alerts.trends import check_trends_alert
import time
import math


logger = structlog.get_logger(__name__)


class AlertCheckException(Exception): ...
class AlertCheckException(Exception):
    """
    Wrapper raised/reported when an alert check fails.

    Required for custom exceptions to pass stack trace to sentry.
    Subclassing through other ways doesn't transfer the traceback.
    https://stackoverflow.com/a/69963663/5540417

    Args:
        err: The original exception caught while checking the alert. Its
            traceback is copied onto this wrapper so error reporting shows
            where the failure actually happened.
    """

    def __init__(self, err: BaseException) -> None:
        # Keep `args == (err,)` (and therefore `str(self) == str(err)`)
        # explicit instead of relying on `Exception.__new__` to record it.
        super().__init__(err)
        # Re-use the original traceback; a freshly constructed exception
        # would otherwise have none until it is raised.
        self.__traceback__ = err.__traceback__


HOURLY_ALERTS_BACKLOG_GAUGE = Gauge(
Expand Down Expand Up @@ -102,6 +112,9 @@ def alerts_backlog_task() -> None:

DAILY_ALERTS_BACKLOG_GAUGE.set(daily_alerts_breaching_sla)

# sleeping 30s for prometheus to pick up the metrics sent during task
time.sleep(30)


@shared_task(
ignore_result=True,
Expand Down Expand Up @@ -158,6 +171,8 @@ def check_alert_task(alert_id: str) -> None:


def check_alert(alert_id: str) -> None:
task_start_time = time.time()

try:
alert = AlertConfiguration.objects.get(id=alert_id, enabled=True)
except AlertConfiguration.DoesNotExist:
Expand Down Expand Up @@ -199,8 +214,15 @@ def check_alert(alert_id: str) -> None:
check_alert_and_notify_atomically(alert)
except Exception as err:
ALERT_CHECK_ERROR_COUNTER.inc()

logger.exception(AlertCheckException(err))
capture_exception(AlertCheckException(err))
capture_exception(
AlertCheckException(err),
tags={
"alert_configuration_id": alert_id,
},
)

# raise again so alert check is retried depending on error type
raise
finally:
Expand All @@ -209,6 +231,13 @@ def check_alert(alert_id: str) -> None:
alert.is_calculating = False
alert.save()

task_duration = time.time() - task_start_time

# Ensure task runs at least 40s
# for prometheus to pick up the metrics sent during task
time_left_to_run = 40 - math.floor(task_duration)
time.sleep(time_left_to_run)


@transaction.atomic
def check_alert_and_notify_atomically(alert: AlertConfiguration) -> None:
Expand Down

0 comments on commit cbb59f7

Please sign in to comment.