Commit 13e139c (1 parent: d1f1d04)
Showing 1 changed file with 67 additions and 3 deletions.
@@ -36,6 +36,7 @@
     alert_calculation_interval_to_relativedelta,
 )
 from posthog.tasks.alerts.trends import check_trends_alert
+import posthoganalytics
 
 
 logger = structlog.get_logger(__name__)
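Note: in the PostHog monorepo, posthoganalytics is the PostHog Python client (the posthog PyPI package), imported under this alias to avoid clashing with the repo's own posthog Django package. Below is a minimal, self-contained sketch of the capture API the hunks that follow rely on; the API key, host, and distinct_id here are placeholders, not values from this commit:

    import posthoganalytics

    # Placeholder configuration; a real deployment reads these from settings.
    posthoganalytics.project_api_key = "phc_XXXXXXXXXXXX"
    posthoganalytics.host = "https://us.i.posthog.com"

    # capture(distinct_id, event, properties) attributes one named event,
    # carrying JSON-serializable properties, to the given distinct_id.
    posthoganalytics.capture(
        "some-user@example.com",
        "alert check backlog",
        properties={"alert_check_frequency": "hourly", "backlog": 3},
    )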
@@ -100,6 +101,15 @@ def alerts_backlog_task() -> None:
 
     HOURLY_ALERTS_BACKLOG_GAUGE.set(hourly_alerts_breaching_sla)
 
+    posthoganalytics.capture(
+        "[email protected]",
+        "alert check backlog",
+        properties={
+            "alert_check_frequency": AlertCalculationInterval.HOURLY,
+            "backlog": hourly_alerts_breaching_sla,
+        },
+    )
+
     now = datetime.now(UTC)
 
     daily_alerts_breaching_sla = AlertConfiguration.objects.filter(
@@ -112,6 +122,15 @@ def alerts_backlog_task() -> None:
 
     DAILY_ALERTS_BACKLOG_GAUGE.set(daily_alerts_breaching_sla)
 
+    posthoganalytics.capture(
+        "[email protected]",
+        "alert check backlog",
+        properties={
+            "alert_check_frequency": AlertCalculationInterval.DAILY,
+            "backlog": daily_alerts_breaching_sla,
+        },
+    )
+
     # sleeping 30s for prometheus to pick up the metrics sent during task
     time.sleep(30)
 
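The two backlog gauges above, together with the 30-second sleep, assume a pull-based Prometheus setup: the worker must stay alive through at least one scrape for the freshly set gauge values to be collected. A hypothetical sketch of such a gauge using prometheus_client; the metric name and help text are assumptions, not taken from the repository:

    from prometheus_client import Gauge

    # Hypothetical definition; the real name/registry in the repo may differ.
    HOURLY_ALERTS_BACKLOG_GAUGE = Gauge(
        "alerts_backlog_hourly",
        "Hourly alerts still unchecked past their SLA",
    )

    # A gauge holds a point-in-time value, so each task run overwrites the last.
    HOURLY_ALERTS_BACKLOG_GAUGE.set(12)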
@@ -244,6 +263,16 @@ def check_alert(alert_id: str) -> None:
     except Exception as err:
         ALERT_CHECK_ERROR_COUNTER.inc()
 
+        posthoganalytics.capture(
+            alert.created_by.email,
+            "alert check failed",
+            properties={
+                "alert_id": alert.id,
+                "error": f"AlertCheckError: {err}",
+                "traceback": traceback.format_exc(),
+            },
+        )
+
         logger.exception(AlertCheckException(err))
         capture_exception(
             AlertCheckException(err),
@@ -276,6 +305,16 @@ def check_alert_and_notify_atomically(alert: AlertConfiguration) -> None:
     set_tag("alert_config_id", alert.id)
 
     ALERT_COMPUTED_COUNTER.inc()
+
+    # Event to count alert checks
+    posthoganalytics.capture(
+        alert.created_by.email,
+        "alert check",
+        properties={
+            "alert_id": alert.id,
+        },
+    )
+
     value = breaches = error = None
 
     # 1. Evaluate insight and get alert value
@@ -288,8 +327,21 @@
         # as celery task can be retried according to config
         raise
     except Exception as err:
-        logger.exception(f"Alert id = {alert.id}, failed to evaluate", exc_info=err)
-        set_tag("evaluation_error_message", traceback.format_exc())
+        error_message = f"Alert id = {alert.id}, failed to evaluate"
+        evaluation_error_message = traceback.format_exc()
+
+        posthoganalytics.capture(
+            alert.created_by.email,
+            "alert check failed",
+            properties={
+                "alert_id": alert.id,
+                "error": error_message,
+                "traceback": evaluation_error_message,
+            },
+        )
+
+        logger.exception(error_message, exc_info=err)
+        set_tag("evaluation_error_message", evaluation_error_message)
         capture_exception(AlertCheckException(err))
 
         # error can be on user side (incorrectly configured insight/alert)
@@ -316,9 +368,21 @@
         send_notifications_for_breaches(alert, breaches)
     except Exception as err:
         error_message = f"AlertCheckError: error sending notifications for alert_id = {alert.id}"
+        evaluation_error_message = traceback.format_exc()
+
+        posthoganalytics.capture(
+            alert.created_by.email,
+            "alert check failed",
+            properties={
+                "alert_id": alert.id,
+                "error": error_message,
+                "traceback": evaluation_error_message,
+            },
+        )
+
         logger.exception(error_message, exc_info=err)
 
-        set_tag("evaluation_error_message", traceback.format_exc())
+        set_tag("evaluation_error_message", evaluation_error_message)
         capture_exception(Exception(error_message))
 
         # don't want alert state to be updated (so that it's retried as next_check_at won't be updated)
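The same "alert check failed" capture now appears in three except blocks. A hypothetical follow-up refactor (not part of this commit) could factor it into one helper; note it must run while the exception is being handled, since traceback.format_exc() formats the exception currently in flight:

    import traceback

    import posthoganalytics

    def capture_alert_check_failure(alert, error_message: str) -> None:
        # Call only from inside an `except` block so format_exc() sees
        # the active exception's traceback.
        posthoganalytics.capture(
            alert.created_by.email,
            "alert check failed",
            properties={
                "alert_id": alert.id,
                "error": error_message,
                "traceback": traceback.format_exc(),
            },
        )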