Skip to content

Commit

Permalink
chore: add gauge for snapshots with missing snapshots (#23159)
Browse files Browse the repository at this point in the history
  • Loading branch information
pauldambra authored Jun 22, 2024
1 parent ecb9199 commit ebe0e93
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
9 changes: 9 additions & 0 deletions posthog/tasks/scheduled.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
update_quota_limiting,
verify_persons_data_in_sync,
update_survey_iteration,
invalid_web_replays,
)
from posthog.utils import get_crontab

Expand Down Expand Up @@ -199,6 +200,7 @@ def setup_periodic_tasks(sender: Celery, **kwargs: Any) -> None:
pg_plugin_server_query_timing.s(),
name="PG plugin server query timing",
)

add_periodic_task_with_expiry(
sender,
60,
Expand Down Expand Up @@ -311,3 +313,10 @@ def setup_periodic_tasks(sender: Celery, **kwargs: Any) -> None:
check_data_import_row_limits.s(),
name="check external data rows synced",
)

add_periodic_task_with_expiry(
sender,
3600,
invalid_web_replays.s(),
name="Invalid web replays count",
)
36 changes: 36 additions & 0 deletions posthog/tasks/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,42 @@ def ingestion_lag() -> None:
pass


@shared_task(ignore_result=True)
def invalid_web_replays() -> None:
from posthog.client import sync_execute

# ultimately I want to observe values by team id, but at the moment that would be lots of series, let's reduce the value first
query = """
select
--team_id,
count()
from (
select any(team_id) as team_id, argMinMerge(first_url) as first_url, argMinMerge(snapshot_source) as snapshot_source
from session_replay_events
where min_first_timestamp >= now() - interval 1 hour
and min_first_timestamp <= now()
group by session_id
having first_url is null and snapshot_source = 'web'
)
--group by team_id
"""

try:
results = sync_execute(
query,
)
with pushed_metrics_registry("celery_replay_tracking") as registry:
gauge = Gauge(
"replay_tracking_web_replay_with_missing_first_url",
"Acts as a proxy for replay sessions which haven't received a full snapshot",
registry=registry,
)
count = results[0][0]
gauge.set(count)
except:
pass


KNOWN_CELERY_TASK_IDENTIFIERS = {
"pluginJob",
"runEveryHour",
Expand Down

0 comments on commit ebe0e93

Please sign in to comment.