Skip to content

Commit

Permalink
feat: Add batch export metrics to app_metrics (#21760)
Browse files Browse the repository at this point in the history
* feat: Add batch export metrics to app_metrics

* chore: Comment why 'last_updated_at' is used
  • Loading branch information
tomasfarias authored Apr 23, 2024
1 parent 0266ef3 commit ec02c4c
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 23 deletions.
92 changes: 71 additions & 21 deletions posthog/api/app_metrics.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
from typing import Any
import datetime as dt
import uuid
from typing import Any

from django.db.models import Sum
from django.db.models.functions import Coalesce, TruncDay
from rest_framework import mixins, request, response, viewsets
from rest_framework.decorators import action

from posthog.api.routing import TeamAndOrgViewSetMixin
from posthog.models import BatchExportRun
from posthog.models.plugin import PluginConfig
from posthog.queries.app_metrics.app_metrics import (
AppMetricsErrorDetailsQuery,
Expand All @@ -19,6 +23,7 @@
AppMetricsErrorsRequestSerializer,
AppMetricsRequestSerializer,
)
from posthog.utils import relative_date_parse


class AppMetricsViewSet(TeamAndOrgViewSetMixin, mixins.RetrieveModelMixin, viewsets.GenericViewSet):
Expand All @@ -27,28 +32,24 @@ class AppMetricsViewSet(TeamAndOrgViewSetMixin, mixins.RetrieveModelMixin, views

def retrieve(self, request: request.Request, *args: Any, **kwargs: Any) -> response.Response:
try:
# probe if we have a valid uuid, and thus are requesting metrics for a batch export
uuid.UUID(kwargs["pk"])
rows = self.get_batch_export_runs_app_metrics_queryset(batch_export_id=kwargs["pk"])

dates = [row["dates"].strftime("%Y-%m-%d") for row in rows]
successes = [row["successes"] for row in rows]
failures = [row["failures"] for row in rows]
return response.Response(
{
"metrics": [
{
"dates": [
"2024-01-04",
"2024-01-05",
"2024-01-06",
"2024-01-07",
"2024-01-08",
"2024-01-09",
"2024-01-10",
"2024-01-11",
],
"successes": [0, 0, 0, 0, 0, 0, 9379, 6237],
"successes_on_retry": [0, 0, 0, 0, 0, 0, 0, 0],
"failures": [0, 0, 0, 0, 0, 0, 665, 0],
"totals": {"successes": 15616, "successes_on_retry": 0, "failures": 665},
}
],
"metrics": {
"dates": dates,
"successes": successes,
"successes_on_retry": [0] * len(dates),
"failures": failures,
"totals": {
"successes": sum(successes),
"successes_on_retry": 0,
"failures": sum(failures),
},
},
"errors": None,
}
)
Expand All @@ -74,6 +75,55 @@ def error_details(self, request: request.Request, *args: Any, **kwargs: Any) ->
error_details = AppMetricsErrorDetailsQuery(self.team, plugin_config.pk, filter).run()
return response.Response({"result": error_details})

def get_batch_export_runs_app_metrics_queryset(self, batch_export_id: str):
    """Fetch per-day aggregated metrics for a batch export's runs via the Django ORM.

    Roughly matches the following (much more readable) query:

    ```
    select
        date_trunc('day', last_updated_at) as dates,
        sum(coalesce(records_completed, 0)) as successes,
        sum(coalesce(records_total_count, 0)) - sum(coalesce(records_completed, 0)) as failures
    from
        posthog_batchexportrun
    where
        batch_export_id = :batch_export_id
        and last_updated_at between :date_from and :date_to
    group by
        date_trunc('day', last_updated_at)
    order by
        dates
    ```

    A truncated 'last_updated_at' is used as the grouping date as it reflects when a
    particular run was last updated. It feels easier to explain to users that if they
    see metrics for today, those correspond to runs that happened today, even if the
    runs themselves exported data from a year ago (because it was a backfill).

    Raises:
        ValueError: If provided 'batch_export_id' is not a valid UUID.
    """
    # Validate early: a non-UUID pk means the caller isn't asking about a batch export.
    export_uuid = uuid.UUID(batch_export_id)

    # Date range comes from the query string; default to the last 30 days.
    raw_after = self.request.GET.get("date_from", "-30d")
    raw_before = self.request.GET.get("date_to", None)

    range_start = relative_date_parse(raw_after, self.team.timezone_info)
    if raw_before:
        range_end = relative_date_parse(raw_before, self.team.timezone_info)
    else:
        range_end = dt.datetime.now(dt.timezone.utc)

    matching_runs = BatchExportRun.objects.filter(
        batch_export_id=export_uuid,
        last_updated_at__range=(range_start, range_end),
    )

    return (
        matching_runs.annotate(dates=TruncDay("last_updated_at"))
        .values("dates")
        .annotate(
            successes=Sum(Coalesce("records_completed", 0)),
            # A run's failures are whatever it was supposed to export but didn't complete.
            failures=Sum(Coalesce("records_total_count", 0)) - Sum(Coalesce("records_completed", 0)),
        )
        .order_by("dates")
        .all()
    )


class HistoricalExportsAppMetricsViewSet(
TeamAndOrgViewSetMixin,
Expand Down
4 changes: 2 additions & 2 deletions posthog/api/test/batch_exports/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
from rest_framework import status


def create_batch_export(client: TestClient, team_id: int, batch_export_data: dict):
def create_batch_export(client: TestClient, team_id: int, batch_export_data: dict | str):
return client.post(
f"/api/projects/{team_id}/batch_exports",
batch_export_data,
content_type="application/json",
)


def create_batch_export_ok(client: TestClient, team_id: int, batch_export_data: dict):
def create_batch_export_ok(client: TestClient, team_id: int, batch_export_data: dict | str):
response = create_batch_export(client, team_id, batch_export_data)
assert response.status_code == status.HTTP_201_CREATED, response.json()
return response.json()
Expand Down
84 changes: 84 additions & 0 deletions posthog/api/test/test_app_metrics.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import datetime as dt
import json
from unittest import mock

from freezegun.api import freeze_time
from rest_framework import status

from posthog.api.test.batch_exports.conftest import start_test_worker
from posthog.api.test.batch_exports.operations import create_batch_export_ok
from posthog.batch_exports.models import BatchExportRun
from posthog.models.activity_logging.activity_log import Detail, Trigger, log_activity
from posthog.models.plugin import Plugin, PluginConfig
from posthog.models.utils import UUIDT
from posthog.queries.app_metrics.test.test_app_metrics import create_app_metric
from posthog.temporal.common.client import sync_connect
from posthog.test.base import APIBaseTest, ClickhouseTestMixin

SAMPLE_PAYLOAD = {"dateRange": ["2021-06-10", "2022-06-12"], "parallelism": 1}
Expand Down Expand Up @@ -72,6 +78,84 @@ def test_retrieve(self):
},
)

def test_retrieve_batch_export_runs_app_metrics(self):
    """Test batch export metrics returned by app metrics endpoint.

    Creates an S3 batch export, then one completed run (3 records) and one failed
    run (0 of 5 records) per day for 7 days, and asserts the endpoint aggregates
    them into per-day success/failure counts.
    """
    destination_data = {
        "type": "S3",
        "config": {
            "bucket_name": "my-production-s3-bucket",
            "region": "us-east-1",
            "prefix": "posthog-events/",
            "aws_access_key_id": "abc123",
            "aws_secret_access_key": "secret",
        },
    }

    batch_export_data = {
        "name": "my-production-s3-bucket-destination",
        "destination": destination_data,
        "interval": "hour",
    }

    temporal = sync_connect()

    # Fixed "now" so the expected date buckets below are deterministic.
    now = dt.datetime(2021, 12, 5, 13, 23, 0, tzinfo=dt.timezone.utc)
    with start_test_worker(temporal):
        response = create_batch_export_ok(
            self.client,
            self.team.pk,
            json.dumps(batch_export_data),
        )

        batch_export_id = response["id"]
        for days_ago in range(0, 7):
            last_updated_at = now - dt.timedelta(days=days_ago)

            with freeze_time(last_updated_at):
                # Since 'last_updated_at' uses 'auto_now', passing the argument is ignored.
                # We have to re-freeze time to get each run created on a single date.
                BatchExportRun.objects.create(
                    batch_export_id=batch_export_id,
                    data_interval_end=last_updated_at,
                    data_interval_start=last_updated_at - dt.timedelta(hours=1),
                    status=BatchExportRun.Status.COMPLETED,
                    records_completed=3,
                    records_total_count=3,
                )

                # A failed run on the same day: 5 records expected, none completed.
                BatchExportRun.objects.create(
                    batch_export_id=batch_export_id,
                    data_interval_end=last_updated_at - dt.timedelta(hours=2),
                    data_interval_start=last_updated_at - dt.timedelta(hours=3),
                    status=BatchExportRun.Status.FAILED,
                    records_completed=0,
                    records_total_count=5,
                )

        response = self.client.get(f"/api/projects/@current/app_metrics/{batch_export_id}?date_from=-7d")
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        # Per day: successes = 3 (completed run), failures = 5 - 0 (failed run).
        # Totals over 7 days: 21 successes, 35 failures.
        self.assertEqual(
            response.json(),
            {
                "metrics": {
                    "dates": [
                        "2021-11-29",
                        "2021-11-30",
                        "2021-12-01",
                        "2021-12-02",
                        "2021-12-03",
                        "2021-12-04",
                        "2021-12-05",
                    ],
                    "successes": [3, 3, 3, 3, 3, 3, 3],
                    "successes_on_retry": [0, 0, 0, 0, 0, 0, 0],
                    "failures": [5, 5, 5, 5, 5, 5, 5],
                    "totals": {"successes": 21, "successes_on_retry": 0, "failures": 35},
                },
                "errors": None,
            },
        )

def test_list_historical_exports(self):
self._create_activity_log(
activity="job_triggered",
Expand Down

0 comments on commit ec02c4c

Please sign in to comment.