From 352276239a8628a211885567f78f524383aaa4f4 Mon Sep 17 00:00:00 2001 From: Anirudh Pillai Date: Mon, 19 Aug 2024 21:34:48 +0100 Subject: [PATCH 1/5] feat: monitoring for endpoints --- posthog/api/dashboards/dashboard.py | 4 ++++ posthog/api/insight.py | 4 ++++ posthog/api/monitoring.py | 37 +++++++++++++++++++++++++++++ posthog/api/query.py | 4 ++++ posthog/tasks/calculate_cohort.py | 10 ++++---- 5 files changed, 55 insertions(+), 4 deletions(-) create mode 100644 posthog/api/monitoring.py diff --git a/posthog/api/dashboards/dashboard.py b/posthog/api/dashboards/dashboard.py index 7c1b0c093845d..b4d25a007c30e 100644 --- a/posthog/api/dashboards/dashboard.py +++ b/posthog/api/dashboards/dashboard.py @@ -29,6 +29,7 @@ from posthog.models.tagged_item import TaggedItem from posthog.models.user import User from posthog.user_permissions import UserPermissionsSerializerMixin +from posthog.api.monitoring import monitor, Feature logger = structlog.get_logger(__name__) @@ -161,6 +162,7 @@ def validate_filters(self, value) -> dict: return value + @monitor(feature=Feature.DASHBOARD, endpoint="dashboard", method="POST") def create(self, validated_data: dict, *args: Any, **kwargs: Any) -> Dashboard: request = self.context["request"] validated_data["created_by"] = request.user @@ -260,6 +262,7 @@ def _deep_duplicate_tiles(self, dashboard: Dashboard, existing_tile: DashboardTi color=existing_tile.color, ) + @monitor(feature=Feature.DASHBOARD, endpoint="update", method="PATCH") def update(self, instance: Dashboard, validated_data: dict, *args: Any, **kwargs: Any) -> Dashboard: can_user_restrict = self.user_permissions.dashboard(instance).can_restrict if "restriction_level" in validated_data and not can_user_restrict: @@ -451,6 +454,7 @@ def safely_get_queryset(self, queryset) -> QuerySet: return queryset + @monitor(feature=Feature.DASHBOARD, endpoint="dashboard", method="GET") def retrieve(self, request: Request, *args: Any, **kwargs: Any) -> Response: pk = kwargs["pk"] queryset = self.get_queryset() diff --git a/posthog/api/insight.py b/posthog/api/insight.py index f51a1705966f8..7711561374c3c 100644 --- a/posthog/api/insight.py +++ b/posthog/api/insight.py @@ -103,6 +103,7 @@ relative_date_parse, str_to_bool, ) +from posthog.api.monitoring import monitor, Feature logger = structlog.get_logger(__name__) @@ -329,6 +330,7 @@ class Meta: "is_cached", ) + @monitor(feature=Feature.INSIGHT, endpoint="insight", method="POST") def create(self, validated_data: dict, *args: Any, **kwargs: Any) -> Insight: request = self.context["request"] tags = validated_data.pop("tags", None) # tags are created separately as global tag relationships @@ -368,6 +370,7 @@ def create(self, validated_data: dict, *args: Any, **kwargs: Any) -> Insight: return insight @transaction.atomic() + @monitor(feature=Feature.INSIGHT, endpoint="insight", method="POST") def update(self, instance: Insight, validated_data: dict, **kwargs) -> Insight: dashboards_before_change: list[Union[str, dict]] = [] try: @@ -786,6 +789,7 @@ def _filter_request(self, request: request.Request, queryset: QuerySet) -> Query ), ], ) + @monitor(feature=Feature.INSIGHT, endpoint="insight", method="GET") def retrieve(self, request, *args, **kwargs): instance = self.get_object() serializer_context = self.get_serializer_context() diff --git a/posthog/api/monitoring.py b/posthog/api/monitoring.py new file mode 100644 index 0000000000000..2357b2e4bbc09 --- /dev/null +++ b/posthog/api/monitoring.py @@ -0,0 +1,37 @@ +from enum import StrEnum +from prometheus_client import Counter +from sentry_sdk import set_tag + + +class Feature(StrEnum): + COHORT = "cohort" + DASHBOARD = "dashboard" + INSIGHT = "insight" + QUERY = "query" + + +API_REQUESTS_COUNTER = Counter( + "api_requests", + "Number of API requests", + labelnames=["endpoint", "method"], +) + + +def monitor(*, feature: Feature | None, endpoint: str, method: str) -> callable: + """ + Decorator to increment the API requests counter + Sets sentry tags for the endpoint and method + """ + + def decorator(func: callable) -> callable: + def wrapper(*args, **kwargs): + API_REQUESTS_COUNTER.labels(endpoint=endpoint, method=method).inc() + + if feature: + set_tag("feature", feature.value) + + return func(*args, **kwargs) + + return wrapper + + return decorator diff --git a/posthog/api/query.py b/posthog/api/query.py index acda400e51b35..c9dde3bc9a052 100644 --- a/posthog/api/query.py +++ b/posthog/api/query.py @@ -32,6 +32,7 @@ TeamRateThrottle, ) from posthog.schema import QueryRequest, QueryResponseAlternative, QueryStatusResponse +from posthog.api.monitoring import monitor, Feature class QueryThrottle(TeamRateThrottle): @@ -59,6 +60,7 @@ def get_throttles(self): 200: QueryResponseAlternative, }, ) + @monitor(feature=Feature.QUERY, endpoint="query", method="POST") def create(self, request, *args, **kwargs) -> Response: data = self.get_model(request.data, QueryRequest) client_query_id = data.client_query_id or uuid.uuid4().hex @@ -99,6 +101,7 @@ def create(self, request, *args, **kwargs) -> Response: description="(Experimental)", responses={200: QueryStatusResponse}, ) + @monitor(feature=Feature.QUERY, endpoint="query", method="GET") def retrieve(self, request: Request, pk=None, *args, **kwargs) -> JsonResponse: show_progress: bool = request.query_params.get("show_progress", False) == "true" show_progress = ( @@ -124,6 +127,7 @@ def retrieve(self, request: Request, pk=None, *args, **kwargs) -> JsonResponse: 204: OpenApiResponse(description="Query cancelled"), }, ) + @monitor(feature=Feature.QUERY, endpoint="query", method="DELETE") def destroy(self, request, pk=None, *args, **kwargs): cancel_query(self.team.pk, pk) return Response(status=204) diff --git a/posthog/tasks/calculate_cohort.py b/posthog/tasks/calculate_cohort.py index 67d57e67f1bce..d0ab32db43f2a 100644 --- a/posthog/tasks/calculate_cohort.py +++ b/posthog/tasks/calculate_cohort.py @@ -14,6 +14,7 @@ from posthog.models.user import User from prometheus_client import Gauge from sentry_sdk import set_tag +from posthog.api.monitoring import Feature COHORT_RECALCULATIONS_BACKLOG_GAUGE = Gauge( "cohort_recalculations_backlog", @@ -55,10 +56,6 @@ def calculate_cohorts() -> None: def update_cohort(cohort: Cohort, *, initiating_user: Optional[User]) -> None: - set_tag("component", "cohort") - set_tag("cohort_id", cohort.id) - set_tag("team_id", cohort.team.id) - pending_version = get_and_update_pending_version(cohort) calculate_cohort_ch.delay(cohort.id, pending_version, initiating_user.id if initiating_user else None) @@ -72,6 +69,11 @@ def clear_stale_cohort(cohort_id: int, before_version: int) -> None: @shared_task(ignore_result=True, max_retries=2) def calculate_cohort_ch(cohort_id: int, pending_version: int, initiating_user_id: Optional[int] = None) -> None: cohort: Cohort = Cohort.objects.get(pk=cohort_id) + + set_tag("feature", Feature.COHORT.value) + set_tag("cohort_id", cohort.id) + set_tag("team_id", cohort.team.id) + cohort.calculate_people_ch(pending_version, initiating_user_id=initiating_user_id) From ce973321ad33991e77c4004cbab7213a4f3b1aa5 Mon Sep 17 00:00:00 2001 From: Anirudh Pillai Date: Tue, 20 Aug 2024 09:00:13 +0100 Subject: [PATCH 2/5] fix type --- posthog/api/monitoring.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/posthog/api/monitoring.py b/posthog/api/monitoring.py index 2357b2e4bbc09..be850f2c355e3 100644 --- a/posthog/api/monitoring.py +++ b/posthog/api/monitoring.py @@ -1,6 +1,7 @@ from enum import StrEnum from prometheus_client import Counter from sentry_sdk import set_tag +from collections.abc import Callable class Feature(StrEnum): @@ -17,13 +18,13 @@ class Feature(StrEnum): ) -def monitor(*, feature: Feature | None, endpoint: str, method: str) -> callable: +def monitor(*, feature: Feature | None, endpoint: str, method: str) -> Callable: """ Decorator to increment the API requests counter Sets sentry tags for the endpoint and method """ - def decorator(func: callable) -> callable: + def decorator(func: Callable) -> Callable: def wrapper(*args, **kwargs): API_REQUESTS_COUNTER.labels(endpoint=endpoint, method=method).inc() From dfe88ce21311bd81214a6838afa493fe5312dc15 Mon Sep 17 00:00:00 2001 From: Anirudh Pillai Date: Tue, 20 Aug 2024 09:34:34 +0100 Subject: [PATCH 3/5] replace statsd metrics with prometheus --- posthog/clickhouse/client/execute.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/posthog/clickhouse/client/execute.py b/posthog/clickhouse/client/execute.py index 1fcc1402c948a..539ddfe10f151 100644 --- a/posthog/clickhouse/client/execute.py +++ b/posthog/clickhouse/client/execute.py @@ -10,7 +10,6 @@ import sqlparse from clickhouse_driver import Client as SyncClient from django.conf import settings as app_settings -from statshog.defaults.django import statsd from posthog.clickhouse.client.connection import Workload, get_pool from posthog.clickhouse.client.escape import substitute_params @@ -18,6 +17,18 @@ from posthog.errors import wrap_query_error from posthog.settings import TEST from posthog.utils import generate_short_id, patchable +from prometheus_client import Counter, Gauge + +QUERY_ERROR_COUNTER = Counter( + "clickhouse_query_failure", + "Query execution failure signal is dispatched when a query fails.", + labelnames=["exception_type"], +) + +QUERY_EXECUTION_TIME_GAUGE = Gauge( + "clickhouse_query_execution_time", + "Clickhouse query execution time", +) InsertParams = Union[list, tuple, types.GeneratorType] NonInsertParams = dict[str, Any] @@ -128,16 +139,13 @@ def sync_execute( ) except Exception as e: err = wrap_query_error(e) - statsd.incr( - "clickhouse_sync_execution_failure", - tags={"failed": True, "reason": type(err).__name__}, - ) + QUERY_ERROR_COUNTER.labels(exception_type=type(err).__name__).inc() raise err from e finally: execution_time = perf_counter() - start_time - statsd.timing("clickhouse_sync_execution_time", execution_time * 1000.0) + QUERY_EXECUTION_TIME_GAUGE.set(execution_time * 1000.0) if query_counter := getattr(thread_local_storage, "query_counter", None): query_counter.total_query_time += execution_time From 8fce860b0cf0ea64e25197ae19d65579d070bd33 Mon Sep 17 00:00:00 2001 From: Anirudh Pillai Date: Tue, 20 Aug 2024 13:06:11 +0100 Subject: [PATCH 4/5] query_type tag --- posthog/hogql_queries/query_runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index c42142898240b..6d7a599b3f599 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -542,6 +542,7 @@ def run( tag_queries(cache_key=cache_key) tag_queries(sentry_trace=get_traceparent()) set_tag("cache_key", cache_key) + set_tag("query_type", self.query.__name__) if insight_id: tag_queries(insight_id=insight_id) set_tag("insight_id", str(insight_id)) From d80a20a7b5036168552e56decff5113829094311 Mon Sep 17 00:00:00 2001 From: Anirudh Pillai Date: Tue, 20 Aug 2024 13:12:05 +0100 Subject: [PATCH 5/5] fix name --- posthog/hogql_queries/query_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index 6d7a599b3f599..55e2426b46ccf 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -542,7 +542,7 @@ def run( tag_queries(cache_key=cache_key) tag_queries(sentry_trace=get_traceparent()) set_tag("cache_key", cache_key) - set_tag("query_type", self.query.__name__) + set_tag("query_type", self.query.__class__.__name__) if insight_id: tag_queries(insight_id=insight_id) set_tag("insight_id", str(insight_id))