From 49f8a6884f0cb0fa7d60a408c362ee3fab64c078 Mon Sep 17 00:00:00 2001 From: Peter Kirkham Date: Tue, 10 Dec 2024 16:08:56 +0000 Subject: [PATCH] feat: add first matching event for trends math (#26774) Co-authored-by: Peter Kirkham Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Sandy Spicer --- frontend/src/queries/schema.json | 10 +++- frontend/src/scenes/trends/mathsLogic.tsx | 23 ++++++-- frontend/src/types.ts | 1 + .../insights/trends/aggregation_operations.py | 11 +++- .../trends/test/test_trends_query_runner.py | 54 +++++++++++++++++++ .../insights/trends/trends_query_builder.py | 7 +-- .../insights/utils/aggregations.py | 12 +++-- posthog/schema.py | 1 + 8 files changed, 108 insertions(+), 11 deletions(-) diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 778388309977b..ec98b627d4bd2 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -1470,7 +1470,15 @@ "type": "string" }, "BaseMathType": { - "enum": ["total", "dau", "weekly_active", "monthly_active", "unique_session", "first_time_for_user"], + "enum": [ + "total", + "dau", + "weekly_active", + "monthly_active", + "unique_session", + "first_time_for_user", + "first_matching_event_for_user" + ], "type": "string" }, "BinCountValue": { diff --git a/frontend/src/scenes/trends/mathsLogic.tsx b/frontend/src/scenes/trends/mathsLogic.tsx index 04756fd135dd9..7f907d3f1f246 100644 --- a/frontend/src/scenes/trends/mathsLogic.tsx +++ b/frontend/src/scenes/trends/mathsLogic.tsx @@ -145,12 +145,29 @@ export const BASE_MATH_DEFINITIONS: Record = { shortName: 'first time', description: ( <> - Only count events if users do it for the first time. + Only the first time the user performed this event will count, and only if it matches the event filters.

- Example: If a single user performs an event for the first time ever within a given period, it counts - as 1. Subsequent events by the same user will not be counted. + Example: If we are looking for pageview events to posthog.com/about, but the user's first + pageview was on posthog.com, it will not match, even if they went to posthog.com/about afterwards. + + + ), + category: MathCategory.EventCount, + }, + [BaseMathType.FirstMatchingEventForUser]: { + name: 'First matching event for user', + shortName: 'first matching event', + description: ( + <> + The first time the user performed this event that matches the event filters will count.
+
+ + Example: If we are looking for pageview events to posthog.com/about, and the user's first + pageview was on posthog.com but then they navigated to posthog.com/about, it will match the pageview + event from posthog.com/about. ), diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 8dc6522457e02..fe4d7d990f1b0 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -3614,6 +3614,7 @@ export enum BaseMathType { MonthlyActiveUsers = 'monthly_active', UniqueSessions = 'unique_session', FirstTimeForUser = 'first_time_for_user', + FirstMatchingEventForUser = 'first_matching_event_for_user', } export enum PropertyMathType { diff --git a/posthog/hogql_queries/insights/trends/aggregation_operations.py b/posthog/hogql_queries/insights/trends/aggregation_operations.py index 84e7c181f7c13..9e88bfca8b4aa 100644 --- a/posthog/hogql_queries/insights/trends/aggregation_operations.py +++ b/posthog/hogql_queries/insights/trends/aggregation_operations.py @@ -92,6 +92,7 @@ def requires_query_orchestration(self) -> bool: "weekly_active", "monthly_active", "first_time_for_user", + "first_matching_event_for_user", ] return self.is_count_per_actor_variant() or self.series.math in math_to_return_true @@ -116,6 +117,9 @@ def is_active_users_math(self): def is_first_time_ever_math(self): return self.series.math == "first_time_for_user" + def is_first_matching_event(self): + return self.series.math == "first_matching_event_for_user" + def _math_func(self, method: str, override_chain: Optional[list[str | int]]) -> ast.Call: if override_chain is not None: return ast.Call(name=method, args=[ast.Field(chain=override_chain)]) @@ -452,7 +456,11 @@ def _first_time_parent_query(self, inner_query: ast.SelectQuery): return query def get_first_time_math_query_orchestrator( - self, events_where_clause: ast.Expr, sample_value: ast.RatioExpr, event_name_filter: ast.Expr | None = None + self, + events_where_clause: ast.Expr, + sample_value: ast.RatioExpr, + event_name_filter: 
ast.Expr | None = None, + is_first_matching_event: bool = False, ): date_placeholders = self.query_date_range.to_placeholders() date_from = parse_expr( @@ -479,6 +487,7 @@ def __init__(self): filters=events_where_clause, event_or_action_filter=event_name_filter, ratio=sample_value, + is_first_matching_event=is_first_matching_event, ) self.parent_query_builder = QueryAlternator(parent_select) diff --git a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py index e72c4b66b4d79..c3c7d04a71af6 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py @@ -5013,3 +5013,57 @@ def test_trends_aggregation_total_with_null(self): assert len(response.results) == 1 assert response.results[0]["data"] == [1.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.1] + + def test_trends_aggregation_first_matching_event_for_user(self): + _create_person( + team=self.team, + distinct_ids=["p1"], + properties={}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-08T12:00:00Z", + properties={"$browser": "Chrome"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-09T12:00:00Z", + properties={"$browser": "Chrome"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-10T12:00:00Z", + properties={"$browser": "Firefox"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-11T12:00:00Z", + properties={"$browser": "Firefox"}, + ) + flush_persons_and_events() + + response = self._run_trends_query( + "2020-01-08", + "2020-01-11", + IntervalType.DAY, + [ + EventsNode( + event="$pageview", + math=BaseMathType.FIRST_MATCHING_EVENT_FOR_USER, + properties=[EventPropertyFilter(key="$browser", operator=PropertyOperator.EXACT, 
value="Firefox")], + ) + ], + TrendsFilter(display=ChartDisplayType.ACTIONS_LINE_GRAPH), + ) + + assert len(response.results) == 1 + assert response.results[0]["count"] == 1 + assert response.results[0]["data"] == [0, 0, 1, 0] diff --git a/posthog/hogql_queries/insights/trends/trends_query_builder.py b/posthog/hogql_queries/insights/trends/trends_query_builder.py index 826d52c1e556f..a0e1b185ce806 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_builder.py +++ b/posthog/hogql_queries/insights/trends/trends_query_builder.py @@ -294,14 +294,15 @@ def _get_events_subquery( return wrapper # Just complex series aggregation - elif ( - self._aggregation_operation.requires_query_orchestration() - and self._aggregation_operation.is_first_time_ever_math() + elif self._aggregation_operation.requires_query_orchestration() and ( + self._aggregation_operation.is_first_time_ever_math() + or self._aggregation_operation.is_first_matching_event() ): return self._aggregation_operation.get_first_time_math_query_orchestrator( events_where_clause=events_filter, sample_value=self._sample_value(), event_name_filter=self._event_or_action_where_expr(), + is_first_matching_event=self._aggregation_operation.is_first_matching_event(), ).build() elif self._aggregation_operation.requires_query_orchestration(): return self._aggregation_operation.get_actors_query_orchestrator( diff --git a/posthog/hogql_queries/insights/utils/aggregations.py b/posthog/hogql_queries/insights/utils/aggregations.py index cb4fd72377759..bdf30f527d921 100644 --- a/posthog/hogql_queries/insights/utils/aggregations.py +++ b/posthog/hogql_queries/insights/utils/aggregations.py @@ -68,20 +68,26 @@ def __init__( filters: ast.Expr | None = None, event_or_action_filter: ast.Expr | None = None, ratio: ast.RatioExpr | None = None, + is_first_matching_event: bool = False, ): - query.select = self._select_expr(date_from, filters) + query.select = self._select_expr(date_from, filters, is_first_matching_event) 
query.select_from = self._select_from_expr(ratio) query.where = self._where_expr(date_to, event_or_action_filter) query.group_by = self._group_by_expr() query.having = self._having_expr() super().__init__(query) - def _select_expr(self, date_from: ast.Expr, filters: ast.Expr | None = None): + def _select_expr(self, date_from: ast.Expr, filters: ast.Expr | None = None, is_first_matching_event: bool = False): aggregation_filters = date_from if filters is None else ast.And(exprs=[date_from, filters]) + min_timestamp_expr = ( + ast.Call(name="min", args=[ast.Field(chain=["timestamp"])]) + if not is_first_matching_event or filters is None + else ast.Call(name="minIf", args=[ast.Field(chain=["timestamp"]), filters]) + ) return [ ast.Alias( alias="min_timestamp", - expr=ast.Call(name="min", args=[ast.Field(chain=["timestamp"])]), + expr=min_timestamp_expr, ), ast.Alias( alias="min_timestamp_with_condition", diff --git a/posthog/schema.py b/posthog/schema.py index d73a55849bb79..12f9bb871873e 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -420,6 +420,7 @@ class BaseMathType(StrEnum): MONTHLY_ACTIVE = "monthly_active" UNIQUE_SESSION = "unique_session" FIRST_TIME_FOR_USER = "first_time_for_user" + FIRST_MATCHING_EVENT_FOR_USER = "first_matching_event_for_user" class BreakdownAttributionType(StrEnum):