From 2a80480ad38c490be9469f6d2dcf66892ab5f4aa Mon Sep 17 00:00:00 2001 From: Rafael Audibert <32079912+rafaeelaudibert@users.noreply.github.com> Date: Wed, 18 Dec 2024 10:36:48 -0300 Subject: [PATCH] feat(web-analytics): Support diff on Web Goals dashboard (#26999) --- .../web_analytics/test/test_web_goals.py | 98 +++--- .../hogql_queries/web_analytics/web_goals.py | 284 ++++++++++-------- 2 files changed, 215 insertions(+), 167 deletions(-) diff --git a/posthog/hogql_queries/web_analytics/test/test_web_goals.py b/posthog/hogql_queries/web_analytics/test/test_web_goals.py index 4f7aefb8b42ff..231ba14ada7fd 100644 --- a/posthog/hogql_queries/web_analytics/test/test_web_goals.py +++ b/posthog/hogql_queries/web_analytics/test/test_web_goals.py @@ -1,12 +1,12 @@ from datetime import datetime from typing import Optional -from freezegun import freeze_time from posthog.hogql_queries.web_analytics.web_goals import WebGoalsQueryRunner from posthog.models import Action, Person, Element from posthog.models.utils import uuid7 from posthog.schema import ( + CompareFilter, DateRange, SessionTableVersion, HogQLQueryModifiers, @@ -21,29 +21,7 @@ class TestWebGoalsQueryRunner(ClickhouseTestMixin, APIBaseTest): - def _create_events(self, data, event="$pageview"): - person_result = [] - for id, timestamps in data: - with freeze_time(timestamps[0][0]): - person_result.append( - _create_person( - team_id=self.team.pk, - distinct_ids=[id], - properties={ - "name": id, - **({"email": "test@posthog.com"} if id == "test" else {}), - }, - ) - ) - for timestamp, session_id, pathname in timestamps: - _create_event( - team=self.team, - event=event, - distinct_id=id, - timestamp=timestamp, - properties={"$session_id": session_id, "$pathname": pathname}, - ) - return person_result + TIMESTAMP = "2024-12-01" def _create_person(self): distinct_id = str(uuid7()) @@ -70,6 +48,7 @@ def _visit_web_analytics(self, person: Person, session_id: Optional[str] = None) team=self.team, event="$pageview", distinct_id=person.uuid, + timestamp=self.TIMESTAMP, properties={ "$pathname": "/project/2/web", "$current_url": "https://us.posthog.com/project/2/web", @@ -82,6 +61,7 @@ def _click_pay(self, person: Person, session_id: Optional[str] = None): team=self.team, event="$autocapture", distinct_id=person.uuid, + timestamp=self.TIMESTAMP, elements=[Element(nth_of_type=1, nth_child=0, tag_name="button", text="Pay $10")], properties={"$session_id": session_id or person.uuid}, ) @@ -130,6 +110,7 @@ def _run_web_goals_query( limit=None, path_cleaning_filters=None, properties=None, + compare=True, session_table_version: SessionTableVersion = SessionTableVersion.V2, filter_test_accounts: Optional[bool] = False, ): @@ -139,30 +120,47 @@ def _run_web_goals_query( properties=properties or [], limit=limit, filterTestAccounts=filter_test_accounts, + compareFilter=CompareFilter(compare=compare), ) self.team.path_cleaning_filters = path_cleaning_filters or [] runner = WebGoalsQueryRunner(team=self.team, query=query, modifiers=modifiers) return runner.calculate() def test_no_crash_when_no_data_or_actions(self): - results = self._run_web_goals_query("all", None).results + results = self._run_web_goals_query("2024-11-01", None).results assert results == [] def test_no_crash_when_no_data_and_some_actions(self): self._create_actions() - results = self._run_web_goals_query("all", None).results + results = self._run_web_goals_query("2024-11-01", None).results assert results == [ - ["Contacted Sales", 0, 0, None], - ["Visited Web Analytics", 0, 0, None], - ["Clicked Pay", 0, 0, None], + ["Contacted Sales", (0, 0), (0, 0), (0, 0)], + ["Visited Web Analytics", (0, 0), (0, 0), (0, 0)], + ["Clicked Pay", (0, 0), (0, 0), (0, 0)], + ] + + def test_no_comparison(self): + self._create_actions() + p1, s1 = self._create_person() + self._visit_web_analytics(p1, s1) + + results = self._run_web_goals_query("2024-11-01", None, compare=False).results + assert results == [ + ["Contacted Sales", (0, None), (0, None), (0, None)], + ["Visited Web Analytics", (1, None), (1, None), (1, None)], + ["Clicked Pay", (0, None), (0, None), (0, None)], ] def test_one_user_one_action(self): self._create_actions() p1, s1 = self._create_person() self._visit_web_analytics(p1, s1) - results = self._run_web_goals_query("all", None).results - assert results == [["Contacted Sales", 0, 0, 0], ["Visited Web Analytics", 1, 1, 1], ["Clicked Pay", 0, 0, 0]] + results = self._run_web_goals_query("2024-11-01", None).results + assert results == [ + ["Contacted Sales", (0, 0), (0, 0), (0, 0)], + ["Visited Web Analytics", (1, 0), (1, 0), (1, 0)], + ["Clicked Pay", (0, 0), (0, 0), (0, 0)], + ] def test_one_user_two_similar_actions_across_sessions(self): self._create_actions() @@ -170,16 +168,24 @@ def test_one_user_two_similar_actions_across_sessions(self): self._visit_web_analytics(p1, s1) s2 = str(uuid7()) self._visit_web_analytics(p1, s2) - results = self._run_web_goals_query("all", None).results - assert results == [["Contacted Sales", 0, 0, 0], ["Visited Web Analytics", 1, 2, 1], ["Clicked Pay", 0, 0, 0]] + results = self._run_web_goals_query("2024-11-01", None).results + assert results == [ + ["Contacted Sales", (0, 0), (0, 0), (0, 0)], + ["Visited Web Analytics", (1, 0), (2, 0), (1, 0)], + ["Clicked Pay", (0, 0), (0, 0), (0, 0)], + ] def test_one_user_two_different_actions(self): self._create_actions() p1, s1 = self._create_person() self._visit_web_analytics(p1, s1) self._click_pay(p1, s1) - results = self._run_web_goals_query("all", None).results - assert results == [["Contacted Sales", 0, 0, 0], ["Visited Web Analytics", 1, 1, 1], ["Clicked Pay", 1, 1, 1]] + results = self._run_web_goals_query("2024-11-01", None).results + assert results == [ + ["Contacted Sales", (0, 0), (0, 0), (0, 0)], + ["Visited Web Analytics", (1, 0), (1, 0), (1, 0)], + ["Clicked Pay", (1, 0), (1, 0), (1, 0)], + ] def test_one_users_one_action_each(self): self._create_actions() @@ -187,11 +193,11 @@ def test_one_users_one_action_each(self): p2, s2 = self._create_person() self._visit_web_analytics(p1, s1) self._click_pay(p2, s2) - results = self._run_web_goals_query("all", None).results + results = self._run_web_goals_query("2024-11-01", None).results assert results == [ - ["Contacted Sales", 0, 0, 0], - ["Visited Web Analytics", 1, 1, 0.5], - ["Clicked Pay", 1, 1, 0.5], + ["Contacted Sales", (0, 0), (0, 0), (0, 0)], + ["Visited Web Analytics", (1, 0), (1, 0), (0.5, 0)], + ["Clicked Pay", (1, 0), (1, 0), (0.5, 0)], ] def test_many_users_and_actions(self): @@ -217,11 +223,11 @@ def test_many_users_and_actions(self): self._click_pay(p, s) self._click_pay(p, s) - results = self._run_web_goals_query("all", None).results + results = self._run_web_goals_query("2024-11-01", None).results assert results == [ - ["Contacted Sales", 0, 0, 0], - ["Visited Web Analytics", 11, 12, 11 / 15], - ["Clicked Pay", 7, 8, 7 / 15], + ["Contacted Sales", (0, 0), (0, 0), (0, 0)], + ["Visited Web Analytics", (11, 0), (12, 0), (11 / 15, 0)], + ["Clicked Pay", (7, 0), (8, 0), (7 / 15, 0)], ] def test_dont_show_deleted_actions(self): @@ -231,8 +237,8 @@ def test_dont_show_deleted_actions(self): self._visit_web_analytics(p1, s1) self._click_pay(p2, s2) actions[0].delete() - results = self._run_web_goals_query("all", None).results + results = self._run_web_goals_query("2024-11-01", None).results assert results == [ - ["Contacted Sales", 0, 0, 0], - ["Visited Web Analytics", 1, 1, 0.5], + ["Contacted Sales", (0, 0), (0, 0), (0, 0)], + ["Visited Web Analytics", (1, 0), (1, 0), (0.5, 0)], ] diff --git a/posthog/hogql_queries/web_analytics/web_goals.py b/posthog/hogql_queries/web_analytics/web_goals.py index 7bd36573340fb..a89c933a369c8 100644 --- a/posthog/hogql_queries/web_analytics/web_goals.py +++ b/posthog/hogql_queries/web_analytics/web_goals.py @@ -1,20 +1,29 @@ -from typing import Optional - -from datetime import datetime +from collections.abc import Sequence +from typing import TypeVar +from collections.abc import Iterator from posthog.hogql import ast from posthog.hogql.parser import parse_select from posthog.hogql.property import property_to_expr, get_property_type, action_to_expr from posthog.hogql.query import execute_hogql_query -from posthog.hogql_queries.utils.query_date_range import QueryDateRange from posthog.hogql_queries.web_analytics.web_analytics_query_runner import ( WebAnalyticsQueryRunner, ) from posthog.models import Action -from posthog.models.filters.mixins.utils import cached_property from posthog.schema import WebGoalsQueryResponse, WebGoalsQuery, CachedWebGoalsQueryResponse +# Returns an array `seq` split into chunks of size `size` +# Example: +# chunker([1, 2, 3, 4, 5], 2) -> [[1, 2], [3, 4], [5]] +T = TypeVar("T") + + +def chunker(seq: Sequence[T], size: int) -> Iterator[Sequence[T]]: + for pos in range(0, len(seq), size): + yield seq[pos : pos + size] + + class NoActionsError(Exception): pass @@ -25,93 +34,153 @@ class WebGoalsQueryRunner(WebAnalyticsQueryRunner): cached_response: CachedWebGoalsQueryResponse def to_query(self) -> ast.SelectQuery | ast.SelectSetQuery: + with self.timings.measure("actions"): + actions = Action.objects.filter(team__project_id=self.team.project_id, deleted=False).order_by( + "pinned_at", "-last_calculated_at" + )[:5] + if not actions: + raise NoActionsError("No actions found") + with self.timings.measure("date_expr"): - start = self.query_date_range.date_from_as_hogql() - end = self.query_date_range.date_to_as_hogql() - - actions = Action.objects.filter(team__project_id=self.team.project_id, deleted=False).order_by( - "pinned_at", "-last_calculated_at" - )[:5] - if not actions: - raise NoActionsError("No actions found") - - inner_aliases: list[ast.Expr] = [] - outer_aliases: list[ast.Expr] = [] - action_exprs: list[ast.Expr] = [] - for n, action in enumerate(actions): - expr = action_to_expr(action) - action_exprs.append(expr) - inner_aliases.append(ast.Alias(alias=f"action_count_{n}", expr=ast.Call(name="countIf", args=[expr]))) - inner_aliases.append( - ast.Alias( - alias=f"action_person_id_{n}", - expr=ast.Call( - name="if", - args=[ - ast.CompareOperation( - op=ast.CompareOperationOp.Gt, - left=ast.Field(chain=[f"action_count_{n}"]), - right=ast.Constant(value=0), - ), - ast.Field(chain=["person_id"]), - ast.Constant(value=None), - ], + current_period = self._current_period_expression("timestamp") + previous_period = self._previous_period_expression("timestamp") + + with self.timings.measure("aliases"): + inner_aliases: list[ast.Expr] = [] + outer_aliases: list[ast.Expr] = [] + action_exprs: list[ast.Expr] = [] + for n, action in enumerate(actions): + expr = action_to_expr(action) + action_exprs.append(expr) + + # Current/previous count + inner_aliases.append( + ast.Alias( + alias=f"action_current_count_{n}", + expr=ast.Call(name="countIf", args=[ast.And(exprs=[expr, current_period])]), + ) + ) + inner_aliases.append( + ast.Alias( + alias=f"action_previous_count_{n}", + expr=ast.Call(name="countIf", args=[ast.And(exprs=[expr, previous_period])]), + ) + ) + + # Current/Previous Person ID + inner_aliases.append( + ast.Alias( + alias=f"action_current_person_id_{n}", + expr=ast.Call( + name="if", + args=[ + ast.CompareOperation( + op=ast.CompareOperationOp.Gt, + left=ast.Field(chain=[f"action_current_count_{n}"]), + right=ast.Constant(value=0), + ), + ast.Field(chain=["person_id"]), + ast.Constant(value=None), + ], + ), + ) + ) + + inner_aliases.append( + ast.Alias( + alias=f"action_previous_person_id_{n}", + expr=ast.Call( + name="if", + args=[ + ast.CompareOperation( + op=ast.CompareOperationOp.Gt, + left=ast.Field(chain=[f"action_previous_count_{n}"]), + right=ast.Constant(value=0), + ), + ast.Field(chain=["person_id"]), + ast.Constant(value=None), + ], + ), + ) + ) + + # Outer aliases + outer_aliases.append(ast.Alias(alias=f"action_name_{n}", expr=ast.Constant(value=action.name))) + outer_aliases.append( + ast.Alias( + alias=f"action_total_{n}", + expr=ast.Tuple( + exprs=[ + ast.Call(name="sum", args=[ast.Field(chain=[f"action_current_count_{n}"])]), + ast.Call(name="sum", args=[ast.Field(chain=[f"action_previous_count_{n}"])]) + if self.query_compare_to_date_range + else ast.Constant(value=None), + ] + ), ), ) - ) - outer_aliases.append(ast.Alias(alias=f"action_name_{n}", expr=ast.Constant(value=action.name))) - outer_aliases.append( - ast.Alias( - alias=f"action_total_{n}", expr=ast.Call(name="sum", args=[ast.Field(chain=[f"action_count_{n}"])]) - ), - ) - outer_aliases.append( - ast.Alias( - alias=f"action_uniques_{n}", - expr=ast.Call(name="uniq", args=[ast.Field(chain=[f"action_person_id_{n}"])]), - ), - ) - inner_select = parse_select( - """ + outer_aliases.append( + ast.Alias( + alias=f"action_uniques_{n}", + expr=ast.Tuple( + exprs=[ + ast.Call(name="uniq", args=[ast.Field(chain=[f"action_current_person_id_{n}"])]), + ast.Call(name="uniq", args=[ast.Field(chain=[f"action_previous_person_id_{n}"])]) + if self.query_compare_to_date_range + else ast.Constant(value=None), + ] + ), + ), + ) + + with self.timings.measure("inner_select"): + inner_select = parse_select( + """ SELECT - any(events.person_id) as person_id + any(events.person_id) as person_id, + min(session.$start_timestamp) as start_timestamp FROM events WHERE and( events.`$session_id` IS NOT NULL, event = '$pageview' OR {action_where}, - timestamp >= {start}, - timestamp < {end}, + {periods_expression}, {event_properties}, {session_properties} ) GROUP BY events.`$session_id` """, - placeholders={ - "start": start, - "end": end, - "event_properties": self.event_properties(), - "session_properties": self.session_properties(), - "action_where": ast.Or(exprs=action_exprs), - }, - ) - assert isinstance(inner_select, ast.SelectQuery) - for alias in inner_aliases: - inner_select.select.append(alias) + placeholders={ + "periods_expression": self._periods_expression("timestamp"), + "event_properties": self.event_properties(), + "session_properties": self.session_properties(), + "action_where": ast.Or(exprs=action_exprs), + }, + ) + assert isinstance(inner_select, ast.SelectQuery) + for alias in inner_aliases: + inner_select.select.append(alias) - outer_select = parse_select( - """ + with self.timings.measure("outer_select"): + outer_select = parse_select( + """ SELECT - uniq(person_id) as total_people + uniqIf(person_id, {current_period}) as current_total_people, + uniqIf(person_id, {previous_period}) as previous_total_people FROM {inner_select} - """, - placeholders={ - "inner_select": inner_select, - }, - ) - assert isinstance(outer_select, ast.SelectQuery) - for alias in outer_aliases: - outer_select.select.append(alias) +WHERE {periods_expression} + """, + placeholders={ + "inner_select": inner_select, + "periods_expression": self._periods_expression("start_timestamp"), + "current_period": self._current_period_expression("start_timestamp"), + "previous_period": self._previous_period_expression("start_timestamp"), + }, + ) + + assert isinstance(outer_select, ast.SelectQuery) + for alias in outer_aliases: + outer_select.select.append(alias) return outer_select @@ -132,16 +201,27 @@ def calculate(self): assert response.results row = response.results[0] - num_visitors = row[0] - num_actions = (len(row) - 1) // 3 + current_visitors = row[0] + previous_visitors = row[1] results = [] - for i in range(num_actions): - action_name = row[(i * 3) + 1] - action_total = row[(i * 3) + 2] - action_unique = row[(i * 3) + 3] - action_rate = action_unique / num_visitors if num_visitors else None - results.append([action_name, action_unique, action_total, action_rate]) + for action_name, action_total, action_unique in chunker(row[2:], 3): + action_unique_current, action_unique_previous = action_unique + current_action_rate = ( + action_unique_current / current_visitors + if current_visitors > 0 + else 0 + if action_unique_current is not None + else None + ) + previous_action_rate = ( + action_unique_previous / previous_visitors + if previous_visitors > 0 + else 0 + if action_unique_previous is not None + else None + ) + results.append([action_name, action_unique, action_total, (current_action_rate, previous_action_rate)]) return WebGoalsQueryResponse( columns=[ @@ -155,19 +235,6 @@ def calculate(self): modifiers=self.modifiers, ) - @cached_property - def query_date_range(self): - return QueryDateRange( - date_range=self.query.dateRange, - team=self.team, - interval=None, - now=datetime.now(), - ) - - def all_properties(self) -> ast.Expr: - properties = self.query.properties + self._test_account_filters - return property_to_expr(properties, team=self.team) - def event_properties(self) -> ast.Expr: properties = [ p for p in self.query.properties + self._test_account_filters if get_property_type(p) in ["event", "person"] @@ -179,28 +246,3 @@ def session_properties(self) -> ast.Expr: p for p in self.query.properties + self._test_account_filters if get_property_type(p) == "session" ] return property_to_expr(properties, team=self.team, scope="event") - - -def to_data( - key: str, - kind: str, - value: Optional[float], - previous: Optional[float], - is_increase_bad: Optional[bool] = None, -) -> dict: - if kind == "percentage": - if value is not None: - value = value * 100 - if previous is not None: - previous = previous * 100 - - return { - "key": key, - "kind": kind, - "isIncreaseBad": is_increase_bad, - "value": value, - "previous": previous, - "changeFromPreviousPct": round(100 * (value - previous) / previous) - if value is not None and previous is not None and previous != 0 - else None, - }