From 135c2059a3eb6084f310a29b255f3740ce3b3f17 Mon Sep 17 00:00:00 2001
From: Tom Owers
Date: Tue, 23 Jan 2024 10:18:49 +0000
Subject: [PATCH] chore(stickiness): Added a basic set of stickiness tests (#19908)

Added a basic set of stickiness tests
---
 .../insights/stickiness_query_runner.py       |  18 +-
 .../test/test_stickiness_query_runner.py      | 507 ++++++++++++++++++
 2 files changed, 521 insertions(+), 4 deletions(-)
 create mode 100644 posthog/hogql_queries/insights/test/test_stickiness_query_runner.py

diff --git a/posthog/hogql_queries/insights/stickiness_query_runner.py b/posthog/hogql_queries/insights/stickiness_query_runner.py
index 80796d94649a9..a54a9c1cfbdae 100644
--- a/posthog/hogql_queries/insights/stickiness_query_runner.py
+++ b/posthog/hogql_queries/insights/stickiness_query_runner.py
@@ -82,11 +82,20 @@ def _refresh_frequency(self):
         return refresh_frequency
 
     def _events_query(self, series_with_extra: SeriesWithExtras) -> ast.SelectQuery:
+        num_intervals_column_expr = ast.Alias(
+            alias="num_intervals",
+            expr=ast.Call(
+                distinct=True,
+                name="count",
+                args=[self.query_date_range.date_to_start_of_interval_hogql(ast.Field(chain=["e", "timestamp"]))],
+            ),
+        )
+
         select_query = parse_select(
             """
                 SELECT count(DISTINCT aggregation_target) as aggregation_target, num_intervals
                 FROM (
-                    SELECT e.person_id as aggregation_target, count(DISTINCT toStartOfDay(e.timestamp)) as num_intervals
+                    SELECT e.person_id as aggregation_target, {num_intervals_column_expr}
                     FROM events e
                     SAMPLE {sample}
                     WHERE {where_clause}
@@ -100,6 +109,7 @@ def _events_query(self, series_with_extra: SeriesWithExtras) -> ast.SelectQuery:
                 "where_clause": self.where_clause(series_with_extra),
                 "num_intervals": ast.Constant(value=self.intervals_num()),
                 "sample": self._sample_value(),
+                "num_intervals_column_expr": num_intervals_column_expr,
             },
         )
 
@@ -111,7 +121,7 @@ def to_query(self) -> List[ast.SelectQuery]:  # type: ignore
         for series in self.series:
             date_range = self.date_range(series)
 
-            interval_subtract = ast.Call(
+            interval_addition = ast.Call(
                 name=f"toInterval{date_range.interval_name.capitalize()}",
                 args=[ast.Constant(value=2)],
             )
@@ -123,7 +133,7 @@ def to_query(self) -> List[ast.SelectQuery]:  # type: ignore
                     SELECT sum(aggregation_target) as aggregation_target, num_intervals
                     FROM (
                         SELECT 0 as aggregation_target, (number + 1) as num_intervals
-                        FROM numbers(dateDiff({interval}, {date_from} - {interval_subtract}, {date_to}))
+                        FROM numbers(dateDiff({interval}, {date_from}, {date_to} + {interval_addition}))
                         UNION ALL
                         {events_query}
                     )
@@ -133,7 +143,7 @@ def to_query(self) -> List[ast.SelectQuery]:  # type: ignore
                 """,
                 placeholders={
                     **date_range.to_placeholders(),
-                    "interval_subtract": interval_subtract,
+                    "interval_addition": interval_addition,
                     "events_query": self._events_query(series),
                 },
             )
diff --git a/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py b/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py
new file mode 100644
index 0000000000000..8d81fd7d56eb2
--- /dev/null
+++ b/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py
@@ -0,0 +1,507 @@
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Union
+from django.test import override_settings
+
+from freezegun import freeze_time
+from posthog.hogql_queries.insights.stickiness_query_runner import StickinessQueryRunner
+from posthog.models.action.action import Action
+from posthog.models.action_step import ActionStep
+from posthog.models.property_definition import PropertyDefinition
+from posthog.schema import (
+    ActionsNode,
+    CohortPropertyFilter,
+    DateRange,
+    ElementPropertyFilter,
+    EmptyPropertyFilter,
+    EventPropertyFilter,
+    EventsNode,
+    FeaturePropertyFilter,
+    GroupPropertyFilter,
+    HogQLPropertyFilter,
+    IntervalType,
+    PersonPropertyFilter,
+    PropertyGroupFilter,
+    PropertyOperator,
+    RecordingDurationFilter,
+    SessionPropertyFilter,
+    StickinessFilter,
+    StickinessQuery,
+    StickinessQueryResponse,
+)
+from posthog.test.base import APIBaseTest, _create_event, _create_person
+
+
+@dataclass
+class Series:
+    """An event name together with the timestamps at which that event is created."""
+    event: str
+    timestamps: List[str]
+
+
+@dataclass
+class SeriesTestData:
+    """The event series and shared event properties to generate for one distinct_id."""
+    distinct_id: str
+    events: List[Series]
+    properties: Dict[str, str | int]
+
+
+StickinessProperties = Union[
+    List[
+        Union[
+            EventPropertyFilter,
+            PersonPropertyFilter,
+            ElementPropertyFilter,
+            SessionPropertyFilter,
+            CohortPropertyFilter,
+            RecordingDurationFilter,
+            GroupPropertyFilter,
+            FeaturePropertyFilter,
+            HogQLPropertyFilter,
+            EmptyPropertyFilter,
+        ]
+    ],
+    PropertyGroupFilter,
+]
+
+
+class TestStickinessQueryRunner(APIBaseTest):
+    """Tests for StickinessQueryRunner, run against the fixtures built by _create_test_events."""
+    default_date_from = "2020-01-11"
+    default_date_to = "2020-01-20"
+
+    def _create_events(self, data: List[SeriesTestData]):
+        """Create a person and their events per entry, plus a PropertyDefinition per property key."""
+        person_result = []
+        properties_to_create: Dict[str, str] = {}
+        for person in data:
+            first_timestamp = person.events[0].timestamps[0]
+
+            for key, value in person.properties.items():
+                if key not in properties_to_create:
+                    # bool is a subclass of int, so it has to be tested before int
+                    if isinstance(value, bool):
+                        property_type = "Boolean"
+                    elif isinstance(value, int):
+                        property_type = "Numeric"
+                    else:
+                        property_type = "String"
+
+                    properties_to_create[key] = property_type
+
+            with freeze_time(first_timestamp):
+                person_result.append(
+                    _create_person(
+                        team_id=self.team.pk,
+                        distinct_ids=[person.distinct_id],
+                        properties={
+                            "name": person.distinct_id,
+                            **({"email": "test@posthog.com"} if person.distinct_id == "p1" else {}),
+                        },
+                    )
+                )
+            for event in person.events:
+                for timestamp in event.timestamps:
+                    _create_event(
+                        team=self.team,
+                        event=event.event,
+                        distinct_id=person.distinct_id,
+                        timestamp=timestamp,
+                        properties=person.properties,
+                    )
+
+        for key, property_type in properties_to_create.items():
+            PropertyDefinition.objects.create(team=self.team, name=key, property_type=property_type)
+
+        return person_result
+
+    def _create_test_events(self):
+        """Create two persons: p1 with events on each day of 11-19 Jan 2020, p2 on every other day."""
+        self._create_events(
+            [
+                SeriesTestData(
+                    distinct_id="p1",
+                    events=[
+                        Series(
+                            event="$pageview",
+                            timestamps=[
+                                "2020-01-11T12:00:00Z",
+                                "2020-01-12T12:00:00Z",
+                                "2020-01-13T12:00:00Z",
+                                "2020-01-14T12:00:00Z",
+                                "2020-01-15T12:00:00Z",
+                                "2020-01-16T12:00:00Z",
+                                "2020-01-17T12:00:00Z",
+                                "2020-01-18T12:00:00Z",
+                                "2020-01-19T12:00:00Z",
+                            ],
+                        ),
+                        Series(
+                            event="$pageleave",
+                            timestamps=[
+                                "2020-01-11T12:00:00Z",
+                                "2020-01-12T12:00:00Z",
+                                "2020-01-13T12:00:00Z",
+                                "2020-01-14T12:00:00Z",
+                                "2020-01-15T12:00:00Z",
+                                "2020-01-16T12:00:00Z",
+                                "2020-01-17T12:00:00Z",
+                                "2020-01-18T12:00:00Z",
+                                "2020-01-19T12:00:00Z",
+                            ],
+                        ),
+                    ],
+                    properties={"$browser": "Chrome", "prop": 10, "bool_field": True},
+                ),
+                SeriesTestData(
+                    distinct_id="p2",
+                    events=[
+                        Series(
+                            event="$pageview",
+                            timestamps=[
+                                "2020-01-11T12:00:00Z",
+                                "2020-01-13T12:00:00Z",
+                                "2020-01-15T12:00:00Z",
+                                "2020-01-17T12:00:00Z",
+                                "2020-01-19T12:00:00Z",
+                            ],
+                        ),
+                        Series(
+                            event="$pageleave",
+                            timestamps=[
+                                "2020-01-11T12:00:00Z",
+                                "2020-01-13T12:00:00Z",
+                                "2020-01-15T12:00:00Z",
+                                "2020-01-17T12:00:00Z",
+                                "2020-01-19T12:00:00Z",
+                            ],
+                        ),
+                    ],
+                    properties={"$browser": "Firefox", "prop": 20, "bool_field": False},
+                ),
+            ]
+        )
+
+    def _run_query(
+        self,
+        series: Optional[List[EventsNode | ActionsNode]] = None,
+        date_from: Optional[str] = None,
+        date_to: Optional[str] = None,
+        interval: Optional[IntervalType] = None,
+        properties: Optional[StickinessProperties] = None,
+        filters: Optional[StickinessFilter] = None,
+        filter_test_accounts: Optional[bool] = False,
+    ):
+        """Run a StickinessQuery, defaulting to daily $pageview stickiness over the class default dates."""
+        query_series: List[EventsNode | ActionsNode] = [EventsNode(event="$pageview")] if series is None else series
+        query_date_from = date_from or self.default_date_from
+        query_date_to = date_to or self.default_date_to
+        query_interval = interval or IntervalType.day
+
+        query = StickinessQuery(
+            series=query_series,
+            dateRange=DateRange(date_from=query_date_from, date_to=query_date_to),
+            interval=query_interval,
+            properties=properties,
+            stickinessFilter=filters,
+            filterTestAccounts=filter_test_accounts,
+        )
+        return StickinessQueryRunner(team=self.team, query=query).calculate()
+
+    def test_stickiness_runs(self):
+        self._create_test_events()
+
+        response = self._run_query()
+        assert isinstance(response, StickinessQueryResponse)
+        assert isinstance(response.results, list)
+        assert isinstance(response.results[0], dict)
+
+    @override_settings(PERSON_ON_EVENTS_V2_OVERRIDE=True)
+    def test_stickiness_runs_with_poe(self):
+        self._create_test_events()
+
+        response = self._run_query()
+        assert isinstance(response, StickinessQueryResponse)
+        assert isinstance(response.results, list)
+        assert isinstance(response.results[0], dict)
+
+    def test_days(self):
+        self._create_test_events()
+
+        response = self._run_query()
+
+        result = response.results[0]
+
+        assert result["days"] == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+
+    def test_count(self):
+        self._create_test_events()
+
+        response = self._run_query()
+
+        result = response.results[0]
+
+        assert result["count"] == 2
+
+    def test_labels(self):
+        self._create_test_events()
+
+        response = self._run_query()
+
+        result = response.results[0]
+
+        assert result["label"] == "$pageview"
+        assert result["labels"] == [
+            "1 day",
+            "2 days",
+            "3 days",
+            "4 days",
+            "5 days",
+            "6 days",
+            "7 days",
+            "8 days",
+            "9 days",
+            "10 days",
+            "11 days",
+        ]
+
+    def test_interval_hour(self):
+        self._create_test_events()
+
+        response = self._run_query(interval=IntervalType.hour, date_from="2020-01-11", date_to="2020-01-12")
+
+        result = response.results[0]
+
+        hours_labels = [f"{hour + 1} hour{'' if hour == 0 else 's'}" for hour in range(26)]
+        hours_data = [0] * 26
+        hours_data[0] = 2
+
+        assert result["label"] == "$pageview"
+        assert result["labels"] == hours_labels
+        assert result["days"] == [hour + 1 for hour in range(26)]
+        assert result["data"] == hours_data
+
+    def test_interval_day(self):
+        self._create_test_events()
+
+        response = self._run_query(interval=IntervalType.day)
+
+        result = response.results[0]
+
+        assert result["label"] == "$pageview"
+        assert result["labels"] == [
+            "1 day",
+            "2 days",
+            "3 days",
+            "4 days",
+            "5 days",
+            "6 days",
+            "7 days",
+            "8 days",
+            "9 days",
+            "10 days",
+            "11 days",
+        ]
+        assert result["days"] == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+        assert result["data"] == [
+            0,
+            0,
+            0,
+            0,
+            1,
+            0,
+            0,
+            0,
+            1,
+            0,
+            0,
+        ]
+
+    def test_interval_week(self):
+        self._create_test_events()
+
+        response = self._run_query(interval=IntervalType.week)
+
+        result = response.results[0]
+
+        assert result["label"] == "$pageview"
+        assert result["labels"] == ["1 week", "2 weeks", "3 weeks", "4 weeks"]
+        assert result["days"] == [1, 2, 3, 4]
+        assert result["data"] == [0, 0, 2, 0]
+
+    def test_interval_month(self):
+        self._create_test_events()
+
+        response = self._run_query(interval=IntervalType.month)
+
+        result = response.results[0]
+
+        assert result["label"] == "$pageview"
+        assert result["labels"] == ["1 month", "2 months"]
+        assert result["days"] == [1, 2]
+        assert result["data"] == [2, 0]
+
+    def test_property_filtering(self):
+        self._create_test_events()
+
+        response = self._run_query(
+            properties=[EventPropertyFilter(key="$browser", operator=PropertyOperator.exact, value="Chrome")]
+        )
+
+        result = response.results[0]
+
+        assert result["data"] == [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            1,
+            0,
+            0,
+        ]
+
+    def test_property_filtering_hogql(self):
+        self._create_test_events()
+
+        response = self._run_query(properties=[HogQLPropertyFilter(key="properties.$browser == 'Chrome'")])
+
+        result = response.results[0]
+
+        assert result["data"] == [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            1,
+            0,
+            0,
+        ]
+
+    def test_event_filtering(self):
+        self._create_test_events()
+
+        series: List[EventsNode | ActionsNode] = [
+            EventsNode(
+                event="$pageview",
+                properties=[EventPropertyFilter(key="$browser", operator=PropertyOperator.exact, value="Chrome")],
+            )
+        ]
+
+        response = self._run_query(series=series)
+
+        result = response.results[0]
+
+        assert result["data"] == [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            1,
+            0,
+            0,
+        ]
+
+    def test_any_event(self):
+        self._create_test_events()
+
+        series: List[EventsNode | ActionsNode] = [
+            EventsNode(
+                event=None,
+            )
+        ]
+
+        response = self._run_query(series=series)
+
+        result = response.results[0]
+
+        assert result["label"] == "All events"
+        assert result["data"] == [
+            0,
+            0,
+            0,
+            0,
+            1,
+            0,
+            0,
+            0,
+            1,
+            0,
+            0,
+        ]
+
+    def test_actions(self):
+        self._create_test_events()
+
+        action = Action.objects.create(name="$pageview", team=self.team)
+        ActionStep.objects.create(
+            action=action,
+            event="$pageview",
+            properties=[{"key": "$browser", "type": "event", "value": "Chrome", "operator": "exact"}],
+        )
+
+        series: List[EventsNode | ActionsNode] = [ActionsNode(id=action.pk)]
+
+        response = self._run_query(series=series)
+
+        result = response.results[0]
+
+        assert result["data"] == [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            1,
+            0,
+            0,
+        ]
+
+    def test_compare(self):
+        self._create_test_events()
+
+        response = self._run_query(filters=StickinessFilter(compare=True))
+
+        assert response.results[0]["count"] == 2
+        assert response.results[0]["compare_label"] == "current"
+
+        assert response.results[1]["count"] == 0
+        assert response.results[1]["compare_label"] == "previous"
+
+    def test_filter_test_accounts(self):
+        self._create_test_events()
+
+        self.team.test_account_filters = [{"key": "$browser", "type": "event", "value": "Chrome", "operator": "exact"}]
+        self.team.save()
+
+        response = self._run_query(filter_test_accounts=True)
+
+        result = response.results[0]
+
+        assert result["data"] == [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            1,
+            0,
+            0,
+        ]