From d79002a1eb45c65f91d1fa392a70a718c58289d3 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Mon, 30 Oct 2023 16:14:58 +0000 Subject: [PATCH 01/14] Added the persons query to trends query --- .../hogql_queries/insights/trends/aggregation_operations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/posthog/hogql_queries/insights/trends/aggregation_operations.py b/posthog/hogql_queries/insights/trends/aggregation_operations.py index 38ca1cec71c4e..e0a0efbb21321 100644 --- a/posthog/hogql_queries/insights/trends/aggregation_operations.py +++ b/posthog/hogql_queries/insights/trends/aggregation_operations.py @@ -13,7 +13,7 @@ class QueryAlternator: _group_bys: List[ast.Expr] _select_from: ast.JoinExpr | None - def __init__(self, query: ast.SelectQuery | ast.SelectUnionQuery): + def __init__(self, query: ast.SelectQuery): assert isinstance(query, ast.SelectQuery) self._query = query @@ -21,7 +21,7 @@ def __init__(self, query: ast.SelectQuery | ast.SelectUnionQuery): self._group_bys = [] self._select_from = None - def build(self) -> ast.SelectQuery | ast.SelectUnionQuery: + def build(self) -> ast.SelectQuery: if len(self._selects) > 0: self._query.select.extend(self._selects) From 4b33ada48bf38eb01c662183b84ca3d6ebd0f15e Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Wed, 8 Nov 2023 09:59:55 +0000 Subject: [PATCH 02/14] WIP: start adding old trends tests against new query runner --- .../insights/trends/breakdown.py | 8 +- .../insights/trends/breakdown_values.py | 3 + .../hogql_queries/insights/trends/display.py | 6 +- .../insights/trends/query_builder.py | 4 +- .../insights/trends/test/test_trends.py | 8631 +++++++++++++++++ .../insights/trends/trends_query_runner.py | 4 +- 6 files changed, 8646 insertions(+), 10 deletions(-) create mode 100644 posthog/hogql_queries/insights/trends/test/test_trends.py diff --git a/posthog/hogql_queries/insights/trends/breakdown.py b/posthog/hogql_queries/insights/trends/breakdown.py index 
7524dd5eb5ffe..d18f64d8bb48a 100644 --- a/posthog/hogql_queries/insights/trends/breakdown.py +++ b/posthog/hogql_queries/insights/trends/breakdown.py @@ -60,9 +60,10 @@ def column_expr(self) -> ast.Expr: expr=parse_expr(self.query.breakdown.breakdown), ) elif self.query.breakdown.breakdown_type == "cohort": + cohort_breakdown = 0 if self.query.breakdown.breakdown == "all" else int(self.query.breakdown.breakdown) return ast.Alias( alias="breakdown_value", - expr=ast.Constant(value=int(self.query.breakdown.breakdown)), + expr=ast.Constant(value=cohort_breakdown), ) if self.query.breakdown.breakdown_type == "hogql": @@ -76,8 +77,11 @@ def column_expr(self) -> ast.Expr: expr=ast.Field(chain=self._properties_chain), ) - def events_where_filter(self) -> ast.Expr: + def events_where_filter(self) -> ast.Expr | None: if self.query.breakdown.breakdown_type == "cohort": + if self.query.breakdown.breakdown == "all": + return None + return ast.CompareOperation( left=ast.Field(chain=["person_id"]), op=ast.CompareOperationOp.InCohort, diff --git a/posthog/hogql_queries/insights/trends/breakdown_values.py b/posthog/hogql_queries/insights/trends/breakdown_values.py index 64e2500e47e50..251e01d9b5333 100644 --- a/posthog/hogql_queries/insights/trends/breakdown_values.py +++ b/posthog/hogql_queries/insights/trends/breakdown_values.py @@ -36,6 +36,9 @@ def __init__( def get_breakdown_values(self) -> List[str | int]: if self.breakdown_type == "cohort": + if self.breakdown_field == "all": + return [0] + return [int(self.breakdown_field)] if self.breakdown_type == "hogql": diff --git a/posthog/hogql_queries/insights/trends/display.py b/posthog/hogql_queries/insights/trends/display.py index db0fa29e0045e..3e8cf3ade278d 100644 --- a/posthog/hogql_queries/insights/trends/display.py +++ b/posthog/hogql_queries/insights/trends/display.py @@ -26,11 +26,7 @@ def should_wrap_inner_query(self) -> bool: return self.display_type == ChartDisplayType.ActionsLineGraphCumulative def 
modify_outer_query(self, outer_query: ast.SelectQuery, inner_query: ast.SelectQuery) -> ast.SelectQuery: - if ( - self.display_type == ChartDisplayType.BoldNumber - or self.display_type == ChartDisplayType.ActionsPie - or self.display_type == ChartDisplayType.WorldMap - ): + if self.should_aggregate_values(): return ast.SelectQuery( select=[ ast.Alias( diff --git a/posthog/hogql_queries/insights/trends/query_builder.py b/posthog/hogql_queries/insights/trends/query_builder.py index ddf873f10a0da..ce5268fd1fec6 100644 --- a/posthog/hogql_queries/insights/trends/query_builder.py +++ b/posthog/hogql_queries/insights/trends/query_builder.py @@ -280,7 +280,9 @@ def _events_filter(self) -> ast.Expr: # Breakdown if self._breakdown.enabled and not self._breakdown.is_histogram_breakdown: - filters.append(self._breakdown.events_where_filter()) + breakdown_filter = self._breakdown.events_where_filter() + if breakdown_filter is not None: + filters.append(breakdown_filter) if len(filters) == 0: return ast.Constant(value=True) diff --git a/posthog/hogql_queries/insights/trends/test/test_trends.py b/posthog/hogql_queries/insights/trends/test/test_trends.py new file mode 100644 index 0000000000000..d7ce9a0be2958 --- /dev/null +++ b/posthog/hogql_queries/insights/trends/test/test_trends.py @@ -0,0 +1,8631 @@ +import json +import uuid +from datetime import datetime +from typing import Dict, List, Optional, Tuple, Union +from unittest.mock import patch, ANY +from urllib.parse import parse_qsl, urlparse + +from zoneinfo import ZoneInfo +from django.test import override_settings +from django.utils import timezone +from freezegun import freeze_time +from rest_framework.exceptions import ValidationError + +from posthog.constants import ( + ENTITY_ID, + ENTITY_TYPE, + TREND_FILTER_TYPE_EVENTS, + TRENDS_BAR_VALUE, + TRENDS_LINEAR, + TRENDS_TABLE, +) +from posthog.hogql_queries.insights.trends.trends_query_runner import TrendsQueryRunner +from posthog.models import ( + Action, + ActionStep, 
+ Cohort, + Entity, + Filter, + GroupTypeMapping, + Organization, + Person, +) +from posthog.models.group.util import create_group +from posthog.models.instance_setting import ( + get_instance_setting, + override_instance_config, +) +from posthog.models.person.util import create_person_distinct_id +from posthog.models.property_definition import PropertyDefinition +from posthog.models.team.team import Team +from posthog.schema import ( + BreakdownFilter, + DateRange, + EventsNode, + TrendsFilter, + TrendsQuery, +) +from posthog.test.base import ( + APIBaseTest, + ClickhouseTestMixin, + _create_event, + _create_person, + also_test_with_different_timezones, + also_test_with_materialized_columns, + also_test_with_person_on_events_v2, + create_person_id_override_by_distinct_id, + flush_persons_and_events, + snapshot_clickhouse_queries, +) +from posthog.test.test_journeys import journeys_for + + +def breakdown_label(entity: Entity, value: Union[str, int]) -> Dict[str, Optional[Union[str, int]]]: + ret_dict: Dict[str, Optional[Union[str, int]]] = {} + if not value or not isinstance(value, str) or "cohort_" not in value: + label = value if (value or isinstance(value, bool)) and value != "None" and value != "nan" else "Other" + ret_dict["label"] = f"{entity.name} - {label}" + ret_dict["breakdown_value"] = label + else: + if value == "cohort_all": + ret_dict["label"] = f"{entity.name} - all users" + ret_dict["breakdown_value"] = "all" + else: + cohort = Cohort.objects.get(pk=value.replace("cohort_", "")) + ret_dict["label"] = f"{entity.name} - {cohort.name}" + ret_dict["breakdown_value"] = cohort.pk + return ret_dict + + +def _create_action(**kwargs): + team = kwargs.pop("team") + name = kwargs.pop("name") + properties = kwargs.pop("properties", {}) + action = Action.objects.create(team=team, name=name) + ActionStep.objects.create(action=action, event=name, properties=properties) + return action + + +def _create_cohort(**kwargs): + team = kwargs.pop("team") + name = 
kwargs.pop("name") + groups = kwargs.pop("groups") + cohort = Cohort.objects.create(team=team, name=name, groups=groups, last_calculation=timezone.now()) + cohort.calculate_people_ch(pending_version=0) + return cohort + + +def convert_filter_to_trends_query(filter: Filter) -> TrendsQuery: + series: List[EventsNode] = [ + EventsNode( + event=event.id, + name=event.name, + custom_name=event.custom_name, + math=event.math, + math_property=event.math_property, + math_hogql=event.math_hogql, + math_group_type_index=event.math_group_type_index, + ) + for event in filter.events + ] + + tq = TrendsQuery( + series=series, + kind="TrendsQuery", + filterTestAccounts=filter.filter_test_accounts, + dateRange=DateRange( + date_from=filter.date_from.isoformat() if filter.date_from is not None else "all", + date_to=filter.date_to.isoformat() if filter.date_to is not None else None, + ), + samplingFactor=filter.sampling_factor, + aggregation_group_type_index=filter.aggregation_group_type_index, + breakdown=BreakdownFilter( + breakdown=filter.breakdown, + breakdown_type=filter.breakdown_type, + breakdown_normalize_url=filter.breakdown_normalize_url, + breakdowns=filter.breakdowns, + breakdown_group_type_index=filter.breakdown_group_type_index, + breakdown_histogram_bin_count=filter.breakdown_histogram_bin_count, + ), + interval=filter.interval, + trendsFilter=TrendsFilter( + display=filter.display, + breakdown_histogram_bin_count=filter.breakdown_histogram_bin_count, + compare=filter.compare, + formula=filter.formula, + smoothing_intervals=filter.smoothing_intervals, + ), + ) + + return tq + + +class TestTrends(ClickhouseTestMixin, APIBaseTest): + maxDiff = None + + def _run(self, filter: Filter, team: Team): + flush_persons_and_events() + + trend_query = convert_filter_to_trends_query(filter) + tqr = TrendsQueryRunner(team=team, query=trend_query) + return tqr.calculate().results + + def _get_trend_people(self, filter: Filter, entity: Entity): + data = filter.to_dict() + # The test 
client doesn't serialize nested objects into JSON, so we need to do it ourselves + if data.get("events", None): + data["events"] = json.dumps(data["events"]) + if data.get("properties", None): + data["properties"] = json.dumps(data["properties"]) + with self.settings(DEBUG=True): + response = self.client.get( + f"/api/projects/{self.team.id}/persons/trends/", + data={**data, ENTITY_TYPE: entity.type, ENTITY_ID: entity.id}, + content_type="application/json", + ).json() + return response["results"][0]["people"] + + def _create_event(self, **kwargs): + _create_event(**kwargs) + props = kwargs.get("properties") + if props is not None: + for key, value in props.items(): + prop_def_exists = PropertyDefinition.objects.filter(team=self.team, name=key).exists() + if prop_def_exists is False: + if isinstance(value, str): + type = "String" + elif isinstance(value, bool): + type = "Boolean" + elif isinstance(value, int): + type = "Numeric" + else: + type = "String" + + PropertyDefinition.objects.create( + team=self.team, + name=key, + property_type=type, + type=PropertyDefinition.Type.EVENT, + ) + + def _create_person(self, **kwargs): + _create_person(**kwargs) + props = kwargs.get("properties") + if props is not None: + for key, value in props.items(): + prop_def_exists = PropertyDefinition.objects.filter(team=self.team, name=key).exists() + if prop_def_exists is False: + if isinstance(value, str): + type = "String" + elif isinstance(value, bool): + type = "Boolean" + elif isinstance(value, int): + type = "Numeric" + else: + type = "String" + + PropertyDefinition.objects.create( + team=self.team, + name=key, + property_type=type, + type=PropertyDefinition.Type.PERSON, + ) + + def _create_group(self, **kwargs): + create_group(**kwargs) + props = kwargs.get("properties") + index = kwargs.get("group_type_index") + + if props is not None: + for key, value in props.items(): + prop_def_exists = PropertyDefinition.objects.filter(team=self.team, name=key).exists() + if 
prop_def_exists is False: + if isinstance(value, str): + type = "String" + elif isinstance(value, bool): + type = "Boolean" + elif isinstance(value, int): + type = "Numeric" + else: + type = "String" + + PropertyDefinition.objects.create( + team=self.team, + name=key, + property_type=type, + group_type_index=index, + type=PropertyDefinition.Type.GROUP, + ) + + def _create_events(self, use_time=False) -> Tuple[Action, Person]: + person = self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + _, _, secondTeam = Organization.objects.bootstrap(None, team_fields={"api_token": "token456"}) + + freeze_without_time = ["2019-12-24", "2020-01-01", "2020-01-02"] + freeze_with_time = [ + "2019-12-24 03:45:34", + "2020-01-01 00:06:34", + "2020-01-02 16:34:34", + ] + + freeze_args = freeze_without_time + if use_time: + freeze_args = freeze_with_time + + with freeze_time(freeze_args[0]): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$bool_prop": True}, + ) + + with freeze_time(freeze_args[1]): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$bool_prop": False}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="anonymous_id", + properties={"$bool_prop": False}, + ) + self._create_event(team=self.team, event="sign up", distinct_id="blabla") + with freeze_time(freeze_args[2]): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$some_property": "other_value", + "$some_numerical_prop": 80, + }, + ) + self._create_event(team=self.team, event="no events", distinct_id="blabla") + + # second team should have no effect + self._create_event( + team=secondTeam, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "other_value"}, + ) + + 
_create_action(team=self.team, name="no events") + sign_up_action = _create_action(team=self.team, name="sign up") + + flush_persons_and_events() + + return sign_up_action, person + + def _create_breakdown_events(self): + freeze_without_time = ["2020-01-02"] + + with freeze_time(freeze_without_time[0]): + for i in range(25): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": i}, + ) + _create_action(team=self.team, name="sign up") + + def _create_event_count_per_actor_events(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"fruit": "mango"}, + ) + self._create_person(team_id=self.team.pk, distinct_ids=["tintin"], properties={"fruit": "mango"}) + self._create_person(team_id=self.team.pk, distinct_ids=["murmur"], properties={}) # No fruit here + self._create_person( + team_id=self.team.pk, + distinct_ids=["reeree"], + properties={"fruit": "tomato"}, + ) + + with freeze_time("2020-01-01 00:06:02"): + self._create_event( + team=self.team, + event="viewed video", + distinct_id="anonymous_id", + properties={"color": "red", "$group_0": "bouba"}, + ) + self._create_event( + team=self.team, + event="viewed video", + distinct_id="blabla", + properties={"$group_0": "bouba"}, + ) # No color here + self._create_event( + team=self.team, + event="viewed video", + distinct_id="reeree", + properties={"color": "blue", "$group_0": "bouba"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="tintin", + properties={"$group_0": "kiki"}, + ) + + with freeze_time("2020-01-03 19:06:34"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="murmur", + properties={"$group_0": "kiki"}, + ) + + with freeze_time("2020-01-04 23:17:00"): + self._create_event( + team=self.team, + event="viewed video", + distinct_id="tintin", + properties={"color": "red", "$group_0": "kiki"}, + ) + + with freeze_time("2020-01-05 19:06:34"): + 
self._create_event( + team=self.team, + event="viewed video", + distinct_id="blabla", + properties={"color": "blue", "$group_0": "bouba"}, + ) + self._create_event( + team=self.team, + event="viewed video", + distinct_id="tintin", + properties={"color": "red"}, + ) # No group here + self._create_event( + team=self.team, + event="viewed video", + distinct_id="tintin", + properties={"color": "red", "$group_0": "bouba"}, + ) + self._create_event( + team=self.team, + event="viewed video", + distinct_id="tintin", + properties={"color": "blue", "$group_0": "kiki"}, + ) + + def test_trends_per_day(self): + self._create_events() + with freeze_time("2020-01-04T13:00:01Z"): + # with self.assertNumQueries(16): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-7d", + "events": [{"id": "sign up"}, {"id": "no events"}], + }, + ), + self.team, + ) + self.assertEqual(response[0]["label"], "sign up") + self.assertEqual(response[0]["labels"][4], "1-Jan-2020") + self.assertEqual(response[0]["data"][4], 3.0) + self.assertEqual(response[0]["labels"][5], "2-Jan-2020") + self.assertEqual(response[0]["data"][5], 1.0) + + @snapshot_clickhouse_queries + def test_trend_actors_person_on_events_pagination_with_alias_inconsistencies(self): + test_person_ids = [ # 10 test person IDs (in UUIDT format), hard-coded for deterministic runs + "016f70a4-1c68-0000-db29-61f63a926520", + "016f70a4-1c68-0001-51a1-ad418c05e09f", + "016f70a4-1c68-0002-9ea5-10186329258f", + "016f70a4-1c68-0003-7680-697adb073c10", + "016f70a4-1c68-0004-d0f8-7bd581c97eff", + "016f70a4-1c68-0005-f593-e89d76db7a1f", + "016f70a4-1c68-0006-bb84-d42937ef5989", + "016f70a4-1c68-0007-923f-82720e97a6ba", + "016f70a4-1c68-0008-8970-cbb33f01de1e", + "016f70a4-1c68-0009-75a2-3755450b0b17", + ] + + with freeze_time("2020-01-04T13:00:01Z"): + all_distinct_ids = [] + for i, person_id in enumerate(test_person_ids): + distinct_id = f"blabla_{i}" + # UUIDT offers k-sortability, making this test effectively 
deterministic, as opposed to UUIDv4 + self._create_event( + team=self.team, + event="sign up", + distinct_id=distinct_id, + properties={"$some_property": "value", "$bool_prop": True}, + person_id=person_id, # Different person_ids, but in the end aliased to be the same person + ) + all_distinct_ids.append(distinct_id) + + person = self._create_person( + team_id=self.team.pk, + distinct_ids=all_distinct_ids, + properties={"$some_prop": "some_val"}, + uuid=test_person_ids[-1], + ) + flush_persons_and_events() + + data = { + "date_from": "-7d", + "events": [{"id": "sign up", "math": "dau"}], + "limit": 5, + } + + with override_instance_config("PERSON_ON_EVENTS_ENABLED", True): + from posthog.models.team import util + + util.can_enable_actor_on_events = True + + response = self._run(Filter(team=self.team, data=data), self.team) + self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0]) + + url = response[0]["persons_urls"][7]["url"] + people_response = self.client.get(f"/{url}").json() + + # pagination works, no matter how few ids in people_response + self.assertIsNotNone(people_response["next"]) + self.assertEqual(people_response["missing_persons"], 5) + + next_url = people_response["next"] + second_people_response = self.client.get(f"{next_url}").json() + + self.assertIsNotNone(second_people_response["next"]) + self.assertEqual(second_people_response["missing_persons"], 4) + + first_load_ids = sorted(str(person["id"]) for person in people_response["results"][0]["people"]) + second_load_ids = sorted(str(person["id"]) for person in second_people_response["results"][0]["people"]) + + self.assertEqual(len(first_load_ids + second_load_ids), 1) + self.assertEqual(first_load_ids + second_load_ids, [str(person.uuid)]) + + third_people_response = self.client.get(f"/{second_people_response['next']}").json() + self.assertIsNone(third_people_response["next"]) + self.assertFalse(third_people_response["missing_persons"]) + + third_load_ids = 
sorted(str(person["id"]) for person in third_people_response["results"][0]["people"]) + self.assertEqual(third_load_ids, []) + + # just make sure this doesn't error + def test_no_props(self): + with freeze_time("2020-01-04T13:01:01Z"): + self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "breakdown": "$some_property", + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + }, + {"id": "no events"}, + ], + }, + ), + self.team, + ) + + def test_trends_per_day_48hours(self): + self._create_events() + with freeze_time("2020-01-03T13:00:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-48h", + "interval": "day", + "events": [{"id": "sign up"}, {"id": "no events"}], + }, + ), + self.team, + ) + + self.assertEqual(response[0]["data"][1], 1.0) + self.assertEqual(response[0]["labels"][1], "2-Jan-2020") + + @snapshot_clickhouse_queries + def test_trends_per_day_cumulative(self): + self._create_events() + with freeze_time("2020-01-04T13:00:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-7d", + "display": "ActionsLineGraphCumulative", + "events": [{"id": "sign up"}], + }, + ), + self.team, + ) + + self.assertEqual(response[0]["label"], "sign up") + self.assertEqual(response[0]["labels"][4], "1-Jan-2020") + self.assertEqual(response[0]["data"][4], 3.0) + self.assertEqual(response[0]["labels"][5], "2-Jan-2020") + self.assertEqual(response[0]["data"][5], 4.0) + + @snapshot_clickhouse_queries + def test_trends_groups_per_day_cumulative(self): + self._create_event_count_per_actor_events() + with freeze_time("2020-01-06T13:00:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-7d", + "display": "ActionsLineGraphCumulative", + "events": [ + { + "id": "viewed video", + "math": "unique_group", + "math_group_type_index": 0, + } + ], + }, + ), + self.team, + ) + + self.assertEqual(response[0]["label"], "viewed video") + 
self.assertEqual(response[0]["labels"][-1], "6-Jan-2020") + self.assertEqual(response[0]["data"], [0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0]) + + @also_test_with_person_on_events_v2 + @snapshot_clickhouse_queries + def test_trends_breakdown_cumulative(self): + self._create_events() + with freeze_time("2020-01-04T13:00:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-7d", + "display": "ActionsLineGraphCumulative", + "events": [{"id": "sign up", "math": "dau"}], + "breakdown": "$some_property", + }, + ), + self.team, + ) + + self.assertEqual(response[0]["label"], "sign up - none") + self.assertEqual(response[0]["labels"][4], "1-Jan-2020") + self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]) + + self.assertEqual(response[1]["label"], "sign up - other_value") + self.assertEqual(response[1]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0]) + + self.assertEqual(response[2]["label"], "sign up - value") + self.assertEqual(response[2]["data"], [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]) + + def test_trends_single_aggregate_dau(self): + self._create_events() + with freeze_time("2020-01-04T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "interval": "week", + "events": [{"id": "sign up", "math": "dau"}], + }, + ), + self.team, + ) + + with freeze_time("2020-01-04T13:00:01Z"): + weekly_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "interval": "day", + "events": [{"id": "sign up", "math": "dau"}], + }, + ), + self.team, + ) + + self.assertEqual(daily_response[0]["aggregated_value"], 1) + self.assertEqual( + daily_response[0]["aggregated_value"], + weekly_response[0]["aggregated_value"], + ) + + @also_test_with_materialized_columns(["$math_prop"]) + def test_trends_single_aggregate_math(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": 
"some_val"}, + ) + with freeze_time("2020-01-01 00:06:34"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$math_prop": 1}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$math_prop": 1}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$math_prop": 1}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$math_prop": 2}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$math_prop": 3}, + ) + + with freeze_time("2020-01-02 00:06:34"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$math_prop": 4}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$math_prop": 4}, + ) + + with freeze_time("2020-01-04T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "interval": "week", + "events": [ + { + "id": "sign up", + "math": "median", + "math_property": "$math_prop", + } + ], + }, + ), + self.team, + ) + + with freeze_time("2020-01-04T13:00:01Z"): + weekly_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "interval": "day", + "events": [ + { + "id": "sign up", + "math": "median", + "math_property": "$math_prop", + } + ], + }, + ), + self.team, + ) + + self.assertEqual(daily_response[0]["aggregated_value"], 2.0) + self.assertEqual( + daily_response[0]["aggregated_value"], + weekly_response[0]["aggregated_value"], + ) + + @snapshot_clickhouse_queries + def test_trends_with_session_property_single_aggregate_math(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla2"], + 
properties={"$some_prop": "some_val"}, + ) + + self._create_event( + team=self.team, + event="sign up before", + distinct_id="blabla", + properties={"$session_id": 1}, + timestamp="2020-01-01 00:06:30", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 1}, + timestamp="2020-01-01 00:06:34", + ) + self._create_event( + team=self.team, + event="sign up later", + distinct_id="blabla", + properties={"$session_id": 1}, + timestamp="2020-01-01 00:06:35", + ) + # First session lasted 5 seconds + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2}, + timestamp="2020-01-01 00:06:35", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2}, + timestamp="2020-01-01 00:06:45", + ) + # Second session lasted 10 seconds + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 3}, + timestamp="2020-01-01 00:06:45", + ) + # Third session lasted 0 seconds + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 4}, + timestamp="2020-01-02 00:06:30", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 4}, + timestamp="2020-01-02 00:06:45", + ) + # Fourth session lasted 15 seconds + + with freeze_time("2020-01-04T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "interval": "week", + "events": [ + { + "id": "sign up", + "math": "median", + "math_property": "$session_duration", + } + ], + }, + ), + self.team, + ) + + with freeze_time("2020-01-04T13:00:01Z"): + weekly_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "interval": "day", + "events": [ + { + "id": "sign up", + "math": "median", + "math_property": 
"$session_duration", + } + ], + }, + ), + self.team, + ) + + self.assertEqual(daily_response[0]["aggregated_value"], 7.5) + self.assertEqual( + daily_response[0]["aggregated_value"], + weekly_response[0]["aggregated_value"], + ) + + def test_unique_session_with_session_breakdown(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla2"], + properties={"$some_prop": "some_val"}, + ) + + self._create_event( + team=self.team, + event="sign up before", + distinct_id="blabla", + properties={"$session_id": 1}, + timestamp="2020-01-01 00:06:30", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 1}, + timestamp="2020-01-01 00:06:34", + ) + self._create_event( + team=self.team, + event="sign up later", + distinct_id="blabla", + properties={"$session_id": 1}, + timestamp="2020-01-01 00:06:35", + ) + # First session lasted 5 seconds + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2}, + timestamp="2020-01-01 00:06:35", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2}, + timestamp="2020-01-01 00:06:45", + ) + # Second session lasted 10 seconds + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 3}, + timestamp="2020-01-01 00:06:45", + ) + # Third session lasted 0 seconds + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 4}, + timestamp="2020-01-02 00:06:30", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 4}, + timestamp="2020-01-02 00:06:45", + ) + # Fourth session lasted 15 seconds + + with freeze_time("2020-01-04T13:00:01Z"): + 
response = self._run( + Filter( + team=self.team, + data={ + "display": "ActionsLineGraph", + "interval": "day", + "events": [{"id": "sign up", "math": "unique_session"}], + "breakdown": "$session_duration", + "breakdown_type": "session", + "insight": "TRENDS", + "breakdown_histogram_bin_count": 3, + "properties": [{"key": "$some_prop", "value": "some_val", "type": "person"}], + "date_from": "-3d", + }, + ), + self.team, + ) + + self.assertEqual( + [(item["breakdown_value"], item["count"], item["data"]) for item in response], + [ + ("[0.0,4.95]", 1.0, [1.0, 0.0, 0.0, 0.0]), + ("[4.95,10.05]", 2.0, [2.0, 0.0, 0.0, 0.0]), + ("[10.05,15.01]", 1.0, [0.0, 1.0, 0.0, 0.0]), + ], + ) + + @also_test_with_person_on_events_v2 + @also_test_with_materialized_columns(person_properties=["name"], verify_no_jsonextract=False) + def test_trends_breakdown_single_aggregate_cohorts(self): + self._create_person(team_id=self.team.pk, distinct_ids=["Jane"], properties={"name": "Jane"}) + self._create_person(team_id=self.team.pk, distinct_ids=["John"], properties={"name": "John"}) + self._create_person(team_id=self.team.pk, distinct_ids=["Jill"], properties={"name": "Jill"}) + cohort1 = _create_cohort( + team=self.team, + name="cohort1", + groups=[{"properties": [{"key": "name", "value": "Jane", "type": "person"}]}], + ) + cohort2 = _create_cohort( + team=self.team, + name="cohort2", + groups=[{"properties": [{"key": "name", "value": "John", "type": "person"}]}], + ) + cohort3 = _create_cohort( + team=self.team, + name="cohort3", + groups=[{"properties": [{"key": "name", "value": "Jill", "type": "person"}]}], + ) + with freeze_time("2020-01-01 00:06:34"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="John", + properties={"$some_property": "value", "$browser": "Chrome"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="John", + properties={"$some_property": "value", "$browser": "Chrome"}, + ) + self._create_event( + team=self.team, + 
event="sign up", + distinct_id="Jill", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="Jill", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="Jill", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + + with freeze_time("2020-01-02 00:06:34"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="Jane", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="Jane", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + with freeze_time("2020-01-04T13:00:01Z"): + event_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "breakdown": json.dumps([cohort1.pk, cohort2.pk, cohort3.pk, "all"]), + "breakdown_type": "cohort", + "events": [{"id": "sign up"}], + }, + ), + self.team, + ) + + for result in event_response: + if result["label"] == "sign up - cohort1": + self.assertEqual(result["aggregated_value"], 2) + elif result["label"] == "sign up - cohort2": + self.assertEqual(result["aggregated_value"], 2) + elif result["label"] == "sign up - cohort3": + self.assertEqual(result["aggregated_value"], 3) + else: + self.assertEqual(result["aggregated_value"], 7) + + def test_trends_breakdown_single_aggregate(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + with freeze_time("2020-01-01 00:06:34"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Chrome"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Chrome"}, + ) + self._create_event( + 
team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + + with freeze_time("2020-01-02 00:06:34"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + + with freeze_time("2020-01-04T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "breakdown": "$browser", + "events": [{"id": "sign up"}], + }, + ), + self.team, + ) + + for result in daily_response: + if result["breakdown_value"] == "Chrome": + self.assertEqual(result["aggregated_value"], 2) + else: + self.assertEqual(result["aggregated_value"], 5) + + def test_trends_breakdown_single_aggregate_with_zero_person_ids(self): + # only a person-on-event test + if not get_instance_setting("PERSON_ON_EVENTS_ENABLED"): + return True + + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + with freeze_time("2020-01-01 00:06:34"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Chrome"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Chrome"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Safari"}, + 
) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$some_property": "value", "$browser": "Chrome"}, + person_id="00000000-0000-0000-0000-000000000000", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$some_property": "value", "$browser": "Safari"}, + person_id="00000000-0000-0000-0000-000000000000", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla3", + properties={"$some_property": "value", "$browser": "xyz"}, + person_id="00000000-0000-0000-0000-000000000000", + ) + + with freeze_time("2020-01-02 00:06:34"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$browser": "Safari"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla4", + properties={"$some_property": "value", "$browser": "Chrome"}, + person_id="00000000-0000-0000-0000-000000000000", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$some_property": "value", "$browser": "urgh"}, + person_id="00000000-0000-0000-0000-000000000000", + ) + + with freeze_time("2020-01-04T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "breakdown": "$browser", + "events": [{"id": "sign up"}], + }, + ), + self.team, + ) + + for result in daily_response: + if result["breakdown_value"] == "Chrome": + 
self.assertEqual(result["aggregated_value"], 2) + else: + self.assertEqual(result["aggregated_value"], 5) + + def test_trends_breakdown_single_aggregate_math(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + with freeze_time("2020-01-01 00:06:34"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$math_prop": 1}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$math_prop": 1}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$math_prop": 1}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$math_prop": 2}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$math_prop": 3}, + ) + + with freeze_time("2020-01-02 00:06:34"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$math_prop": 4}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value", "$math_prop": 4}, + ) + + with freeze_time("2020-01-04T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "interval": "day", + "breakdown": "$some_property", + "events": [ + { + "id": "sign up", + "math": "median", + "math_property": "$math_prop", + } + ], + }, + ), + self.team, + ) + + with freeze_time("2020-01-04T13:00:01Z"): + weekly_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "interval": "week", + "breakdown": "$some_property", + "events": [ + { + "id": "sign up", + "math": "median", + 
"math_property": "$math_prop", + } + ], + }, + ), + self.team, + ) + + self.assertEqual(daily_response[0]["aggregated_value"], 2.0) + self.assertEqual( + daily_response[0]["aggregated_value"], + weekly_response[0]["aggregated_value"], + ) + + @snapshot_clickhouse_queries + def test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla2"], + properties={"$some_prop": "some_val"}, + ) + + self._create_event( + team=self.team, + event="sign up before", + distinct_id="blabla", + properties={"$session_id": 1, "$some_property": "value1"}, + timestamp="2020-01-01 00:06:30", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 1, "$some_property": "value1"}, + timestamp="2020-01-01 00:06:34", + ) + self._create_event( + team=self.team, + event="sign up later", + distinct_id="blabla", + properties={"$session_id": 1, "$some_property": "value doesnt matter"}, + timestamp="2020-01-01 00:06:35", + ) + # First session lasted 5 seconds + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2, "$some_property": "value2"}, + timestamp="2020-01-01 00:06:35", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2, "$some_property": "value1"}, + timestamp="2020-01-01 00:06:45", + ) + # Second session lasted 10 seconds + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 3}, + timestamp="2020-01-01 00:06:45", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 3}, + timestamp="2020-01-01 00:06:46", + ) + # Third session lasted 1 seconds + + 
self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 4, "$some_property": "value2"}, + timestamp="2020-01-02 00:06:30", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 4, "$some_property": "value2"}, + timestamp="2020-01-02 00:06:35", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 4, "$some_property": "value1"}, + timestamp="2020-01-02 00:06:45", + ) + # Fourth session lasted 15 seconds + + with freeze_time("2020-01-04T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "interval": "week", + "breakdown": "$some_property", + "events": [ + { + "id": "sign up", + "math": "median", + "math_property": "$session_duration", + } + ], + }, + ), + self.team, + ) + + # value1 has: 5 seconds, 10 seconds, 15 seconds + # value2 has: 10 seconds, 15 seconds (aggregated by session, so 15 is not double counted) + # empty has: 1 seconds + self.assertEqual( + [resp["breakdown_value"] for resp in daily_response], + ["value2", "value1", ""], + ) + self.assertEqual([resp["aggregated_value"] for resp in daily_response], [12.5, 10, 1]) + + with freeze_time("2020-01-04T13:00:01Z"): + weekly_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "interval": "day", + "breakdown": "$some_property", + "events": [ + { + "id": "sign up", + "math": "median", + "math_property": "$session_duration", + } + ], + }, + ), + self.team, + ) + + self.assertEqual( + [resp["breakdown_value"] for resp in daily_response], + [resp["breakdown_value"] for resp in weekly_response], + ) + self.assertEqual( + [resp["aggregated_value"] for resp in daily_response], + [resp["aggregated_value"] for resp in weekly_response], + ) + + @snapshot_clickhouse_queries + def 
test_trends_person_breakdown_with_session_property_single_aggregate_math_and_breakdown(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla2"], + properties={"$some_prop": "another_val"}, + ) + + self._create_event( + team=self.team, + event="sign up before", + distinct_id="blabla", + properties={"$session_id": 1, "$some_property": "value1"}, + timestamp="2020-01-01 00:06:30", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 1, "$some_property": "value1"}, + timestamp="2020-01-01 00:06:34", + ) + self._create_event( + team=self.team, + event="sign up later", + distinct_id="blabla", + properties={"$session_id": 1, "$some_property": "value doesnt matter"}, + timestamp="2020-01-01 00:06:35", + ) + # First session lasted 5 seconds + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2, "$some_property": "value2"}, + timestamp="2020-01-01 00:06:35", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2, "$some_property": "value1"}, + timestamp="2020-01-01 00:06:45", + ) + # Second session lasted 10 seconds + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 3}, + timestamp="2020-01-01 00:06:45", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 3}, + timestamp="2020-01-01 00:06:46", + ) + # Third session lasted 1 seconds + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 4, "$some_property": "value2"}, + timestamp="2020-01-02 00:06:30", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + 
properties={"$session_id": 4, "$some_property": "value2"}, + timestamp="2020-01-02 00:06:35", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 4, "$some_property": "value1"}, + timestamp="2020-01-02 00:06:45", + ) + # Fourth session lasted 15 seconds + + with freeze_time("2020-01-04T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "interval": "week", + "breakdown": "$some_prop", + "breakdown_type": "person", + "events": [ + { + "id": "sign up", + "math": "median", + "math_property": "$session_duration", + } + ], + }, + ), + self.team, + ) + + # another_val has: 10 seconds + # some_val has: 1, 5 seconds, 15 seconds + self.assertEqual( + [resp["breakdown_value"] for resp in daily_response], + ["another_val", "some_val"], + ) + self.assertEqual([resp["aggregated_value"] for resp in daily_response], [10.0, 5.0]) + + @snapshot_clickhouse_queries + def test_trends_any_event_total_count(self): + self._create_events() + with freeze_time("2020-01-04T13:00:01Z"): + response1 = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_LINEAR, + "interval": "day", + "events": [{"id": None, "math": "total"}], + }, + ), + self.team, + ) + response2 = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_LINEAR, + "interval": "day", + "events": [{"id": "sign up", "math": "total"}], + }, + ), + self.team, + ) + self.assertEqual(response1[0]["count"], 5) + self.assertEqual(response2[0]["count"], 4) + + @also_test_with_materialized_columns(["$math_prop", "$some_property"]) + def test_trends_breakdown_with_math_func(self): + with freeze_time("2020-01-01 00:06:34"): + for i in range(20): + self._create_person(team_id=self.team.pk, distinct_ids=[f"person{i}"]) + self._create_event( + team=self.team, + event="sign up", + distinct_id=f"person{i}", + properties={"$some_property": f"value_{i}", "$math_prop": 1}, + ) + self._create_event( + 
team=self.team, + event="sign up", + distinct_id=f"person{i}", + properties={"$some_property": f"value_{i}", "$math_prop": 1}, + ) + + self._create_person(team_id=self.team.pk, distinct_ids=[f"person21"]) + self._create_event( + team=self.team, + event="sign up", + distinct_id=f"person21", + properties={"$some_property": "value_21", "$math_prop": 25}, + ) + + with freeze_time("2020-01-04T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "interval": "day", + "breakdown": "$some_property", + "events": [ + { + "id": "sign up", + "math": "p90", + "math_property": "$math_prop", + } + ], + }, + ), + self.team, + ) + + breakdown_vals = [val["breakdown_value"] for val in daily_response] + self.assertTrue("value_21" in breakdown_vals) + + @snapshot_clickhouse_queries + def test_trends_compare_day_interval_relative_range(self): + self._create_events() + with freeze_time("2020-01-04T13:00:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "compare": "true", + "date_from": "-7d", + "events": [{"id": "sign up"}], + }, + ), + self.team, + ) + + self.assertEqual(response[0]["label"], "sign up") + self.assertEqual(response[0]["labels"][4], "day 4") + self.assertEqual(response[0]["data"][4], 3.0) + self.assertEqual(response[0]["labels"][5], "day 5") + self.assertEqual(response[0]["data"][5], 1.0) + self.assertEqual( + response[0]["days"], + [ + "2019-12-28", # -7d, current period + "2019-12-29", # -6d, current period + "2019-12-30", # -5d, current period + "2019-12-31", # -4d, current period + "2020-01-01", # -3d, current period + "2020-01-02", # -2d, current period + "2020-01-03", # -1d, current period + "2020-01-04", # -0d, current period (this one's ongoing!) 
+ ], + ) + + self.assertEqual( + response[1]["days"], + [ + "2019-12-21", # -7d, previous period + "2019-12-22", # -6d, previous period + "2019-12-23", # -5d, previous period + "2019-12-24", # -4d, previous period + "2019-12-25", # -3d, previous period + "2019-12-26", # -2d, previous period + "2019-12-27", # -1d, previous period + "2019-12-28", # -0d, previous period + ], + ) + self.assertEqual(response[1]["label"], "sign up") + self.assertEqual(response[1]["labels"][3], "day 3") + self.assertEqual(response[1]["data"][3], 1.0) + self.assertEqual(response[1]["labels"][4], "day 4") + self.assertEqual(response[1]["data"][4], 0.0) + + with freeze_time("2020-01-04T13:00:01Z"): + no_compare_response = self._run( + Filter( + team=self.team, + data={"compare": "false", "events": [{"id": "sign up"}]}, + ), + self.team, + ) + + self.assertEqual(no_compare_response[0]["label"], "sign up") + self.assertEqual(no_compare_response[0]["labels"][4], "1-Jan-2020") + self.assertEqual(no_compare_response[0]["data"][4], 3.0) + self.assertEqual(no_compare_response[0]["labels"][5], "2-Jan-2020") + self.assertEqual(no_compare_response[0]["data"][5], 1.0) + + def test_trends_compare_day_interval_fixed_range_single(self): + self._create_events(use_time=True) + with freeze_time("2020-01-02T20:17:00Z"): + response = self._run( + Filter( + team=self.team, + data={ + "compare": "true", + # A fixed single-day range requires different handling than a relative range like -7d + "date_from": "2020-01-02", + "interval": "day", + "events": [{"id": "sign up"}], + }, + ), + self.team, + ) + + self.assertEqual( + response[0]["days"], + [ + "2020-01-02", # Current day + ], + ) + self.assertEqual( + response[0]["data"], + [1], + ) + self.assertEqual( + response[1]["days"], + [ + "2020-01-01", # Previous day + ], + ) + self.assertEqual( + response[1]["data"], + [ + 3, + ], + ) + + def test_trends_compare_hour_interval_relative_range(self): + self._create_events(use_time=True) + with 
freeze_time("2020-01-02T20:17:00Z"): + response = self._run( + Filter( + team=self.team, + data={ + "compare": "true", + "date_from": "dStart", + "interval": "hour", + "events": [{"id": "sign up"}], + }, + ), + self.team, + ) + + self.assertEqual( + response[0]["days"], + [ + "2020-01-02 00:00:00", + "2020-01-02 01:00:00", + "2020-01-02 02:00:00", + "2020-01-02 03:00:00", + "2020-01-02 04:00:00", + "2020-01-02 05:00:00", + "2020-01-02 06:00:00", + "2020-01-02 07:00:00", + "2020-01-02 08:00:00", + "2020-01-02 09:00:00", + "2020-01-02 10:00:00", + "2020-01-02 11:00:00", + "2020-01-02 12:00:00", + "2020-01-02 13:00:00", + "2020-01-02 14:00:00", + "2020-01-02 15:00:00", + "2020-01-02 16:00:00", + "2020-01-02 17:00:00", + "2020-01-02 18:00:00", + "2020-01-02 19:00:00", + "2020-01-02 20:00:00", + ], + ) + self.assertEqual( + response[0]["data"], + [ + 0, # 00:00 + 0, # 01:00 + 0, # 02:00 + 0, # 03:00 + 0, # 04:00 + 0, # 05:00 + 0, # 06:00 + 0, # 07:00 + 0, # 08:00 + 0, # 09:00 + 0, # 10:00 + 0, # 11:00 + 0, # 12:00 + 0, # 13:00 + 0, # 14:00 + 0, # 15:00 + 1, # 16:00 + 0, # 17:00 + 0, # 18:00 + 0, # 19:00 + 0, # 20:00 + ], + ) + self.assertEqual( + response[1]["days"], + [ + "2020-01-01 00:00:00", + "2020-01-01 01:00:00", + "2020-01-01 02:00:00", + "2020-01-01 03:00:00", + "2020-01-01 04:00:00", + "2020-01-01 05:00:00", + "2020-01-01 06:00:00", + "2020-01-01 07:00:00", + "2020-01-01 08:00:00", + "2020-01-01 09:00:00", + "2020-01-01 10:00:00", + "2020-01-01 11:00:00", + "2020-01-01 12:00:00", + "2020-01-01 13:00:00", + "2020-01-01 14:00:00", + "2020-01-01 15:00:00", + "2020-01-01 16:00:00", + "2020-01-01 17:00:00", + "2020-01-01 18:00:00", + "2020-01-01 19:00:00", + "2020-01-01 20:00:00", + ], + ) + self.assertEqual( + response[1]["data"], + [ + 3, # 00:00 + 0, # 01:00 + 0, # 02:00 + 0, # 03:00 + 0, # 04:00 + 0, # 05:00 + 0, # 06:00 + 0, # 07:00 + 0, # 08:00 + 0, # 09:00 + 0, # 10:00 + 0, # 11:00 + 0, # 12:00 + 0, # 13:00 + 0, # 14:00 + 0, # 15:00 + 0, # 16:00 + 0, # 17:00 
+ 0, # 18:00 + 0, # 19:00 + 0, # 20:00 + ], + ) + + def _test_events_with_dates(self, dates: List[str], result, query_time=None, **filter_params): + self._create_person(team_id=self.team.pk, distinct_ids=["person_1"], properties={"name": "John"}) + for time in dates: + with freeze_time(time): + self._create_event( + event="event_name", + team=self.team, + distinct_id="person_1", + properties={"$browser": "Safari"}, + ) + + if query_time: + with freeze_time(query_time): + response = self._run( + Filter( + team=self.team, + data={**filter_params, "events": [{"id": "event_name"}]}, + ), + self.team, + ) + else: + response = self._run( + Filter( + team=self.team, + data={**filter_params, "events": [{"id": "event_name"}]}, + ), + self.team, + ) + + self.assertEqual(result[0]["count"], response[0]["count"]) + self.assertEqual(result[0]["labels"], response[0]["labels"]) + self.assertEqual(result[0]["data"], response[0]["data"]) + self.assertEqual(result[0]["days"], response[0]["days"]) + + return response + + def test_hour_interval(self): + response = self._test_events_with_dates( + dates=["2020-11-01 13:00:00", "2020-11-01 13:20:00", "2020-11-01 17:00:00"], + interval="hour", + date_from="2020-11-01 12:00:00", + query_time="2020-11-01 23:00:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 3.0, + "data": [0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0, 0, 0, 0, 0], + "labels": [ + "1-Nov-2020 12:00", + "1-Nov-2020 13:00", + "1-Nov-2020 14:00", + "1-Nov-2020 15:00", + "1-Nov-2020 16:00", + "1-Nov-2020 17:00", + "1-Nov-2020 18:00", + "1-Nov-2020 19:00", + "1-Nov-2020 20:00", + "1-Nov-2020 21:00", + "1-Nov-2020 22:00", + "1-Nov-2020 23:00", + ], + "days": [ + "2020-11-01 12:00:00", + "2020-11-01 13:00:00", + "2020-11-01 14:00:00", + 
"2020-11-01 15:00:00", + "2020-11-01 16:00:00", + "2020-11-01 17:00:00", + "2020-11-01 18:00:00", + "2020-11-01 19:00:00", + "2020-11-01 20:00:00", + "2020-11-01 21:00:00", + "2020-11-01 22:00:00", + "2020-11-01 23:00:00", + ], + } + ], + ) + self.assertEqual( + { + "date_from": datetime(2020, 11, 1, 12, tzinfo=ZoneInfo("UTC")), + "date_to": datetime(2020, 11, 1, 13, tzinfo=ZoneInfo("UTC")), + "entity_id": "event_name", + "entity_math": None, + "entity_order": None, + "entity_type": "events", + }, + response[0]["persons_urls"][0]["filter"], + ) + + def test_day_interval(self): + response = self._test_events_with_dates( + dates=["2020-11-01", "2020-11-02", "2020-11-03", "2020-11-04"], + interval="day", + date_from="2020-11-01", + date_to="2020-11-07", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 4.0, + "data": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0], + "labels": [ + "1-Nov-2020", + "2-Nov-2020", + "3-Nov-2020", + "4-Nov-2020", + "5-Nov-2020", + "6-Nov-2020", + "7-Nov-2020", + ], + "days": [ + "2020-11-01", + "2020-11-02", + "2020-11-03", + "2020-11-04", + "2020-11-05", + "2020-11-06", + "2020-11-07", + ], + } + ], + ) + self.assertEqual( + { + "date_from": datetime(2020, 11, 1, tzinfo=ZoneInfo("UTC")), + "date_to": datetime(2020, 11, 1, 23, 59, 59, 999999, tzinfo=ZoneInfo("UTC")), + "entity_id": "event_name", + "entity_math": None, + "entity_order": None, + "entity_type": "events", + }, + response[0]["persons_urls"][0]["filter"], + ) + + def test_week_interval(self): + self._test_events_with_dates( + dates=["2020-11-01", "2020-11-10", "2020-11-11", "2020-11-18"], + interval="week", + date_from="2020-10-29", # having date after sunday + no events caused an issue in CH + date_to="2020-11-24", + result=[ + { + "action": { + "id": 
"event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 4.0, + "data": [0.0, 1.0, 2.0, 1.0, 0.0], + "labels": [ + "25-Oct-2020", + "1-Nov-2020", + "8-Nov-2020", + "15-Nov-2020", + "22-Nov-2020", + ], + "days": [ + "2020-10-25", + "2020-11-01", + "2020-11-08", + "2020-11-15", + "2020-11-22", + ], + } + ], + ) + + def test_month_interval(self): + self._test_events_with_dates( + dates=["2020-07-10", "2020-07-30", "2020-10-18"], + interval="month", + date_from="2020-6-01", + date_to="2020-11-24", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 3.0, + "data": [0.0, 2.0, 0.0, 0.0, 1.0, 0.0], + "labels": [ + "1-Jun-2020", + "1-Jul-2020", + "1-Aug-2020", + "1-Sep-2020", + "1-Oct-2020", + "1-Nov-2020", + ], + "days": [ + "2020-06-01", + "2020-07-01", + "2020-08-01", + "2020-09-01", + "2020-10-01", + "2020-11-01", + ], + } + ], + ) + + def test_interval_rounding(self): + self._test_events_with_dates( + dates=["2020-11-01", "2020-11-10", "2020-11-11", "2020-11-18"], + interval="week", + date_from="2020-11-04", + date_to="2020-11-24", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 4.0, + "data": [1.0, 2.0, 1.0, 0.0], + "labels": [ + "1-Nov-2020", + "8-Nov-2020", + "15-Nov-2020", + "22-Nov-2020", + ], + "days": ["2020-11-01", "2020-11-08", "2020-11-15", "2020-11-22"], + } + ], + ) + + def 
test_interval_rounding_monthly(self): + self._test_events_with_dates( + dates=["2020-06-2", "2020-07-30"], + interval="month", + date_from="2020-6-7", # should round down to 6-1 + date_to="2020-7-30", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 2.0, + "data": [1.0, 1.0], + "labels": ["1-Jun-2020", "1-Jul-2020"], + "days": ["2020-06-01", "2020-07-01"], + } + ], + ) + + def test_today_timerange(self): + self._test_events_with_dates( + dates=["2020-11-01 10:20:00", "2020-11-01 10:22:00", "2020-11-01 10:25:00"], + date_from="dStart", + query_time="2020-11-01 10:20:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 3, + "data": [3], + "labels": ["1-Nov-2020"], + "days": ["2020-11-01"], + } + ], + ) + + def test_yesterday_timerange(self): + self._test_events_with_dates( + dates=["2020-11-01 05:20:00", "2020-11-01 10:22:00", "2020-11-01 10:25:00"], + date_from="-1d", + date_to="-1d", + query_time="2020-11-02 10:20:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 3.0, + "data": [3.0], + "labels": ["1-Nov-2020"], + "days": ["2020-11-01"], + } + ], + ) + + def test_last24hours_timerange(self): + self._test_events_with_dates( + dates=[ + "2020-11-01 05:20:00", + "2020-11-01 10:22:00", + "2020-11-01 10:25:00", + "2020-11-02 08:25:00", + ], + 
date_from="-24h", + query_time="2020-11-02 10:20:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 3, + "data": [2, 1], + "labels": ["1-Nov-2020", "2-Nov-2020"], + "days": ["2020-11-01", "2020-11-02"], + } + ], + ) + + def test_last48hours_timerange(self): + self._test_events_with_dates( + dates=[ + "2020-11-01 05:20:00", + "2020-11-01 10:22:00", + "2020-11-01 10:25:00", + "2020-11-02 08:25:00", + ], + date_from="-48h", + query_time="2020-11-02 10:20:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 4.0, + "data": [0.0, 3.0, 1.0], + "labels": ["31-Oct-2020", "1-Nov-2020", "2-Nov-2020"], + "days": ["2020-10-31", "2020-11-01", "2020-11-02"], + } + ], + ) + + def test_last7days_timerange(self): + self._test_events_with_dates( + dates=[ + "2020-11-01 05:20:00", + "2020-11-02 10:22:00", + "2020-11-04 10:25:00", + "2020-11-05 08:25:00", + ], + date_from="-7d", + query_time="2020-11-07 10:20:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 4.0, + "data": [0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0], + "labels": [ + "31-Oct-2020", + "1-Nov-2020", + "2-Nov-2020", + "3-Nov-2020", + "4-Nov-2020", + "5-Nov-2020", + "6-Nov-2020", + "7-Nov-2020", + ], + "days": [ + "2020-10-31", + "2020-11-01", + "2020-11-02", + "2020-11-03", + "2020-11-04", + 
"2020-11-05", + "2020-11-06", + "2020-11-07", + ], + } + ], + ) + + def test_last14days_timerange(self): + self._test_events_with_dates( + dates=[ + "2020-11-01 05:20:00", + "2020-11-02 10:22:00", + "2020-11-04 10:25:00", + "2020-11-05 08:25:00", + "2020-11-05 08:25:00", + "2020-11-10 08:25:00", + ], + date_from="-14d", + query_time="2020-11-14 10:20:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 6.0, + "data": [ + 0.0, + 1.0, + 1.0, + 0.0, + 1.0, + 2.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + ], + "labels": [ + "31-Oct-2020", + "1-Nov-2020", + "2-Nov-2020", + "3-Nov-2020", + "4-Nov-2020", + "5-Nov-2020", + "6-Nov-2020", + "7-Nov-2020", + "8-Nov-2020", + "9-Nov-2020", + "10-Nov-2020", + "11-Nov-2020", + "12-Nov-2020", + "13-Nov-2020", + "14-Nov-2020", + ], + "days": [ + "2020-10-31", + "2020-11-01", + "2020-11-02", + "2020-11-03", + "2020-11-04", + "2020-11-05", + "2020-11-06", + "2020-11-07", + "2020-11-08", + "2020-11-09", + "2020-11-10", + "2020-11-11", + "2020-11-12", + "2020-11-13", + "2020-11-14", + ], + } + ], + ) + + def test_last30days_timerange(self): + self._test_events_with_dates( + dates=[ + "2020-11-01 05:20:00", + "2020-11-11 10:22:00", + "2020-11-24 10:25:00", + "2020-11-05 08:25:00", + "2020-11-05 08:25:00", + "2020-11-10 08:25:00", + ], + date_from="-30d", + interval="week", + query_time="2020-11-30 10:20:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 6.0, + "data": [0.0, 3.0, 2.0, 0.0, 1.0, 0.0], + "labels": [ + "25-Oct-2020", + 
"1-Nov-2020", + "8-Nov-2020", + "15-Nov-2020", + "22-Nov-2020", + "29-Nov-2020", + ], + "days": [ + "2020-10-25", + "2020-11-01", + "2020-11-08", + "2020-11-15", + "2020-11-22", + "2020-11-29", + ], + } + ], + ) + + def test_last90days_timerange(self): + self._test_events_with_dates( + dates=[ + "2020-09-01 05:20:00", + "2020-10-05 05:20:00", + "2020-10-20 05:20:00", + "2020-11-01 05:20:00", + "2020-11-11 10:22:00", + "2020-11-24 10:25:00", + "2020-11-05 08:25:00", + "2020-11-05 08:25:00", + "2020-11-10 08:25:00", + ], + date_from="-90d", + interval="month", + query_time="2020-11-30 10:20:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 9, + "data": [1, 2, 6], + "labels": ["1-Sep-2020", "1-Oct-2020", "1-Nov-2020"], + "days": ["2020-09-01", "2020-10-01", "2020-11-01"], + } + ], + ) + + def test_this_month_timerange(self): + self._test_events_with_dates( + dates=[ + "2020-11-01 05:20:00", + "2020-11-11 10:22:00", + "2020-11-24 10:25:00", + "2020-11-05 08:25:00", + "2020-11-05 08:25:00", + "2020-11-10 08:25:00", + ], + date_from="mStart", + interval="month", + query_time="2020-11-30 10:20:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 6, + "data": [6], + "labels": ["1-Nov-2020"], + "days": ["2020-11-01"], + } + ], + ) + + def test_previous_month_timerange(self): + self._test_events_with_dates( + dates=[ + "2020-11-01 05:20:00", + "2020-11-11 10:22:00", + "2020-11-24 10:25:00", + "2020-11-05 08:25:00", + "2020-11-05 08:25:00", + "2020-11-10 08:25:00", + ], + date_from="-1mStart", 
+ date_to="-1mEnd", + interval="month", + query_time="2020-12-30 10:20:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 6, + "data": [6], + "labels": ["1-Nov-2020"], + "days": ["2020-11-01"], + } + ], + ) + + def test_year_to_date_timerange(self): + self._test_events_with_dates( + dates=[ + "2020-01-01 05:20:00", + "2020-01-11 10:22:00", + "2020-02-24 10:25:00", + "2020-02-05 08:25:00", + "2020-03-05 08:25:00", + "2020-05-10 08:25:00", + ], + date_from="yStart", + interval="month", + query_time="2020-04-30 10:20:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 5.0, + "data": [2.0, 2.0, 1.0, 0.0], + "labels": ["1-Jan-2020", "1-Feb-2020", "1-Mar-2020", "1-Apr-2020"], + "days": ["2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01"], + } + ], + ) + + def test_all_time_timerange(self): + self._test_events_with_dates( + dates=[ + "2020-01-01 05:20:00", + "2020-01-11 10:22:00", + "2020-02-24 10:25:00", + "2020-02-05 08:25:00", + "2020-03-05 08:25:00", + ], + date_from="all", + interval="month", + query_time="2020-04-30 10:20:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 5.0, + "data": [2.0, 2.0, 1.0, 0.0], + "labels": ["1-Jan-2020", "1-Feb-2020", "1-Mar-2020", "1-Apr-2020"], + "days": ["2020-01-01", "2020-02-01", "2020-03-01", 
"2020-04-01"], + } + ], + ) + + def test_custom_range_timerange(self): + self._test_events_with_dates( + dates=[ + "2020-01-05 05:20:00", + "2020-01-05 10:22:00", + "2020-01-04 10:25:00", + "2020-01-11 08:25:00", + "2020-01-09 08:25:00", + ], + date_from="2020-01-05", + query_time="2020-01-10", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 3.0, + "data": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0], + "labels": [ + "5-Jan-2020", + "6-Jan-2020", + "7-Jan-2020", + "8-Jan-2020", + "9-Jan-2020", + "10-Jan-2020", + ], + "days": [ + "2020-01-05", + "2020-01-06", + "2020-01-07", + "2020-01-08", + "2020-01-09", + "2020-01-10", + ], + } + ], + ) + + @also_test_with_materialized_columns(["$some_property"]) + def test_property_filtering(self): + self._create_events() + with freeze_time("2020-01-04"): + response = self._run( + Filter( + team=self.team, + data={ + "properties": [{"key": "$some_property", "value": "value"}], + "events": [{"id": "sign up"}], + }, + ), + self.team, + ) + self.assertEqual(response[0]["labels"][4], "1-Jan-2020") + self.assertEqual(response[0]["data"][4], 1.0) + self.assertEqual(response[0]["labels"][5], "2-Jan-2020") + self.assertEqual(response[0]["data"][5], 0) + + @snapshot_clickhouse_queries + def test_trends_with_hogql_math(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val", "number": 8}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 1}, + timestamp="2020-01-01 00:06:30", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 5}, + timestamp="2020-01-02 00:06:45", + ) + + with 
freeze_time("2020-01-04T13:00:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "interval": "week", + "events": [ + { + "id": "sign up", + "math": "hogql", + "math_hogql": "avg(toInt(properties.$session_id)) + 1000", + } + ], + }, + ), + self.team, + ) + self.assertCountEqual(response[0]["labels"], ["22-Dec-2019", "29-Dec-2019"]) + self.assertCountEqual(response[0]["data"], [0, 1003]) + + @snapshot_clickhouse_queries + def test_trends_with_session_property_total_volume_math(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla2"], + properties={"$some_prop": "some_val"}, + ) + + self._create_event( + team=self.team, + event="sign up before", + distinct_id="blabla", + properties={"$session_id": 1}, + timestamp="2020-01-01 00:06:30", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 1}, + timestamp="2020-01-01 00:06:34", + ) + self._create_event( + team=self.team, + event="sign up later", + distinct_id="blabla", + properties={"$session_id": 1}, + timestamp="2020-01-01 00:06:35", + ) + # First session lasted 5 seconds + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2}, + timestamp="2020-01-01 00:06:35", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2}, + timestamp="2020-01-01 00:06:45", + ) + # Second session lasted 10 seconds + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 3}, + timestamp="2020-01-01 00:06:45", + ) + # Third session lasted 0 seconds + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 4}, + timestamp="2020-01-02 00:06:30", + ) + self._create_event( + 
team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 4}, + timestamp="2020-01-02 00:06:45", + ) + # Fourth session lasted 15 seconds + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 5}, + timestamp="2020-01-02 00:06:40", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 5}, + timestamp="2020-01-02 00:06:45", + ) + # Fifth session lasted 5 seconds + + with freeze_time("2020-01-04T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + "interval": "week", + "events": [ + { + "id": "sign up", + "math": "median", + "math_property": "$session_duration", + } + ], + }, + ), + self.team, + ) + + with freeze_time("2020-01-04T13:00:01Z"): + weekly_response = self._run( + Filter( + team=self.team, + data={ + "interval": "day", + "events": [ + { + "id": "sign up", + "math": "median", + "math_property": "$session_duration", + } + ], + }, + ), + self.team, + ) + + self.assertCountEqual(daily_response[0]["labels"], ["22-Dec-2019", "29-Dec-2019"]) + self.assertCountEqual(daily_response[0]["data"], [0, 5]) + + self.assertCountEqual( + weekly_response[0]["labels"], + [ + "28-Dec-2019", + "29-Dec-2019", + "30-Dec-2019", + "31-Dec-2019", + "1-Jan-2020", + "2-Jan-2020", + "3-Jan-2020", + "4-Jan-2020", + ], + ) + self.assertCountEqual(weekly_response[0]["data"], [0, 0, 0, 0, 5, 10, 0, 0]) + + @snapshot_clickhouse_queries + def test_trends_with_session_property_total_volume_math_with_breakdowns(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla2"], + properties={"$some_prop": "some_val"}, + ) + + self._create_event( + team=self.team, + event="sign up before", + distinct_id="blabla", + properties={"$session_id": 1, "$some_property": 
"value1"}, + timestamp="2020-01-01 00:06:30", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 1, "$some_property": "value2"}, + timestamp="2020-01-01 00:06:34", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 1, "$some_property": "value2"}, + timestamp="2020-01-01 00:06:35", + ) + # First session lasted 5 seconds + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2, "$some_property": "value2"}, + timestamp="2020-01-01 00:06:35", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2, "$some_property": "value1"}, + timestamp="2020-01-01 00:06:45", + ) + # Second session lasted 10 seconds + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 3, "$some_property": "value1"}, + timestamp="2020-01-01 00:06:45", + ) + # Third session lasted 0 seconds + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 4, "$some_property": "value2"}, + timestamp="2020-01-02 00:06:30", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 4, "$some_property": "value2"}, + timestamp="2020-01-02 00:06:45", + ) + # Fourth session lasted 15 seconds + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 5, "$some_property": "value1"}, + timestamp="2020-01-02 00:06:40", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 5, "$some_property": "value1"}, + timestamp="2020-01-02 00:06:45", + ) + # Fifth session lasted 5 seconds + + with freeze_time("2020-01-04T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + 
"breakdown": "$some_property", + "interval": "week", + "events": [ + { + "id": "sign up", + "math": "median", + "math_property": "$session_duration", + } + ], + }, + ), + self.team, + ) + + with freeze_time("2020-01-04T13:00:01Z"): + weekly_response = self._run( + Filter( + team=self.team, + data={ + "breakdown": "$some_property", + "interval": "day", + "events": [ + { + "id": "sign up", + "math": "median", + "math_property": "$session_duration", + } + ], + }, + ), + self.team, + ) + + # value1 has 0,5,10 seconds (in second interval) + # value2 has 5,10,15 seconds (in second interval) + self.assertEqual([resp["breakdown_value"] for resp in daily_response], ["value2", "value1"]) + self.assertCountEqual(daily_response[0]["labels"], ["22-Dec-2019", "29-Dec-2019"]) + self.assertCountEqual(daily_response[0]["data"], [0, 10]) + self.assertCountEqual(daily_response[1]["data"], [0, 5]) + + self.assertEqual([resp["breakdown_value"] for resp in weekly_response], ["value2", "value1"]) + self.assertCountEqual( + weekly_response[0]["labels"], + [ + "28-Dec-2019", + "29-Dec-2019", + "30-Dec-2019", + "31-Dec-2019", + "1-Jan-2020", + "2-Jan-2020", + "3-Jan-2020", + "4-Jan-2020", + ], + ) + self.assertCountEqual(weekly_response[0]["data"], [0, 0, 0, 0, 7.5, 15, 0, 0]) + self.assertCountEqual(weekly_response[1]["data"], [0, 0, 0, 0, 5, 5, 0, 0]) + + def test_trends_with_session_property_total_volume_math_with_sessions_spanning_multiple_intervals(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla2"], + properties={"$some_prop": "some_val"}, + ) + + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 1}, + timestamp="2020-01-01 00:06:30", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 1}, + 
timestamp="2020-01-02 00:06:34", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$session_id": 1}, + timestamp="2020-01-03 00:06:30", + ) + # First Session lasted 48 hours = a lot of seconds + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2}, + timestamp="2020-01-01 00:06:35", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={"$session_id": 2}, + timestamp="2020-01-05 00:06:35", + ) + # Second session lasted 96 hours = a lot of seconds + + with freeze_time("2020-01-06T13:00:01Z"): + weekly_response = self._run( + Filter( + team=self.team, + data={ + "interval": "day", + "events": [ + { + "id": "sign up", + "math": "median", + "math_property": "$session_duration", + } + ], + }, + ), + self.team, + ) + + self.assertCountEqual( + weekly_response[0]["labels"], + [ + "30-Dec-2019", + "31-Dec-2019", + "1-Jan-2020", + "2-Jan-2020", + "3-Jan-2020", + "4-Jan-2020", + "5-Jan-2020", + "6-Jan-2020", + ], + ) + + ONE_DAY_IN_SECONDS = 24 * 60 * 60 + # math property is counted only in the intervals in which the session was active + # and the event in question happened (i.e. 
sign up event) + self.assertCountEqual( + weekly_response[0]["data"], + [ + 0, + 0, + 3 * ONE_DAY_IN_SECONDS, + 2 * ONE_DAY_IN_SECONDS, + 2 * ONE_DAY_IN_SECONDS, + 0, + 4 * ONE_DAY_IN_SECONDS, + 0, + ], + ) + + @also_test_with_person_on_events_v2 + @also_test_with_materialized_columns(person_properties=["name"]) + def test_filter_events_by_cohort(self): + self._create_person(team_id=self.team.pk, distinct_ids=["person_1"], properties={"name": "John"}) + self._create_person(team_id=self.team.pk, distinct_ids=["person_2"], properties={"name": "Jane"}) + + self._create_event( + event="event_name", + team=self.team, + distinct_id="person_1", + properties={"$browser": "Safari"}, + ) + self._create_event( + event="event_name", + team=self.team, + distinct_id="person_2", + properties={"$browser": "Chrome"}, + ) + self._create_event( + event="event_name", + team=self.team, + distinct_id="person_2", + properties={"$browser": "Safari"}, + ) + + cohort = _create_cohort( + team=self.team, + name="cohort1", + groups=[{"properties": [{"key": "name", "value": "Jane", "type": "person"}]}], + ) + + response = self._run( + Filter( + team=self.team, + data={ + "properties": [{"key": "id", "value": cohort.pk, "type": "cohort"}], + "events": [{"id": "event_name"}], + }, + ), + self.team, + ) + + self.assertEqual(response[0]["count"], 2) + self.assertEqual(response[0]["data"][-1], 2) + + @also_test_with_person_on_events_v2 + @snapshot_clickhouse_queries + def test_filter_events_by_precalculated_cohort(self): + with freeze_time("2020-01-02"): + self._create_person( + team_id=self.team.pk, + distinct_ids=["person_1"], + properties={"name": "John"}, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["person_2"], + properties={"name": "Jane"}, + ) + + self._create_event( + event="event_name", + team=self.team, + distinct_id="person_1", + properties={"$browser": "Safari"}, + ) + self._create_event( + event="event_name", + team=self.team, + distinct_id="person_2", + 
properties={"$browser": "Chrome"}, + ) + self._create_event( + event="event_name", + team=self.team, + distinct_id="person_2", + properties={"$browser": "Safari"}, + ) + + cohort = _create_cohort( + team=self.team, + name="cohort1", + groups=[{"properties": [{"key": "name", "value": "Jane", "type": "person"}]}], + ) + cohort.calculate_people_ch(pending_version=0) + + with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): + response = self._run( + Filter( + team=self.team, + data={ + "properties": [{"key": "id", "value": cohort.pk, "type": "cohort"}], + "events": [{"id": "event_name"}], + }, + ), + self.team, + ) + + self.assertEqual(response[0]["count"], 2) + self.assertEqual(response[0]["data"][-1], 2) + + def test_response_empty_if_no_events(self): + self._create_events() + flush_persons_and_events() + response = self._run(Filter(team=self.team, data={"date_from": "2012-12-12"}), self.team) + self.assertEqual(response, []) + + def test_interval_filtering_hour(self): + self._create_events(use_time=True) + + with freeze_time("2020-01-02"): + response = self._run( + Filter( + data={ + "date_from": "2019-12-24", + "interval": "hour", + "events": [{"id": "sign up"}], + } + ), + self.team, + ) + self.assertEqual(response[0]["labels"][3], "24-Dec-2019 03:00") + self.assertEqual(response[0]["data"][3], 1.0) + # 217 - 24 - 1 + self.assertEqual(response[0]["data"][192], 3.0) + + def test_interval_filtering_week(self): + self._create_events(use_time=True) + + with freeze_time("2020-01-02"): + response = self._run( + Filter( + team=self.team, + data={ + # 2019-11-24 is a Sunday, i.e. 
beginning of our week + "date_from": "2019-11-24", + "interval": "week", + "events": [{"id": "sign up"}], + }, + ), + self.team, + ) + self.assertEqual( + response[0]["labels"][:5], + ["24-Nov-2019", "1-Dec-2019", "8-Dec-2019", "15-Dec-2019", "22-Dec-2019"], + ) + self.assertEqual(response[0]["data"][:5], [0.0, 0.0, 0.0, 0.0, 1.0]) + + def test_interval_filtering_month(self): + self._create_events(use_time=True) + + with freeze_time("2020-01-02"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "2019-9-24", + "interval": "month", + "events": [{"id": "sign up"}], + }, + ), + self.team, + ) + self.assertEqual(response[0]["labels"][0], "1-Sep-2019") + self.assertEqual(response[0]["data"][0], 0) + self.assertEqual(response[0]["labels"][3], "1-Dec-2019") + self.assertEqual(response[0]["data"][3], 1.0) + self.assertEqual(response[0]["labels"][4], "1-Jan-2020") + self.assertEqual(response[0]["data"][4], 4.0) + + def test_interval_filtering_today_hourly(self): + self._create_events(use_time=True) + + with freeze_time("2020-01-02 23:30"): + self._create_event(team=self.team, event="sign up", distinct_id="blabla") + + with freeze_time("2020-01-02T23:31:00Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "dStart", + "interval": "hour", + "events": [{"id": "sign up"}], + }, + ), + self.team, + ) + self.assertEqual(response[0]["labels"][23], "2-Jan-2020 23:00") + self.assertEqual(response[0]["data"][23], 1.0) + + def test_breakdown_label(self): + entity = Entity({"id": "$pageview", "name": "$pageview", "type": TREND_FILTER_TYPE_EVENTS}) + num_label = breakdown_label(entity, 1) + self.assertEqual(num_label, {"label": "$pageview - 1", "breakdown_value": 1}) + + string_label = breakdown_label(entity, "Chrome") + self.assertEqual(string_label, {"label": "$pageview - Chrome", "breakdown_value": "Chrome"}) + + nan_label = breakdown_label(entity, "nan") + self.assertEqual(nan_label, {"label": "$pageview - Other", 
"breakdown_value": "Other"}) + + none_label = breakdown_label(entity, "None") + self.assertEqual(none_label, {"label": "$pageview - Other", "breakdown_value": "Other"}) + + cohort_all_label = breakdown_label(entity, "cohort_all") + self.assertEqual( + cohort_all_label, + {"label": "$pageview - all users", "breakdown_value": "all"}, + ) + + cohort = _create_cohort(team=self.team, name="cohort1", groups=[{"properties": {"name": "Jane"}}]) + cohort_label = breakdown_label(entity, f"cohort_{cohort.pk}") + self.assertEqual( + cohort_label, + {"label": f"$pageview - {cohort.name}", "breakdown_value": cohort.pk}, + ) + + @also_test_with_materialized_columns(["key"]) + def test_breakdown_with_filter(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["person1"], + properties={"email": "test@posthog.com"}, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["person2"], + properties={"email": "test@gmail.com"}, + ) + self._create_event( + event="sign up", + distinct_id="person1", + team=self.team, + properties={"key": "val"}, + ) + self._create_event( + event="sign up", + distinct_id="person2", + team=self.team, + properties={"key": "oh"}, + ) + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "breakdown": "key", + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + } + ], + "properties": [{"key": "key", "value": "oh", "operator": "not_icontains"}], + }, + ), + self.team, + ) + self.assertEqual(len(response), 1) + self.assertEqual(response[0]["breakdown_value"], "val") + + def test_action_filtering(self): + sign_up_action, person = self._create_events() + action_response = self._run( + Filter(team=self.team, data={"actions": [{"id": sign_up_action.id}]}), + self.team, + ) + event_response = self._run(Filter(team=self.team, data={"events": [{"id": "sign up"}]}), self.team) + self.assertEqual(len(action_response), 1) + + self.assertEntityResponseEqual(action_response, 
event_response) + + @also_test_with_person_on_events_v2 + @snapshot_clickhouse_queries + def test_action_filtering_with_cohort(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_property": "value", "$bool_prop": "x"}, + ) + cohort = _create_cohort( + team=self.team, + name="cohort1", + groups=[{"properties": [{"key": "$some_property", "value": "value", "type": "person"}]}], + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value"}, + timestamp="2020-01-02T12:00:00Z", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "value2"}, + timestamp="2020-01-03T12:00:00Z", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="xyz", + properties={"$some_property": "value"}, + timestamp="2020-01-04T12:00:00Z", + ) + + sign_up_action = _create_action( + team=self.team, + name="sign up", + properties=[{"key": "id", "type": "cohort", "value": cohort.id}], + ) + + cohort.calculate_people_ch(pending_version=2) + + with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): + action_response = self._run( + Filter( + team=self.team, + data={ + "actions": [{"id": sign_up_action.id}], + "date_from": "2020-01-01", + "date_to": "2020-01-07", + "properties": [{"key": "$bool_prop", "value": "x", "type": "person"}], + }, + ), + self.team, + ) + self.assertEqual(len(action_response), 1) + self.assertEqual(action_response[0]["data"], [0, 1, 1, 0, 0, 0, 0]) + + def test_trends_for_non_existing_action(self): + with freeze_time("2020-01-04"): + response = self._run(Filter(data={"actions": [{"id": 50000000}]}), self.team) + self.assertEqual(len(response), 0) + + with freeze_time("2020-01-04"): + response = self._run(Filter(data={"events": [{"id": "DNE"}]}), self.team) + self.assertEqual(response[0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) + + 
@also_test_with_materialized_columns(person_properties=["email", "bar"]) + def test_trends_regression_filtering_by_action_with_person_properties(self): + self._create_person( + team_id=self.team.pk, + properties={"email": "foo@example.com", "bar": "aa"}, + distinct_ids=["d1"], + ) + self._create_person( + team_id=self.team.pk, + properties={"email": "bar@example.com", "bar": "bb"}, + distinct_ids=["d2"], + ) + self._create_person( + team_id=self.team.pk, + properties={"email": "efg@example.com", "bar": "ab"}, + distinct_ids=["d3"], + ) + self._create_person(team_id=self.team.pk, properties={"bar": "aa"}, distinct_ids=["d4"]) + + with freeze_time("2020-01-02 16:34:34"): + self._create_event(team=self.team, event="$pageview", distinct_id="d1") + self._create_event(team=self.team, event="$pageview", distinct_id="d2") + self._create_event(team=self.team, event="$pageview", distinct_id="d3") + self._create_event(team=self.team, event="$pageview", distinct_id="d4") + + event_filtering_action = Action.objects.create(team=self.team, name="$pageview from non-internal") + ActionStep.objects.create( + action=event_filtering_action, + event="$pageview", + properties=[{"key": "bar", "type": "person", "value": "a", "operator": "icontains"}], + ) + + with freeze_time("2020-01-04T13:01:01Z"): + response = self._run( + Filter( + team=self.team, + data={"actions": [{"id": event_filtering_action.id}]}, + ), + self.team, + ) + self.assertEqual(len(response), 1) + self.assertEqual(response[0]["count"], 3) + + with freeze_time("2020-01-04T13:01:01Z"): + response_with_email_filter = self._run( + Filter( + team=self.team, + data={ + "actions": [{"id": event_filtering_action.id}], + "properties": [ + { + "key": "email", + "type": "person", + "value": "is_set", + "operator": "is_set", + } + ], + }, + ), + self.team, + ) + self.assertEqual(len(response_with_email_filter), 1) + self.assertEqual(response_with_email_filter[0]["count"], 2) + + def test_dau_filtering(self): + sign_up_action, 
person = self._create_events() + + with freeze_time("2020-01-02"): + self._create_person(team_id=self.team.pk, distinct_ids=["someone_else"]) + self._create_event(team=self.team, event="sign up", distinct_id="someone_else") + + with freeze_time("2020-01-04"): + action_response = self._run( + Filter( + team=self.team, + data={"actions": [{"id": sign_up_action.id, "math": "dau"}]}, + ), + self.team, + ) + response = self._run(Filter(data={"events": [{"id": "sign up", "math": "dau"}]}), self.team) + + self.assertEqual(response[0]["data"][4], 1) + self.assertEqual(response[0]["data"][5], 2) + self.assertEntityResponseEqual(action_response, response) + + def _create_maths_events(self, values): + sign_up_action, person = self._create_events() + self._create_person(team_id=self.team.pk, distinct_ids=["someone_else"]) + for value in values: + self._create_event( + team=self.team, + event="sign up", + distinct_id="someone_else", + properties={"some_number": value}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="someone_else", + properties={"some_number": None}, + ) + return sign_up_action + + def _test_math_property_aggregation(self, math_property, values, expected_value): + sign_up_action = self._create_maths_events(values) + + action_response = self._run( + Filter( + team=self.team, + data={ + "actions": [ + { + "id": sign_up_action.id, + "math": math_property, + "math_property": "some_number", + } + ] + }, + ), + self.team, + ) + event_response = self._run( + Filter( + data={ + "events": [ + { + "id": "sign up", + "math": math_property, + "math_property": "some_number", + } + ] + } + ), + self.team, + ) + # :TRICKY: Work around clickhouse functions not being 100% + self.assertAlmostEqual(action_response[0]["data"][-1], expected_value, delta=0.5) + self.assertEntityResponseEqual(action_response, event_response) + + @also_test_with_materialized_columns(["some_number"]) + def test_sum_filtering(self): + 
self._test_math_property_aggregation("sum", values=[2, 3, 5.5, 7.5], expected_value=18) + + @also_test_with_materialized_columns(["some_number"]) + def test_avg_filtering(self): + self._test_math_property_aggregation("avg", values=[2, 3, 5.5, 7.5], expected_value=4.5) + + @also_test_with_materialized_columns(["some_number"]) + def test_min_filtering(self): + self._test_math_property_aggregation("min", values=[2, 3, 5.5, 7.5], expected_value=2) + + @also_test_with_materialized_columns(["some_number"]) + def test_max_filtering(self): + self._test_math_property_aggregation("max", values=[2, 3, 5.5, 7.5], expected_value=7.5) + + @also_test_with_materialized_columns(["some_number"]) + def test_median_filtering(self): + self._test_math_property_aggregation("median", values=range(101, 201), expected_value=150) + + @also_test_with_materialized_columns(["some_number"]) + def test_p90_filtering(self): + self._test_math_property_aggregation("p90", values=range(101, 201), expected_value=190) + + @also_test_with_materialized_columns(["some_number"]) + def test_p95_filtering(self): + self._test_math_property_aggregation("p95", values=range(101, 201), expected_value=195) + + @also_test_with_materialized_columns(["some_number"]) + def test_p99_filtering(self): + self._test_math_property_aggregation("p99", values=range(101, 201), expected_value=199) + + @also_test_with_materialized_columns(["some_number"]) + def test_avg_filtering_non_number_resiliency(self): + sign_up_action, person = self._create_events() + self._create_person(team_id=self.team.pk, distinct_ids=["someone_else"]) + self._create_event( + team=self.team, + event="sign up", + distinct_id="someone_else", + properties={"some_number": 2}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="someone_else", + properties={"some_number": "x"}, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="someone_else", + properties={"some_number": None}, + ) + self._create_event( + 
team=self.team, + event="sign up", + distinct_id="someone_else", + properties={"some_number": 8}, + ) + action_response = self._run( + Filter( + data={ + "actions": [ + { + "id": sign_up_action.id, + "math": "avg", + "math_property": "some_number", + } + ] + } + ), + self.team, + ) + event_response = self._run( + Filter(data={"events": [{"id": "sign up", "math": "avg", "math_property": "some_number"}]}), + self.team, + ) + self.assertEqual(action_response[0]["data"][-1], 5) + self.assertEntityResponseEqual(action_response, event_response) + + @also_test_with_materialized_columns(["$some_property"]) + def test_per_entity_filtering(self): + self._create_events() + with freeze_time("2020-01-04T13:00:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-7d", + "events": [ + { + "id": "sign up", + "properties": [{"key": "$some_property", "value": "value"}], + }, + { + "id": "sign up", + "properties": [{"key": "$some_property", "value": "other_value"}], + }, + ], + }, + ), + self.team, + ) + + self.assertEqual(response[0]["labels"][4], "1-Jan-2020") + self.assertEqual(response[0]["data"][4], 1) + self.assertEqual(response[0]["count"], 1) + self.assertEqual(response[1]["labels"][5], "2-Jan-2020") + self.assertEqual(response[1]["data"][5], 1) + self.assertEqual(response[1]["count"], 1) + + def _create_multiple_people(self): + person1 = self._create_person( + team_id=self.team.pk, + distinct_ids=["person1"], + properties={"name": "person1"}, + ) + person2 = self._create_person( + team_id=self.team.pk, + distinct_ids=["person2"], + properties={"name": "person2"}, + ) + person3 = self._create_person( + team_id=self.team.pk, + distinct_ids=["person3"], + properties={"name": "person3"}, + ) + person4 = self._create_person( + team_id=self.team.pk, + distinct_ids=["person4"], + properties={"name": "person4"}, + ) + + journey = { + "person1": [ + { + "event": "watched movie", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"order": "1", 
"name": "1"}, + } + ], + "person2": [ + { + "event": "watched movie", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"order": "1", "name": "2"}, + }, + { + "event": "watched movie", + "timestamp": datetime(2020, 1, 2, 12), + "properties": {"order": "2", "name": "2"}, + }, + { + "event": "watched movie", + "timestamp": datetime(2020, 1, 2, 12), + "properties": {"order": "2", "name": "2"}, + }, + ], + "person3": [ + { + "event": "watched movie", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"order": "1", "name": "3"}, + }, + { + "event": "watched movie", + "timestamp": datetime(2020, 1, 2, 12), + "properties": {"order": "2", "name": "3"}, + }, + { + "event": "watched movie", + "timestamp": datetime(2020, 1, 3, 12), + "properties": {"order": "2", "name": "3"}, + }, + ], + "person4": [ + { + "event": "watched movie", + "timestamp": datetime(2020, 1, 5, 12), + "properties": {"order": "1", "name": "4"}, + } + ], + } + + journeys_for(events_by_person=journey, team=self.team) + + return (person1, person2, person3, person4) + + @also_test_with_materialized_columns(person_properties=["name"]) + @snapshot_clickhouse_queries + def test_person_property_filtering(self): + self._create_multiple_people() + with freeze_time("2020-01-04"): + response = self._run( + Filter( + team=self.team, + data={ + "properties": [{"key": "name", "value": "person1", "type": "person"}], + "events": [{"id": "watched movie"}], + }, + ), + self.team, + ) + + self.assertEqual(response[0]["labels"][4], "1-Jan-2020") + self.assertEqual(response[0]["data"][4], 1.0) + self.assertEqual(response[0]["labels"][5], "2-Jan-2020") + self.assertEqual(response[0]["data"][5], 0) + + @also_test_with_materialized_columns(["name"], person_properties=["name"]) + @snapshot_clickhouse_queries + def test_person_property_filtering_clashing_with_event_property(self): + # This test needs to choose the right materialised column for it to pass. + # For resiliency, we reverse the filter as well. 
    @also_test_with_materialized_columns(person_properties=["name"])
    def test_entity_person_property_filtering(self):
        """Same as the top-level person filter test, but the person-property
        filter is attached to the event entity itself — results must match."""
        self._create_multiple_people()
        with freeze_time("2020-01-04"):
            response = self._run(
                Filter(
                    team=self.team,
                    data={
                        "events": [
                            {
                                "id": "watched movie",
                                "properties": [
                                    {
                                        "key": "name",
                                        "value": "person1",
                                        "type": "person",
                                    }
                                ],
                            }
                        ]
                    },
                ),
                self.team,
            )
        # person1 has exactly one event on Jan 1 and none on Jan 2.
        self.assertEqual(response[0]["labels"][4], "1-Jan-2020")
        self.assertEqual(response[0]["data"][4], 1.0)
        self.assertEqual(response[0]["labels"][5], "2-Jan-2020")
        self.assertEqual(response[0]["data"][5], 0)
"breakdown_type": "cohort", + "events": [{"id": "$pageview", "type": "events", "order": 0}], + }, + ), + self.team, + ) + + self.assertEqual(event_response[0]["label"], "$pageview - all users") + self.assertEqual(sum(event_response[0]["data"]), 1) + + @also_test_with_person_on_events_v2 + @also_test_with_materialized_columns(person_properties=["name"], verify_no_jsonextract=False) + def test_breakdown_by_cohort(self): + person1, person2, person3, person4 = self._create_multiple_people() + cohort = _create_cohort( + name="cohort1", + team=self.team, + groups=[{"properties": [{"key": "name", "value": "person1", "type": "person"}]}], + ) + cohort2 = _create_cohort( + name="cohort2", + team=self.team, + groups=[{"properties": [{"key": "name", "value": "person2", "type": "person"}]}], + ) + cohort3 = _create_cohort( + name="cohort3", + team=self.team, + groups=[ + {"properties": [{"key": "name", "value": "person1", "type": "person"}]}, + {"properties": [{"key": "name", "value": "person2", "type": "person"}]}, + ], + ) + action = _create_action(name="watched movie", team=self.team) + + with freeze_time("2020-01-04T13:01:01Z"): + action_response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "breakdown": json.dumps([cohort.pk, cohort2.pk, cohort3.pk, "all"]), + "breakdown_type": "cohort", + "actions": [{"id": action.pk, "type": "actions", "order": 0}], + }, + ), + self.team, + ) + event_response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "breakdown": json.dumps([cohort.pk, cohort2.pk, cohort3.pk, "all"]), + "breakdown_type": "cohort", + "events": [ + { + "id": "watched movie", + "name": "watched movie", + "type": "events", + "order": 0, + } + ], + }, + ), + self.team, + ) + + counts = {} + break_val = {} + for res in event_response: + counts[res["label"]] = sum(res["data"]) + break_val[res["label"]] = res["breakdown_value"] + + self.assertEqual(counts["watched movie - cohort1"], 1) + 
self.assertEqual(counts["watched movie - cohort2"], 3) + self.assertEqual(counts["watched movie - cohort3"], 4) + self.assertEqual(counts["watched movie - all users"], 7) + + self.assertEqual(break_val["watched movie - cohort1"], cohort.pk) + self.assertEqual(break_val["watched movie - cohort2"], cohort2.pk) + self.assertEqual(break_val["watched movie - cohort3"], cohort3.pk) + self.assertEqual(break_val["watched movie - all users"], "all") + + self.assertEntityResponseEqual(event_response, action_response) + + @also_test_with_materialized_columns(verify_no_jsonextract=False) + def test_interval_filtering_breakdown(self): + self._create_events(use_time=True) + cohort = _create_cohort( + name="cohort1", + team=self.team, + groups=[{"properties": [{"key": "$some_prop", "value": "some_val", "type": "person"}]}], + ) + + # test hour + with freeze_time("2020-01-02"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "2019-12-24", + "interval": "hour", + "events": [{"id": "sign up"}], + "breakdown": json.dumps([cohort.pk]), + "breakdown_type": "cohort", + }, + ), + self.team, + ) + self.assertEqual(response[0]["labels"][3], "24-Dec-2019 03:00") + self.assertEqual(response[0]["data"][3], 1.0) + # 217 - 24 - 1 + self.assertEqual(response[0]["data"][192], 3.0) + + # test week + with freeze_time("2020-01-02"): + response = self._run( + Filter( + team=self.team, + data={ + # 2019-11-24 is a Sunday + "date_from": "2019-11-24", + "interval": "week", + "events": [{"id": "sign up"}], + "breakdown": json.dumps([cohort.pk]), + "breakdown_type": "cohort", + }, + ), + self.team, + ) + + self.assertEqual( + response[0]["labels"][:5], + ["24-Nov-2019", "1-Dec-2019", "8-Dec-2019", "15-Dec-2019", "22-Dec-2019"], + ) + self.assertEqual(response[0]["data"][:5], [0.0, 0.0, 0.0, 0.0, 1.0]) + + # test month + with freeze_time("2020-01-02"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "2019-9-24", + "interval": "month", + "events": 
[{"id": "sign up"}], + "breakdown": json.dumps([cohort.pk]), + "breakdown_type": "cohort", + }, + ), + self.team, + ) + self.assertEqual(response[0]["labels"][3], "1-Dec-2019") + self.assertEqual(response[0]["data"][3], 1.0) + self.assertEqual(response[0]["labels"][4], "1-Jan-2020") + self.assertEqual(response[0]["data"][4], 4.0) + + with freeze_time("2020-01-02 23:30"): + self._create_event(team=self.team, event="sign up", distinct_id="blabla") + + # test today + hourly + with freeze_time("2020-01-02T23:31:00Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "dStart", + "interval": "hour", + "events": [{"id": "sign up"}], + "breakdown": json.dumps([cohort.pk]), + "breakdown_type": "cohort", + }, + ), + self.team, + ) + self.assertEqual(response[0]["labels"][23], "2-Jan-2020 23:00") + self.assertEqual(response[0]["data"][23], 1.0) + + def test_breakdown_by_person_property(self): + person1, person2, person3, person4 = self._create_multiple_people() + action = _create_action(name="watched movie", team=self.team) + + with freeze_time("2020-01-04T13:01:01Z"): + action_response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "breakdown": "name", + "breakdown_type": "person", + "actions": [{"id": action.pk, "type": "actions", "order": 0}], + }, + ), + self.team, + ) + event_response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "breakdown": "name", + "breakdown_type": "person", + "events": [ + { + "id": "watched movie", + "name": "watched movie", + "type": "events", + "order": 0, + } + ], + }, + ), + self.team, + ) + + self.assertListEqual( + sorted(res["breakdown_value"] for res in event_response), + ["person1", "person2", "person3"], + ) + + for response in event_response: + if response["breakdown_value"] == "person1": + self.assertEqual(response["count"], 1) + self.assertEqual(response["label"], "watched movie - person1") + if response["breakdown_value"] == "person2": + 
    @also_test_with_materialized_columns(["name"], person_properties=["name"])
    def test_breakdown_by_person_property_for_person_on_events(self):
        """Breakdown by a person property using the person-on-events data path.

        NOTE(review): only three buckets are asserted — person4's single event
        (Jan 5, per _create_multiple_people) is presumably outside the frozen
        query range and so contributes no bucket; confirm against the fixture.
        """
        person1, person2, person3, person4 = self._create_multiple_people()

        with freeze_time("2020-01-04T13:01:01Z"):
            event_response = self._run(
                Filter(
                    team=self.team,
                    data={
                        "date_from": "-14d",
                        "breakdown": "name",
                        "breakdown_type": "person",
                        "events": [
                            {
                                "id": "watched movie",
                                "name": "watched movie",
                                "type": "events",
                                "order": 0,
                            }
                        ],
                    },
                ),
                self.team,
            )

        self.assertListEqual(
            sorted(res["breakdown_value"] for res in event_response),
            ["person1", "person2", "person3"],
        )

        # Per-bucket counts: person1 watched once, person2 and person3 three times each.
        for response in event_response:
            if response["breakdown_value"] == "person1":
                self.assertEqual(response["count"], 1)
                self.assertEqual(response["label"], "watched movie - person1")
            if response["breakdown_value"] == "person2":
                self.assertEqual(response["count"], 3)
            if response["breakdown_value"] == "person3":
                self.assertEqual(response["count"], 3)
distinct_id="person7", + person_id="00000000-0000-0000-0000-000000000000", + person_properties={"name": "person2"}, + timestamp=datetime(2020, 1, 1, 12), + ) + + with freeze_time("2020-01-04T13:01:01Z"): + event_response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "breakdown": "name", + "breakdown_type": "person", + "events": [ + { + "id": "watched movie", + "name": "watched movie", + "type": "events", + "order": 0, + } + ], + }, + ), + self.team, + ) + + self.assertListEqual( + sorted(res["breakdown_value"] for res in event_response), + ["person1", "person2", "person3"], + ) + + for response in event_response: + if response["breakdown_value"] == "person1": + self.assertEqual(response["count"], 1) + self.assertEqual(response["label"], "watched movie - person1") + if response["breakdown_value"] == "person2": + self.assertEqual(response["count"], 3) + if response["breakdown_value"] == "person3": + self.assertEqual(response["count"], 3) + + def test_breakdown_by_property_pie(self): + with freeze_time("2020-01-01T12:00:00Z"): # Fake created_at for easier assertions + person1 = self._create_person(team_id=self.team.pk, distinct_ids=["person1"], immediate=True) + person2 = self._create_person(team_id=self.team.pk, distinct_ids=["person2"], immediate=True) + person3 = self._create_person(team_id=self.team.pk, distinct_ids=["person3"], immediate=True) + + self._create_event( + team=self.team, + event="watched movie", + distinct_id="person1", + timestamp="2020-01-01T12:00:00Z", + properties={"fake_prop": "value_1"}, + ) + + self._create_event( + team=self.team, + event="watched movie", + distinct_id="person2", + timestamp="2020-01-01T12:00:00Z", + properties={"fake_prop": "value_1"}, + ) + self._create_event( + team=self.team, + event="watched movie", + distinct_id="person2", + timestamp="2020-01-01T12:00:00Z", + properties={"fake_prop": "value_1"}, + ) + self._create_event( + team=self.team, + event="watched movie", + distinct_id="person2", + 
timestamp="2020-01-02T12:00:00Z", + properties={"fake_prop": "value_2"}, + ) + + self._create_event( + team=self.team, + event="watched movie", + distinct_id="person3", + timestamp="2020-01-01T12:00:00Z", + properties={"fake_prop": "value_1"}, + ) + + self._create_person(team_id=self.team.pk, distinct_ids=["person4"], immediate=True) + self._create_event( + team=self.team, + event="watched movie", + distinct_id="person4", + timestamp="2020-01-05T12:00:00Z", + properties={"fake_prop": "value_1"}, + ) + + with freeze_time("2020-01-04T13:01:01Z"): + data = { + "date_from": "-14d", + "breakdown": "fake_prop", + "breakdown_type": "event", + "display": "ActionsPie", + "events": [ + { + "id": "watched movie", + "name": "watched movie", + "type": "events", + "order": 0, + "math": "dau", + } + ], + } + event_response = self._run(Filter(team=self.team, data=data), self.team) + event_response = sorted(event_response, key=lambda resp: resp["breakdown_value"]) + + entity = Entity({"id": "watched movie", "type": "events", "math": "dau"}) + + people_value_1 = self._get_trend_people( + Filter(team=self.team, data={**data, "breakdown_value": "value_1"}), + entity, + ) + assert people_value_1 == [ + # Persons with higher value come first + { + "created_at": "2020-01-01T12:00:00Z", + "distinct_ids": ["person2"], + "id": str(person2.uuid), + "is_identified": False, + "matched_recordings": [], + "name": "person2", + "properties": {}, + "type": "person", + "uuid": str(person2.uuid), + "value_at_data_point": 2, # 2 events with fake_prop="value_1" in the time range + }, + { + "created_at": "2020-01-01T12:00:00Z", + "distinct_ids": ["person1"], + "id": str(person1.uuid), + "is_identified": False, + "matched_recordings": [], + "name": "person1", + "properties": {}, + "type": "person", + "uuid": str(person1.uuid), + "value_at_data_point": 1, # 1 event with fake_prop="value_1" in the time range + }, + { + "created_at": "2020-01-01T12:00:00Z", + "distinct_ids": ["person3"], + "id": 
    @also_test_with_materialized_columns(person_properties=["name"])
    def test_breakdown_by_person_property_pie(self):
        """Pie display with DAU math, broken down by a person property:
        each matching person contributes an aggregated_value of 1 (unique user)."""
        self._create_multiple_people()

        with freeze_time("2020-01-04T13:01:01Z"):
            event_response = self._run(
                Filter(
                    team=self.team,
                    data={
                        "date_from": "-14d",
                        "breakdown": "name",
                        "breakdown_type": "person",
                        "display": "ActionsPie",
                        "events": [
                            {
                                "id": "watched movie",
                                "name": "watched movie",
                                "type": "events",
                                "order": 0,
                                "math": "dau",
                            }
                        ],
                    },
                ),
                self.team,
            )
            # Sort buckets by breakdown value so the positional assertions are stable.
            event_response = sorted(event_response, key=lambda resp: resp["breakdown_value"])
            self.assertDictContainsSubset({"breakdown_value": "person1", "aggregated_value": 1}, event_response[0])
            self.assertDictContainsSubset({"breakdown_value": "person2", "aggregated_value": 1}, event_response[1])
            self.assertDictContainsSubset({"breakdown_value": "person3", "aggregated_value": 1}, event_response[2])
"breakdown_type": "person", + "display": "ActionsPie", + "events": [ + { + "id": "watched movie", + "name": "watched movie", + "type": "events", + "order": 0, + "math": "dau", + "properties": [ + { + "key": "name", + "operator": "not_icontains", + "value": "person3", + "type": "person", + } + ], + } + ], + } + ), + self.team, + ) + event_response = sorted(event_response, key=lambda resp: resp["breakdown_value"]) + self.assertEqual(len(event_response), 2) + self.assertDictContainsSubset({"breakdown_value": "person1", "aggregated_value": 1}, event_response[0]) + self.assertDictContainsSubset({"breakdown_value": "person2", "aggregated_value": 1}, event_response[1]) + + def test_breakdown_hour_interval(self): + response = self._test_events_with_dates( + dates=["2020-11-01 13:00:00", "2020-11-01 13:20:00", "2020-11-01 17:00:00"], + interval="hour", + date_from="2020-11-01 12:00:00", + breakdown="$browser", + breakdown_type="event", + query_time="2020-11-01 23:00:00", + result=[ + { + "action": { + "id": "event_name", + "type": "events", + "order": None, + "name": "event_name", + "custom_name": None, + "math": None, + "math_hogql": None, + "math_property": None, + "math_group_type_index": None, + "properties": [], + }, + "label": "event_name", + "count": 3.0, + "data": [0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0, 0, 0, 0, 0], + "labels": [ + "1-Nov-2020 12:00", + "1-Nov-2020 13:00", + "1-Nov-2020 14:00", + "1-Nov-2020 15:00", + "1-Nov-2020 16:00", + "1-Nov-2020 17:00", + "1-Nov-2020 18:00", + "1-Nov-2020 19:00", + "1-Nov-2020 20:00", + "1-Nov-2020 21:00", + "1-Nov-2020 22:00", + "1-Nov-2020 23:00", + ], + "days": [ + "2020-11-01 12:00:00", + "2020-11-01 13:00:00", + "2020-11-01 14:00:00", + "2020-11-01 15:00:00", + "2020-11-01 16:00:00", + "2020-11-01 17:00:00", + "2020-11-01 18:00:00", + "2020-11-01 19:00:00", + "2020-11-01 20:00:00", + "2020-11-01 21:00:00", + "2020-11-01 22:00:00", + "2020-11-01 23:00:00", + ], + "persons_urls": [], + } + ], + ) + self.assertEqual( + { + 
    @also_test_with_materialized_columns(person_properties=["name"])
    def test_filter_test_accounts_cohorts(self):
        """The team's test-account filter may be a cohort filter; with
        filter_test_accounts enabled, only events of cohort members (Jane,
        who has two events) are counted."""
        self._create_person(team_id=self.team.pk, distinct_ids=["person_1"], properties={"name": "John"})
        self._create_person(team_id=self.team.pk, distinct_ids=["person_2"], properties={"name": "Jane"})

        self._create_event(event="event_name", team=self.team, distinct_id="person_1")
        self._create_event(event="event_name", team=self.team, distinct_id="person_2")
        self._create_event(event="event_name", team=self.team, distinct_id="person_2")

        cohort = _create_cohort(
            team=self.team,
            name="cohort1",
            groups=[{"properties": [{"key": "name", "value": "Jane", "type": "person"}]}],
        )
        # Install the cohort as the team-level test-account filter.
        self.team.test_account_filters = [{"key": "id", "value": cohort.pk, "type": "cohort"}]
        self.team.save()

        response = self._run(
            Filter(
                data={"events": [{"id": "event_name"}], "filter_test_accounts": True},
                team=self.team,
            ),
            self.team,
        )

        # Only Jane's two events survive the test-account cohort filter.
        self.assertEqual(response[0]["count"], 2)
        self.assertEqual(response[0]["data"][-1], 2)
    @also_test_with_person_on_events_v2
    def test_breakdown_filter_by_precalculated_cohort(self):
        """Filtering by a precalculated cohort must still work when combined
        with a person-property breakdown (only Jane is in the cohort → 2 events)."""
        self._create_person(team_id=self.team.pk, distinct_ids=["person_1"], properties={"name": "John"})
        self._create_person(team_id=self.team.pk, distinct_ids=["person_2"], properties={"name": "Jane"})

        self._create_event(event="event_name", team=self.team, distinct_id="person_1")
        self._create_event(event="event_name", team=self.team, distinct_id="person_2")
        self._create_event(event="event_name", team=self.team, distinct_id="person_2")

        cohort = _create_cohort(
            team=self.team,
            name="cohort1",
            groups=[{"properties": [{"key": "name", "value": "Jane", "type": "person"}]}],
        )
        # Materialize cohort membership so the precalculated-people path is exercised.
        cohort.calculate_people_ch(pending_version=0)

        with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True):
            response = self._run(
                Filter(
                    team=self.team,
                    data={
                        "events": [{"id": "event_name"}],
                        "properties": [{"type": "cohort", "key": "id", "value": cohort.pk}],
                        "breakdown": "name",
                        "breakdown_type": "person",
                    },
                ),
                self.team,
            )

        self.assertEqual(response[0]["count"], 2)
        self.assertEqual(response[0]["data"][-1], 2)
self.assertEqual(response[1]["aggregated_value"], 1) + + @snapshot_clickhouse_queries + def test_trends_aggregate_by_distinct_id(self): + # Stopgap until https://github.com/PostHog/meta/pull/39 is implemented + + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + self._create_person(team_id=self.team.pk, distinct_ids=["third"]) + + with freeze_time("2019-12-24 03:45:34"): + self._create_event(team=self.team, event="sign up", distinct_id="blabla") + self._create_event( + team=self.team, event="sign up", distinct_id="blabla" + ) # aggregated by distinctID, so this should be ignored + self._create_event(team=self.team, event="sign up", distinct_id="anonymous_id") + self._create_event(team=self.team, event="sign up", distinct_id="third") + + with override_instance_config("AGGREGATE_BY_DISTINCT_IDS_TEAMS", f"{self.team.pk},4"): + with freeze_time("2019-12-31T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + "interval": "day", + "events": [{"id": "sign up", "math": "dau"}], + }, + ), + self.team, + ) + + self.assertEqual(daily_response[0]["data"][0], 3) + + with freeze_time("2019-12-31T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + "interval": "day", + "events": [{"id": "sign up", "math": "dau"}], + "properties": [ + { + "key": "$some_prop", + "value": "some_val", + "type": "person", + } + ], + }, + ), + self.team, + ) + self.assertEqual(daily_response[0]["data"][0], 2) + + # breakdown person props + with freeze_time("2019-12-31T13:00:01Z"): + daily_response = self._run( + Filter( + team=self.team, + data={ + "interval": "day", + "events": [{"id": "sign up", "math": "dau"}], + "breakdown_type": "person", + "breakdown": "$some_prop", + }, + ), + self.team, + ) + self.assertEqual(daily_response[0]["data"][0], 2) + self.assertEqual(daily_response[0]["label"], "sign up - some_val") + 
    @also_test_with_materialized_columns(["$some_property"])
    def test_breakdown_filtering_limit(self):
        """With more than 20 distinct breakdown values, the query fetches 25
        series so callers can detect that the value set was truncated."""
        self._create_breakdown_events()
        with freeze_time("2020-01-04T13:01:01Z"):
            response = self._run(
                Filter(
                    team=self.team,
                    data={
                        "date_from": "-14d",
                        "breakdown": "$some_property",
                        "events": [
                            {
                                "id": "sign up",
                                "name": "sign up",
                                "type": "events",
                                "order": 0,
                            }
                        ],
                    },
                ),
                self.team,
            )
        self.assertEqual(len(response), 25)  # We fetch 25 to see if there are more than 20 values
    @also_test_with_materialized_columns(["$some_property"])
    def test_breakdown_filtering(self):
        """Breakdown by an event property across two event entries: events
        without the property fall into the "none" bucket for each entry."""
        self._create_events()
        # Run the breakdown over both a configured event and one with no matching events.
        with freeze_time("2020-01-04T13:01:01Z"):
            response = self._run(
                Filter(
                    team=self.team,
                    data={
                        "date_from": "-14d",
                        "breakdown": "$some_property",
                        "events": [
                            {
                                "id": "sign up",
                                "name": "sign up",
                                "type": "events",
                                "order": 0,
                            },
                            {"id": "no events"},
                        ],
                    },
                ),
                self.team,
            )

        # Series ordering: "sign up" buckets (none/value/other_value) first, then "no events".
        self.assertEqual(response[0]["label"], "sign up - none")
        self.assertEqual(response[2]["label"], "sign up - other_value")
        self.assertEqual(response[1]["label"], "sign up - value")
        self.assertEqual(response[3]["label"], "no events - none")

        self.assertEqual(sum(response[0]["data"]), 2)
        self.assertEqual(sum(response[1]["data"]), 2)
        self.assertEqual(sum(response[2]["data"]), 1)
        self.assertEqual(sum(response[3]["data"]), 1)
"test@gmail.com"}, + ) + self._create_person(team_id=self.team.pk, distinct_ids=["person3"], properties={}) + + self._create_event( + event="sign up", + distinct_id="person1", + team=self.team, + properties={"key": "val"}, + ) + self._create_event( + event="sign up", + distinct_id="person2", + team=self.team, + properties={"key": "val"}, + ) + self._create_event( + event="sign up", + distinct_id="person3", + team=self.team, + properties={"key": "val"}, + ) + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "breakdown": "email", + "breakdown_type": "person", + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + } + ], + }, + ), + self.team, + ) + self.assertEqual(response[0]["label"], "sign up - none") + self.assertEqual(response[1]["label"], "sign up - test@gmail.com") + self.assertEqual(response[2]["label"], "sign up - test@posthog.com") + + self.assertEqual(response[0]["count"], 1) + self.assertEqual(response[1]["count"], 1) + self.assertEqual(response[2]["count"], 1) + + # ensure that column names are properly handled when subqueries and person subquery share properties column + @also_test_with_materialized_columns(event_properties=["key"], person_properties=["email"]) + def test_breakdown_filtering_persons_with_action_props(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["person1"], + properties={"email": "test@posthog.com"}, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["person2"], + properties={"email": "test@gmail.com"}, + ) + self._create_person(team_id=self.team.pk, distinct_ids=["person3"], properties={}) + + self._create_event( + event="sign up", + distinct_id="person1", + team=self.team, + properties={"key": "val"}, + ) + self._create_event( + event="sign up", + distinct_id="person2", + team=self.team, + properties={"key": "val"}, + ) + self._create_event( + event="sign up", + distinct_id="person3", + team=self.team, + properties={"key": 
"val"}, + ) + action = _create_action( + name="sign up", + team=self.team, + properties=[{"key": "key", "type": "event", "value": ["val"], "operator": "exact"}], + ) + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "breakdown": "email", + "breakdown_type": "person", + "actions": [{"id": action.pk, "type": "actions", "order": 0}], + }, + ), + self.team, + ) + self.assertEqual(response[0]["label"], "sign up - none") + self.assertEqual(response[1]["label"], "sign up - test@gmail.com") + self.assertEqual(response[2]["label"], "sign up - test@posthog.com") + + self.assertEqual(response[0]["count"], 1) + self.assertEqual(response[1]["count"], 1) + self.assertEqual(response[2]["count"], 1) + + @also_test_with_materialized_columns(["$current_url", "$os", "$browser"]) + def test_breakdown_filtering_with_properties(self): + with freeze_time("2020-01-03T13:01:01Z"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Mac", + }, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "first url", + "$browser": "Chrome", + "$os": "Windows", + }, + ) + with freeze_time("2020-01-04T13:01:01Z"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "second url", + "$browser": "Firefox", + "$os": "Mac", + }, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "second url", + "$browser": "Chrome", + "$os": "Windows", + }, + ) + + with freeze_time("2020-01-05T13:01:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-7d", + "breakdown": "$current_url", + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + "properties": [{"key": "$os", "value": "Mac"}], + } + ], + "properties": 
[{"key": "$browser", "value": "Firefox"}], + }, + ), + self.team, + ) + + response = sorted(response, key=lambda x: x["label"]) + self.assertEqual(response[0]["label"], "sign up - first url") + self.assertEqual(response[1]["label"], "sign up - second url") + + self.assertEqual(sum(response[0]["data"]), 1) + self.assertEqual(response[0]["breakdown_value"], "first url") + + self.assertEqual(sum(response[1]["data"]), 1) + self.assertEqual(response[1]["breakdown_value"], "second url") + + @snapshot_clickhouse_queries + def test_breakdown_filtering_with_properties_in_new_format(self): + with freeze_time("2020-01-03T13:01:01Z"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Windows", + }, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "first url", + "$browser": "Chrome", + "$os": "Mac", + }, + ) + with freeze_time("2020-01-04T13:01:01Z"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla1", + properties={ + "$current_url": "second url", + "$browser": "Firefox", + "$os": "Mac", + }, + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla2", + properties={ + "$current_url": "second url", + "$browser": "Chrome", + "$os": "Windows", + }, + ) + + with freeze_time("2020-01-05T13:01:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "breakdown": "$current_url", + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + "properties": [{"key": "$os", "value": "Mac"}], + } + ], + "properties": { + "type": "OR", + "values": [ + {"key": "$browser", "value": "Firefox"}, + {"key": "$os", "value": "Windows"}, + ], + }, + }, + ), + self.team, + ) + + response = sorted(response, key=lambda x: x["label"]) + self.assertEqual(response[0]["label"], "sign up - second 
url") + + self.assertEqual(sum(response[0]["data"]), 1) + self.assertEqual(response[0]["breakdown_value"], "second url") + + # AND filter properties with disjoint set means results should be empty + with freeze_time("2020-01-05T13:01:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "breakdown": "$current_url", + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + "properties": [{"key": "$os", "value": "Mac"}], + } + ], + "properties": { + "type": "AND", + "values": [ + {"key": "$browser", "value": "Firefox"}, + {"key": "$os", "value": "Windows"}, + ], + }, + }, + ), + self.team, + ) + + response = sorted(response, key=lambda x: x["label"]) + self.assertEqual(response, []) + + @also_test_with_person_on_events_v2 + @snapshot_clickhouse_queries + def test_mau_with_breakdown_filtering_and_prop_filter(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val", "filter_prop": "filter_val"}, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla2"], + properties={"$some_prop": "some_val3", "filter_prop": "filter_val2"}, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla3"], + properties={"$some_prop": "some_val2", "filter_prop": "filter_val"}, + ) + with freeze_time("2020-01-02T13:01:01Z"): + self._create_event(team=self.team, event="sign up", distinct_id="blabla") + self._create_event(team=self.team, event="sign up", distinct_id="blabla2") + self._create_event(team=self.team, event="sign up", distinct_id="blabla3") + with freeze_time("2020-01-03T13:01:01Z"): + self._create_event(team=self.team, event="sign up", distinct_id="blabla") + self._create_event(team=self.team, event="sign up", distinct_id="blabla2") + self._create_event(team=self.team, event="sign up", distinct_id="blabla3") + with freeze_time("2020-01-04T13:01:01Z"): + event_response = self._run( + Filter( + 
team=self.team, + data={ + "breakdown": "$some_prop", + "breakdown_type": "person", + "events": [{"id": "sign up", "math": "monthly_active"}], + "properties": [ + { + "key": "filter_prop", + "value": "filter_val", + "type": "person", + } + ], + "display": "ActionsLineGraph", + }, + ), + self.team, + ) + + self.assertEqual(event_response[0]["label"], "sign up - some_val") + self.assertEqual(event_response[1]["label"], "sign up - some_val2") + + self.assertEqual(sum(event_response[0]["data"]), 2) + self.assertEqual(event_response[0]["data"][5], 1) + + self.assertEqual(sum(event_response[1]["data"]), 2) + self.assertEqual(event_response[1]["data"][5], 1) + + @also_test_with_materialized_columns(["$some_property"]) + def test_dau_with_breakdown_filtering(self): + sign_up_action, _ = self._create_events() + with freeze_time("2020-01-02T13:01:01Z"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "other_value"}, + ) + with freeze_time("2020-01-04T13:01:01Z"): + action_response = self._run( + Filter( + team=self.team, + data={ + "breakdown": "$some_property", + "actions": [{"id": sign_up_action.id, "math": "dau"}], + }, + ), + self.team, + ) + event_response = self._run( + Filter( + team=self.team, + data={ + "breakdown": "$some_property", + "events": [{"id": "sign up", "math": "dau"}], + }, + ), + self.team, + ) + + self.assertEqual(event_response[1]["label"], "sign up - other_value") + self.assertEqual(event_response[2]["label"], "sign up - value") + + self.assertEqual(sum(event_response[1]["data"]), 1) + self.assertEqual(event_response[1]["data"][5], 1) + + self.assertEqual(sum(event_response[2]["data"]), 1) + self.assertEqual(event_response[2]["data"][4], 1) # property not defined + + self.assertEntityResponseEqual(action_response, event_response) + + @snapshot_clickhouse_queries + def test_dau_with_breakdown_filtering_with_sampling(self): + sign_up_action, _ = self._create_events() + with 
freeze_time("2020-01-02T13:01:01Z"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "other_value"}, + ) + with freeze_time("2020-01-04T13:01:01Z"): + action_response = self._run( + Filter( + team=self.team, + data={ + "sampling_factor": 1, + "breakdown": "$some_property", + "actions": [{"id": sign_up_action.id, "math": "dau"}], + }, + ), + self.team, + ) + event_response = self._run( + Filter( + team=self.team, + data={ + "sampling_factor": 1, + "breakdown": "$some_property", + "events": [{"id": "sign up", "math": "dau"}], + }, + ), + self.team, + ) + + self.assertEqual(event_response[1]["label"], "sign up - other_value") + self.assertEqual(event_response[2]["label"], "sign up - value") + + self.assertEqual(sum(event_response[1]["data"]), 1) + self.assertEqual(event_response[1]["data"][5], 1) + + self.assertEqual(sum(event_response[2]["data"]), 1) + self.assertEqual(event_response[2]["data"][4], 1) # property not defined + + self.assertEntityResponseEqual(action_response, event_response) + + @also_test_with_materialized_columns(["$os", "$some_property"]) + def test_dau_with_breakdown_filtering_with_prop_filter(self): + sign_up_action, _ = self._create_events() + with freeze_time("2020-01-02T13:01:01Z"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={"$some_property": "other_value", "$os": "Windows"}, + ) + with freeze_time("2020-01-04T13:01:01Z"): + action_response = self._run( + Filter( + team=self.team, + data={ + "breakdown": "$some_property", + "actions": [{"id": sign_up_action.id, "math": "dau"}], + "properties": [{"key": "$os", "value": "Windows"}], + }, + ), + self.team, + ) + event_response = self._run( + Filter( + team=self.team, + data={ + "breakdown": "$some_property", + "events": [{"id": "sign up", "math": "dau"}], + "properties": [{"key": "$os", "value": "Windows"}], + }, + ), + self.team, + ) + + 
self.assertEqual(event_response[0]["label"], "sign up - other_value") + + self.assertEqual(sum(event_response[0]["data"]), 1) + self.assertEqual(event_response[0]["data"][5], 1) # property not defined + + self.assertEntityResponseEqual(action_response, event_response) + + @also_test_with_materialized_columns(event_properties=["$host"], person_properties=["$some_prop"]) + def test_against_clashing_entity_and_property_filter_naming(self): + # Regression test for https://github.com/PostHog/posthog/issues/5814 + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="blabla", + properties={"$host": "app.example.com"}, + timestamp="2020-01-03T12:00:00Z", + ) + + with freeze_time("2020-01-04T13:01:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "events": [ + { + "id": "$pageview", + "properties": [ + { + "key": "$host", + "operator": "icontains", + "value": ".com", + } + ], + } + ], + "properties": [ + { + "key": "$host", + "value": ["app.example.com", "another.com"], + } + ], + "breakdown": "$some_prop", + "breakdown_type": "person", + }, + ), + self.team, + ) + + self.assertEqual(response[0]["count"], 1) + + # this ensures that the properties don't conflict when formatting params + @also_test_with_materialized_columns(["$current_url"]) + def test_action_with_prop(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["blabla", "anonymous_id"], + properties={"$some_prop": "some_val"}, + ) + sign_up_action = Action.objects.create(team=self.team, name="sign up") + ActionStep.objects.create( + action=sign_up_action, + event="sign up", + properties=[ + { + "key": "$current_url", + "type": "event", + "value": ["https://posthog.com/feedback/1234"], + "operator": "exact", + } + ], + ) + + with freeze_time("2020-01-02T13:01:01Z"): + self._create_event( + team=self.team, + event="sign up", + 
distinct_id="blabla", + properties={"$current_url": "https://posthog.com/feedback/1234"}, + ) + + with freeze_time("2020-01-04T13:01:01Z"): + action_response = self._run( + Filter( + team=self.team, + data={ + "actions": [{"id": sign_up_action.id, "math": "dau"}], + "properties": [{"key": "$current_url", "value": "fake"}], + }, + ), + self.team, + ) + + # if the params were shared it would be 1 because action would take precedence + self.assertEqual(action_response[0]["count"], 0) + + @also_test_with_materialized_columns(["$current_url"], verify_no_jsonextract=False) + def test_combine_all_cohort_and_icontains(self): + # This caused some issues with SQL parsing + sign_up_action, _ = self._create_events() + cohort = Cohort.objects.create( + team=self.team, + name="a", + groups=[{"properties": [{"key": "key", "value": "value", "type": "person"}]}], + ) + action_response = self._run( + Filter( + team=self.team, + data={ + "actions": [{"id": sign_up_action.id, "math": "dau"}], + "properties": [{"key": "$current_url", "value": "ii", "operator": "icontains"}], + "breakdown": [cohort.pk, "all"], + "breakdown_type": "cohort", + }, + ), + self.team, + ) + self.assertEqual(action_response[0]["count"], 0) + + @also_test_with_person_on_events_v2 + @snapshot_clickhouse_queries + def test_person_filtering_in_cohort_in_action(self): + # This caused some issues with SQL parsing + sign_up_action, _ = self._create_events() + flush_persons_and_events() + cohort = Cohort.objects.create( + team=self.team, + name="a", + groups=[{"properties": [{"key": "$some_prop", "value": "some_val", "type": "person"}]}], + ) + step = sign_up_action.steps.first() + if step: + step.properties = [{"key": "id", "value": cohort.pk, "type": "cohort"}] + step.save() + with freeze_time("2020-01-04T13:01:01Z"): + action_response = self._run( + Filter( + team=self.team, + data={ + "actions": [{"id": sign_up_action.id}], + "breakdown": "$some_property", + }, + ), + self.team, + ) + 
self.assertEqual(action_response[0]["count"], 2) + + @also_test_with_materialized_columns(event_properties=["key"], person_properties=["email"]) + def test_breakdown_user_props_with_filter(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["person1"], + properties={"email": "test@posthog.com"}, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["person2"], + properties={"email": "test@gmail.com"}, + ) + person = self._create_person( + team_id=self.team.pk, + distinct_ids=["person3"], + properties={"email": "test@gmail.com"}, + ) + create_person_distinct_id(self.team.pk, "person1", str(person.uuid)) + + self._create_event( + event="sign up", + distinct_id="person1", + team=self.team, + properties={"key": "val"}, + ) + self._create_event( + event="sign up", + distinct_id="person2", + team=self.team, + properties={"key": "val"}, + ) + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "breakdown": "email", + "breakdown_type": "person", + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + } + ], + "properties": [ + { + "key": "email", + "value": "@posthog.com", + "operator": "not_icontains", + "type": "person", + }, + {"key": "key", "value": "val"}, + ], + }, + ), + self.team, + ) + + self.assertEqual(len(response), 1) + self.assertEqual(response[0]["breakdown_value"], "test@gmail.com") + + @snapshot_clickhouse_queries + @also_test_with_materialized_columns(event_properties=["key"], person_properties=["email", "$os", "$browser"]) + def test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns(self): + self._create_person( + team_id=self.team.pk, + distinct_ids=["person1"], + properties={ + "email": "test@posthog.com", + "$os": "ios", + "$browser": "chrome", + }, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["person2"], + properties={"email": "test@gmail.com", "$os": "ios", "$browser": "safari"}, + ) + self._create_person( + 
team_id=self.team.pk, + distinct_ids=["person3"], + properties={ + "email": "test2@posthog.com", + "$os": "android", + "$browser": "chrome", + }, + ) + # a second person with same properties, just so snapshot passes on different CH versions (indeterminate sorting currently) + self._create_person( + team_id=self.team.pk, + distinct_ids=["person32"], + properties={ + "email": "test2@posthog.com", + "$os": "android", + "$browser": "chrome", + }, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["person4"], + properties={ + "email": "test3@posthog.com", + "$os": "android", + "$browser": "safari", + }, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["person5"], + properties={ + "email": "test4@posthog.com", + "$os": "android", + "$browser": "safari", + }, + ) + self._create_person( + team_id=self.team.pk, + distinct_ids=["person6"], + properties={ + "email": "test5@posthog.com", + "$os": "android", + "$browser": "safari", + }, + ) + + journeys_for( + team=self.team, + create_people=False, + events_by_person={ + "person1": [ + { + "event": "sign up", + "properties": {"key": "val"}, + "timestamp": datetime(2020, 5, 1, 0), + } + ], + "person2": [ + { + "event": "sign up", + "properties": {"key": "val"}, + "timestamp": datetime(2020, 5, 1, 0), + } + ], + "person3": [ + { + "event": "sign up", + "properties": {"key": "val"}, + "timestamp": datetime(2020, 5, 1, 0), + } + ], + "person32": [ + { + "event": "sign up", + "properties": {"key": "val"}, + "timestamp": datetime(2020, 5, 1, 0), + } + ], + "person4": [ + { + "event": "sign up", + "properties": {"key": "val"}, + "timestamp": datetime(2020, 5, 1, 0), + } + ], + "person5": [ + { + "event": "sign up", + "properties": {"key": "val"}, + "timestamp": datetime(2020, 5, 1, 0), + } + ], + "person6": [ + { + "event": "sign up", + "properties": {"key": "val"}, + "timestamp": datetime(2020, 5, 1, 0), + } + ], + }, + ) + + response = self._run( + Filter( + team=self.team, + data={ + "date_from": 
"2020-01-01 00:00:00", + "date_to": "2020-07-01 00:00:00", + "breakdown": "email", + "breakdown_type": "person", + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + } + ], + "properties": { + "type": "AND", + "values": [ + { + "type": "OR", + "values": [ + { + "key": "email", + "value": "@posthog.com", + "operator": "not_icontains", + "type": "person", + }, + {"key": "key", "value": "val"}, + ], + }, + { + "type": "OR", + "values": [ + { + "key": "$os", + "value": "android", + "operator": "exact", + "type": "person", + }, + { + "key": "$browser", + "value": "safari", + "operator": "exact", + "type": "person", + }, + ], + }, + ], + }, + }, + ), + self.team, + ) + response = sorted(response, key=lambda item: item["breakdown_value"]) + self.assertEqual(len(response), 5) + # person1 shouldn't be selected because it doesn't match the filter + self.assertEqual(response[0]["breakdown_value"], "test2@posthog.com") + self.assertEqual(response[1]["breakdown_value"], "test3@posthog.com") + self.assertEqual(response[2]["breakdown_value"], "test4@posthog.com") + self.assertEqual(response[3]["breakdown_value"], "test5@posthog.com") + self.assertEqual(response[4]["breakdown_value"], "test@gmail.com") + + # now have more strict filters with entity props + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "2020-01-01 00:00:00", + "date_to": "2020-07-01 00:00:00", + "breakdown": "email", + "breakdown_type": "person", + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + "properties": { + "type": "AND", + "values": [ + {"key": "key", "value": "val"}, + { + "key": "email", + "value": "@posthog.com", + "operator": "icontains", + "type": "person", + }, + ], + }, + } + ], + "properties": { + "type": "AND", + "values": [ + { + "type": "AND", + "values": [ + { + "key": "$os", + "value": "android", + "operator": "exact", + "type": "person", + }, + { + "key": "$browser", + "value": 
"chrome", + "operator": "exact", + "type": "person", + }, + ], + } + ], + }, + }, + ), + self.team, + ) + self.assertEqual(len(response), 1) + self.assertEqual(response[0]["breakdown_value"], "test2@posthog.com") + + def _create_active_users_events(self): + self._create_person(team_id=self.team.pk, distinct_ids=["p0"], properties={"name": "p1"}) + self._create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "p1"}) + self._create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"name": "p2"}) + + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p0", + timestamp="2020-01-03T11:00:00Z", + properties={"key": "val"}, + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p0", + timestamp="2020-01-03T12:00:00Z", + properties={"key": "val"}, + ) + + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-09T12:00:00Z", + properties={"key": "bor"}, + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-09T12:00:00Z", + properties={"key": "val"}, + ) + + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-10T12:00:00Z", + properties={"key": "bor"}, + ) + + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-11T12:00:00Z", + properties={"key": "val"}, + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-11T12:00:00Z", + properties={"key": "bor"}, + ) + + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p0", + timestamp="2020-01-12T12:00:00Z", + properties={"key": "val"}, + ) + + @snapshot_clickhouse_queries + def test_weekly_active_users_aggregated_range_wider_than_week(self): + self._create_active_users_events() + + data = { + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "display": TRENDS_TABLE, + "events": [ + { + 
"id": "$pageview", + "type": "events", + "order": 0, + "math": "weekly_active", + } + ], + } + + filter = Filter(team=self.team, data=data) + result = self._run(filter, self.team) + # Only p0 was active on 2020-01-08 or in the preceding 6 days + self.assertEqual(result[0]["aggregated_value"], 1) + + @snapshot_clickhouse_queries + def test_weekly_active_users_aggregated_range_wider_than_week_with_sampling(self): + self._create_active_users_events() + + data = { + "sampling_factor": 1, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "display": TRENDS_TABLE, + "events": [ + { + "id": "$pageview", + "type": "events", + "order": 0, + "math": "weekly_active", + } + ], + } + + filter = Filter(team=self.team, data=data) + result = self._run(filter, self.team) + # Only p0 was active on 2020-01-08 or in the preceding 6 days + self.assertEqual(result[0]["aggregated_value"], 1) + + @snapshot_clickhouse_queries + def test_weekly_active_users_aggregated_range_narrower_than_week(self): + self._create_active_users_events() + + data = { + "date_from": "2020-01-11", + "date_to": "2020-01-12", + "display": TRENDS_TABLE, + "events": [ + { + "id": "$pageview", + "type": "events", + "order": 0, + "math": "weekly_active", + } + ], + } + + filter = Filter(team=self.team, data=data) + result = self._run(filter, self.team) + # All were active on 2020-01-12 or in the preceding 6 days + self.assertEqual(result[0]["aggregated_value"], 3) + + @also_test_with_different_timezones + @snapshot_clickhouse_queries + def test_weekly_active_users_monthly(self): + self._create_active_users_events() + + data = { + "date_from": "2019-12-01", + "date_to": "2020-02-29", # T'was a leap year + "interval": "month", + "events": [ + { + "id": "$pageview", + "type": "events", + "order": 0, + "math": "weekly_active", + } + ], + } + + filter = Filter(team=self.team, data=data) + result = self._run(filter, self.team) + self.assertEqual(result[0]["days"], ["2019-12-01", "2020-01-01", "2020-02-01"]) + # No 
users fall into the period of 7 days during or before the first day of any of those three months + self.assertEqual(result[0]["data"], [0.0, 0.0, 0.0]) + + @also_test_with_different_timezones + @snapshot_clickhouse_queries + def test_weekly_active_users_daily(self): + self._create_active_users_events() + + data = { + "date_from": "2020-01-08", + "date_to": "2020-01-19", + "events": [ + { + "id": "$pageview", + "type": "events", + "order": 0, + "math": "weekly_active", + } + ], + } + + filter = Filter(team=self.team, data=data) + result = self._run(filter, self.team) + self.assertEqual( + result[0]["days"], + [ + "2020-01-08", + "2020-01-09", + "2020-01-10", + "2020-01-11", + "2020-01-12", + "2020-01-13", + "2020-01-14", + "2020-01-15", + "2020-01-16", + "2020-01-17", + "2020-01-18", + "2020-01-19", + ], + ) + self.assertEqual( + result[0]["data"], + [ + 1.0, # 2020-01-08 - p0 only + 3.0, # 2020-01-09 - p0, p1, and p2 + 2.0, # 2020-01-10 - p1, and p2 + 2.0, # 2020-01-11 - p1 and p2 + 3.0, # 2020-01-12 - p0, p1, and p2 + 3.0, # 2020-01-13 - p0, p1, and p2 + 3.0, # 2020-01-14 - p0, p1, and p2 + 3.0, # 2020-01-15 - p0, p1, and p2 + 3.0, # 2020-01-16 - p0, p1, and p2 + 3.0, # 2020-01-17 - p0, p1, and p2 + 1.0, # 2020-01-18 - p0 only + 0.0, # 2020-01-19 - nobody + ], + ) + + @also_test_with_different_timezones + def test_weekly_active_users_daily_based_on_action(self): + action = _create_action(name="$pageview", team=self.team) + self._create_active_users_events() + + data = { + "date_from": "2020-01-08", + "date_to": "2020-01-19", + "actions": [ + { + "id": action.id, + "type": "actions", + "order": 0, + "math": "weekly_active", + } + ], + } + + filter = Filter(team=self.team, data=data) + result = self._run(filter, self.team) + self.assertEqual( + result[0]["days"], + [ + "2020-01-08", + "2020-01-09", + "2020-01-10", + "2020-01-11", + "2020-01-12", + "2020-01-13", + "2020-01-14", + "2020-01-15", + "2020-01-16", + "2020-01-17", + "2020-01-18", + "2020-01-19", + ], + ) + 
# Same as test_weekly_active_users_daily + self.assertEqual( + result[0]["data"], + [1.0, 3.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 1.0, 0.0], + ) + + @also_test_with_different_timezones + @snapshot_clickhouse_queries + def test_weekly_active_users_weekly(self): + self._create_active_users_events() + + data = { + "date_from": "2019-12-29", + "date_to": "2020-01-18", + "interval": "week", + "events": [ + { + "id": "$pageview", + "type": "events", + "order": 0, + "math": "weekly_active", + } + ], + } + + filter = Filter(team=self.team, data=data) + result = self._run(filter, self.team) + self.assertEqual(result[0]["days"], ["2019-12-29", "2020-01-05", "2020-01-12"]) + self.assertEqual(result[0]["data"], [0.0, 1.0, 3.0]) + + @snapshot_clickhouse_queries + def test_weekly_active_users_hourly(self): + self._create_active_users_events() + + data = { + "date_from": "2020-01-09T06:00:00Z", + "date_to": "2020-01-09T17:00:00Z", + "interval": "hour", + "events": [ + { + "id": "$pageview", + "type": "events", + "order": 0, + "math": "weekly_active", + } + ], + } + + filter = Filter(team=self.team, data=data) + result = self._run(filter, self.team) + self.assertEqual( + result[0]["days"], + [ + "2020-01-09 06:00:00", + "2020-01-09 07:00:00", + "2020-01-09 08:00:00", + "2020-01-09 09:00:00", + "2020-01-09 10:00:00", + "2020-01-09 11:00:00", + "2020-01-09 12:00:00", + "2020-01-09 13:00:00", + "2020-01-09 14:00:00", + "2020-01-09 15:00:00", + "2020-01-09 16:00:00", + "2020-01-09 17:00:00", + ], + ) + + # p0 falls out of the window at noon, p1 and p2 are counted because the next 24 hours are included. + # FIXME: This is isn't super intuitive, in particular for hour-by-hour queries, but currently + # necessary, because there's a presentation issue: in monthly/weekly graphs data points are formatted as + # D-MMM-YYYY, so if a user sees e.g. 1-Jan-2077, they'll likely expect the active users count to be for + # the first day of the month, and not the last. 
If they saw just Jan-2077, the more general case would work. + self.assertEqual( + result[0]["data"], + [3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], + ) + + def test_weekly_active_users_daily_based_on_action_with_zero_person_ids(self): + # only a person-on-event test + if not get_instance_setting("PERSON_ON_EVENTS_ENABLED"): + return True + + action = _create_action(name="$pageview", team=self.team) + self._create_active_users_events() + + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p5", + timestamp="2020-01-03T12:00:00Z", + properties={"key": "val"}, + person_id="00000000-0000-0000-0000-000000000000", + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p6", + timestamp="2020-01-03T12:00:00Z", + properties={"key": "val"}, + person_id="00000000-0000-0000-0000-000000000000", + ) + + data = { + "date_from": "2020-01-08", + "date_to": "2020-01-19", + "actions": [ + { + "id": action.id, + "type": "actions", + "order": 0, + "math": "weekly_active", + } + ], + } + + filter = Filter(team=self.team, data=data) + result = self._run(filter, self.team) + # Zero person IDs shouldn't be counted + self.assertEqual( + result[0]["data"], + [1.0, 3.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 1.0, 0.0], + ) + + @also_test_with_materialized_columns(["key"]) + def test_breakdown_weekly_active_users_daily(self): + self._create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "p1"}) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-09T12:00:00Z", + properties={"key": "val"}, + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-10T12:00:00Z", + properties={"key": "val"}, + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-11T12:00:00Z", + properties={"key": "val"}, + ) + + self._create_person(team_id=self.team.pk, distinct_ids=["p2"], 
properties={"name": "p2"}) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-09T12:00:00Z", + properties={"key": "val"}, + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-11T12:00:00Z", + properties={"key": "val"}, + ) + + data = { + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12T00:00:00Z", + "breakdown": "key", + "events": [ + { + "id": "$pageview", + "type": "events", + "order": 0, + "math": "weekly_active", + } + ], + } + + filter = Filter(team=self.team, data=data) + result = self._run(filter, self.team) + self.assertEqual( + result[0]["data"], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0], + ) + + @also_test_with_materialized_columns(person_properties=["name"]) + @snapshot_clickhouse_queries + def test_weekly_active_users_filtering(self): + self._create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "person-1"}) + self._create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"name": "person-2"}) + self._create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"name": "person-3"}) + + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-09T12:00:00Z", + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-10T12:00:00Z", + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p3", + timestamp="2020-01-11T12:00:00Z", + ) + + filter = Filter( + team=self.team, + data={ + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12T00:00:00Z", + "events": [ + { + "id": "$pageview", + "type": "events", + "order": 0, + "math": "weekly_active", + } + ], + "properties": [ + { + "key": "name", + "operator": "exact", + "value": ["person-1", "person-2"], + "type": "person", + } + ], + }, + ) + + result = self._run(filter, self.team) + self.assertEqual( + 
result[0]["data"], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0], + ) + + @snapshot_clickhouse_queries + def test_breakdown_weekly_active_users_daily_based_on_action(self): + self._create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "p1"}) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-09T12:00:00Z", + properties={"key": "val"}, + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-10T12:00:00Z", + properties={"key": "val"}, + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-11T12:00:00Z", + properties={"key": "val"}, + ) + + self._create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"name": "p2"}) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-09T12:00:00Z", + properties={"key": "val"}, + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-11T12:00:00Z", + properties={"key": "val"}, + ) + + self._create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"name": "p3"}) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p3", + timestamp="2020-01-09T12:00:00Z", + properties={"key": "val"}, + ) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p3", + timestamp="2020-01-11T12:00:00Z", + properties={"key": "val"}, + ) + + cohort = Cohort.objects.create( + team=self.team, + groups=[ + { + "properties": [ + { + "key": "name", + "operator": "exact", + "value": ["p1", "p2"], + "type": "person", + } + ] + } + ], + ) + + pageview_action = _create_action( + name="$pageview", + team=self.team, + properties=[ + { + "key": "name", + "operator": "exact", + "value": ["p1", "p2", "p3"], + "type": "person", + }, + {"type": "cohort", "key": "id", "value": cohort.pk}, + ], + ) + + data = { + "date_from": 
"2020-01-01T00:00:00Z", + "date_to": "2020-01-12T00:00:00Z", + "breakdown": "key", + "actions": [ + { + "id": pageview_action.id, + "type": "actions", + "order": 0, + "math": "weekly_active", + } + ], + } + + filter = Filter(team=self.team, data=data) + result = self._run(filter, self.team) + self.assertEqual( + result[0]["data"], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0], + ) + + @also_test_with_materialized_columns(["key"]) + @snapshot_clickhouse_queries + def test_breakdown_weekly_active_users_aggregated(self): + self._create_active_users_events() + + data = { + "date_from": "2020-01-11", + "date_to": "2020-01-11", + "display": TRENDS_TABLE, + "events": [ + { + "id": "$pageview", + "type": "events", + "order": 0, + "math": "weekly_active", + } + ], + "breakdown": "key", + } + + filter = Filter(team=self.team, data=data) + result = self._run(filter, self.team) + # All were active on 2020-01-12 or in the preceding 6 days + self.assertEqual(len(result), 2) + self.assertEqual(result[0]["breakdown_value"], "bor") + self.assertEqual(result[0]["aggregated_value"], 2) + self.assertEqual(result[1]["breakdown_value"], "val") + self.assertEqual(result[1]["aggregated_value"], 2) + + @also_test_with_materialized_columns(event_properties=["key"], person_properties=["name"]) + def test_filter_test_accounts(self): + self._create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "p1"}) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-11T12:00:00Z", + properties={"key": "val"}, + ) + + self._create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"name": "p2"}) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-11T12:00:00Z", + properties={"key": "val"}, + ) + self.team.test_account_filters = [{"key": "name", "value": "p1", "operator": "is_not", "type": "person"}] + self.team.save() + filter = Filter( + team=self.team, + 
data={ + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12T00:00:00Z", + "events": [{"id": "$pageview", "type": "events", "order": 0}], + "filter_test_accounts": "true", + }, + ) + result = self._run(filter, self.team) + self.assertEqual(result[0]["count"], 1) + filter2 = Filter( + team=self.team, + data={ + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12T00:00:00Z", + "events": [{"id": "$pageview", "type": "events", "order": 0}], + }, + ) + result = self._run(filter2, self.team) + self.assertEqual(result[0]["count"], 2) + result = self._run(filter.shallow_clone({"breakdown": "key"}), self.team) + self.assertEqual(result[0]["count"], 1) + + @also_test_with_materialized_columns(["$some_property"]) + def test_breakdown_filtering_bar_chart_by_value(self): + self._create_events() + + # test breakdown filtering + with freeze_time("2020-01-04T13:01:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-7d", + "breakdown": "$some_property", + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + } + ], + "display": TRENDS_BAR_VALUE, + }, + ), + self.team, + ) + + self.assertEqual(response[0]["aggregated_value"], 2) # the events without breakdown value + self.assertEqual(response[1]["aggregated_value"], 1) + self.assertEqual(response[2]["aggregated_value"], 1) + self.assertEqual( + response[0]["days"], + [ + "2019-12-28", + "2019-12-29", + "2019-12-30", + "2019-12-31", + "2020-01-01", + "2020-01-02", + "2020-01-03", + "2020-01-04", + ], + ) + + @also_test_with_materialized_columns(person_properties=["key", "key_2"], verify_no_jsonextract=False) + def test_breakdown_multiple_cohorts(self): + self._create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"key": "value"}) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-02T12:00:00Z", + properties={"key": "val"}, + ) + + self._create_person(team_id=self.team.pk, 
distinct_ids=["p2"], properties={"key_2": "value_2"}) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-02T12:00:00Z", + properties={"key": "val"}, + ) + + self._create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"key_2": "value_2"}) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p3", + timestamp="2020-01-02T12:00:00Z", + properties={"key": "val"}, + ) + + cohort1 = _create_cohort( + team=self.team, + name="cohort_1", + groups=[{"properties": [{"key": "key", "value": "value", "type": "person"}]}], + ) + cohort2 = _create_cohort( + team=self.team, + name="cohort_2", + groups=[{"properties": [{"key": "key_2", "value": "value_2", "type": "person"}]}], + ) + + # try different versions + cohort1.calculate_people_ch(pending_version=1) + cohort2.calculate_people_ch(pending_version=0) + + with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): # Normally this is False in tests + with freeze_time("2020-01-04T13:01:01Z"): + res = self._run( + Filter( + team=self.team, + data={ + "date_from": "-7d", + "events": [{"id": "$pageview"}], + "properties": [], + "breakdown": [cohort1.pk, cohort2.pk], + "breakdown_type": "cohort", + }, + ), + self.team, + ) + + self.assertEqual(res[0]["count"], 2) + self.assertEqual(res[1]["count"], 1) + + @also_test_with_materialized_columns(person_properties=["key", "key_2"], verify_no_jsonextract=False) + def test_breakdown_single_cohort(self): + self._create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"key": "value"}) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-02T12:00:00Z", + properties={"key": "val"}, + ) + + self._create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"key_2": "value_2"}) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-02T12:00:00Z", + properties={"key": "val"}, + ) + + 
self._create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"key_2": "value_2"}) + self._create_event( + team=self.team, + event="$pageview", + distinct_id="p3", + timestamp="2020-01-02T12:00:00Z", + properties={"key": "val"}, + ) + + cohort1 = _create_cohort( + team=self.team, + name="cohort_1", + groups=[{"properties": [{"key": "key", "value": "value", "type": "person"}]}], + ) + + cohort1.calculate_people_ch(pending_version=0) + + with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): # Normally this is False in tests + with freeze_time("2020-01-04T13:01:01Z"): + res = self._run( + Filter( + team=self.team, + data={ + "date_from": "-7d", + "events": [{"id": "$pageview"}], + "properties": [], + "breakdown": cohort1.pk, + "breakdown_type": "cohort", + }, + ), + self.team, + ) + + self.assertEqual(res[0]["count"], 1) + + @also_test_with_materialized_columns(["key", "$current_url"]) + def test_filtering_with_action_props(self): + self._create_event( + event="sign up", + distinct_id="person1", + team=self.team, + properties={"key": "val", "$current_url": "/some/page"}, + ) + self._create_event( + event="sign up", + distinct_id="person2", + team=self.team, + properties={"key": "val", "$current_url": "/some/page"}, + ) + self._create_event( + event="sign up", + distinct_id="person3", + team=self.team, + properties={"key": "val", "$current_url": "/another/page"}, + ) + + action = Action.objects.create(name="sign up", team=self.team) + ActionStep.objects.create( + action=action, + event="sign up", + url="/some/page", + properties=[{"key": "key", "type": "event", "value": ["val"], "operator": "exact"}], + ) + + response = self._run( + Filter( + data={ + "date_from": "-14d", + "actions": [{"id": action.pk, "type": "actions", "order": 0}], + } + ), + self.team, + ) + + self.assertEqual(response[0]["count"], 2) + + def test_trends_math_without_math_property(self): + with self.assertRaises(ValidationError): + self._run(Filter(data={"events": [{"id": "sign 
up", "math": "sum"}]}), self.team) + + @patch("posthog.queries.trends.trends.insight_sync_execute") + def test_should_throw_exception(self, patch_sync_execute): + self._create_events() + patch_sync_execute.side_effect = Exception() + # test breakdown filtering + with self.assertRaises(Exception): + with self.settings(TEST=False, DEBUG=False): + self._run( + Filter( + data={ + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + } + ] + } + ), + self.team, + ) + + @also_test_with_different_timezones + @snapshot_clickhouse_queries + def test_timezones_hourly_relative_from(self): + self._create_person(team_id=self.team.pk, distinct_ids=["blabla"], properties={}) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-04T22:01:01", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-05T07:01:01", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-05T08:01:01", + ) + + query_time = datetime(2020, 1, 5, 10, 1, 1, tzinfo=ZoneInfo(self.team.timezone)) + utc_offset_hours = query_time.tzinfo.utcoffset(query_time).total_seconds() // 3600 # type: ignore + utc_offset_sign = "-" if utc_offset_hours < 0 else "+" + with freeze_time(query_time): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "dStart", + "interval": "hour", + "events": [{"id": "sign up", "name": "sign up", "math": "dau"}], + }, + ), + self.team, + ) + self.assertEqual( + response[0]["labels"], + [ + "5-Jan-2020 00:00", + "5-Jan-2020 01:00", + "5-Jan-2020 02:00", + "5-Jan-2020 03:00", + "5-Jan-2020 04:00", + "5-Jan-2020 
05:00", + "5-Jan-2020 06:00", + "5-Jan-2020 07:00", + "5-Jan-2020 08:00", + "5-Jan-2020 09:00", + "5-Jan-2020 10:00", + ], + ) + self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 0, 0, 0, 1, 1, 0, 0]) + + assert dict(parse_qsl(urlparse(response[0]["persons_urls"][7]["url"]).query)) == { + "breakdown_attribution_type": "first_touch", + "breakdown_normalize_url": "False", + "date_from": f"2020-01-05T07:00:00{utc_offset_sign}{abs(utc_offset_hours):02.0f}:00", + "date_to": f"2020-01-05T08:00:00{utc_offset_sign}{abs(utc_offset_hours):02.0f}:00", + "display": "ActionsLineGraph", + "entity_id": "sign up", + "entity_math": "dau", + "entity_type": "events", + "events": '[{"id": "sign up", "type": "events", "order": null, "name": "sign ' + 'up", "custom_name": null, "math": "dau", "math_property": null, "math_hogql": null, ' + '"math_group_type_index": null, "properties": {}}]', + "insight": "TRENDS", + "interval": "hour", + "smoothing_intervals": "1", + "cache_invalidation_key": ANY, + } + persons = self.client.get("/" + response[0]["persons_urls"][7]["url"]).json() + self.assertEqual(persons["results"][0]["count"], 1) + + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "dStart", + "interval": "hour", + "events": [{"id": "sign up", "name": "sign up"}], + }, + ), + self.team, + ) + + self.assertEqual( + response[0]["labels"], + [ + "5-Jan-2020 00:00", + "5-Jan-2020 01:00", + "5-Jan-2020 02:00", + "5-Jan-2020 03:00", + "5-Jan-2020 04:00", + "5-Jan-2020 05:00", + "5-Jan-2020 06:00", + "5-Jan-2020 07:00", + "5-Jan-2020 08:00", + "5-Jan-2020 09:00", + "5-Jan-2020 10:00", + ], + ) + self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 0, 0, 0, 1, 1, 0, 0]) + + @also_test_with_different_timezones + def test_timezones_hourly_absolute_from(self): + self._create_person(team_id=self.team.pk, distinct_ids=["blabla"], properties={}) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": 
"first url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-02T17:01:01", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "second url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-03T17:01:01", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "second url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-06T00:30:01", # Shouldn't be included anywhere + ) + + # Custom date range, single day, hourly interval + response = self._run( + Filter( + data={ + "date_from": "2020-01-03", + "date_to": "2020-01-03 23:59:59", + "interval": "hour", + "events": [{"id": "sign up", "name": "sign up"}], + }, + team=self.team, + ), + self.team, + ) + + self.assertEqual( + response[0]["days"], + [ + "2020-01-03 00:00:00", + "2020-01-03 01:00:00", + "2020-01-03 02:00:00", + "2020-01-03 03:00:00", + "2020-01-03 04:00:00", + "2020-01-03 05:00:00", + "2020-01-03 06:00:00", + "2020-01-03 07:00:00", + "2020-01-03 08:00:00", + "2020-01-03 09:00:00", + "2020-01-03 10:00:00", + "2020-01-03 11:00:00", + "2020-01-03 12:00:00", + "2020-01-03 13:00:00", + "2020-01-03 14:00:00", + "2020-01-03 15:00:00", + "2020-01-03 16:00:00", + "2020-01-03 17:00:00", + "2020-01-03 18:00:00", + "2020-01-03 19:00:00", + "2020-01-03 20:00:00", + "2020-01-03 21:00:00", + "2020-01-03 22:00:00", + "2020-01-03 23:00:00", + ], + ) + self.assertEqual(response[0]["data"][17], 1) + self.assertEqual(len(response[0]["data"]), 24) + + # Custom date range, single day, dayly interval + response = self._run( + Filter( + data={ + "date_from": "2020-01-03", + "date_to": "2020-01-03", + "events": [{"id": "sign up", "name": "sign up"}], + }, + team=self.team, + ), + self.team, + ) + self.assertEqual(response[0]["data"], [1.0]) + + @also_test_with_different_timezones + @snapshot_clickhouse_queries + def test_timezones_daily(self): + 
self._create_person(team_id=self.team.pk, distinct_ids=["blabla"], properties={}) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-02T17:01:01", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "second url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-03T17:01:01", + ) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "second url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-06T00:30:01", # Shouldn't be included anywhere + ) + + with freeze_time(datetime(2020, 1, 5, 5, 0, tzinfo=ZoneInfo(self.team.timezone))): + response = self._run( + Filter( + data={ + "date_from": "-7d", + "events": [{"id": "sign up", "name": "sign up"}], + }, + team=self.team, + ), + self.team, + ) + + self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0]) + self.assertEqual( + response[0]["labels"], + [ + "29-Dec-2019", + "30-Dec-2019", + "31-Dec-2019", + "1-Jan-2020", + "2-Jan-2020", + "3-Jan-2020", + "4-Jan-2020", + "5-Jan-2020", + ], + ) + + # DAU + with freeze_time("2020-01-05T13:01:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "events": [{"id": "sign up", "name": "sign up", "math": "dau"}], + }, + ), + self.team, + ) + self.assertEqual( + response[0]["data"], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0], + ) + self.assertEqual( + response[0]["labels"], + [ + "22-Dec-2019", + "23-Dec-2019", + "24-Dec-2019", + "25-Dec-2019", + "26-Dec-2019", + "27-Dec-2019", + "28-Dec-2019", + "29-Dec-2019", + "30-Dec-2019", + "31-Dec-2019", + "1-Jan-2020", + "2-Jan-2020", + "3-Jan-2020", + "4-Jan-2020", + "5-Jan-2020", + ], + ) + + with freeze_time("2020-01-05T13:01:01Z"): + 
response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-7d", + "events": [ + { + "id": "sign up", + "name": "sign up", + "math": "weekly_active", + } + ], + }, + ), + self.team, + ) + + self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]) + self.assertEqual( + response[0]["labels"], + [ + "29-Dec-2019", + "30-Dec-2019", + "31-Dec-2019", + "1-Jan-2020", + "2-Jan-2020", + "3-Jan-2020", + "4-Jan-2020", + "5-Jan-2020", + ], + ) + + with freeze_time("2020-01-05T13:01:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-7d", + "events": [{"id": "sign up", "name": "sign up", "breakdown": "$os"}], + }, + ), + self.team, + ) + + self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0]) + self.assertEqual( + response[0]["labels"], + [ + "29-Dec-2019", + "30-Dec-2019", + "31-Dec-2019", + "1-Jan-2020", + "2-Jan-2020", + "3-Jan-2020", + "4-Jan-2020", + "5-Jan-2020", + ], + ) + + # breakdown + DAU + with freeze_time("2020-01-05T13:01:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-7d", + "breakdown": "$os", + "events": [{"id": "sign up", "name": "sign up", "math": "dau"}], + }, + ), + self.team, + ) + self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0]) + + # Regression test to ensure we handle non-deterministic timezones correctly + # US/Pacific for example changes from PST to PDT due to Daylight Savings Time + # In 2022, this happened on November 6, and previously we had a bug where + # a graph starting before that date and ending after it would show all 0s + # after November 6. 
Thus, this test ensures that doesn't happen + @snapshot_clickhouse_queries + def test_non_deterministic_timezones(self): + self.team.timezone = "US/Pacific" + self.team.save() + self._create_person(team_id=self.team.pk, distinct_ids=["blabla"], properties={}) + with freeze_time("2022-11-03T01:01:01Z"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Mac", + }, + ) + + with freeze_time("2022-11-10T01:01:01Z"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "second url", + "$browser": "Firefox", + "$os": "Mac", + }, + ) + + with freeze_time("2022-11-17T08:30:01Z"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "second url", + "$browser": "Firefox", + "$os": "Mac", + }, + ) + + with freeze_time("2022-11-24T08:30:01Z"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "second url", + "$browser": "Firefox", + "$os": "Mac", + }, + ) + + with freeze_time("2022-11-30T08:30:01Z"): + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "second url", + "$browser": "Firefox", + "$os": "Mac", + }, + ) + + with freeze_time("2022-11-30T13:01:01Z"): + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "-30d", + "events": [{"id": "sign up", "name": "sign up", "math": "wau"}], + "interval": "week", + }, + ), + self.team, + ) + + # The key is to not get any 0s here + self.assertEqual(response[0]["data"], [1.0, 1.0, 1.0, 1.0, 1.0]) + + @also_test_with_different_timezones + @snapshot_clickhouse_queries + def test_timezones_weekly(self): + self._create_person(team_id=self.team.pk, distinct_ids=["blabla"], properties={}) + self._create_event( # This event is before the time range (but 
counts towards week of 2020-01-06 in Monday mode) + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-11T19:01:01", # Saturday; TRICKY: This is the next UTC day in America/Phoenix + ) + self._create_event( # This event should count towards week of 2020-01-12 (or 2020-01-06 in Monday mode) + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-12T02:01:01", # Sunday; TRICKY: This is the previous UTC day in Asia/Tokyo + ) + self._create_event( # This event should count towards week of 2020-01-19 (or 2020-01-20 in Monday mode) + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "second url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-21T18:01:01", # Tuesday; TRICKY: This is the next UTC day in America/Phoenix + ) + + self.team.week_start_day = 0 # DB value for WeekStartDay.SUNDAY (the default, but let's be explicit) + self.team.save() + + # TRICKY: This is the previous UTC day in Asia/Tokyo + with freeze_time(datetime(2020, 1, 26, 3, 0, tzinfo=ZoneInfo(self.team.timezone))): + # Total volume query + response_sunday = self._run( + Filter( + data={ + "date_from": "-14d", + "interval": "week", + "events": [{"id": "sign up", "name": "sign up"}], + }, + team=self.team, + ), + self.team, + ) + + self.assertEqual(response_sunday[0]["days"], ["2020-01-12", "2020-01-19", "2020-01-26"]) + self.assertEqual(response_sunday[0]["data"], [1.0, 1.0, 0.0]) + + self.team.week_start_day = 1 # DB value for WeekStartDay.MONDAY + self.team.save() + + # TRICKY: This is the previous UTC day in Asia/Tokyo + with freeze_time(datetime(2020, 1, 26, 3, 0, tzinfo=ZoneInfo(self.team.timezone))): + # Total volume query + response_monday = self._run( + Filter( + data={ + "date_from": "-14d", + 
"interval": "week", + "events": [{"id": "sign up", "name": "sign up"}], + }, + team=self.team, + ), + self.team, + ) + + self.assertEqual(response_monday[0]["days"], ["2020-01-06", "2020-01-13", "2020-01-20"]) + self.assertEqual(response_monday[0]["data"], [2.0, 0.0, 1.0]) + + def test_same_day(self): + self._create_person(team_id=self.team.pk, distinct_ids=["blabla"], properties={}) + self._create_event( + team=self.team, + event="sign up", + distinct_id="blabla", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-03T01:01:01Z", + ) + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "2020-01-03", + "date_to": "2020-01-03", + "events": [{"id": "sign up", "name": "sign up"}], + }, + ), + self.team, + ) + self.assertEqual(response[0]["data"], [1.0]) + + @override_settings(PERSON_ON_EVENTS_V2_OVERRIDE=True) + @snapshot_clickhouse_queries + def test_same_day_with_person_on_events_v2(self): + person_id1 = str(uuid.uuid4()) + person_id2 = str(uuid.uuid4()) + + self._create_person(team_id=self.team.pk, distinct_ids=["distinctid1"], properties={}) + self._create_person(team_id=self.team.pk, distinct_ids=["distinctid2"], properties={}) + + self._create_event( + team=self.team, + event="sign up", + distinct_id="distinctid1", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-03T01:01:01Z", + person_id=person_id1, + ) + + self._create_event( + team=self.team, + event="sign up", + distinct_id="distinctid2", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-03T01:01:01Z", + person_id=person_id2, + ) + + create_person_id_override_by_distinct_id("distinctid1", "distinctid2", self.team.pk) + + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "2020-01-03", + "date_to": "2020-01-03", + "events": [{"id": "sign up", "name": "sign up"}], + }, + ), + 
self.team, + ) + self.assertEqual(response[0]["data"], [2.0]) + + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "2020-01-03", + "date_to": "2020-01-03", + "events": [{"id": "sign up", "name": "sign up", "math": "dau"}], + }, + ), + self.team, + ) + self.assertEqual(response[0]["data"], [1.0]) + + @override_settings(PERSON_ON_EVENTS_V2_OVERRIDE=True) + @snapshot_clickhouse_queries + def test_same_day_with_person_on_events_v2_latest_override(self): + # In this test we check that we always prioritize the latest override (based on the `version`) + # To do so, we first create an override to a person 2 that did not perform the event we're building + # the insight on, which should lead us to have 2 DAUs. We then create an override to a person 3 that did + # have the event, which should lead us to have 1 DAU only, since persons 1 and 3 are now the same person. + # Lastly, we create an override back to person 2 and check that DAUs go back to 2. + person_id1 = str(uuid.uuid4()) + person_id2 = str(uuid.uuid4()) + person_id3 = str(uuid.uuid4()) + + self._create_person(team_id=self.team.pk, distinct_ids=["distinctid1"], properties={}) + self._create_person(team_id=self.team.pk, distinct_ids=["distinctid2"], properties={}) + self._create_person(team_id=self.team.pk, distinct_ids=["distinctid3"], properties={}) + + self._create_event( + team=self.team, + event="sign up", + distinct_id="distinctid1", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-03T01:01:01Z", + person_id=person_id1, + ) + + self._create_event( + team=self.team, + event="some other event", + distinct_id="distinctid2", + properties={ + "$current_url": "first url", + "$browser": "Firefox", + "$os": "Mac", + }, + timestamp="2020-01-03T01:01:01Z", + person_id=person_id2, + ) + + self._create_event( + team=self.team, + event="sign up", + distinct_id="distinctid3", + properties={ + "$current_url": "first url", + "$browser": 
"Firefox", + "$os": "Mac", + }, + timestamp="2020-01-03T01:01:01Z", + person_id=person_id3, + ) + + create_person_id_override_by_distinct_id("distinctid1", "distinctid2", self.team.pk, 0) + + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "2020-01-03", + "date_to": "2020-01-03", + "events": [{"id": "sign up", "name": "sign up", "math": "dau"}], + }, + ), + self.team, + ) + self.assertEqual(response[0]["data"], [2.0]) + + create_person_id_override_by_distinct_id("distinctid1", "distinctid3", self.team.pk, 1) + + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "2020-01-03", + "date_to": "2020-01-03", + "events": [{"id": "sign up", "name": "sign up", "math": "dau"}], + }, + ), + self.team, + ) + self.assertEqual(response[0]["data"], [1.0]) + + create_person_id_override_by_distinct_id("distinctid1", "distinctid2", self.team.pk, 2) + + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "2020-01-03", + "date_to": "2020-01-03", + "events": [{"id": "sign up", "name": "sign up", "math": "dau"}], + }, + ), + self.team, + ) + self.assertEqual(response[0]["data"], [2.0]) + + @also_test_with_materialized_columns(event_properties=["email", "name"], person_properties=["email", "name"]) + def test_ilike_regression_with_current_clickhouse_version(self): + # CH upgrade to 22.3 has this problem: https://github.com/ClickHouse/ClickHouse/issues/36279 + # While we're waiting to upgrade to a newer version, a workaround is to set `optimize_move_to_prewhere = 0` + # Only happens in the materialized version + + # The requirements to end up in this case is + # 1. Having a JOIN + # 2. 
Having multiple properties that filter on the same value + + with freeze_time("2020-01-04T13:01:01Z"): + self._run( + Filter( + team=self.team, + data={ + "date_from": "-14d", + "events": [ + { + "id": "watched movie", + "name": "watched movie", + "type": "events", + "order": 0, + } + ], + "properties": [ + { + "key": "email", + "type": "event", + "value": "posthog.com", + "operator": "not_icontains", + }, + { + "key": "name", + "type": "event", + "value": "posthog.com", + "operator": "not_icontains", + }, + { + "key": "name", + "type": "person", + "value": "posthog.com", + "operator": "not_icontains", + }, + ], + }, + ), + self.team, + ) + + @also_test_with_person_on_events_v2 + @snapshot_clickhouse_queries + def test_trends_count_per_user_average_daily(self): + self._create_event_count_per_actor_events() + + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_LINEAR, + "events": [{"id": "viewed video", "math": "avg_count_per_actor"}], + "date_from": "2020-01-01", + "date_to": "2020-01-07", + }, + ), + self.team, + ) + + assert len(daily_response) == 1 + assert daily_response[0]["days"] == [ + "2020-01-01", + "2020-01-02", + "2020-01-03", + "2020-01-04", + "2020-01-05", + "2020-01-06", + "2020-01-07", + ] + assert daily_response[0]["data"] == [1.5, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0] + + def test_trends_count_per_user_average_weekly(self): + self._create_event_count_per_actor_events() + + weekly_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_LINEAR, + "events": [{"id": "viewed video", "math": "avg_count_per_actor"}], + "date_from": "2020-01-01", + "date_to": "2020-01-07", + "interval": "week", + }, + ), + self.team, + ) + + assert len(weekly_response) == 1 + assert weekly_response[0]["days"] == ["2019-12-29", "2020-01-05"] + assert weekly_response[0]["data"] == [1.3333333333333333, 2.0] + + @also_test_with_person_on_events_v2 + @snapshot_clickhouse_queries + def 
test_trends_count_per_user_average_aggregated(self): + self._create_event_count_per_actor_events() + + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "events": [{"id": "viewed video", "math": "avg_count_per_actor"}], + "date_from": "2020-01-01", + "date_to": "2020-01-07", + }, + ), + self.team, + ) + + assert len(daily_response) == 1 + assert daily_response[0]["aggregated_value"] == 2.6666666666666665 # 8 events divided by 3 users + + def test_trends_count_per_user_maximum(self): + self._create_event_count_per_actor_events() + + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_LINEAR, + "events": [{"id": "viewed video", "math": "max_count_per_actor"}], + "date_from": "2020-01-01", + "date_to": "2020-01-07", + }, + ), + self.team, + ) + + assert len(daily_response) == 1 + assert daily_response[0]["days"] == [ + "2020-01-01", + "2020-01-02", + "2020-01-03", + "2020-01-04", + "2020-01-05", + "2020-01-06", + "2020-01-07", + ] + assert daily_response[0]["data"] == [2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0] + + def test_trends_count_per_user_average_with_event_property_breakdown(self): + self._create_event_count_per_actor_events() + + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_LINEAR, + "breakdown": "color", + "events": [{"id": "viewed video", "math": "avg_count_per_actor"}], + "date_from": "2020-01-01", + "date_to": "2020-01-07", + }, + ), + self.team, + ) + + assert len(daily_response) == 3 + assert daily_response[0]["breakdown_value"] == "red" + assert daily_response[1]["breakdown_value"] == "blue" + assert daily_response[2]["breakdown_value"] == "" + assert daily_response[0]["days"] == [ + "2020-01-01", + "2020-01-02", + "2020-01-03", + "2020-01-04", + "2020-01-05", + "2020-01-06", + "2020-01-07", + ] + assert daily_response[1]["days"] == daily_response[0]["days"] + assert daily_response[2]["days"] == daily_response[0]["days"] + assert 
daily_response[0]["data"] == [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0] # red + assert daily_response[1]["data"] == [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0] # blue + assert daily_response[2]["data"] == [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] # none + + def test_trends_count_per_user_average_with_person_property_breakdown(self): + self._create_event_count_per_actor_events() + + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_LINEAR, + "breakdown": "fruit", + "breakdown_type": "person", + "events": [{"id": "viewed video", "math": "avg_count_per_actor"}], + "date_from": "2020-01-01", + "date_to": "2020-01-07", + }, + ), + self.team, + ) + + assert len(daily_response) == 2 + assert daily_response[0]["breakdown_value"] == "mango" + assert daily_response[1]["breakdown_value"] == "tomato" + assert daily_response[0]["days"] == [ + "2020-01-01", + "2020-01-02", + "2020-01-03", + "2020-01-04", + "2020-01-05", + "2020-01-06", + "2020-01-07", + ] + assert daily_response[1]["days"] == daily_response[0]["days"] + assert daily_response[0]["data"] == [2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0] # red + assert daily_response[1]["data"] == [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] # blue + + def test_trends_count_per_user_average_aggregated_with_event_property_breakdown(self): + self._create_event_count_per_actor_events() + + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "breakdown": "color", + "events": [{"id": "viewed video", "math": "avg_count_per_actor"}], + "date_from": "2020-01-01", + "date_to": "2020-01-07", + }, + ), + self.team, + ) + + assert len(daily_response) == 3 + assert daily_response[0]["breakdown_value"] == "red" + assert daily_response[1]["breakdown_value"] == "blue" + assert daily_response[2]["breakdown_value"] == "" + assert daily_response[0]["aggregated_value"] == 2.0 # red + assert daily_response[1]["aggregated_value"] == 1.0 # blue + assert daily_response[2]["aggregated_value"] == 1.0 # none + + 
@snapshot_clickhouse_queries + def test_trends_count_per_user_average_aggregated_with_event_property_breakdown_with_sampling(self): + self._create_event_count_per_actor_events() + + daily_response = self._run( + Filter( + team=self.team, + data={ + "sampling_factor": 1, + "display": TRENDS_TABLE, + "breakdown": "color", + "events": [{"id": "viewed video", "math": "avg_count_per_actor"}], + "date_from": "2020-01-01", + "date_to": "2020-01-07", + }, + ), + self.team, + ) + + assert len(daily_response) == 3 + assert daily_response[0]["breakdown_value"] == "red" + assert daily_response[1]["breakdown_value"] == "blue" + assert daily_response[2]["breakdown_value"] == "" + assert daily_response[0]["aggregated_value"] == 2.0 # red + assert daily_response[1]["aggregated_value"] == 1.0 # blue + assert daily_response[2]["aggregated_value"] == 1.0 # none + + @snapshot_clickhouse_queries + def test_trends_count_per_group_average_daily(self): + self._create_event_count_per_actor_events() + GroupTypeMapping.objects.create(team=self.team, group_type="shape", group_type_index=0) + self._create_group(team_id=self.team.pk, group_type_index=0, group_key="bouba") + self._create_group(team_id=self.team.pk, group_type_index=0, group_key="kiki") + + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_LINEAR, + "events": [ + { + "id": "viewed video", + "math": "avg_count_per_actor", + "math_group_type_index": 0, + } + ], + "date_from": "2020-01-01", + "date_to": "2020-01-07", + }, + ), + self.team, + ) + + assert len(daily_response) == 1 + assert daily_response[0]["days"] == [ + "2020-01-01", + "2020-01-02", + "2020-01-03", + "2020-01-04", + "2020-01-05", + "2020-01-06", + "2020-01-07", + ] + assert daily_response[0]["data"] == [ + 3.0, # 3 group-assigned "viewed video" events by 2 persons / 1 group (bouba) + 0.0, # No events at all + 0.0, # No "viewed video" events + 1.0, # 1 group-assigned "viewed video" event by 1 person / 1 group (kiki) + 1.5, # 3 
group-assigned "viewed video" events by 1 person / 2 groups (bouba, kiki) + # The group-less event is ignored! + 0.0, # No events at all + 0.0, # No events at all + ] + + @snapshot_clickhouse_queries + def test_trends_count_per_group_average_aggregated(self): + self._create_event_count_per_actor_events() + GroupTypeMapping.objects.create(team=self.team, group_type="shape", group_type_index=0) + self._create_group(team_id=self.team.pk, group_type_index=0, group_key="bouba") + self._create_group(team_id=self.team.pk, group_type_index=0, group_key="kiki") + + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_TABLE, + "events": [ + { + "id": "viewed video", + "math": "avg_count_per_actor", + "math_group_type_index": 0, + } + ], + "date_from": "2020-01-01", + "date_to": "2020-01-07", + }, + ), + self.team, + ) + + assert len(daily_response) == 1 + assert daily_response[0]["aggregated_value"] == 3.5 # 7 relevant events divided by 2 groups + + def test_trends_breakdown_timezone(self): + self.team.timezone = "US/Pacific" + self.team.save() + self._create_event_count_per_actor_events() + + with freeze_time("2020-01-03 19:06:34"): + self._create_person(team_id=self.team.pk, distinct_ids=["another_user"]) + self._create_event( + team=self.team, + event="viewed video", + distinct_id="another_user", + properties={"color": "orange"}, + ) + + daily_response = self._run( + Filter( + team=self.team, + data={ + "display": TRENDS_LINEAR, + "events": [{"id": "viewed video", "math": "dau"}], + "breakdown": "color", + "date_from": "2020-01-01", + "date_to": "2020-03-07", + "interval": "month", + }, + ), + self.team, + ) + + # assert len(daily_response) == 4 + assert daily_response[0]["days"] == ["2020-01-01", "2020-02-01", "2020-03-01"] + assert daily_response[1]["days"] == ["2020-01-01", "2020-02-01", "2020-03-01"] + assert daily_response[2]["days"] == ["2020-01-01", "2020-02-01", "2020-03-01"] + + def _create_groups(self): + 
GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=0) + GroupTypeMapping.objects.create(team=self.team, group_type="company", group_type_index=1) + + self._create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="org:5", + properties={"industry": "finance"}, + ) + self._create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="org:6", + properties={"industry": "technology"}, + ) + self._create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="org:7", + properties={"industry": "finance"}, + ) + self._create_group( + team_id=self.team.pk, + group_type_index=1, + group_key="company:10", + properties={"industry": "finance"}, + ) + + # TODO: Delete this test when moved to person-on-events + def test_breakdown_with_filter_groups(self): + self._create_groups() + + self._create_event( + event="sign up", + distinct_id="person1", + team=self.team, + properties={"key": "oh", "$group_0": "org:7", "$group_1": "company:10"}, + timestamp="2020-01-02T12:00:00Z", + ) + self._create_event( + event="sign up", + distinct_id="person1", + team=self.team, + properties={"key": "uh", "$group_0": "org:5"}, + timestamp="2020-01-02T12:00:01Z", + ) + self._create_event( + event="sign up", + distinct_id="person1", + team=self.team, + properties={"key": "uh", "$group_0": "org:6"}, + timestamp="2020-01-02T12:00:02Z", + ) + + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12T00:00:00Z", + "breakdown": "key", + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + } + ], + "properties": [ + { + "key": "industry", + "value": "finance", + "type": "group", + "group_type_index": 0, + } + ], + }, + ), + self.team, + ) + + self.assertEqual(len(response), 2) + self.assertEqual(response[0]["breakdown_value"], "oh") + self.assertEqual(response[0]["count"], 1) + self.assertEqual(response[1]["breakdown_value"], 
"uh") + self.assertEqual(response[1]["count"], 1) + + @also_test_with_materialized_columns( + event_properties=["key"], + group_properties=[(0, "industry")], + materialize_only_with_person_on_events=True, + ) + @snapshot_clickhouse_queries + def test_breakdown_with_filter_groups_person_on_events(self): + self._create_groups() + + self._create_event( + event="sign up", + distinct_id="person1", + team=self.team, + properties={"key": "oh", "$group_0": "org:7", "$group_1": "company:10"}, + timestamp="2020-01-02T12:00:00Z", + ) + self._create_event( + event="sign up", + distinct_id="person1", + team=self.team, + properties={"key": "uh", "$group_0": "org:5"}, + timestamp="2020-01-02T12:00:01Z", + ) + self._create_event( + event="sign up", + distinct_id="person1", + team=self.team, + properties={"key": "uh", "$group_0": "org:6"}, + timestamp="2020-01-02T12:00:02Z", + ) + + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12T00:00:00Z", + "breakdown": "key", + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + } + ], + "properties": [ + { + "key": "industry", + "value": "finance", + "type": "group", + "group_type_index": 0, + } + ], + }, + ), + self.team, + ) + + self.assertEqual(len(response), 2) + self.assertEqual(response[0]["breakdown_value"], "oh") + self.assertEqual(response[0]["count"], 1) + self.assertEqual(response[1]["breakdown_value"], "uh") + self.assertEqual(response[1]["count"], 1) + + @override_settings(PERSON_ON_EVENTS_V2_OVERRIDE=True) + @snapshot_clickhouse_queries + def test_breakdown_with_filter_groups_person_on_events_v2(self): + self._create_groups() + + id1 = str(uuid.uuid4()) + id2 = str(uuid.uuid4()) + self._create_event( + event="sign up", + distinct_id="test_breakdown_d1", + team=self.team, + properties={"key": "oh", "$group_0": "org:7", "$group_1": "company:10"}, + timestamp="2020-01-02T12:00:00Z", + person_id=id1, + ) + 
self._create_event( + event="sign up", + distinct_id="test_breakdown_d1", + team=self.team, + properties={"key": "uh", "$group_0": "org:5"}, + timestamp="2020-01-02T12:00:01Z", + person_id=id1, + ) + self._create_event( + event="sign up", + distinct_id="test_breakdown_d1", + team=self.team, + properties={"key": "uh", "$group_0": "org:6"}, + timestamp="2020-01-02T12:00:02Z", + person_id=id1, + ) + self._create_event( + event="sign up", + distinct_id="test_breakdown_d2", + team=self.team, + properties={"key": "uh", "$group_0": "org:6"}, + timestamp="2020-01-02T12:00:02Z", + person_id=id2, + ) + + create_person_id_override_by_distinct_id("test_breakdown_d1", "test_breakdown_d2", self.team.pk) + response = self._run( + Filter( + team=self.team, + data={ + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12T00:00:00Z", + "breakdown": "key", + "events": [ + { + "id": "sign up", + "name": "sign up", + "type": "events", + "order": 0, + "math": "dau", + } + ], + "properties": [ + { + "key": "industry", + "value": "finance", + "type": "group", + "group_type_index": 0, + } + ], + }, + ), + self.team, + ) + + self.assertEqual(len(response), 2) + self.assertEqual(response[0]["breakdown_value"], "oh") + self.assertEqual(response[0]["count"], 1) + self.assertEqual(response[1]["breakdown_value"], "uh") + self.assertEqual(response[1]["count"], 1) + + # TODO: Delete this test when moved to person-on-events + def test_breakdown_by_group_props(self): + self._create_groups() + + journey = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 12), + "properties": {"$group_0": "org:5"}, + "group0_properties": {"industry": "finance"}, + }, + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$group_0": "org:6"}, + "group0_properties": {"industry": "technology"}, + }, + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$group_0": "org:7", "$group_1": "company:10"}, + "group0_properties": 
{"industry": "finance"}, + "group1_properties": {"industry": "finance"}, + }, + ] + } + + journeys_for(events_by_person=journey, team=self.team) + + filter = Filter( + team=self.team, + data={ + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12", + "breakdown": "industry", + "breakdown_type": "group", + "breakdown_group_type_index": 0, + "events": [{"id": "sign up", "name": "sign up", "type": "events", "order": 0}], + }, + ) + response = self._run(filter, self.team) + + self.assertEqual(len(response), 2) + self.assertEqual(response[0]["breakdown_value"], "finance") + self.assertEqual(response[0]["count"], 2) + self.assertEqual(response[1]["breakdown_value"], "technology") + self.assertEqual(response[1]["count"], 1) + + filter = filter.shallow_clone( + { + "breakdown_value": "technology", + "date_from": "2020-01-02T00:00:00Z", + "date_to": "2020-01-03", + } + ) + entity = Entity({"id": "sign up", "name": "sign up", "type": "events", "order": 0}) + res = self._get_trend_people(filter, entity) + + self.assertEqual(res[0]["distinct_ids"], ["person1"]) + + @also_test_with_materialized_columns( + group_properties=[(0, "industry")], materialize_only_with_person_on_events=True + ) + @snapshot_clickhouse_queries + def test_breakdown_by_group_props_person_on_events(self): + self._create_groups() + + journey = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 12), + "properties": {"$group_0": "org:5"}, + "group0_properties": {"industry": "finance"}, + }, + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$group_0": "org:6"}, + "group0_properties": {"industry": "technology"}, + }, + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$group_0": "org:7", "$group_1": "company:10"}, + "group0_properties": {"industry": "finance"}, + "group1_properties": {"industry": "finance"}, + }, + ] + } + + journeys_for(events_by_person=journey, team=self.team) + + filter = Filter( + 
team=self.team, + data={ + "date_from": "2020-01-01", + "date_to": "2020-01-12", + "breakdown": "industry", + "breakdown_type": "group", + "breakdown_group_type_index": 0, + "events": [{"id": "sign up", "name": "sign up", "type": "events", "order": 0}], + }, + ) + + with override_instance_config("PERSON_ON_EVENTS_ENABLED", True): + response = self._run(filter, self.team) + + self.assertEqual(len(response), 2) + self.assertEqual(response[0]["breakdown_value"], "finance") + self.assertEqual(response[0]["count"], 2) + self.assertEqual(response[1]["breakdown_value"], "technology") + self.assertEqual(response[1]["count"], 1) + + filter = filter.shallow_clone( + { + "breakdown_value": "technology", + "date_from": "2020-01-02T00:00:00Z", + "date_to": "2020-01-02", + } + ) + entity = Entity({"id": "sign up", "name": "sign up", "type": "events", "order": 0}) + res = self._get_trend_people(filter, entity) + + self.assertEqual(res[0]["distinct_ids"], ["person1"]) + + # TODO: Delete this test when moved to person-on-events + def test_breakdown_by_group_props_with_person_filter(self): + self._create_groups() + + Person.objects.create(team_id=self.team.pk, distinct_ids=["person1"], properties={"key": "value"}) + + self._create_event( + event="sign up", + distinct_id="person1", + team=self.team, + properties={"$group_0": "org:5"}, + timestamp="2020-01-02T12:00:00Z", + person_properties={"key": "value"}, + group0_properties={"industry": "finance"}, + ) + self._create_event( + event="sign up", + distinct_id="person2", + team=self.team, + properties={"$group_0": "org:6"}, + timestamp="2020-01-02T12:00:00Z", + person_properties={}, + group0_properties={"industry": "technology"}, + ) + + filter = Filter( + team=self.team, + data={ + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12T00:00:00Z", + "breakdown": "industry", + "breakdown_type": "group", + "breakdown_group_type_index": 0, + "events": [{"id": "sign up", "name": "sign up", "type": "events", "order": 0}], + 
"properties": [{"key": "key", "value": "value", "type": "person"}], + }, + ) + + response = self._run(filter, self.team) + + self.assertEqual(len(response), 1) + self.assertEqual(response[0]["breakdown_value"], "finance") + self.assertEqual(response[0]["count"], 1) + + # TODO: Delete this test when moved to person-on-events + def test_filtering_with_group_props(self): + self._create_groups() + + Person.objects.create(team_id=self.team.pk, distinct_ids=["person1"], properties={"key": "value"}) + self._create_event( + event="$pageview", + distinct_id="person1", + team=self.team, + timestamp="2020-01-02T12:00:00Z", + ) + self._create_event( + event="$pageview", + distinct_id="person1", + team=self.team, + properties={"$group_0": "org:5"}, + timestamp="2020-01-02T12:00:00Z", + ) + self._create_event( + event="$pageview", + distinct_id="person1", + team=self.team, + properties={"$group_0": "org:6"}, + timestamp="2020-01-02T12:00:00Z", + ) + self._create_event( + event="$pageview", + distinct_id="person1", + team=self.team, + properties={"$group_0": "org:6", "$group_1": "company:10"}, + timestamp="2020-01-02T12:00:00Z", + ) + + filter = Filter( + team=self.team, + data={ + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12T00:00:00Z", + "events": [{"id": "$pageview", "type": "events", "order": 0}], + "properties": [ + { + "key": "industry", + "value": "finance", + "type": "group", + "group_type_index": 0, + }, + {"key": "key", "value": "value", "type": "person"}, + ], + }, + ) + + response = self._run(filter, self.team) + self.assertEqual(response[0]["count"], 1) + + def test_filtering_with_group_props_event_with_no_group_data(self): + self._create_groups() + + Person.objects.create(team_id=self.team.pk, distinct_ids=["person1"], properties={"key": "value"}) + self._create_event( + event="$pageview", + distinct_id="person1", + team=self.team, + timestamp="2020-01-02T12:00:00Z", + ) + self._create_event( + event="$pageview", + distinct_id="person1", + 
team=self.team, + timestamp="2020-01-02T12:00:00Z", + ) + self._create_event( + event="$pageview", + distinct_id="person1", + team=self.team, + timestamp="2020-01-02T12:00:00Z", + ) + self._create_event( + event="$pageview", + distinct_id="person1", + team=self.team, + timestamp="2020-01-02T12:00:00Z", + ) + + filter = Filter( + team=self.team, + data={ + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12T00:00:00Z", + "events": [{"id": "$pageview", "type": "events", "order": 0}], + "properties": [ + { + "key": "industry", + "operator": "is_not", + "value": "textiles", + "type": "group", + "group_type_index": 0, + }, + {"key": "key", "value": "value", "type": "person"}, + ], + }, + ) + + response = self._run(filter, self.team) + + # we include all 4 events even though they do not have an associated group since the filter is a negative + # i.e. "industry is not textiles" includes both events associated with a group that has the property "industry" + # set to a value other than textiles AND events with no group at all + self.assertEqual(response[0]["count"], 4) + + @also_test_with_materialized_columns( + person_properties=["key"], + group_properties=[(0, "industry")], + materialize_only_with_person_on_events=True, + ) + @snapshot_clickhouse_queries + def test_breakdown_by_group_props_with_person_filter_person_on_events(self): + self._create_groups() + + Person.objects.create(team_id=self.team.pk, distinct_ids=["person1"], properties={"key": "value"}) + + self._create_event( + event="sign up", + distinct_id="person1", + team=self.team, + properties={"$group_0": "org:5"}, + timestamp="2020-01-02T12:00:00Z", + person_properties={"key": "value"}, + group0_properties={"industry": "finance"}, + ) + self._create_event( + event="sign up", + distinct_id="person2", + team=self.team, + properties={"$group_0": "org:6"}, + timestamp="2020-01-02T12:00:00Z", + person_properties={}, + group0_properties={"industry": "technology"}, + ) + + filter = Filter( + team=self.team, 
+ data={ + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12T00:00:00Z", + "breakdown": "industry", + "breakdown_type": "group", + "breakdown_group_type_index": 0, + "events": [{"id": "sign up", "name": "sign up", "type": "events", "order": 0}], + "properties": [{"key": "key", "value": "value", "type": "person"}], + }, + ) + + with override_instance_config("PERSON_ON_EVENTS_ENABLED", True): + response = self._run(filter, self.team) + + self.assertEqual(len(response), 1) + self.assertEqual(response[0]["breakdown_value"], "finance") + self.assertEqual(response[0]["count"], 1) + + @also_test_with_materialized_columns( + person_properties=["key"], + group_properties=[(0, "industry")], + materialize_only_with_person_on_events=True, + ) + @snapshot_clickhouse_queries + def test_filtering_with_group_props_person_on_events(self): + self._create_groups() + + Person.objects.create(team_id=self.team.pk, distinct_ids=["person1"], properties={"key": "value"}) + self._create_event( + event="$pageview", + distinct_id="person1", + team=self.team, + timestamp="2020-01-02T12:00:00Z", + ) + self._create_event( + event="$pageview", + distinct_id="person1", + team=self.team, + properties={"$group_0": "org:5"}, + timestamp="2020-01-02T12:00:00Z", + ) + self._create_event( + event="$pageview", + distinct_id="person1", + team=self.team, + properties={"$group_0": "org:6"}, + timestamp="2020-01-02T12:00:00Z", + ) + self._create_event( + event="$pageview", + distinct_id="person1", + team=self.team, + properties={"$group_0": "org:6", "$group_1": "company:10"}, + timestamp="2020-01-02T12:00:00Z", + ) + + filter = Filter( + team=self.team, + data={ + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12T00:00:00Z", + "events": [{"id": "$pageview", "type": "events", "order": 0}], + "properties": [ + { + "key": "industry", + "value": "finance", + "type": "group", + "group_type_index": 0, + }, + {"key": "key", "value": "value", "type": "person"}, + ], + }, + ) + + with 
override_instance_config("PERSON_ON_EVENTS_ENABLED", True): + response = self._run(filter, self.team) + self.assertEqual(response[0]["count"], 1) + + @also_test_with_materialized_columns( + group_properties=[(0, "industry"), (2, "name")], + materialize_only_with_person_on_events=True, + ) + @snapshot_clickhouse_queries + def test_filtering_by_multiple_groups_person_on_events(self): + GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=0) + GroupTypeMapping.objects.create(team=self.team, group_type="company", group_type_index=2) + + self._create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="org:5", + properties={"industry": "finance"}, + ) + self._create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="org:6", + properties={"industry": "technology"}, + ) + self._create_group( + team_id=self.team.pk, + group_type_index=2, + group_key="company:5", + properties={"name": "five"}, + ) + self._create_group( + team_id=self.team.pk, + group_type_index=2, + group_key="company:6", + properties={"name": "six"}, + ) + + journey = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 12), + "properties": {"$group_0": "org:5", "$group_2": "company:6"}, + }, + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 12, 30), + "properties": {"$group_2": "company:6"}, + }, + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$group_0": "org:6"}, + }, + { + "event": "sign up", + "timestamp": datetime(2020, 1, 3, 15), + "properties": {"$group_2": "company:5"}, + }, + ] + } + + journeys_for(events_by_person=journey, team=self.team) + + filter = Filter( + team=self.team, + data={ + "date_from": "2020-01-01T00:00:00Z", + "date_to": "2020-01-12", + "events": [{"id": "sign up", "name": "sign up", "type": "events", "order": 0}], + "properties": [ + { + "key": "industry", + "value": "finance", + "type": "group", + "group_type_index": 0, + }, + { + "key": 
"name", + "value": "six", + "type": "group", + "group_type_index": 2, + }, + ], + }, + ) + + with override_instance_config("PERSON_ON_EVENTS_ENABLED", True): + response = self._run(filter, self.team) + + self.assertEqual(len(response), 1) + self.assertEqual(response[0]["count"], 1) + self.assertEqual( + response[0]["data"], + [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ) + + filter = filter.shallow_clone({"date_from": "2020-01-02T00:00:00Z", "date_to": "2020-01-02T00:00:00Z"}) + entity = Entity({"id": "sign up", "name": "sign up", "type": "events", "order": 0}) + res = self._get_trend_people(filter, entity) + + self.assertEqual(res[0]["distinct_ids"], ["person1"]) diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index ff013658d021e..7e26cf356bb4f 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -238,10 +238,10 @@ def get_value(name: str, val: Any): series_object["breakdown_value"] = remapped_label elif self.query.breakdown.breakdown_type == "cohort": cohort_id = get_value("breakdown_value", val) - cohort_name = Cohort.objects.get(pk=cohort_id).name + cohort_name = "all users" if cohort_id == 0 else Cohort.objects.get(pk=cohort_id).name series_object["label"] = "{} - {}".format(series_object["label"], cohort_name) - series_object["breakdown_value"] = get_value("breakdown_value", val) + series_object["breakdown_value"] = "all" if cohort_id == 0 else cohort_id else: series_object["label"] = "{} - {}".format(series_object["label"], get_value("breakdown_value", val)) series_object["breakdown_value"] = get_value("breakdown_value", val) From cafd6652edb47bf163a1d48b43e9682a56c45679 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Fri, 10 Nov 2023 09:48:08 +0000 Subject: [PATCH 03/14] WIP --- .../insights/trends/aggregation_operations.py | 49 +- 
.../insights/trends/query_builder.py | 42 +- .../test/__snapshots__/test_trends.ambr | 994 ++++++++++++++++++ .../insights/trends/test/test_trends.py | 28 +- .../hogql_queries/utils/query_date_range.py | 33 +- 5 files changed, 1095 insertions(+), 51 deletions(-) create mode 100644 posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr diff --git a/posthog/hogql_queries/insights/trends/aggregation_operations.py b/posthog/hogql_queries/insights/trends/aggregation_operations.py index e0a0efbb21321..a54d097562f70 100644 --- a/posthog/hogql_queries/insights/trends/aggregation_operations.py +++ b/posthog/hogql_queries/insights/trends/aggregation_operations.py @@ -164,17 +164,25 @@ def _parent_select_query( placeholders={"inner_query": inner_query}, ) + day_start = ast.Alias( + alias="day_start", + expr=ast.Call( + name=f"toStartOf{self.query_date_range.interval_name.title()}", args=[ast.Field(chain=["timestamp"])] + ), + ) + return parse_select( """ SELECT counts AS total, - dateTrunc({interval}, timestamp) AS day_start + {day_start} FROM {inner_query} WHERE timestamp >= {date_from} AND timestamp <= {date_to} """, placeholders={ **self.query_date_range.to_placeholders(), "inner_query": inner_query, + "day_start": day_start, }, ) @@ -221,9 +229,9 @@ def _inner_select_query( COUNT(DISTINCT actor_id) AS counts FROM ( SELECT - toStartOfDay({date_to}) - toIntervalDay(number) AS timestamp + {date_to_start_of_interval} - {number_interval_period} AS timestamp FROM - numbers(dateDiff('day', toStartOfDay({date_from} - {inclusive_lookback}), {date_to})) + numbers(dateDiff({interval}, {date_from_start_of_interval} - {inclusive_lookback}, {date_to})) ) d CROSS JOIN {cross_join_select_query} e WHERE @@ -242,21 +250,48 @@ def _inner_select_query( def _events_query( self, events_where_clause: ast.Expr, sample_value: ast.RatioExpr ) -> ast.SelectQuery | ast.SelectUnionQuery: + date_filters = [ + parse_expr( + "timestamp >= {date_from} - {inclusive_lookback}", + 
placeholders={ + **self.query_date_range.to_placeholders(), + **self._interval_placeholders(), + }, + ), + parse_expr( + "timestamp <= {date_to}", + placeholders={ + **self.query_date_range.to_placeholders(), + **self._interval_placeholders(), + }, + ), + ] + + where_clause_combined = ast.And(exprs=[events_where_clause, *date_filters]) + if self._is_count_per_actor_variant(): + day_start = ast.Alias( + alias="day_start", + expr=ast.Call( + name=f"toStartOf{self.query_date_range.interval_name.title()}", + args=[ast.Field(chain=["timestamp"])], + ), + ) + return parse_select( """ SELECT count(e.uuid) AS total, - dateTrunc({interval}, timestamp) AS day_start + {day_start} FROM events AS e SAMPLE {sample} WHERE {events_where_clause} GROUP BY e.person_id, day_start """, placeholders={ - **self.query_date_range.to_placeholders(), - "events_where_clause": events_where_clause, + "events_where_clause": where_clause_combined, "sample": sample_value, + "day_start": day_start, }, ) @@ -274,7 +309,7 @@ def _events_query( actor_id """, placeholders={ - "events_where_clause": events_where_clause, + "events_where_clause": where_clause_combined, "sample": sample_value, }, ) diff --git a/posthog/hogql_queries/insights/trends/query_builder.py b/posthog/hogql_queries/insights/trends/query_builder.py index ce5268fd1fec6..30e3e206f423a 100644 --- a/posthog/hogql_queries/insights/trends/query_builder.py +++ b/posthog/hogql_queries/insights/trends/query_builder.py @@ -64,7 +64,7 @@ def _get_date_subqueries(self) -> List[ast.SelectQuery]: """ SELECT 0 AS total, - dateTrunc({interval}, {date_to}) - {number_interval_period} AS day_start + {date_to_start_of_interval} - {number_interval_period} AS day_start FROM numbers( coalesce(dateDiff({interval}, {date_from}, {date_to}), 0) @@ -81,7 +81,7 @@ def _get_date_subqueries(self) -> List[ast.SelectQuery]: """ SELECT 0 AS total, - {date_from} AS day_start + {date_from_start_of_interval} AS day_start """, placeholders={ 
**self.query_date_range.to_placeholders(), @@ -101,7 +101,7 @@ def _get_date_subqueries(self) -> List[ast.SelectQuery]: breakdown_value FROM ( SELECT - dateTrunc({interval}, {date_to}) - {number_interval_period} AS day_start + {date_to_start_of_interval} - {number_interval_period} AS day_start FROM numbers( coalesce(dateDiff({interval}, {date_from}, {date_to}), 0) @@ -127,23 +127,30 @@ def _get_date_subqueries(self) -> List[ast.SelectQuery]: ] def _get_events_subquery(self, no_modifications: Optional[bool]) -> ast.SelectQuery: + day_start = ast.Alias( + alias="day_start", + expr=ast.Call( + name=f"toStartOf{self.query_date_range.interval_name.title()}", args=[ast.Field(chain=["timestamp"])] + ), + ) + default_query = cast( ast.SelectQuery, parse_select( """ SELECT {aggregation_operation} AS total, - dateTrunc({interval}, timestamp) AS day_start + {day_start} FROM events AS e SAMPLE {sample} WHERE {events_filter} GROUP BY day_start """, placeholders={ - **self.query_date_range.to_placeholders(), "events_filter": self._events_filter(), "aggregation_operation": self._aggregation_operation.select_aggregation(), "sample": self._sample_value(), + "day_start": day_start, }, ), ) @@ -239,18 +246,19 @@ def _events_filter(self) -> ast.Expr: filters: List[ast.Expr] = [] # Dates - filters.extend( - [ - parse_expr( - "timestamp >= {date_from}", - placeholders=self.query_date_range.to_placeholders(), - ), - parse_expr( - "timestamp <= {date_to}", - placeholders=self.query_date_range.to_placeholders(), - ), - ] - ) + if not self._aggregation_operation.requires_query_orchestration(): + filters.extend( + [ + parse_expr( + "timestamp >= {date_from}", + placeholders=self.query_date_range.to_placeholders(), + ), + parse_expr( + "timestamp <= {date_to}", + placeholders=self.query_date_range.to_placeholders(), + ), + ] + ) # Series if series_event_name(self.series) is not None: diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr 
b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr new file mode 100644 index 0000000000000..bec610f25aeca --- /dev/null +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -0,0 +1,994 @@ +# name: TestTrends.test_breakdown_by_group_props_person_on_events + ' + SELECT groupArray(value) + FROM + (SELECT e__group_0.properties___industry AS value, + count(e.uuid) AS count + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), notEmpty(e.person_id), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_breakdown_by_group_props_person_on_events.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT ['finance', 'technology'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + e__group_0.properties___industry AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(in(e__group_0.properties___industry, ['finance', 'technology']), 0)) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_breakdown_by_group_props_person_on_events.2 + ' + /* user_id:0 request:_snapshot_ */ + SELECT person_id AS actor_id, + count() AS actor_value + FROM + (SELECT e.timestamp as timestamp, + e.person_id as person_id, + e.distinct_id as 
distinct_id, + e.team_id as team_id + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event = 'sign up' + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-02 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-02 23:59:59', 'UTC') + AND (has(['technology'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) + AND notEmpty(e.person_id) ) + GROUP BY actor_id + ORDER BY actor_value DESC, + actor_id DESC + LIMIT 100 + OFFSET 0 + ' +--- +# name: TestTrends.test_timezones_daily + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + 
allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.2 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS 
total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE 
and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.4 + ' + SELECT groupArray(value) + 
FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), notEmpty(e__pdi.person_id), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.5 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT ['Mac'] AS breakdown_value) ARRAY + JOIN breakdown_value AS 
breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), ['Mac']), 0)) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id 
AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'America/Phoenix'))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.2 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', 
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'America/Phoenix') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start 
+ FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.4 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), notEmpty(e__pdi.person_id), 
greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.5 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT ['Mac'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE 
equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), ['Mac']), 0)) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), 
assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'Asia/Tokyo'))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.2 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'Asia/Tokyo') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY 
person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + 
toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.4 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), notEmpty(e__pdi.person_id), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.5 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + breakdown_value + FROM + (SELECT sum(total) AS count, + 
day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT ['Mac'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), ['Mac']), 0)) + 
GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_any_event_total_count + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_any_event_total_count.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', 
assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_per_day_cumulative + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT day_start, + sum(count) OVER ( + ORDER BY day_start ASC) AS count + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), 
assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC)) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_weekly_active_users_daily + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE 
equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_weekly_active_users_daily_minus_utc + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT counts 
AS total, + toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'America/Phoenix'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'America/Phoenix')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'America/Phoenix') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'America/Phoenix')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'America/Phoenix'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'America/Phoenix'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'America/Phoenix'))), 0))) 
+ GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_weekly_active_users_hourly + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 17:00:00', 6, 'UTC'))), toIntervalHour(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 06:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 17:00:00', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 06:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT counts AS total, + toStartOfHour(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 17:00:00', 6, 'UTC'))), toIntervalHour(numbers.number)) AS timestamp + FROM numbers(dateDiff('hour', minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 06:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 17:00:00', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE 
and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 06:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 17:00:00', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 06:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 17:00:00', 6, 'UTC'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_weekly_active_users_weekly + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'UTC')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), 0) AS day_start + UNION ALL SELECT counts AS total, + toStartOfWeek(timestamp, 0) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'UTC')), 0), toIntervalWeek(numbers.number)) AS timestamp + 
FROM numbers(dateDiff('week', minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), 0), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'UTC'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_weekly_active_users_weekly_minus_utc + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + 
minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'America/Phoenix')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), 0) AS day_start + UNION ALL SELECT counts AS total, + toStartOfWeek(timestamp, 0) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'America/Phoenix')), 0), toIntervalWeek(numbers.number)) AS timestamp + FROM numbers(dateDiff('week', minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), 0), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'America/Phoenix')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'America/Phoenix') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'America/Phoenix'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'America/Phoenix'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- diff --git a/posthog/hogql_queries/insights/trends/test/test_trends.py b/posthog/hogql_queries/insights/trends/test/test_trends.py index d7ce9a0be2958..fae6358bd5744 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends.py @@ -5600,6 +5600,9 @@ def test_breakdown_user_props_with_filter(self): team=self.team, properties={"key": "val"}, ) + + flush_persons_and_events() + response = self._run( Filter( team=self.team, @@ -6009,31 +6012,6 @@ def test_weekly_active_users_aggregated_range_narrower_than_week(self): # All were active on 2020-01-12 or in the preceding 6 days self.assertEqual(result[0]["aggregated_value"], 3) - @also_test_with_different_timezones - @snapshot_clickhouse_queries - def test_weekly_active_users_monthly(self): - self._create_active_users_events() - - data = { - "date_from": "2019-12-01", - "date_to": "2020-02-29", # T'was a leap year - "interval": "month", - "events": [ - { - "id": "$pageview", - "type": "events", - "order": 0, - "math": "weekly_active", - } - ], - } - - filter = Filter(team=self.team, data=data) - result = self._run(filter, self.team) - self.assertEqual(result[0]["days"], ["2019-12-01", "2020-01-01", 
"2020-02-01"]) - # No users fall into the period of 7 days during or before the first day of any of those three months - self.assertEqual(result[0]["data"], [0.0, 0.0, 0.0]) - @also_test_with_different_timezones @snapshot_clickhouse_queries def test_weekly_active_users_daily(self): diff --git a/posthog/hogql_queries/utils/query_date_range.py b/posthog/hogql_queries/utils/query_date_range.py index e10bf90a2ea50..c076b7ddd1214 100644 --- a/posthog/hogql_queries/utils/query_date_range.py +++ b/posthog/hogql_queries/utils/query_date_range.py @@ -1,12 +1,13 @@ import re from datetime import datetime from functools import cached_property -from typing import Optional, Dict, List +from typing import Literal, Optional, Dict, List from zoneinfo import ZoneInfo from dateutil.relativedelta import relativedelta from posthog.hogql.ast import CompareOperationOp +from posthog.hogql.errors import HogQLException from posthog.hogql.parser import ast from posthog.models.team import Team from posthog.queries.util import get_earliest_timestamp @@ -114,7 +115,7 @@ def interval_type(self) -> IntervalType: return self._interval or IntervalType.day @cached_property - def interval_name(self) -> str: + def interval_name(self) -> Literal["hour", "day", "week", "month"]: return self.interval_type.name def date_to_as_hogql(self) -> ast.Expr: @@ -155,6 +156,32 @@ def number_interval_periods(self) -> ast.Expr: def interval_period_string_as_hogql_constant(self) -> ast.Expr: return ast.Constant(value=self.interval_name) + def date_from_to_start_of_week_hogql(self) -> ast.Call: + match self.interval_name: + case "hour": + return ast.Call(name="toStartOfHour", args=[self.date_from_as_hogql()]) + case "day": + return ast.Call(name="toStartOfDay", args=[self.date_from_as_hogql()]) + case "week": + return ast.Call(name="toStartOfWeek", args=[self.date_from_as_hogql()]) + case "month": + return ast.Call(name="toStartOfMonth", args=[self.date_from_as_hogql()]) + case _: + raise 
HogQLException(message="Unknown interval name") + + def date_to_to_start_of_week_hogql(self) -> ast.Call: + match self.interval_name: + case "hour": + return ast.Call(name="toStartOfHour", args=[self.date_to_as_hogql()]) + case "day": + return ast.Call(name="toStartOfDay", args=[self.date_to_as_hogql()]) + case "week": + return ast.Call(name="toStartOfWeek", args=[self.date_to_as_hogql()]) + case "month": + return ast.Call(name="toStartOfMonth", args=[self.date_to_as_hogql()]) + case _: + raise HogQLException(message="Unknown interval name") + def to_placeholders(self) -> Dict[str, ast.Expr]: return { "interval": self.interval_period_string_as_hogql_constant(), @@ -162,6 +189,8 @@ def to_placeholders(self) -> Dict[str, ast.Expr]: "number_interval_period": self.number_interval_periods(), "date_from": self.date_from_as_hogql(), "date_to": self.date_to_as_hogql(), + "date_from_start_of_interval": self.date_from_to_start_of_week_hogql(), + "date_to_start_of_interval": self.date_to_to_start_of_week_hogql(), } def to_properties(self, field: Optional[List[str]] = None) -> List[ast.Expr]: From b75a63fdf97d80d3eb8583b7cd0ca58a7b4dd2c9 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Tue, 14 Nov 2023 12:00:46 +0000 Subject: [PATCH 04/14] WIP --- posthog/hogql/property.py | 4 +- .../insights/trends/aggregation_operations.py | 63 +- .../insights/trends/breakdown.py | 6 +- .../insights/trends/breakdown_values.py | 38 +- .../hogql_queries/insights/trends/display.py | 1 + .../insights/trends/query_builder.py | 67 +- .../test/__snapshots__/test_trends.ambr | 2214 ++++++++++++++--- .../insights/trends/test/test_trends.py | 632 ++--- .../insights/trends/test/test_utils.py | 6 +- .../insights/trends/trends_query_runner.py | 16 +- .../hogql_queries/insights/trends/utils.py | 2 +- 11 files changed, 2229 insertions(+), 820 deletions(-) diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py index 9d619c23175b6..410e464049f94 100644 --- a/posthog/hogql/property.py +++ 
b/posthog/hogql/property.py @@ -290,7 +290,9 @@ def property_to_expr( # TODO: Add support for these types "group", "recording", "behavioral", and "session" types - raise NotImplementedException(f"property_to_expr not implemented for filter type {type(property).__name__}") + raise NotImplementedException( + f"property_to_expr not implemented for filter type {type(property).__name__} and {property.type}" + ) def action_to_expr(action: Action) -> ast.Expr: diff --git a/posthog/hogql_queries/insights/trends/aggregation_operations.py b/posthog/hogql_queries/insights/trends/aggregation_operations.py index a54d097562f70..422633b197743 100644 --- a/posthog/hogql_queries/insights/trends/aggregation_operations.py +++ b/posthog/hogql_queries/insights/trends/aggregation_operations.py @@ -13,7 +13,7 @@ class QueryAlternator: _group_bys: List[ast.Expr] _select_from: ast.JoinExpr | None - def __init__(self, query: ast.SelectQuery): + def __init__(self, query: ast.SelectQuery | ast.SelectUnionQuery): assert isinstance(query, ast.SelectQuery) self._query = query @@ -21,7 +21,7 @@ def __init__(self, query: ast.SelectQuery): self._group_bys = [] self._select_from = None - def build(self) -> ast.SelectQuery: + def build(self) -> ast.SelectQuery | ast.SelectUnionQuery: if len(self._selects) > 0: self._query.select.extend(self._selects) @@ -49,10 +49,14 @@ def replace_select_from(self, join_expr: ast.JoinExpr) -> None: class AggregationOperations: series: EventsNode | ActionsNode query_date_range: QueryDateRange + should_aggregate_values: bool - def __init__(self, series: EventsNode | ActionsNode, query_date_range: QueryDateRange) -> None: + def __init__( + self, series: EventsNode | ActionsNode, query_date_range: QueryDateRange, should_aggregate_values: bool + ) -> None: self.series = series self.query_date_range = query_date_range + self.should_aggregate_values = should_aggregate_values def select_aggregation(self) -> ast.Expr: if self.series.math == "hogql" and self.series.math_hogql 
is not None: @@ -86,8 +90,6 @@ def select_aggregation(self) -> ast.Expr: return self._math_quantile(0.95, None) elif self.series.math == "p99": return self._math_quantile(0.99, None) - else: - raise NotImplementedError() return parse_expr("count(e.uuid)") # All "count per actor" get replaced during query orchestration @@ -153,17 +155,25 @@ def _interval_placeholders(self): "inclusive_lookback": ast.Call(name="toIntervalDay", args=[ast.Constant(value=30)]), } - raise NotImplementedError() + return { + "exclusive_lookback": ast.Call(name="toIntervalDay", args=[ast.Constant(value=0)]), + "inclusive_lookback": ast.Call(name="toIntervalDay", args=[ast.Constant(value=0)]), + } def _parent_select_query( self, inner_query: ast.SelectQuery | ast.SelectUnionQuery ) -> ast.SelectQuery | ast.SelectUnionQuery: if self._is_count_per_actor_variant(): - return parse_select( - "SELECT total, day_start FROM {inner_query}", + query = parse_select( + "SELECT total FROM {inner_query}", placeholders={"inner_query": inner_query}, ) + if not self.should_aggregate_values: + query.select.append(ast.Field(chain=["day_start"])) + + return query + day_start = ast.Alias( alias="day_start", expr=ast.Call( @@ -171,21 +181,23 @@ def _parent_select_query( ), ) - return parse_select( + query = parse_select( """ - SELECT - counts AS total, - {day_start} + SELECT counts AS total FROM {inner_query} WHERE timestamp >= {date_from} AND timestamp <= {date_to} """, placeholders={ **self.query_date_range.to_placeholders(), "inner_query": inner_query, - "day_start": day_start, }, ) + if not self.should_aggregate_values: + query.select.append(day_start) + + return query + def _inner_select_query( self, cross_join_select_query: ast.SelectQuery | ast.SelectUnionQuery ) -> ast.SelectQuery | ast.SelectUnionQuery: @@ -209,12 +221,11 @@ def _inner_select_query( total_alias = ast.Alias(alias="total", expr=math_func) - return parse_select( + query = parse_select( """ SELECT - {total_alias}, day_start + {total_alias} 
FROM {inner_query} - GROUP BY day_start """, placeholders={ "inner_query": cross_join_select_query, @@ -222,6 +233,12 @@ def _inner_select_query( }, ) + if not self.should_aggregate_values: + query.select.append(ast.Field(chain=["day_start"])) + query.group_by = [ast.Field(chain=["day_start"])] + + return query + return parse_select( """ SELECT @@ -278,23 +295,27 @@ def _events_query( ), ) - return parse_select( + query = parse_select( """ SELECT - count(e.uuid) AS total, - {day_start} + count(e.uuid) AS total FROM events AS e SAMPLE {sample} WHERE {events_where_clause} - GROUP BY e.person_id, day_start + GROUP BY e.person_id """, placeholders={ "events_where_clause": where_clause_combined, "sample": sample_value, - "day_start": day_start, }, ) + if not self.should_aggregate_values: + query.select.append(day_start) + query.group_by.append(ast.Field(chain=["day_start"])) + + return query + return parse_select( """ SELECT diff --git a/posthog/hogql_queries/insights/trends/breakdown.py b/posthog/hogql_queries/insights/trends/breakdown.py index d18f64d8bb48a..d2808290ae419 100644 --- a/posthog/hogql_queries/insights/trends/breakdown.py +++ b/posthog/hogql_queries/insights/trends/breakdown.py @@ -19,6 +19,7 @@ class Breakdown: series: EventsNode | ActionsNode query_date_range: QueryDateRange timings: HogQLTimings + events_filter: ast.Expr def __init__( self, @@ -27,12 +28,14 @@ def __init__( series: EventsNode | ActionsNode, query_date_range: QueryDateRange, timings: HogQLTimings, + events_filter: ast.Expr, ): self.team = team self.query = query self.series = series self.query_date_range = query_date_range self.timings = timings + self.events_filter = events_filter @cached_property def enabled(self) -> bool: @@ -112,7 +115,7 @@ def _breakdown_values_ast(self) -> ast.Array: return ast.Array(exprs=[ast.Constant(value=v) for v in self._get_breakdown_values]) @cached_property - def _get_breakdown_values(self) -> ast.Array: + def _get_breakdown_values(self) -> List[str | 
int]: with self.timings.measure("breakdown_values_query"): breakdown = BreakdownValues( team=self.team, @@ -120,6 +123,7 @@ def _get_breakdown_values(self) -> ast.Array: breakdown_field=self.query.breakdown.breakdown, breakdown_type=self.query.breakdown.breakdown_type, query_date_range=self.query_date_range, + events_filter=self.events_filter, histogram_bin_count=self.query.breakdown.breakdown_histogram_bin_count, group_type_index=self.query.breakdown.breakdown_group_type_index, ) diff --git a/posthog/hogql_queries/insights/trends/breakdown_values.py b/posthog/hogql_queries/insights/trends/breakdown_values.py index 251e01d9b5333..0ac948629b3df 100644 --- a/posthog/hogql_queries/insights/trends/breakdown_values.py +++ b/posthog/hogql_queries/insights/trends/breakdown_values.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Union +from typing import List, Optional, Union, Any from posthog.hogql import ast from posthog.hogql.parser import parse_expr, parse_select from posthog.hogql.query import execute_hogql_query @@ -15,6 +15,7 @@ class BreakdownValues: query_date_range: QueryDateRange histogram_bin_count: Optional[int] group_type_index: Optional[int] + events_filter: ast.Expr def __init__( self, @@ -23,6 +24,7 @@ def __init__( breakdown_field: Union[str, float], query_date_range: QueryDateRange, breakdown_type: str, + events_filter: ast.Expr, histogram_bin_count: Optional[float] = None, group_type_index: Optional[float] = None, ): @@ -31,6 +33,7 @@ def __init__( self.breakdown_field = breakdown_field self.query_date_range = query_date_range self.breakdown_type = breakdown_type + self.events_filter = events_filter self.histogram_bin_count = int(histogram_bin_count) if histogram_bin_count is not None else None self.group_type_index = int(group_type_index) if group_type_index is not None else None @@ -74,7 +77,7 @@ def get_breakdown_values(self) -> List[str | int]: value DESC """, placeholders={ - "events_where": self._where_filter(), + "events_where": 
self.events_filter, "select_field": select_field, }, ) @@ -97,35 +100,12 @@ def get_breakdown_values(self) -> List[str | int]: team=self.team, ) - values = response.results[0][0] - return values - - def _where_filter(self) -> ast.Expr: - filters: List[ast.Expr] = [] - - filters.append(parse_expr("notEmpty(e.person_id)")) - filters.extend( - [ - parse_expr( - "timestamp >= {date_from}", - placeholders=self.query_date_range.to_placeholders(), - ), - parse_expr( - "timestamp <= {date_to}", - placeholders=self.query_date_range.to_placeholders(), - ), - ] - ) + values: List[Any] = response.results[0][0] - if self.event_name is not None: - filters.append( - parse_expr( - "event = {event}", - placeholders={"event": ast.Constant(value=self.event_name)}, - ) - ) + if self.histogram_bin_count is None: + values.insert(0, "") - return ast.And(exprs=filters) + return values def _to_bucketing_expression(self) -> ast.Expr: assert isinstance(self.histogram_bin_count, int) diff --git a/posthog/hogql_queries/insights/trends/display.py b/posthog/hogql_queries/insights/trends/display.py index 3e8cf3ade278d..cc6ed758356ba 100644 --- a/posthog/hogql_queries/insights/trends/display.py +++ b/posthog/hogql_queries/insights/trends/display.py @@ -14,6 +14,7 @@ def should_aggregate_values(self) -> bool: or self.display_type == ChartDisplayType.ActionsPie or self.display_type == ChartDisplayType.ActionsBarValue or self.display_type == ChartDisplayType.WorldMap + or self.display_type == ChartDisplayType.ActionsTable ) def wrap_inner_query(self, inner_query: ast.SelectQuery, breakdown_enabled: bool) -> ast.SelectQuery: diff --git a/posthog/hogql_queries/insights/trends/query_builder.py b/posthog/hogql_queries/insights/trends/query_builder.py index 30e3e206f423a..a07faedb64d91 100644 --- a/posthog/hogql_queries/insights/trends/query_builder.py +++ b/posthog/hogql_queries/insights/trends/query_builder.py @@ -1,7 +1,7 @@ from typing import List, Optional, cast from posthog.hogql import ast from 
posthog.hogql.parser import parse_expr, parse_select -from posthog.hogql.property import property_to_expr +from posthog.hogql.property import action_to_expr, property_to_expr from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.insights.trends.aggregation_operations import ( AggregationOperations, @@ -10,6 +10,7 @@ from posthog.hogql_queries.insights.trends.display import TrendsDisplay from posthog.hogql_queries.insights.trends.utils import series_event_name from posthog.hogql_queries.utils.query_date_range import QueryDateRange +from posthog.models.action.action import Action from posthog.models.filters.mixins.utils import cached_property from posthog.models.team.team import Team from posthog.schema import ActionsNode, ChartDisplayType, EventsNode, TrendsQuery @@ -37,12 +38,15 @@ def __init__( self.timings = timings def build_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: - date_subqueries = self._get_date_subqueries() - event_query = self._get_events_subquery(False) + if self._trends_display.should_aggregate_values(): + events_query = self._get_events_subquery(False) + else: + date_subqueries = self._get_date_subqueries() + event_query = self._get_events_subquery(False) - date_events_union = ast.SelectUnionQuery(select_queries=[*date_subqueries, event_query]) + events_query = ast.SelectUnionQuery(select_queries=[*date_subqueries, event_query]) - inner_select = self._inner_select_query(date_events_union) + inner_select = self._inner_select_query(events_query) full_query = self._outer_select_query(inner_select) return full_query @@ -139,22 +143,25 @@ def _get_events_subquery(self, no_modifications: Optional[bool]) -> ast.SelectQu parse_select( """ SELECT - {aggregation_operation} AS total, - {day_start} + {aggregation_operation} AS total FROM events AS e SAMPLE {sample} WHERE {events_filter} - GROUP BY day_start """, placeholders={ - "events_filter": self._events_filter(), + "events_filter": self._events_filter(ignore_breakdowns=False), 
"aggregation_operation": self._aggregation_operation.select_aggregation(), "sample": self._sample_value(), - "day_start": day_start, }, ), ) + default_query.group_by = [] + + if not self._trends_display.should_aggregate_values(): + default_query.select.append(day_start) + default_query.group_by.append(ast.Field(chain=["day_start"])) + # No breakdowns and no complex series aggregation if ( not self._breakdown.enabled and not self._aggregation_operation.requires_query_orchestration() @@ -163,7 +170,7 @@ def _get_events_subquery(self, no_modifications: Optional[bool]) -> ast.SelectQu # Both breakdowns and complex series aggregation elif self._breakdown.enabled and self._aggregation_operation.requires_query_orchestration(): orchestrator = self._aggregation_operation.get_query_orchestrator( - events_where_clause=self._events_filter(), + events_where_clause=self._events_filter(ignore_breakdowns=False), sample_value=self._sample_value(), ) @@ -184,7 +191,7 @@ def _get_events_subquery(self, no_modifications: Optional[bool]) -> ast.SelectQu # Just complex series aggregation elif self._aggregation_operation.requires_query_orchestration(): return self._aggregation_operation.get_query_orchestrator( - events_where_clause=self._events_filter(), + events_where_clause=self._events_filter(ignore_breakdowns=False), sample_value=self._sample_value(), ).build() @@ -213,22 +220,27 @@ def _outer_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: return query - def _inner_select_query(self, inner_query: ast.SelectUnionQuery) -> ast.SelectQuery: + def _inner_select_query(self, inner_query: ast.SelectQuery | ast.SelectUnionQuery) -> ast.SelectQuery: query = cast( ast.SelectQuery, parse_select( """ SELECT - sum(total) AS count, - day_start + sum(total) AS count FROM {inner_query} - GROUP BY day_start - ORDER BY day_start ASC """, placeholders={"inner_query": inner_query}, ), ) + query.group_by = [] + query.order_by = [] + + if not 
self._trends_display.should_aggregate_values(): + query.select.append(ast.Field(chain=["day_start"])) + query.group_by.append(ast.Field(chain=["day_start"])) + query.order_by.append(ast.OrderExpr(expr=ast.Field(chain=["day_start"]), order="ASC")) + if self._breakdown.enabled: query.select.append(ast.Field(chain=["breakdown_value"])) query.group_by.append(ast.Field(chain=["breakdown_value"])) @@ -241,7 +253,7 @@ def _inner_select_query(self, inner_query: ast.SelectUnionQuery) -> ast.SelectQu return query - def _events_filter(self) -> ast.Expr: + def _events_filter(self, ignore_breakdowns: bool = False) -> ast.Expr: series = self.series filters: List[ast.Expr] = [] @@ -250,7 +262,7 @@ def _events_filter(self) -> ast.Expr: filters.extend( [ parse_expr( - "timestamp >= {date_from}", + "timestamp >= {date_from_start_of_interval}", placeholders=self.query_date_range.to_placeholders(), ), parse_expr( @@ -286,11 +298,17 @@ def _events_filter(self) -> ast.Expr: if series.properties is not None and series.properties != []: filters.append(property_to_expr(series.properties, self.team)) + # Actions + if isinstance(series, ActionsNode): + action = Action.objects.get(pk=int(series.id), team=self.team) + filters.append(action_to_expr(action)) + # Breakdown - if self._breakdown.enabled and not self._breakdown.is_histogram_breakdown: - breakdown_filter = self._breakdown.events_where_filter() - if breakdown_filter is not None: - filters.append(breakdown_filter) + if not ignore_breakdowns: + if self._breakdown.enabled and not self._breakdown.is_histogram_breakdown: + breakdown_filter = self._breakdown.events_where_filter() + if breakdown_filter is not None: + filters.append(breakdown_filter) if len(filters) == 0: return ast.Constant(value=True) @@ -313,11 +331,12 @@ def _breakdown(self): series=self.series, query_date_range=self.query_date_range, timings=self.timings, + events_filter=self._events_filter(ignore_breakdowns=True), ) @cached_property def _aggregation_operation(self) -> 
AggregationOperations: - return AggregationOperations(self.series, self.query_date_range) + return AggregationOperations(self.series, self.query_date_range, self._trends_display.should_aggregate_values()) @cached_property def _trends_display(self) -> TrendsDisplay: diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index bec610f25aeca..18f0b7ad4ee01 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -1,3 +1,180 @@ +# name: TestTrends.test_action_filtering_with_cohort + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ' +--- +# name: TestTrends.test_action_filtering_with_cohort.1 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ' +--- +# name: TestTrends.test_action_filtering_with_cohort.2 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ' +--- +# name: TestTrends.test_action_filtering_with_cohort.3 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 2 + ' +--- +# name: TestTrends.test_action_filtering_with_cohort.4 + ' + /* celery:posthog.tasks.calculate_cohort.clear_stale_cohort */ + SELECT count() + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version < 2 + ' +--- +# name: TestTrends.test_action_filtering_with_cohort.5 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM 
numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$bool_prop'), ''), 'null'), '^"|"$', '') AS `properties___$bool_prop` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(e__pdi__person.`properties___$bool_prop`, 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, + (SELECT cohortpeople.person_id + FROM cohortpeople + WHERE 
and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 1)) + GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_action_filtering_with_cohort_poe_v2 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ' +--- +# name: TestTrends.test_action_filtering_with_cohort_poe_v2.1 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ' +--- +# name: TestTrends.test_action_filtering_with_cohort_poe_v2.2 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ' +--- +# name: TestTrends.test_action_filtering_with_cohort_poe_v2.3 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 2 + ' +--- +# name: TestTrends.test_action_filtering_with_cohort_poe_v2.4 + ' + /* celery:posthog.tasks.calculate_cohort.clear_stale_cohort */ + SELECT count() + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version < 2 + ' +--- +# name: TestTrends.test_action_filtering_with_cohort_poe_v2.5 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + 
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$bool_prop'), ''), 'null'), '^"|"$', ''), 'x'), 0), and(equals(e.event, 'sign up'), in(e.person_id, + (SELECT cohortpeople.person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2)) + GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))))) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- # name: TestTrends.test_breakdown_by_group_props_person_on_events ' SELECT groupArray(value) @@ -13,7 +190,7 @@ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), notEmpty(e.person_id), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), 
lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY value ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, @@ -56,7 +233,7 @@ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(in(e__group_0.properties___industry, ['finance', 'technology']), 0)) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(in(e__group_0.properties___industry, ['finance', 'technology']), 0)) GROUP BY day_start, breakdown_value) GROUP BY day_start, @@ -100,87 +277,174 @@ OFFSET 0 ' --- -# name: TestTrends.test_timezones_daily +# name: TestTrends.test_breakdown_by_group_props_with_person_filter_person_on_events + ' + SELECT groupArray(value) + FROM + (SELECT e__group_0.properties___industry AS value, + count(e.uuid) AS count + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON 
equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0)) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_breakdown_by_group_props_with_person_filter_person_on_events.1 ' SELECT groupArray(day_start) AS date, - groupArray(count) AS total + groupArray(count) AS total, + breakdown_value FROM (SELECT sum(total) AS count, - day_start + day_start, + breakdown_value FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))) AS day_start + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')) 
AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT ['finance'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + e__group_0.properties___industry AS breakdown_value FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(in(e__group_0.properties___industry, ['finance']), 0)) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC 
LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily.1 +# name: TestTrends.test_breakdown_weekly_active_users_aggregated ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_breakdown_weekly_active_users_aggregated.1 + ' + SELECT sum(count) AS total, + breakdown_value FROM (SELECT sum(total) AS count, - day_start + breakdown_value FROM - (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE 
and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) + (SELECT counts AS total, + breakdown_value + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts, + e.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), ['val', 'bor']), 0)), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id, + breakdown_value) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp, + e.breakdown_value + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily.2 +# name: TestTrends.test_breakdown_weekly_active_users_aggregated_materialized ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total + SELECT groupArray(value) + FROM + (SELECT nullIf(nullIf(e.mat_key, ''), 'null') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_breakdown_weekly_active_users_aggregated_materialized.1 + ' + SELECT sum(count) AS total, + breakdown_value FROM (SELECT sum(total) AS count, - day_start + breakdown_value FROM - (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, 
- toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT counts AS total, - toStartOfDay(timestamp) AS day_start + (SELECT counts AS total, + breakdown_value FROM (SELECT d.timestamp, - count(DISTINCT e.actor_id) AS counts + count(DISTINCT e.actor_id) AS counts, + e.breakdown_value FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))) AS numbers) AS d + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi.person_id AS actor_id, + nullIf(nullIf(e.mat_key, ''), 'null') AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -189,20 +453,64 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) - GROUP BY timestamp, actor_id) AS e + WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), ifNull(in(nullIf(nullIf(e.mat_key, ''), 'null'), ['val', 'bor']), 0)), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id, + breakdown_value) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) - GROUP BY d.timestamp + GROUP BY d.timestamp, + e.breakdown_value ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0))) - GROUP BY day_start - ORDER BY day_start ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily.3 +# name: TestTrends.test_filter_events_by_precalculated_cohort + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ' +--- +# name: TestTrends.test_filter_events_by_precalculated_cohort.1 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND 
version = 0 + ' +--- +# name: TestTrends.test_filter_events_by_precalculated_cohort.2 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ' +--- +# name: TestTrends.test_filter_events_by_precalculated_cohort.3 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ' +--- +# name: TestTrends.test_filter_events_by_precalculated_cohort.4 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -211,14 +519,441 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-26 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))) AS day_start + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-26 00:00:00', 6, 'UTC'))) AS day_start UNION ALL SELECT count(e.uuid) AS total, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 
equals(e.event, 'sign up')) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-26 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 23:59:59', 6, 'UTC'))), equals(e.event, 'event_name'), ifNull(equals(e__pdi__person.properties___name, 'Jane'), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ' +--- +# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.1 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ' +--- +# name: 
TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.2 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ' +--- +# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.3 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ' +--- +# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.4 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-26 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-26 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-26 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 23:59:59', 6, 'UTC'))), equals(e.event, 'event_name'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'name'), ''), 'null'), '^"|"$', ''), 'Jane'), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_property_filtering + ' + SELECT groupArray(day_start) AS date, + 
groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_property_filtering_clashing_with_event_property + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) 
+ GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_property_filtering_clashing_with_event_property.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), 
ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'name'), ''), 'null'), '^"|"$', ''), '1'), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_property_filtering_clashing_with_event_property_materialized + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + nullIf(nullIf(person.pmat_name, ''), 'null') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) 
SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_property_filtering_clashing_with_event_property_materialized.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(nullIf(nullIf(e.mat_name, ''), 'null'), '1'), 0)) + GROUP BY day_start) + GROUP BY 
day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_property_filtering_materialized + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + nullIf(nullIf(person.pmat_name, ''), 'null') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 
'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start 
+ FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.2 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', 
assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + 
WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) @@ -234,14 +969,385 @@ (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, count(e.uuid) AS count FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE 
equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), notEmpty(e__pdi.person_id), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.5 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT ['Mac'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec 
+ ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), ['Mac']), 0)) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + 
person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.2 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', 
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'America/Phoenix') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start 
+ FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.4 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + 
max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.5 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT ['Mac'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 
'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), ['Mac']), 0)) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + 
allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.2 + ' + SELECT groupArray(day_start) AS date, + 
groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'Asia/Tokyo') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), toIntervalDay(7))), 0), 
ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 
equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.4 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) GROUP BY value ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, @@ -249,54 +1355,275 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily.5 +# name: TestTrends.test_timezones_daily_plus_utc.5 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT ['Mac'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, 
day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), ['Mac']), 0)) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_hourly_relative_from + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), toIntervalHour(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC')), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfHour(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_hourly_relative_from.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), toIntervalHour(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + 
toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfHour(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_hourly_relative_from_minus_utc + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), toIntervalHour(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfHour(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING 
ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_hourly_relative_from_minus_utc.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), toIntervalHour(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfHour(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign 
up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_hourly_relative_from_plus_utc + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), toIntervalHour(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfHour(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + 
max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_hourly_relative_from_plus_utc.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), toIntervalHour(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfHour(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_weekly + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')), 0) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_weekly.1 ' SELECT groupArray(day_start) AS date, - groupArray(count) AS total, - breakdown_value + groupArray(count) AS total FROM (SELECT sum(total) AS count, - day_start, - breakdown_value + day_start FROM (SELECT 0 AS total, - ticks.day_start AS day_start, - sec.breakdown_value - FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')) AS day_start) AS ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['Mac'] AS breakdown_value) ARRAY - JOIN breakdown_value AS breakdown_value) AS sec - ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 
'UTC')) AS day_start, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC')), 3), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')), 3) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 3) AS day_start FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), ['Mac']), 0)) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY breakdown_value ASC + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 
00:00:00', 6, 'UTC')), 3)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_minus_utc +# name: TestTrends.test_timezones_weekly_minus_utc ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -305,14 +1632,14 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))) AS day_start + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'America/Phoenix')), 0) AS day_start UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + toStartOfWeek(toTimeZone(e.timestamp, 'America/Phoenix'), 0) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), 
assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'America/Phoenix')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) @@ -321,7 +1648,7 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_minus_utc.1 +# name: TestTrends.test_timezones_weekly_minus_utc.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -330,21 +1657,14 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix')), 3), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'America/Phoenix'))) AS day_start - UNION ALL SELECT 
count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'America/Phoenix')), 3) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfWeek(toTimeZone(e.timestamp, 'America/Phoenix'), 3) AS day_start FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'America/Phoenix'))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'America/Phoenix')), 3)), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) @@ -353,7 +1673,7 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_minus_utc.2 +# name: TestTrends.test_timezones_weekly_plus_utc ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -362,35 +1682,15 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 
23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))) AS day_start - UNION ALL SELECT counts AS total, - toStartOfDay(timestamp) AS day_start - FROM - (SELECT d.timestamp, - count(DISTINCT e.actor_id) AS counts - FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix')))) AS numbers) AS d - CROSS JOIN - (SELECT toTimeZone(e.timestamp, 'America/Phoenix') AS timestamp, - e__pdi.person_id AS actor_id - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, 
e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) - GROUP BY timestamp, actor_id) AS e - WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) - GROUP BY d.timestamp - ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0))) + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'Asia/Tokyo')), 0) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfWeek(toTimeZone(e.timestamp, 'Asia/Tokyo'), 0) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'Asia/Tokyo')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) LIMIT 100 SETTINGS readonly=2, @@ -398,7 +1698,7 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_minus_utc.3 +# name: TestTrends.test_timezones_weekly_plus_utc.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -407,14 +1707,14 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 
'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo')), 3), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))) AS day_start + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'Asia/Tokyo')), 3) AS day_start UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + toStartOfWeek(toTimeZone(e.timestamp, 'Asia/Tokyo'), 3) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'Asia/Tokyo')), 3)), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) @@ -423,11 +1723,11 @@ 
allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_minus_utc.4 +# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns ' SELECT groupArray(value) FROM - (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, + (SELECT e__pdi__person.properties___email AS value, count(e.uuid) AS count FROM events AS e INNER JOIN @@ -437,7 +1737,20 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), notEmpty(e__pdi.person_id), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 
'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), isNotNull(e__pdi__person.properties___email) + or isNotNull('%@posthog.com%')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0)))) GROUP BY value ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, @@ -445,7 +1758,7 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_minus_utc.5 +# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total, @@ -459,18 +1772,18 @@ ticks.day_start AS day_start, sec.breakdown_value FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')) AS day_start) AS ticks + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 
23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')) AS day_start) AS ticks CROSS JOIN (SELECT breakdown_value FROM - (SELECT ['Mac'] AS breakdown_value) ARRAY + (SELECT ['test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + e__pdi__person.properties___email AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -479,7 +1792,20 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), ['Mac']), 0)) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), 
''), 'null'), '^"|"$', '') AS `properties___$os`, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), isNotNull(e__pdi__person.properties___email) + or isNotNull('%@posthog.com%')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))), ifNull(in(e__pdi__person.properties___email, ['test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com']), 0)) GROUP BY day_start, breakdown_value) GROUP BY day_start, @@ -492,7 +1818,100 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_plus_utc +# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.2 + ' + SELECT groupArray(value) + FROM + (SELECT e__pdi__person.properties___email AS value, + count(e.uuid) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS 
person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0))) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + breakdown_value + FROM + (SELECT 
sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT ['test2@posthog.com'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + e__pdi__person.properties___email AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, 
max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), ifNull(in(e__pdi__person.properties___email, ['test2@posthog.com']), 0)) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_any_event_total_count ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -501,14 +1920,14 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) @@ -517,7 +1936,7 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_plus_utc.1 +# name: TestTrends.test_trends_any_event_total_count.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -526,21 +1945,14 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + 
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'Asia/Tokyo'))) AS day_start - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'Asia/Tokyo'))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY day_start) 
GROUP BY day_start ORDER BY day_start ASC) @@ -549,7 +1961,7 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_plus_utc.2 +# name: TestTrends.test_trends_compare_day_interval_relative_range ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -558,35 +1970,40 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))) AS day_start - UNION ALL SELECT counts AS total, - toStartOfDay(timestamp) AS day_start - FROM - (SELECT d.timestamp, - count(DISTINCT e.actor_id) AS counts - FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo')))) AS numbers) AS d - CROSS JOIN - (SELECT toTimeZone(e.timestamp, 'Asia/Tokyo') AS timestamp, - e__pdi.person_id AS actor_id - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT 
argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) - GROUP BY timestamp, actor_id) AS e - WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) - GROUP BY d.timestamp - ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0))) + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# 
name: TestTrends.test_trends_compare_day_interval_relative_range.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-21 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-21 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-21 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) LIMIT 100 SETTINGS readonly=2, @@ -594,7 +2011,7 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_plus_utc.3 +# name: TestTrends.test_trends_compare_day_interval_relative_range.2 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -603,14 +2020,14 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + 
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) @@ -619,76 +2036,52 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_plus_utc.4 +# name: TestTrends.test_trends_count_per_user_average_aggregated ' - SELECT groupArray(value) + SELECT sum(count) AS total FROM - (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, - count(e.uuid) AS count - FROM events AS e - INNER JOIN - (SELECT 
argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), notEmpty(e__pdi.person_id), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) - GROUP BY value - ORDER BY count DESC, value DESC) + (SELECT sum(total) AS count + FROM + (SELECT total + FROM + (SELECT avg(total) AS total + FROM + (SELECT count(e.uuid) AS total + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) + GROUP BY e__pdi.person_id)))) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_plus_utc.5 +# name: 
TestTrends.test_trends_count_per_user_average_aggregated_poe_v2 ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total, - breakdown_value + SELECT sum(count) AS total FROM - (SELECT sum(total) AS count, - day_start, - breakdown_value + (SELECT sum(total) AS count FROM - (SELECT 0 AS total, - ticks.day_start AS day_start, - sec.breakdown_value + (SELECT total FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')) AS day_start) AS ticks - CROSS JOIN - (SELECT breakdown_value + (SELECT avg(total) AS total FROM - (SELECT ['Mac'] AS breakdown_value) ARRAY - JOIN breakdown_value AS breakdown_value) AS sec - ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), 
lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), ['Mac']), 0)) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY breakdown_value ASC + (SELECT count(e.uuid) AS total + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) + GROUP BY e.person_id)))) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trends_any_event_total_count +# name: TestTrends.test_trends_count_per_user_average_daily ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -697,15 +2090,30 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS 
total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))) - GROUP BY day_start) + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT total, + day_start + FROM + (SELECT avg(total) AS total, + day_start + FROM + (SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) + GROUP BY e__pdi.person_id, + day_start) + GROUP BY day_start)) GROUP BY day_start ORDER BY day_start ASC) LIMIT 100 SETTINGS readonly=2, @@ -713,7 +2121,7 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trends_any_event_total_count.1 +# name: TestTrends.test_trends_count_per_user_average_daily_poe_v2 ' SELECT 
groupArray(day_start) AS date, groupArray(count) AS total @@ -722,15 +2130,23 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY day_start) + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT total, + day_start + FROM + (SELECT avg(total) AS total, + day_start + FROM + (SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), 
toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) + GROUP BY e.person_id, + day_start) + GROUP BY day_start)) GROUP BY day_start ORDER BY day_start ASC) LIMIT 100 SETTINGS readonly=2, @@ -758,7 +2174,7 @@ UNION ALL SELECT count(e.uuid) AS total, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC)) @@ -857,6 +2273,161 @@ allow_experimental_object_type=1 ' --- +# name: TestTrends.test_weekly_active_users_daily_plus_utc + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT counts AS total, + 
toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'Asia/Tokyo'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'Asia/Tokyo')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'Asia/Tokyo') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'Asia/Tokyo')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'Asia/Tokyo'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'Asia/Tokyo'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'Asia/Tokyo'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + 
LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_weekly_active_users_filtering + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + 
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(e__pdi__person.properties___name, 'person-1'), 0), ifNull(equals(e__pdi__person.properties___name, 'person-2'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_weekly_active_users_filtering_materialized + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS 
day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + nullIf(nullIf(person.pmat_name, ''), 'null') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE 
and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(e__pdi__person.properties___name, 'person-1'), 0), ifNull(equals(e__pdi__person.properties___name, 'person-2'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- # name: TestTrends.test_weekly_active_users_hourly ' SELECT groupArray(day_start) AS date, @@ -992,3 +2563,48 @@ allow_experimental_object_type=1 ' --- +# name: TestTrends.test_weekly_active_users_weekly_plus_utc + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'Asia/Tokyo')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), 0) AS 
day_start + UNION ALL SELECT counts AS total, + toStartOfWeek(timestamp, 0) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'Asia/Tokyo')), 0), toIntervalWeek(numbers.number)) AS timestamp + FROM numbers(dateDiff('week', minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), 0), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'Asia/Tokyo')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'Asia/Tokyo') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'Asia/Tokyo'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 
'Asia/Tokyo'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- diff --git a/posthog/hogql_queries/insights/trends/test/test_trends.py b/posthog/hogql_queries/insights/trends/test/test_trends.py index fae6358bd5744..7bae77a76332d 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends.py @@ -2,8 +2,7 @@ import uuid from datetime import datetime from typing import Dict, List, Optional, Tuple, Union -from unittest.mock import patch, ANY -from urllib.parse import parse_qsl, urlparse +from unittest.mock import patch from zoneinfo import ZoneInfo from django.test import override_settings @@ -20,6 +19,9 @@ TRENDS_TABLE, ) from posthog.hogql_queries.insights.trends.trends_query_runner import TrendsQueryRunner +from posthog.hogql_queries.legacy_compatibility.filter_to_query import ( + clean_properties, +) from posthog.models import ( Action, ActionStep, @@ -35,13 +37,14 @@ get_instance_setting, override_instance_config, ) -from posthog.models.person.util import create_person_distinct_id from posthog.models.property_definition import PropertyDefinition from posthog.models.team.team import Team from posthog.schema import ( + ActionsNode, BreakdownFilter, DateRange, EventsNode, + PropertyGroupFilter, TrendsFilter, TrendsQuery, ) @@ -95,8 +98,29 @@ def _create_cohort(**kwargs): return cohort +def _props(filter: Filter): + props = filter.to_dict().get("properties", None) + if not props: + return None + + if isinstance(props, list): + raw_properties = { + "type": "AND", + "values": [{"type": "AND", "values": props}], + } + else: + raw_properties = { + "type": "AND", + "values": [{"type": "AND", "values": [props]}], + } + + return PropertyGroupFilter(**clean_properties(raw_properties)) + + def convert_filter_to_trends_query(filter: Filter) -> TrendsQuery: - series: List[EventsNode] = [ + 
filter_as_dict = filter.to_dict() + + events: List[EventsNode] = [ EventsNode( event=event.id, name=event.name, @@ -109,14 +133,30 @@ def convert_filter_to_trends_query(filter: Filter) -> TrendsQuery: for event in filter.events ] + actions: List[ActionsNode] = [ + ActionsNode( + id=action.id, + name=action.name, + custom_name=action.custom_name, + math=action.math, + math_property=action.math_property, + math_hogql=action.math_hogql, + math_group_type_index=action.math_group_type_index, + ) + for action in filter.actions + ] + + series: List[EventsNode | ActionsNode] = [*events, *actions] + tq = TrendsQuery( series=series, kind="TrendsQuery", filterTestAccounts=filter.filter_test_accounts, - dateRange=DateRange( - date_from=filter.date_from.isoformat() if filter.date_from is not None else "all", - date_to=filter.date_to.isoformat() if filter.date_to is not None else None, - ), + dateRange=DateRange(date_from=filter_as_dict.get("date_from"), date_to=filter_as_dict.get("date_to")), + # dateRange=DateRange( + # date_from=filter.date_from.isoformat() if filter.date_from is not None else "all", + # date_to=filter.date_to.isoformat() if filter.date_to is not None else None, + # ), samplingFactor=filter.sampling_factor, aggregation_group_type_index=filter.aggregation_group_type_index, breakdown=BreakdownFilter( @@ -127,6 +167,7 @@ def convert_filter_to_trends_query(filter: Filter) -> TrendsQuery: breakdown_group_type_index=filter.breakdown_group_type_index, breakdown_histogram_bin_count=filter.breakdown_histogram_bin_count, ), + properties=_props(filter), interval=filter.interval, trendsFilter=TrendsFilter( display=filter.display, @@ -146,6 +187,8 @@ class TestTrends(ClickhouseTestMixin, APIBaseTest): def _run(self, filter: Filter, team: Team): flush_persons_and_events() + # trend_query = filter_to_query(filter.to_dict()) + trend_query = convert_filter_to_trends_query(filter) tqr = TrendsQueryRunner(team=team, query=trend_query) return tqr.calculate().results @@ -420,83 
+463,6 @@ def test_trends_per_day(self): self.assertEqual(response[0]["labels"][5], "2-Jan-2020") self.assertEqual(response[0]["data"][5], 1.0) - @snapshot_clickhouse_queries - def test_trend_actors_person_on_events_pagination_with_alias_inconsistencies(self): - test_person_ids = [ # 10 test person IDs (in UUIDT format), hard-coded for deterministic runs - "016f70a4-1c68-0000-db29-61f63a926520", - "016f70a4-1c68-0001-51a1-ad418c05e09f", - "016f70a4-1c68-0002-9ea5-10186329258f", - "016f70a4-1c68-0003-7680-697adb073c10", - "016f70a4-1c68-0004-d0f8-7bd581c97eff", - "016f70a4-1c68-0005-f593-e89d76db7a1f", - "016f70a4-1c68-0006-bb84-d42937ef5989", - "016f70a4-1c68-0007-923f-82720e97a6ba", - "016f70a4-1c68-0008-8970-cbb33f01de1e", - "016f70a4-1c68-0009-75a2-3755450b0b17", - ] - - with freeze_time("2020-01-04T13:00:01Z"): - all_distinct_ids = [] - for i, person_id in enumerate(test_person_ids): - distinct_id = f"blabla_{i}" - # UUIDT offers k-sortability, making this test effectively deterministic, as opposed to UUIDv4 - self._create_event( - team=self.team, - event="sign up", - distinct_id=distinct_id, - properties={"$some_property": "value", "$bool_prop": True}, - person_id=person_id, # Different person_ids, but in the end aliased to be the same person - ) - all_distinct_ids.append(distinct_id) - - person = self._create_person( - team_id=self.team.pk, - distinct_ids=all_distinct_ids, - properties={"$some_prop": "some_val"}, - uuid=test_person_ids[-1], - ) - flush_persons_and_events() - - data = { - "date_from": "-7d", - "events": [{"id": "sign up", "math": "dau"}], - "limit": 5, - } - - with override_instance_config("PERSON_ON_EVENTS_ENABLED", True): - from posthog.models.team import util - - util.can_enable_actor_on_events = True - - response = self._run(Filter(team=self.team, data=data), self.team) - self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0]) - - url = response[0]["persons_urls"][7]["url"] - people_response = 
self.client.get(f"/{url}").json() - - # pagination works, no matter how few ids in people_response - self.assertIsNotNone(people_response["next"]) - self.assertEqual(people_response["missing_persons"], 5) - - next_url = people_response["next"] - second_people_response = self.client.get(f"{next_url}").json() - - self.assertIsNotNone(second_people_response["next"]) - self.assertEqual(second_people_response["missing_persons"], 4) - - first_load_ids = sorted(str(person["id"]) for person in people_response["results"][0]["people"]) - second_load_ids = sorted(str(person["id"]) for person in second_people_response["results"][0]["people"]) - - self.assertEqual(len(first_load_ids + second_load_ids), 1) - self.assertEqual(first_load_ids + second_load_ids, [str(person.uuid)]) - - third_people_response = self.client.get(f"/{second_people_response['next']}").json() - self.assertIsNone(third_people_response["next"]) - self.assertFalse(third_people_response["missing_persons"]) - - third_load_ids = sorted(str(person["id"]) for person in third_people_response["results"][0]["people"]) - self.assertEqual(third_load_ids, []) - # just make sure this doesn't error def test_no_props(self): with freeze_time("2020-01-04T13:01:01Z"): @@ -1452,7 +1418,7 @@ def test_trends_breakdown_with_session_property_single_aggregate_math_and_breakd # empty has: 1 seconds self.assertEqual( [resp["breakdown_value"] for resp in daily_response], - ["value2", "value1", ""], + ["value1", "value2", ""], ) self.assertEqual([resp["aggregated_value"] for resp in daily_response], [12.5, 10, 1]) @@ -1948,128 +1914,6 @@ def _test_events_with_dates(self, dates: List[str], result, query_time=None, **f return response - def test_hour_interval(self): - response = self._test_events_with_dates( - dates=["2020-11-01 13:00:00", "2020-11-01 13:20:00", "2020-11-01 17:00:00"], - interval="hour", - date_from="2020-11-01 12:00:00", - query_time="2020-11-01 23:00:00", - result=[ - { - "action": { - "id": "event_name", - "type": 
"events", - "order": None, - "name": "event_name", - "custom_name": None, - "math": None, - "math_hogql": None, - "math_property": None, - "math_group_type_index": None, - "properties": [], - }, - "label": "event_name", - "count": 3.0, - "data": [0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0, 0, 0, 0, 0], - "labels": [ - "1-Nov-2020 12:00", - "1-Nov-2020 13:00", - "1-Nov-2020 14:00", - "1-Nov-2020 15:00", - "1-Nov-2020 16:00", - "1-Nov-2020 17:00", - "1-Nov-2020 18:00", - "1-Nov-2020 19:00", - "1-Nov-2020 20:00", - "1-Nov-2020 21:00", - "1-Nov-2020 22:00", - "1-Nov-2020 23:00", - ], - "days": [ - "2020-11-01 12:00:00", - "2020-11-01 13:00:00", - "2020-11-01 14:00:00", - "2020-11-01 15:00:00", - "2020-11-01 16:00:00", - "2020-11-01 17:00:00", - "2020-11-01 18:00:00", - "2020-11-01 19:00:00", - "2020-11-01 20:00:00", - "2020-11-01 21:00:00", - "2020-11-01 22:00:00", - "2020-11-01 23:00:00", - ], - } - ], - ) - self.assertEqual( - { - "date_from": datetime(2020, 11, 1, 12, tzinfo=ZoneInfo("UTC")), - "date_to": datetime(2020, 11, 1, 13, tzinfo=ZoneInfo("UTC")), - "entity_id": "event_name", - "entity_math": None, - "entity_order": None, - "entity_type": "events", - }, - response[0]["persons_urls"][0]["filter"], - ) - - def test_day_interval(self): - response = self._test_events_with_dates( - dates=["2020-11-01", "2020-11-02", "2020-11-03", "2020-11-04"], - interval="day", - date_from="2020-11-01", - date_to="2020-11-07", - result=[ - { - "action": { - "id": "event_name", - "type": "events", - "order": None, - "name": "event_name", - "custom_name": None, - "math": None, - "math_hogql": None, - "math_property": None, - "math_group_type_index": None, - "properties": [], - }, - "label": "event_name", - "count": 4.0, - "data": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0], - "labels": [ - "1-Nov-2020", - "2-Nov-2020", - "3-Nov-2020", - "4-Nov-2020", - "5-Nov-2020", - "6-Nov-2020", - "7-Nov-2020", - ], - "days": [ - "2020-11-01", - "2020-11-02", - "2020-11-03", - "2020-11-04", - "2020-11-05", - 
"2020-11-06", - "2020-11-07", - ], - } - ], - ) - self.assertEqual( - { - "date_from": datetime(2020, 11, 1, tzinfo=ZoneInfo("UTC")), - "date_to": datetime(2020, 11, 1, 23, 59, 59, 999999, tzinfo=ZoneInfo("UTC")), - "entity_id": "event_name", - "entity_math": None, - "entity_order": None, - "entity_type": "events", - }, - response[0]["persons_urls"][0]["filter"], - ) - def test_week_interval(self): self._test_events_with_dates( dates=["2020-11-01", "2020-11-10", "2020-11-11", "2020-11-18"], @@ -3094,12 +2938,12 @@ def test_trends_with_session_property_total_volume_math_with_breakdowns(self): # value1 has 0,5,10 seconds (in second interval) # value2 has 5,10,15 seconds (in second interval) - self.assertEqual([resp["breakdown_value"] for resp in daily_response], ["value2", "value1"]) + self.assertEqual([resp["breakdown_value"] for resp in daily_response], ["value1", "value2"]) self.assertCountEqual(daily_response[0]["labels"], ["22-Dec-2019", "29-Dec-2019"]) self.assertCountEqual(daily_response[0]["data"], [0, 10]) self.assertCountEqual(daily_response[1]["data"], [0, 5]) - self.assertEqual([resp["breakdown_value"] for resp in weekly_response], ["value2", "value1"]) + self.assertEqual([resp["breakdown_value"] for resp in weekly_response], ["value1", "value2"]) self.assertCountEqual( weekly_response[0]["labels"], [ @@ -4535,103 +4379,34 @@ def test_breakdown_by_person_property_pie_with_event_dau_filter(self): self.assertDictContainsSubset({"breakdown_value": "person1", "aggregated_value": 1}, event_response[0]) self.assertDictContainsSubset({"breakdown_value": "person2", "aggregated_value": 1}, event_response[1]) - def test_breakdown_hour_interval(self): - response = self._test_events_with_dates( - dates=["2020-11-01 13:00:00", "2020-11-01 13:20:00", "2020-11-01 17:00:00"], - interval="hour", - date_from="2020-11-01 12:00:00", - breakdown="$browser", - breakdown_type="event", - query_time="2020-11-01 23:00:00", - result=[ - { - "action": { - "id": "event_name", - 
"type": "events", - "order": None, - "name": "event_name", - "custom_name": None, - "math": None, - "math_hogql": None, - "math_property": None, - "math_group_type_index": None, - "properties": [], - }, - "label": "event_name", - "count": 3.0, - "data": [0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0, 0, 0, 0, 0], - "labels": [ - "1-Nov-2020 12:00", - "1-Nov-2020 13:00", - "1-Nov-2020 14:00", - "1-Nov-2020 15:00", - "1-Nov-2020 16:00", - "1-Nov-2020 17:00", - "1-Nov-2020 18:00", - "1-Nov-2020 19:00", - "1-Nov-2020 20:00", - "1-Nov-2020 21:00", - "1-Nov-2020 22:00", - "1-Nov-2020 23:00", - ], - "days": [ - "2020-11-01 12:00:00", - "2020-11-01 13:00:00", - "2020-11-01 14:00:00", - "2020-11-01 15:00:00", - "2020-11-01 16:00:00", - "2020-11-01 17:00:00", - "2020-11-01 18:00:00", - "2020-11-01 19:00:00", - "2020-11-01 20:00:00", - "2020-11-01 21:00:00", - "2020-11-01 22:00:00", - "2020-11-01 23:00:00", - ], - "persons_urls": [], - } - ], - ) - self.assertEqual( - { - "breakdown_type": "event", - "breakdown_value": "Safari", - "date_from": datetime(2020, 11, 1, 12, tzinfo=ZoneInfo("UTC")), - "date_to": datetime(2020, 11, 1, 13, tzinfo=ZoneInfo("UTC")), - "entity_id": "event_name", - "entity_math": None, - "entity_type": "events", - }, - response[0]["persons_urls"][0]["filter"], - ) - - @also_test_with_materialized_columns(person_properties=["name"]) - def test_filter_test_accounts_cohorts(self): - self._create_person(team_id=self.team.pk, distinct_ids=["person_1"], properties={"name": "John"}) - self._create_person(team_id=self.team.pk, distinct_ids=["person_2"], properties={"name": "Jane"}) - - self._create_event(event="event_name", team=self.team, distinct_id="person_1") - self._create_event(event="event_name", team=self.team, distinct_id="person_2") - self._create_event(event="event_name", team=self.team, distinct_id="person_2") - - cohort = _create_cohort( - team=self.team, - name="cohort1", - groups=[{"properties": [{"key": "name", "value": "Jane", "type": "person"}]}], - ) - 
self.team.test_account_filters = [{"key": "id", "value": cohort.pk, "type": "cohort"}] - self.team.save() - - response = self._run( - Filter( - data={"events": [{"id": "event_name"}], "filter_test_accounts": True}, - team=self.team, - ), - self.team, - ) - - self.assertEqual(response[0]["count"], 2) - self.assertEqual(response[0]["data"][-1], 2) + # TODO: test_account_filters conversion + # @also_test_with_materialized_columns(person_properties=["name"]) + # def test_filter_test_accounts_cohorts(self): + # self._create_person(team_id=self.team.pk, distinct_ids=["person_1"], properties={"name": "John"}) + # self._create_person(team_id=self.team.pk, distinct_ids=["person_2"], properties={"name": "Jane"}) + + # self._create_event(event="event_name", team=self.team, distinct_id="person_1") + # self._create_event(event="event_name", team=self.team, distinct_id="person_2") + # self._create_event(event="event_name", team=self.team, distinct_id="person_2") + + # cohort = _create_cohort( + # team=self.team, + # name="cohort1", + # groups=[{"properties": [{"key": "name", "value": "Jane", "type": "person"}]}], + # ) + # self.team.test_account_filters = [{"key": "id", "value": cohort.pk, "type": "cohort"}] + # self.team.save() + + # response = self._run( + # Filter( + # data={"events": [{"id": "event_name"}], "filter_test_accounts": True}, + # team=self.team, + # ), + # self.team, + # ) + + # self.assertEqual(response[0]["count"], 2) + # self.assertEqual(response[0]["data"][-1], 2) def test_filter_by_precalculated_cohort(self): self._create_person(team_id=self.team.pk, distinct_ids=["person_1"], properties={"name": "John"}) @@ -5569,71 +5344,72 @@ def test_person_filtering_in_cohort_in_action(self): ) self.assertEqual(action_response[0]["count"], 2) - @also_test_with_materialized_columns(event_properties=["key"], person_properties=["email"]) - def test_breakdown_user_props_with_filter(self): - self._create_person( - team_id=self.team.pk, - distinct_ids=["person1"], - 
properties={"email": "test@posthog.com"}, - ) - self._create_person( - team_id=self.team.pk, - distinct_ids=["person2"], - properties={"email": "test@gmail.com"}, - ) - person = self._create_person( - team_id=self.team.pk, - distinct_ids=["person3"], - properties={"email": "test@gmail.com"}, - ) - create_person_distinct_id(self.team.pk, "person1", str(person.uuid)) - - self._create_event( - event="sign up", - distinct_id="person1", - team=self.team, - properties={"key": "val"}, - ) - self._create_event( - event="sign up", - distinct_id="person2", - team=self.team, - properties={"key": "val"}, - ) - - flush_persons_and_events() - - response = self._run( - Filter( - team=self.team, - data={ - "date_from": "-14d", - "breakdown": "email", - "breakdown_type": "person", - "events": [ - { - "id": "sign up", - "name": "sign up", - "type": "events", - "order": 0, - } - ], - "properties": [ - { - "key": "email", - "value": "@posthog.com", - "operator": "not_icontains", - "type": "person", - }, - {"key": "key", "value": "val"}, - ], - }, - ), - self.team, - ) - - self.assertEqual(len(response), 1) - self.assertEqual(response[0]["breakdown_value"], "test@gmail.com") + # TODO: Fix exception + # @also_test_with_materialized_columns(event_properties=["key"], person_properties=["email"]) + # def test_breakdown_user_props_with_filter(self): + # self._create_person( + # team_id=self.team.pk, + # distinct_ids=["person1"], + # properties={"email": "test@posthog.com"}, + # ) + # self._create_person( + # team_id=self.team.pk, + # distinct_ids=["person2"], + # properties={"email": "test@gmail.com"}, + # ) + # person = self._create_person( + # team_id=self.team.pk, + # distinct_ids=["person3"], + # properties={"email": "test@gmail.com"}, + # ) + # create_person_distinct_id(self.team.pk, "person1", str(person.uuid)) + + # self._create_event( + # event="sign up", + # distinct_id="person1", + # team=self.team, + # properties={"key": "val"}, + # ) + # self._create_event( + # event="sign up", 
+ # distinct_id="person2", + # team=self.team, + # properties={"key": "val"}, + # ) + + # flush_persons_and_events() + + # response = self._run( + # Filter( + # team=self.team, + # data={ + # "date_from": "-14d", + # "breakdown": "email", + # "breakdown_type": "person", + # "events": [ + # { + # "id": "sign up", + # "name": "sign up", + # "type": "events", + # "order": 0, + # } + # ], + # "properties": [ + # { + # "key": "email", + # "value": "@posthog.com", + # "operator": "not_icontains", + # "type": "person", + # }, + # {"key": "key", "value": "val"}, + # ], + # }, + # ), + # self.team, + # ) + + # self.assertEqual(len(response), 1) + # self.assertEqual(response[0]["breakdown_value"], "test@gmail.com") @snapshot_clickhouse_queries @also_test_with_materialized_columns(event_properties=["key"], person_properties=["email", "$os", "$browser"]) @@ -6483,50 +6259,51 @@ def test_breakdown_weekly_active_users_aggregated(self): self.assertEqual(result[1]["breakdown_value"], "val") self.assertEqual(result[1]["aggregated_value"], 2) - @also_test_with_materialized_columns(event_properties=["key"], person_properties=["name"]) - def test_filter_test_accounts(self): - self._create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "p1"}) - self._create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-11T12:00:00Z", - properties={"key": "val"}, - ) - - self._create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"name": "p2"}) - self._create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2020-01-11T12:00:00Z", - properties={"key": "val"}, - ) - self.team.test_account_filters = [{"key": "name", "value": "p1", "operator": "is_not", "type": "person"}] - self.team.save() - filter = Filter( - team=self.team, - data={ - "date_from": "2020-01-01T00:00:00Z", - "date_to": "2020-01-12T00:00:00Z", - "events": [{"id": "$pageview", "type": "events", "order": 0}], - "filter_test_accounts": 
"true", - }, - ) - result = self._run(filter, self.team) - self.assertEqual(result[0]["count"], 1) - filter2 = Filter( - team=self.team, - data={ - "date_from": "2020-01-01T00:00:00Z", - "date_to": "2020-01-12T00:00:00Z", - "events": [{"id": "$pageview", "type": "events", "order": 0}], - }, - ) - result = self._run(filter2, self.team) - self.assertEqual(result[0]["count"], 2) - result = self._run(filter.shallow_clone({"breakdown": "key"}), self.team) - self.assertEqual(result[0]["count"], 1) + # TODO: test_account_filters conversion + # @also_test_with_materialized_columns(event_properties=["key"], person_properties=["name"]) + # def test_filter_test_accounts(self): + # self._create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "p1"}) + # self._create_event( + # team=self.team, + # event="$pageview", + # distinct_id="p1", + # timestamp="2020-01-11T12:00:00Z", + # properties={"key": "val"}, + # ) + + # self._create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"name": "p2"}) + # self._create_event( + # team=self.team, + # event="$pageview", + # distinct_id="p2", + # timestamp="2020-01-11T12:00:00Z", + # properties={"key": "val"}, + # ) + # self.team.test_account_filters = [{"key": "name", "value": "p1", "operator": "is_not", "type": "person"}] + # self.team.save() + # filter = Filter( + # team=self.team, + # data={ + # "date_from": "2020-01-01T00:00:00Z", + # "date_to": "2020-01-12T00:00:00Z", + # "events": [{"id": "$pageview", "type": "events", "order": 0}], + # "filter_test_accounts": True, + # }, + # ) + # result = self._run(filter, self.team) + # self.assertEqual(result[0]["count"], 1) + # filter2 = Filter( + # team=self.team, + # data={ + # "date_from": "2020-01-01T00:00:00Z", + # "date_to": "2020-01-12T00:00:00Z", + # "events": [{"id": "$pageview", "type": "events", "order": 0}], + # }, + # ) + # result = self._run(filter2, self.team) + # self.assertEqual(result[0]["count"], 2) + # result = 
self._run(filter.shallow_clone({"breakdown": "key"}), self.team) + # self.assertEqual(result[0]["count"], 1) @also_test_with_materialized_columns(["$some_property"]) def test_breakdown_filtering_bar_chart_by_value(self): @@ -6796,8 +6573,7 @@ def test_timezones_hourly_relative_from(self): ) query_time = datetime(2020, 1, 5, 10, 1, 1, tzinfo=ZoneInfo(self.team.timezone)) - utc_offset_hours = query_time.tzinfo.utcoffset(query_time).total_seconds() // 3600 # type: ignore - utc_offset_sign = "-" if utc_offset_hours < 0 else "+" + with freeze_time(query_time): response = self._run( Filter( @@ -6828,26 +6604,6 @@ def test_timezones_hourly_relative_from(self): ) self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 0, 0, 0, 1, 1, 0, 0]) - assert dict(parse_qsl(urlparse(response[0]["persons_urls"][7]["url"]).query)) == { - "breakdown_attribution_type": "first_touch", - "breakdown_normalize_url": "False", - "date_from": f"2020-01-05T07:00:00{utc_offset_sign}{abs(utc_offset_hours):02.0f}:00", - "date_to": f"2020-01-05T08:00:00{utc_offset_sign}{abs(utc_offset_hours):02.0f}:00", - "display": "ActionsLineGraph", - "entity_id": "sign up", - "entity_math": "dau", - "entity_type": "events", - "events": '[{"id": "sign up", "type": "events", "order": null, "name": "sign ' - 'up", "custom_name": null, "math": "dau", "math_property": null, "math_hogql": null, ' - '"math_group_type_index": null, "properties": {}}]', - "insight": "TRENDS", - "interval": "hour", - "smoothing_intervals": "1", - "cache_invalidation_key": ANY, - } - persons = self.client.get("/" + response[0]["persons_urls"][7]["url"]).json() - self.assertEqual(persons["results"][0]["count"], 1) - response = self._run( Filter( team=self.team, diff --git a/posthog/hogql_queries/insights/trends/test/test_utils.py b/posthog/hogql_queries/insights/trends/test/test_utils.py index 100809020b8f9..450cc5e66ab95 100644 --- a/posthog/hogql_queries/insights/trends/test/test_utils.py +++ 
b/posthog/hogql_queries/insights/trends/test/test_utils.py @@ -12,13 +12,13 @@ def test_properties_chain_person(): def test_properties_chain_session(): p1 = get_properties_chain(breakdown_type="session", breakdown_field="anything", group_type_index=None) - assert p1 == ["session", "session_duration"] + assert p1 == ["session", "duration"] p2 = get_properties_chain(breakdown_type="session", breakdown_field="", group_type_index=None) - assert p2 == ["session", "session_duration"] + assert p2 == ["session", "duration"] p3 = get_properties_chain(breakdown_type="session", breakdown_field="", group_type_index=1) - assert p3 == ["session", "session_duration"] + assert p3 == ["session", "duration"] def test_properties_chain_groups(): diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index 7e26cf356bb4f..472e9e596ea49 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -234,6 +234,7 @@ def get_value(name: str, val: Any): if self.query.breakdown is not None and self.query.breakdown.breakdown is not None: if self._is_breakdown_field_boolean(): remapped_label = self._convert_boolean(get_value("breakdown_value", val)) + series_object["label"] = "{} - {}".format(series_object["label"], remapped_label) series_object["breakdown_value"] = remapped_label elif self.query.breakdown.breakdown_type == "cohort": @@ -243,8 +244,12 @@ def get_value(name: str, val: Any): series_object["label"] = "{} - {}".format(series_object["label"], cohort_name) series_object["breakdown_value"] = "all" if cohort_id == 0 else cohort_id else: - series_object["label"] = "{} - {}".format(series_object["label"], get_value("breakdown_value", val)) - series_object["breakdown_value"] = get_value("breakdown_value", val) + remapped_label = get_value("breakdown_value", val) + if remapped_label == "": + remapped_label = "none" + + 
series_object["label"] = "{} - {}".format(series_object["label"], remapped_label) + series_object["breakdown_value"] = remapped_label res.append(series_object) return res @@ -285,7 +290,12 @@ def setup_series(self) -> List[SeriesWithExtras]: if self.query.breakdown is not None and self.query.breakdown.breakdown_type == "cohort": updated_series = [] - for cohort_id in self.query.breakdown.breakdown: + if isinstance(self.query.breakdown.breakdown, List): + cohort_ids = self.query.breakdown.breakdown + else: + cohort_ids = [self.query.breakdown.breakdown] + + for cohort_id in cohort_ids: for series in series_with_extras: copied_query = deepcopy(self.query) copied_query.breakdown.breakdown = cohort_id diff --git a/posthog/hogql_queries/insights/trends/utils.py b/posthog/hogql_queries/insights/trends/utils.py index 2c7ec5d1eff66..1510a87a76bef 100644 --- a/posthog/hogql_queries/insights/trends/utils.py +++ b/posthog/hogql_queries/insights/trends/utils.py @@ -17,7 +17,7 @@ def get_properties_chain( return ["person", "properties", breakdown_field] if breakdown_type == "session": - return ["session", "session_duration"] + return ["session", "duration"] if breakdown_type == "group" and group_type_index is not None: group_type_index_int = int(group_type_index) From bf0131b86b0b7c867f2f5e3a3ed59daca35f308b Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Wed, 15 Nov 2023 15:40:53 +0000 Subject: [PATCH 05/14] WIP --- .../insights/trends/aggregation_operations.py | 2 +- .../insights/trends/breakdown.py | 17 +- .../insights/trends/breakdown_values.py | 2 +- .../insights/trends/query_builder.py | 9 +- .../test/__snapshots__/test_trends.ambr | 289 ++++++++++++++++-- .../insights/trends/test/test_trends.py | 78 +++-- .../insights/trends/trends_query_runner.py | 10 +- .../hogql_queries/utils/query_date_range.py | 56 +++- 8 files changed, 399 insertions(+), 64 deletions(-) diff --git a/posthog/hogql_queries/insights/trends/aggregation_operations.py 
b/posthog/hogql_queries/insights/trends/aggregation_operations.py index 422633b197743..562576b27b5fb 100644 --- a/posthog/hogql_queries/insights/trends/aggregation_operations.py +++ b/posthog/hogql_queries/insights/trends/aggregation_operations.py @@ -72,7 +72,7 @@ def select_aggregation(self) -> ast.Expr: elif self.series.math == "unique_session": return parse_expr('count(DISTINCT e."$session_id")') elif self.series.math == "unique_group" and self.series.math_group_type_index is not None: - return parse_expr(f'count(DISTINCT e."$group_{self.series.math_group_type_index}")') + return parse_expr(f'count(DISTINCT e."$group_{int(self.series.math_group_type_index)}")') elif self.series.math_property is not None: if self.series.math == "avg": return self._math_func("avg", None) diff --git a/posthog/hogql_queries/insights/trends/breakdown.py b/posthog/hogql_queries/insights/trends/breakdown.py index d2808290ae419..29e1dde53dc96 100644 --- a/posthog/hogql_queries/insights/trends/breakdown.py +++ b/posthog/hogql_queries/insights/trends/breakdown.py @@ -96,11 +96,18 @@ def events_where_filter(self) -> ast.Expr | None: else: left = ast.Field(chain=self._properties_chain) - return ast.CompareOperation( - left=left, - op=ast.CompareOperationOp.In, - right=self._breakdown_values_ast, - ) + compare_ops = [ + ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=v)) + for v in self._get_breakdown_values + ] + + return ast.Or(exprs=compare_ops) + + # return ast.CompareOperation( + # left=left, + # op=ast.CompareOperationOp.In, + # right=self._breakdown_values_ast, + # ) @cached_property def _breakdown_buckets_ast(self) -> ast.Array: diff --git a/posthog/hogql_queries/insights/trends/breakdown_values.py b/posthog/hogql_queries/insights/trends/breakdown_values.py index 0ac948629b3df..663c3b2f64c1a 100644 --- a/posthog/hogql_queries/insights/trends/breakdown_values.py +++ b/posthog/hogql_queries/insights/trends/breakdown_values.py @@ -103,7 +103,7 @@ 
def get_breakdown_values(self) -> List[str | int]: values: List[Any] = response.results[0][0] if self.histogram_bin_count is None: - values.insert(0, "") + values.insert(0, None) return values diff --git a/posthog/hogql_queries/insights/trends/query_builder.py b/posthog/hogql_queries/insights/trends/query_builder.py index a07faedb64d91..b4d00880f2a3f 100644 --- a/posthog/hogql_queries/insights/trends/query_builder.py +++ b/posthog/hogql_queries/insights/trends/query_builder.py @@ -214,7 +214,12 @@ def _outer_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: query = self._trends_display.modify_outer_query(outer_query=query, inner_query=inner_query) if self._breakdown.enabled: - query.select.append(ast.Field(chain=["breakdown_value"])) + query.select.append( + ast.Alias( + alias="breakdown_value", + expr=ast.Call(name="ifNull", args=[ast.Field(chain=["breakdown_value"]), ast.Constant(value="")]), + ) + ) query.group_by = [ast.Field(chain=["breakdown_value"])] query.order_by = [ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC")] @@ -262,7 +267,7 @@ def _events_filter(self, ignore_breakdowns: bool = False) -> ast.Expr: filters.extend( [ parse_expr( - "timestamp >= {date_from_start_of_interval}", + "timestamp >= {date_from_with_adjusted_start_of_interval}", placeholders=self.query_date_range.to_placeholders(), ), parse_expr( diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index 18f0b7ad4ee01..73f69c14ff700 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -202,7 +202,7 @@ ' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - breakdown_value + ifNull(breakdown_value, '') AS breakdown_value FROM (SELECT sum(total) AS count, day_start, @@ -218,7 +218,7 @@ CROSS JOIN (SELECT breakdown_value 
FROM - (SELECT ['finance', 'technology'] AS breakdown_value) ARRAY + (SELECT [NULL, 'finance', 'technology'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC UNION ALL SELECT count(e.uuid) AS total, @@ -233,7 +233,7 @@ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(in(e__group_0.properties___industry, ['finance', 'technology']), 0)) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(e__group_0.properties___industry), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), ifNull(equals(e__group_0.properties___industry, 'technology'), 0))) GROUP BY day_start, breakdown_value) GROUP BY day_start, @@ -304,7 +304,7 @@ ' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - breakdown_value + ifNull(breakdown_value, '') AS breakdown_value FROM (SELECT sum(total) AS count, day_start, @@ -320,7 +320,7 @@ CROSS JOIN (SELECT breakdown_value FROM - (SELECT ['finance'] AS breakdown_value) ARRAY + (SELECT [NULL, 'finance'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC UNION ALL SELECT count(e.uuid) AS total, @@ -335,7 +335,7 @@ WHERE and(equals(groups.team_id, 2), 
ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(in(e__group_0.properties___industry, ['finance']), 0)) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0), or(isNull(e__group_0.properties___industry), ifNull(equals(e__group_0.properties___industry, 'finance'), 0))) GROUP BY day_start, breakdown_value) GROUP BY day_start, @@ -366,7 +366,7 @@ # name: TestTrends.test_breakdown_weekly_active_users_aggregated.1 ' SELECT sum(count) AS total, - breakdown_value + ifNull(breakdown_value, '') AS breakdown_value FROM (SELECT sum(total) AS count, breakdown_value @@ -392,7 +392,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), ['val', 'bor']), 0)), 
ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'bor'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id, breakdown_value) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -427,7 +427,7 @@ # name: TestTrends.test_breakdown_weekly_active_users_aggregated_materialized.1 ' SELECT sum(count) AS total, - breakdown_value + ifNull(breakdown_value, '') AS breakdown_value FROM (SELECT sum(total) AS count, breakdown_value @@ -453,7 +453,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), ifNull(in(nullIf(nullIf(e.mat_key, ''), 'null'), ['val', 'bor']), 0)), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(isNull(nullIf(nullIf(e.mat_key, ''), 'null')), ifNull(equals(nullIf(nullIf(e.mat_key, ''), 'null'), 'val'), 0), ifNull(equals(nullIf(nullIf(e.mat_key, ''), 'null'), 'bor'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id, breakdown_value) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -470,6 +470,130 @@ allow_experimental_object_type=1 ' --- +# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(breakdown_value, '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT 
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'other_value', 'value'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1.0 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), 
ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.2 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(breakdown_value, '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 
00:00:00', 6, 'UTC')) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'other_value', 'value'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1.0 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- # name: 
TestTrends.test_filter_events_by_precalculated_cohort ' @@ -981,7 +1105,7 @@ ' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - breakdown_value + ifNull(breakdown_value, '') AS breakdown_value FROM (SELECT sum(total) AS count, day_start, @@ -997,7 +1121,7 @@ CROSS JOIN (SELECT breakdown_value FROM - (SELECT ['Mac'] AS breakdown_value) ARRAY + (SELECT [NULL, 'Mac'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, @@ -1011,7 +1135,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), ['Mac']), 0)) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) GROUP BY day_start, breakdown_value) GROUP BY day_start, @@ -1170,7 +1294,7 @@ ' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - 
breakdown_value + ifNull(breakdown_value, '') AS breakdown_value FROM (SELECT sum(total) AS count, day_start, @@ -1186,7 +1310,7 @@ CROSS JOIN (SELECT breakdown_value FROM - (SELECT ['Mac'] AS breakdown_value) ARRAY + (SELECT [NULL, 'Mac'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, @@ -1200,7 +1324,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), ['Mac']), 0)) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) GROUP BY day_start, breakdown_value) GROUP BY day_start, @@ -1359,7 +1483,7 @@ ' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - breakdown_value + ifNull(breakdown_value, '') AS 
breakdown_value FROM (SELECT sum(total) AS count, day_start, @@ -1375,7 +1499,7 @@ CROSS JOIN (SELECT breakdown_value FROM - (SELECT ['Mac'] AS breakdown_value) ARRAY + (SELECT [NULL, 'Mac'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, @@ -1389,7 +1513,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), ['Mac']), 0)) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) GROUP BY day_start, breakdown_value) GROUP BY day_start, @@ -1961,6 +2085,133 @@ allow_experimental_object_type=1 ' --- +# name: TestTrends.test_trends_breakdown_cumulative + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 
'$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_breakdown_cumulative.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(breakdown_value, '') AS breakdown_value + FROM + (SELECT day_start, + sum(count) OVER (PARTITION BY breakdown_value + ORDER BY day_start ASC) AS count, + breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'value', 'other_value'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS 
breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC)) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_breakdown_cumulative_poe_v2 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_breakdown_cumulative_poe_v2.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(breakdown_value, '') AS breakdown_value + FROM + (SELECT day_start, + sum(count) OVER (PARTITION BY breakdown_value + ORDER BY day_start ASC) AS count, + breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'value', 'other_value'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 
23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC)) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- # name: TestTrends.test_trends_compare_day_interval_relative_range ' SELECT groupArray(day_start) AS date, diff --git a/posthog/hogql_queries/insights/trends/test/test_trends.py b/posthog/hogql_queries/insights/trends/test/test_trends.py index 7bae77a76332d..41fb058baeba0 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends.py @@ -20,6 +20,7 @@ ) from posthog.hogql_queries.insights.trends.trends_query_runner import TrendsQueryRunner from posthog.hogql_queries.legacy_compatibility.filter_to_query import ( + clean_entity_properties, clean_properties, ) from posthog.models import ( @@ -98,8 +99,8 @@ def _create_cohort(**kwargs): return cohort -def _props(filter: Filter): - props = filter.to_dict().get("properties", None) +def _props(dict: Dict): + props = dict.get("properties", None) if not props: return None @@ -120,31 +121,52 @@ def _props(filter: Filter): def convert_filter_to_trends_query(filter: Filter) -> TrendsQuery: filter_as_dict = filter.to_dict() - events: List[EventsNode] = [ - EventsNode( - event=event.id, - name=event.name, - custom_name=event.custom_name, - math=event.math, - math_property=event.math_property, - math_hogql=event.math_hogql, - 
math_group_type_index=event.math_group_type_index, - ) - for event in filter.events - ] - - actions: List[ActionsNode] = [ - ActionsNode( - id=action.id, - name=action.name, - custom_name=action.custom_name, - math=action.math, - math_property=action.math_property, - math_hogql=action.math_hogql, - math_group_type_index=action.math_group_type_index, - ) - for action in filter.actions - ] + events: List[EventsNode] = [] + actions: List[ActionsNode] = [] + + for event in filter.events: + if isinstance(event._data.get("properties", None), List): + properties = clean_entity_properties(event._data.get("properties", None)) + elif event._data.get("properties", None) is not None: + values = event._data.get("properties", None).get("values", None) + properties = clean_entity_properties(values) + else: + properties = None + + events.append( + EventsNode( + event=event.id, + name=event.name, + custom_name=event.custom_name, + math=event.math, + math_property=event.math_property, + math_hogql=event.math_hogql, + math_group_type_index=event.math_group_type_index, + properties=properties, + ) + ) + + for action in filter.actions: + if isinstance(action._data.get("properties", None), List): + properties = clean_entity_properties(action._data.get("properties", None)) + elif action._data.get("properties", None) is not None: + values = action._data.get("properties", None).get("values", None) + properties = clean_entity_properties(values) + else: + properties = None + + actions.append( + ActionsNode( + id=action.id, + name=action.name, + custom_name=action.custom_name, + math=action.math, + math_property=action.math_property, + math_hogql=action.math_hogql, + math_group_type_index=action.math_group_type_index, + properties=properties, + ) + ) series: List[EventsNode | ActionsNode] = [*events, *actions] @@ -167,7 +189,7 @@ def convert_filter_to_trends_query(filter: Filter) -> TrendsQuery: breakdown_group_type_index=filter.breakdown_group_type_index, 
breakdown_histogram_bin_count=filter.breakdown_histogram_bin_count, ), - properties=_props(filter), + properties=_props(filter.to_dict()), interval=filter.interval, trendsFilter=TrendsFilter( display=filter.display, diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index 472e9e596ea49..77e7ba2d89faa 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -25,6 +25,7 @@ QueryPreviousPeriodDateRange, ) from posthog.models import Team +from posthog.models.action.action import Action from posthog.models.cohort.cohort import Cohort from posthog.models.filters.mixins.utils import cached_property from posthog.models.property_definition import PropertyDefinition @@ -245,7 +246,10 @@ def get_value(name: str, val: Any): series_object["breakdown_value"] = "all" if cohort_id == 0 else cohort_id else: remapped_label = get_value("breakdown_value", val) - if remapped_label == "": + if remapped_label == "" or remapped_label is None: + # Skip the "none" series if it doesn't have any data + if series_object["count"] == 0 and series_object.get("aggregated_value", 0) == 0: + continue remapped_label = "none" series_object["label"] = "{} - {}".format(series_object["label"], remapped_label) @@ -275,6 +279,10 @@ def query_previous_date_range(self): def series_event(self, series: EventsNode | ActionsNode) -> str | None: if isinstance(series, EventsNode): return series.event + if isinstance(series, ActionsNode): + # TODO: Can we load the Action in more efficiently? 
+ action = Action.objects.get(pk=int(series.id), team=self.team) + return action.name return None def setup_series(self) -> List[SeriesWithExtras]: diff --git a/posthog/hogql_queries/utils/query_date_range.py b/posthog/hogql_queries/utils/query_date_range.py index c076b7ddd1214..d8897cc010803 100644 --- a/posthog/hogql_queries/utils/query_date_range.py +++ b/posthog/hogql_queries/utils/query_date_range.py @@ -81,9 +81,6 @@ def date_from(self) -> datetime: days=DEFAULT_DATE_FROM_DAYS ) - if not self.is_hourly: - date_from = date_from.replace(hour=0, minute=0, second=0, microsecond=0) - return date_from @cached_property @@ -156,7 +153,49 @@ def number_interval_periods(self) -> ast.Expr: def interval_period_string_as_hogql_constant(self) -> ast.Expr: return ast.Constant(value=self.interval_name) - def date_from_to_start_of_week_hogql(self) -> ast.Call: + # Returns whether we should wrap `date_from` with `toStartOf` dependent on the interval period + def use_start_of_interval(self): + if self._date_range is None or self._date_range.date_from is None: + return True + + _date_from, delta_mapping, _position = relative_date_parse_with_delta_mapping( + self._date_range.date_from, + self._team.timezone_info, + always_truncate=True, + now=self.now_with_timezone, + ) + + is_relative = delta_mapping is not None + interval = self._interval + + if not is_relative or not interval: + return True + + is_delta_hours = delta_mapping.get("hours", None) is not None + is_delta_days = delta_mapping.get("days", None) is not None + is_delta_weeks = delta_mapping.get("weeks", None) is not None + + if interval == IntervalType.hour: + return False + elif interval == IntervalType.day: + if is_delta_hours: + return False + else: + return True + elif interval == IntervalType.week: + if is_delta_hours or is_delta_days: + return False + else: + return True + elif interval == IntervalType.month: + if is_delta_hours or is_delta_days or is_delta_weeks: + return False + else: + return True + + return 
True + + def date_from_to_start_of_interval_hogql(self) -> ast.Call: match self.interval_name: case "hour": return ast.Call(name="toStartOfHour", args=[self.date_from_as_hogql()]) @@ -169,7 +208,7 @@ def date_from_to_start_of_week_hogql(self) -> ast.Call: case _: raise HogQLException(message="Unknown interval name") - def date_to_to_start_of_week_hogql(self) -> ast.Call: + def date_to_to_start_of_interval_hogql(self) -> ast.Call: match self.interval_name: case "hour": return ast.Call(name="toStartOfHour", args=[self.date_to_as_hogql()]) @@ -189,8 +228,11 @@ def to_placeholders(self) -> Dict[str, ast.Expr]: "number_interval_period": self.number_interval_periods(), "date_from": self.date_from_as_hogql(), "date_to": self.date_to_as_hogql(), - "date_from_start_of_interval": self.date_from_to_start_of_week_hogql(), - "date_to_start_of_interval": self.date_to_to_start_of_week_hogql(), + "date_from_start_of_interval": self.date_from_to_start_of_interval_hogql(), + "date_to_start_of_interval": self.date_to_to_start_of_interval_hogql(), + "date_from_with_adjusted_start_of_interval": self.date_from_to_start_of_interval_hogql() + if self.use_start_of_interval() + else self.date_from_as_hogql(), } def to_properties(self, field: Optional[List[str]] = None) -> List[ast.Expr]: From 79167f6a347185924a3af52402903256bf4f329d Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Wed, 22 Nov 2023 18:54:11 +0000 Subject: [PATCH 06/14] WIP --- .../hogql/database/schema/event_sessions.py | 16 +- posthog/hogql/property.py | 14 +- posthog/hogql/test/test_property.py | 23 + .../insights/trends/aggregation_operations.py | 96 +- .../insights/trends/breakdown.py | 7 +- .../hogql_queries/insights/trends/display.py | 43 +- .../insights/trends/query_builder.py | 107 +- .../test/__snapshots__/test_trends.ambr | 4098 +++++++++++++---- .../test/test_aggregation_operations.py | 10 +- .../insights/trends/test/test_trends.py | 103 +- .../insights/trends/trends_query_runner.py | 33 +- 
.../hogql_queries/utils/query_date_range.py | 14 +- .../utils/query_previous_period_date_range.py | 1 + posthog/queries/test/test_trends.py | 2 +- posthog/utils.py | 2 + 15 files changed, 3474 insertions(+), 1095 deletions(-) diff --git a/posthog/hogql/database/schema/event_sessions.py b/posthog/hogql/database/schema/event_sessions.py index e049b9dcaf103..b1df5e894ee1c 100644 --- a/posthog/hogql/database/schema/event_sessions.py +++ b/posthog/hogql/database/schema/event_sessions.py @@ -109,14 +109,26 @@ def _is_field_on_table(self, field: ast.Field) -> bool: def run(self, expr: ast.Expr) -> List[ast.Expr]: exprs_to_apply: List[ast.Expr] = [] + def should_add(fields: List[ast.Field]) -> bool: + for field in fields: + on_table = self._is_field_on_table(field) + if not on_table: + return False + + # Ignore comparisons on the `event` field for session durations + if field.chain[-1] == "event": + return False + + return True + if isinstance(expr, ast.And): for expression in expr.exprs: if not isinstance(expression, ast.CompareOperation): continue fields = GetFieldsTraverser(expression).fields - res = [self._is_field_on_table(field) for field in fields] - if all(res): + + if should_add(fields): exprs_to_apply.append(expression) elif isinstance(expr, ast.CompareOperation): exprs_to_apply.extend(self.run(ast.And(exprs=[expr]))) diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py index 410e464049f94..b7bd64dfabace 100644 --- a/posthog/hogql/property.py +++ b/posthog/hogql/property.py @@ -113,7 +113,9 @@ def property_to_expr( if property.type == "hogql": return parse_expr(property.key) - elif property.type == "event" or property.type == "feature" or property.type == "person": + elif ( + property.type == "event" or property.type == "feature" or property.type == "person" or property.type == "group" + ): if scope == "person" and property.type != "person": raise NotImplementedException( f"The '{property.type}' property filter only works in 'event' scope, not in 
'{scope}' scope" @@ -147,7 +149,13 @@ def property_to_expr( return ast.And(exprs=exprs) return ast.Or(exprs=exprs) - chain = ["person", "properties"] if property.type == "person" and scope != "person" else ["properties"] + if property.type == "person": + chain = ["person", "properties"] + elif property.type == "group": + chain = [f"group_{property.group_type_index}", "properties"] + else: + chain = ["properties"] + field = ast.Field(chain=chain + [property.key]) properties_field = ast.Field(chain=chain) @@ -288,7 +296,7 @@ def property_to_expr( right=ast.Constant(value=cohort.pk), ) - # TODO: Add support for these types "group", "recording", "behavioral", and "session" types + # TODO: Add support for these types "recording", "behavioral", and "session" types raise NotImplementedException( f"property_to_expr not implemented for filter type {type(property).__name__} and {property.type}" diff --git a/posthog/hogql/test/test_property.py b/posthog/hogql/test/test_property.py index c0ed528ea4da9..87a6d454bd6ad 100644 --- a/posthog/hogql/test/test_property.py +++ b/posthog/hogql/test/test_property.py @@ -65,6 +65,29 @@ def test_property_to_expr_hogql(self): ast.Constant(value=1), ) + def test_property_to_expr_group(self): + self.assertEqual( + self._property_to_expr({"type": "group", "group_type_index": 0, "key": "a", "value": "b"}), + self._parse_expr("group_0.properties.a = 'b'"), + ) + self.assertEqual( + self._property_to_expr({"type": "group", "group_type_index": 3, "key": "a", "value": "b"}), + self._parse_expr("group_3.properties.a = 'b'"), + ) + self.assertEqual( + self._parse_expr("group_0.properties.a = NULL OR (NOT JSONHas(group_0.properties, 'a'))"), + self._property_to_expr( + {"type": "group", "group_type_index": 0, "key": "a", "value": "b", "operator": "is_not_set"} + ), + ) + + with self.assertRaises(Exception) as e: + self._property_to_expr({"type": "group", "key": "a", "value": "b"}) + self.assertEqual( + str(e.exception), + "Missing required key 
group_type_index for property type group", + ) + def test_property_to_expr_event(self): self.assertEqual( self._property_to_expr({"key": "a", "value": "b"}), diff --git a/posthog/hogql_queries/insights/trends/aggregation_operations.py b/posthog/hogql_queries/insights/trends/aggregation_operations.py index 562576b27b5fb..2ba6cb73b2fb2 100644 --- a/posthog/hogql_queries/insights/trends/aggregation_operations.py +++ b/posthog/hogql_queries/insights/trends/aggregation_operations.py @@ -2,6 +2,7 @@ from posthog.hogql import ast from posthog.hogql.parser import parse_expr, parse_select from posthog.hogql_queries.utils.query_date_range import QueryDateRange +from posthog.models.team.team import Team from posthog.schema import ActionsNode, EventsNode @@ -47,13 +48,19 @@ def replace_select_from(self, join_expr: ast.JoinExpr) -> None: class AggregationOperations: + team: Team series: EventsNode | ActionsNode query_date_range: QueryDateRange should_aggregate_values: bool def __init__( - self, series: EventsNode | ActionsNode, query_date_range: QueryDateRange, should_aggregate_values: bool + self, + team: Team, + series: EventsNode | ActionsNode, + query_date_range: QueryDateRange, + should_aggregate_values: bool, ) -> None: + self.team = team self.series = series self.query_date_range = query_date_range self.should_aggregate_values = should_aggregate_values @@ -64,7 +71,8 @@ def select_aggregation(self) -> ast.Expr: elif self.series.math == "total": return parse_expr("count(e.uuid)") elif self.series.math == "dau": - return parse_expr("count(DISTINCT e.person_id)") + actor = "e.distinct_id" if self.team.aggregate_users_by_distinct_id else "e.person.id" + return parse_expr(f"count(DISTINCT {actor})") elif self.series.math == "weekly_active": return ast.Placeholder(field="replaced") # This gets replaced when doing query orchestration elif self.series.math == "monthly_active": @@ -83,7 +91,7 @@ def select_aggregation(self) -> ast.Expr: elif self.series.math == "max": return 
self._math_func("max", None) elif self.series.math == "median": - return self._math_func("median", None) + return self._math_quantile(0.5, None) elif self.series.math == "p90": return self._math_quantile(0.9, None) elif self.series.math == "p95": @@ -99,9 +107,12 @@ def requires_query_orchestration(self) -> bool: "monthly_active", ] - return self._is_count_per_actor_variant() or self.series.math in math_to_return_true + return self.is_count_per_actor_variant() or self.series.math in math_to_return_true - def _is_count_per_actor_variant(self): + def aggregating_on_session_duration(self) -> bool: + return self.series.math_property == "$session_duration" + + def is_count_per_actor_variant(self): return self.series.math in [ "avg_count_per_actor", "min_count_per_actor", @@ -128,14 +139,17 @@ def _math_func(self, method: str, override_chain: Optional[List[str | int]]) -> ) if self.series.math_property == "$session_duration": - chain = ["session", "duration"] + chain = ["session_duration"] else: chain = ["properties", self.series.math_property] return ast.Call(name=method, args=[ast.Field(chain=chain)]) def _math_quantile(self, percentile: float, override_chain: Optional[List[str | int]]) -> ast.Call: - chain = ["properties", self.series.math_property] + if self.series.math_property == "$session_duration": + chain = ["session_duration"] + else: + chain = ["properties", self.series.math_property] return ast.Call( name="quantile", @@ -163,7 +177,7 @@ def _interval_placeholders(self): def _parent_select_query( self, inner_query: ast.SelectQuery | ast.SelectUnionQuery ) -> ast.SelectQuery | ast.SelectUnionQuery: - if self._is_count_per_actor_variant(): + if self.is_count_per_actor_variant(): query = parse_select( "SELECT total FROM {inner_query}", placeholders={"inner_query": inner_query}, @@ -181,19 +195,28 @@ def _parent_select_query( ), ) - query = parse_select( - """ + query = cast( + ast.SelectQuery, + parse_select( + """ SELECT counts AS total FROM {inner_query} - 
WHERE timestamp >= {date_from} AND timestamp <= {date_to} + WHERE timestamp >= {date_from_start_of_interval} AND timestamp <= {date_to} """, - placeholders={ - **self.query_date_range.to_placeholders(), - "inner_query": inner_query, - }, + placeholders={ + **self.query_date_range.to_placeholders(), + "inner_query": inner_query, + }, + ), ) - if not self.should_aggregate_values: + if self.should_aggregate_values: + query.select = [ + ast.Alias( + alias="total", expr=ast.Call(name="count", distinct=True, args=[ast.Field(chain=["actor_id"])]) + ) + ] + else: query.select.append(day_start) return query @@ -201,7 +224,7 @@ def _parent_select_query( def _inner_select_query( self, cross_join_select_query: ast.SelectQuery | ast.SelectUnionQuery ) -> ast.SelectQuery | ast.SelectUnionQuery: - if self._is_count_per_actor_variant(): + if self.is_count_per_actor_variant(): if self.series.math == "avg_count_per_actor": math_func = self._math_func("avg", ["total"]) elif self.series.math == "min_count_per_actor": @@ -209,7 +232,7 @@ def _inner_select_query( elif self.series.math == "max_count_per_actor": math_func = self._math_func("max", ["total"]) elif self.series.math == "median_count_per_actor": - math_func = self._math_func("median", ["total"]) + math_func = self._math_quantile(0.5, ["total"]) elif self.series.math == "p90_count_per_actor": math_func = self._math_quantile(0.9, ["total"]) elif self.series.math == "p95_count_per_actor": @@ -239,8 +262,10 @@ def _inner_select_query( return query - return parse_select( - """ + query = cast( + ast.SelectQuery, + parse_select( + """ SELECT d.timestamp, COUNT(DISTINCT actor_id) AS counts @@ -257,13 +282,20 @@ def _inner_select_query( GROUP BY d.timestamp ORDER BY d.timestamp """, - placeholders={ - **self.query_date_range.to_placeholders(), - **self._interval_placeholders(), - "cross_join_select_query": cross_join_select_query, - }, + placeholders={ + **self.query_date_range.to_placeholders(), + **self._interval_placeholders(), + 
"cross_join_select_query": cross_join_select_query, + }, + ), ) + if self.should_aggregate_values: + query.select = [ast.Field(chain=["d", "timestamp"]), ast.Field(chain=["actor_id"])] + query.group_by.append(ast.Field(chain=["actor_id"])) + + return query + def _events_query( self, events_where_clause: ast.Expr, sample_value: ast.RatioExpr ) -> ast.SelectQuery | ast.SelectUnionQuery: @@ -286,7 +318,7 @@ def _events_query( where_clause_combined = ast.And(exprs=[events_where_clause, *date_filters]) - if self._is_count_per_actor_variant(): + if self.is_count_per_actor_variant(): day_start = ast.Alias( alias="day_start", expr=ast.Call( @@ -302,11 +334,16 @@ def _events_query( FROM events AS e SAMPLE {sample} WHERE {events_where_clause} - GROUP BY e.person_id + GROUP BY {person_field} """, placeholders={ "events_where_clause": where_clause_combined, "sample": sample_value, + "person_field": ast.Field( + chain=["e", "distinct_id"] + if self.team.aggregate_users_by_distinct_id + else ["e", "person", "id"] + ), }, ) @@ -320,7 +357,7 @@ def _events_query( """ SELECT timestamp as timestamp, - e.person_id AS actor_id + {person_field} AS actor_id FROM events e SAMPLE {sample} @@ -332,6 +369,9 @@ def _events_query( placeholders={ "events_where_clause": where_clause_combined, "sample": sample_value, + "person_field": ast.Field( + chain=["e", "distinct_id"] if self.team.aggregate_users_by_distinct_id else ["e", "person", "id"] + ), }, ) diff --git a/posthog/hogql_queries/insights/trends/breakdown.py b/posthog/hogql_queries/insights/trends/breakdown.py index 29e1dde53dc96..d56aba837b350 100644 --- a/posthog/hogql_queries/insights/trends/breakdown.py +++ b/posthog/hogql_queries/insights/trends/breakdown.py @@ -86,7 +86,7 @@ def events_where_filter(self) -> ast.Expr | None: return None return ast.CompareOperation( - left=ast.Field(chain=["person_id"]), + left=ast.Field(chain=["person", "id"]), op=ast.CompareOperationOp.InCohort, 
right=ast.Constant(value=int(self.query.breakdown.breakdown)), ) @@ -101,6 +101,11 @@ def events_where_filter(self) -> ast.Expr | None: for v in self._get_breakdown_values ] + if len(compare_ops) == 1: + return compare_ops[0] + elif len(compare_ops) == 0: + return parse_expr("1 = 1") + return ast.Or(exprs=compare_ops) # return ast.CompareOperation( diff --git a/posthog/hogql_queries/insights/trends/display.py b/posthog/hogql_queries/insights/trends/display.py index cc6ed758356ba..eb5fcef9098d4 100644 --- a/posthog/hogql_queries/insights/trends/display.py +++ b/posthog/hogql_queries/insights/trends/display.py @@ -1,4 +1,5 @@ from posthog.hogql import ast +from posthog.hogql.parser import parse_select from posthog.schema import ChartDisplayType @@ -26,19 +27,39 @@ def wrap_inner_query(self, inner_query: ast.SelectQuery, breakdown_enabled: bool def should_wrap_inner_query(self) -> bool: return self.display_type == ChartDisplayType.ActionsLineGraphCumulative - def modify_outer_query(self, outer_query: ast.SelectQuery, inner_query: ast.SelectQuery) -> ast.SelectQuery: - if self.should_aggregate_values(): - return ast.SelectQuery( - select=[ - ast.Alias( - alias="total", - expr=ast.Call(name="sum", args=[ast.Field(chain=["count"])]), - ) - ], - select_from=ast.JoinExpr(table=inner_query), + def _build_aggregate_dates(self, dates_queries: ast.SelectUnionQuery) -> ast.Expr: + return parse_select( + """ + SELECT day_start + FROM ( + SELECT 1 as group_key, groupArray(day_start) as day_start + FROM ( + SELECT day_start + FROM {dates_queries} + ORDER BY day_start + ) + GROUP BY group_key ) + """, + placeholders={"dates_queries": dates_queries}, + ) + + def modify_outer_query( + self, outer_query: ast.SelectQuery, inner_query: ast.SelectQuery, dates_queries: ast.SelectUnionQuery + ) -> ast.SelectQuery: + if not self.should_aggregate_values(): + return outer_query - return outer_query + return ast.SelectQuery( + select=[ + ast.Alias( + alias="total", + expr=ast.Call(name="sum", 
args=[ast.Field(chain=["count"])]), + ), + ast.Alias(alias="date", expr=self._build_aggregate_dates(dates_queries)), + ], + select_from=ast.JoinExpr(table=inner_query), + ) def _get_cumulative_query(self, inner_query: ast.SelectQuery, breakdown_enabled: bool) -> ast.SelectQuery: if breakdown_enabled: diff --git a/posthog/hogql_queries/insights/trends/query_builder.py b/posthog/hogql_queries/insights/trends/query_builder.py index b4d00880f2a3f..9ceb9753a7bdc 100644 --- a/posthog/hogql_queries/insights/trends/query_builder.py +++ b/posthog/hogql_queries/insights/trends/query_builder.py @@ -54,13 +54,13 @@ def build_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: def build_persons_query(self) -> ast.SelectQuery: event_query = self._get_events_subquery(True) - event_query.select = [ast.Alias(alias="person_id", expr=ast.Field(chain=["e", "person_id"]))] + event_query.select = [ast.Alias(alias="person_id", expr=ast.Field(chain=["e", "person", "id"]))] event_query.group_by = None return event_query - def _get_date_subqueries(self) -> List[ast.SelectQuery]: - if not self._breakdown.enabled: + def _get_date_subqueries(self, ignore_breakdowns: bool = False) -> List[ast.SelectQuery]: + if not self._breakdown.enabled or ignore_breakdowns: return [ cast( ast.SelectQuery, @@ -111,7 +111,7 @@ def _get_date_subqueries(self) -> List[ast.SelectQuery]: coalesce(dateDiff({interval}, {date_from}, {date_to}), 0) ) UNION ALL - SELECT {date_from} AS day_start + SELECT {date_from_start_of_interval} AS day_start ) as ticks CROSS JOIN ( SELECT breakdown_value @@ -164,7 +164,9 @@ def _get_events_subquery(self, no_modifications: Optional[bool]) -> ast.SelectQu # No breakdowns and no complex series aggregation if ( - not self._breakdown.enabled and not self._aggregation_operation.requires_query_orchestration() + not self._breakdown.enabled + and not self._aggregation_operation.requires_query_orchestration() + and not self._aggregation_operation.aggregating_on_session_duration() ) or 
no_modifications is True: return default_query # Both breakdowns and complex series aggregation @@ -181,13 +183,60 @@ def _get_events_subquery(self, no_modifications: Optional[bool]) -> ast.SelectQu orchestrator.inner_select_query_builder.append_group_by(ast.Field(chain=["breakdown_value"])) orchestrator.parent_select_query_builder.append_select(ast.Field(chain=["breakdown_value"])) + if ( + self._aggregation_operation.should_aggregate_values + and not self._aggregation_operation.is_count_per_actor_variant() + ): + orchestrator.parent_select_query_builder.append_group_by(ast.Field(chain=["breakdown_value"])) return orchestrator.build() + # Breakdowns and session duration math property + elif self._breakdown.enabled and self._aggregation_operation.aggregating_on_session_duration(): + default_query.select = [ + ast.Alias( + alias="session_duration", expr=ast.Call(name="any", args=[ast.Field(chain=["session", "duration"])]) + ), + self._breakdown.column_expr(), + ] + + default_query.group_by.extend([ast.Field(chain=["session", "id"]), ast.Field(chain=["breakdown_value"])]) + + wrapper = self.session_duration_math_property_wrapper(default_query) + + if not self._trends_display.should_aggregate_values(): + default_query.select.append(day_start) + default_query.group_by.append(ast.Field(chain=["day_start"])) + + wrapper.select.append(ast.Field(chain=["day_start"])) + wrapper.group_by.append(ast.Field(chain=["day_start"])) + + wrapper.select.append(ast.Field(chain=["breakdown_value"])) + wrapper.group_by.append(ast.Field(chain=["breakdown_value"])) + + return wrapper # Just breakdowns elif self._breakdown.enabled: default_query.select.append(self._breakdown.column_expr()) default_query.group_by.append(ast.Field(chain=["breakdown_value"])) + # Just session duration math property + elif self._aggregation_operation.aggregating_on_session_duration(): + default_query.select = [ + ast.Alias( + alias="session_duration", expr=ast.Call(name="any", args=[ast.Field(chain=["session", 
"duration"])]) + ) + ] + default_query.group_by.append(ast.Field(chain=["session", "id"])) + + wrapper = self.session_duration_math_property_wrapper(default_query) + if not self._trends_display.should_aggregate_values(): + default_query.select.append(day_start) + default_query.group_by.append(ast.Field(chain=["day_start"])) + + wrapper.select.append(ast.Field(chain=["day_start"])) + wrapper.group_by.append(ast.Field(chain=["day_start"])) + + return wrapper # Just complex series aggregation elif self._aggregation_operation.requires_query_orchestration(): return self._aggregation_operation.get_query_orchestrator( @@ -211,17 +260,29 @@ def _outer_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: ), ) - query = self._trends_display.modify_outer_query(outer_query=query, inner_query=inner_query) + query = self._trends_display.modify_outer_query( + outer_query=query, + inner_query=inner_query, + dates_queries=ast.SelectUnionQuery(select_queries=self._get_date_subqueries(ignore_breakdowns=True)), + ) + + query.order_by = [ast.OrderExpr(expr=ast.Call(name="sum", args=[ast.Field(chain=["count"])]), order="DESC")] if self._breakdown.enabled: query.select.append( ast.Alias( alias="breakdown_value", - expr=ast.Call(name="ifNull", args=[ast.Field(chain=["breakdown_value"]), ast.Constant(value="")]), + expr=ast.Call( + name="ifNull", + args=[ + ast.Call(name="toString", args=[ast.Field(chain=["breakdown_value"])]), + ast.Constant(value=""), + ], + ), ) ) query.group_by = [ast.Field(chain=["breakdown_value"])] - query.order_by = [ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC")] + query.order_by.append(ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC")) return query @@ -305,8 +366,12 @@ def _events_filter(self, ignore_breakdowns: bool = False) -> ast.Expr: # Actions if isinstance(series, ActionsNode): - action = Action.objects.get(pk=int(series.id), team=self.team) - filters.append(action_to_expr(action)) + try: + action = 
Action.objects.get(pk=int(series.id), team=self.team) + filters.append(action_to_expr(action)) + except Action.DoesNotExist: + # If an action doesn't exist, we want to return no events + filters.append(parse_expr("1 = 2")) # Breakdown if not ignore_breakdowns: @@ -328,6 +393,24 @@ def _sample_value(self) -> ast.RatioExpr: return ast.RatioExpr(left=ast.Constant(value=self.query.samplingFactor)) + def session_duration_math_property_wrapper(self, default_query: ast.SelectQuery) -> ast.SelectQuery: + query = cast( + ast.SelectQuery, + parse_select( + """ + SELECT {aggregation_operation} AS total + FROM {default_query} + """, + placeholders={ + "aggregation_operation": self._aggregation_operation.select_aggregation(), + "default_query": default_query, + }, + ), + ) + + query.group_by = [] + return query + @cached_property def _breakdown(self): return Breakdown( @@ -341,7 +424,9 @@ def _breakdown(self): @cached_property def _aggregation_operation(self) -> AggregationOperations: - return AggregationOperations(self.series, self.query_date_range, self._trends_display.should_aggregate_values()) + return AggregationOperations( + self.team, self.series, self.query_date_range, self._trends_display.should_aggregate_values() + ) @cached_property def _trends_display(self) -> TrendsDisplay: diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index 73f69c14ff700..94ad628c418b7 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -90,6 +90,7 @@ GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -170,6 +171,7 @@ GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, 
max_execution_time=60, allow_experimental_object_type=1 @@ -202,7 +204,7 @@ ' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - ifNull(breakdown_value, '') AS breakdown_value + ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT sum(total) AS count, day_start, @@ -214,7 +216,7 @@ FROM (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')) AS day_start) AS ticks + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start) AS ticks CROSS JOIN (SELECT breakdown_value FROM @@ -240,7 +242,7 @@ breakdown_value ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value - ORDER BY breakdown_value ASC + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -304,7 +306,7 @@ ' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - ifNull(breakdown_value, '') AS breakdown_value + ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT sum(total) AS count, day_start, @@ -316,7 +318,7 @@ FROM (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')) AS 
day_start) AS ticks + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start) AS ticks CROSS JOIN (SELECT breakdown_value FROM @@ -342,7 +344,117 @@ breakdown_value ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value - ORDER BY breakdown_value ASC + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_breakdown_filtering_with_properties_in_new_format + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0)) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_breakdown_filtering_with_properties_in_new_format.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + 
FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'second url'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'second url'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY 
day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_breakdown_filtering_with_properties_in_new_format.2 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0)) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_breakdown_filtering_with_properties_in_new_format.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', 
assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -366,16 +478,31 @@ # name: TestTrends.test_breakdown_weekly_active_users_aggregated.1 ' SELECT sum(count) AS total, 
- ifNull(breakdown_value, '') AS breakdown_value + + (SELECT day_start + FROM + (SELECT 1 AS group_key, + groupArray(day_start) AS day_start + FROM + (SELECT day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))) AS day_start) + ORDER BY day_start ASC) + GROUP BY group_key)) AS date, + ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT sum(total) AS count, breakdown_value FROM - (SELECT counts AS total, + (SELECT count(DISTINCT actor_id) AS total, breakdown_value FROM (SELECT d.timestamp, - count(DISTINCT e.actor_id) AS counts, + e.actor_id, e.breakdown_value FROM (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp @@ -397,13 +524,15 @@ breakdown_value) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) GROUP BY d.timestamp, + e.actor_id, e.breakdown_value ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 
'UTC'))), 0)) + GROUP BY breakdown_value) GROUP BY breakdown_value ORDER BY breakdown_value ASC) GROUP BY breakdown_value - ORDER BY breakdown_value ASC + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -427,16 +556,31 @@ # name: TestTrends.test_breakdown_weekly_active_users_aggregated_materialized.1 ' SELECT sum(count) AS total, - ifNull(breakdown_value, '') AS breakdown_value + + (SELECT day_start + FROM + (SELECT 1 AS group_key, + groupArray(day_start) AS day_start + FROM + (SELECT day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))) AS day_start) + ORDER BY day_start ASC) + GROUP BY group_key)) AS date, + ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT sum(total) AS count, breakdown_value FROM - (SELECT counts AS total, + (SELECT count(DISTINCT actor_id) AS total, breakdown_value FROM (SELECT d.timestamp, - count(DISTINCT e.actor_id) AS counts, + e.actor_id, e.breakdown_value FROM (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp @@ -458,26 +602,70 @@ breakdown_value) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) GROUP BY d.timestamp, + e.actor_id, e.breakdown_value ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + GROUP BY breakdown_value) GROUP BY breakdown_value ORDER BY breakdown_value ASC) GROUP BY breakdown_value - ORDER BY breakdown_value ASC + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling +# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ' +--- +# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.1 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ' +--- +# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.2 ' SELECT groupArray(value) FROM - (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value, count(e.uuid) AS count FROM events AS e - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + INNER JOIN + (SELECT 
argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, + (SELECT cohortpeople.person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 21)) + GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, @@ -485,11 +673,11 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.1 +# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.3 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - ifNull(breakdown_value, '') AS breakdown_value + ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT 
sum(total) AS count, day_start, @@ -499,47 +687,86 @@ ticks.day_start AS day_start, sec.breakdown_value FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')) AS day_start) AS ticks + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start) AS ticks CROSS JOIN (SELECT breakdown_value FROM - (SELECT [NULL, 'other_value', 'value'] AS breakdown_value) ARRAY + (SELECT [NULL, 'val'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value - FROM events AS e SAMPLE 1.0 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, 
person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0))) - GROUP BY day_start, - breakdown_value) + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start, + breakdown_value + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts, + e.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING 
ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, + (SELECT cohortpeople.person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 21)) + GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id, + breakdown_value) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY 
d.timestamp, + e.breakdown_value + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))) GROUP BY day_start, breakdown_value ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value - ORDER BY breakdown_value ASC + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.2 +# name: TestTrends.test_breakdown_with_filter_groups_person_on_events ' SELECT groupArray(value) FROM - (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value, count(e.uuid) AS count FROM events AS e - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 
'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)) GROUP BY value ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, @@ -547,11 +774,11 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.3 +# name: TestTrends.test_breakdown_with_filter_groups_person_on_events.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - ifNull(breakdown_value, '') AS breakdown_value + ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT sum(total) AS count, day_start, @@ -561,60 +788,270 @@ ticks.day_start AS day_start, sec.breakdown_value FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')) AS day_start) AS ticks + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start) AS ticks CROSS JOIN (SELECT breakdown_value FROM - (SELECT [NULL, 'other_value', 'value'] AS breakdown_value) ARRAY + (SELECT [NULL, 'uh', 'oh'] AS breakdown_value) ARRAY JOIN breakdown_value 
AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT count(e.uuid) AS total, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value - FROM events AS e SAMPLE 1.0 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0))) + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), 
ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'uh'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'oh'), 0))) GROUP BY day_start, breakdown_value) GROUP BY day_start, breakdown_value ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value - ORDER BY breakdown_value ASC + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_filter_events_by_precalculated_cohort +# name: TestTrends.test_breakdown_with_filter_groups_person_on_events_v2 ' - SELECT count(DISTINCT person_id) - FROM cohortpeople + SELECT distinct_id, + person_id + FROM events WHERE team_id = 2 - AND cohort_id = 2 - AND version = NULL + AND distinct_id IN ('test_breakdown_d1', + 'test_breakdown_d2') + GROUP BY distinct_id, + person_id + ORDER BY if(distinct_id = 'test_breakdown_d1', -1, 0) ' --- -# name: TestTrends.test_filter_events_by_precalculated_cohort.1 +# name: TestTrends.test_breakdown_with_filter_groups_person_on_events_v2.1 ' - - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 2 - AND cohort_id = 2 - AND version = 0 + SELECT groupArray(value) + FROM + (SELECT 
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 ' --- -# name: TestTrends.test_filter_events_by_precalculated_cohort.2 +# name: TestTrends.test_breakdown_with_filter_groups_person_on_events_v2.2 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'uh', 'oh'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'uh'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'oh'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS 
readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'other_value', 'value'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + 
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1.0 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.2 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), 
greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'other_value', 'value'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1.0 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + 
WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_filter_events_by_precalculated_cohort + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ' +--- +# name: TestTrends.test_filter_events_by_precalculated_cohort.1 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ' +--- +# name: TestTrends.test_filter_events_by_precalculated_cohort.2 ' SELECT count(DISTINCT person_id) @@ -671,6 +1108,7 @@ GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -736,12 +1174,13 @@ GROUP BY 
day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_person_property_filtering +# name: TestTrends.test_filtering_by_multiple_groups_person_on_events ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -750,40 +1189,79 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start UNION ALL SELECT count(e.uuid) AS total, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - INNER JOIN - (SELECT person.id, - 
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'name'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___name, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 2), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_2 ON equals(e.`$group_2`, e__group_2.key) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), 
lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__group_0.properties___industry, 'finance'), 0), ifNull(equals(e__group_2.properties___name, 'six'), 0))) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_person_property_filtering_clashing_with_event_property +# name: TestTrends.test_filtering_by_multiple_groups_person_on_events.1 + ' + /* user_id:0 request:_snapshot_ */ + SELECT person_id AS actor_id, + count() AS actor_value + FROM + (SELECT e.timestamp as timestamp, + e.person_id as person_id, + e.distinct_id as distinct_id, + e.team_id as team_id + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_2 + FROM groups + WHERE team_id = 2 + AND group_type_index = 2 + GROUP BY group_key) groups_2 ON "$group_2" == groups_2.group_key + WHERE team_id = 2 + AND event = 'sign up' + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-02 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-02 23:59:59', 'UTC') + AND ((has(['finance'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) + AND (has(['six'], replaceRegexpAll(JSONExtractRaw(group_properties_2, 'name'), '^"|"$', '')))) + AND notEmpty(e.person_id) ) + GROUP BY actor_id + ORDER BY actor_value DESC, + actor_id DESC + LIMIT 100 + OFFSET 0 + ' +--- +# name: TestTrends.test_filtering_with_group_props_person_on_events ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -792,40 +1270,1322 @@ 
day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start UNION ALL SELECT count(e.uuid) AS total, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), 
lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview'), and(ifNull(equals(e__group_0.properties___industry, 'finance'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0))) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter + ' + SELECT groupArray(value) + FROM + (SELECT e__pdi__person.`properties___$some_prop` AS value, + count(e.uuid) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop`, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'filter_prop'), ''), 'null'), '^"|"$', '') AS properties___filter_prop + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), 
ifNull(equals(e__pdi__person.properties___filter_prop, 'filter_val'), 0)) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'some_val2', 'some_val'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start, + breakdown_value + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts, + e.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC'))), toIntervalDay(30)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id 
AS actor_id, + e__pdi__person.`properties___$some_prop` AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop`, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'filter_prop'), ''), 'null'), '^"|"$', '') AS properties___filter_prop + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.properties___filter_prop, 'filter_val'), 0), or(isNull(e__pdi__person.`properties___$some_prop`), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val2'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id, + breakdown_value) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), 
ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(29))), 0)) + GROUP BY d.timestamp, + e.breakdown_value + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter_poe_v2 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'filter_prop'), ''), 'null'), '^"|"$', ''), 'filter_val'), 0)) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter_poe_v2.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'some_val2', 'some_val'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start, + breakdown_value + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts, + e.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC'))), toIntervalDay(30)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.person_id AS actor_id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'filter_prop'), ''), 'null'), '^"|"$', ''), 'filter_val'), 0), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', ''), 'some_val2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', ''), 'some_val'), 0))), ifNull(greaterOrEquals(timestamp, 
minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id, + breakdown_value) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(29))), 0)) + GROUP BY d.timestamp, + e.breakdown_value + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_non_deterministic_timezones + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2022-11-30 23:59:59', 6, 'US/Pacific')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2022-10-31 05:01:01', 6, 'US/Pacific')), assumeNotNull(parseDateTime64BestEffortOrNull('2022-11-30 23:59:59', 6, 'US/Pacific'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2022-10-31 05:01:01', 6, 'US/Pacific')), 0) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfWeek(toTimeZone(e.timestamp, 'US/Pacific'), 0) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), 
greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2022-10-31 05:01:01', 6, 'US/Pacific')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), assumeNotNull(parseDateTime64BestEffortOrNull('2022-11-30 23:59:59', 6, 'US/Pacific'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_filtering_in_cohort_in_action + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ' +--- +# name: TestTrends.test_person_filtering_in_cohort_in_action.1 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ' +--- +# name: TestTrends.test_person_filtering_in_cohort_in_action.2 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, + (SELECT 
cohortpeople.person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 32)) + GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_filtering_in_cohort_in_action.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'value', 'other_value'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY 
person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, + (SELECT cohortpeople.person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 32)) + GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ' +--- +# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.1 + ' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ' +--- +# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.2 
+ ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), in(e.person_id, + (SELECT cohortpeople.person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 33)) + GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))))) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'value', 'other_value'] AS breakdown_value) ARRAY + JOIN breakdown_value AS 
breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), in(e.person_id, + (SELECT cohortpeople.person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 33)) + GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0)))), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_property_filtering + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS 
day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 
SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_property_filtering_clashing_with_event_property + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), 
greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_property_filtering_clashing_with_event_property.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'name'), ''), 'null'), '^"|"$', ''), '1'), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + 
LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_property_filtering_clashing_with_event_property_materialized + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + nullIf(nullIf(person.pmat_name, ''), 'null') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), 
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_person_property_filtering_clashing_with_event_property_materialized.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(nullIf(nullIf(e.mat_name, ''), 'null'), '1'), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' 
+--- +# name: TestTrends.test_person_property_filtering_materialized + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + nullIf(nullIf(person.pmat_name, ''), 'null') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 
lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_same_day_with_person_on_events_v2 + ' + + SELECT distinct_id, + person_id + FROM events + WHERE team_id = 2 + AND distinct_id IN ('distinctid1', + 'distinctid2') + GROUP BY distinct_id, + person_id + ORDER BY if(distinct_id = 'distinctid1', -1, 0) + ' +--- +# name: TestTrends.test_same_day_with_person_on_events_v2.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 
SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_same_day_with_person_on_events_v2.2 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(DISTINCT e.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 05:00:00', 6, 'UTC')), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 05:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 05:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY 
person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.2 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC'))) AS day_start + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT 
toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 
'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.4 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily.5 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT 
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'Mac'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP 
BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 05:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 05:00:00', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 05:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + 
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 06:01:01', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 06:01:01', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 06:01:01', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.2 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + 
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'America/Phoenix') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.4 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_minus_utc.5 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix'))) AS day_start) 
AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'Mac'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + 
FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 05:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 05:00:00', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 05:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 22:01:01', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 22:01:01', 6, 'Asia/Tokyo'))) AS day_start + 
UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 22:01:01', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.2 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT counts AS total, + toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT 
e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'Asia/Tokyo') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0))) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + 
max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.3 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.4 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_daily_plus_utc.5 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'Mac'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi 
ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_hourly_relative_from + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), toIntervalHour(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfHour(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 INNER JOIN - (SELECT person.id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name - FROM person - WHERE 
and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_person_property_filtering_clashing_with_event_property.1 +# name: TestTrends.test_timezones_hourly_relative_from.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -834,23 +2594,24 @@ day_start FROM (SELECT 0 AS total, - 
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), toIntervalHour(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC'))) AS day_start UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + toStartOfHour(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'name'), ''), 'null'), '^"|"$', ''), '1'), 0)) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY 
day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_person_property_filtering_clashing_with_event_property_materialized +# name: TestTrends.test_timezones_hourly_relative_from_minus_utc ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -859,12 +2620,12 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), toIntervalHour(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfHour(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -873,26 +2634,17 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, 
person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - INNER JOIN - (SELECT person.id, - nullIf(nullIf(person.pmat_name, ''), 'null') AS properties___name - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix'))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_person_property_filtering_clashing_with_event_property_materialized.1 +# name: TestTrends.test_timezones_hourly_relative_from_minus_utc.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -901,23 +2653,24 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM 
numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), toIntervalHour(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start + toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix'))) AS day_start UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + toStartOfHour(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(nullIf(nullIf(e.mat_name, ''), 'null'), '1'), 0)) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix'))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, 
allow_experimental_object_type=1 ' --- -# name: TestTrends.test_person_property_filtering_materialized +# name: TestTrends.test_timezones_hourly_relative_from_plus_utc ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -926,12 +2679,12 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), toIntervalHour(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfHour(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -940,26 +2693,17 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - INNER JOIN - (SELECT person.id, - nullIf(nullIf(person.pmat_name, ''), 
'null') AS properties___name - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo'))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily +# name: TestTrends.test_timezones_hourly_relative_from_plus_utc.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -968,23 +2712,24 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + 
minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), toIntervalHour(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))) AS day_start + toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo'))) AS day_start UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + toStartOfHour(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo'))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily.1 +# name: TestTrends.test_timezones_weekly ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -993,30 +2738,24 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 
6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'UTC')), 0) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS day_start FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), 
toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily.2 +# name: TestTrends.test_timezones_weekly.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -1025,43 +2764,24 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC')), 3), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT counts AS total, - toStartOfDay(timestamp) AS day_start - FROM - (SELECT d.timestamp, - count(DISTINCT e.actor_id) AS counts - FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), toIntervalDay(7)), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))) AS numbers) AS d - CROSS JOIN - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) - GROUP BY timestamp, actor_id) AS e - WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) - GROUP BY d.timestamp - ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0))) + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'UTC')), 3) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 3) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'UTC')), 3)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily.3 +# name: TestTrends.test_timezones_weekly_minus_utc ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -1070,30 +2790,129 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))) AS day_start + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'America/Phoenix')), 0) AS day_start UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + toStartOfWeek(toTimeZone(e.timestamp, 'America/Phoenix'), 0) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'America/Phoenix')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily.4 +# name: TestTrends.test_timezones_weekly_minus_utc.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix')), 3), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'America/Phoenix')), 3) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfWeek(toTimeZone(e.timestamp, 'America/Phoenix'), 3) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'America/Phoenix')), 3)), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix'))), 
equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_weekly_plus_utc + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'Asia/Tokyo')), 0) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfWeek(toTimeZone(e.timestamp, 'Asia/Tokyo'), 0) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'Asia/Tokyo')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_timezones_weekly_plus_utc.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total + FROM + (SELECT sum(total) AS count, + day_start + FROM + (SELECT 0 AS total, + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo')), 3), 
toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'Asia/Tokyo')), 3) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfWeek(toTimeZone(e.timestamp, 'Asia/Tokyo'), 3) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 03:00:00', 6, 'Asia/Tokyo')), 3)), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns ' SELECT groupArray(value) FROM - (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, + (SELECT e__pdi__person.properties___email AS value, count(e.uuid) AS count FROM events AS e - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE 
equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), isNotNull(e__pdi__person.properties___email) + or isNotNull('%@posthog.com%')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0)))) GROUP BY value ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, @@ -1101,11 +2920,11 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily.5 
+# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - ifNull(breakdown_value, '') AS breakdown_value + ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT sum(total) AS count, day_start, @@ -1115,18 +2934,18 @@ ticks.day_start AS day_start, sec.breakdown_value FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')) AS day_start) AS ticks + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start) AS ticks CROSS JOIN (SELECT breakdown_value FROM - (SELECT [NULL, 'Mac'] AS breakdown_value) ARRAY + (SELECT [NULL, 'test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT count(e.uuid) AS total, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS 
breakdown_value + e__pdi__person.properties___email AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -1135,59 +2954,93 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), 
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), isNotNull(e__pdi__person.properties___email) + or isNotNull('%@posthog.com%')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))), or(isNull(e__pdi__person.properties___email), ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test@gmail.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test5@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test4@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test3@posthog.com'), 0))) GROUP BY day_start, breakdown_value) GROUP BY day_start, breakdown_value ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value - ORDER BY breakdown_value ASC + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_minus_utc +# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.2 ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total + SELECT groupArray(value) FROM - (SELECT sum(total) AS count, - day_start - FROM - (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 
'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) + (SELECT e__pdi__person.properties___email AS value, + count(e.uuid) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE 
equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), isNull(e__pdi__person.properties___email) + and isNull('%@posthog.com%')))) + GROUP BY value + ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_minus_utc.1 +# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.3 ' SELECT groupArray(day_start) AS date, - groupArray(count) AS total + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT sum(total) AS count, - day_start + day_start, + breakdown_value FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - 
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'America/Phoenix'))) AS day_start - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'test2@posthog.com'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + e__pdi__person.properties___email AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -1196,16 +3049,33 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY 
day_start - ORDER BY day_start ASC) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), isNull(e__pdi__person.properties___email) + and isNull('%@posthog.com%'))), or(isNull(e__pdi__person.properties___email), ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, 
allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_minus_utc.2 +# name: TestTrends.test_trends_aggregate_by_distinct_id ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -1214,43 +3084,24 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))) AS day_start - UNION ALL SELECT counts AS total, - toStartOfDay(timestamp) AS day_start - FROM - (SELECT d.timestamp, - count(DISTINCT e.actor_id) AS counts - FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix')))) AS numbers) AS d - CROSS JOIN - (SELECT toTimeZone(e.timestamp, 'America/Phoenix') AS timestamp, - e__pdi.person_id AS actor_id - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, 
person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) - GROUP BY timestamp, actor_id) AS e - WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) - GROUP BY d.timestamp - ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0))) + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(DISTINCT e.distinct_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 
' --- -# name: TestTrends.test_timezones_daily_minus_utc.3 +# name: TestTrends.test_trends_aggregate_by_distinct_id.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -1259,30 +3110,65 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(DISTINCT e.distinct_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) 
AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0)) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_minus_utc.4 +# name: TestTrends.test_trends_aggregate_by_distinct_id.2 ' SELECT groupArray(value) FROM - (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, + (SELECT e__pdi__person.`properties___$some_prop` AS value, count(e.uuid) AS count FROM events AS e - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), 
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY value ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, @@ -1290,11 +3176,11 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_minus_utc.5 +# name: TestTrends.test_trends_aggregate_by_distinct_id.3 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - ifNull(breakdown_value, '') AS breakdown_value + 
ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT sum(total) AS count, day_start, @@ -1304,18 +3190,18 @@ ticks.day_start AS day_start, sec.breakdown_value FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')) AS day_start) AS ticks + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC'))) AS day_start) AS ticks CROSS JOIN (SELECT breakdown_value FROM - (SELECT [NULL, 'Mac'] AS breakdown_value) ARRAY + (SELECT [NULL, 'some_val'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value + UNION ALL SELECT count(DISTINCT e.distinct_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + e__pdi__person.`properties___$some_prop` AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, 
person_distinct_id2.version) AS person_id, @@ -1324,77 +3210,30 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(e__pdi__person.`properties___$some_prop`), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0))) GROUP BY day_start, breakdown_value) GROUP BY 
day_start, breakdown_value ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value - ORDER BY breakdown_value ASC - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ' ---- -# name: TestTrends.test_timezones_daily_plus_utc - ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total - FROM - (SELECT sum(total) AS count, - day_start - FROM - (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ' ---- -# name: TestTrends.test_timezones_daily_plus_utc.1 - ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total - FROM - (SELECT sum(total) AS count, - day_start - FROM - (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start - FROM 
numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'Asia/Tokyo'))) AS day_start - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_plus_utc.2 +# name: TestTrends.test_trends_aggregate_by_distinct_id.4 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -1403,43 +3242,37 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', 
assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:03', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:03', 6, 'UTC'))) AS day_start UNION ALL SELECT counts AS total, toStartOfDay(timestamp) AS day_start FROM (SELECT d.timestamp, count(DISTINCT e.actor_id) AS counts FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo')))) AS numbers) AS d + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:03', 6, 'UTC'))), toIntervalDay(30)), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN - (SELECT toTimeZone(e.timestamp, 'Asia/Tokyo') AS timestamp, - e__pdi.person_id AS actor_id + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.distinct_id AS actor_id FROM events AS e SAMPLE 1 - 
INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:03', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id) AS e - WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(29))), 0)) GROUP BY d.timestamp ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0))) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:03', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 
'UTC'))), 0))) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_plus_utc.3 +# name: TestTrends.test_trends_aggregate_by_distinct_id.5 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -1448,30 +3281,44 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) - GROUP BY day_start) + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC'))) AS day_start + UNION ALL SELECT counts AS total, + 
toStartOfDay(timestamp) AS day_start + FROM + (SELECT d.timestamp, + count(DISTINCT e.actor_id) AS counts + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.distinct_id AS actor_id + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), 0))) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_plus_utc.4 +# name: TestTrends.test_trends_aggregate_by_distinct_id.6 ' SELECT groupArray(value) FROM - (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value, + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), 
'^"|"$', '') AS value, count(e.uuid) AS count FROM events AS e - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY value ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, @@ -1479,11 +3326,11 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_daily_plus_utc.5 +# name: TestTrends.test_trends_aggregate_by_distinct_id.7 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - ifNull(breakdown_value, '') AS breakdown_value + ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT sum(total) AS count, day_start, @@ -1493,40 +3340,33 @@ ticks.day_start AS day_start, sec.breakdown_value FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')) AS day_start) AS ticks + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM 
numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC'))) AS day_start) AS ticks CROSS JOIN (SELECT breakdown_value FROM - (SELECT [NULL, 'Mac'] AS breakdown_value) ARRAY + (SELECT [NULL] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value + UNION ALL SELECT count(DISTINCT e.distinct_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS breakdown_value FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), 
ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', ''))) GROUP BY day_start, breakdown_value) GROUP BY day_start, breakdown_value ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value - ORDER BY breakdown_value ASC + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_hourly_relative_from +# name: TestTrends.test_trends_any_event_total_count ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -1535,30 +3375,24 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), toIntervalHour(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC'))) AS day_start - UNION 
ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfHour(toTimeZone(e.timestamp, 'UTC')) AS day_start + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_hourly_relative_from.1 +# name: TestTrends.test_trends_any_event_total_count.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -1567,55 +3401,277 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), toIntervalHour(numbers.number)) AS day_start - FROM 
numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC'))) AS day_start + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC'))) AS day_start UNION ALL SELECT count(e.uuid) AS total, - toStartOfHour(toTimeZone(e.timestamp, 'UTC')) AS day_start + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_hourly_relative_from_minus_utc +# name: TestTrends.test_trends_breakdown_cumulative + ' + SELECT 
groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_breakdown_cumulative.1 ' SELECT groupArray(day_start) AS date, - groupArray(count) AS total + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT day_start, + sum(count) OVER (PARTITION BY breakdown_value + ORDER BY day_start ASC) AS count, + breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'value', 'other_value'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + 
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC)) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_breakdown_cumulative_poe_v2 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), 
greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_breakdown_cumulative_poe_v2.1 + ' + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value + FROM + (SELECT day_start, + sum(count) OVER (PARTITION BY breakdown_value + ORDER BY day_start ASC) AS count, + breakdown_value + FROM + (SELECT sum(total) AS count, + day_start, + breakdown_value + FROM + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'value', 'other_value'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT count(DISTINCT e.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), 
greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC)) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:33', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown.1 + ' + SELECT sum(count) AS total, + + (SELECT day_start + FROM + (SELECT 1 AS group_key, + 
groupArray(day_start) AS day_start + FROM + (SELECT day_start + FROM + (SELECT 0 AS total, + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:33', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:33', 6, 'UTC')), 0) AS day_start) + ORDER BY day_start ASC) + GROUP BY group_key)) AS date, + ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT sum(total) AS count, - day_start + breakdown_value FROM - (SELECT 0 AS total, - minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), toIntervalHour(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix'))) AS day_start - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfHour(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), 
greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) + (SELECT quantile(0.5)(session_duration) AS total, + breakdown_value + FROM + (SELECT any(e__session.duration) AS session_duration, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT events.`$session_id` AS id, + dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration + FROM events + WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:33', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1)) + GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:33', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 
0))) + GROUP BY e__session.id, + breakdown_value) + GROUP BY breakdown_value) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown.2 + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown.3 + ' + SELECT sum(count) AS total, + + (SELECT day_start + FROM + (SELECT 1 AS group_key, + groupArray(day_start) AS day_start + FROM + (SELECT day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC'))) AS day_start) + ORDER BY day_start ASC) + GROUP BY group_key)) AS date, + ifNull(toString(breakdown_value), '') 
AS breakdown_value + FROM + (SELECT sum(total) AS count, + breakdown_value + FROM + (SELECT quantile(0.5)(session_duration) AS total, + breakdown_value + FROM + (SELECT any(e__session.duration) AS session_duration, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT events.`$session_id` AS id, + dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration + FROM events + WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1)) + GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0))) + GROUP BY e__session.id, + breakdown_value) + GROUP BY breakdown_value) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: 
TestTrends.test_timezones_hourly_relative_from_minus_utc.1 +# name: TestTrends.test_trends_compare_day_interval_relative_range ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -1624,23 +3680,24 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), toIntervalHour(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix'))) AS day_start + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC'))) AS day_start UNION ALL SELECT count(e.uuid) AS total, - toStartOfHour(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), 
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_hourly_relative_from_plus_utc +# name: TestTrends.test_trends_compare_day_interval_relative_range.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -1649,30 +3706,24 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), toIntervalHour(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-21 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo'))) AS day_start - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfHour(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-21 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT 
argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-21 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_hourly_relative_from_plus_utc.1 +# name: TestTrends.test_trends_compare_day_interval_relative_range.2 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -1681,123 +3732,168 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), toIntervalHour(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + 
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo'))) AS day_start + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC'))) AS day_start UNION ALL SELECT count(e.uuid) AS total, - toStartOfHour(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_weekly +# name: TestTrends.test_trends_count_per_user_average_aggregated ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total + SELECT sum(count) AS total, + + (SELECT day_start + FROM + (SELECT 1 AS group_key, + groupArray(day_start) AS day_start + FROM 
+ (SELECT day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start) + ORDER BY day_start ASC) + GROUP BY group_key)) AS date FROM - (SELECT sum(total) AS count, - day_start + (SELECT sum(total) AS count FROM - (SELECT 0 AS total, - minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC')), 0), toIntervalWeek(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')), 0) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) + (SELECT total + FROM + (SELECT avg(total) AS total + FROM + (SELECT count(e.uuid) AS total + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + 
person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) + GROUP BY e__pdi.person_id)))) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_weekly.1 +# name: TestTrends.test_trends_count_per_user_average_aggregated_poe_v2 ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total + SELECT sum(count) AS total, + + (SELECT day_start + FROM + (SELECT 1 AS group_key, + groupArray(day_start) AS day_start + FROM + (SELECT day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start) + ORDER BY day_start ASC) + GROUP BY group_key)) AS date FROM - (SELECT sum(total) AS count, - day_start + (SELECT sum(total) AS count FROM - (SELECT 0 AS total, - minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC')), 3), toIntervalWeek(numbers.number)) AS 
day_start - FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')), 3) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 3) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')), 3)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) + (SELECT total + FROM + (SELECT avg(total) AS total + FROM + (SELECT count(e.uuid) AS total + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) + GROUP BY e.person_id)))) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_weekly_minus_utc +# name: TestTrends.test_trends_count_per_user_average_aggregated_with_event_property_breakdown_with_sampling ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total + SELECT groupArray(value) FROM - (SELECT sum(total) AS count, - day_start - FROM - (SELECT 0 AS total, - minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix')), 0), 
toIntervalWeek(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'America/Phoenix')), 0) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfWeek(toTimeZone(e.timestamp, 'America/Phoenix'), 0) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'America/Phoenix')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video')) + GROUP BY value + ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_weekly_minus_utc.1 +# name: TestTrends.test_trends_count_per_user_average_aggregated_with_event_property_breakdown_with_sampling.1 ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total + SELECT sum(count) AS total, + + (SELECT day_start + FROM + (SELECT 1 AS group_key, + groupArray(day_start) AS day_start + FROM + (SELECT day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM 
numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start) + ORDER BY day_start ASC) + GROUP BY group_key)) AS date, + ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT sum(total) AS count, - day_start + breakdown_value FROM - (SELECT 0 AS total, - minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix')), 3), toIntervalWeek(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'America/Phoenix')), 3) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfWeek(toTimeZone(e.timestamp, 'America/Phoenix'), 3) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'America/Phoenix')), 3)), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) + (SELECT total, + breakdown_value + FROM + (SELECT avg(total) AS total, + breakdown_value + FROM + (SELECT count(e.uuid) AS total, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '') AS breakdown_value + FROM events AS e SAMPLE 
1.0 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(equals(e.event, 'viewed video'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), 'red'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), 'blue'), 0))), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) + GROUP BY e__pdi.person_id, + breakdown_value) + GROUP BY breakdown_value)) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_weekly_plus_utc +# name: TestTrends.test_trends_count_per_user_average_daily ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -1806,23 +3902,39 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo')), 0), toIntervalWeek(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'Asia/Tokyo')), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'Asia/Tokyo')), 0) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfWeek(toTimeZone(e.timestamp, 'Asia/Tokyo'), 0) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'Asia/Tokyo')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) - GROUP BY day_start) + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT total, + day_start + FROM + (SELECT avg(total) AS total, + day_start + FROM + (SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), 
greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) + GROUP BY e__pdi.person_id, + day_start) + GROUP BY day_start)) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_timezones_weekly_plus_utc.1 +# name: TestTrends.test_trends_count_per_user_average_daily_poe_v2 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -1831,122 +3943,66 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo')), 3), toIntervalWeek(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'Asia/Tokyo')), 3) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfWeek(toTimeZone(e.timestamp, 'Asia/Tokyo'), 3) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'Asia/Tokyo')), 
3)), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-26 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ' ---- -# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns - ' - SELECT groupArray(value) - FROM - (SELECT e__pdi__person.properties___email AS value, - count(e.uuid) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - INNER JOIN - (SELECT person.id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 
'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), isNotNull(e__pdi__person.properties___email) - or isNotNull('%@posthog.com%')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0)))) - GROUP BY value - ORDER BY count DESC, value DESC) + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start + UNION ALL SELECT total, + day_start + FROM + (SELECT avg(total) AS total, + day_start + FROM + (SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) + GROUP BY e.person_id, + day_start) + GROUP BY day_start)) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.1 +# name: TestTrends.test_trends_per_day_cumulative ' SELECT groupArray(day_start) AS date, - groupArray(count) AS total, - breakdown_value + groupArray(count) AS total FROM - (SELECT sum(total) AS count, - day_start, - breakdown_value + (SELECT day_start, + sum(count) OVER ( + ORDER BY day_start ASC) AS count FROM - (SELECT 0 AS total, - ticks.day_start AS 
day_start, - sec.breakdown_value + (SELECT sum(total) AS count, + day_start FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')) AS day_start) AS ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'] AS breakdown_value) ARRAY - JOIN breakdown_value AS breakdown_value) AS sec - ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - e__pdi__person.properties___email AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - INNER JOIN - (SELECT person.id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT 
person.id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), isNotNull(e__pdi__person.properties___email) - or isNotNull('%@posthog.com%')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))), ifNull(in(e__pdi__person.properties___email, ['test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com']), 0)) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY breakdown_value ASC + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC'))) AS day_start + UNION ALL SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 
'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC)) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.2 +# name: TestTrends.test_trends_person_breakdown_with_session_property_single_aggregate_math_and_breakdown ' SELECT groupArray(value) FROM - (SELECT e__pdi__person.properties___email AS value, + (SELECT e__pdi__person.`properties___$some_prop` AS value, count(e.uuid) AS count FROM events AS e INNER JOIN @@ -1958,9 +4014,7 @@ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) INNER JOIN (SELECT person.id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` FROM person WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), (SELECT person.id, max(person.version) AS version @@ -1968,7 +4022,7 @@ WHERE equals(person.team_id, 2) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) 
SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0))) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY value ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, @@ -1976,66 +4030,72 @@ allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.3 +# name: TestTrends.test_trends_person_breakdown_with_session_property_single_aggregate_math_and_breakdown.1 ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total, - breakdown_value + SELECT sum(count) AS total, + + (SELECT day_start + FROM + (SELECT 1 AS group_key, + groupArray(day_start) AS day_start + FROM + (SELECT day_start + FROM + (SELECT 0 AS total, + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + 
toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), 0) AS day_start) + ORDER BY day_start ASC) + GROUP BY group_key)) AS date, + ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT sum(total) AS count, - day_start, breakdown_value FROM - (SELECT 0 AS total, - ticks.day_start AS day_start, - sec.breakdown_value + (SELECT quantile(0.5)(session_duration) AS total, + breakdown_value FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')) AS day_start) AS ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['test2@posthog.com'] AS breakdown_value) ARRAY - JOIN breakdown_value AS breakdown_value) AS sec - ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - e__pdi__person.properties___email AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - INNER JOIN - (SELECT person.id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 
'null'), '^"|"$', '') AS `properties___$os`, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), ifNull(in(e__pdi__person.properties___email, ['test2@posthog.com']), 0)) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) + (SELECT any(e__session.duration) AS session_duration, + e__pdi__person.`properties___$some_prop` AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT events.`$session_id` AS id, + dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration + FROM events + WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1)) + GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) + INNER JOIN + (SELECT 
argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(e__pdi__person.`properties___$some_prop`), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'another_val'), 0))) + GROUP BY e__session.id, + breakdown_value) + GROUP BY breakdown_value) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC) GROUP BY breakdown_value - ORDER BY breakdown_value ASC + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trends_any_event_total_count +# name: TestTrends.test_trends_with_hogql_math ' SELECT groupArray(day_start) AS date, 
groupArray(count) AS total @@ -2044,175 +4104,102 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 12:01:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 12:01:01', 6, 'UTC')), 0) AS day_start + UNION ALL SELECT plus(avg(toFloat64OrNull(nullIf(nullIf(e.`$session_id`, ''), 'null'))), 1000) AS total, + toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS day_start FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 12:01:01', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 
equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trends_any_event_total_count.1 +# name: TestTrends.test_trends_with_session_property_single_aggregate_math ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total - FROM - (SELECT sum(total) AS count, - day_start + SELECT sum(count) AS total, + + (SELECT day_start FROM - (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ' ---- -# name: TestTrends.test_trends_breakdown_cumulative - ' - SELECT groupArray(value) - FROM - (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), 
greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY value - ORDER BY count DESC, value DESC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ' ---- -# name: TestTrends.test_trends_breakdown_cumulative.1 - ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total, - ifNull(breakdown_value, '') AS breakdown_value + (SELECT 1 AS group_key, + groupArray(day_start) AS day_start + FROM + (SELECT day_start + FROM + (SELECT 0 AS total, + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), 0) AS day_start) + ORDER BY day_start ASC) + GROUP BY group_key)) AS date FROM - (SELECT day_start, - sum(count) OVER (PARTITION BY breakdown_value - ORDER BY day_start ASC) AS count, - breakdown_value + (SELECT sum(total) AS count FROM - (SELECT sum(total) AS count, - day_start, - breakdown_value + (SELECT quantile(0.5)(session_duration) AS total FROM - (SELECT 0 AS total, - ticks.day_start AS day_start, - sec.breakdown_value - FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')) AS day_start) AS ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT [NULL, 'value', 'other_value'] AS breakdown_value) ARRAY - JOIN breakdown_value AS breakdown_value) AS sec - ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value + (SELECT any(e__session.duration) AS session_duration FROM events AS e SAMPLE 1 INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - 
ORDER BY day_start ASC, breakdown_value ASC)) - GROUP BY breakdown_value - ORDER BY breakdown_value ASC - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ' ---- -# name: TestTrends.test_trends_breakdown_cumulative_poe_v2 - ' - SELECT groupArray(value) - FROM - (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY value - ORDER BY count DESC, value DESC) + (SELECT events.`$session_id` AS id, + dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration + FROM events + WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1)) + GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY e__session.id))) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trends_breakdown_cumulative_poe_v2.1 +# name: 
TestTrends.test_trends_with_session_property_single_aggregate_math.1 ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total, - ifNull(breakdown_value, '') AS breakdown_value - FROM - (SELECT day_start, - sum(count) OVER (PARTITION BY breakdown_value - ORDER BY day_start ASC) AS count, - breakdown_value + SELECT sum(count) AS total, + + (SELECT day_start FROM - (SELECT sum(total) AS count, - day_start, - breakdown_value + (SELECT 1 AS group_key, + groupArray(day_start) AS day_start FROM - (SELECT 0 AS total, - ticks.day_start AS day_start, - sec.breakdown_value + (SELECT day_start FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')) AS day_start) AS ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT [NULL, 'value', 'other_value'] AS breakdown_value) ARRAY - JOIN breakdown_value AS breakdown_value) AS sec - ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + 
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC'))) AS day_start) + ORDER BY day_start ASC) + GROUP BY group_key)) AS date + FROM + (SELECT sum(total) AS count + FROM + (SELECT quantile(0.5)(session_duration) AS total + FROM + (SELECT any(e__session.duration) AS session_duration FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC)) - GROUP BY breakdown_value - ORDER BY breakdown_value ASC + INNER JOIN + (SELECT events.`$session_id` AS id, + dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration + FROM events + WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1)) + GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 
'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY e__session.id))) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trends_compare_day_interval_relative_range +# name: TestTrends.test_trends_with_session_property_total_volume_math ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -2221,23 +4208,36 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 
13:00:01', 6, 'UTC')), 0) AS day_start + UNION ALL SELECT quantile(0.5)(session_duration) AS total, + day_start + FROM + (SELECT any(e__session.duration) AS session_duration, + toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT events.`$session_id` AS id, + dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration + FROM events + WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1)) + GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start, + e__session.id, + day_start) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trends_compare_day_interval_relative_range.1 +# name: TestTrends.test_trends_with_session_property_total_volume_math.1 ' SELECT groupArray(day_start) AS date, groupArray(count) AS total @@ -2246,113 +4246,204 @@ day_start FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-21 00:00:00', 6, 'UTC')), 
assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 23:59:59', 6, 'UTC'))), 0)) AS numbers + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-21 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-21 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC'))) AS day_start + UNION ALL SELECT quantile(0.5)(session_duration) AS total, + day_start + FROM + (SELECT any(e__session.duration) AS session_duration, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT events.`$session_id` AS id, + dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration + FROM events + WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1)) + GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) + WHERE and(equals(e.team_id, 2), 
greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY day_start, + e__session.id, + day_start) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trends_compare_day_interval_relative_range.2 +# name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns + ' + SELECT groupArray(value) + FROM + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns.1 ' SELECT groupArray(day_start) AS date, - groupArray(count) AS total + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value FROM (SELECT sum(total) AS count, - day_start + day_start, + breakdown_value FROM (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 
'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) + ticks.day_start AS day_start, + sec.breakdown_value + FROM + (SELECT minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')), 0), toIntervalWeek(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('week', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), 0) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [NULL, 'value2', 'value1'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT quantile(0.5)(session_duration) AS total, + day_start, + breakdown_value + FROM + (SELECT any(e__session.duration) AS session_duration, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value, + toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT events.`$session_id` AS id, + 
dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration + FROM events + WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1)) + GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0))) + GROUP BY day_start, + e__session.id, + breakdown_value, + day_start) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trends_count_per_user_average_aggregated +# name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns.2 ' - SELECT sum(count) AS total + SELECT groupArray(value) FROM - (SELECT sum(total) AS count - FROM - (SELECT total - FROM - (SELECT avg(total) AS total - FROM - (SELECT count(e.uuid) AS total - FROM events AS e SAMPLE 1 - INNER 
JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) - GROUP BY e__pdi.person_id)))) + (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + count(e.uuid) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:05', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) + GROUP BY value + ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trends_count_per_user_average_aggregated_poe_v2 +# name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns.3 ' - SELECT sum(count) AS total + SELECT groupArray(day_start) AS date, + groupArray(count) AS total, + ifNull(toString(breakdown_value), '') AS breakdown_value FROM - (SELECT sum(total) AS count + (SELECT sum(total) AS count, + day_start, + breakdown_value FROM - (SELECT total + (SELECT 0 AS total, + ticks.day_start AS day_start, + sec.breakdown_value FROM - (SELECT avg(total) AS total + 
(SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:05', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:05', 6, 'UTC'))) AS day_start) AS ticks + CROSS JOIN + (SELECT breakdown_value FROM - (SELECT count(e.uuid) AS total - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) - GROUP BY e.person_id)))) + (SELECT [NULL, 'value2', 'value1'] AS breakdown_value) ARRAY + JOIN breakdown_value AS breakdown_value) AS sec + ORDER BY sec.breakdown_value ASC, day_start ASC + UNION ALL SELECT quantile(0.5)(session_duration) AS total, + day_start, + breakdown_value + FROM + (SELECT any(e__session.duration) AS session_duration, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT events.`$session_id` AS id, + dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration + FROM events + WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:05', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 
ifNull(notEquals(id, ''), 1)) + GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:05', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0))) + GROUP BY day_start, + e__session.id, + breakdown_value, + day_start) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY sum(count) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trends_count_per_user_average_daily +# name: TestTrends.test_weekly_active_users_aggregated_range_narrower_than_week ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total + SELECT sum(count) AS total, + + (SELECT day_start + FROM + (SELECT 1 AS group_key, + groupArray(day_start) AS day_start + FROM + (SELECT day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + 
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))) AS day_start) + ORDER BY day_start ASC) + GROUP BY group_key)) AS date FROM - (SELECT sum(total) AS count, - day_start + (SELECT sum(total) AS count FROM - (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT total, - day_start + (SELECT count(DISTINCT actor_id) AS total FROM - (SELECT avg(total) AS total, - day_start + (SELECT d.timestamp, + e.actor_id FROM - (SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -2361,74 +4452,118 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed 
video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) - GROUP BY e__pdi.person_id, - day_start) - GROUP BY day_start)) - GROUP BY day_start - ORDER BY day_start ASC) + WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp, + e.actor_id + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)))) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trends_count_per_user_average_daily_poe_v2 +# name: TestTrends.test_weekly_active_users_aggregated_range_wider_than_week ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total + SELECT sum(count) AS total, + + (SELECT day_start + FROM + (SELECT 1 AS group_key, + groupArray(day_start) AS day_start + FROM + (SELECT day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start) + ORDER BY day_start ASC) + GROUP BY group_key)) AS date FROM - (SELECT sum(total) AS count, - day_start + (SELECT sum(total) AS count FROM - (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT total, - day_start + (SELECT count(DISTINCT actor_id) AS total FROM - (SELECT avg(total) AS total, - day_start + (SELECT d.timestamp, + e.actor_id FROM - (SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), 
toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) - GROUP BY e.person_id, - day_start) - GROUP BY day_start)) - GROUP BY day_start - ORDER BY day_start ASC) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp, + e.actor_id + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), 0)))) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ' --- -# name: TestTrends.test_trends_per_day_cumulative +# name: TestTrends.test_weekly_active_users_aggregated_range_wider_than_week_with_sampling ' - SELECT groupArray(day_start) AS date, - groupArray(count) AS total + SELECT sum(count) AS total, + + (SELECT day_start + FROM + (SELECT 1 AS group_key, + groupArray(day_start) AS day_start + FROM + 
(SELECT day_start + FROM + (SELECT 0 AS total, + minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start + FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), 0)) AS numbers + UNION ALL SELECT 0 AS total, + toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))) AS day_start) + ORDER BY day_start ASC) + GROUP BY group_key)) AS date FROM - (SELECT day_start, - sum(count) OVER ( - ORDER BY day_start ASC) AS count + (SELECT sum(total) AS count FROM - (SELECT sum(total) AS count, - day_start + (SELECT count(DISTINCT actor_id) AS total FROM - (SELECT 0 AS total, - minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start - FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) AS numbers - UNION ALL SELECT 0 AS total, - toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC)) + (SELECT d.timestamp, + e.actor_id + FROM + (SELECT 
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id + FROM events AS e SAMPLE 1.0 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp, + e.actor_id + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), 0)))) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -2471,9 +4606,10 @@ WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, 
toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) GROUP BY d.timestamp ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), 0))) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), 0))) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -2516,9 +4652,10 @@ WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) GROUP BY d.timestamp ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'America/Phoenix'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'America/Phoenix'))), 0))) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'America/Phoenix')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'America/Phoenix'))), 0))) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -2561,9 +4698,10 @@ WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) GROUP BY d.timestamp ORDER BY d.timestamp ASC) - WHERE 
and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'Asia/Tokyo'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'Asia/Tokyo'))), 0))) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'Asia/Tokyo')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'Asia/Tokyo'))), 0))) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -2616,9 +4754,10 @@ WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) GROUP BY d.timestamp ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -2671,9 +4810,10 @@ WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) GROUP BY d.timestamp ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -2716,9 +4856,10 @@ WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) GROUP BY d.timestamp ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 06:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 17:00:00', 6, 'UTC'))), 0))) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 06:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 17:00:00', 6, 'UTC'))), 0))) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -2761,9 +4902,10 @@ WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) GROUP BY d.timestamp ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'UTC'))), 0))) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 
00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'UTC'))), 0))) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -2806,9 +4948,10 @@ WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) GROUP BY d.timestamp ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'America/Phoenix'))), 0))) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), 0)), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'America/Phoenix'))), 0))) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -2851,9 +4994,10 @@ WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) GROUP BY d.timestamp ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'Asia/Tokyo'))), 0))) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), 0)), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 
23:59:59', 6, 'Asia/Tokyo'))), 0))) GROUP BY day_start ORDER BY day_start ASC) + ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 diff --git a/posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py b/posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py index 06ae5974976ee..fb6a71f8df57d 100644 --- a/posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py +++ b/posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py @@ -96,10 +96,11 @@ def test_all_cases_return( ], math_property: str, ): + team = Team() series = EventsNode(event="$pageview", math=math, math_property=math_property) - query_date_range = QueryDateRange(date_range=None, interval=None, now=datetime.now(), team=Team()) + query_date_range = QueryDateRange(date_range=None, interval=None, now=datetime.now(), team=team) - agg_ops = AggregationOperations(series, query_date_range) + agg_ops = AggregationOperations(team, series, query_date_range, False) res = agg_ops.select_aggregation() assert isinstance(res, ast.Expr) @@ -140,9 +141,10 @@ def test_requiring_query_orchestration( ], result: bool, ): + team = Team() series = EventsNode(event="$pageview", math=math) - query_date_range = QueryDateRange(date_range=None, interval=None, now=datetime.now(), team=Team()) + query_date_range = QueryDateRange(date_range=None, interval=None, now=datetime.now(), team=team) - agg_ops = AggregationOperations(series, query_date_range) + agg_ops = AggregationOperations(team, series, query_date_range, False) res = agg_ops.requires_query_orchestration() assert res == result diff --git a/posthog/hogql_queries/insights/trends/test/test_trends.py b/posthog/hogql_queries/insights/trends/test/test_trends.py index 41fb058baeba0..2e7d0c3568e43 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends.py @@ -8,6 +8,7 @@ from 
django.test import override_settings from django.utils import timezone from freezegun import freeze_time +import pytest from rest_framework.exceptions import ValidationError from posthog.constants import ( @@ -254,7 +255,7 @@ def _create_event(self, **kwargs): ) def _create_person(self, **kwargs): - _create_person(**kwargs) + person = _create_person(**kwargs) props = kwargs.get("properties") if props is not None: for key, value in props.items(): @@ -275,6 +276,7 @@ def _create_person(self, **kwargs): property_type=type, type=PropertyDefinition.Type.PERSON, ) + return person def _create_group(self, **kwargs): create_group(**kwargs) @@ -487,6 +489,13 @@ def test_trends_per_day(self): # just make sure this doesn't error def test_no_props(self): + PropertyDefinition.objects.create( + team=self.team, + name="$some_property", + property_type="String", + type=PropertyDefinition.Type.EVENT, + ) + with freeze_time("2020-01-04T13:01:01Z"): self._run( Filter( @@ -548,6 +557,9 @@ def test_trends_per_day_cumulative(self): self.assertEqual(response[0]["labels"][5], "2-Jan-2020") self.assertEqual(response[0]["data"][5], 4.0) + @pytest.mark.skip( + reason="This gets very complicated. 
To be revisited with team on definition of what a unique video view is" + ) @snapshot_clickhouse_queries def test_trends_groups_per_day_cumulative(self): self._create_event_count_per_actor_events() @@ -596,11 +608,11 @@ def test_trends_breakdown_cumulative(self): self.assertEqual(response[0]["labels"][4], "1-Jan-2020") self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]) - self.assertEqual(response[1]["label"], "sign up - other_value") - self.assertEqual(response[1]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0]) + self.assertEqual(response[1]["label"], "sign up - value") + self.assertEqual(response[1]["data"], [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]) - self.assertEqual(response[2]["label"], "sign up - value") - self.assertEqual(response[2]["data"], [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]) + self.assertEqual(response[2]["label"], "sign up - other_value") + self.assertEqual(response[2]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0]) def test_trends_single_aggregate_dau(self): self._create_events() @@ -1415,7 +1427,7 @@ def test_trends_breakdown_with_session_property_single_aggregate_math_and_breakd ) # Fourth session lasted 15 seconds - with freeze_time("2020-01-04T13:00:01Z"): + with freeze_time("2020-01-04T13:00:33Z"): daily_response = self._run( Filter( team=self.team, @@ -1440,7 +1452,7 @@ def test_trends_breakdown_with_session_property_single_aggregate_math_and_breakd # empty has: 1 seconds self.assertEqual( [resp["breakdown_value"] for resp in daily_response], - ["value1", "value2", ""], + ["value2", "value1", "none"], ) self.assertEqual([resp["aggregated_value"] for resp in daily_response], [12.5, 10, 1]) @@ -2662,7 +2674,7 @@ def test_trends_with_hogql_math(self): timestamp="2020-01-02 00:06:45", ) - with freeze_time("2020-01-04T13:00:01Z"): + with freeze_time("2020-01-04T12:01:01Z"): response = self._run( Filter( team=self.team, @@ -2672,7 +2684,7 @@ def test_trends_with_hogql_math(self): { "id": "sign up", "math": 
"hogql", - "math_hogql": "avg(toInt(properties.$session_id)) + 1000", + "math_hogql": "avg(properties.$session_id) + 1000", } ], }, @@ -2939,7 +2951,7 @@ def test_trends_with_session_property_total_volume_math_with_breakdowns(self): self.team, ) - with freeze_time("2020-01-04T13:00:01Z"): + with freeze_time("2020-01-04T13:00:05Z"): weekly_response = self._run( Filter( team=self.team, @@ -2960,12 +2972,12 @@ def test_trends_with_session_property_total_volume_math_with_breakdowns(self): # value1 has 0,5,10 seconds (in second interval) # value2 has 5,10,15 seconds (in second interval) - self.assertEqual([resp["breakdown_value"] for resp in daily_response], ["value1", "value2"]) + self.assertEqual([resp["breakdown_value"] for resp in daily_response], ["value2", "value1"]) self.assertCountEqual(daily_response[0]["labels"], ["22-Dec-2019", "29-Dec-2019"]) self.assertCountEqual(daily_response[0]["data"], [0, 10]) self.assertCountEqual(daily_response[1]["data"], [0, 5]) - self.assertEqual([resp["breakdown_value"] for resp in weekly_response], ["value1", "value2"]) + self.assertEqual([resp["breakdown_value"] for resp in weekly_response], ["value2", "value1"]) self.assertCountEqual( weekly_response[0]["labels"], [ @@ -3750,6 +3762,16 @@ def _create_multiple_people(self): journeys_for(events_by_person=journey, team=self.team) + for key in ["order", "name"]: + exists = PropertyDefinition.objects.filter(team=self.team, name=key).exists() + if not exists: + PropertyDefinition.objects.create( + team=self.team, + name=key, + property_type="String", + type=PropertyDefinition.Type.EVENT, + ) + return (person1, person2, person3, person4) @also_test_with_materialized_columns(person_properties=["name"]) @@ -4585,7 +4607,7 @@ def test_trends_aggregate_by_distinct_id(self): self.assertEqual(daily_response[1]["label"], "sign up - none") # MAU - with freeze_time("2019-12-31T13:00:01Z"): + with freeze_time("2019-12-31T13:00:03Z"): monthly_response = self._run( Filter( team=self.team, @@ 
-4612,6 +4634,12 @@ def test_trends_aggregate_by_distinct_id(self): self.assertEqual(weekly_response[0]["data"][0], 3) # this would be 2 without the aggregate hack # Make sure breakdown doesn't cause us to join on pdi + PropertyDefinition.objects.create( + team=self.team, + name="$some_prop", + property_type="String", + type=PropertyDefinition.Type.EVENT, + ) with freeze_time("2019-12-31T13:00:01Z"): daily_response = self._run( Filter( @@ -4723,8 +4751,8 @@ def test_breakdown_filtering(self): ) self.assertEqual(response[0]["label"], "sign up - none") - self.assertEqual(response[2]["label"], "sign up - other_value") self.assertEqual(response[1]["label"], "sign up - value") + self.assertEqual(response[2]["label"], "sign up - other_value") self.assertEqual(response[3]["label"], "no events - none") self.assertEqual(sum(response[0]["data"]), 2) @@ -5087,10 +5115,10 @@ def test_mau_with_breakdown_filtering_and_prop_filter(self): self.assertEqual(event_response[0]["label"], "sign up - some_val") self.assertEqual(event_response[1]["label"], "sign up - some_val2") - self.assertEqual(sum(event_response[0]["data"]), 2) + self.assertEqual(sum(event_response[0]["data"]), 3) self.assertEqual(event_response[0]["data"][5], 1) - self.assertEqual(sum(event_response[1]["data"]), 2) + self.assertEqual(sum(event_response[1]["data"]), 3) self.assertEqual(event_response[1]["data"][5], 1) @also_test_with_materialized_columns(["$some_property"]) @@ -5353,6 +5381,9 @@ def test_person_filtering_in_cohort_in_action(self): if step: step.properties = [{"key": "id", "value": cohort.pk, "type": "cohort"}] step.save() + + cohort.calculate_people_ch(pending_version=0) + with freeze_time("2020-01-04T13:01:01Z"): action_response = self._run( Filter( @@ -6085,7 +6116,7 @@ def test_breakdown_weekly_active_users_daily(self): result = self._run(filter, self.team) self.assertEqual( result[0]["data"], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
2.0, 2.0, 2.0, 2.0], ) @also_test_with_materialized_columns(person_properties=["name"]) @@ -6231,6 +6262,8 @@ def test_breakdown_weekly_active_users_daily_based_on_action(self): ], ) + cohort.calculate_people_ch(pending_version=0) + data = { "date_from": "2020-01-01T00:00:00Z", "date_to": "2020-01-12T00:00:00Z", @@ -6249,7 +6282,7 @@ def test_breakdown_weekly_active_users_daily_based_on_action(self): result = self._run(filter, self.team) self.assertEqual( result[0]["data"], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0], ) @also_test_with_materialized_columns(["key"]) @@ -6430,8 +6463,8 @@ def test_breakdown_multiple_cohorts(self): self.team, ) - self.assertEqual(res[0]["count"], 2) - self.assertEqual(res[1]["count"], 1) + self.assertEqual(res[0]["count"], 1) + self.assertEqual(res[1]["count"], 2) @also_test_with_materialized_columns(person_properties=["key", "key_2"], verify_no_jsonextract=False) def test_breakdown_single_cohort(self): @@ -6529,11 +6562,12 @@ def test_filtering_with_action_props(self): self.assertEqual(response[0]["count"], 2) + @pytest.mark.skip(reason="We dont currently error out for this, but fallback instead. 
Good enough for now") def test_trends_math_without_math_property(self): with self.assertRaises(ValidationError): self._run(Filter(data={"events": [{"id": "sign up", "math": "sum"}]}), self.team) - @patch("posthog.queries.trends.trends.insight_sync_execute") + @patch("posthog.hogql_queries.insights.trends.trends_query_runner.execute_hogql_query") def test_should_throw_exception(self, patch_sync_execute): self._create_events() patch_sync_execute.side_effect = Exception() @@ -6940,6 +6974,7 @@ def test_non_deterministic_timezones(self): self.team.timezone = "US/Pacific" self.team.save() self._create_person(team_id=self.team.pk, distinct_ids=["blabla"], properties={}) + with freeze_time("2022-11-03T01:01:01Z"): self._create_event( team=self.team, @@ -7006,7 +7041,7 @@ def test_non_deterministic_timezones(self): team=self.team, data={ "date_from": "-30d", - "events": [{"id": "sign up", "name": "sign up", "math": "wau"}], + "events": [{"id": "sign up", "name": "sign up"}], "interval": "week", }, ), @@ -7183,7 +7218,7 @@ def test_same_day_with_person_on_events_v2(self): ), self.team, ) - self.assertEqual(response[0]["data"], [1.0]) + self.assertEqual(response[0]["data"], [2.0]) @override_settings(PERSON_ON_EVENTS_V2_OVERRIDE=True) @snapshot_clickhouse_queries @@ -7454,7 +7489,7 @@ def test_trends_count_per_user_average_with_event_property_breakdown(self): assert len(daily_response) == 3 assert daily_response[0]["breakdown_value"] == "red" assert daily_response[1]["breakdown_value"] == "blue" - assert daily_response[2]["breakdown_value"] == "" + assert daily_response[2]["breakdown_value"] == "none" assert daily_response[0]["days"] == [ "2020-01-01", "2020-01-02", @@ -7523,11 +7558,11 @@ def test_trends_count_per_user_average_aggregated_with_event_property_breakdown( assert len(daily_response) == 3 assert daily_response[0]["breakdown_value"] == "red" - assert daily_response[1]["breakdown_value"] == "blue" - assert daily_response[2]["breakdown_value"] == "" + assert 
daily_response[1]["breakdown_value"] == "none" + assert daily_response[2]["breakdown_value"] == "blue" assert daily_response[0]["aggregated_value"] == 2.0 # red - assert daily_response[1]["aggregated_value"] == 1.0 # blue - assert daily_response[2]["aggregated_value"] == 1.0 # none + assert daily_response[1]["aggregated_value"] == 1.0 # none + assert daily_response[2]["aggregated_value"] == 1.0 # blue @snapshot_clickhouse_queries def test_trends_count_per_user_average_aggregated_with_event_property_breakdown_with_sampling(self): @@ -7550,12 +7585,14 @@ def test_trends_count_per_user_average_aggregated_with_event_property_breakdown_ assert len(daily_response) == 3 assert daily_response[0]["breakdown_value"] == "red" - assert daily_response[1]["breakdown_value"] == "blue" - assert daily_response[2]["breakdown_value"] == "" + assert daily_response[1]["breakdown_value"] == "none" + assert daily_response[2]["breakdown_value"] == "blue" assert daily_response[0]["aggregated_value"] == 2.0 # red - assert daily_response[1]["aggregated_value"] == 1.0 # blue - assert daily_response[2]["aggregated_value"] == 1.0 # none + assert daily_response[1]["aggregated_value"] == 1.0 # none + assert daily_response[2]["aggregated_value"] == 1.0 # blue + # TODO: Add support for avg_count by group indexes (see this Slack thread for more context: https://posthog.slack.com/archives/C0368RPHLQH/p1700484174374229) + @pytest.mark.skip(reason="support for avg_count_per_actor not included yet") @snapshot_clickhouse_queries def test_trends_count_per_group_average_daily(self): self._create_event_count_per_actor_events() @@ -7603,6 +7640,8 @@ def test_trends_count_per_group_average_daily(self): 0.0, # No events at all ] + # TODO: Add support for avg_count by group indexes (see this Slack thread for more context: https://posthog.slack.com/archives/C0368RPHLQH/p1700484174374229) + @pytest.mark.skip(reason="support for avg_count_per_actor not included yet") @snapshot_clickhouse_queries def 
test_trends_count_per_group_average_aggregated(self): self._create_event_count_per_actor_events() diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index 77e7ba2d89faa..8cbbf84be1a7a 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -160,21 +160,30 @@ def get_value(name: str, val: Any): res = [] for val in response.results: + try: + series_label = self.series_event(series.series) + except Action.DoesNotExist: + # Dont append the series if the action doesnt exist + continue + if series.aggregate_values: series_object = { "data": [], - "days": [], + "days": [ + item.strftime( + "%Y-%m-%d{}".format(" %H:%M:%S" if self.query_date_range.interval_name == "hour" else "") + ) + for item in get_value("date", val) + ], "count": 0, "aggregated_value": get_value("total", val), - "label": "All events" - if self.series_event(series.series) is None - else self.series_event(series.series), + "label": "All events" if series_label is None else series_label, "filter": self._query_to_filter(), "action": { # TODO: Populate missing props in `action` - "id": self.series_event(series.series), + "id": series_label, "type": "events", "order": 0, - "name": self.series_event(series.series) or "All events", + "name": series_label or "All events", "custom_name": None, "math": series.series.math, "math_property": None, @@ -199,15 +208,13 @@ def get_value(name: str, val: Any): for item in get_value("date", val) ], "count": float(sum(get_value("total", val))), - "label": "All events" - if self.series_event(series.series) is None - else self.series_event(series.series), + "label": "All events" if series_label is None else series_label, "filter": self._query_to_filter(), "action": { # TODO: Populate missing props in `action` - "id": self.series_event(series.series), + "id": series_label, "type": "events", "order": 0, - 
"name": self.series_event(series.series) or "All events", + "name": series_label or "All events", "custom_name": None, "math": series.series.math, "math_property": None, @@ -240,10 +247,10 @@ def get_value(name: str, val: Any): series_object["breakdown_value"] = remapped_label elif self.query.breakdown.breakdown_type == "cohort": cohort_id = get_value("breakdown_value", val) - cohort_name = "all users" if cohort_id == 0 else Cohort.objects.get(pk=cohort_id).name + cohort_name = "all users" if str(cohort_id) == "0" else Cohort.objects.get(pk=cohort_id).name series_object["label"] = "{} - {}".format(series_object["label"], cohort_name) - series_object["breakdown_value"] = "all" if cohort_id == 0 else cohort_id + series_object["breakdown_value"] = "all" if str(cohort_id) == "0" else int(cohort_id) else: remapped_label = get_value("breakdown_value", val) if remapped_label == "" or remapped_label is None: diff --git a/posthog/hogql_queries/utils/query_date_range.py b/posthog/hogql_queries/utils/query_date_range.py index d8897cc010803..68de2e7aae4e4 100644 --- a/posthog/hogql_queries/utils/query_date_range.py +++ b/posthog/hogql_queries/utils/query_date_range.py @@ -172,8 +172,6 @@ def use_start_of_interval(self): return True is_delta_hours = delta_mapping.get("hours", None) is not None - is_delta_days = delta_mapping.get("days", None) is not None - is_delta_weeks = delta_mapping.get("weeks", None) is not None if interval == IntervalType.hour: return False @@ -182,16 +180,8 @@ def use_start_of_interval(self): return False else: return True - elif interval == IntervalType.week: - if is_delta_hours or is_delta_days: - return False - else: - return True - elif interval == IntervalType.month: - if is_delta_hours or is_delta_days or is_delta_weeks: - return False - else: - return True + elif interval == IntervalType.week or interval == IntervalType.month: + return True return True diff --git a/posthog/hogql_queries/utils/query_previous_period_date_range.py 
b/posthog/hogql_queries/utils/query_previous_period_date_range.py index c127ac3e36d07..d4761fb34e855 100644 --- a/posthog/hogql_queries/utils/query_previous_period_date_range.py +++ b/posthog/hogql_queries/utils/query_previous_period_date_range.py @@ -60,6 +60,7 @@ def dates(self) -> Tuple[datetime, datetime]: self.date_from_delta_mappings(), self.date_to_delta_mappings(), self.interval_name, + True, ) return previous_period_date_from, previous_period_date_to diff --git a/posthog/queries/test/test_trends.py b/posthog/queries/test/test_trends.py index 63b7024d3d6bf..f948b4ca63b00 100644 --- a/posthog/queries/test/test_trends.py +++ b/posthog/queries/test/test_trends.py @@ -5439,7 +5439,7 @@ def test_person_filtering_in_cohort_in_action(self): if step: step.properties = [{"key": "id", "value": cohort.pk, "type": "cohort"}] step.save() - with freeze_time("2020-01-04T13:01:01Z"): + with freeze_time("2020-01-04T14:01:01Z"): action_response = Trends().run( Filter( team=self.team, diff --git a/posthog/utils.py b/posthog/utils.py index b510712ba5d89..f6a02e6807ea7 100644 --- a/posthog/utils.py +++ b/posthog/utils.py @@ -550,6 +550,7 @@ def get_compare_period_dates( date_from_delta_mapping: Optional[Dict[str, int]], date_to_delta_mapping: Optional[Dict[str, int]], interval: str, + ignore_date_from_alignment: bool = False, # New HogQL trends no longer requires the adjustment ) -> Tuple[datetime.datetime, datetime.datetime]: diff = date_to - date_from new_date_from = date_from - diff @@ -568,6 +569,7 @@ def get_compare_period_dates( and date_from_delta_mapping.get("days", None) and date_from_delta_mapping["days"] % 7 == 0 and not date_to_delta_mapping + and not ignore_date_from_alignment ): # KLUDGE: Unfortunately common relative date ranges such as "Last 7 days" (-7d) or "Last 14 days" (-14d) # are wrong because they treat the current ongoing day as an _extra_ one. 
This means that those ranges From 805f5e3ca1acf4f353e53087c717b4b036ed4b52 Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Thu, 23 Nov 2023 10:01:18 +0100 Subject: [PATCH 07/14] better error --- posthog/hogql/printer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/posthog/hogql/printer.py b/posthog/hogql/printer.py index 3bb0139b4b6f1..f041367d23071 100644 --- a/posthog/hogql/printer.py +++ b/posthog/hogql/printer.py @@ -871,7 +871,10 @@ def visit_field_type(self, type: ast.FieldType): field_sql = "person_props" else: - raise HogQLException(f"Unknown FieldType table type: {type.table_type.__class__.__name__}") + error = f"Can't access field '{type.name}' on a table with type '{type.table_type.__class__.__name__}'." + if isinstance(type.table_type, ast.LazyJoinType): + error += f" Lazy joins should have all been replaced in the resolver." + raise HogQLException(error) return field_sql From 0710024384eedd4557617c00d216396cb71bf4b1 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Thu, 23 Nov 2023 13:42:34 +0000 Subject: [PATCH 08/14] Fixed the final test --- .../hogql/database/schema/event_sessions.py | 20 +- .../test/__snapshots__/test_trends.ambr | 248 +++++++++++++++--- .../insights/trends/test/test_trends.py | 3 +- .../insights/trends/trends_query_runner.py | 2 +- 4 files changed, 235 insertions(+), 38 deletions(-) diff --git a/posthog/hogql/database/schema/event_sessions.py b/posthog/hogql/database/schema/event_sessions.py index b1df5e894ee1c..4b8a3a8580357 100644 --- a/posthog/hogql/database/schema/event_sessions.py +++ b/posthog/hogql/database/schema/event_sessions.py @@ -55,6 +55,18 @@ def visit_field(self, node: ast.Field): return super().visit_field(node) +class ContainsLazyJoinType(TraversingVisitor): + contains_lazy_join: bool + + def __init__(self, expr: ast.Expr): + super().__init__() + self.contains_lazy_join = False + super().visit(expr) + + def visit_lazy_join_type(self, node: ast.LazyJoinType): + self.contains_lazy_join = 
True + + class WhereClauseExtractor: compare_operators: List[ast.Expr] @@ -109,7 +121,7 @@ def _is_field_on_table(self, field: ast.Field) -> bool: def run(self, expr: ast.Expr) -> List[ast.Expr]: exprs_to_apply: List[ast.Expr] = [] - def should_add(fields: List[ast.Field]) -> bool: + def should_add(expression: ast.Expr, fields: List[ast.Field]) -> bool: for field in fields: on_table = self._is_field_on_table(field) if not on_table: @@ -119,6 +131,10 @@ def should_add(fields: List[ast.Field]) -> bool: if field.chain[-1] == "event": return False + # Ignroe if there's a lazy join involved + if ContainsLazyJoinType(expression).contains_lazy_join: + return False + return True if isinstance(expr, ast.And): @@ -128,7 +144,7 @@ def should_add(fields: List[ast.Field]) -> bool: fields = GetFieldsTraverser(expression).fields - if should_add(fields): + if should_add(expression, fields): exprs_to_apply.append(expression) elif isinstance(expr, ast.CompareOperation): exprs_to_apply.extend(self.run(ast.And(exprs=[expr]))) diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index 94ad628c418b7..bb70f8da7c8ab 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -509,7 +509,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id, + e__pdi__person.id AS actor_id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN @@ -519,6 +519,12 @@ WHERE equals(person_distinct_id2.team_id, 
2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'bor'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id, breakdown_value) AS e @@ -587,7 +593,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id, + e__pdi__person.id AS actor_id, nullIf(nullIf(e.mat_key, ''), 'null') AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN @@ -597,6 +603,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT 
person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(isNull(nullIf(nullIf(e.mat_key, ''), 'null')), ifNull(equals(nullIf(nullIf(e.mat_key, ''), 'null'), 'val'), 0), ifNull(equals(nullIf(nullIf(e.mat_key, ''), 'null'), 'bor'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id, breakdown_value) AS e @@ -708,7 +720,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id, + e__pdi__person.id AS actor_id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN @@ -945,7 +957,7 @@ (SELECT [NULL, 'other_value', 'value'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT count(DISTINCT e__pdi__person.id) AS total, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value FROM events AS e SAMPLE 1.0 @@ -956,6 +968,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id 
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0))) GROUP BY day_start, breakdown_value) @@ -1007,7 +1025,7 @@ (SELECT [NULL, 'other_value', 'value'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT count(DISTINCT e__pdi__person.id) AS total, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value FROM events AS e SAMPLE 1.0 @@ -1018,6 +1036,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN 
+ (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0))) GROUP BY day_start, breakdown_value) @@ -1363,7 +1387,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC'))), toIntervalDay(30)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id, + e__pdi__person.id AS actor_id, e__pdi__person.`properties___$some_prop` AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN @@ -2012,7 +2036,7 @@ FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT 
count(DISTINCT e__pdi__person.id) AS total, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN @@ -2022,6 +2046,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start @@ -2055,7 +2085,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -2064,6 +2094,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING 
ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -2142,7 +2178,7 @@ (SELECT [NULL, 'Mac'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT count(DISTINCT e__pdi__person.id) AS total, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value FROM events AS e SAMPLE 1 @@ -2153,6 +2189,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) GROUP BY day_start, breakdown_value) @@ -2205,7 +2247,7 @@ FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 06:01:01', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) AS numbers UNION ALL SELECT 0 AS total, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 06:01:01', 6, 'America/Phoenix'))) AS day_start - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT count(DISTINCT e__pdi__person.id) AS total, toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN @@ -2215,6 +2257,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 06:01:01', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start @@ -2248,7 
+2296,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'America/Phoenix') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -2257,6 +2305,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -2335,7 +2389,7 @@ (SELECT [NULL, 'Mac'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT count(DISTINCT e__pdi__person.id) AS total, toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start, 
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value FROM events AS e SAMPLE 1 @@ -2346,6 +2400,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 06:01:01', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) GROUP BY day_start, breakdown_value) @@ -2398,7 +2458,7 @@ FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 22:01:01', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers UNION ALL SELECT 0 AS total, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 22:01:01', 6, 'Asia/Tokyo'))) AS day_start - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT count(DISTINCT e__pdi__person.id) AS total, toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN @@ -2408,6 +2468,12 @@ WHERE 
equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 22:01:01', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start @@ -2441,7 +2507,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'Asia/Tokyo') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -2450,6 +2516,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, 
e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -2528,7 +2600,7 @@ (SELECT [NULL, 'Mac'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT count(DISTINCT e__pdi__person.id) AS total, toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS breakdown_value FROM events AS e SAMPLE 1 @@ -2539,6 +2611,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 22:01:01', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), 
or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) GROUP BY day_start, breakdown_value) @@ -2565,7 +2643,7 @@ FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), 0)) AS numbers UNION ALL SELECT 0 AS total, toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC'))) AS day_start - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT count(DISTINCT e__pdi__person.id) AS total, toStartOfHour(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN @@ -2575,6 +2653,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start @@ -2624,7 +2708,7 @@ FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 
'America/Phoenix'))), 0)) AS numbers UNION ALL SELECT 0 AS total, toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix'))) AS day_start - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT count(DISTINCT e__pdi__person.id) AS total, toStartOfHour(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN @@ -2634,6 +2718,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'America/Phoenix'))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start @@ -2683,7 +2773,7 @@ FROM numbers(coalesce(dateDiff('hour', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), 0)) AS numbers UNION ALL SELECT 0 AS total, toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo'))) AS day_start - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT count(DISTINCT e__pdi__person.id) AS total, toStartOfHour(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN 
@@ -2693,6 +2783,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'Asia/Tokyo'))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')) GROUP BY day_start) GROUP BY day_start @@ -3461,7 +3557,7 @@ (SELECT [NULL, 'value', 'other_value'] AS breakdown_value) ARRAY JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC - UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total, + UNION ALL SELECT count(DISTINCT e__pdi__person.id) AS total, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS breakdown_value FROM events AS e SAMPLE 1 @@ -3472,6 +3568,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, 
e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:00:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start, breakdown_value) @@ -3783,8 +3885,14 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) - GROUP BY e__pdi.person_id)))) + GROUP BY e__pdi__person.id)))) ORDER BY sum(count) DESC LIMIT 100 SETTINGS readonly=2, max_execution_time=60, @@ -3880,8 +3988,14 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, 
person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(equals(e.event, 'viewed video'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), 'red'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), 'blue'), 0))), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) - GROUP BY e__pdi.person_id, + GROUP BY e__pdi__person.id, breakdown_value) GROUP BY breakdown_value)) GROUP BY breakdown_value @@ -3922,8 +4036,14 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), 
lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) - GROUP BY e__pdi.person_id, + GROUP BY e__pdi__person.id, day_start) GROUP BY day_start)) GROUP BY day_start @@ -4443,7 +4563,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4452,6 +4572,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -4495,7 +4621,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4504,6 +4630,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -4547,7 +4679,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1.0 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4556,6 +4688,12 @@ WHERE 
equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -4592,7 +4730,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4601,6 +4739,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 
0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -4638,7 +4782,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'America/Phoenix'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'America/Phoenix')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'America/Phoenix') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4647,6 +4791,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'America/Phoenix')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'America/Phoenix'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -4684,7 +4834,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'Asia/Tokyo'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'Asia/Tokyo')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'Asia/Tokyo') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4693,6 +4843,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 00:00:00', 6, 'Asia/Tokyo')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'Asia/Tokyo'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -4730,7 +4886,7 @@ FROM numbers(dateDiff('day', 
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4786,7 +4942,7 @@ FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4842,7 +4998,7 @@ FROM numbers(dateDiff('hour', minus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 06:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 17:00:00', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4851,6 +5007,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS 
optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 06:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 17:00:00', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -4888,7 +5050,7 @@ FROM numbers(dateDiff('week', minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), 0), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'UTC')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4897,6 +5059,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 
6, 'UTC'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -4934,7 +5102,7 @@ FROM numbers(dateDiff('week', minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), 0), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'America/Phoenix')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'America/Phoenix') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4943,6 +5111,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'America/Phoenix'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -4980,7 +5154,7 @@ FROM numbers(dateDiff('week', minus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 
00:00:00', 6, 'Asia/Tokyo')), 0), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'Asia/Tokyo')))) AS numbers) AS d CROSS JOIN (SELECT toTimeZone(e.timestamp, 'Asia/Tokyo') AS timestamp, - e__pdi.person_id AS actor_id + e__pdi__person.id AS actor_id FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4989,6 +5163,12 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-18 23:59:59', 6, 'Asia/Tokyo'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) diff --git a/posthog/hogql_queries/insights/trends/test/test_trends.py b/posthog/hogql_queries/insights/trends/test/test_trends.py index 2e7d0c3568e43..f0b179b30ecf6 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends.py @@ -962,8 +962,8 @@ def test_unique_session_with_session_breakdown(self): self.assertEqual( [(item["breakdown_value"], item["count"], item["data"]) for item in response], [ - ("[0.0,4.95]", 1.0, [1.0, 
0.0, 0.0, 0.0]), ("[4.95,10.05]", 2.0, [2.0, 0.0, 0.0, 0.0]), + ("[0.0,4.95]", 1.0, [1.0, 0.0, 0.0, 0.0]), ("[10.05,15.01]", 1.0, [0.0, 1.0, 0.0, 0.0]), ], ) @@ -7220,6 +7220,7 @@ def test_same_day_with_person_on_events_v2(self): ) self.assertEqual(response[0]["data"], [2.0]) + @pytest.mark.skip(reason="PoE V2 doesnt work with HogQL yet") @override_settings(PERSON_ON_EVENTS_V2_OVERRIDE=True) @snapshot_clickhouse_queries def test_same_day_with_person_on_events_v2_latest_override(self): diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index 8cbbf84be1a7a..086f3720d5537 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -253,7 +253,7 @@ def get_value(name: str, val: Any): series_object["breakdown_value"] = "all" if str(cohort_id) == "0" else int(cohort_id) else: remapped_label = get_value("breakdown_value", val) - if remapped_label == "" or remapped_label is None: + if remapped_label == "" or remapped_label == '["",""]' or remapped_label is None: # Skip the "none" series if it doesn't have any data if series_object["count"] == 0 and series_object.get("aggregated_value", 0) == 0: continue From 9e94f88069aac7c5a0731f54b910392941f2c2d2 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Thu, 23 Nov 2023 13:47:09 +0000 Subject: [PATCH 09/14] Remove commented code --- posthog/hogql_queries/insights/trends/breakdown.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/posthog/hogql_queries/insights/trends/breakdown.py b/posthog/hogql_queries/insights/trends/breakdown.py index d56aba837b350..b723d38d8976a 100644 --- a/posthog/hogql_queries/insights/trends/breakdown.py +++ b/posthog/hogql_queries/insights/trends/breakdown.py @@ -108,12 +108,6 @@ def events_where_filter(self) -> ast.Expr | None: return ast.Or(exprs=compare_ops) - # return ast.CompareOperation( - # left=left, - # 
op=ast.CompareOperationOp.In, - # right=self._breakdown_values_ast, - # ) - @cached_property def _breakdown_buckets_ast(self) -> ast.Array: buckets = self._get_breakdown_histogram_buckets() From 0840df2f48443e50e46d05d8789c8e3162121f2d Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Thu, 23 Nov 2023 13:48:24 +0000 Subject: [PATCH 10/14] Update posthog/hogql/database/schema/event_sessions.py Co-authored-by: Marius Andra --- posthog/hogql/database/schema/event_sessions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/hogql/database/schema/event_sessions.py b/posthog/hogql/database/schema/event_sessions.py index 4b8a3a8580357..3660e87bda31d 100644 --- a/posthog/hogql/database/schema/event_sessions.py +++ b/posthog/hogql/database/schema/event_sessions.py @@ -131,7 +131,7 @@ def should_add(expression: ast.Expr, fields: List[ast.Field]) -> bool: if field.chain[-1] == "event": return False - # Ignroe if there's a lazy join involved + # Ignore if there's a lazy join involved if ContainsLazyJoinType(expression).contains_lazy_join: return False From f06857d0fd275834135b008ff5368a89f31ae5e8 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Thu, 23 Nov 2023 13:53:41 +0000 Subject: [PATCH 11/14] Fixed code formatting --- posthog/hogql_queries/insights/trends/trends_query_runner.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index 9151355dfc687..18c0aa1efa29f 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -264,9 +264,7 @@ def get_value(name: str, val: Any): # If there's multiple series, include the object label in the series label if series_count > 1: - series_object["label"] = "{} - {}".format( - series_object["label"], remapped_label - ) + series_object["label"] = "{} - {}".format(series_object["label"], 
remapped_label) else: series_object["label"] = remapped_label From 3752dbaa52374a9f680d262b0856702e20a5da05 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Thu, 23 Nov 2023 16:35:02 +0000 Subject: [PATCH 12/14] Fixed tests --- .../schema/test/test_event_sessions.py | 26 +++++----- posthog/hogql/property.py | 19 ++++---- .../test/__snapshots__/test_trends.ambr | 40 +++++++--------- .../test/test_aggregation_operations.py | 1 + .../insights/trends/test/test_trends.py | 48 +++++++++---------- 5 files changed, 66 insertions(+), 68 deletions(-) diff --git a/posthog/hogql/database/schema/test/test_event_sessions.py b/posthog/hogql/database/schema/test/test_event_sessions.py index 268180a773e6c..88a2d6b3f3e30 100644 --- a/posthog/hogql/database/schema/test/test_event_sessions.py +++ b/posthog/hogql/database/schema/test/test_event_sessions.py @@ -30,7 +30,7 @@ def test_with_simple_equality_clause(self): """ SELECT event FROM events - WHERE event = '$pageview' + WHERE team_id = 1 """ ) @@ -38,9 +38,9 @@ def test_with_simple_equality_clause(self): assert len(compare_operators) == 1 assert compare_operators[0] == ast.CompareOperation( - left=ast.Field(chain=["event"]), + left=ast.Field(chain=["team_id"]), op=ast.CompareOperationOp.Eq, - right=ast.Constant(value="$pageview"), + right=ast.Constant(value=1), ) def test_with_timestamps(self): @@ -66,7 +66,7 @@ def test_with_alias_table(self): """ SELECT e.event FROM events e - WHERE e.event = '$pageview' + WHERE e.team_id = 1 """ ) @@ -74,9 +74,9 @@ def test_with_alias_table(self): assert len(compare_operators) == 1 assert compare_operators[0] == ast.CompareOperation( - left=ast.Field(chain=["event"]), + left=ast.Field(chain=["team_id"]), op=ast.CompareOperationOp.Eq, - right=ast.Constant(value="$pageview"), + right=ast.Constant(value=1), ) def test_with_multiple_clauses(self): @@ -84,7 +84,7 @@ def test_with_multiple_clauses(self): """ SELECT event FROM events - WHERE event = '$pageview' AND timestamp > '2023-01-01' + WHERE 
team_id = 1 AND timestamp > '2023-01-01' """ ) @@ -92,9 +92,9 @@ def test_with_multiple_clauses(self): assert len(compare_operators) == 2 assert compare_operators[0] == ast.CompareOperation( - left=ast.Field(chain=["event"]), + left=ast.Field(chain=["team_id"]), op=ast.CompareOperationOp.Eq, - right=ast.Constant(value="$pageview"), + right=ast.Constant(value=1), ) assert compare_operators[1] == ast.CompareOperation( left=ast.Field(chain=["timestamp"]), @@ -109,7 +109,7 @@ def test_with_join(self): FROM events e LEFT JOIN persons p ON e.person_id = p.id - WHERE e.event = '$pageview' and p.is_identified = 0 + WHERE e.team_id = 1 and p.is_identified = 0 """ ) @@ -117,9 +117,9 @@ def test_with_join(self): assert len(compare_operators) == 1 assert compare_operators[0] == ast.CompareOperation( - left=ast.Field(chain=["event"]), + left=ast.Field(chain=["team_id"]), op=ast.CompareOperationOp.Eq, - right=ast.Constant(value="$pageview"), + right=ast.Constant(value=1), ) def test_with_ignoring_ors(self): @@ -127,7 +127,7 @@ def test_with_ignoring_ors(self): """ SELECT event FROM events - WHERE event = '$pageleave' OR event = '$pageview' + WHERE team_id = 1 OR team_id = 2 """ ) diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py index 780ddc96489b9..1d78a4c87c076 100644 --- a/posthog/hogql/property.py +++ b/posthog/hogql/property.py @@ -122,6 +122,15 @@ def property_to_expr( ) operator = cast(Optional[PropertyOperator], property.operator) or PropertyOperator.exact value = property.value + + if property.type == "person" and scope != "person": + chain = ["person", "properties"] + elif property.type == "group": + chain = [f"group_{property.group_type_index}", "properties"] + else: + chain = ["properties"] + field = ast.Field(chain=chain + [property.key]) + if isinstance(value, list): if len(value) == 0: return ast.Constant(value=True) @@ -137,7 +146,7 @@ def property_to_expr( return ast.CompareOperation( op=op, - left=ast.Field(chain=["properties", property.key]), 
+ left=field, right=ast.Tuple(exprs=[ast.Constant(value=v) for v in value]), ) else: @@ -158,14 +167,6 @@ def property_to_expr( return ast.And(exprs=exprs) return ast.Or(exprs=exprs) - if property.type == "person": - chain = ["person", "properties"] - elif property.type == "group": - chain = [f"group_{property.group_type_index}", "properties"] - else: - chain = ["properties"] - - field = ast.Field(chain=chain + [property.key]) properties_field = ast.Field(chain=chain) if operator == PropertyOperator.is_set: diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index bb70f8da7c8ab..f93e0ecddbc56 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -672,12 +672,12 @@ WHERE equals(person.team_id, 2) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, - (SELECT cohortpeople.person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 21)) - GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version - HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) + WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), and(ifNull(in(e__pdi__person.properties___name, tuple('p1', 'p2', 'p3')), 0), ifNull(in(e__pdi.person_id, + (SELECT cohortpeople.person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 21)) + 
GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, @@ -740,12 +740,12 @@ WHERE equals(person.team_id, 2) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), and(and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, - (SELECT cohortpeople.person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 21)) - GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version - HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) + WHERE and(equals(e.team_id, 2), and(and(equals(e.event, '$pageview'), and(ifNull(in(e__pdi__person.properties___name, tuple('p1', 'p2', 'p3')), 0), ifNull(in(e__pdi.person_id, + (SELECT cohortpeople.person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 21)) + GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), 
or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id, breakdown_value) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -3007,8 +3007,7 @@ WHERE equals(person.team_id, 2) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), isNotNull(e__pdi__person.properties___email) - or isNotNull('%@posthog.com%')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0)))) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 
23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), 1), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0)))) GROUP BY value ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, @@ -3062,8 +3061,7 @@ WHERE equals(person.team_id, 2) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), isNotNull(e__pdi__person.properties___email) - or isNotNull('%@posthog.com%')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))), or(isNull(e__pdi__person.properties___email), ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test@gmail.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test5@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test4@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test3@posthog.com'), 0))) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), 
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), 1), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))), or(isNull(e__pdi__person.properties___email), ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test@gmail.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test5@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test4@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test3@posthog.com'), 0))) GROUP BY day_start, breakdown_value) GROUP BY day_start, @@ -3102,8 +3100,7 @@ WHERE equals(person.team_id, 2) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 
isNull(e__pdi__person.properties___email) - and isNull('%@posthog.com%')))) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 0))) GROUP BY value ORDER BY count DESC, value DESC) LIMIT 100 SETTINGS readonly=2, @@ -3157,8 +3154,7 @@ WHERE equals(person.team_id, 2) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), isNull(e__pdi__person.properties___email) - and isNull('%@posthog.com%'))), or(isNull(e__pdi__person.properties___email), ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0))) + WHERE and(equals(e.team_id, 2), 
greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 0)), or(isNull(e__pdi__person.properties___email), ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0))) GROUP BY day_start, breakdown_value) GROUP BY day_start, @@ -4905,7 +4901,7 @@ WHERE equals(person.team_id, 2) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(e__pdi__person.properties___name, 'person-1'), 0), ifNull(equals(e__pdi__person.properties___name, 'person-2'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) + WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), ifNull(in(e__pdi__person.properties___name, tuple('person-1', 'person-2')), 0)), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE 
and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) GROUP BY d.timestamp @@ -4961,7 +4957,7 @@ WHERE equals(person.team_id, 2) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(e__pdi__person.properties___name, 'person-1'), 0), ifNull(equals(e__pdi__person.properties___name, 'person-2'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) + WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), ifNull(in(e__pdi__person.properties___name, tuple('person-1', 'person-2')), 0)), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) GROUP BY d.timestamp diff --git a/posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py b/posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py index fb6a71f8df57d..e69eb3e96f8b7 100644 --- a/posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py +++ b/posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py @@ -86,6 +86,7 @@ def test_replace_select_from(self): ["hogql", None], ], ) +@pytest.mark.django_db def 
test_all_cases_return( math: Union[ BaseMathType, diff --git a/posthog/hogql_queries/insights/trends/test/test_trends.py b/posthog/hogql_queries/insights/trends/test/test_trends.py index f0b179b30ecf6..dcf71a3b7d2ca 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends.py @@ -604,14 +604,14 @@ def test_trends_breakdown_cumulative(self): self.team, ) - self.assertEqual(response[0]["label"], "sign up - none") + self.assertEqual(response[0]["label"], "none") self.assertEqual(response[0]["labels"][4], "1-Jan-2020") self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]) - self.assertEqual(response[1]["label"], "sign up - value") + self.assertEqual(response[1]["label"], "value") self.assertEqual(response[1]["data"], [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]) - self.assertEqual(response[2]["label"], "sign up - other_value") + self.assertEqual(response[2]["label"], "other_value") self.assertEqual(response[2]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0]) def test_trends_single_aggregate_dau(self): @@ -4103,7 +4103,7 @@ def test_breakdown_by_person_property(self): for response in event_response: if response["breakdown_value"] == "person1": self.assertEqual(response["count"], 1) - self.assertEqual(response["label"], "watched movie - person1") + self.assertEqual(response["label"], "person1") if response["breakdown_value"] == "person2": self.assertEqual(response["count"], 3) if response["breakdown_value"] == "person3": @@ -4144,7 +4144,7 @@ def test_breakdown_by_person_property_for_person_on_events(self): for response in event_response: if response["breakdown_value"] == "person1": self.assertEqual(response["count"], 1) - self.assertEqual(response["label"], "watched movie - person1") + self.assertEqual(response["label"], "person1") if response["breakdown_value"] == "person2": self.assertEqual(response["count"], 3) if response["breakdown_value"] == "person3": @@ -4211,7 
+4211,7 @@ def test_breakdown_by_person_property_for_person_on_events_with_zero_person_ids( for response in event_response: if response["breakdown_value"] == "person1": self.assertEqual(response["count"], 1) - self.assertEqual(response["label"], "watched movie - person1") + self.assertEqual(response["label"], "person1") if response["breakdown_value"] == "person2": self.assertEqual(response["count"], 3) if response["breakdown_value"] == "person3": @@ -4602,9 +4602,9 @@ def test_trends_aggregate_by_distinct_id(self): self.team, ) self.assertEqual(daily_response[0]["data"][0], 2) - self.assertEqual(daily_response[0]["label"], "sign up - some_val") + self.assertEqual(daily_response[0]["label"], "some_val") self.assertEqual(daily_response[1]["data"][0], 1) - self.assertEqual(daily_response[1]["label"], "sign up - none") + self.assertEqual(daily_response[1]["label"], "none") # MAU with freeze_time("2019-12-31T13:00:03Z"): @@ -4811,9 +4811,9 @@ def test_breakdown_filtering_persons(self): ), self.team, ) - self.assertEqual(response[0]["label"], "sign up - none") - self.assertEqual(response[1]["label"], "sign up - test@gmail.com") - self.assertEqual(response[2]["label"], "sign up - test@posthog.com") + self.assertEqual(response[0]["label"], "none") + self.assertEqual(response[1]["label"], "test@gmail.com") + self.assertEqual(response[2]["label"], "test@posthog.com") self.assertEqual(response[0]["count"], 1) self.assertEqual(response[1]["count"], 1) @@ -4869,9 +4869,9 @@ def test_breakdown_filtering_persons_with_action_props(self): ), self.team, ) - self.assertEqual(response[0]["label"], "sign up - none") - self.assertEqual(response[1]["label"], "sign up - test@gmail.com") - self.assertEqual(response[2]["label"], "sign up - test@posthog.com") + self.assertEqual(response[0]["label"], "none") + self.assertEqual(response[1]["label"], "test@gmail.com") + self.assertEqual(response[2]["label"], "test@posthog.com") self.assertEqual(response[0]["count"], 1) 
self.assertEqual(response[1]["count"], 1) @@ -4945,8 +4945,8 @@ def test_breakdown_filtering_with_properties(self): ) response = sorted(response, key=lambda x: x["label"]) - self.assertEqual(response[0]["label"], "sign up - first url") - self.assertEqual(response[1]["label"], "sign up - second url") + self.assertEqual(response[0]["label"], "first url") + self.assertEqual(response[1]["label"], "second url") self.assertEqual(sum(response[0]["data"]), 1) self.assertEqual(response[0]["breakdown_value"], "first url") @@ -5028,7 +5028,7 @@ def test_breakdown_filtering_with_properties_in_new_format(self): ) response = sorted(response, key=lambda x: x["label"]) - self.assertEqual(response[0]["label"], "sign up - second url") + self.assertEqual(response[0]["label"], "second url") self.assertEqual(sum(response[0]["data"]), 1) self.assertEqual(response[0]["breakdown_value"], "second url") @@ -5112,8 +5112,8 @@ def test_mau_with_breakdown_filtering_and_prop_filter(self): self.team, ) - self.assertEqual(event_response[0]["label"], "sign up - some_val") - self.assertEqual(event_response[1]["label"], "sign up - some_val2") + self.assertEqual(event_response[0]["label"], "some_val") + self.assertEqual(event_response[1]["label"], "some_val2") self.assertEqual(sum(event_response[0]["data"]), 3) self.assertEqual(event_response[0]["data"][5], 1) @@ -5153,8 +5153,8 @@ def test_dau_with_breakdown_filtering(self): self.team, ) - self.assertEqual(event_response[1]["label"], "sign up - other_value") - self.assertEqual(event_response[2]["label"], "sign up - value") + self.assertEqual(event_response[1]["label"], "other_value") + self.assertEqual(event_response[2]["label"], "value") self.assertEqual(sum(event_response[1]["data"]), 1) self.assertEqual(event_response[1]["data"][5], 1) @@ -5198,8 +5198,8 @@ def test_dau_with_breakdown_filtering_with_sampling(self): self.team, ) - self.assertEqual(event_response[1]["label"], "sign up - other_value") - self.assertEqual(event_response[2]["label"], 
"sign up - value") + self.assertEqual(event_response[1]["label"], "other_value") + self.assertEqual(event_response[2]["label"], "value") self.assertEqual(sum(event_response[1]["data"]), 1) self.assertEqual(event_response[1]["data"][5], 1) @@ -5243,7 +5243,7 @@ def test_dau_with_breakdown_filtering_with_prop_filter(self): self.team, ) - self.assertEqual(event_response[0]["label"], "sign up - other_value") + self.assertEqual(event_response[0]["label"], "other_value") self.assertEqual(sum(event_response[0]["data"]), 1) self.assertEqual(event_response[0]["data"][5], 1) # property not defined From 922299b5c7b3138397e853038262bf2a41d0b5b1 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Thu, 23 Nov 2023 17:04:48 +0000 Subject: [PATCH 13/14] Fixed the final tests.. maybe --- .../trends/test/test_trends_query_runner.py | 104 ++++++++++-------- .../insights/trends/trends_query_runner.py | 8 +- 2 files changed, 64 insertions(+), 48 deletions(-) diff --git a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py index f7499741cd51e..7f736d59dfa31 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py @@ -419,14 +419,14 @@ def test_trends_breakdowns(self): breakdown_labels = [result["breakdown_value"] for result in response.results] assert len(response.results) == 4 - assert breakdown_labels == ["Chrome", "Edge", "Firefox", "Safari"] + assert breakdown_labels == ["Chrome", "Firefox", "Edge", "Safari"] assert response.results[0]["label"] == f"Chrome" - assert response.results[1]["label"] == f"Edge" - assert response.results[2]["label"] == f"Firefox" + assert response.results[1]["label"] == f"Firefox" + assert response.results[2]["label"] == f"Edge" assert response.results[3]["label"] == f"Safari" assert response.results[0]["count"] == 6 - assert response.results[1]["count"] == 1 - assert 
response.results[2]["count"] == 2 + assert response.results[1]["count"] == 2 + assert response.results[2]["count"] == 1 assert response.results[3]["count"] == 1 def test_trends_breakdowns_boolean(self): @@ -444,13 +444,13 @@ def test_trends_breakdowns_boolean(self): breakdown_labels = [result["breakdown_value"] for result in response.results] assert len(response.results) == 2 - assert breakdown_labels == ["false", "true"] + assert breakdown_labels == ["true", "false"] - assert response.results[0]["label"] == f"$pageview - false" - assert response.results[1]["label"] == f"$pageview - true" + assert response.results[0]["label"] == f"$pageview - true" + assert response.results[1]["label"] == f"$pageview - false" - assert response.results[0]["count"] == 3 - assert response.results[1]["count"] == 7 + assert response.results[0]["count"] == 7 + assert response.results[1]["count"] == 3 def test_trends_breakdowns_histogram(self): self._create_test_events() @@ -470,26 +470,23 @@ def test_trends_breakdowns_histogram(self): breakdown_labels = [result["breakdown_value"] for result in response.results] - assert len(response.results) == 5 + assert len(response.results) == 4 assert breakdown_labels == [ - '["",""]', "[10.0,17.5]", "[17.5,25.0]", "[25.0,32.5]", "[32.5,40.01]", ] - assert response.results[0]["label"] == '["",""]' - assert response.results[1]["label"] == "[10.0,17.5]" - assert response.results[2]["label"] == "[17.5,25.0]" - assert response.results[3]["label"] == "[25.0,32.5]" - assert response.results[4]["label"] == "[32.5,40.01]" + assert response.results[0]["label"] == "[10.0,17.5]" + assert response.results[1]["label"] == "[17.5,25.0]" + assert response.results[2]["label"] == "[25.0,32.5]" + assert response.results[3]["label"] == "[32.5,40.01]" - assert response.results[0]["data"] == [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert response.results[1]["data"] == [0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0] - assert response.results[2]["data"] == [1, 0, 0, 1, 0, 0, 0, 0, 0, 
0, 0, 0] - assert response.results[3]["data"] == [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0] - assert response.results[4]["data"] == [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + assert response.results[0]["data"] == [0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0] + assert response.results[1]["data"] == [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0] + assert response.results[2]["data"] == [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0] + assert response.results[3]["data"] == [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] def test_trends_breakdowns_cohort(self): self._create_test_events() @@ -553,14 +550,14 @@ def test_trends_breakdowns_hogql(self): breakdown_labels = [result["breakdown_value"] for result in response.results] assert len(response.results) == 4 - assert breakdown_labels == ["Chrome", "Edge", "Firefox", "Safari"] + assert breakdown_labels == ["Chrome", "Firefox", "Edge", "Safari"] assert response.results[0]["label"] == f"Chrome" - assert response.results[1]["label"] == f"Edge" - assert response.results[2]["label"] == f"Firefox" + assert response.results[1]["label"] == f"Firefox" + assert response.results[2]["label"] == f"Edge" assert response.results[3]["label"] == f"Safari" assert response.results[0]["count"] == 6 - assert response.results[1]["count"] == 1 - assert response.results[2]["count"] == 2 + assert response.results[1]["count"] == 2 + assert response.results[2]["count"] == 1 assert response.results[3]["count"] == 1 def test_trends_breakdowns_multiple_hogql(self): @@ -578,18 +575,18 @@ def test_trends_breakdowns_multiple_hogql(self): breakdown_labels = [result["breakdown_value"] for result in response.results] assert len(response.results) == 8 - assert breakdown_labels == ["Chrome", "Edge", "Firefox", "Safari", "Chrome", "Edge", "Firefox", "Safari"] + assert breakdown_labels == ["Chrome", "Firefox", "Edge", "Safari", "Chrome", "Edge", "Firefox", "Safari"] assert response.results[0]["label"] == f"$pageview - Chrome" - assert response.results[1]["label"] == f"$pageview - Edge" - assert 
response.results[2]["label"] == f"$pageview - Firefox" + assert response.results[1]["label"] == f"$pageview - Firefox" + assert response.results[2]["label"] == f"$pageview - Edge" assert response.results[3]["label"] == f"$pageview - Safari" assert response.results[4]["label"] == f"$pageleave - Chrome" assert response.results[5]["label"] == f"$pageleave - Edge" assert response.results[6]["label"] == f"$pageleave - Firefox" assert response.results[7]["label"] == f"$pageleave - Safari" assert response.results[0]["count"] == 6 - assert response.results[1]["count"] == 1 - assert response.results[2]["count"] == 2 + assert response.results[1]["count"] == 2 + assert response.results[2]["count"] == 1 assert response.results[3]["count"] == 1 assert response.results[4]["count"] == 3 assert response.results[5]["count"] == 1 @@ -615,21 +612,21 @@ def test_trends_breakdowns_and_compare(self): "Chrome", "Safari", "Chrome", - "Edge", "Firefox", + "Edge", ] assert response.results[0]["label"] == f"$pageview - Chrome" assert response.results[1]["label"] == f"$pageview - Safari" assert response.results[2]["label"] == f"$pageview - Chrome" - assert response.results[3]["label"] == f"$pageview - Edge" - assert response.results[4]["label"] == f"$pageview - Firefox" + assert response.results[3]["label"] == f"$pageview - Firefox" + assert response.results[4]["label"] == f"$pageview - Edge" assert response.results[0]["count"] == 3 assert response.results[1]["count"] == 1 assert response.results[2]["count"] == 3 - assert response.results[3]["count"] == 1 - assert response.results[4]["count"] == 2 + assert response.results[3]["count"] == 2 + assert response.results[4]["count"] == 1 assert response.results[0]["compare_label"] == "current" assert response.results[1]["compare_label"] == "current" @@ -658,11 +655,11 @@ def test_trends_breakdown_and_aggregation_query_orchestration(self): breakdown_labels = [result["breakdown_value"] for result in response.results] assert len(response.results) == 4 
- assert breakdown_labels == ["Chrome", "Edge", "Firefox", "Safari"] + assert breakdown_labels == ["Chrome", "Firefox", "Safari", "Edge"] assert response.results[0]["label"] == f"Chrome" - assert response.results[1]["label"] == f"Edge" - assert response.results[2]["label"] == f"Firefox" - assert response.results[3]["label"] == f"Safari" + assert response.results[1]["label"] == f"Firefox" + assert response.results[2]["label"] == f"Safari" + assert response.results[3]["label"] == f"Edge" assert response.results[0]["data"] == [ 0, @@ -679,10 +676,10 @@ def test_trends_breakdown_and_aggregation_query_orchestration(self): 0, ] assert response.results[1]["data"] == [ + 20, 0, 0, - 0, - 30, + 20, 0, 0, 0, @@ -693,14 +690,14 @@ def test_trends_breakdown_and_aggregation_query_orchestration(self): 0, ] assert response.results[2]["data"] == [ - 20, 0, 0, - 20, 0, 0, 0, 0, + 40, + 0, 0, 0, 0, @@ -710,10 +707,10 @@ def test_trends_breakdown_and_aggregation_query_orchestration(self): 0, 0, 0, + 30, 0, 0, 0, - 40, 0, 0, 0, @@ -922,7 +919,20 @@ def test_trends_display_aggregate(self): assert len(response.results) == 1 assert response.results[0]["data"] == [] - assert response.results[0]["days"] == [] + assert response.results[0]["days"] == [ + "2020-01-09", + "2020-01-10", + "2020-01-11", + "2020-01-12", + "2020-01-13", + "2020-01-14", + "2020-01-15", + "2020-01-16", + "2020-01-17", + "2020-01-18", + "2020-01-19", + "2020-01-20", + ] assert response.results[0]["count"] == 0 assert response.results[0]["aggregated_value"] == 10 diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index 18c0aa1efa29f..76f204c8a310f 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -246,6 +246,12 @@ def get_value(name: str, val: Any): if self._is_breakdown_field_boolean(): remapped_label = 
self._convert_boolean(get_value("breakdown_value", val)) + if remapped_label == "" or remapped_label == '["",""]' or remapped_label is None: + # Skip the "none" series if it doesn't have any data + if series_object["count"] == 0 and series_object.get("aggregated_value", 0) == 0: + continue + remapped_label = "none" + series_object["label"] = "{} - {}".format(series_object["label"], remapped_label) series_object["breakdown_value"] = remapped_label elif self.query.breakdown.breakdown_type == "cohort": @@ -407,7 +413,7 @@ def _is_breakdown_field_boolean(self): return field_type == "Boolean" def _convert_boolean(self, value: Any): - bool_map = {1: "true", 0: "false", "": ""} + bool_map = {1: "true", 0: "false", "": "", "1": "true", "0": "false"} return bool_map.get(value) or value def _event_property( From 75fe0220dfc4fc657102dc58998aee4f00b953a7 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 23 Nov 2023 17:14:41 +0000 Subject: [PATCH 14/14] Update query snapshots --- .../trends/test/__snapshots__/test_trends.ambr | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index f93e0ecddbc56..97bfe2fae2e91 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -84,7 +84,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(e__pdi__person.`properties___$bool_prop`, 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT 
cohortpeople.person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 1)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 4)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY day_start) @@ -165,7 +165,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$bool_prop'), ''), 'null'), '^"|"$', ''), 'x'), 0), and(equals(e.event, 'sign up'), in(e.person_id, (SELECT cohortpeople.person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 5)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))))) GROUP BY day_start) @@ -675,7 +675,7 @@ WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), and(ifNull(in(e__pdi__person.properties___name, tuple('p1', 'p2', 'p3')), 0), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 21)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 24)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -743,7 +743,7 @@ WHERE and(equals(e.team_id, 2), and(and(equals(e.event, '$pageview'), and(ifNull(in(e__pdi__person.properties___name, tuple('p1', 'p2', 'p3')), 0), ifNull(in(e__pdi.person_id, (SELECT 
cohortpeople.person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 21)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 24)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id, @@ -1560,7 +1560,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 32)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 35)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -1607,7 +1607,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), 
ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 32)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 35)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start, @@ -1652,7 +1652,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), in(e.person_id, (SELECT cohortpeople.person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 33)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 36)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))))) GROUP BY value @@ -1692,7 +1692,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 13:01:01', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), in(e.person_id, (SELECT cohortpeople.person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), 
equals(cohortpeople.cohort_id, 33)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 36)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0)))), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start,