From c6173d45873ab4cbaf9030092a2a3aaf529b5d71 Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Fri, 13 Oct 2023 15:07:11 +0200 Subject: [PATCH] feat(database): faster queries from the persons table (#17811) --- frontend/src/queries/schema.json | 5 + frontend/src/queries/schema.ts | 3 +- frontend/src/scenes/debug/HogQLDebug.tsx | 22 +- posthog/hogql/database/models.py | 7 +- .../hogql/database/schema/cohort_people.py | 3 +- posthog/hogql/database/schema/groups.py | 7 +- posthog/hogql/database/schema/log_entries.py | 5 +- .../database/schema/person_distinct_ids.py | 7 +- .../hogql/database/schema/person_overrides.py | 7 +- posthog/hogql/database/schema/persons.py | 59 ++- posthog/hogql/database/schema/persons_pdi.py | 8 +- .../database/schema/session_replay_events.py | 3 +- posthog/hogql/modifiers.py | 3 + .../hogql/test/__snapshots__/test_query.ambr | 401 ++++++++++++++++++ posthog/hogql/test/test_modifiers.py | 13 +- posthog/hogql/test/test_printer.py | 67 ++- posthog/hogql/test/test_query.py | 276 ++++-------- posthog/hogql/test/utils.py | 11 + posthog/hogql/transforms/lazy_tables.py | 4 +- .../test/__snapshots__/test_lazy_tables.ambr | 194 +++++++++ .../__snapshots__/test_property_types.ambr | 76 ++++ .../hogql/transforms/test/test_lazy_tables.py | 143 ++----- .../transforms/test/test_property_types.py | 71 +--- posthog/schema.py | 14 +- posthog/warehouse/models/view_link.py | 5 +- 25 files changed, 1001 insertions(+), 413 deletions(-) create mode 100644 posthog/hogql/test/__snapshots__/test_query.ambr create mode 100644 posthog/hogql/test/utils.py create mode 100644 posthog/hogql/transforms/test/__snapshots__/test_lazy_tables.ambr create mode 100644 posthog/hogql/transforms/test/__snapshots__/test_property_types.ambr diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 2bad6b4aecc86..fb40ba56fdea5 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -1235,7 +1235,12 @@ "additionalProperties": false, "description": "HogQL Query Options are automatically set per team. However, they can be overriden in the query.", "properties": { + "personsArgMaxVersion": { + "enum": ["v1", "v2"], + "type": "string" + }, "personsOnEventsMode": { + "enum": ["disabled", "v1_enabled", "v2_enabled"], "type": "string" } }, diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index c312e54709501..6d0168d21f1fd 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -132,7 +132,8 @@ export interface DataNode extends Node { /** HogQL Query Options are automatically set per team. However, they can be overriden in the query. */ export interface HogQLQueryModifiers { - personsOnEventsMode?: string + personsOnEventsMode?: 'disabled' | 'v1_enabled' | 'v2_enabled' + personsArgMaxVersion?: 'v1' | 'v2' } export interface HogQLQueryResponse { diff --git a/frontend/src/scenes/debug/HogQLDebug.tsx b/frontend/src/scenes/debug/HogQLDebug.tsx index 24861251e371a..17e35a65d4c10 100644 --- a/frontend/src/scenes/debug/HogQLDebug.tsx +++ b/frontend/src/scenes/debug/HogQLDebug.tsx @@ -25,9 +25,9 @@ export function HogQLDebug({ query, setQuery }: HogQLDebugProps): JSX.Element { -
+
- POE: + POE Version: + + + Persons ArgMax Version + + setQuery({ + ...query, + modifiers: { ...query.modifiers, personsArgMaxVersion: value }, + } as HogQLQuery) + } + value={query.modifiers?.personsArgMaxVersion ?? response?.modifiers?.personsArgMaxVersion} />
diff --git a/posthog/hogql/database/models.py b/posthog/hogql/database/models.py index e5283eb68142e..9c7fcac1e8703 100644 --- a/posthog/hogql/database/models.py +++ b/posthog/hogql/database/models.py @@ -2,6 +2,7 @@ from pydantic import ConfigDict, BaseModel from posthog.hogql.errors import HogQLException, NotImplementedException +from posthog.schema import HogQLQueryModifiers if TYPE_CHECKING: from posthog.hogql.context import HogQLContext @@ -100,19 +101,19 @@ def get_asterisk(self): class LazyJoin(FieldOrTable): model_config = ConfigDict(extra="forbid") - join_function: Callable[[str, str, Dict[str, Any]], Any] + join_function: Callable[[str, str, Dict[str, Any], HogQLQueryModifiers], Any] join_table: Table from_field: str class LazyTable(Table): """ - A table that is replaced with a subquery returned from `lazy_select(requested_fields: Dict[name, chain])` + A table that is replaced with a subquery returned from `lazy_select(requested_fields: Dict[name, chain], modifiers: HogQLQueryModifiers)` """ model_config = ConfigDict(extra="forbid") - def lazy_select(self, requested_fields: Dict[str, List[str]]) -> Any: + def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers) -> Any: raise NotImplementedException("LazyTable.lazy_select not overridden") diff --git a/posthog/hogql/database/schema/cohort_people.py b/posthog/hogql/database/schema/cohort_people.py index ee5202fe9ed2a..7aa94704e2c96 100644 --- a/posthog/hogql/database/schema/cohort_people.py +++ b/posthog/hogql/database/schema/cohort_people.py @@ -9,6 +9,7 @@ FieldOrTable, ) from posthog.hogql.database.schema.persons import PersonsTable, join_with_persons_table +from posthog.schema import HogQLQueryModifiers COHORT_PEOPLE_FIELDS = { "person_id": StringDatabaseField(name="person_id"), @@ -56,7 +57,7 @@ def to_printed_hogql(self): class CohortPeople(LazyTable): fields: Dict[str, FieldOrTable] = COHORT_PEOPLE_FIELDS - def lazy_select(self, requested_fields: Dict[str, Any]): + def lazy_select(self, requested_fields: Dict[str, Any], modifiers: HogQLQueryModifiers): return select_from_cohort_people_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/groups.py b/posthog/hogql/database/schema/groups.py index 6a674488d9cfd..0619bf1b5ad3d 100644 --- a/posthog/hogql/database/schema/groups.py +++ b/posthog/hogql/database/schema/groups.py @@ -11,6 +11,7 @@ FieldOrTable, ) from posthog.hogql.errors import HogQLException +from posthog.schema import HogQLQueryModifiers GROUPS_TABLE_FIELDS = { "index": IntegerDatabaseField(name="group_type_index"), @@ -32,7 +33,9 @@ def select_from_groups_table(requested_fields: Dict[str, List[str]]): def join_with_group_n_table(group_index: int): - def join_with_group_table(from_table: str, to_table: str, requested_fields: Dict[str, Any]): + def join_with_group_table( + from_table: str, to_table: str, requested_fields: Dict[str, Any], modifiers: HogQLQueryModifiers + ): from posthog.hogql import ast if not requested_fields: @@ -72,7 +75,7 @@ def to_printed_hogql(self): class GroupsTable(LazyTable): fields: Dict[str, FieldOrTable] = GROUPS_TABLE_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str]]): + def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): return select_from_groups_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/log_entries.py b/posthog/hogql/database/schema/log_entries.py index c8caa1b5c16c7..a7ac459aab4ab 100644 --- a/posthog/hogql/database/schema/log_entries.py +++ b/posthog/hogql/database/schema/log_entries.py @@ -9,6 +9,7 @@ LazyTable, FieldOrTable, ) +from posthog.schema import HogQLQueryModifiers LOG_ENTRIES_FIELDS: Dict[str, FieldOrTable] = { "team_id": IntegerDatabaseField(name="team_id"), @@ -34,7 +35,7 @@ def to_printed_hogql(self): class ReplayConsoleLogsLogEntriesTable(LazyTable): fields: Dict[str, FieldOrTable] = LOG_ENTRIES_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str]]): + def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): fields: List[ast.Expr] = [ast.Field(chain=["log_entries"] + chain) for name, chain in requested_fields.items()] return ast.SelectQuery( @@ -57,7 +58,7 @@ def to_printed_hogql(self): class BatchExportLogEntriesTable(LazyTable): fields: Dict[str, FieldOrTable] = LOG_ENTRIES_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str]]): + def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): fields: List[ast.Expr] = [ast.Field(chain=["log_entries"] + chain) for name, chain in requested_fields.items()] return ast.SelectQuery( diff --git a/posthog/hogql/database/schema/person_distinct_ids.py b/posthog/hogql/database/schema/person_distinct_ids.py index 5d62f832a9875..3765c44673890 100644 --- a/posthog/hogql/database/schema/person_distinct_ids.py +++ b/posthog/hogql/database/schema/person_distinct_ids.py @@ -12,6 +12,7 @@ ) from posthog.hogql.database.schema.persons import PersonsTable, join_with_persons_table from posthog.hogql.errors import HogQLException +from posthog.schema import HogQLQueryModifiers PERSON_DISTINCT_IDS_FIELDS = { "team_id": IntegerDatabaseField(name="team_id"), @@ -34,7 +35,9 @@ def select_from_person_distinct_ids_table(requested_fields: Dict[str, List[str]] ) -def join_with_person_distinct_ids_table(from_table: str, to_table: str, requested_fields: Dict[str, List[str]]): +def join_with_person_distinct_ids_table( + from_table: str, to_table: str, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers +): from posthog.hogql import ast if not requested_fields: @@ -69,7 +72,7 @@ def to_printed_hogql(self): class PersonDistinctIdsTable(LazyTable): fields: Dict[str, FieldOrTable] = PERSON_DISTINCT_IDS_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str]]): + def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): return select_from_person_distinct_ids_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/person_overrides.py b/posthog/hogql/database/schema/person_overrides.py index 64fa3fa49ecf3..c4576d0a58b83 100644 --- a/posthog/hogql/database/schema/person_overrides.py +++ b/posthog/hogql/database/schema/person_overrides.py @@ -10,6 +10,7 @@ ) from posthog.hogql.errors import HogQLException +from posthog.schema import HogQLQueryModifiers PERSON_OVERRIDES_FIELDS: Dict[str, FieldOrTable] = { "team_id": IntegerDatabaseField(name="team_id"), @@ -30,7 +31,9 @@ def select_from_person_overrides_table(requested_fields: Dict[str, List[str]]): ) -def join_with_person_overrides_table(from_table: str, to_table: str, requested_fields: Dict[str, Any]): +def join_with_person_overrides_table( + from_table: str, to_table: str, requested_fields: Dict[str, Any], modifiers: HogQLQueryModifiers +): from posthog.hogql import ast if not requested_fields: @@ -65,7 +68,7 @@ def to_printed_hogql(self): class PersonOverridesTable(Table): fields: Dict[str, FieldOrTable] = PERSON_OVERRIDES_FIELDS - def lazy_select(self, requested_fields: Dict[str, Any]): + def lazy_select(self, requested_fields: Dict[str, Any], modifiers: HogQLQueryModifiers): return select_from_person_overrides_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/persons.py b/posthog/hogql/database/schema/persons.py index 57872daeb30d8..4e853a55ef5b5 100644 --- a/posthog/hogql/database/schema/persons.py +++ b/posthog/hogql/database/schema/persons.py @@ -15,6 +15,7 @@ ) from posthog.hogql.errors import HogQLException from posthog.hogql.database.schema.persons_pdi import PersonsPDITable, persons_pdi_join +from posthog.schema import HogQLQueryModifiers, PersonsArgMaxVersion PERSONS_FIELDS: Dict[str, FieldOrTable] = { "id": StringDatabaseField(name="id"), @@ -30,24 +31,54 @@ } -def select_from_persons_table(requested_fields: Dict[str, List[str]]): - select = argmax_select( - table_name="raw_persons", - select_fields=requested_fields, - group_fields=["id"], - argmax_field="version", - deleted_field="is_deleted", - ) - select.settings = HogQLQuerySettings(optimize_aggregation_in_order=True) - return select +def select_from_persons_table(requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): + if modifiers.personsArgMaxVersion == PersonsArgMaxVersion.v2: + from posthog.hogql.parser import parse_select + from posthog.hogql import ast + + query = parse_select( + """ + SELECT id FROM raw_persons WHERE (id, version) IN ( + SELECT id, max(version) as version + FROM raw_persons + GROUP BY id + HAVING ifNull(equals(argMax(raw_persons.is_deleted, raw_persons.version), 0), 0) + ) + """ + ) + query.settings = HogQLQuerySettings(optimize_aggregation_in_order=True) + + for field_name, field_chain in requested_fields.items(): + # We need to always select the 'id' field for the join constraint. The field name here is likely to + # be "persons__id" if anything, but just in case, let's avoid duplicates. + if field_name != "id": + query.select.append( + ast.Alias( + alias=field_name, + expr=ast.Field(chain=field_chain), + ) + ) + return query + else: + select = argmax_select( + table_name="raw_persons", + select_fields=requested_fields, + group_fields=["id"], + argmax_field="version", + deleted_field="is_deleted", + ) + select.settings = HogQLQuerySettings(optimize_aggregation_in_order=True) + return select -def join_with_persons_table(from_table: str, to_table: str, requested_fields: Dict[str, List[str]]): +def join_with_persons_table( + from_table: str, to_table: str, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers +): from posthog.hogql import ast if not requested_fields: raise HogQLException("No fields requested from persons table") - join_expr = ast.JoinExpr(table=select_from_persons_table(requested_fields)) + join_expr = ast.JoinExpr(table=select_from_persons_table(requested_fields, modifiers)) join_expr.join_type = "INNER JOIN" join_expr.alias = to_table join_expr.constraint = ast.JoinConstraint( @@ -77,8 +108,8 @@ def to_printed_hogql(self): class PersonsTable(LazyTable): fields: Dict[str, FieldOrTable] = PERSONS_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str]]): - return select_from_persons_table(requested_fields) + def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): + return select_from_persons_table(requested_fields, modifiers) def to_printed_clickhouse(self, context): return "person" diff --git a/posthog/hogql/database/schema/persons_pdi.py b/posthog/hogql/database/schema/persons_pdi.py index ed54ead6cded3..8f83234b6bed3 100644 --- a/posthog/hogql/database/schema/persons_pdi.py +++ b/posthog/hogql/database/schema/persons_pdi.py @@ -8,6 +8,8 @@ FieldOrTable, ) from posthog.hogql.errors import HogQLException +from posthog.schema import HogQLQueryModifiers + # :NOTE: We already have person_distinct_ids.py, which most tables link to. This persons_pdi.py is a hack to # make "select persons.pdi.distinct_id from persons" work while avoiding circular imports. Don't use directly. @@ -26,7 +28,9 @@ def persons_pdi_select(requested_fields: Dict[str, List[str]]): # :NOTE: We already have person_distinct_ids.py, which most tables link to. This persons_pdi.py is a hack to # make "select persons.pdi.distinct_id from persons" work while avoiding circular imports. Don't use directly. -def persons_pdi_join(from_table: str, to_table: str, requested_fields: Dict[str, List[str]]): +def persons_pdi_join( + from_table: str, to_table: str, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers +): from posthog.hogql import ast if not requested_fields: @@ -53,7 +57,7 @@ class PersonsPDITable(LazyTable): "person_id": StringDatabaseField(name="person_id"), } - def lazy_select(self, requested_fields: Dict[str, List[str]]): + def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): return persons_pdi_select(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/session_replay_events.py b/posthog/hogql/database/schema/session_replay_events.py index b8d79e86d9780..f163e8052e8bf 100644 --- a/posthog/hogql/database/schema/session_replay_events.py +++ b/posthog/hogql/database/schema/session_replay_events.py @@ -15,6 +15,7 @@ PersonDistinctIdsTable, join_with_person_distinct_ids_table, ) +from posthog.schema import HogQLQueryModifiers SESSION_REPLAY_EVENTS_COMMON_FIELDS: Dict[str, FieldOrTable] = { "session_id": StringDatabaseField(name="session_id"), @@ -108,7 +109,7 @@ class SessionReplayEventsTable(LazyTable): "first_url": StringDatabaseField(name="first_url"), } - def lazy_select(self, requested_fields: Dict[str, List[str]]): + def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): return select_from_session_replay_events_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/modifiers.py b/posthog/hogql/modifiers.py index 2811c60501719..36ad867fe49d2 100644 --- a/posthog/hogql/modifiers.py +++ b/posthog/hogql/modifiers.py @@ -16,4 +16,7 @@ def create_default_modifiers_for_team( if modifiers.personsOnEventsMode is None: modifiers.personsOnEventsMode = team.person_on_events_mode or PersonOnEventsMode.DISABLED + if modifiers.personsArgMaxVersion is None: + modifiers.personsArgMaxVersion = "v1" + return modifiers diff --git a/posthog/hogql/test/__snapshots__/test_query.ambr b/posthog/hogql/test/__snapshots__/test_query.ambr new file mode 100644 index 0000000000000..41f301150f147 --- /dev/null +++ b/posthog/hogql/test/__snapshots__/test_query.ambr @@ -0,0 +1,401 @@ +# name: TestQuery.test_hogql_arrays + ' + + SELECT [1, 2, 3], [10, 11, 12][1] + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_hogql_lambdas + ' + + SELECT arrayMap(x -> multiply(x, 2), [1, 2, 3]), 1 + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_hogql_query_filters_alias + ' + + SELECT e.event, e.distinct_id + FROM events AS e + WHERE and(equals(e.team_id, 420), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), %(hogql_val_1)s), 0)) + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_hogql_union_all_limits + ' + + SELECT events.event + FROM events + WHERE equals(events.team_id, 420) + LIMIT 100 UNION ALL + SELECT events.event + FROM events + WHERE equals(events.team_id, 420) + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query + ' + + SELECT count(), events.event + FROM events + WHERE and(equals(events.team_id, 420), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), %(hogql_val_1)s), 0)) + GROUP BY events.event + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_distinct + ' + + SELECT DISTINCT persons.properties___sneaky_mail + FROM ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___random_uuid, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS persons + WHERE ifNull(equals(persons.properties___random_uuid, %(hogql_val_2)s), 0) + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_joins_events_e_pdi + ' + + SELECT e.event, toTimeZone(e.timestamp, %(hogql_val_0)s), e__pdi.distinct_id, e__pdi.person_id + FROM events AS e INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE equals(e.team_id, 420) + LIMIT 10 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_joins_events_pdi + ' + + SELECT events.event, toTimeZone(events.timestamp, %(hogql_val_0)s), events__pdi.distinct_id, events__pdi.person_id + FROM events INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE equals(events.team_id, 420) + LIMIT 10 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_joins_events_pdi_e_person_properties + ' + + SELECT e.event, toTimeZone(e.timestamp, %(hogql_val_1)s), e__pdi.distinct_id, e__pdi__person.properties___sneaky_mail + FROM events AS e INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) INNER JOIN ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE equals(e.team_id, 420) + LIMIT 10 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_joins_events_pdi_person + ' + + SELECT events.event, toTimeZone(events.timestamp, %(hogql_val_0)s), events__pdi.distinct_id, events__pdi__person.id + FROM events INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN ( + SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE equals(events.team_id, 420) + LIMIT 10 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_joins_events_pdi_person_properties + ' + + SELECT events.event, toTimeZone(events.timestamp, %(hogql_val_1)s), events__pdi.distinct_id, events__pdi__person.properties___sneaky_mail + FROM events INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE equals(events.team_id, 420) + LIMIT 10 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_joins_events_person_properties + ' + + SELECT e.event, toTimeZone(e.timestamp, %(hogql_val_1)s), e__pdi__person.properties___sneaky_mail + FROM events AS e INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) INNER JOIN ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE equals(e.team_id, 420) + LIMIT 10 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_joins_events_person_properties_in_aggregration + ' + + SELECT s__pdi__person.properties___sneaky_mail, count() + FROM events AS s INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS s__pdi ON equals(s.distinct_id, s__pdi.distinct_id) INNER JOIN ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS s__pdi__person ON equals(s__pdi.person_id, s__pdi__person.id) + WHERE equals(s.team_id, 420) + GROUP BY s__pdi__person.properties___sneaky_mail + LIMIT 10 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_joins_pdi + ' + + SELECT e.event, toTimeZone(e.timestamp, %(hogql_val_0)s), pdi.person_id + FROM events AS e INNER JOIN ( + SELECT person_distinct_id2.distinct_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS pdi ON equals(e.distinct_id, pdi.distinct_id) + WHERE equals(e.team_id, 420) + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_joins_pdi_person_properties + ' + + SELECT pdi.distinct_id, pdi__person.properties___sneaky_mail + FROM person_distinct_id2 AS pdi INNER JOIN ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS pdi__person ON equals(pdi.person_id, pdi__person.id) + WHERE equals(pdi.team_id, 420) + LIMIT 10 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_joins_pdi_persons + ' + + SELECT pdi.distinct_id, toTimeZone(pdi__person.created_at, %(hogql_val_0)s) + FROM person_distinct_id2 AS pdi INNER JOIN ( + SELECT argMax(person.created_at, person.version) AS created_at, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS pdi__person ON equals(pdi.person_id, pdi__person.id) + WHERE equals(pdi.team_id, 420) + LIMIT 10 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_joins_simple + ' + + SELECT e.event, toTimeZone(e.timestamp, %(hogql_val_0)s), pdi.distinct_id, p.id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(p.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', '') + FROM events AS e LEFT JOIN person_distinct_id2 AS pdi ON equals(pdi.distinct_id, e.distinct_id) LEFT JOIN person AS p ON equals(p.id, pdi.person_id) + WHERE and(equals(p.team_id, 420), equals(pdi.team_id, 420), equals(e.team_id, 420)) + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_person_distinct_ids + ' + + SELECT DISTINCT person_distinct_ids.person_id, person_distinct_ids.distinct_id + FROM ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS person_distinct_ids + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_select_person_with_joins_without_poe + ' + + SELECT events.event, toTimeZone(events.timestamp, %(hogql_val_1)s), events__pdi__person.id, events__pdi__person.properties___sneaky_mail + FROM events INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE equals(events.team_id, 420) + LIMIT 10 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_query_select_person_with_poe_without_joins + ' + + SELECT events.event, toTimeZone(events.timestamp, %(hogql_val_0)s), events.person_id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.person_properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', '') + FROM events + WHERE equals(events.team_id, 420) + LIMIT 10 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_select_person_on_events + ' + + SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(s.person_properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), count() + FROM events AS s + WHERE equals(s.team_id, 420) + GROUP BY replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(s.person_properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', '') + LIMIT 10 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_subquery + ' + + SELECT count, event + FROM ( + SELECT count() AS count, events.event + FROM events + WHERE and(equals(events.team_id, 420), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), %(hogql_val_1)s), 0)) + GROUP BY events.event) + GROUP BY count, event + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_subquery_alias + ' + + SELECT c.count, c.event + FROM ( + SELECT count(*) AS count, events.event + FROM events + WHERE and(equals(events.team_id, 420), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), %(hogql_val_1)s), 0)) + GROUP BY events.event) AS c + GROUP BY c.count, c.event + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_tuple_access + ' + + SELECT col_a, arrayZip((sumMap(g.1, g.2) AS x).1, x.2) AS r + FROM ( + SELECT col_a, groupArray(tuple(col_b, col_c)) AS g + FROM ( + SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '') AS col_a, events.event AS col_b, count() AS col_c + FROM events + WHERE equals(events.team_id, 420) + GROUP BY replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', ''), events.event) + GROUP BY col_a) + GROUP BY col_a ORDER BY col_a ASC + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_with_pivot_table_1_level + ' + + SELECT PIVOT_FUNCTION_2.col_a, PIVOT_FUNCTION_2.r + FROM ( + SELECT PIVOT_FUNCTION_1.col_a, arrayZip((sumMap(PIVOT_FUNCTION_1.g.1, PIVOT_FUNCTION_1.g.2) AS x).1, x.2) AS r + FROM ( + SELECT PIVOT_TABLE_COL_ABC.col_a, groupArray(tuple(PIVOT_TABLE_COL_ABC.col_b, PIVOT_TABLE_COL_ABC.col_c)) AS g + FROM ( + SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '') AS col_a, events.event AS col_b, count() AS col_c + FROM events + WHERE equals(events.team_id, 420) + GROUP BY replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', ''), events.event) AS PIVOT_TABLE_COL_ABC + GROUP BY PIVOT_TABLE_COL_ABC.col_a) AS PIVOT_FUNCTION_1 + GROUP BY PIVOT_FUNCTION_1.col_a) AS PIVOT_FUNCTION_2 ORDER BY PIVOT_FUNCTION_2.col_a ASC + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_with_pivot_table_2_levels + ' + + SELECT final.col_a, final.r + FROM ( + SELECT PIVOT_FUNCTION_2.col_a, PIVOT_FUNCTION_2.r + FROM ( + SELECT PIVOT_FUNCTION_1.col_a, arrayZip((sumMap(PIVOT_FUNCTION_1.g.1, PIVOT_FUNCTION_1.g.2) AS x).1, x.2) AS r + FROM ( + SELECT PIVOT_TABLE_COL_ABC.col_a, groupArray(tuple(PIVOT_TABLE_COL_ABC.col_b, PIVOT_TABLE_COL_ABC.col_c)) AS g + FROM ( + SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '') AS col_a, events.event AS col_b, count() AS col_c + FROM events + WHERE equals(events.team_id, 420) + GROUP BY replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', ''), events.event) AS PIVOT_TABLE_COL_ABC + GROUP BY PIVOT_TABLE_COL_ABC.col_a) AS PIVOT_FUNCTION_1 + GROUP BY PIVOT_FUNCTION_1.col_a) AS PIVOT_FUNCTION_2) AS final ORDER BY final.col_a ASC + LIMIT 100 + SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 + ' +--- diff --git a/posthog/hogql/test/test_modifiers.py b/posthog/hogql/test/test_modifiers.py index e519bdf3e984a..b49a5da32cdfa 100644 --- a/posthog/hogql/test/test_modifiers.py +++ b/posthog/hogql/test/test_modifiers.py @@ -21,7 +21,7 @@ def test_create_default_modifiers_for_team_init(self): ) assert modifiers.personsOnEventsMode == PersonOnEventsMode.V2_ENABLED - def test_modifiers_person_on_events_mode_v1_enabled(self): + def test_modifiers_persons_on_events_mode_v1_enabled(self): query = "SELECT event, person_id FROM events" # Control @@ -35,3 +35,14 @@ def test_modifiers_person_on_events_mode_v1_enabled(self): query, team=self.team, modifiers=HogQLQueryModifiers(personsOnEventsMode=PersonOnEventsMode.V1_ENABLED) ) assert " JOIN " not in response.clickhouse + + def test_modifiers_persons_argmax_version_v2(self): + query = "SELECT * FROM persons" + + # Control (v1) + response = execute_hogql_query(query, team=self.team, modifiers=HogQLQueryModifiers(personsArgMaxVersion="v1")) + assert "in(tuple(person.id, person.version)" not in response.clickhouse + + # Test (v2) + response = execute_hogql_query(query, team=self.team, modifiers=HogQLQueryModifiers(personsArgMaxVersion="v2")) + assert "in(tuple(person.id, person.version)" in response.clickhouse diff --git a/posthog/hogql/test/test_printer.py b/posthog/hogql/test/test_printer.py index 1c92b32daa805..3bb4667ac917c 100644 --- a/posthog/hogql/test/test_printer.py +++ b/posthog/hogql/test/test_printer.py @@ -12,7 +12,7 @@ from posthog.hogql.parser import parse_select from posthog.hogql.printer import print_ast from posthog.models.team.team import WeekStartDay -from posthog.schema import HogQLQueryModifiers +from posthog.schema import HogQLQueryModifiers, PersonsArgMaxVersion from posthog.test.base import BaseTest from posthog.utils import PersonOnEventsMode @@ -583,35 +583,84 @@ def test_select_sample(self): ) with override_settings(PERSON_ON_EVENTS_V2_OVERRIDE=False): + context = HogQLContext( + team_id=self.team.pk, + enable_select_queries=True, + modifiers=HogQLQueryModifiers(personsArgMaxVersion=PersonsArgMaxVersion.v2), + ) self.assertEqual( self._select( - "SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 JOIN persons ON persons.id=events.person_id" + "SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 JOIN persons ON persons.id=events.person_id", + context, ), - f"SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) JOIN (SELECT person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, events__pdi.person_id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000", + f"SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, " + f"person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 " + f"WHERE equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id HAVING " + f"ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi " + f"ON equals(events.distinct_id, events__pdi.distinct_id) JOIN (SELECT person.id FROM person " + f"WHERE and(equals(person.team_id, {self.team.pk}), ifNull(in(tuple(person.id, person.version), " + f"(SELECT person.id, max(person.version) AS version FROM person WHERE equals(person.team_id, {self.team.pk}) " + f"GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) " + f"SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, events__pdi.person_id) " + f"WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000", ) + context = HogQLContext( + team_id=self.team.pk, + enable_select_queries=True, + modifiers=HogQLQueryModifiers(personsArgMaxVersion=PersonsArgMaxVersion.v2), + ) self.assertEqual( self._select( - "SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 JOIN persons SAMPLE 0.1 ON persons.id=events.person_id" + "SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 JOIN persons SAMPLE 0.1 ON persons.id=events.person_id", + context, ), - f"SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) JOIN (SELECT person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons SAMPLE 0.1 ON equals(persons.id, events__pdi.person_id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000", + f"SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, " + f"person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 " + f"WHERE equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id HAVING " + f"ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi " + f"ON equals(events.distinct_id, events__pdi.distinct_id) JOIN (SELECT person.id FROM person WHERE " + f"and(equals(person.team_id, {self.team.pk}), ifNull(in(tuple(person.id, person.version), (SELECT person.id, " + f"max(person.version) AS version FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id " + f"HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) " + f"AS persons SAMPLE 0.1 ON equals(persons.id, events__pdi.person_id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000", ) with override_settings(PERSON_ON_EVENTS_OVERRIDE=True): + context = HogQLContext( + team_id=self.team.pk, + enable_select_queries=True, + modifiers=HogQLQueryModifiers(personsArgMaxVersion=PersonsArgMaxVersion.v2), + ) expected = self._select( - "SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 JOIN persons ON persons.id=events.person_id" + "SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 JOIN persons ON persons.id=events.person_id", + context, ) self.assertEqual( expected, - f"SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 JOIN (SELECT person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, events.person_id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000", + f"SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 JOIN (SELECT person.id FROM person WHERE " + f"and(equals(person.team_id, {self.team.pk}), ifNull(in(tuple(person.id, person.version), (SELECT person.id, " + f"max(person.version) AS version FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id " + f"HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) " + f"AS persons ON equals(persons.id, events.person_id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000", ) + context = HogQLContext( + team_id=self.team.pk, + enable_select_queries=True, + modifiers=HogQLQueryModifiers(personsArgMaxVersion=PersonsArgMaxVersion.v2), + ) expected = self._select( - "SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 JOIN persons SAMPLE 0.1 ON persons.id=events.person_id" + "SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 JOIN persons SAMPLE 0.1 ON persons.id=events.person_id", + context, ) self.assertEqual( expected, - f"SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 JOIN (SELECT person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons SAMPLE 0.1 ON equals(persons.id, events.person_id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000", + f"SELECT events.event FROM events SAMPLE 2/78 OFFSET 999 JOIN (SELECT person.id FROM person WHERE " + f"and(equals(person.team_id, {self.team.pk}), ifNull(in(tuple(person.id, person.version), (SELECT person.id, " + f"max(person.version) AS version FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id " + f"HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) " + f"AS persons SAMPLE 0.1 ON equals(persons.id, events.person_id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000", ) def test_count_distinct(self): diff --git a/posthog/hogql/test/test_query.py b/posthog/hogql/test/test_query.py index 93494ee8bb6ab..475a346ff2b5e 100644 --- a/posthog/hogql/test/test_query.py +++ b/posthog/hogql/test/test_query.py @@ -1,3 +1,4 @@ +import pytest from uuid import UUID from zoneinfo import ZoneInfo @@ -10,6 +11,7 @@ from posthog.hogql.errors import SyntaxException, HogQLException from posthog.hogql.property import property_to_expr from posthog.hogql.query import execute_hogql_query +from posthog.hogql.test.utils import pretty_print_in_tests from posthog.models import Cohort from posthog.models.cohort.util import recalculate_cohortpeople from posthog.models.utils import UUIDT @@ -41,6 +43,7 @@ def _create_random_events(self) -> str: flush_persons_and_events() return random_uuid + @pytest.mark.usefixtures("unittest_snapshot") def test_query(self): with freeze_time("2020-01-10"): random_uuid = self._create_random_events() @@ -50,69 +53,73 @@ def test_query(self): placeholders={"random_uuid": ast.Constant(value=random_uuid)}, team=self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT count(), events.event FROM events WHERE and(equals(events.team_id, {self.team.id}), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), %(hogql_val_1)s), 0)) GROUP BY events.event LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, f"SELECT count(), event FROM events WHERE equals(properties.random_uuid, '{random_uuid}') GROUP BY event LIMIT 100", ) self.assertEqual(response.results, [(2, "random event")]) + @pytest.mark.usefixtures("unittest_snapshot") + def test_subquery(self): + with freeze_time("2020-01-10"): + random_uuid = self._create_random_events() + response = execute_hogql_query( "select count, event from (select count() as count, event from events where properties.random_uuid = {random_uuid} group by event) group by count, event", placeholders={"random_uuid": ast.Constant(value=random_uuid)}, team=self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT count, event FROM (SELECT count() AS count, events.event FROM events WHERE and(equals(events.team_id, {self.team.id}), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), %(hogql_val_1)s), 0)) GROUP BY events.event) GROUP BY count, event LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, f"SELECT count, event FROM (SELECT count() AS count, event FROM events WHERE equals(properties.random_uuid, '{random_uuid}') GROUP BY event) GROUP BY count, event LIMIT 100", ) self.assertEqual(response.results, [(2, "random event")]) + @pytest.mark.usefixtures("unittest_snapshot") + def test_subquery_alias(self): + with freeze_time("2020-01-10"): + random_uuid = self._create_random_events() + response = execute_hogql_query( "select count, event from (select count(*) as count, event from events where properties.random_uuid = {random_uuid} group by event) as c group by count, event", placeholders={"random_uuid": ast.Constant(value=random_uuid)}, team=self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT c.count, c.event FROM (SELECT count(*) AS count, events.event FROM events WHERE and(equals(events.team_id, {self.team.id}), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), %(hogql_val_1)s), 0)) GROUP BY events.event) AS c GROUP BY c.count, c.event LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, f"SELECT count, event FROM (SELECT count(*) AS count, event FROM events WHERE equals(properties.random_uuid, '{random_uuid}') GROUP BY event) AS c GROUP BY count, event LIMIT 100", ) self.assertEqual(response.results, [(2, "random event")]) + @pytest.mark.usefixtures("unittest_snapshot") + def test_query_distinct(self): + with freeze_time("2020-01-10"): + random_uuid = self._create_random_events() + response = execute_hogql_query( "select distinct properties.sneaky_mail from persons where properties.random_uuid = {random_uuid}", placeholders={"random_uuid": ast.Constant(value=random_uuid)}, team=self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT DISTINCT persons.properties___sneaky_mail FROM (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), person.version) AS properties___sneaky_mail, argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_1)s), ''), 'null'), '^\"|\"$', ''), person.version) AS properties___random_uuid, person.id AS id FROM person WHERE equals(person.team_id, {self.team.id}) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons WHERE ifNull(equals(persons.properties___random_uuid, %(hogql_val_2)s), 0) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, f"SELECT DISTINCT properties.sneaky_mail FROM persons WHERE equals(properties.random_uuid, '{random_uuid}') LIMIT 100", ) self.assertEqual(response.results, [("tim@posthog.com",)]) + @pytest.mark.usefixtures("unittest_snapshot") + def test_query_person_distinct_ids(self): + with freeze_time("2020-01-10"): + self._create_random_events() response = execute_hogql_query( f"select distinct person_id, distinct_id from person_distinct_ids", self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT DISTINCT person_distinct_ids.person_id, person_distinct_ids.distinct_id FROM (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.id}) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS person_distinct_ids LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, "SELECT DISTINCT person_id, distinct_id FROM person_distinct_ids LIMIT 100", @@ -122,15 +129,16 @@ def test_query(self): def test_query_timings(self): with freeze_time("2020-01-10"): random_uuid = self._create_random_events() - response = execute_hogql_query( - "select count(), event from events where properties.random_uuid = {random_uuid} group by event", - placeholders={"random_uuid": ast.Constant(value=random_uuid)}, - team=self.team, - ) - self.assertTrue(isinstance(response.timings, list) and len(response.timings) > 0) - self.assertTrue(isinstance(response.timings[0], QueryTiming)) - self.assertEqual(response.timings[-1].k, ".") + response = execute_hogql_query( + "select count(), event from events where properties.random_uuid = {random_uuid} group by event", + placeholders={"random_uuid": ast.Constant(value=random_uuid)}, + team=self.team, + ) + self.assertTrue(isinstance(response.timings, list) and len(response.timings) > 0) + self.assertTrue(isinstance(response.timings[0], QueryTiming)) + self.assertEqual(response.timings[-1].k, ".") + @pytest.mark.usefixtures("unittest_snapshot") def test_query_joins_simple(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -146,10 +154,7 @@ def test_query_joins_simple(self): """, self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT e.event, toTimeZone(e.timestamp, %(hogql_val_0)s), pdi.distinct_id, p.id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(p.properties, %(hogql_val_1)s), ''), 'null'), '^\"|\"$', '') FROM events AS e LEFT JOIN person_distinct_id2 AS pdi ON equals(pdi.distinct_id, e.distinct_id) LEFT JOIN person AS p ON equals(p.id, pdi.person_id) WHERE and(equals(p.team_id, {self.team.id}), equals(pdi.team_id, {self.team.id}), equals(e.team_id, {self.team.id})) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, "SELECT event, timestamp, pdi.distinct_id, p.id, p.properties.sneaky_mail FROM events AS e LEFT JOIN person_distinct_ids AS pdi ON equals(pdi.distinct_id, e.distinct_id) LEFT JOIN persons AS p ON equals(p.id, pdi.person_id) LIMIT 100", @@ -158,6 +163,7 @@ def test_query_joins_simple(self): self.assertEqual(response.results[0][2], "bla") self.assertEqual(response.results[0][4], "tim@posthog.com") + @pytest.mark.usefixtures("unittest_snapshot") def test_query_joins_pdi(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -177,20 +183,14 @@ def test_query_joins_pdi(self): self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT e.event, toTimeZone(e.timestamp, %(hogql_val_0)s), pdi.person_id FROM events AS e INNER JOIN (SELECT person_distinct_id2.distinct_id, " - f"argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id FROM person_distinct_id2 WHERE " - f"equals(person_distinct_id2.team_id, {self.team.id}) GROUP BY person_distinct_id2.distinct_id HAVING " - f"ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS pdi ON " - f"equals(e.distinct_id, pdi.distinct_id) WHERE equals(e.team_id, {self.team.id}) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, "SELECT event, timestamp, pdi.person_id FROM events AS e INNER JOIN (SELECT distinct_id, argMax(person_id, version) AS person_id FROM raw_person_distinct_ids GROUP BY distinct_id HAVING equals(argMax(is_deleted, version), 0)) AS pdi ON equals(e.distinct_id, pdi.distinct_id) LIMIT 100", ) self.assertTrue(len(response.results) > 0) + @pytest.mark.usefixtures("unittest_snapshot") def test_query_joins_events_pdi(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -199,10 +199,7 @@ def test_query_joins_events_pdi(self): "SELECT event, timestamp, pdi.distinct_id, pdi.person_id FROM events LIMIT 10", self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT events.event, toTimeZone(events.timestamp, %(hogql_val_0)s), events__pdi.distinct_id, events__pdi.person_id FROM events INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, "SELECT event, timestamp, pdi.distinct_id, pdi.person_id FROM events LIMIT 10", @@ -211,6 +208,7 @@ def test_query_joins_events_pdi(self): self.assertEqual(response.results[0][2], "bla") self.assertEqual(response.results[0][3], UUID("00000000-0000-4000-8000-000000000000")) + @pytest.mark.usefixtures("unittest_snapshot") def test_query_joins_events_e_pdi(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -223,14 +221,12 @@ def test_query_joins_events_e_pdi(self): response.hogql, "SELECT event, e.timestamp, e.pdi.distinct_id, pdi.person_id FROM events AS e LIMIT 10", ) - self.assertEqual( - response.clickhouse, - f"SELECT e.event, toTimeZone(e.timestamp, %(hogql_val_0)s), e__pdi.distinct_id, e__pdi.person_id FROM events AS e INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) WHERE equals(e.team_id, {self.team.pk}) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual(response.results[0][0], "random event") self.assertEqual(response.results[0][2], "bla") self.assertEqual(response.results[0][3], UUID("00000000-0000-4000-8000-000000000000")) + @pytest.mark.usefixtures("unittest_snapshot") def test_query_joins_pdi_persons(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -243,17 +239,11 @@ def test_query_joins_pdi_persons(self): response.hogql, "SELECT pdi.distinct_id, pdi.person.created_at FROM person_distinct_ids AS pdi LIMIT 10", ) - self.assertEqual( - response.clickhouse, - f"SELECT pdi.distinct_id, toTimeZone(pdi__person.created_at, %(hogql_val_0)s) FROM person_distinct_id2 AS pdi INNER JOIN (SELECT " - f"argMax(person.created_at, person.version) AS created_at, person.id AS id FROM person WHERE " - f"equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, " - f"person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS pdi__person ON equals(pdi.person_id, pdi__person.id) WHERE " - f"equals(pdi.team_id, {self.team.pk}) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual(response.results[0][0], "bla") self.assertEqual(response.results[0][1], datetime.datetime(2020, 1, 10, 0, 0, tzinfo=timezone.utc)) + @pytest.mark.usefixtures("unittest_snapshot") def test_query_joins_pdi_person_properties(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -266,17 +256,11 @@ def test_query_joins_pdi_person_properties(self): response.hogql, "SELECT pdi.distinct_id, pdi.person.properties.sneaky_mail FROM person_distinct_ids AS pdi LIMIT 10", ) - self.assertEqual( - response.clickhouse, - f"SELECT pdi.distinct_id, pdi__person.properties___sneaky_mail FROM person_distinct_id2 AS pdi INNER JOIN " - f"(SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), person.version) " - f"AS properties___sneaky_mail, person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id " - f"HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS pdi__person ON " - f"equals(pdi.person_id, pdi__person.id) WHERE equals(pdi.team_id, {self.team.pk}) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual(response.results[0][0], "bla") self.assertEqual(response.results[0][1], "tim@posthog.com") + @pytest.mark.usefixtures("unittest_snapshot") def test_query_joins_events_pdi_person(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -285,18 +269,7 @@ def test_query_joins_events_pdi_person(self): "SELECT event, timestamp, pdi.distinct_id, pdi.person.id FROM events LIMIT 10", self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT events.event, toTimeZone(events.timestamp, %(hogql_val_0)s), events__pdi.distinct_id, events__pdi__person.id FROM events " - f"INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, " - f"person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) " - f"GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, " - f"person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) " - f"INNER JOIN (SELECT person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING " - f"ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON " - f"equals(events__pdi.person_id, events__pdi__person.id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10" - f" SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, "SELECT event, timestamp, pdi.distinct_id, pdi.person.id FROM events LIMIT 10", @@ -305,6 +278,7 @@ def test_query_joins_events_pdi_person(self): self.assertEqual(response.results[0][2], "bla") self.assertEqual(response.results[0][3], UUID("00000000-0000-4000-8000-000000000000")) + @pytest.mark.usefixtures("unittest_snapshot") @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) def test_query_joins_events_pdi_person_properties(self): with freeze_time("2020-01-10"): @@ -314,18 +288,7 @@ def test_query_joins_events_pdi_person_properties(self): "SELECT event, timestamp, pdi.distinct_id, pdi.person.properties.sneaky_mail FROM events LIMIT 10", self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT events.event, toTimeZone(events.timestamp, %(hogql_val_1)s), events__pdi.distinct_id, events__pdi__person.properties___sneaky_mail FROM events " - f"INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, " - f"person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) " - f"GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) " - f"AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN (SELECT " - f"argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), person.version) " - f"AS properties___sneaky_mail, person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING " - f"ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, " - f"events__pdi__person.id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, "SELECT event, timestamp, pdi.distinct_id, pdi.person.properties.sneaky_mail FROM events LIMIT 10", @@ -334,6 +297,7 @@ def test_query_joins_events_pdi_person_properties(self): self.assertEqual(response.results[0][2], "bla") self.assertEqual(response.results[0][3], "tim@posthog.com") + @pytest.mark.usefixtures("unittest_snapshot") def test_query_joins_events_pdi_e_person_properties(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -342,19 +306,7 @@ def test_query_joins_events_pdi_e_person_properties(self): "SELECT event, e.timestamp, pdi.distinct_id, e.pdi.person.properties.sneaky_mail FROM events e LIMIT 10", self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT e.event, toTimeZone(e.timestamp, %(hogql_val_1)s), e__pdi.distinct_id, e__pdi__person.properties___sneaky_mail FROM events AS e " - f"INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, " - f"person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) " - f"GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, " - f"person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) INNER JOIN " - f"(SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), " - f"person.version) AS properties___sneaky_mail, person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) " - f"GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON " - f"equals(e__pdi.person_id, e__pdi__person.id) WHERE equals(e.team_id, {self.team.pk}) LIMIT 10" - f" SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, "SELECT event, e.timestamp, pdi.distinct_id, e.pdi.person.properties.sneaky_mail FROM events AS e LIMIT 10", @@ -363,6 +315,7 @@ def test_query_joins_events_pdi_e_person_properties(self): self.assertEqual(response.results[0][2], "bla") self.assertEqual(response.results[0][3], "tim@posthog.com") + @pytest.mark.usefixtures("unittest_snapshot") def test_query_joins_events_person_properties(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -371,18 +324,7 @@ def test_query_joins_events_person_properties(self): "SELECT event, e.timestamp, e.pdi.person.properties.sneaky_mail FROM events e LIMIT 10", self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT e.event, toTimeZone(e.timestamp, %(hogql_val_1)s), e__pdi__person.properties___sneaky_mail FROM events AS e INNER JOIN (SELECT " - f"argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id " - f"FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id " - f"HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, " - f"e__pdi.distinct_id) INNER JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), " - f"'^\"|\"$', ''), person.version) AS properties___sneaky_mail, person.id AS id FROM person WHERE " - f"equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) " - f"AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE equals(e.team_id, {self.team.pk}) LIMIT 10" - f" SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, "SELECT event, e.timestamp, e.pdi.person.properties.sneaky_mail FROM events AS e LIMIT 10", @@ -390,6 +332,7 @@ def test_query_joins_events_person_properties(self): self.assertEqual(response.results[0][0], "random event") self.assertEqual(response.results[0][2], "tim@posthog.com") + @pytest.mark.usefixtures("unittest_snapshot") def test_query_joins_events_person_properties_in_aggregration(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -397,24 +340,14 @@ def test_query_joins_events_person_properties_in_aggregration(self): "SELECT s.pdi.person.properties.sneaky_mail, count() FROM events s GROUP BY s.pdi.person.properties.sneaky_mail LIMIT 10", self.team, ) - expected = ( - f"SELECT s__pdi__person.properties___sneaky_mail, count() FROM events AS s INNER JOIN (SELECT argMax(person_distinct_id2.person_id, " - f"person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE " - f"equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id HAVING " - f"ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS s__pdi ON " - f"equals(s.distinct_id, s__pdi.distinct_id) INNER JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, " - f"%(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), person.version) AS properties___sneaky_mail, person.id AS id FROM person WHERE " - f"equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) " - f"AS s__pdi__person ON equals(s__pdi.person_id, s__pdi__person.id) WHERE equals(s.team_id, {self.team.pk}) " - f"GROUP BY s__pdi__person.properties___sneaky_mail LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1" - ) - self.assertEqual(response.clickhouse, expected) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, "SELECT s.pdi.person.properties.sneaky_mail, count() FROM events AS s GROUP BY s.pdi.person.properties.sneaky_mail LIMIT 10", ) self.assertEqual(response.results[0][0], "tim@posthog.com") + @pytest.mark.usefixtures("unittest_snapshot") def test_select_person_on_events(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -422,19 +355,14 @@ def test_select_person_on_events(self): "SELECT poe.properties.sneaky_mail, count() FROM events s GROUP BY poe.properties.sneaky_mail LIMIT 10", self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(s.person_properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), " - f"count() FROM events AS s WHERE equals(s.team_id, {self.team.pk}) GROUP BY " - f"replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(s.person_properties, %(hogql_val_1)s), ''), 'null'), '^\"|\"$', '') LIMIT 10" - f" SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, "SELECT poe.properties.sneaky_mail, count() FROM events AS s GROUP BY poe.properties.sneaky_mail LIMIT 10", ) self.assertEqual(response.results[0][0], "tim@posthog.com") + @pytest.mark.usefixtures("unittest_snapshot") @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) def test_query_select_person_with_joins_without_poe(self): with freeze_time("2020-01-10"): @@ -444,19 +372,7 @@ def test_query_select_person_with_joins_without_poe(self): "SELECT event, timestamp, person.id, person.properties.sneaky_mail FROM events LIMIT 10", self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT events.event, toTimeZone(events.timestamp, %(hogql_val_1)s), events__pdi__person.id, events__pdi__person.properties___sneaky_mail " - f"FROM events INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, " - f"person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) " - f"GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, " - f"person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) " - f"INNER JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), " - f"'^\"|\"$', ''), person.version) AS properties___sneaky_mail, person.id AS id FROM person WHERE " - f"equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) " - f"AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) " - f"WHERE equals(events.team_id, {self.team.pk}) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, "SELECT event, timestamp, person.id, person.properties.sneaky_mail FROM events LIMIT 10", @@ -465,6 +381,7 @@ def test_query_select_person_with_joins_without_poe(self): self.assertEqual(response.results[0][2], UUID("00000000-0000-4000-8000-000000000000")) self.assertEqual(response.results[0][3], "tim@posthog.com") + @pytest.mark.usefixtures("unittest_snapshot") @override_settings(PERSON_ON_EVENTS_OVERRIDE=True) def test_query_select_person_with_poe_without_joins(self): with freeze_time("2020-01-10"): @@ -474,10 +391,7 @@ def test_query_select_person_with_poe_without_joins(self): "SELECT event, timestamp, person.id, person.properties.sneaky_mail FROM events LIMIT 10", self.team, ) - self.assertEqual( - response.clickhouse, - f"SELECT events.event, toTimeZone(events.timestamp, %(hogql_val_0)s), events.person_id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.person_properties, %(hogql_val_1)s), ''), 'null'), '^\"|\"$', '') FROM events WHERE equals(events.team_id, {self.team.pk}) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual( response.hogql, "SELECT event, timestamp, person.id, person.properties.sneaky_mail FROM events LIMIT 10", @@ -667,6 +581,7 @@ def test_join_with_property_not_materialized(self): ) self.assertEqual(response.results, [("$pageview", "111"), ("$pageview", "111")]) + @pytest.mark.usefixtures("unittest_snapshot") def test_hogql_lambdas(self): with override_settings(PERSON_ON_EVENTS_V2_OVERRIDE=False): response = execute_hogql_query( @@ -674,11 +589,9 @@ def test_hogql_lambdas(self): team=self.team, ) self.assertEqual(response.results, [([2, 4, 6], 1)]) - self.assertEqual( - response.clickhouse, - f"SELECT arrayMap(x -> multiply(x, 2), [1, 2, 3]), 1 LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") def test_hogql_arrays(self): with override_settings(PERSON_ON_EVENTS_V2_OVERRIDE=False): response = execute_hogql_query( @@ -687,11 +600,9 @@ def test_hogql_arrays(self): ) # Following SQL tradition, ClickHouse array indexes start at 1, not from zero. self.assertEqual(response.results, [([1, 2, 3], 10)]) - self.assertEqual( - response.clickhouse, - f"SELECT [1, 2, 3], [10, 11, 12][1] LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") def test_tuple_access(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -715,17 +626,7 @@ def test_tuple_access(self): team=self.team, ) self.assertEqual(response.results, [("0", [("random event", 1)]), ("1", [("random event", 1)])]) - self.assertEqual( - response.clickhouse, - f"SELECT col_a, arrayZip((sumMap(g.1, g.2) AS x).1, x.2) AS r FROM " - f"(SELECT col_a, groupArray(tuple(col_b, col_c)) AS g FROM " - f"(SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', '') AS col_a, " - f"events.event AS col_b, count() AS col_c FROM events WHERE equals(events.team_id, {self.team.pk}) " - f"GROUP BY replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_1)s), ''), 'null'), '^\"|\"$', ''), events.event) " - f"GROUP BY col_a) " - f"GROUP BY col_a ORDER BY col_a ASC LIMIT 100 " - f"SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot def test_null_properties(self): with freeze_time("2020-01-10"): @@ -948,6 +849,7 @@ def test_window_functions_with_window(self): ] self.assertEqual(response.results, expected) + @pytest.mark.usefixtures("unittest_snapshot") def test_with_pivot_table_1_level(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -980,19 +882,9 @@ def test_with_pivot_table_1_level(self): team=self.team, ) self.assertEqual(response.results, [("0", [("random event", 1)]), ("1", [("random event", 1)])]) - self.assertEqual( - response.clickhouse, - f"SELECT PIVOT_FUNCTION_2.col_a, PIVOT_FUNCTION_2.r FROM " - f"(SELECT PIVOT_FUNCTION_1.col_a, arrayZip((sumMap(PIVOT_FUNCTION_1.g.1, PIVOT_FUNCTION_1.g.2) AS x).1, x.2) AS r " - f"FROM (SELECT PIVOT_TABLE_COL_ABC.col_a, groupArray(tuple(PIVOT_TABLE_COL_ABC.col_b, PIVOT_TABLE_COL_ABC.col_c)) AS g " - f"FROM (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', '') " - f"AS col_a, events.event AS col_b, count() AS col_c FROM events WHERE equals(events.team_id, {self.team.pk}) " - f"GROUP BY replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_1)s), ''), 'null'), " - f"'^\"|\"$', ''), events.event) AS PIVOT_TABLE_COL_ABC GROUP BY PIVOT_TABLE_COL_ABC.col_a) AS PIVOT_FUNCTION_1 " - f"GROUP BY PIVOT_FUNCTION_1.col_a) AS PIVOT_FUNCTION_2 ORDER BY PIVOT_FUNCTION_2.col_a ASC " - f"LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") def test_with_pivot_table_2_levels(self): with freeze_time("2020-01-10"): self._create_random_events() @@ -1026,18 +918,7 @@ def test_with_pivot_table_2_levels(self): team=self.team, ) self.assertEqual(response.results, [("0", [("random event", 1)]), ("1", [("random event", 1)])]) - self.assertEqual( - response.clickhouse, - f"SELECT final.col_a, final.r FROM (SELECT PIVOT_FUNCTION_2.col_a, PIVOT_FUNCTION_2.r FROM " - f"(SELECT PIVOT_FUNCTION_1.col_a, arrayZip((sumMap(PIVOT_FUNCTION_1.g.1, PIVOT_FUNCTION_1.g.2) AS x).1, x.2) AS r FROM " - f"(SELECT PIVOT_TABLE_COL_ABC.col_a, groupArray(tuple(PIVOT_TABLE_COL_ABC.col_b, PIVOT_TABLE_COL_ABC.col_c)) AS g FROM " - f"(SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', '') AS col_a, " - f"events.event AS col_b, count() AS col_c FROM events WHERE equals(events.team_id, {self.team.pk}) " - f"GROUP BY replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_1)s), ''), 'null'), '^\"|\"$', ''), " - f"events.event) AS PIVOT_TABLE_COL_ABC GROUP BY PIVOT_TABLE_COL_ABC.col_a) AS PIVOT_FUNCTION_1 " - f"GROUP BY PIVOT_FUNCTION_1.col_a) AS PIVOT_FUNCTION_2) AS final ORDER BY final.col_a ASC LIMIT 100 " - f"SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot def test_property_access_with_arrays(self): with freeze_time("2020-01-10"): @@ -1504,12 +1385,10 @@ def test_hogql_query_filters_alias(self): response.hogql, f"SELECT event, distinct_id FROM events AS e WHERE equals(properties.random_uuid, '{random_uuid}') LIMIT 100", ) - self.assertEqual( - response.clickhouse, - f"SELECT e.event, e.distinct_id FROM events AS e WHERE and(equals(e.team_id, {self.team.pk}), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), %(hogql_val_1)s), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot self.assertEqual(len(response.results), 2) + @pytest.mark.usefixtures("unittest_snapshot") def test_hogql_union_all_limits(self): query = "SELECT event FROM events UNION ALL SELECT event FROM events" response = execute_hogql_query(query, team=self.team) @@ -1517,7 +1396,4 @@ def test_hogql_union_all_limits(self): response.hogql, f"SELECT event FROM events LIMIT 100 UNION ALL SELECT event FROM events LIMIT 100", ) - self.assertEqual( - response.clickhouse, - f"SELECT events.event FROM events WHERE equals(events.team_id, {self.team.pk}) LIMIT 100 UNION ALL SELECT events.event FROM events WHERE equals(events.team_id, {self.team.pk}) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1", - ) + assert pretty_print_in_tests(response.clickhouse, self.team.pk) == self.snapshot diff --git a/posthog/hogql/test/utils.py b/posthog/hogql/test/utils.py new file mode 100644 index 0000000000000..8e5fc45313a0f --- /dev/null +++ b/posthog/hogql/test/utils.py @@ -0,0 +1,11 @@ +def pretty_print_in_tests(query: str, team_id: int) -> str: + return ( + query.replace("SELECT", "\nSELECT") + .replace("FROM", "\nFROM") + .replace("WHERE", "\nWHERE") + .replace("GROUP", "\nGROUP") + .replace("HAVING", "\nHAVING") + .replace("LIMIT", "\nLIMIT") + .replace("SETTINGS", "\nSETTINGS") + .replace(f"team_id, {team_id})", "team_id, 420)") + ) diff --git a/posthog/hogql/transforms/lazy_tables.py b/posthog/hogql/transforms/lazy_tables.py index e16ff08449f7e..d2bd4c1398aa9 100644 --- a/posthog/hogql/transforms/lazy_tables.py +++ b/posthog/hogql/transforms/lazy_tables.py @@ -186,7 +186,7 @@ def visit_select_query(self, node: ast.SelectQuery): # For all the collected tables, create the subqueries, and add them to the table. for table_name, table_to_add in tables_to_add.items(): - subquery = table_to_add.lazy_table.lazy_select(table_to_add.fields_accessed) + subquery = table_to_add.lazy_table.lazy_select(table_to_add.fields_accessed, self.context.modifiers) subquery = cast(ast.SelectQuery, resolve_types(subquery, self.context, [node.type])) old_table_type = select_type.tables[table_name] select_type.tables[table_name] = ast.SelectQueryAliasType(alias=table_name, select_query_type=subquery.type) @@ -203,7 +203,7 @@ def visit_select_query(self, node: ast.SelectQuery): # For all the collected joins, create the join subqueries, and add them to the table. for to_table, join_scope in joins_to_add.items(): join_to_add: ast.JoinExpr = join_scope.lazy_join.join_function( - join_scope.from_table, join_scope.to_table, join_scope.fields_accessed + join_scope.from_table, join_scope.to_table, join_scope.fields_accessed, self.context.modifiers ) join_to_add = cast(ast.JoinExpr, resolve_types(join_to_add, self.context, [node.type])) select_type.tables[to_table] = join_to_add.type diff --git a/posthog/hogql/transforms/test/__snapshots__/test_lazy_tables.ambr b/posthog/hogql/transforms/test/__snapshots__/test_lazy_tables.ambr new file mode 100644 index 0000000000000..ecb6fc525b2a9 --- /dev/null +++ b/posthog/hogql/transforms/test/__snapshots__/test_lazy_tables.ambr @@ -0,0 +1,194 @@ +# name: TestLazyJoins.test_resolve_lazy_table_as_select_table + ' + + SELECT persons.id, persons.properties___email, persons.`properties___$browser` + FROM ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___email, argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', ''), person.version) AS `properties___$browser`, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS persons + LIMIT 10000 + ' +--- +# name: TestLazyJoins.test_resolve_lazy_table_as_table_in_join + ' + + SELECT events.event, events.distinct_id, events__pdi.person_id, persons.properties___email + FROM events INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) LEFT JOIN ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___email, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, events__pdi.person_id) + WHERE equals(events.team_id, 420) + LIMIT 10 + ' +--- +# name: TestLazyJoins.test_resolve_lazy_tables + ' + + SELECT events.event, events__pdi.person_id + FROM events INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE equals(events.team_id, 420) + LIMIT 10000 + ' +--- +# name: TestLazyJoins.test_resolve_lazy_tables_one_level_properties + ' + + SELECT person_distinct_ids__person.`properties___$browser` + FROM ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS person_distinct_ids INNER JOIN ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS `properties___$browser`, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids__person ON equals(person_distinct_ids.person_id, person_distinct_ids__person.id) + LIMIT 10000 + ' +--- +# name: TestLazyJoins.test_resolve_lazy_tables_one_level_properties_deep + ' + + SELECT person_distinct_ids__person.`properties___$browser___in___json` + FROM ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS person_distinct_ids INNER JOIN ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s, %(hogql_val_1)s, %(hogql_val_2)s), ''), 'null'), '^"|"$', ''), person.version) AS `properties___$browser___in___json`, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids__person ON equals(person_distinct_ids.person_id, person_distinct_ids__person.id) + LIMIT 10000 + ' +--- +# name: TestLazyJoins.test_resolve_lazy_tables_traversed_fields + ' + + SELECT events.event, events__pdi.person_id + FROM events INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE equals(events.team_id, 420) + LIMIT 10000 + ' +--- +# name: TestLazyJoins.test_resolve_lazy_tables_two_levels + ' + + SELECT events.event, events__pdi__person.id + FROM events INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN ( + SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE equals(events.team_id, 420) + LIMIT 10000 + ' +--- +# name: TestLazyJoins.test_resolve_lazy_tables_two_levels_properties + ' + + SELECT events.event, events__pdi__person.`properties___$browser` + FROM events INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS `properties___$browser`, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE equals(events.team_id, 420) + LIMIT 10000 + ' +--- +# name: TestLazyJoins.test_resolve_lazy_tables_two_levels_properties_duplicate + ' + + SELECT events.event, events__pdi__person.properties, events__pdi__person.properties___name + FROM events INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___name, argMax(person.properties, person.version) AS properties, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE equals(events.team_id, 420) + LIMIT 10000 + ' +--- +# name: TestLazyJoins.test_resolve_lazy_tables_two_levels_traversed + ' + + SELECT events.event, events__pdi__person.id + FROM events INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN ( + SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE equals(events.team_id, 420) + LIMIT 10000 + ' +--- +# name: TestLazyJoins.test_select_count_from_lazy_table + ' + + SELECT count() + FROM ( + SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS persons + LIMIT 10000 + ' +--- diff --git a/posthog/hogql/transforms/test/__snapshots__/test_property_types.ambr b/posthog/hogql/transforms/test/__snapshots__/test_property_types.ambr new file mode 100644 index 0000000000000..5e449b22a6c40 --- /dev/null +++ b/posthog/hogql/transforms/test/__snapshots__/test_property_types.ambr @@ -0,0 +1,76 @@ +# name: TestPropertyTypes.test_resolve_property_types_combined + ' + + SELECT multiply(toFloat64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', '')), toFloat64OrNull(events__pdi__person.properties___tickets)) + FROM events INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___tickets, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE equals(events.team_id, 420) + LIMIT 10000 + ' +--- +# name: TestPropertyTypes.test_resolve_property_types_event + ' + + SELECT multiply(toFloat64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '')), toFloat64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', ''))), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_2)s), ''), 'null'), '^"|"$', ''), %(hogql_val_3)s), 0) + FROM events + WHERE equals(events.team_id, 420) + LIMIT 10000 + ' +--- +# name: TestPropertyTypes.test_resolve_property_types_event_person_poe_off + ' + + SELECT parseDateTime64BestEffortOrNull(events__pdi__person.properties___provided_timestamp, 6, %(hogql_val_1)s) + FROM events INNER JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___provided_timestamp, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE equals(events.team_id, 420) + LIMIT 10000 + ' +--- +# name: TestPropertyTypes.test_resolve_property_types_event_person_poe_on + ' + + SELECT parseDateTime64BestEffortOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.person_properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), 6, %(hogql_val_1)s) + FROM events + WHERE equals(events.team_id, 420) + LIMIT 10000 + ' +--- +# name: TestPropertyTypes.test_resolve_property_types_person + ' + + SELECT toFloat64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '')), parseDateTime64BestEffortOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', ''), 6, %(hogql_val_2)s), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_3)s), ''), 'null'), '^"|"$', '') + FROM person + WHERE equals(person.team_id, 420) + LIMIT 10000 + ' +--- +# name: TestPropertyTypes.test_resolve_property_types_person_raw + ' + + SELECT toFloat64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '')), parseDateTime64BestEffortOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', ''), 6, %(hogql_val_2)s), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_3)s), ''), 'null'), '^"|"$', '') + FROM person + WHERE equals(person.team_id, 420) + LIMIT 10000 + ' +--- diff --git a/posthog/hogql/transforms/test/test_lazy_tables.py b/posthog/hogql/transforms/test/test_lazy_tables.py index 28c4d24787242..aad1dbae3fb1c 100644 --- a/posthog/hogql/transforms/test/test_lazy_tables.py +++ b/posthog/hogql/transforms/test/test_lazy_tables.py @@ -1,169 +1,84 @@ +from typing import Any + +import pytest from django.test import override_settings from posthog.hogql.context import HogQLContext from posthog.hogql.parser import parse_select from posthog.hogql.printer import print_ast +from posthog.hogql.test.utils import pretty_print_in_tests from posthog.test.base import BaseTest class TestLazyJoins(BaseTest): + snapshot: Any maxDiff = None + @pytest.mark.usefixtures("unittest_snapshot") def test_resolve_lazy_tables(self): printed = self._print_select("select event, pdi.person_id from events") - expected = ( - "SELECT events.event, events__pdi.person_id " - "FROM events " - "INNER JOIN " - "(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id " - f"FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id " - "HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi " - "ON equals(events.distinct_id, events__pdi.distinct_id) " - f"WHERE equals(events.team_id, {self.team.pk}) " - "LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) def test_resolve_lazy_tables_traversed_fields(self): printed = self._print_select("select event, person_id from events") - expected = ( - f"SELECT events.event, events__pdi.person_id FROM events INNER JOIN (SELECT argMax(person_distinct_id2.person_id, " - f"person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE " - f"equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id HAVING " - f"ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi " - f"ON equals(events.distinct_id, events__pdi.distinct_id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") def test_resolve_lazy_tables_two_levels(self): printed = self._print_select("select event, pdi.person.id from events") - expected = ( - f"SELECT events.event, events__pdi__person.id FROM events INNER JOIN (SELECT " - f"argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id " - f"FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id " - f"HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON " - f"equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN (SELECT person.id AS id FROM person WHERE " - f"equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) " - f"AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) " - f"WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) def test_resolve_lazy_tables_two_levels_traversed(self): printed = self._print_select("select event, person.id from events") - expected = ( - f"SELECT events.event, events__pdi__person.id FROM events INNER JOIN (SELECT argMax(person_distinct_id2.person_id, " - f"person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE " - f"equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id HAVING " - f"ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON " - f"equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN (SELECT person.id AS id FROM person WHERE " - f"equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) " - f"AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) " - f"WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) def test_resolve_lazy_tables_one_level_properties(self): printed = self._print_select("select person.properties.$browser from person_distinct_ids") - expected = ( - f"SELECT person_distinct_ids__person.`properties___$browser` FROM " - f"(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id " - f"FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id " - f"HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS person_distinct_ids " - f"INNER JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), person.version) " - f"AS `properties___$browser`, person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id " - f"HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids__person " - f"ON equals(person_distinct_ids.person_id, person_distinct_ids__person.id) LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) def test_resolve_lazy_tables_one_level_properties_deep(self): printed = self._print_select("select person.properties.$browser.in.json from person_distinct_ids") - expected = ( - f"SELECT person_distinct_ids__person.`properties___$browser___in___json` FROM " - f"(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id " - f"FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id " - f"HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS person_distinct_ids " - f"INNER JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s, %(hogql_val_1)s, %(hogql_val_2)s), ''), 'null'), '^\"|\"$', ''), person.version) " - f"AS `properties___$browser___in___json`, person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id " - f"HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids__person " - f"ON equals(person_distinct_ids.person_id, person_distinct_ids__person.id) LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") def test_resolve_lazy_tables_two_levels_properties(self): printed = self._print_select("select event, pdi.person.properties.$browser from events") - expected = ( - f"SELECT events.event, events__pdi__person.`properties___$browser` FROM events INNER JOIN " - f"(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, " - f"person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) " - f"GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, " - f"person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) " - f"INNER JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', " - f"''), person.version) AS `properties___$browser`, person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) " - f"GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person " - f"ON equals(events__pdi.person_id, events__pdi__person.id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) def test_resolve_lazy_tables_two_levels_properties_duplicate(self): printed = self._print_select("select event, person.properties, person.properties.name from events") - expected = ( - f"SELECT events.event, events__pdi__person.properties, events__pdi__person.properties___name FROM events " - f"INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, " - f"person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) " - f"GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, " - f"person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) " - f"INNER JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), person.version) " - f"AS properties___name, argMax(person.properties, person.version) AS properties, person.id AS id FROM person " - f"WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) " - f"AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) def test_resolve_lazy_table_as_select_table(self): printed = self._print_select("select id, properties.email, properties.$browser from persons") - expected = ( - f"SELECT persons.id, persons.properties___email, persons.`properties___$browser` FROM " - f"(SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), person.version) AS " - f"properties___email, argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_1)s), ''), 'null'), '^\"|\"$', ''), person.version) " - f"AS `properties___$browser`, person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id " - f"HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) def test_resolve_lazy_table_as_table_in_join(self): printed = self._print_select( "select event, distinct_id, events.person_id, persons.properties.email from events left join persons on persons.id = events.person_id limit 10" ) - expected = ( - f"SELECT events.event, events.distinct_id, events__pdi.person_id, persons.properties___email FROM events " - f"INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, " - f"person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) " - f"GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) " - f"AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) LEFT JOIN (SELECT " - f"argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), person.version) AS properties___email, " - f"person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id " - f"HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, events__pdi.person_id) " - f"WHERE equals(events.team_id, {self.team.pk}) LIMIT 10" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") def test_select_count_from_lazy_table(self): printed = self._print_select("select count() from persons") - expected = ( - f"SELECT count() FROM (SELECT person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) " - f"GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot def _print_select(self, select: str): expr = parse_select(select) - return print_ast(expr, HogQLContext(team_id=self.team.pk, enable_select_queries=True), "clickhouse") + query = print_ast(expr, HogQLContext(team_id=self.team.pk, enable_select_queries=True), "clickhouse") + return pretty_print_in_tests(query, self.team.pk) diff --git a/posthog/hogql/transforms/test/test_property_types.py b/posthog/hogql/transforms/test/test_property_types.py index 203dc036d831a..c50f19a0a792d 100644 --- a/posthog/hogql/transforms/test/test_property_types.py +++ b/posthog/hogql/transforms/test/test_property_types.py @@ -1,13 +1,18 @@ +import pytest +from typing import Any + from django.test import override_settings from posthog.hogql.context import HogQLContext from posthog.hogql.parser import parse_select from posthog.hogql.printer import print_ast +from posthog.hogql.test.utils import pretty_print_in_tests from posthog.models import PropertyDefinition from posthog.test.base import BaseTest class TestPropertyTypes(BaseTest): + snapshot: Any maxDiff = None def setUp(self): @@ -43,84 +48,46 @@ def setUp(self): defaults={"property_type": "String"}, ) + @pytest.mark.usefixtures("unittest_snapshot") def test_resolve_property_types_event(self): printed = self._print_select( "select properties.$screen_width * properties.$screen_height, properties.bool from events" ) - expected = ( - "SELECT multiply(" - "toFloat64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', '')), " - "toFloat64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_1)s), ''), 'null'), '^\"|\"$', ''))), " - "ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_2)s), ''), 'null'), '^\"|\"$', ''), %(hogql_val_3)s), 0) " - f"FROM events WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") def test_resolve_property_types_person_raw(self): printed = self._print_select( "select properties.tickets, properties.provided_timestamp, properties.$initial_browser from raw_persons" ) - expected = ( - "SELECT toFloat64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', '')), " - "parseDateTime64BestEffortOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_1)s), ''), 'null'), '^\"|\"$', ''), 6, %(hogql_val_2)s), " - "replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_3)s), ''), 'null'), '^\"|\"$', '') " - f"FROM person WHERE equals(person.team_id, {self.team.pk}) LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") def test_resolve_property_types_person(self): printed = self._print_select( "select properties.tickets, properties.provided_timestamp, properties.$initial_browser from raw_persons" ) - expected = ( - "SELECT toFloat64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', '')), " - "parseDateTime64BestEffortOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_1)s), ''), 'null'), '^\"|\"$', ''), 6, %(hogql_val_2)s), " - "replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_3)s), ''), 'null'), '^\"|\"$', '') " - f"FROM person WHERE equals(person.team_id, {self.team.pk}) LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) def test_resolve_property_types_combined(self): printed = self._print_select("select properties.$screen_width * person.properties.tickets from events") - expected = ( - "SELECT multiply(" - "toFloat64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_1)s), ''), 'null'), '^\"|\"$', '')), " - "toFloat64OrNull(events__pdi__person.properties___tickets)) FROM events INNER JOIN " - "(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 " - f"WHERE equals(person_distinct_id2.team_id, {self.team.pk}) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi " - "ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN (SELECT " - "argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), person.version) AS properties___tickets, " - f"person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person " - f"ON equals(events__pdi.person_id, events__pdi__person.id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) def test_resolve_property_types_event_person_poe_off(self): printed = self._print_select("select person.properties.provided_timestamp from events") - expected = ( - f"SELECT parseDateTime64BestEffortOrNull(events__pdi__person.properties___provided_timestamp, 6, %(hogql_val_1)s) FROM events " - f"INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, " - f"person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, {self.team.pk}) " - f"GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) " - f"AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN (SELECT " - f"argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), " - f"person.version) AS properties___provided_timestamp, person.id AS id FROM person WHERE equals(person.team_id, {self.team.pk}) " - f"GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON " - f"equals(events__pdi.person_id, events__pdi__person.id) WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") @override_settings(PERSON_ON_EVENTS_OVERRIDE=True, PERSON_ON_EVENTS_V2_OVERRIDE=True) def test_resolve_property_types_event_person_poe_on(self): printed = self._print_select("select person.properties.provided_timestamp from events") - expected = ( - f"SELECT parseDateTime64BestEffortOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.person_properties, " - f"%(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), 6, %(hogql_val_1)s) FROM events WHERE equals(events.team_id, {self.team.pk}) LIMIT 10000" - ) - self.assertEqual(printed, expected) + assert printed == self.snapshot def _print_select(self, select: str): expr = parse_select(select) - return print_ast(expr, HogQLContext(team_id=self.team.pk, enable_select_queries=True), "clickhouse") + query = print_ast(expr, HogQLContext(team_id=self.team.pk, enable_select_queries=True), "clickhouse") + return pretty_print_in_tests(query, self.team.pk) diff --git a/posthog/schema.py b/posthog/schema.py index 29efb896c634a..c9ff94a78654a 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -228,11 +228,23 @@ class HogQLNotice(BaseModel): start: Optional[float] = None +class PersonsArgMaxVersion(str, Enum): + v1 = "v1" + v2 = "v2" + + +class PersonsOnEventsMode(str, Enum): + disabled = "disabled" + v1_enabled = "v1_enabled" + v2_enabled = "v2_enabled" + + class HogQLQueryModifiers(BaseModel): model_config = ConfigDict( extra="forbid", ) - personsOnEventsMode: Optional[str] = None + personsArgMaxVersion: Optional[PersonsArgMaxVersion] = None + personsOnEventsMode: Optional[PersonsOnEventsMode] = None class IntervalType(str, Enum): diff --git a/posthog/warehouse/models/view_link.py b/posthog/warehouse/models/view_link.py index cf4ecfb7582cd..978afb1b390a6 100644 --- a/posthog/warehouse/models/view_link.py +++ b/posthog/warehouse/models/view_link.py @@ -4,6 +4,7 @@ from .datawarehouse_saved_query import DataWarehouseSavedQuery from typing import Dict, Any from posthog.hogql.errors import HogQLException +from ...schema import HogQLQueryModifiers class DataWarehouseViewLink(CreatedMetaFields, UUIDModel, DeletedMetaFields): @@ -16,7 +17,9 @@ class DataWarehouseViewLink(CreatedMetaFields, UUIDModel, DeletedMetaFields): @property def join_function(self): - def _join_function(from_table: str, to_table: str, requested_fields: Dict[str, Any]): + def _join_function( + from_table: str, to_table: str, requested_fields: Dict[str, Any], modifiers: HogQLQueryModifiers + ): from posthog.hogql import ast from posthog.hogql.parser import parse_select