From ee0fc55439df27f331ae6ac5dfec4d1084ce041e Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Fri, 20 Oct 2023 11:04:24 +0200 Subject: [PATCH] feat(hogql): mixed PoE mode (#18064) * feat(hogql): mixed poe mode * schema --- frontend/src/queries/schema.json | 2 +- frontend/src/queries/schema.ts | 2 +- frontend/src/scenes/debug/HogQLDebug.tsx | 1 + posthog/hogql/database/database.py | 22 +++++++-- posthog/hogql/resolver.py | 8 ++++ posthog/hogql/test/test_modifiers.py | 57 ++++++++++++++++++++---- posthog/hogql/test/test_resolver.py | 34 +++++++++++++- posthog/schema.py | 1 + 8 files changed, 112 insertions(+), 15 deletions(-) diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 830ad1c4aa462..ff07a99907292 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -1500,7 +1500,7 @@ "type": "string" }, "personsOnEventsMode": { - "enum": ["disabled", "v1_enabled", "v2_enabled"], + "enum": ["disabled", "v1_enabled", "v1_mixed", "v2_enabled"], "type": "string" } }, diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 34270f870fe99..f4b3a91b7b282 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -129,7 +129,7 @@ export interface DataNode extends Node { /** HogQL Query Options are automatically set per team. However, they can be overriden in the query. */ export interface HogQLQueryModifiers { - personsOnEventsMode?: 'disabled' | 'v1_enabled' | 'v2_enabled' + personsOnEventsMode?: 'disabled' | 'v1_enabled' | 'v1_mixed' | 'v2_enabled' personsArgMaxVersion?: 'auto' | 'v1' | 'v2' inCohortVia?: 'leftjoin' | 'subquery' } diff --git a/frontend/src/scenes/debug/HogQLDebug.tsx b/frontend/src/scenes/debug/HogQLDebug.tsx index d1d32f98643b1..cf5b662a3f8f1 100644 --- a/frontend/src/scenes/debug/HogQLDebug.tsx +++ b/frontend/src/scenes/debug/HogQLDebug.tsx @@ -37,6 +37,7 @@ export function HogQLDebug({ query, setQuery, queryKey }: HogQLDebugProps): JSX. options={[ { value: 'disabled', label: 'Disabled' }, { value: 'v1_enabled', label: 'V1 Enabled' }, + { value: 'v1_mixed', label: 'V1 Mixed' }, { value: 'v2_enabled', label: 'V2 Enabled' }, ]} onChange={(value) => diff --git a/posthog/hogql/database/database.py b/posthog/hogql/database/database.py index b1771404c3c2b..1e5b8dd2cc390 100644 --- a/posthog/hogql/database/database.py +++ b/posthog/hogql/database/database.py @@ -35,8 +35,7 @@ from posthog.hogql.errors import HogQLException from posthog.models.group_type_mapping import GroupTypeMapping from posthog.models.team.team import WeekStartDay -from posthog.schema import HogQLQueryModifiers -from posthog.utils import PersonOnEventsMode +from posthog.schema import HogQLQueryModifiers, PersonsOnEventsMode class Database(BaseModel): @@ -117,7 +116,24 @@ def create_hogql_database(team_id: int, modifiers: Optional[HogQLQueryModifiers] team = Team.objects.get(pk=team_id) modifiers = create_default_modifiers_for_team(team, modifiers) database = Database(timezone=team.timezone, week_start_day=team.week_start_day) - if modifiers.personsOnEventsMode != PersonOnEventsMode.DISABLED: + + if modifiers.personsOnEventsMode == PersonsOnEventsMode.disabled: + # no change + database.events.fields["person"] = FieldTraverser(chain=["pdi", "person"]) + database.events.fields["person_id"] = FieldTraverser(chain=["pdi", "person_id"]) + + elif modifiers.personsOnEventsMode == PersonsOnEventsMode.v1_mixed: + # person.id via a join, person.properties on events + database.events.fields["person_id"] = FieldTraverser(chain=["pdi", "person_id"]) + database.events.fields["person"] = FieldTraverser(chain=["poe"]) + database.events.fields["poe"].fields["id"] = FieldTraverser(chain=["..", "pdi", "person_id"]) + database.events.fields["poe"].fields["created_at"] = FieldTraverser(chain=["..", "pdi", "person", "created_at"]) + database.events.fields["poe"].fields["properties"] = StringJSONDatabaseField(name="person_properties") + + elif ( + modifiers.personsOnEventsMode == PersonsOnEventsMode.v1_enabled + or modifiers.personsOnEventsMode == PersonsOnEventsMode.v2_enabled + ): # TODO: split PoE v1 and v2 once SQL Expression fields are supported #15180 database.events.fields["person"] = FieldTraverser(chain=["poe"]) database.events.fields["person_id"] = StringDatabaseField(name="person_id") diff --git a/posthog/hogql/resolver.py b/posthog/hogql/resolver.py index 652229a41d010..48ea712bb9e13 100644 --- a/posthog/hogql/resolver.py +++ b/posthog/hogql/resolver.py @@ -412,14 +412,22 @@ def visit_field(self, node: ast.Field): # Recursively resolve the rest of the chain until we can point to the deepest node. loop_type = type chain_to_parse = node.chain[1:] + previous_types = [] while True: if isinstance(loop_type, FieldTraverserType): chain_to_parse = loop_type.chain + chain_to_parse loop_type = loop_type.table_type continue + previous_types.append(loop_type) if len(chain_to_parse) == 0: break next_chain = chain_to_parse.pop(0) + if next_chain == "..": # only support one level of ".." + previous_types.pop() + previous_types.pop() + loop_type = previous_types[-1] + next_chain = chain_to_parse.pop(0) + loop_type = loop_type.get_child(next_chain) if loop_type is None: raise ResolverException(f"Cannot resolve type {'.'.join(node.chain)}. Unable to resolve {next_chain}.") diff --git a/posthog/hogql/test/test_modifiers.py b/posthog/hogql/test/test_modifiers.py index a4a674a801ff7..b876d7ada529d 100644 --- a/posthog/hogql/test/test_modifiers.py +++ b/posthog/hogql/test/test_modifiers.py @@ -1,10 +1,9 @@ from posthog.hogql.modifiers import create_default_modifiers_for_team from posthog.hogql.query import execute_hogql_query from posthog.models import Cohort -from posthog.schema import HogQLQueryModifiers +from posthog.schema import HogQLQueryModifiers, PersonsOnEventsMode from posthog.test.base import BaseTest from django.test import override_settings -from posthog.utils import PersonOnEventsMode class TestModifiers(BaseTest): @@ -12,31 +11,71 @@ class TestModifiers(BaseTest): def test_create_default_modifiers_for_team_init(self): assert self.team.person_on_events_mode == "disabled" modifiers = create_default_modifiers_for_team(self.team) - assert modifiers.personsOnEventsMode == PersonOnEventsMode.DISABLED # NB! not a None + assert modifiers.personsOnEventsMode == PersonsOnEventsMode.disabled # NB! not a None modifiers = create_default_modifiers_for_team( - self.team, HogQLQueryModifiers(personsOnEventsMode=PersonOnEventsMode.V1_ENABLED) + self.team, HogQLQueryModifiers(personsOnEventsMode=PersonsOnEventsMode.v1_enabled) ) - assert modifiers.personsOnEventsMode == PersonOnEventsMode.V1_ENABLED + assert modifiers.personsOnEventsMode == PersonsOnEventsMode.v1_enabled modifiers = create_default_modifiers_for_team( - self.team, HogQLQueryModifiers(personsOnEventsMode=PersonOnEventsMode.V2_ENABLED) + self.team, HogQLQueryModifiers(personsOnEventsMode=PersonsOnEventsMode.v2_enabled) ) - assert modifiers.personsOnEventsMode == PersonOnEventsMode.V2_ENABLED + assert modifiers.personsOnEventsMode == PersonsOnEventsMode.v2_enabled def test_modifiers_persons_on_events_mode_v1_enabled(self): query = "SELECT event, person_id FROM events" # Control response = execute_hogql_query( - query, team=self.team, modifiers=HogQLQueryModifiers(personsOnEventsMode=PersonOnEventsMode.DISABLED) + query, team=self.team, modifiers=HogQLQueryModifiers(personsOnEventsMode=PersonsOnEventsMode.disabled) ) assert " JOIN " in response.clickhouse # Test response = execute_hogql_query( - query, team=self.team, modifiers=HogQLQueryModifiers(personsOnEventsMode=PersonOnEventsMode.V1_ENABLED) + query, team=self.team, modifiers=HogQLQueryModifiers(personsOnEventsMode=PersonsOnEventsMode.v1_enabled) ) assert " JOIN " not in response.clickhouse + def test_modifiers_persons_on_events_mode_mapping(self): + query = "SELECT event, person.id, person.properties, person.created_at FROM events" + + test_cases = [ + ( + PersonsOnEventsMode.disabled, + "events.event", + "events__pdi__person.id", + "events__pdi__person.properties", + "toTimeZone(events__pdi__person.created_at, %(hogql_val_0)s)", + ), + ( + PersonsOnEventsMode.v1_enabled, + "events.event", + "events.person_id", + "events.person_properties", + "toTimeZone(events.person_created_at, %(hogql_val_0)s)", + ), + ( + PersonsOnEventsMode.v1_mixed, + "events.event", + "events__pdi.person_id", + "events.person_properties", + "toTimeZone(events__pdi__person.created_at, %(hogql_val_0)s)", + ), + ( + PersonsOnEventsMode.v2_enabled, + "events.event", + "events.person_id", + "events.person_properties", + "toTimeZone(events.person_created_at, %(hogql_val_0)s)", + ), + ] + + for (mode, *expected) in test_cases: + response = execute_hogql_query( + query, team=self.team, modifiers=HogQLQueryModifiers(personsOnEventsMode=mode) + ) + assert f"SELECT {', '.join(expected)} FROM" in response.clickhouse, f"PoE mode: {mode}" + def test_modifiers_persons_argmax_version_v2(self): query = "SELECT * FROM persons" diff --git a/posthog/hogql/test/test_resolver.py b/posthog/hogql/test/test_resolver.py index 80e6ad644be0c..7ed33e37291d2 100644 --- a/posthog/hogql/test/test_resolver.py +++ b/posthog/hogql/test/test_resolver.py @@ -9,7 +9,13 @@ from posthog.hogql import ast from posthog.hogql.context import HogQLContext from posthog.hogql.database.database import create_hogql_database -from posthog.hogql.database.models import LazyJoin +from posthog.hogql.database.models import ( + LazyJoin, + FieldTraverser, + StringJSONDatabaseField, + StringDatabaseField, + DateTimeDatabaseField, +) from posthog.hogql.visitor import clone_expr from posthog.hogql.parser import parse_select from posthog.hogql.printer import print_ast @@ -1000,3 +1006,29 @@ def test_lambda_parent_scope(self): self.assertEqual(lambda_type.parent, node.type) self.assertEqual(list(lambda_type.aliases.keys()), ["x"]) self.assertEqual(list(lambda_type.parent.columns.keys()), ["timestamp"]) + + def test_field_traverser_double_dot(self): + # Create a condition where we want to ".." out of "events.poe." to get to a higher level prop + self.database.events.fields["person"] = FieldTraverser(chain=["poe"]) + self.database.events.fields["poe"].fields["id"] = FieldTraverser(chain=["..", "pdi", "person_id"]) + self.database.events.fields["poe"].fields["created_at"] = FieldTraverser( + chain=["..", "pdi", "person", "created_at"] + ) + self.database.events.fields["poe"].fields["properties"] = StringJSONDatabaseField(name="person_properties") + + node = self._select("SELECT event, person.id, person.properties, person.created_at FROM events") + node = cast(ast.SelectQuery, resolve_types(node, self.context)) + + # all columns resolve to a type in the end + assert cast(ast.FieldType, node.select[0].type).resolve_database_field() == StringDatabaseField( + name="event", array=None, nullable=None + ) + assert cast(ast.FieldType, node.select[1].type).resolve_database_field() == StringDatabaseField( + name="person_id", array=None, nullable=None + ) + assert cast(ast.FieldType, node.select[2].type).resolve_database_field() == StringJSONDatabaseField( + name="person_properties" + ) + assert cast(ast.FieldType, node.select[3].type).resolve_database_field() == DateTimeDatabaseField( + name="created_at", array=None, nullable=None + ) diff --git a/posthog/schema.py b/posthog/schema.py index d50b260adba20..4370ee5a9c7a9 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -264,6 +264,7 @@ class PersonsArgMaxVersion(str, Enum): class PersonsOnEventsMode(str, Enum): disabled = "disabled" v1_enabled = "v1_enabled" + v1_mixed = "v1_mixed" v2_enabled = "v2_enabled"