From b5cad0a71adc3b772628911fb41ed997e8a85836 Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Thu, 16 Nov 2023 14:23:03 +0100 Subject: [PATCH] feat(hogql): materialization mode modifier (#18676) --- frontend/src/queries/schema.json | 4 ++ frontend/src/queries/schema.ts | 1 + frontend/src/scenes/debug/HogQLDebug.tsx | 20 +++++- posthog/hogql/modifiers.py | 5 +- posthog/hogql/printer.py | 83 +++++++++++++----------- posthog/hogql/test/test_modifiers.py | 42 +++++++++++- posthog/schema.py | 8 +++ 7 files changed, 121 insertions(+), 42 deletions(-) diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index d1eed070437ff..62825d2ac3f9c 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -1526,6 +1526,10 @@ "enum": ["leftjoin", "subquery"], "type": "string" }, + "materializationMode": { + "enum": ["auto", "legacy_null_as_string", "legacy_null_as_null", "disabled"], + "type": "string" + }, "personsArgMaxVersion": { "enum": ["auto", "v1", "v2"], "type": "string" diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index fdba8908a3a71..7b51deead7f6a 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -136,6 +136,7 @@ export interface HogQLQueryModifiers { personsOnEventsMode?: 'disabled' | 'v1_enabled' | 'v1_mixed' | 'v2_enabled' personsArgMaxVersion?: 'auto' | 'v1' | 'v2' inCohortVia?: 'leftjoin' | 'subquery' + materializationMode?: 'auto' | 'legacy_null_as_string' | 'legacy_null_as_null' | 'disabled' } export interface HogQLQueryResponse { diff --git a/frontend/src/scenes/debug/HogQLDebug.tsx b/frontend/src/scenes/debug/HogQLDebug.tsx index 21cc7b0b13104..4ef2a6656bede 100644 --- a/frontend/src/scenes/debug/HogQLDebug.tsx +++ b/frontend/src/scenes/debug/HogQLDebug.tsx @@ -80,7 +80,25 @@ export function HogQLDebug({ query, setQuery, queryKey }: HogQLDebugProps): JSX. } value={query.modifiers?.inCohortVia ?? response?.modifiers?.inCohortVia} /> - {' '} + + + Materialization Mode: + + setQuery({ + ...query, + modifiers: { ...query.modifiers, materializationMode: value }, + } as HogQLQuery) + } + value={query.modifiers?.materializationMode ?? response?.modifiers?.materializationMode} + /> + {dataLoading ? ( <> diff --git a/posthog/hogql/modifiers.py b/posthog/hogql/modifiers.py index 0643deefcc6fa..8884f197afcf6 100644 --- a/posthog/hogql/modifiers.py +++ b/posthog/hogql/modifiers.py @@ -1,7 +1,7 @@ from typing import Optional from posthog.models import Team -from posthog.schema import HogQLQueryModifiers +from posthog.schema import HogQLQueryModifiers, MaterializationMode from posthog.utils import PersonOnEventsMode @@ -22,4 +22,7 @@ def create_default_modifiers_for_team( if modifiers.inCohortVia is None: modifiers.inCohortVia = "subquery" + if modifiers.materializationMode is None or modifiers.materializationMode == MaterializationMode.auto: + modifiers.materializationMode = MaterializationMode.legacy_null_as_null + return modifiers diff --git a/posthog/hogql/printer.py b/posthog/hogql/printer.py index 5e5a076b9e55f..f89614d0dc95a 100644 --- a/posthog/hogql/printer.py +++ b/posthog/hogql/printer.py @@ -39,6 +39,7 @@ from posthog.models.property import PropertyName, TableColumn from posthog.models.team.team import WeekStartDay from posthog.models.utils import UUIDT +from posthog.schema import MaterializationMode from posthog.utils import PersonOnEventsMode @@ -907,47 +908,51 @@ def visit_property_type(self, type: ast.PropertyType): while isinstance(table, ast.TableAliasType): table = table.table_type - # find a materialized property for the first part of the chain - materialized_property_sql: Optional[str] = None - if isinstance(table, ast.TableType): - if self.dialect == "clickhouse": - table_name = table.table.to_printed_clickhouse(self.context) - else: - table_name = table.table.to_printed_hogql() - if field is None: - raise HogQLException(f"Can't resolve field {field_type.name} on table {table_name}") - field_name = cast(Union[Literal["properties"], Literal["person_properties"]], field.name) - - materialized_column = self._get_materialized_column(table_name, type.chain[0], field_name) - if materialized_column: - property_sql = self._print_identifier(materialized_column) - property_sql = f"{self.visit(field_type.table_type)}.{property_sql}" - materialized_property_sql = property_sql - elif ( - self.context.within_non_hogql_query - and (isinstance(table, ast.SelectQueryAliasType) and table.alias == "events__pdi__person") - or (isinstance(table, ast.VirtualTableType) and table.field == "poe") - ): - # :KLUDGE: Legacy person properties handling. Only used within non-HogQL queries, such as insights. - if self.context.modifiers.personsOnEventsMode != PersonOnEventsMode.DISABLED: - materialized_column = self._get_materialized_column("events", type.chain[0], "person_properties") - else: - materialized_column = self._get_materialized_column("person", type.chain[0], "properties") - if materialized_column: - materialized_property_sql = self._print_identifier(materialized_column) - args: List[str] = [] - if materialized_property_sql is not None: - # When reading materialized columns, treat the values "" and "null" as NULL-s. - # TODO: rematerialize all columns to support empty strings and "null" string values. - materialized_property_sql = f"nullIf(nullIf({materialized_property_sql}, ''), 'null')" - if len(type.chain) == 1: - return materialized_property_sql - else: - for name in type.chain[1:]: - args.append(self.context.add_value(name)) - return self._unsafe_json_extract_trim_quotes(materialized_property_sql, args) + if self.context.modifiers.materializationMode != "disabled": + # find a materialized property for the first part of the chain + materialized_property_sql: Optional[str] = None + if isinstance(table, ast.TableType): + if self.dialect == "clickhouse": + table_name = table.table.to_printed_clickhouse(self.context) + else: + table_name = table.table.to_printed_hogql() + if field is None: + raise HogQLException(f"Can't resolve field {field_type.name} on table {table_name}") + field_name = cast(Union[Literal["properties"], Literal["person_properties"]], field.name) + + materialized_column = self._get_materialized_column(table_name, type.chain[0], field_name) + if materialized_column: + property_sql = self._print_identifier(materialized_column) + property_sql = f"{self.visit(field_type.table_type)}.{property_sql}" + materialized_property_sql = property_sql + elif ( + self.context.within_non_hogql_query + and (isinstance(table, ast.SelectQueryAliasType) and table.alias == "events__pdi__person") + or (isinstance(table, ast.VirtualTableType) and table.field == "poe") + ): + # :KLUDGE: Legacy person properties handling. Only used within non-HogQL queries, such as insights. + if self.context.modifiers.personsOnEventsMode != PersonOnEventsMode.DISABLED: + materialized_column = self._get_materialized_column("events", type.chain[0], "person_properties") + else: + materialized_column = self._get_materialized_column("person", type.chain[0], "properties") + if materialized_column: + materialized_property_sql = self._print_identifier(materialized_column) + + if materialized_property_sql is not None: + # TODO: rematerialize all columns to properly support empty strings and "null" string values. + if self.context.modifiers.materializationMode == MaterializationMode.legacy_null_as_string: + materialized_property_sql = f"nullIf({materialized_property_sql}, '')" + else: # MaterializationMode.auto.legacy_null_as_null + materialized_property_sql = f"nullIf(nullIf({materialized_property_sql}, ''), 'null')" + + if len(type.chain) == 1: + return materialized_property_sql + else: + for name in type.chain[1:]: + args.append(self.context.add_value(name)) + return self._unsafe_json_extract_trim_quotes(materialized_property_sql, args) for name in type.chain: args.append(self.context.add_value(name)) diff --git a/posthog/hogql/test/test_modifiers.py b/posthog/hogql/test/test_modifiers.py index ba5ed58e84882..4296213727f37 100644 --- a/posthog/hogql/test/test_modifiers.py +++ b/posthog/hogql/test/test_modifiers.py @@ -1,7 +1,7 @@ from posthog.hogql.modifiers import create_default_modifiers_for_team from posthog.hogql.query import execute_hogql_query from posthog.models import Cohort -from posthog.schema import HogQLQueryModifiers, PersonsOnEventsMode +from posthog.schema import HogQLQueryModifiers, PersonsOnEventsMode, MaterializationMode from posthog.test.base import BaseTest from django.test import override_settings @@ -144,3 +144,43 @@ def test_modifiers_in_cohort_join(self): modifiers=HogQLQueryModifiers(inCohortVia="leftjoin"), ) assert "LEFT JOIN" in response.clickhouse + + def test_modifiers_materialization_mode(self): + try: + from ee.clickhouse.materialized_columns.analyze import materialize + except ModuleNotFoundError: + # EE not available? Assume we're good + self.assertEqual(1 + 2, 3) + return + materialize("events", "$browser") + + response = execute_hogql_query( + "SELECT properties.$browser FROM events", + team=self.team, + modifiers=HogQLQueryModifiers(materializationMode=MaterializationMode.auto), + ) + assert "SELECT nullIf(nullIf(events.`mat_$browser`, ''), 'null') FROM events" in response.clickhouse + + response = execute_hogql_query( + "SELECT properties.$browser FROM events", + team=self.team, + modifiers=HogQLQueryModifiers(materializationMode=MaterializationMode.legacy_null_as_null), + ) + assert "SELECT nullIf(nullIf(events.`mat_$browser`, ''), 'null') FROM events" in response.clickhouse + + response = execute_hogql_query( + "SELECT properties.$browser FROM events", + team=self.team, + modifiers=HogQLQueryModifiers(materializationMode=MaterializationMode.legacy_null_as_string), + ) + assert "SELECT nullIf(events.`mat_$browser`, '') FROM events" in response.clickhouse + + response = execute_hogql_query( + "SELECT properties.$browser FROM events", + team=self.team, + modifiers=HogQLQueryModifiers(materializationMode=MaterializationMode.disabled), + ) + assert ( + "SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', '') FROM events" + in response.clickhouse + ) diff --git a/posthog/schema.py b/posthog/schema.py index 3b86559f8fc78..fbba06675133a 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -255,6 +255,13 @@ class InCohortVia(str, Enum): subquery = "subquery" +class MaterializationMode(str, Enum): + auto = "auto" + legacy_null_as_string = "legacy_null_as_string" + legacy_null_as_null = "legacy_null_as_null" + disabled = "disabled" + + class PersonsArgMaxVersion(str, Enum): auto = "auto" v1 = "v1" @@ -273,6 +280,7 @@ class HogQLQueryModifiers(BaseModel): extra="forbid", ) inCohortVia: Optional[InCohortVia] = None + materializationMode: Optional[MaterializationMode] = None personsArgMaxVersion: Optional[PersonsArgMaxVersion] = None personsOnEventsMode: Optional[PersonsOnEventsMode] = None