Skip to content

Commit

Permalink
feat(hogql): materialization mode modifier (#18676)
Browse files Browse the repository at this point in the history
  • Loading branch information
mariusandra authored Nov 16, 2023
1 parent 259a204 commit b5cad0a
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 42 deletions.
4 changes: 4 additions & 0 deletions frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1526,6 +1526,10 @@
"enum": ["leftjoin", "subquery"],
"type": "string"
},
"materializationMode": {
"enum": ["auto", "legacy_null_as_string", "legacy_null_as_null", "disabled"],
"type": "string"
},
"personsArgMaxVersion": {
"enum": ["auto", "v1", "v2"],
"type": "string"
Expand Down
1 change: 1 addition & 0 deletions frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ export interface HogQLQueryModifiers {
personsOnEventsMode?: 'disabled' | 'v1_enabled' | 'v1_mixed' | 'v2_enabled'
personsArgMaxVersion?: 'auto' | 'v1' | 'v2'
inCohortVia?: 'leftjoin' | 'subquery'
materializationMode?: 'auto' | 'legacy_null_as_string' | 'legacy_null_as_null' | 'disabled'
}

export interface HogQLQueryResponse {
Expand Down
20 changes: 19 additions & 1 deletion frontend/src/scenes/debug/HogQLDebug.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,25 @@ export function HogQLDebug({ query, setQuery, queryKey }: HogQLDebugProps): JSX.
}
value={query.modifiers?.inCohortVia ?? response?.modifiers?.inCohortVia}
/>
</LemonLabel>{' '}
</LemonLabel>
<LemonLabel>
Materialization Mode:
<LemonSelect
options={[
{ value: 'auto', label: 'auto' },
{ value: 'legacy_null_as_string', label: 'legacy_null_as_string' },
{ value: 'legacy_null_as_null', label: 'legacy_null_as_null' },
{ value: 'disabled', label: 'disabled' },
]}
onChange={(value) =>
setQuery({
...query,
modifiers: { ...query.modifiers, materializationMode: value },
} as HogQLQuery)
}
value={query.modifiers?.materializationMode ?? response?.modifiers?.materializationMode}
/>
</LemonLabel>
</div>
{dataLoading ? (
<>
Expand Down
5 changes: 4 additions & 1 deletion posthog/hogql/modifiers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Optional

from posthog.models import Team
from posthog.schema import HogQLQueryModifiers
from posthog.schema import HogQLQueryModifiers, MaterializationMode
from posthog.utils import PersonOnEventsMode


Expand All @@ -22,4 +22,7 @@ def create_default_modifiers_for_team(
if modifiers.inCohortVia is None:
modifiers.inCohortVia = "subquery"

if modifiers.materializationMode is None or modifiers.materializationMode == MaterializationMode.auto:
modifiers.materializationMode = MaterializationMode.legacy_null_as_null

return modifiers
83 changes: 44 additions & 39 deletions posthog/hogql/printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from posthog.models.property import PropertyName, TableColumn
from posthog.models.team.team import WeekStartDay
from posthog.models.utils import UUIDT
from posthog.schema import MaterializationMode
from posthog.utils import PersonOnEventsMode


Expand Down Expand Up @@ -907,47 +908,51 @@ def visit_property_type(self, type: ast.PropertyType):
while isinstance(table, ast.TableAliasType):
table = table.table_type

# find a materialized property for the first part of the chain
materialized_property_sql: Optional[str] = None
if isinstance(table, ast.TableType):
if self.dialect == "clickhouse":
table_name = table.table.to_printed_clickhouse(self.context)
else:
table_name = table.table.to_printed_hogql()
if field is None:
raise HogQLException(f"Can't resolve field {field_type.name} on table {table_name}")
field_name = cast(Union[Literal["properties"], Literal["person_properties"]], field.name)

materialized_column = self._get_materialized_column(table_name, type.chain[0], field_name)
if materialized_column:
property_sql = self._print_identifier(materialized_column)
property_sql = f"{self.visit(field_type.table_type)}.{property_sql}"
materialized_property_sql = property_sql
elif (
self.context.within_non_hogql_query
and (isinstance(table, ast.SelectQueryAliasType) and table.alias == "events__pdi__person")
or (isinstance(table, ast.VirtualTableType) and table.field == "poe")
):
# :KLUDGE: Legacy person properties handling. Only used within non-HogQL queries, such as insights.
if self.context.modifiers.personsOnEventsMode != PersonOnEventsMode.DISABLED:
materialized_column = self._get_materialized_column("events", type.chain[0], "person_properties")
else:
materialized_column = self._get_materialized_column("person", type.chain[0], "properties")
if materialized_column:
materialized_property_sql = self._print_identifier(materialized_column)

args: List[str] = []
if materialized_property_sql is not None:
# When reading materialized columns, treat the values "" and "null" as NULL-s.
# TODO: rematerialize all columns to support empty strings and "null" string values.
materialized_property_sql = f"nullIf(nullIf({materialized_property_sql}, ''), 'null')"

if len(type.chain) == 1:
return materialized_property_sql
else:
for name in type.chain[1:]:
args.append(self.context.add_value(name))
return self._unsafe_json_extract_trim_quotes(materialized_property_sql, args)
if self.context.modifiers.materializationMode != "disabled":
# find a materialized property for the first part of the chain
materialized_property_sql: Optional[str] = None
if isinstance(table, ast.TableType):
if self.dialect == "clickhouse":
table_name = table.table.to_printed_clickhouse(self.context)
else:
table_name = table.table.to_printed_hogql()
if field is None:
raise HogQLException(f"Can't resolve field {field_type.name} on table {table_name}")
field_name = cast(Union[Literal["properties"], Literal["person_properties"]], field.name)

materialized_column = self._get_materialized_column(table_name, type.chain[0], field_name)
if materialized_column:
property_sql = self._print_identifier(materialized_column)
property_sql = f"{self.visit(field_type.table_type)}.{property_sql}"
materialized_property_sql = property_sql
elif (
self.context.within_non_hogql_query
and (isinstance(table, ast.SelectQueryAliasType) and table.alias == "events__pdi__person")
or (isinstance(table, ast.VirtualTableType) and table.field == "poe")
):
# :KLUDGE: Legacy person properties handling. Only used within non-HogQL queries, such as insights.
if self.context.modifiers.personsOnEventsMode != PersonOnEventsMode.DISABLED:
materialized_column = self._get_materialized_column("events", type.chain[0], "person_properties")
else:
materialized_column = self._get_materialized_column("person", type.chain[0], "properties")
if materialized_column:
materialized_property_sql = self._print_identifier(materialized_column)

if materialized_property_sql is not None:
# TODO: rematerialize all columns to properly support empty strings and "null" string values.
if self.context.modifiers.materializationMode == MaterializationMode.legacy_null_as_string:
materialized_property_sql = f"nullIf({materialized_property_sql}, '')"
else: # MaterializationMode.auto.legacy_null_as_null
materialized_property_sql = f"nullIf(nullIf({materialized_property_sql}, ''), 'null')"

if len(type.chain) == 1:
return materialized_property_sql
else:
for name in type.chain[1:]:
args.append(self.context.add_value(name))
return self._unsafe_json_extract_trim_quotes(materialized_property_sql, args)

for name in type.chain:
args.append(self.context.add_value(name))
Expand Down
42 changes: 41 additions & 1 deletion posthog/hogql/test/test_modifiers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from posthog.hogql.modifiers import create_default_modifiers_for_team
from posthog.hogql.query import execute_hogql_query
from posthog.models import Cohort
from posthog.schema import HogQLQueryModifiers, PersonsOnEventsMode
from posthog.schema import HogQLQueryModifiers, PersonsOnEventsMode, MaterializationMode
from posthog.test.base import BaseTest
from django.test import override_settings

Expand Down Expand Up @@ -144,3 +144,43 @@ def test_modifiers_in_cohort_join(self):
modifiers=HogQLQueryModifiers(inCohortVia="leftjoin"),
)
assert "LEFT JOIN" in response.clickhouse

def test_modifiers_materialization_mode(self):
    """Check the ClickHouse SQL printed for every ``materializationMode`` value.

    Materializes the ``$browser`` event property, then runs the same HogQL
    query once per mode and asserts on the generated ClickHouse SQL.
    The test is skipped when the EE materialized-columns module cannot be
    imported.
    """
    try:
        from ee.clickhouse.materialized_columns.analyze import materialize
    except ModuleNotFoundError:
        # Report an explicit skip instead of passing on a tautological
        # assertion, so test output shows that nothing was verified here.
        self.skipTest("EE materialized columns are not available")
    materialize("events", "$browser")

    # "auto" and "legacy_null_as_null" are expected to print identical SQL.
    null_as_null_sql = "SELECT nullIf(nullIf(events.`mat_$browser`, ''), 'null') FROM events"
    expected_sql_by_mode = [
        (MaterializationMode.auto, null_as_null_sql),
        (MaterializationMode.legacy_null_as_null, null_as_null_sql),
        (
            MaterializationMode.legacy_null_as_string,
            "SELECT nullIf(events.`mat_$browser`, '') FROM events",
        ),
        (
            # With materialization disabled the property is read from the JSON blob.
            MaterializationMode.disabled,
            "SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', '') FROM events",
        ),
    ]

    for mode, expected_sql in expected_sql_by_mode:
        response = execute_hogql_query(
            "SELECT properties.$browser FROM events",
            team=self.team,
            modifiers=HogQLQueryModifiers(materializationMode=mode),
        )
        # Include the mode in the failure message so the failing case is obvious.
        assert expected_sql in response.clickhouse, mode
8 changes: 8 additions & 0 deletions posthog/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,13 @@ class InCohortVia(str, Enum):
subquery = "subquery"


class MaterializationMode(str, Enum):
    """How HogQL property access treats materialized columns.

    Members subclass ``str``, so each one compares equal to its plain string
    value (for example ``MaterializationMode.disabled == "disabled"``).
    """

    # No explicit choice; default-modifier resolution replaces this with
    # ``legacy_null_as_null``.
    auto = "auto"
    # Read materialized columns, turning only the empty string into NULL.
    legacy_null_as_string = "legacy_null_as_string"
    # Read materialized columns, turning both "" and the string "null" into NULL.
    legacy_null_as_null = "legacy_null_as_null"
    # Skip the materialized-column lookup when printing property access.
    disabled = "disabled"


class PersonsArgMaxVersion(str, Enum):
auto = "auto"
v1 = "v1"
Expand All @@ -273,6 +280,7 @@ class HogQLQueryModifiers(BaseModel):
extra="forbid",
)
inCohortVia: Optional[InCohortVia] = None
materializationMode: Optional[MaterializationMode] = None
personsArgMaxVersion: Optional[PersonsArgMaxVersion] = None
personsOnEventsMode: Optional[PersonsOnEventsMode] = None

Expand Down

0 comments on commit b5cad0a

Please sign in to comment.