Skip to content

Commit

Permalink
feat(hogql): Add basic support for PoE v3 (distinct ID overrides) (#2…
Browse files Browse the repository at this point in the history
  • Loading branch information
tkaemming authored Mar 22, 2024
1 parent e5bec12 commit 060ebc2
Show file tree
Hide file tree
Showing 10 changed files with 246 additions and 2 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -2567,7 +2567,7 @@
"type": "string"
},
"personsOnEventsMode": {
"enum": ["disabled", "v1_enabled", "v1_mixed", "v2_enabled"],
"enum": ["disabled", "v1_enabled", "v1_mixed", "v2_enabled", "v3_enabled"],
"type": "string"
}
},
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ export interface DataNode extends Node {

/** HogQL Query Options are automatically set per team. However, they can be overridden in the query. */
export interface HogQLQueryModifiers {
personsOnEventsMode?: 'disabled' | 'v1_enabled' | 'v1_mixed' | 'v2_enabled'
personsOnEventsMode?: 'disabled' | 'v1_enabled' | 'v1_mixed' | 'v2_enabled' | 'v3_enabled'
personsArgMaxVersion?: 'auto' | 'v1' | 'v2'
inCohortVia?: 'auto' | 'leftjoin' | 'subquery' | 'leftjoin_conjoined'
materializationMode?: 'auto' | 'legacy_null_as_string' | 'legacy_null_as_null' | 'disabled'
Expand Down
1 change: 1 addition & 0 deletions frontend/src/scenes/debug/HogQLDebug.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ export function HogQLDebug({ query, setQuery, queryKey }: HogQLDebugProps): JSX.
{ value: 'v1_enabled', label: 'V1 Enabled' },
{ value: 'v1_mixed', label: 'V1 Mixed' },
{ value: 'v2_enabled', label: 'V2 Enabled' },
{ value: 'v3_enabled', label: 'V3 Enabled (Join)' },
]}
onChange={(value) =>
setQuery({
Expand Down
2 changes: 2 additions & 0 deletions mypy-baseline.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ posthog/hogql/parser.py:0: error: "None" has no attribute "text" [attr-defined]
posthog/hogql/parser.py:0: error: "None" has no attribute "text" [attr-defined]
posthog/hogql/parser.py:0: error: Statement is unreachable [unreachable]
posthog/hogql/database/schema/person_distinct_ids.py:0: error: Argument 1 to "select_from_person_distinct_ids_table" has incompatible type "dict[str, list[str]]"; expected "dict[str, list[str | int]]" [arg-type]
posthog/hogql/database/schema/person_distinct_id_overrides.py:0: error: Argument 1 to "select_from_person_distinct_id_overrides_table" has incompatible type "dict[str, list[str]]"; expected "dict[str, list[str | int]]" [arg-type]
posthog/hogql/database/schema/cohort_people.py:0: error: Argument "chain" to "Field" has incompatible type "list[str]"; expected "list[str | int]" [arg-type]
posthog/hogql/database/schema/cohort_people.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance
posthog/hogql/database/schema/cohort_people.py:0: note: Consider using "Sequence" instead, which is covariant
Expand All @@ -105,6 +106,7 @@ posthog/hogql/database/database.py:0: error: "FieldOrTable" has no attribute "fi
posthog/hogql/database/database.py:0: error: "FieldOrTable" has no attribute "fields" [attr-defined]
posthog/hogql/database/database.py:0: error: "FieldOrTable" has no attribute "fields" [attr-defined]
posthog/hogql/database/database.py:0: error: "FieldOrTable" has no attribute "fields" [attr-defined]
posthog/hogql/database/database.py:0: error: "FieldOrTable" has no attribute "fields" [attr-defined]
posthog/hogql/database/database.py:0: error: Incompatible types (expression has type "Literal['view', 'lazy_table']", TypedDict item "type" has type "Literal['integer', 'float', 'string', 'datetime', 'date', 'boolean', 'array', 'json', 'lazy_table', 'virtual_table', 'field_traverser', 'expression']") [typeddict-item]
posthog/warehouse/models/datawarehouse_saved_query.py:0: error: Argument 1 to "create_hogql_database" has incompatible type "int | None"; expected "int" [arg-type]
posthog/warehouse/models/datawarehouse_saved_query.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "SelectQuery | SelectUnionQuery") [assignment]
Expand Down
25 changes: 25 additions & 0 deletions posthog/hogql/database/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
from posthog.hogql.database.schema.events import EventsTable
from posthog.hogql.database.schema.groups import GroupsTable, RawGroupsTable
from posthog.hogql.database.schema.numbers import NumbersTable
from posthog.hogql.database.schema.person_distinct_id_overrides import (
PersonDistinctIdOverridesTable,
RawPersonDistinctIdOverridesTable,
join_with_person_distinct_id_overrides_table,
)
from posthog.hogql.database.schema.person_distinct_ids import (
PersonDistinctIdsTable,
RawPersonDistinctIdsTable,
Expand Down Expand Up @@ -66,6 +71,7 @@ class Database(BaseModel):
groups: GroupsTable = GroupsTable()
persons: PersonsTable = PersonsTable()
person_distinct_ids: PersonDistinctIdsTable = PersonDistinctIdsTable()
person_distinct_id_overrides: PersonDistinctIdOverridesTable = PersonDistinctIdOverridesTable()
person_overrides: PersonOverridesTable = PersonOverridesTable()

session_replay_events: SessionReplayEventsTable = SessionReplayEventsTable()
Expand All @@ -81,6 +87,7 @@ class Database(BaseModel):
raw_persons: RawPersonsTable = RawPersonsTable()
raw_groups: RawGroupsTable = RawGroupsTable()
raw_cohort_people: RawCohortPeople = RawCohortPeople()
raw_person_distinct_id_overrides: RawPersonDistinctIdOverridesTable = RawPersonDistinctIdOverridesTable()
raw_person_overrides: RawPersonOverridesTable = RawPersonOverridesTable()
raw_sessions: RawSessionsTable = RawSessionsTable()

Expand Down Expand Up @@ -186,6 +193,24 @@ def create_hogql_database(
database.events.fields["poe"].fields["id"] = database.events.fields["person_id"]
database.events.fields["person"] = FieldTraverser(chain=["poe"])

elif modifiers.personsOnEventsMode == PersonsOnEventsMode.v3_enabled:
database.events.fields["event_person_id"] = StringDatabaseField(name="person_id")
database.events.fields["override"] = LazyJoin(
from_field=["distinct_id"], # ???
join_table=PersonDistinctIdOverridesTable(),
join_function=join_with_person_distinct_id_overrides_table,
)
database.events.fields["person_id"] = ExpressionField(
name="person_id",
expr=parse_expr(
# NOTE: assumes `join_use_nulls = 0` (the default), as ``override.distinct_id`` is not Nullable
"if(not(empty(override.distinct_id)), override.person_id, event_person_id)",
start=None,
),
)
database.events.fields["poe"].fields["id"] = database.events.fields["person_id"]
database.events.fields["person"] = FieldTraverser(chain=["poe"])

database.persons.fields["$virt_initial_referring_domain_type"] = create_initial_domain_type(
"$virt_initial_referring_domain_type"
)
Expand Down
92 changes: 92 additions & 0 deletions posthog/hogql/database/schema/person_distinct_id_overrides.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from typing import Dict, List
from posthog.hogql.ast import SelectQuery
from posthog.hogql.context import HogQLContext

from posthog.hogql.database.argmax import argmax_select
from posthog.hogql.database.models import (
Table,
IntegerDatabaseField,
StringDatabaseField,
BooleanDatabaseField,
LazyJoin,
LazyTable,
FieldOrTable,
)
from posthog.hogql.database.schema.persons import join_with_persons_table
from posthog.hogql.errors import HogQLException
from posthog.schema import HogQLQueryModifiers

# Field definitions shared by both table classes in this module: the raw table
# spreads this mapping and adds "is_deleted" and "version", while the lazy table
# uses it as-is.
PERSON_DISTINCT_ID_OVERRIDES_FIELDS = {
    "team_id": IntegerDatabaseField(name="team_id"),
    "distinct_id": StringDatabaseField(name="distinct_id"),
    "person_id": StringDatabaseField(name="person_id"),
    # Lazy join from this table's "person_id" to the "persons" table.
    "person": LazyJoin(
        from_field=["person_id"],
        join_table="persons",
        join_function=join_with_persons_table,
    ),
}


def select_from_person_distinct_id_overrides_table(requested_fields: Dict[str, List[str | int]]):
    """Build the ``argmax_select`` query over ``raw_person_distinct_id_overrides``.

    Args:
        requested_fields: Mapping of output field name to the field chain to select.
            The mapping passed in is never mutated; if "person_id" is missing, a
            widened copy is used instead.

    Returns:
        Whatever ``argmax_select`` returns for the raw overrides table, grouped by
        "distinct_id", with "version" as the argmax field and "is_deleted" as the
        deleted field (presumably a select that keeps the newest non-deleted row
        per distinct_id -- confirm against ``argmax_select``).
    """
    # "person_id" is the key further joins are made on, so it is always selected.
    select_fields = (
        requested_fields
        if "person_id" in requested_fields
        else {**requested_fields, "person_id": ["person_id"]}
    )
    return argmax_select(
        table_name="raw_person_distinct_id_overrides",
        select_fields=select_fields,
        group_fields=["distinct_id"],
        argmax_field="version",
        deleted_field="is_deleted",
    )


def join_with_person_distinct_id_overrides_table(
    from_table: str,
    to_table: str,
    requested_fields: Dict[str, List[str]],
    context: HogQLContext,
    node: SelectQuery,
):
    """Create the LEFT OUTER JOIN that attaches distinct ID overrides to ``from_table``.

    Args:
        from_table: Name/alias of the table being joined from; its "distinct_id" is
            the left side of the join condition.
        to_table: Alias given to the joined overrides subquery; its "distinct_id" is
            the right side of the join condition.
        requested_fields: Fields to select from the overrides table. Must be non-empty.
        context: Not used by this function.
        node: Not used by this function.

    Returns:
        An ``ast.JoinExpr`` wrapping the overrides select, aliased as ``to_table`` and
        constrained on ``from_table.distinct_id = to_table.distinct_id``.

    Raises:
        HogQLException: If ``requested_fields`` is empty.
    """
    from posthog.hogql import ast

    if not requested_fields:
        raise HogQLException("No fields requested from person_distinct_id_overrides")

    overrides_join = ast.JoinExpr(table=select_from_person_distinct_id_overrides_table(requested_fields))

    # Join condition: both sides must refer to the same distinct_id.
    same_distinct_id = ast.CompareOperation(
        op=ast.CompareOperationOp.Eq,
        left=ast.Field(chain=[from_table, "distinct_id"]),
        right=ast.Field(chain=[to_table, "distinct_id"]),
    )

    overrides_join.join_type = "LEFT OUTER JOIN"
    overrides_join.alias = to_table
    overrides_join.constraint = ast.JoinConstraint(expr=same_distinct_id)
    return overrides_join


class RawPersonDistinctIdOverridesTable(Table):
    """Raw view of the distinct ID overrides table, exposed in HogQL as
    ``raw_person_distinct_id_overrides``.

    Extends the shared override fields with "is_deleted" and "version", the two
    columns that ``select_from_person_distinct_id_overrides_table`` passes to
    ``argmax_select`` as the deleted field and the argmax field.
    """

    fields: Dict[str, FieldOrTable] = {
        **PERSON_DISTINCT_ID_OVERRIDES_FIELDS,
        "is_deleted": BooleanDatabaseField(name="is_deleted"),
        "version": IntegerDatabaseField(name="version"),
    }

    def to_printed_clickhouse(self, context):
        # Name used when this table is printed in ClickHouse SQL.
        return "person_distinct_id_overrides"

    def to_printed_hogql(self):
        # Name used when this table is printed in HogQL.
        return "raw_person_distinct_id_overrides"


class PersonDistinctIdOverridesTable(LazyTable):
    """Lazy table over distinct ID overrides, exposed in HogQL as
    ``person_distinct_id_overrides``.

    Selecting from it is resolved through
    ``select_from_person_distinct_id_overrides_table``, which queries the raw table.
    """

    fields: Dict[str, FieldOrTable] = PERSON_DISTINCT_ID_OVERRIDES_FIELDS

    def lazy_select(self, requested_fields: Dict[str, List[str | int]], context: HogQLContext, node: SelectQuery):
        """Return the select that backs this lazy table.

        The signature mirrors ``LazyTable.lazy_select(requested_fields, context, node)``
        so the override is compatible with the supertype and callers using the
        three-argument form do not fail.

        Args:
            requested_fields: Mapping of output field name to the field chain to select.
            context: Not used by this table.
            node: Not used by this table.

        Returns:
            The result of ``select_from_person_distinct_id_overrides_table(requested_fields)``.
        """
        return select_from_person_distinct_id_overrides_table(requested_fields)

    def to_printed_clickhouse(self, context):
        # Name used when this table is printed in ClickHouse SQL.
        return "person_distinct_id_overrides"

    def to_printed_hogql(self):
        # Name used when this table is printed in HogQL.
        return "person_distinct_id_overrides"
116 changes: 116 additions & 0 deletions posthog/hogql/database/test/__snapshots__/test_database.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,31 @@
]
}
],
"person_distinct_id_overrides": [
{
"key": "distinct_id",
"type": "string"
},
{
"key": "person_id",
"type": "string"
},
{
"key": "person",
"type": "lazy_table",
"table": "persons",
"fields": [
"id",
"created_at",
"team_id",
"properties",
"is_identified",
"pdi",
"$virt_initial_referring_domain_type",
"$virt_initial_channel_type"
]
}
],
"person_overrides": [
{
"key": "old_person_id",
Expand Down Expand Up @@ -790,6 +815,39 @@
"type": "integer"
}
],
"raw_person_distinct_id_overrides": [
{
"key": "distinct_id",
"type": "string"
},
{
"key": "person_id",
"type": "string"
},
{
"key": "person",
"type": "lazy_table",
"table": "persons",
"fields": [
"id",
"created_at",
"team_id",
"properties",
"is_identified",
"pdi",
"$virt_initial_referring_domain_type",
"$virt_initial_channel_type"
]
},
{
"key": "is_deleted",
"type": "boolean"
},
{
"key": "version",
"type": "integer"
}
],
"raw_person_overrides": [
{
"key": "old_person_id",
Expand Down Expand Up @@ -1155,6 +1213,31 @@
]
}
],
"person_distinct_id_overrides": [
{
"key": "distinct_id",
"type": "string"
},
{
"key": "person_id",
"type": "string"
},
{
"key": "person",
"type": "lazy_table",
"table": "persons",
"fields": [
"id",
"created_at",
"team_id",
"properties",
"is_identified",
"pdi",
"$virt_initial_referring_domain_type",
"$virt_initial_channel_type"
]
}
],
"person_overrides": [
{
"key": "old_person_id",
Expand Down Expand Up @@ -1641,6 +1724,39 @@
"type": "integer"
}
],
"raw_person_distinct_id_overrides": [
{
"key": "distinct_id",
"type": "string"
},
{
"key": "person_id",
"type": "string"
},
{
"key": "person",
"type": "lazy_table",
"table": "persons",
"fields": [
"id",
"created_at",
"team_id",
"properties",
"is_identified",
"pdi",
"$virt_initial_referring_domain_type",
"$virt_initial_channel_type"
]
},
{
"key": "is_deleted",
"type": "boolean"
},
{
"key": "version",
"type": "integer"
}
],
"raw_person_overrides": [
{
"key": "old_person_id",
Expand Down
7 changes: 7 additions & 0 deletions posthog/hogql/test/test_modifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,13 @@ def test_modifiers_persons_on_events_mode_mapping(self):
"events.person_properties AS properties",
"toTimeZone(events.person_created_at, %(hogql_val_1)s) AS created_at",
),
(
PersonsOnEventsMode.v3_enabled,
"events.event AS event",
"if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id) AS id",
"events.person_properties AS properties",
"toTimeZone(events.person_created_at, %(hogql_val_0)s) AS created_at",
),
]

for mode, *expected in test_cases:
Expand Down
1 change: 1 addition & 0 deletions posthog/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ class PersonsOnEventsMode(str, Enum):
v1_enabled = "v1_enabled"
v1_mixed = "v1_mixed"
v2_enabled = "v2_enabled"
v3_enabled = "v3_enabled"


class HogQLQueryModifiers(BaseModel):
Expand Down

0 comments on commit 060ebc2

Please sign in to comment.