-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(hogql): Add basic support for PoE v3 (distinct ID overrides) #21059
Changes from 7 commits
d64dce1
a52e8f0
08de7dd
5f3dd1e
f9a0e04
2fdc164
44d2c82
0e6dba4
8d287ef
7912ed4
d110123
e18b9fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,6 +31,11 @@ | |
from posthog.hogql.database.schema.events import EventsTable | ||
from posthog.hogql.database.schema.groups import GroupsTable, RawGroupsTable | ||
from posthog.hogql.database.schema.numbers import NumbersTable | ||
from posthog.hogql.database.schema.person_distinct_id_overrides import ( | ||
PersonDistinctIdOverridesTable, | ||
RawPersonDistinctIdOverridesTable, | ||
join_with_person_distinct_id_overrides_table, | ||
) | ||
from posthog.hogql.database.schema.person_distinct_ids import ( | ||
PersonDistinctIdsTable, | ||
RawPersonDistinctIdsTable, | ||
|
@@ -66,6 +71,7 @@ | |
groups: GroupsTable = GroupsTable() | ||
persons: PersonsTable = PersonsTable() | ||
person_distinct_ids: PersonDistinctIdsTable = PersonDistinctIdsTable() | ||
person_distinct_id_overrides: PersonDistinctIdOverridesTable = PersonDistinctIdOverridesTable() | ||
person_overrides: PersonOverridesTable = PersonOverridesTable() | ||
|
||
session_replay_events: SessionReplayEventsTable = SessionReplayEventsTable() | ||
|
@@ -81,6 +87,7 @@ | |
raw_persons: RawPersonsTable = RawPersonsTable() | ||
raw_groups: RawGroupsTable = RawGroupsTable() | ||
raw_cohort_people: RawCohortPeople = RawCohortPeople() | ||
raw_person_distinct_id_overrides: RawPersonDistinctIdOverridesTable = RawPersonDistinctIdOverridesTable() | ||
raw_person_overrides: RawPersonOverridesTable = RawPersonOverridesTable() | ||
raw_sessions: RawSessionsTable = RawSessionsTable() | ||
|
||
|
@@ -186,6 +193,24 @@ | |
database.events.fields["poe"].fields["id"] = database.events.fields["person_id"] | ||
database.events.fields["person"] = FieldTraverser(chain=["poe"]) | ||
|
||
elif modifiers.personsOnEventsMode == PersonsOnEventsMode.v3_enabled: | ||
database.events.fields["event_person_id"] = StringDatabaseField(name="person_id") | ||
database.events.fields["override"] = LazyJoin( | ||
from_field=["distinct_id"], # ??? | ||
join_table=PersonDistinctIdOverridesTable(), | ||
join_function=join_with_person_distinct_id_overrides_table, | ||
) | ||
database.events.fields["person_id"] = ExpressionField( | ||
name="person_id", | ||
expr=parse_expr( | ||
# NOTE: assumes `join_use_nulls = 0` (the default), as ``override.distinct_id`` is not Nullable | ||
"if(not(empty(override.distinct_id)), override.person_id, event_person_id)", | ||
start=None, | ||
), | ||
) | ||
database.events.fields["poe"].fields["id"] = database.events.fields["person_id"] | ||
Check failure on line 211 in posthog/hogql/database/database.py GitHub Actions / Python code quality checks
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some mypy errors here |
||
database.events.fields["person"] = FieldTraverser(chain=["poe"]) | ||
|
||
database.persons.fields["$virt_initial_referring_domain_type"] = create_initial_domain_type( | ||
"$virt_initial_referring_domain_type" | ||
) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is largely copy/pasted from the non- |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
from typing import Dict, List | ||
from posthog.hogql.ast import SelectQuery | ||
from posthog.hogql.context import HogQLContext | ||
|
||
from posthog.hogql.database.argmax import argmax_select | ||
from posthog.hogql.database.models import ( | ||
Table, | ||
IntegerDatabaseField, | ||
StringDatabaseField, | ||
BooleanDatabaseField, | ||
LazyJoin, | ||
LazyTable, | ||
FieldOrTable, | ||
) | ||
from posthog.hogql.database.schema.persons import join_with_persons_table | ||
from posthog.hogql.errors import HogQLException | ||
from posthog.schema import HogQLQueryModifiers | ||
|
||
PERSON_DISTINCT_ID_OVERRIDES_FIELDS = { | ||
"team_id": IntegerDatabaseField(name="team_id"), | ||
"distinct_id": StringDatabaseField(name="distinct_id"), | ||
"person_id": StringDatabaseField(name="person_id"), | ||
"person": LazyJoin( | ||
from_field=["person_id"], | ||
join_table="persons", | ||
join_function=join_with_persons_table, | ||
), | ||
} | ||
|
||
|
||
def select_from_person_distinct_id_overrides_table(requested_fields: Dict[str, List[str | int]]): | ||
# Always include "person_id", as it's the key we use to make further joins, and it'd be great if it's available | ||
if "person_id" not in requested_fields: | ||
requested_fields = {**requested_fields, "person_id": ["person_id"]} | ||
return argmax_select( | ||
table_name="raw_person_distinct_id_overrides", | ||
select_fields=requested_fields, | ||
group_fields=["distinct_id"], | ||
argmax_field="version", | ||
deleted_field="is_deleted", | ||
) | ||
|
||
|
||
def join_with_person_distinct_id_overrides_table( | ||
from_table: str, | ||
to_table: str, | ||
requested_fields: Dict[str, List[str]], | ||
context: HogQLContext, | ||
node: SelectQuery, | ||
): | ||
from posthog.hogql import ast | ||
|
||
if not requested_fields: | ||
raise HogQLException("No fields requested from person_distinct_id_overrides") | ||
join_expr = ast.JoinExpr(table=select_from_person_distinct_id_overrides_table(requested_fields)) | ||
Check failure on line 55 in posthog/hogql/database/schema/person_distinct_id_overrides.py GitHub Actions / Python code quality checks
Check failure on line 55 in posthog/hogql/database/schema/person_distinct_id_overrides.py GitHub Actions / Python code quality checks
|
||
join_expr.join_type = "LEFT OUTER JOIN" | ||
Comment on lines
+55
to
+56
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Assuming we are able to stay reasonably on top of merges, something that could be interesting to check out here would be using an |
||
join_expr.alias = to_table | ||
join_expr.constraint = ast.JoinConstraint( | ||
expr=ast.CompareOperation( | ||
op=ast.CompareOperationOp.Eq, | ||
left=ast.Field(chain=[from_table, "distinct_id"]), | ||
right=ast.Field(chain=[to_table, "distinct_id"]), | ||
) | ||
) | ||
return join_expr | ||
|
||
|
||
class RawPersonDistinctIdOverridesTable(Table): | ||
fields: Dict[str, FieldOrTable] = { | ||
**PERSON_DISTINCT_ID_OVERRIDES_FIELDS, | ||
"is_deleted": BooleanDatabaseField(name="is_deleted"), | ||
"version": IntegerDatabaseField(name="version"), | ||
} | ||
|
||
def to_printed_clickhouse(self, context): | ||
return "person_distinct_id_overrides" | ||
|
||
def to_printed_hogql(self): | ||
return "raw_person_distinct_id_overrides" | ||
|
||
|
||
class PersonDistinctIdOverridesTable(LazyTable): | ||
fields: Dict[str, FieldOrTable] = PERSON_DISTINCT_ID_OVERRIDES_FIELDS | ||
|
||
def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): | ||
return select_from_person_distinct_id_overrides_table(requested_fields) | ||
|
||
def to_printed_clickhouse(self, context): | ||
return "person_distinct_id_overrides" | ||
|
||
def to_printed_hogql(self): | ||
return "person_distinct_id_overrides" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is defined on 182 above as well, but it wasn't obvious to me at a glance where this gets used (I didn't do a ton of digging either though to be fair.) I figured this would be used for the LHS of the join comparator, but it seems like that's defined on the
JoinExpr
itself (i.e. the return value ofjoin_with_person_distinct_id_overrides_table
)?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's essentially for this:
posthog/posthog/hogql/transforms/lazy_tables.py
Lines 281 to 282 in 4a60cad