Skip to content

Commit

Permalink
feat(hogql): Use distinct_id-based person overrides for `PERSON_ID_…
Browse files Browse the repository at this point in the history
…OVERRIDE_PROPERTIES_JOINED` mode (#21520)
  • Loading branch information
tkaemming authored Apr 16, 2024
1 parent cd3b893 commit cb5f851
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 31 deletions.
4 changes: 2 additions & 2 deletions frontend/src/scenes/debug/HogQLDebug.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,11 @@ export function HogQLDebug({ query, setQuery, queryKey }: HogQLDebugProps): JSX.
},
{
value: 'person_id_override_properties_on_events',
label: 'Properties: Events, Person ID: Overrides',
label: 'Properties: Events, Person ID: Overrides (v2)',
},
{
value: 'person_id_override_properties_joined',
label: 'Properties: Person, Person ID: Overrides',
label: 'Properties: Person, Person ID: Overrides (v3)',
},
]}
onChange={(value) =>
Expand Down
46 changes: 31 additions & 15 deletions posthog/hogql/database/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from posthog.hogql.database.schema.person_distinct_id_overrides import (
PersonDistinctIdOverridesTable,
RawPersonDistinctIdOverridesTable,
join_with_person_distinct_id_overrides_table,
)
from posthog.hogql.database.schema.person_distinct_ids import (
PersonDistinctIdsTable,
Expand Down Expand Up @@ -146,20 +147,35 @@ def _use_person_properties_from_events(database: Database) -> None:
database.events.fields["person"] = FieldTraverser(chain=["poe"])


def _use_person_id_from_person_overrides(database: Database) -> None:
def _use_person_id_from_person_overrides(database: Database, use_distinct_id_overrides: bool) -> None:
database.events.fields["event_person_id"] = StringDatabaseField(name="person_id")
database.events.fields["override"] = LazyJoin(
from_field=["event_person_id"],
join_table=PersonOverridesTable(),
join_function=join_with_person_overrides_table,
)
database.events.fields["person_id"] = ExpressionField(
name="person_id",
expr=parse_expr(
"ifNull(nullIf(override.override_person_id, '00000000-0000-0000-0000-000000000000'), event_person_id)",
start=None,
),
)
if use_distinct_id_overrides:
database.events.fields["override"] = LazyJoin(
from_field=["distinct_id"],
join_table=PersonDistinctIdOverridesTable(),
join_function=join_with_person_distinct_id_overrides_table,
)
database.events.fields["person_id"] = ExpressionField(
name="person_id",
expr=parse_expr(
# NOTE: assumes `join_use_nulls = 0` (the default), as ``override.distinct_id`` is not Nullable
"if(not(empty(override.distinct_id)), override.person_id, event_person_id)",
start=None,
),
)
else:
database.events.fields["override"] = LazyJoin(
from_field=["event_person_id"],
join_table=PersonOverridesTable(),
join_function=join_with_person_overrides_table,
)
database.events.fields["person_id"] = ExpressionField(
name="person_id",
expr=parse_expr(
"ifNull(nullIf(override.override_person_id, '00000000-0000-0000-0000-000000000000'), event_person_id)",
start=None,
),
)


def create_hogql_database(
Expand Down Expand Up @@ -187,12 +203,12 @@ def create_hogql_database(
_use_person_properties_from_events(database)

elif modifiers.personsOnEventsMode == PersonsOnEventsMode.person_id_override_properties_on_events:
_use_person_id_from_person_overrides(database)
_use_person_id_from_person_overrides(database, use_distinct_id_overrides=False)
_use_person_properties_from_events(database)
database.events.fields["poe"].fields["id"] = database.events.fields["person_id"]

elif modifiers.personsOnEventsMode == PersonsOnEventsMode.person_id_override_properties_joined:
_use_person_id_from_person_overrides(database)
_use_person_id_from_person_overrides(database, use_distinct_id_overrides=True)
database.events.fields["person"] = LazyJoin(
from_field=["person_id"],
join_table=PersonsTable(),
Expand Down
6 changes: 3 additions & 3 deletions posthog/hogql/test/test_modifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ class TestCase(NamedTuple):
"events.event AS event",
"events__person.id AS id",
"events__person.properties AS properties",
"toTimeZone(events__person.created_at, %(hogql_val_1)s) AS created_at",
"toTimeZone(events__person.created_at, %(hogql_val_0)s) AS created_at",
],
[
"events__person ON equals(ifNull(nullIf(events__override.override_person_id, %(hogql_val_0)s), events.person_id), events__person.id)",
"events__override ON equals(events.person_id, events__override.old_person_id)",
"events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id)",
"events__override ON equals(events.distinct_id, events__override.distinct_id)",
],
),
]
Expand Down
24 changes: 13 additions & 11 deletions posthog/hogql/transforms/test/__snapshots__/test_lazy_tables.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -95,18 +95,19 @@
# name: TestLazyJoins.test_resolve_lazy_table_indirect_duplicate_references
'''

SELECT ifNull(nullIf(events__override.override_person_id, %(hogql_val_1)s), events.person_id) AS person_id, events__person.properties AS properties
SELECT if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id) AS person_id, events__person.properties AS properties
FROM events INNER JOIN (
SELECT argMax(person.properties, person.version) AS properties, person.id AS id
FROM person
WHERE equals(person.team_id, 420)
GROUP BY person.id
HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0)
SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(ifNull(nullIf(events__override.override_person_id, %(hogql_val_0)s), events.person_id), events__person.id) LEFT OUTER JOIN (
SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id, person_overrides.old_person_id AS old_person_id
FROM person_overrides
WHERE equals(person_overrides.team_id, 420)
GROUP BY person_overrides.old_person_id) AS events__override ON equals(events.person_id, events__override.old_person_id)
SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) LEFT OUTER JOIN (
SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id
FROM person_distinct_id_overrides
WHERE equals(person_distinct_id_overrides.team_id, 420)
GROUP BY person_distinct_id_overrides.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS events__override ON equals(events.distinct_id, events__override.distinct_id)
WHERE equals(events.team_id, 420)
LIMIT 10000
'''
Expand All @@ -116,16 +117,17 @@

SELECT events__person.id AS id
FROM events LEFT OUTER JOIN (
SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id, person_overrides.old_person_id AS old_person_id
FROM person_overrides
WHERE equals(person_overrides.team_id, 420)
GROUP BY person_overrides.old_person_id) AS events__override ON equals(events.person_id, events__override.old_person_id) INNER JOIN (
SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id
FROM person_distinct_id_overrides
WHERE equals(person_distinct_id_overrides.team_id, 420)
GROUP BY person_distinct_id_overrides.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS events__override ON equals(events.distinct_id, events__override.distinct_id) INNER JOIN (
SELECT person.id AS id
FROM person
WHERE equals(person.team_id, 420)
GROUP BY person.id
HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0)
SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(ifNull(nullIf(events__override.override_person_id, %(hogql_val_0)s), events.person_id), events__person.id)
SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id)
WHERE equals(events.team_id, 420)
LIMIT 10000
'''
Expand Down

0 comments on commit cb5f851

Please sign in to comment.