diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index fb40ba56fdea5..fe899331ffbcc 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -1236,7 +1236,7 @@ "description": "HogQL Query Options are automatically set per team. However, they can be overriden in the query.", "properties": { "personsArgMaxVersion": { - "enum": ["v1", "v2"], + "enum": ["auto", "v1", "v2"], "type": "string" }, "personsOnEventsMode": { diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 6d0168d21f1fd..228233e9fba45 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -133,7 +133,7 @@ export interface DataNode extends Node { /** HogQL Query Options are automatically set per team. However, they can be overriden in the query. */ export interface HogQLQueryModifiers { personsOnEventsMode?: 'disabled' | 'v1_enabled' | 'v2_enabled' - personsArgMaxVersion?: 'v1' | 'v2' + personsArgMaxVersion?: 'auto' | 'v1' | 'v2' } export interface HogQLQueryResponse { diff --git a/posthog/api/test/__snapshots__/test_query.ambr b/posthog/api/test/__snapshots__/test_query.ambr index ff86c6ab812ad..05501e8c5ac45 100644 --- a/posthog/api/test/__snapshots__/test_query.ambr +++ b/posthog/api/test/__snapshots__/test_query.ambr @@ -354,12 +354,15 @@ GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', ''), person.version) AS properties___email, - person.id AS id + (SELECT person.id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) WHERE and(equals(events.team_id, 2), ifNull(equals(events__pdi__person.properties___email, 'tom@posthog.com'), 0), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC'))) ORDER BY events.event ASC LIMIT 101 @@ -385,12 +388,15 @@ GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN - (SELECT argMax(nullIf(nullIf(person.pmat_email, ''), 'null'), person.version) AS properties___email, - person.id AS id + (SELECT person.id, + nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) WHERE and(equals(events.team_id, 2), ifNull(equals(events__pdi__person.properties___email, 'tom@posthog.com'), 0), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC'))) ORDER BY events.event ASC LIMIT 101 diff --git a/posthog/hogql/database/schema/persons.py b/posthog/hogql/database/schema/persons.py index 4e853a55ef5b5..6df5513f316cf 100644 --- a/posthog/hogql/database/schema/persons.py +++ b/posthog/hogql/database/schema/persons.py @@ -32,7 +32,16 @@ def select_from_persons_table(requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): - if modifiers.personsArgMaxVersion == PersonsArgMaxVersion.v2: + version = modifiers.personsArgMaxVersion + if version == PersonsArgMaxVersion.auto: + version = PersonsArgMaxVersion.v1 + # If selecting properties, use the faster v2 query. Otherwise v1 is faster. + for field_chain in requested_fields.values(): + if field_chain[0] == "properties": + version = PersonsArgMaxVersion.v2 + break + + if version == PersonsArgMaxVersion.v2: from posthog.hogql.parser import parse_select from posthog.hogql import ast diff --git a/posthog/hogql/modifiers.py b/posthog/hogql/modifiers.py index 36ad867fe49d2..3f3cd86b5f8f0 100644 --- a/posthog/hogql/modifiers.py +++ b/posthog/hogql/modifiers.py @@ -17,6 +17,6 @@ def create_default_modifiers_for_team( modifiers.personsOnEventsMode = team.person_on_events_mode or PersonOnEventsMode.DISABLED if modifiers.personsArgMaxVersion is None: - modifiers.personsArgMaxVersion = "v1" + modifiers.personsArgMaxVersion = "auto" return modifiers diff --git a/posthog/hogql/test/__snapshots__/test_query.ambr b/posthog/hogql/test/__snapshots__/test_query.ambr index 41f301150f147..2f2faaf5bb33a 100644 --- a/posthog/hogql/test/__snapshots__/test_query.ambr +++ b/posthog/hogql/test/__snapshots__/test_query.ambr @@ -54,11 +54,14 @@ SELECT DISTINCT persons.properties___sneaky_mail FROM ( - SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___random_uuid, person.id AS id + SELECT person.id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '') AS properties___sneaky_mail, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', '') AS properties___random_uuid + FROM person + WHERE and(equals(person.team_id, 420), ifNull(in(tuple(person.id, person.version), ( + SELECT person.id, max(person.version) AS version FROM person WHERE equals(person.team_id, 420) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons WHERE ifNull(equals(persons.properties___random_uuid, %(hogql_val_2)s), 0) LIMIT 100 @@ -105,11 +108,14 @@ WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) INNER JOIN ( - SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, person.id AS id + SELECT person.id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '') AS properties___sneaky_mail + FROM person + WHERE and(equals(person.team_id, 420), ifNull(in(tuple(person.id, person.version), ( + SELECT person.id, max(person.version) AS version FROM person WHERE equals(person.team_id, 420) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE equals(e.team_id, 420) LIMIT 10 @@ -147,11 +153,14 @@ WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN ( - SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, person.id AS id + SELECT person.id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '') AS properties___sneaky_mail + FROM person + WHERE and(equals(person.team_id, 420), ifNull(in(tuple(person.id, person.version), ( + SELECT person.id, max(person.version) AS version FROM person WHERE equals(person.team_id, 420) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) WHERE equals(events.team_id, 420) LIMIT 10 @@ -168,11 +177,14 @@ WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) INNER JOIN ( - SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, person.id AS id + SELECT person.id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '') AS properties___sneaky_mail + FROM person + WHERE and(equals(person.team_id, 420), ifNull(in(tuple(person.id, person.version), ( + SELECT person.id, max(person.version) AS version FROM person WHERE equals(person.team_id, 420) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) WHERE equals(e.team_id, 420) LIMIT 10 @@ -189,11 +201,14 @@ WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS s__pdi ON equals(s.distinct_id, s__pdi.distinct_id) INNER JOIN ( - SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, person.id AS id + SELECT person.id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '') AS properties___sneaky_mail + FROM person + WHERE and(equals(person.team_id, 420), ifNull(in(tuple(person.id, person.version), ( + SELECT person.id, max(person.version) AS version FROM person WHERE equals(person.team_id, 420) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS s__pdi__person ON equals(s__pdi.person_id, s__pdi__person.id) WHERE equals(s.team_id, 420) GROUP BY s__pdi__person.properties___sneaky_mail @@ -221,11 +236,14 @@ SELECT pdi.distinct_id, pdi__person.properties___sneaky_mail FROM person_distinct_id2 AS pdi INNER JOIN ( - SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, person.id AS id + SELECT person.id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '') AS properties___sneaky_mail + FROM person + WHERE and(equals(person.team_id, 420), ifNull(in(tuple(person.id, person.version), ( + SELECT person.id, max(person.version) AS version FROM person WHERE equals(person.team_id, 420) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS pdi__person ON equals(pdi.person_id, pdi__person.id) WHERE equals(pdi.team_id, 420) LIMIT 10 @@ -282,11 +300,14 @@ WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) INNER JOIN ( - SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___sneaky_mail, person.id AS id + SELECT person.id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '') AS properties___sneaky_mail + FROM person + WHERE and(equals(person.team_id, 420), ifNull(in(tuple(person.id, person.version), ( + SELECT person.id, max(person.version) AS version FROM person WHERE equals(person.team_id, 420) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) WHERE equals(events.team_id, 420) LIMIT 10 diff --git a/posthog/hogql/test/test_modifiers.py b/posthog/hogql/test/test_modifiers.py index b49a5da32cdfa..d6d0f0e64d101 100644 --- a/posthog/hogql/test/test_modifiers.py +++ b/posthog/hogql/test/test_modifiers.py @@ -46,3 +46,28 @@ def test_modifiers_persons_argmax_version_v2(self): # Test (v2) response = execute_hogql_query(query, team=self.team, modifiers=HogQLQueryModifiers(personsArgMaxVersion="v2")) assert "in(tuple(person.id, person.version)" in response.clickhouse + + def test_modifiers_persons_argmax_version_auto(self): + # Use the v2 query when selecting properties.x + response = execute_hogql_query( + "SELECT id, properties.$browser, is_identified FROM persons", + team=self.team, + modifiers=HogQLQueryModifiers(personsArgMaxVersion="auto"), + ) + assert "in(tuple(person.id, person.version)" in response.clickhouse + + # Use the v2 query when selecting properties + response = execute_hogql_query( + "SELECT id, properties FROM persons", + team=self.team, + modifiers=HogQLQueryModifiers(personsArgMaxVersion="auto"), + ) + assert "in(tuple(person.id, person.version)" in response.clickhouse + + # Use the v1 query when not selecting any properties + response = execute_hogql_query( + "SELECT id, is_identified FROM persons", + team=self.team, + modifiers=HogQLQueryModifiers(personsArgMaxVersion="auto"), + ) + assert "in(tuple(person.id, person.version)" not in response.clickhouse diff --git a/posthog/schema.py b/posthog/schema.py index c9ff94a78654a..451a281d51a2b 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -229,6 +229,7 @@ class HogQLNotice(BaseModel): class PersonsArgMaxVersion(str, Enum): + auto = "auto" v1 = "v1" v2 = "v2"