diff --git a/posthog/hogql_queries/events_query_runner.py b/posthog/hogql_queries/events_query_runner.py index 034a888158329..e657485f4e1d8 100644 --- a/posthog/hogql_queries/events_query_runner.py +++ b/posthog/hogql_queries/events_query_runner.py @@ -115,8 +115,15 @@ def to_query(self) -> ast.SelectQuery: ).first() where_exprs.append( parse_expr( - "distinct_id in {list}", - {"list": ast.Constant(value=get_distinct_ids_for_subquery(person, self.team))}, + "cityHash64(distinct_id) in {list}", # Because the events table is partitioned by cityHash64(distinct_id), using cityHash64 for the comparison is much quicker. + { + "list": ast.Constant( + value=[ + ast.Call(name="cityHash64", args=[ast.Constant(value=id)]) + for id in get_distinct_ids_for_subquery(person, self.team) + ] + ) + }, timings=self.timings, ) ) diff --git a/posthog/hogql_queries/test/test_events_query_runner.py b/posthog/hogql_queries/test/test_events_query_runner.py index 70c447b48a174..dcb814339e943 100644 --- a/posthog/hogql_queries/test/test_events_query_runner.py +++ b/posthog/hogql_queries/test/test_events_query_runner.py @@ -131,7 +131,7 @@ def test_person_id_expands_to_distinct_ids(self): query_ast = EventsQueryRunner(query=query, team=self.team).to_query() where_expr = cast(ast.CompareOperation, cast(ast.And, query_ast.where).exprs[0]) right_expr = cast(ast.Constant, where_expr.right) - self.assertEqual(right_expr.value, ["id1", "id2"]) + self.assertEqual([x.args[0].value for x in right_expr.value], ["id1", "id2"]) # another team another_team = Team.objects.create(organization=Organization.objects.create())