Skip to content

Commit

Permalink
perf: Use cityhash to filter events (#25409)
Browse files Browse the repository at this point in the history
  • Loading branch information
timgl authored Oct 5, 2024
1 parent dc9752d commit 1f100bc
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 3 deletions.
11 changes: 9 additions & 2 deletions posthog/hogql_queries/events_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,15 @@ def to_query(self) -> ast.SelectQuery:
).first()
where_exprs.append(
parse_expr(
"distinct_id in {list}",
{"list": ast.Constant(value=get_distinct_ids_for_subquery(person, self.team))},
"cityHash64(distinct_id) in {list}", # Because the events table is partitioned by cityHash64(distinct_id), using cityHash64 for the comparison is much quicker.
{
"list": ast.Constant(
value=[
ast.Call(name="cityHash64", args=[ast.Constant(value=id)])
for id in get_distinct_ids_for_subquery(person, self.team)
]
)
},
timings=self.timings,
)
)
Expand Down
2 changes: 1 addition & 1 deletion posthog/hogql_queries/test/test_events_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def test_person_id_expands_to_distinct_ids(self):
query_ast = EventsQueryRunner(query=query, team=self.team).to_query()
where_expr = cast(ast.CompareOperation, cast(ast.And, query_ast.where).exprs[0])
right_expr = cast(ast.Constant, where_expr.right)
self.assertEqual(right_expr.value, ["id1", "id2"])
self.assertEqual([x.args[0].value for x in right_expr.value], ["id1", "id2"])

# another team
another_team = Team.objects.create(organization=Organization.objects.create())
Expand Down

0 comments on commit 1f100bc

Please sign in to comment.