From 561f0906bb9c278092a663769fe3ee7efd4eeda1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Mon, 6 Nov 2023 12:53:30 +0000 Subject: [PATCH] fix(persons): limit to maximum of 2500 distinct_ids for cross-db join --- posthog/hogql_queries/events_query_runner.py | 4 +++- posthog/models/event/query_event_list.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/posthog/hogql_queries/events_query_runner.py b/posthog/hogql_queries/events_query_runner.py index 6b168a3f7dd981..87e88f9822cce6 100644 --- a/posthog/hogql_queries/events_query_runner.py +++ b/posthog/hogql_queries/events_query_runner.py @@ -33,6 +33,8 @@ "created_at", ] +MAX_LIMIT_DISTINCT_IDS = 2500 + class EventsQueryRunner(QueryRunner): query: EventsQuery @@ -116,7 +118,7 @@ def to_query(self) -> ast.SelectQuery: if self.query.personId: with self.timings.measure("person_id"): person: Optional[Person] = get_pk_or_uuid( - Person.objects.filter(team=self.team), self.query.personId + Person.objects.filter(team=self.team)[:MAX_LIMIT_DISTINCT_IDS], self.query.personId ).first() distinct_ids = person.distinct_ids if person is not None else [] ids_list = list(map(str, distinct_ids)) diff --git a/posthog/models/event/query_event_list.py b/posthog/models/event/query_event_list.py index 66fc02c8ba55a9..983a1d64584f7e 100644 --- a/posthog/models/event/query_event_list.py +++ b/posthog/models/event/query_event_list.py @@ -9,6 +9,7 @@ from posthog.clickhouse.client.connection import Workload from posthog.hogql.constants import DEFAULT_RETURNED_ROWS from posthog.hogql.context import HogQLContext +from posthog.hogql_queries.events_query_runner import MAX_LIMIT_DISTINCT_IDS from posthog.models import Action, Filter, Person, Team from posthog.models.action.util import format_action_filter from posthog.models.event.sql import ( @@ -44,7 +45,7 @@ def determine_event_conditions( params.update({"before": timestamp}) elif k == "person_id": result += """AND distinct_id IN (%(distinct_ids)s) """ - person = get_pk_or_uuid(Person.objects.filter(team=team), v).first() + person = get_pk_or_uuid(Person.objects.filter(team=team)[:MAX_LIMIT_DISTINCT_IDS], v).first() distinct_ids = person.distinct_ids if person is not None else [] params.update({"distinct_ids": list(map(str, distinct_ids))}) elif k == "distinct_id":