from typing import Optional

DEFAULT_RETURNED_ROWS = 100
# Max limit for all SELECT queries, and the default for CSV exports.
MAX_SELECT_RETURNED_ROWS = 10000  # sync with CSV_EXPORT_LIMIT
# Max limit for all cohort calculations
MAX_SELECT_COHORT_CALCULATION_LIMIT = 1000000000  # 1b persons


class LimitContext(Enum):
    """Why a query is being run; selects which row limits apply to it."""

    QUERY = "query"
    EXPORT = "export"
    COHORT_CALCULATION = "cohort_calculation"


def _unexpected_limit_context(limit_context: object) -> Exception:
    """Build the error raised for a value that is not a known LimitContext."""
    # Local import: only the failure path needs the exception type.
    from posthog.hogql.errors import HogQLException

    return HogQLException(f"Unexpected LimitContext value: {limit_context}")


def get_max_limit_for_context(limit_context: Optional[LimitContext]) -> int:
    """Return the largest LIMIT a SELECT may use in the given context.

    ``None`` is treated as ``LimitContext.QUERY``, because callers hold the
    context as an optional value and "no context" means a plain query.

    Raises:
        HogQLException: if ``limit_context`` is not a known ``LimitContext``.
    """
    if limit_context is None or limit_context == LimitContext.QUERY:
        return MAX_SELECT_RETURNED_ROWS  # 10k
    if limit_context == LimitContext.EXPORT:
        return MAX_SELECT_RETURNED_ROWS  # 10k
    if limit_context == LimitContext.COHORT_CALCULATION:
        return MAX_SELECT_COHORT_CALCULATION_LIMIT  # 1b
    raise _unexpected_limit_context(limit_context)


def get_default_limit_for_context(limit_context: Optional[LimitContext]) -> int:
    """Return the LIMIT applied when the query itself does not specify one.

    ``None`` is treated as ``LimitContext.QUERY``, matching
    ``get_max_limit_for_context``.

    Raises:
        HogQLException: if ``limit_context`` is not a known ``LimitContext``.
    """
    if limit_context is None or limit_context == LimitContext.QUERY:
        return DEFAULT_RETURNED_ROWS  # 100
    if limit_context == LimitContext.EXPORT:
        return MAX_SELECT_RETURNED_ROWS  # 10k
    if limit_context == LimitContext.COHORT_CALCULATION:
        return MAX_SELECT_COHORT_CALCULATION_LIMIT  # 1b
    raise _unexpected_limit_context(limit_context)
ast -from posthog.hogql.constants import HogQLGlobalSettings, LimitContext +from posthog.hogql.constants import HogQLGlobalSettings, LimitContext, get_default_limit_for_context from posthog.hogql.errors import HogQLException from posthog.hogql.hogql import HogQLContext from posthog.hogql.modifiers import create_default_modifiers_for_team @@ -34,7 +34,7 @@ def execute_hogql_query( workload: Workload = Workload.ONLINE, settings: Optional[HogQLGlobalSettings] = None, modifiers: Optional[HogQLQueryModifiers] = None, - limit_context: Optional[LimitContext] = None, + limit_context: Optional[LimitContext] = LimitContext.QUERY, timings: Optional[HogQLTimings] = None, explain: Optional[bool] = False, ) -> HogQLQueryResponse: @@ -68,20 +68,12 @@ def execute_hogql_query( select_query = replace_placeholders(select_query, placeholders) with timings.measure("max_limit"): - from posthog.hogql.constants import ( - DEFAULT_RETURNED_ROWS, - MAX_SELECT_RETURNED_ROWS, - ) - select_queries = ( select_query.select_queries if isinstance(select_query, ast.SelectUnionQuery) else [select_query] ) for one_query in select_queries: if one_query.limit is None: - # One more "max" of MAX_SELECT_RETURNED_ROWS (10k) in applied in the query printer. - one_query.limit = ast.Constant( - value=MAX_SELECT_RETURNED_ROWS if limit_context == LimitContext.EXPORT else DEFAULT_RETURNED_ROWS - ) + one_query.limit = ast.Constant(value=get_default_limit_for_context(limit_context)) # Get printed HogQL query, and returned columns. Using a cloned query. 
with timings.measure("hogql"): @@ -122,7 +114,7 @@ def execute_hogql_query( ) settings = settings or HogQLGlobalSettings() - if limit_context == LimitContext.EXPORT: + if limit_context == LimitContext.EXPORT or limit_context == LimitContext.COHORT_CALCULATION: settings.max_execution_time = EXPORT_CONTEXT_MAX_EXECUTION_TIME # Print the ClickHouse SQL query diff --git a/posthog/hogql_queries/events_query_runner.py b/posthog/hogql_queries/events_query_runner.py index f9ee10c648f25..bc9e9810f3698 100644 --- a/posthog/hogql_queries/events_query_runner.py +++ b/posthog/hogql_queries/events_query_runner.py @@ -10,9 +10,10 @@ from posthog.api.utils import get_pk_or_uuid from posthog.clickhouse.client.connection import Workload from posthog.hogql import ast +from posthog.hogql.constants import get_max_limit_for_context, get_default_limit_for_context from posthog.hogql.parser import parse_expr, parse_order_expr from posthog.hogql.property import action_to_expr, has_aggregation, property_to_expr -from posthog.hogql.query import execute_hogql_query, LimitContext +from posthog.hogql.query import execute_hogql_query from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.query_runner import QueryRunner from posthog.models import Action, Person @@ -255,22 +256,10 @@ def select_input_raw(self) -> List[str]: return ["*"] if len(self.query.select) == 0 else self.query.select def limit(self) -> int: - # importing locally so we could override in a test - from posthog.hogql.constants import ( - DEFAULT_RETURNED_ROWS, - MAX_SELECT_RETURNED_ROWS, - ) - # adding +1 to the limit to check if there's a "next page" after the requested results - return ( - min( - MAX_SELECT_RETURNED_ROWS, - (MAX_SELECT_RETURNED_ROWS if self.limit_context == LimitContext.EXPORT else DEFAULT_RETURNED_ROWS) - if self.query.limit is None - else self.query.limit, - ) - + 1 - ) + max_rows = get_max_limit_for_context(self.limit_context) + default_rows = 
def query_limit(self) -> int:
    """Return the row limit for this query, capped at the context's maximum.

    The limit set on the query is used when there is one; otherwise the
    default for ``self.limit_context`` applies.
    """
    ceiling = get_max_limit_for_context(self.limit_context)
    fallback = get_default_limit_for_context(self.limit_context)
    requested = self.query.limit
    if requested is None:
        requested = fallback
    return min(ceiling, requested)
f"{self._cache_key()}_{self.limit_context or LimitContext.QUERY}" tag_queries(cache_key=cache_key) if not refresh_requested: diff --git a/posthog/models/cohort/util.py b/posthog/models/cohort/util.py index e96c00337da3d..949ec0bb21fe8 100644 --- a/posthog/models/cohort/util.py +++ b/posthog/models/cohort/util.py @@ -10,6 +10,7 @@ from posthog.client import sync_execute from posthog.constants import PropertyOperatorType +from posthog.hogql.constants import LimitContext from posthog.hogql.hogql import HogQLContext from posthog.hogql.printer import print_ast from posthog.models import Action, Filter, Team @@ -75,7 +76,9 @@ def print_cohort_hogql_query(cohort: Cohort, hogql_context: HogQLContext) -> str persons_query = cast(Dict, cohort.query) persons_query["select"] = ["id as actor_id"] - query = get_query_runner(persons_query, team=cast(Team, cohort.team)).to_query() + query = get_query_runner( + persons_query, team=cast(Team, cohort.team), limit_context=LimitContext.COHORT_CALCULATION + ).to_query() hogql_context.enable_select_queries = True return print_ast(query, context=hogql_context, dialect="clickhouse")