From f5e4b17201afea02baddeb5a02f7d1f489747884 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Thu, 21 Mar 2024 19:20:42 +0100 Subject: [PATCH] fix(insights): Make HogQL queries `limit_context`-aware (#21022) * fix(insights): Make HogQL queries `limit_context`-aware * Add some tests * Update `execute_hogql_query` shape * Fix minor issues * Update mypy-baseline.txt --- mypy-baseline.txt | 1 - posthog/hogql/query.py | 1 + .../funnel_correlation_query_runner.py | 1 + .../insights/funnels/funnels_query_runner.py | 1 + .../insights/insight_actors_query_runner.py | 1 + .../insights/lifecycle_query_runner.py | 1 + posthog/hogql_queries/insights/paginators.py | 3 ++- .../insights/paths_query_runner.py | 1 + .../insights/retention_query_runner.py | 1 + .../insights/stickiness_query_runner.py | 1 + .../insights/test/test_paginators.py | 6 +++-- .../test/test_retention_query_runner.py | 16 ++++++++++-- .../test/test_stickiness_query_runner.py | 14 ++++++++++- .../trends/test/test_trends_query_runner.py | 25 ++++++++++++++++--- .../insights/trends/trends_query_runner.py | 1 + .../sessions_timeline_query_runner.py | 1 + .../web_analytics/test/test_web_overview.py | 23 +++++++++++++++-- .../hogql_queries/web_analytics/top_clicks.py | 1 + .../web_analytics_query_runner.py | 1 + .../web_analytics/web_overview.py | 1 + 20 files changed, 89 insertions(+), 12 deletions(-) diff --git a/mypy-baseline.txt b/mypy-baseline.txt index 180d259fc3c20..f0a464d26c546 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -549,7 +549,6 @@ posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py:0: err posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py:0: error: Item "SelectQuery" of "SelectQuery | SelectUnionQuery | Field | Any | None" has no attribute "chain" [union-attr] posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery | Field | Any | None" has no attribute "chain" [union-attr] posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py:0: error: Item "None" of "SelectQuery | SelectUnionQuery | Field | Any | None" has no attribute "chain" [union-attr] -posthog/hogql_queries/insights/test/test_paginators.py:0: error: Argument 2 to "execute_hogql_query" of "HogQLHasMorePaginator" has incompatible type "SelectQuery | SelectUnionQuery"; expected "SelectQuery" [arg-type] posthog/hogql_queries/insights/test/test_paginators.py:0: error: Value of type "object" is not indexable [index] posthog/hogql_queries/insights/test/test_paginators.py:0: error: Value of type "object" is not indexable [index] posthog/hogql_queries/insights/test/test_paginators.py:0: error: Value of type "object" is not indexable [index] diff --git a/posthog/hogql/query.py b/posthog/hogql/query.py index e7bc3f7984205..69b5656020904 100644 --- a/posthog/hogql/query.py +++ b/posthog/hogql/query.py @@ -28,6 +28,7 @@ def execute_hogql_query( query: Union[str, ast.SelectQuery, ast.SelectUnionQuery], team: Team, + *, query_type: str = "hogql_query", filters: Optional[HogQLFilters] = None, placeholders: Optional[Dict[str, ast.Expr]] = None, diff --git a/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py b/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py index 2fef78a372324..bcb362ff3d4f9 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py +++ b/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py @@ -236,6 +236,7 @@ def _calculate(self) -> tuple[List[EventOddsRatio], bool, str, HogQLQueryRespons team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) assert response.results diff --git a/posthog/hogql_queries/insights/funnels/funnels_query_runner.py b/posthog/hogql_queries/insights/funnels/funnels_query_runner.py index 38e04603f2725..b1ca8dfd6dd54 100644 --- a/posthog/hogql_queries/insights/funnels/funnels_query_runner.py +++ b/posthog/hogql_queries/insights/funnels/funnels_query_runner.py @@ -92,6 +92,7 @@ def calculate(self): team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) results = self.funnel_class._format_results(response.results) diff --git a/posthog/hogql_queries/insights/insight_actors_query_runner.py b/posthog/hogql_queries/insights/insight_actors_query_runner.py index a0e38abae1776..d58f36cb6f7ee 100644 --- a/posthog/hogql_queries/insights/insight_actors_query_runner.py +++ b/posthog/hogql_queries/insights/insight_actors_query_runner.py @@ -102,6 +102,7 @@ def calculate(self) -> HogQLQueryResponse: team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) def _is_stale(self, cached_result_package): diff --git a/posthog/hogql_queries/insights/lifecycle_query_runner.py b/posthog/hogql_queries/insights/lifecycle_query_runner.py index 24bbe36f1c6bf..62968f5349e0e 100644 --- a/posthog/hogql_queries/insights/lifecycle_query_runner.py +++ b/posthog/hogql_queries/insights/lifecycle_query_runner.py @@ -157,6 +157,7 @@ def calculate(self) -> LifecycleQueryResponse: team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) # TODO: can we move the data conversion part into the query as well? It would make it easier to swap diff --git a/posthog/hogql_queries/insights/paginators.py b/posthog/hogql_queries/insights/paginators.py index 6dbdb1543b929..0dfda79ced617 100644 --- a/posthog/hogql_queries/insights/paginators.py +++ b/posthog/hogql_queries/insights/paginators.py @@ -54,8 +54,9 @@ def trim_results(self) -> list[Any]: def execute_hogql_query( self, - query_type: str, query: ast.SelectQuery, + *, + query_type: str, **kwargs, ) -> HogQLQueryResponse: self.response = cast( diff --git a/posthog/hogql_queries/insights/paths_query_runner.py b/posthog/hogql_queries/insights/paths_query_runner.py index c10a5a2320207..c454feb8e56ac 100644 --- a/posthog/hogql_queries/insights/paths_query_runner.py +++ b/posthog/hogql_queries/insights/paths_query_runner.py @@ -725,6 +725,7 @@ def calculate(self) -> PathsQueryResponse: team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) response.results = self.validate_results(response.results) diff --git a/posthog/hogql_queries/insights/retention_query_runner.py b/posthog/hogql_queries/insights/retention_query_runner.py index 221cb976757d2..3ac2c5b4b5462 100644 --- a/posthog/hogql_queries/insights/retention_query_runner.py +++ b/posthog/hogql_queries/insights/retention_query_runner.py @@ -313,6 +313,7 @@ def calculate(self) -> RetentionQueryResponse: team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) result_dict = { diff --git a/posthog/hogql_queries/insights/stickiness_query_runner.py b/posthog/hogql_queries/insights/stickiness_query_runner.py index d0b4b65c67f9b..184e3c0af02df 100644 --- a/posthog/hogql_queries/insights/stickiness_query_runner.py +++ b/posthog/hogql_queries/insights/stickiness_query_runner.py @@ -212,6 +212,7 @@ def calculate(self): team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) if response.timings is not None: diff --git a/posthog/hogql_queries/insights/test/test_paginators.py b/posthog/hogql_queries/insights/test/test_paginators.py index ac83efb45b353..6698115c46535 100644 --- a/posthog/hogql_queries/insights/test/test_paginators.py +++ b/posthog/hogql_queries/insights/test/test_paginators.py @@ -1,3 +1,5 @@ +from typing import cast +from posthog.hogql.ast import SelectQuery from posthog.hogql.constants import ( LimitContext, get_default_limit_for_context, @@ -136,8 +138,8 @@ def test_response_params_consistency(self): """Test consistency of response_params method.""" paginator = HogQLHasMorePaginator(limit=5, offset=10) paginator.response = paginator.execute_hogql_query( - "test_query", - parse_select("SELECT * FROM persons"), + cast(SelectQuery, parse_select("SELECT * FROM persons")), + query_type="test_query", team=self.team, ) params = paginator.response_params() diff --git a/posthog/hogql_queries/insights/test/test_retention_query_runner.py b/posthog/hogql_queries/insights/test/test_retention_query_runner.py index 30edb32102f76..04c108dd779f1 100644 --- a/posthog/hogql_queries/insights/test/test_retention_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_retention_query_runner.py @@ -1,3 +1,5 @@ +from typing import Optional +from unittest.mock import MagicMock, patch import uuid from datetime import datetime @@ -6,11 +8,14 @@ from django.test import override_settings from rest_framework import status +from posthog.clickhouse.client.execute import sync_execute from posthog.constants import ( RETENTION_FIRST_TIME, TREND_FILTER_TYPE_ACTIONS, TREND_FILTER_TYPE_EVENTS, ) +from posthog.hogql.constants import LimitContext +from posthog.hogql.query import INCREASED_MAX_EXECUTION_TIME from posthog.hogql_queries.insights.retention_query_runner import RetentionQueryRunner from posthog.hogql_queries.actors_query_runner import ActorsQueryRunner from posthog.models import Action, ActionStep @@ -1685,10 +1690,10 @@ def test_day_interval_sampled(self): class TestClickhouseRetentionGroupAggregation(ClickhouseTestMixin, APIBaseTest): - def run_query(self, query): + def run_query(self, query, *, limit_context: Optional[LimitContext] = None): if not query.get("retentionFilter"): query["retentionFilter"] = {} - runner = RetentionQueryRunner(team=self.team, query=query) + runner = RetentionQueryRunner(team=self.team, query=query, limit_context=limit_context) return runner.calculate().model_dump()["results"] def run_actors_query(self, interval, query, select=None, actor="person"): @@ -1920,3 +1925,10 @@ def test_groups_aggregating_person_on_events(self): [1], ], ) + + @patch("posthog.hogql.query.sync_execute", wraps=sync_execute) + def test_limit_is_context_aware(self, mock_sync_execute: MagicMock): + self.run_query(query={}, limit_context=LimitContext.QUERY_ASYNC) + + mock_sync_execute.assert_called_once() + self.assertIn(f" max_execution_time={INCREASED_MAX_EXECUTION_TIME},", mock_sync_execute.call_args[0][0]) diff --git a/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py b/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py index 3de1fb6ce865e..6e25827e6ecba 100644 --- a/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py @@ -1,8 +1,12 @@ from dataclasses import dataclass from typing import Dict, List, Optional, Union +from unittest.mock import MagicMock, patch from django.test import override_settings from freezegun import freeze_time +from posthog.clickhouse.client.execute import sync_execute +from posthog.hogql.constants import LimitContext +from posthog.hogql.query import INCREASED_MAX_EXECUTION_TIME from posthog.hogql_queries.insights.stickiness_query_runner import StickinessQueryRunner from posthog.models.action.action import Action from posthog.models.action_step import ActionStep @@ -197,6 +201,7 @@ def _run_query( properties: Optional[StickinessProperties] = None, filters: Optional[StickinessFilter] = None, filter_test_accounts: Optional[bool] = False, + limit_context: Optional[LimitContext] = None, ): query_series: List[EventsNode | ActionsNode] = [EventsNode(event="$pageview")] if series is None else series query_date_from = date_from or self.default_date_from @@ -211,7 +216,7 @@ def _run_query( stickinessFilter=filters, filterTestAccounts=filter_test_accounts, ) - return StickinessQueryRunner(team=self.team, query=query).calculate() + return StickinessQueryRunner(team=self.team, query=query, limit_context=limit_context).calculate() def test_stickiness_runs(self): self._create_test_events() @@ -580,3 +585,10 @@ def test_hogql_aggregations(self): 1, 0, ] + + @patch("posthog.hogql.query.sync_execute", wraps=sync_execute) + def test_limit_is_context_aware(self, mock_sync_execute: MagicMock): + self._run_query(limit_context=LimitContext.QUERY_ASYNC) + + mock_sync_execute.assert_called_once() + self.assertIn(f" max_execution_time={INCREASED_MAX_EXECUTION_TIME},", mock_sync_execute.call_args[0][0]) diff --git a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py index 09a7389e74c7b..433df7c7df23b 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py @@ -1,11 +1,13 @@ from dataclasses import dataclass from typing import Dict, List, Optional -from unittest.mock import patch +from unittest.mock import MagicMock, patch from django.test import override_settings from freezegun import freeze_time +from posthog.clickhouse.client.execute import sync_execute from posthog.hogql import ast -from posthog.hogql.constants import MAX_SELECT_RETURNED_ROWS +from posthog.hogql.constants import MAX_SELECT_RETURNED_ROWS, LimitContext from posthog.hogql.modifiers import create_default_modifiers_for_team +from posthog.hogql.query import INCREASED_MAX_EXECUTION_TIME from posthog.hogql_queries.insights.trends.trends_query_runner import TrendsQueryRunner from posthog.models.cohort.cohort import Cohort from posthog.models.property_definition import PropertyDefinition @@ -175,6 +177,7 @@ def _create_query_runner( breakdown: Optional[BreakdownFilter] = None, filter_test_accounts: Optional[bool] = None, hogql_modifiers: Optional[HogQLQueryModifiers] = None, + limit_context: Optional[LimitContext] = None, ) -> TrendsQueryRunner: query_series: List[EventsNode | ActionsNode] = [EventsNode(event="$pageview")] if series is None else series query = TrendsQuery( @@ -185,7 +188,7 @@ def _create_query_runner( breakdownFilter=breakdown, filterTestAccounts=filter_test_accounts, ) - return TrendsQueryRunner(team=self.team, query=query, modifiers=hogql_modifiers) + return TrendsQueryRunner(team=self.team, query=query, modifiers=hogql_modifiers, limit_context=limit_context) def _run_trends_query( self, @@ -195,8 +198,10 @@ def _run_trends_query( series: Optional[List[EventsNode | ActionsNode]], trends_filters: Optional[TrendsFilter] = None, breakdown: Optional[BreakdownFilter] = None, + *, filter_test_accounts: Optional[bool] = None, hogql_modifiers: Optional[HogQLQueryModifiers] = None, + limit_context: Optional[LimitContext] = None, ): return self._create_query_runner( date_from=date_from, @@ -207,6 +212,7 @@ def _run_trends_query( breakdown=breakdown, filter_test_accounts=filter_test_accounts, hogql_modifiers=hogql_modifiers, + limit_context=limit_context, ).calculate() def test_trends_query_label(self): @@ -1694,3 +1700,16 @@ def test_to_actors_query_options_breakdowns_hogql(self): BreakdownItem(label="Safari", value="Safari"), BreakdownItem(label="Edge", value="Edge"), ] + + @patch("posthog.hogql.query.sync_execute", wraps=sync_execute) + def test_limit_is_context_aware(self, mock_sync_execute: MagicMock): + self._run_trends_query( + "2020-01-09", + "2020-01-20", + IntervalType.day, + [EventsNode(event="$pageview")], + limit_context=LimitContext.QUERY_ASYNC, + ) + + mock_sync_execute.assert_called_once() + self.assertIn(f" max_execution_time={INCREASED_MAX_EXECUTION_TIME},", mock_sync_execute.call_args[0][0]) diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index b698e095e8b09..d66110298ffab 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -292,6 +292,7 @@ def run(index: int, query: ast.SelectQuery | ast.SelectUnionQuery, is_parallel: team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) timings_matrix[index] = response.timings diff --git a/posthog/hogql_queries/sessions_timeline_query_runner.py b/posthog/hogql_queries/sessions_timeline_query_runner.py index d920ec7cf94fd..cda9433d63efa 100644 --- a/posthog/hogql_queries/sessions_timeline_query_runner.py +++ b/posthog/hogql_queries/sessions_timeline_query_runner.py @@ -135,6 +135,7 @@ def calculate(self) -> SessionsTimelineQueryResponse: query_type="SessionsTimelineQuery", timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) assert query_result.results is not None timeline_entries_map: Dict[str, TimelineEntry] = {} diff --git a/posthog/hogql_queries/web_analytics/test/test_web_overview.py b/posthog/hogql_queries/web_analytics/test/test_web_overview.py index 63a26ffea9233..dcafe660fc72d 100644 --- a/posthog/hogql_queries/web_analytics/test/test_web_overview.py +++ b/posthog/hogql_queries/web_analytics/test/test_web_overview.py @@ -1,6 +1,11 @@ +from typing import Optional +from unittest.mock import MagicMock, patch from freezegun import freeze_time from parameterized import parameterized +from posthog.clickhouse.client.execute import sync_execute +from posthog.hogql.constants import LimitContext +from posthog.hogql.query import INCREASED_MAX_EXECUTION_TIME from posthog.hogql_queries.web_analytics.web_overview import WebOverviewQueryRunner from posthog.schema import WebOverviewQuery, DateRange from posthog.test.base import ( @@ -36,14 +41,21 @@ def _create_events(self, data, event="$pageview"): ) return person_result - def _run_web_overview_query(self, date_from, date_to, use_sessions_table=False, compare=True): + def _run_web_overview_query( + self, + date_from: str, + date_to: str, + use_sessions_table: bool = False, + compare: bool = True, + limit_context: Optional[LimitContext] = None, + ): query = WebOverviewQuery( dateRange=DateRange(date_from=date_from, date_to=date_to), properties=[], compare=compare, useSessionsTable=use_sessions_table, ) - runner = WebOverviewQueryRunner(team=self.team, query=query) + runner = WebOverviewQueryRunner(team=self.team, query=query, limit_context=limit_context) return runner.calculate() @parameterized.expand([(True,), (False,)]) @@ -185,3 +197,10 @@ def test_correctly_counts_pageviews_in_long_running_session(self, use_sessions_t sessions = results[2] self.assertEqual(1, sessions.value) + + @patch("posthog.hogql.query.sync_execute", wraps=sync_execute) + def test_limit_is_context_aware(self, mock_sync_execute: MagicMock): + self._run_web_overview_query("2023-12-01", "2023-12-03", limit_context=LimitContext.QUERY_ASYNC) + + mock_sync_execute.assert_called_once() + self.assertIn(f" max_execution_time={INCREASED_MAX_EXECUTION_TIME},", mock_sync_execute.call_args[0][0]) diff --git a/posthog/hogql_queries/web_analytics/top_clicks.py b/posthog/hogql_queries/web_analytics/top_clicks.py index 3218e68975f7a..192d7b279b704 100644 --- a/posthog/hogql_queries/web_analytics/top_clicks.py +++ b/posthog/hogql_queries/web_analytics/top_clicks.py @@ -51,6 +51,7 @@ def calculate(self): team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) return WebTopClicksQueryResponse( diff --git a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py index da4f98edcbf32..12ef703271c51 100644 --- a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py +++ b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py @@ -211,6 +211,7 @@ def _get_or_calculate_sample_ratio(self) -> SamplingRate: query=event_count, team=self.team, timings=self.timings, + limit_context=self.limit_context, ) if not response.results or not response.results[0] or not response.results[0][0]: diff --git a/posthog/hogql_queries/web_analytics/web_overview.py b/posthog/hogql_queries/web_analytics/web_overview.py index 38388315c8f0b..2da015a60ac4e 100644 --- a/posthog/hogql_queries/web_analytics/web_overview.py +++ b/posthog/hogql_queries/web_analytics/web_overview.py @@ -285,6 +285,7 @@ def calculate(self): team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) assert response.results