Skip to content

Commit

Permalink
fix(insights): Make HogQL queries limit_context-aware (#21022)
Browse files Browse the repository at this point in the history
* fix(insights): Make HogQL queries `limit_context`-aware

* Add some tests

* Update `execute_hogql_query` shape

* Fix minor issues

* Update mypy-baseline.txt
  • Loading branch information
Twixes authored Mar 21, 2024
1 parent 08acfba commit f5e4b17
Show file tree
Hide file tree
Showing 20 changed files with 89 additions and 12 deletions.
1 change: 0 additions & 1 deletion mypy-baseline.txt
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,6 @@ posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py:0: err
posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py:0: error: Item "SelectQuery" of "SelectQuery | SelectUnionQuery | Field | Any | None" has no attribute "chain" [union-attr]
posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery | Field | Any | None" has no attribute "chain" [union-attr]
posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py:0: error: Item "None" of "SelectQuery | SelectUnionQuery | Field | Any | None" has no attribute "chain" [union-attr]
posthog/hogql_queries/insights/test/test_paginators.py:0: error: Argument 2 to "execute_hogql_query" of "HogQLHasMorePaginator" has incompatible type "SelectQuery | SelectUnionQuery"; expected "SelectQuery" [arg-type]
posthog/hogql_queries/insights/test/test_paginators.py:0: error: Value of type "object" is not indexable [index]
posthog/hogql_queries/insights/test/test_paginators.py:0: error: Value of type "object" is not indexable [index]
posthog/hogql_queries/insights/test/test_paginators.py:0: error: Value of type "object" is not indexable [index]
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
def execute_hogql_query(
query: Union[str, ast.SelectQuery, ast.SelectUnionQuery],
team: Team,
*,
query_type: str = "hogql_query",
filters: Optional[HogQLFilters] = None,
placeholders: Optional[Dict[str, ast.Expr]] = None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ def _calculate(self) -> tuple[List[EventOddsRatio], bool, str, HogQLQueryRespons
team=self.team,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)
assert response.results

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def calculate(self):
team=self.team,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)

results = self.funnel_class._format_results(response.results)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def calculate(self) -> HogQLQueryResponse:
team=self.team,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)

def _is_stale(self, cached_result_package):
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql_queries/insights/lifecycle_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ def calculate(self) -> LifecycleQueryResponse:
team=self.team,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)

# TODO: can we move the data conversion part into the query as well? It would make it easier to swap
Expand Down
3 changes: 2 additions & 1 deletion posthog/hogql_queries/insights/paginators.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,9 @@ def trim_results(self) -> list[Any]:

def execute_hogql_query(
self,
query_type: str,
query: ast.SelectQuery,
*,
query_type: str,
**kwargs,
) -> HogQLQueryResponse:
self.response = cast(
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql_queries/insights/paths_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,7 @@ def calculate(self) -> PathsQueryResponse:
team=self.team,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)

response.results = self.validate_results(response.results)
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql_queries/insights/retention_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ def calculate(self) -> RetentionQueryResponse:
team=self.team,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)

result_dict = {
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql_queries/insights/stickiness_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ def calculate(self):
team=self.team,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)

if response.timings is not None:
Expand Down
6 changes: 4 additions & 2 deletions posthog/hogql_queries/insights/test/test_paginators.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import cast
from posthog.hogql.ast import SelectQuery
from posthog.hogql.constants import (
LimitContext,
get_default_limit_for_context,
Expand Down Expand Up @@ -136,8 +138,8 @@ def test_response_params_consistency(self):
"""Test consistency of response_params method."""
paginator = HogQLHasMorePaginator(limit=5, offset=10)
paginator.response = paginator.execute_hogql_query(
"test_query",
parse_select("SELECT * FROM persons"),
cast(SelectQuery, parse_select("SELECT * FROM persons")),
query_type="test_query",
team=self.team,
)
params = paginator.response_params()
Expand Down
16 changes: 14 additions & 2 deletions posthog/hogql_queries/insights/test/test_retention_query_runner.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Optional
from unittest.mock import MagicMock, patch
import uuid
from datetime import datetime

Expand All @@ -6,11 +8,14 @@
from django.test import override_settings
from rest_framework import status

from posthog.clickhouse.client.execute import sync_execute
from posthog.constants import (
RETENTION_FIRST_TIME,
TREND_FILTER_TYPE_ACTIONS,
TREND_FILTER_TYPE_EVENTS,
)
from posthog.hogql.constants import LimitContext
from posthog.hogql.query import INCREASED_MAX_EXECUTION_TIME
from posthog.hogql_queries.insights.retention_query_runner import RetentionQueryRunner
from posthog.hogql_queries.actors_query_runner import ActorsQueryRunner
from posthog.models import Action, ActionStep
Expand Down Expand Up @@ -1685,10 +1690,10 @@ def test_day_interval_sampled(self):


class TestClickhouseRetentionGroupAggregation(ClickhouseTestMixin, APIBaseTest):
def run_query(self, query):
def run_query(self, query, *, limit_context: Optional[LimitContext] = None):
if not query.get("retentionFilter"):
query["retentionFilter"] = {}
runner = RetentionQueryRunner(team=self.team, query=query)
runner = RetentionQueryRunner(team=self.team, query=query, limit_context=limit_context)
return runner.calculate().model_dump()["results"]

def run_actors_query(self, interval, query, select=None, actor="person"):
Expand Down Expand Up @@ -1920,3 +1925,10 @@ def test_groups_aggregating_person_on_events(self):
[1],
],
)

@patch("posthog.hogql.query.sync_execute", wraps=sync_execute)
def test_limit_is_context_aware(self, mock_sync_execute: MagicMock):
self.run_query(query={}, limit_context=LimitContext.QUERY_ASYNC)

mock_sync_execute.assert_called_once()
self.assertIn(f" max_execution_time={INCREASED_MAX_EXECUTION_TIME},", mock_sync_execute.call_args[0][0])
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from dataclasses import dataclass
from typing import Dict, List, Optional, Union
from unittest.mock import MagicMock, patch
from django.test import override_settings

from freezegun import freeze_time
from posthog.clickhouse.client.execute import sync_execute
from posthog.hogql.constants import LimitContext
from posthog.hogql.query import INCREASED_MAX_EXECUTION_TIME
from posthog.hogql_queries.insights.stickiness_query_runner import StickinessQueryRunner
from posthog.models.action.action import Action
from posthog.models.action_step import ActionStep
Expand Down Expand Up @@ -197,6 +201,7 @@ def _run_query(
properties: Optional[StickinessProperties] = None,
filters: Optional[StickinessFilter] = None,
filter_test_accounts: Optional[bool] = False,
limit_context: Optional[LimitContext] = None,
):
query_series: List[EventsNode | ActionsNode] = [EventsNode(event="$pageview")] if series is None else series
query_date_from = date_from or self.default_date_from
Expand All @@ -211,7 +216,7 @@ def _run_query(
stickinessFilter=filters,
filterTestAccounts=filter_test_accounts,
)
return StickinessQueryRunner(team=self.team, query=query).calculate()
return StickinessQueryRunner(team=self.team, query=query, limit_context=limit_context).calculate()

def test_stickiness_runs(self):
self._create_test_events()
Expand Down Expand Up @@ -580,3 +585,10 @@ def test_hogql_aggregations(self):
1,
0,
]

@patch("posthog.hogql.query.sync_execute", wraps=sync_execute)
def test_limit_is_context_aware(self, mock_sync_execute: MagicMock):
self._run_query(limit_context=LimitContext.QUERY_ASYNC)

mock_sync_execute.assert_called_once()
self.assertIn(f" max_execution_time={INCREASED_MAX_EXECUTION_TIME},", mock_sync_execute.call_args[0][0])
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from dataclasses import dataclass
from typing import Dict, List, Optional
from unittest.mock import patch
from unittest.mock import MagicMock, patch
from django.test import override_settings
from freezegun import freeze_time
from posthog.clickhouse.client.execute import sync_execute
from posthog.hogql import ast
from posthog.hogql.constants import MAX_SELECT_RETURNED_ROWS
from posthog.hogql.constants import MAX_SELECT_RETURNED_ROWS, LimitContext
from posthog.hogql.modifiers import create_default_modifiers_for_team
from posthog.hogql.query import INCREASED_MAX_EXECUTION_TIME
from posthog.hogql_queries.insights.trends.trends_query_runner import TrendsQueryRunner
from posthog.models.cohort.cohort import Cohort
from posthog.models.property_definition import PropertyDefinition
Expand Down Expand Up @@ -175,6 +177,7 @@ def _create_query_runner(
breakdown: Optional[BreakdownFilter] = None,
filter_test_accounts: Optional[bool] = None,
hogql_modifiers: Optional[HogQLQueryModifiers] = None,
limit_context: Optional[LimitContext] = None,
) -> TrendsQueryRunner:
query_series: List[EventsNode | ActionsNode] = [EventsNode(event="$pageview")] if series is None else series
query = TrendsQuery(
Expand All @@ -185,7 +188,7 @@ def _create_query_runner(
breakdownFilter=breakdown,
filterTestAccounts=filter_test_accounts,
)
return TrendsQueryRunner(team=self.team, query=query, modifiers=hogql_modifiers)
return TrendsQueryRunner(team=self.team, query=query, modifiers=hogql_modifiers, limit_context=limit_context)

def _run_trends_query(
self,
Expand All @@ -195,8 +198,10 @@ def _run_trends_query(
series: Optional[List[EventsNode | ActionsNode]],
trends_filters: Optional[TrendsFilter] = None,
breakdown: Optional[BreakdownFilter] = None,
*,
filter_test_accounts: Optional[bool] = None,
hogql_modifiers: Optional[HogQLQueryModifiers] = None,
limit_context: Optional[LimitContext] = None,
):
return self._create_query_runner(
date_from=date_from,
Expand All @@ -207,6 +212,7 @@ def _run_trends_query(
breakdown=breakdown,
filter_test_accounts=filter_test_accounts,
hogql_modifiers=hogql_modifiers,
limit_context=limit_context,
).calculate()

def test_trends_query_label(self):
Expand Down Expand Up @@ -1694,3 +1700,16 @@ def test_to_actors_query_options_breakdowns_hogql(self):
BreakdownItem(label="Safari", value="Safari"),
BreakdownItem(label="Edge", value="Edge"),
]

@patch("posthog.hogql.query.sync_execute", wraps=sync_execute)
def test_limit_is_context_aware(self, mock_sync_execute: MagicMock):
self._run_trends_query(
"2020-01-09",
"2020-01-20",
IntervalType.day,
[EventsNode(event="$pageview")],
limit_context=LimitContext.QUERY_ASYNC,
)

mock_sync_execute.assert_called_once()
self.assertIn(f" max_execution_time={INCREASED_MAX_EXECUTION_TIME},", mock_sync_execute.call_args[0][0])
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ def run(index: int, query: ast.SelectQuery | ast.SelectUnionQuery, is_parallel:
team=self.team,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)

timings_matrix[index] = response.timings
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql_queries/sessions_timeline_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def calculate(self) -> SessionsTimelineQueryResponse:
query_type="SessionsTimelineQuery",
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)
assert query_result.results is not None
timeline_entries_map: Dict[str, TimelineEntry] = {}
Expand Down
23 changes: 21 additions & 2 deletions posthog/hogql_queries/web_analytics/test/test_web_overview.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from typing import Optional
from unittest.mock import MagicMock, patch
from freezegun import freeze_time
from parameterized import parameterized

from posthog.clickhouse.client.execute import sync_execute
from posthog.hogql.constants import LimitContext
from posthog.hogql.query import INCREASED_MAX_EXECUTION_TIME
from posthog.hogql_queries.web_analytics.web_overview import WebOverviewQueryRunner
from posthog.schema import WebOverviewQuery, DateRange
from posthog.test.base import (
Expand Down Expand Up @@ -36,14 +41,21 @@ def _create_events(self, data, event="$pageview"):
)
return person_result

def _run_web_overview_query(self, date_from, date_to, use_sessions_table=False, compare=True):
def _run_web_overview_query(
self,
date_from: str,
date_to: str,
use_sessions_table: bool = False,
compare: bool = True,
limit_context: Optional[LimitContext] = None,
):
query = WebOverviewQuery(
dateRange=DateRange(date_from=date_from, date_to=date_to),
properties=[],
compare=compare,
useSessionsTable=use_sessions_table,
)
runner = WebOverviewQueryRunner(team=self.team, query=query)
runner = WebOverviewQueryRunner(team=self.team, query=query, limit_context=limit_context)
return runner.calculate()

@parameterized.expand([(True,), (False,)])
Expand Down Expand Up @@ -185,3 +197,10 @@ def test_correctly_counts_pageviews_in_long_running_session(self, use_sessions_t

sessions = results[2]
self.assertEqual(1, sessions.value)

@patch("posthog.hogql.query.sync_execute", wraps=sync_execute)
def test_limit_is_context_aware(self, mock_sync_execute: MagicMock):
self._run_web_overview_query("2023-12-01", "2023-12-03", limit_context=LimitContext.QUERY_ASYNC)

mock_sync_execute.assert_called_once()
self.assertIn(f" max_execution_time={INCREASED_MAX_EXECUTION_TIME},", mock_sync_execute.call_args[0][0])
1 change: 1 addition & 0 deletions posthog/hogql_queries/web_analytics/top_clicks.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def calculate(self):
team=self.team,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)

return WebTopClicksQueryResponse(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ def _get_or_calculate_sample_ratio(self) -> SamplingRate:
query=event_count,
team=self.team,
timings=self.timings,
limit_context=self.limit_context,
)

if not response.results or not response.results[0] or not response.results[0][0]:
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql_queries/web_analytics/web_overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ def calculate(self):
team=self.team,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)
assert response.results

Expand Down

0 comments on commit f5e4b17

Please sign in to comment.