Skip to content

Commit

Permalink
fix(hogql): Fix hogql export breakdown limit (#21603)
Browse files Browse the repository at this point in the history
  • Loading branch information
webjunkie authored Apr 17, 2024
1 parent 86cd54c commit 66d5be2
Show file tree
Hide file tree
Showing 14 changed files with 79 additions and 22 deletions.
2 changes: 1 addition & 1 deletion posthog/api/cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
from posthog.api.utils import get_target_entity
from posthog.client import sync_execute
from posthog.constants import (
CSV_EXPORT_LIMIT,
INSIGHT_FUNNELS,
INSIGHT_LIFECYCLE,
INSIGHT_PATHS,
Expand All @@ -50,6 +49,7 @@
OFFSET,
PropertyOperatorType,
)
from posthog.hogql.constants import CSV_EXPORT_LIMIT
from posthog.event_usage import report_user_action
from posthog.hogql.context import HogQLContext
from posthog.models import Cohort, FeatureFlag, User, Person
Expand Down
2 changes: 1 addition & 1 deletion posthog/api/insight.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
)
from posthog.caching.insights_api import should_refresh_insight
from posthog.constants import (
BREAKDOWN_VALUES_LIMIT,
INSIGHT,
INSIGHT_FUNNELS,
INSIGHT_PATHS,
Expand All @@ -51,6 +50,7 @@
TRENDS_STICKINESS,
FunnelVizType,
)
from posthog.hogql.constants import BREAKDOWN_VALUES_LIMIT
from posthog.decorators import cached_by_filters
from posthog.helpers.multi_property_breakdown import (
protect_old_clients_from_multi_property_default,
Expand Down
2 changes: 1 addition & 1 deletion posthog/api/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@
from posthog.api.routing import TeamAndOrgViewSetMixin
from posthog.api.utils import format_paginated_url, get_pk_or_uuid, get_target_entity
from posthog.constants import (
CSV_EXPORT_LIMIT,
INSIGHT_FUNNELS,
INSIGHT_PATHS,
LIMIT,
OFFSET,
FunnelVizType,
)
from posthog.hogql.constants import CSV_EXPORT_LIMIT
from posthog.decorators import cached_by_filters
from posthog.logging.timing import timed
from posthog.models import Cohort, Filter, Person, User, Team
Expand Down
3 changes: 0 additions & 3 deletions posthog/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,9 +289,6 @@ class BreakdownAttributionType(str, Enum):

MAX_SLUG_LENGTH = 48
GROUP_TYPES_LIMIT = 5
BREAKDOWN_VALUES_LIMIT = 25
BREAKDOWN_VALUES_LIMIT_FOR_COUNTRIES = 300
CSV_EXPORT_LIMIT = 10000


class EventDefinitionType(str, Enum):
Expand Down
15 changes: 15 additions & 0 deletions posthog/hogql/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@
# Max limit for all cohort calculations
MAX_SELECT_COHORT_CALCULATION_LIMIT = 1000000000 # 1b persons

CSV_EXPORT_LIMIT = 10000
CSV_EXPORT_BREAKDOWN_LIMIT_INITIAL = 512
CSV_EXPORT_BREAKDOWN_LIMIT_LOW = 64 # The lowest limit we want to go to

BREAKDOWN_VALUES_LIMIT = 25
BREAKDOWN_VALUES_LIMIT_FOR_COUNTRIES = 300


class LimitContext(str, Enum):
QUERY = "query"
Expand Down Expand Up @@ -65,6 +72,14 @@ def get_default_limit_for_context(limit_context: LimitContext) -> int:
raise ValueError(f"Unexpected LimitContext value: {limit_context}")


def get_breakdown_limit_for_context(limit_context: LimitContext) -> int:
"""Limit used for breakdowns"""
if limit_context == LimitContext.EXPORT:
return CSV_EXPORT_BREAKDOWN_LIMIT_INITIAL

return BREAKDOWN_VALUES_LIMIT


# Settings applied at the SELECT level
class HogQLQuerySettings(BaseModel):
model_config = ConfigDict(extra="forbid")
Expand Down
6 changes: 4 additions & 2 deletions posthog/hogql_queries/insights/funnels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from typing import Any, Dict, List, Optional, Tuple, Union, cast
import uuid
from posthog.clickhouse.materialized_columns.column import ColumnName
from posthog.constants import BREAKDOWN_VALUES_LIMIT
from posthog.hogql import ast
from posthog.hogql.constants import get_breakdown_limit_for_context
from posthog.hogql.parser import parse_expr, parse_select
from posthog.hogql.property import action_to_expr, property_to_expr
from posthog.hogql.query import execute_hogql_query
Expand Down Expand Up @@ -142,7 +142,9 @@ def breakdown_values(self) -> List[int] | List[str] | List[List[str]]:
else:
# get query params
breakdown_expr = self._get_breakdown_expr()
breakdown_limit_or_default = breakdownFilter.breakdown_limit or BREAKDOWN_VALUES_LIMIT
breakdown_limit_or_default = breakdownFilter.breakdown_limit or get_breakdown_limit_for_context(
self.context.limit_context
)
offset = 0

funnel_event_query = FunnelEventQuery(context=self.context)
Expand Down
5 changes: 5 additions & 0 deletions posthog/hogql_queries/insights/trends/breakdown.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Dict, List, Optional, Tuple, Union, cast
from posthog.hogql import ast
from posthog.hogql.constants import LimitContext
from posthog.hogql.parser import parse_expr
from posthog.hogql.timings import HogQLTimings
from posthog.hogql_queries.insights.trends.breakdown_values import (
Expand Down Expand Up @@ -30,6 +31,7 @@ class Breakdown:
modifiers: HogQLQueryModifiers
events_filter: ast.Expr
breakdown_values_override: Optional[List[str]]
limit_context: LimitContext

def __init__(
self,
Expand All @@ -41,6 +43,7 @@ def __init__(
modifiers: HogQLQueryModifiers,
events_filter: ast.Expr,
breakdown_values_override: Optional[List[str]] = None,
limit_context: LimitContext = LimitContext.QUERY,
):
self.team = team
self.query = query
Expand All @@ -50,6 +53,7 @@ def __init__(
self.modifiers = modifiers
self.events_filter = events_filter
self.breakdown_values_override = breakdown_values_override
self.limit_context = limit_context

@cached_property
def enabled(self) -> bool:
Expand Down Expand Up @@ -234,6 +238,7 @@ def _all_breakdown_values(self) -> List[str | int | None]:
breakdown_filter=self.query.breakdownFilter,
query_date_range=self.query_date_range,
modifiers=self.modifiers,
limit_context=self.limit_context,
)
return cast(List[str | int | None], breakdown.get_breakdown_values())

Expand Down
10 changes: 6 additions & 4 deletions posthog/hogql_queries/insights/trends/breakdown_values.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import List, Optional, Union, Any
from posthog.constants import BREAKDOWN_VALUES_LIMIT, BREAKDOWN_VALUES_LIMIT_FOR_COUNTRIES
from posthog.hogql import ast
from posthog.hogql.constants import LimitContext, get_breakdown_limit_for_context, BREAKDOWN_VALUES_LIMIT_FOR_COUNTRIES
from posthog.hogql.parser import parse_expr, parse_select
from posthog.hogql.placeholders import replace_placeholders, find_placeholders
from posthog.hogql.query import execute_hogql_query
Expand Down Expand Up @@ -38,9 +38,10 @@ class BreakdownValues:
group_type_index: Optional[int]
hide_other_aggregation: Optional[bool]
normalize_url: Optional[bool]
breakdown_limit: Optional[int]
breakdown_limit: int
query_date_range: QueryDateRange
modifiers: HogQLQueryModifiers
limit_context: LimitContext

def __init__(
self,
Expand All @@ -51,6 +52,7 @@ def __init__(
breakdown_filter: BreakdownFilter,
query_date_range: QueryDateRange,
modifiers: HogQLQueryModifiers,
limit_context: LimitContext = LimitContext.QUERY,
):
self.team = team
self.series = series
Expand All @@ -70,7 +72,7 @@ def __init__(
)
self.hide_other_aggregation = breakdown_filter.breakdown_hide_other_aggregation
self.normalize_url = breakdown_filter.breakdown_normalize_url
self.breakdown_limit = breakdown_filter.breakdown_limit
self.breakdown_limit = breakdown_filter.breakdown_limit or get_breakdown_limit_for_context(limit_context)
self.query_date_range = query_date_range
self.modifiers = modifiers

Expand Down Expand Up @@ -113,7 +115,7 @@ def get_breakdown_values(self) -> List[str | int]:
if self.chart_display_type == ChartDisplayType.WorldMap:
breakdown_limit = BREAKDOWN_VALUES_LIMIT_FOR_COUNTRIES
else:
breakdown_limit = int(self.breakdown_limit) if self.breakdown_limit is not None else BREAKDOWN_VALUES_LIMIT
breakdown_limit = int(self.breakdown_limit)

aggregation_expression: ast.Expr
if self._aggregation_operation.aggregating_on_session_duration():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,17 @@ def test_breakdown_values_limit(self):
)
self.assertEqual(len(response.results), 11)

response = self._run_trends_query(
"2020-01-09",
"2020-01-20",
IntervalType.day,
[EventsNode(event="$pageview")],
TrendsFilter(display=ChartDisplayType.ActionsLineGraph),
BreakdownFilter(breakdown="breakdown_value", breakdown_type=BreakdownType.event),
limit_context=LimitContext.EXPORT,
)
self.assertEqual(len(response.results), 30)

def test_breakdown_values_unknown_property(self):
# same as above test, just without creating the property definition
for value in list(range(30)):
Expand Down
5 changes: 5 additions & 0 deletions posthog/hogql_queries/insights/trends/trends_query_builder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import List, Optional, cast
from posthog.hogql import ast
from posthog.hogql.constants import LimitContext
from posthog.hogql.parser import parse_expr, parse_select
from posthog.hogql.property import action_to_expr, property_to_expr
from posthog.hogql.timings import HogQLTimings
Expand Down Expand Up @@ -33,6 +34,7 @@ class TrendsQueryBuilder(DataWarehouseInsightQueryMixin):
series: EventsNode | ActionsNode | DataWarehouseNode
timings: HogQLTimings
modifiers: HogQLQueryModifiers
limit_context: LimitContext

def __init__(
self,
Expand All @@ -42,13 +44,15 @@ def __init__(
series: EventsNode | ActionsNode | DataWarehouseNode,
timings: HogQLTimings,
modifiers: HogQLQueryModifiers,
limit_context: LimitContext = LimitContext.QUERY,
):
self.query = trends_query
self.team = team
self.query_date_range = query_date_range
self.series = series
self.timings = timings
self.modifiers = modifiers
self.limit_context = limit_context

def build_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
breakdown = self._breakdown(is_actors_query=False)
Expand Down Expand Up @@ -605,6 +609,7 @@ def _breakdown(self, is_actors_query: bool, breakdown_values_override: Optional[
breakdown_values_override=breakdown_values_override,
),
breakdown_values_override=[breakdown_values_override] if breakdown_values_override is not None else None,
limit_context=self.limit_context,
)

@cached_property
Expand Down
3 changes: 3 additions & 0 deletions posthog/hogql_queries/insights/trends/trends_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ def to_queries(self) -> List[ast.SelectQuery | ast.SelectUnionQuery]:
series=series.series,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)
query = query_builder.build_query()

Expand Down Expand Up @@ -175,6 +176,7 @@ def to_actors_query(
series=series,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)

query = query_builder.build_actors_query(time_frame=time_frame, breakdown_filter=str(breakdown_value))
Expand Down Expand Up @@ -224,6 +226,7 @@ def to_actors_query_options(self) -> InsightActorsQueryOptionsResponse:
series=series,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)

breakdown = query_builder._breakdown(is_actors_query=False)
Expand Down
3 changes: 1 addition & 2 deletions posthog/models/filters/mixins/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
BREAKDOWN_NORMALIZE_URL,
BREAKDOWN_TYPE,
BREAKDOWN_VALUE,
BREAKDOWN_VALUES_LIMIT,
BREAKDOWN_VALUES_LIMIT_FOR_COUNTRIES,
BREAKDOWNS,
CLIENT_QUERY_ID,
COMPARE,
Expand Down Expand Up @@ -51,6 +49,7 @@
BreakdownAttributionType,
BREAKDOWN_HIDE_OTHER_AGGREGATION,
)
from posthog.hogql.constants import BREAKDOWN_VALUES_LIMIT, BREAKDOWN_VALUES_LIMIT_FOR_COUNTRIES
from posthog.models.entity import Entity, ExclusionEntity, MathType
from posthog.models.filters.mixins.base import BaseParamMixin, BreakdownType
from posthog.models.filters.mixins.utils import (
Expand Down
32 changes: 25 additions & 7 deletions posthog/tasks/exports/csv_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,10 @@
EXPORT_SUCCEEDED_COUNTER,
EXPORT_TIMER,
)
from ...constants import CSV_EXPORT_LIMIT
from ...exceptions import QuerySizeExceeded
from ...hogql.constants import CSV_EXPORT_LIMIT, CSV_EXPORT_BREAKDOWN_LIMIT_INITIAL, CSV_EXPORT_BREAKDOWN_LIMIT_LOW
from ...hogql.query import LimitContext

CSV_EXPORT_BREAKDOWN_LIMIT_INITIAL = 512
CSV_EXPORT_BREAKDOWN_LIMIT_LOW = 64 # The lowest limit we want to go to

logger = structlog.get_logger(__name__)


Expand Down Expand Up @@ -241,16 +239,36 @@ def get_from_insights_api(exported_asset: ExportedAsset, limit: int, resource: d
next_url = data.get("next")


def get_from_hogql_query(exported_asset: ExportedAsset, limit: int, resource: dict) -> Generator[Any, None, None]:
query = resource.get("source")
assert query is not None

while True:
try:
query_response = process_query(
team=exported_asset.team, query_json=query, limit_context=LimitContext.EXPORT
)
except QuerySizeExceeded:
if "breakdownFilter" not in query or limit <= CSV_EXPORT_BREAKDOWN_LIMIT_LOW:
raise

# HACKY: Adjust the breakdown_limit in the query
limit = int(limit / 2)
query["breakdownFilter"]["breakdown_limit"] = limit
continue

yield from _convert_response_to_csv_data(query_response)
return


def _export_to_dict(exported_asset: ExportedAsset, limit: int) -> Any:
resource = exported_asset.export_context

columns: List[str] = resource.get("columns", [])
returned_rows: Generator[Any, None, None]

if resource.get("source"):
query = resource.get("source")
query_response = process_query(team=exported_asset.team, query_json=query, limit_context=LimitContext.EXPORT)
returned_rows = _convert_response_to_csv_data(query_response)
returned_rows = get_from_hogql_query(exported_asset, limit, resource)
else:
returned_rows = get_from_insights_api(exported_asset, limit, resource)

Expand Down
2 changes: 1 addition & 1 deletion posthog/tasks/exports/test/test_csv_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
from posthog.tasks.exports.csv_exporter import (
UnexpectedEmptyJsonResponse,
add_query_params,
CSV_EXPORT_BREAKDOWN_LIMIT_INITIAL,
)
from posthog.hogql.constants import CSV_EXPORT_BREAKDOWN_LIMIT_INITIAL
from posthog.test.base import APIBaseTest, _create_event, flush_persons_and_events
from posthog.utils import absolute_uri

Expand Down

0 comments on commit 66d5be2

Please sign in to comment.