Skip to content

Commit

Permalink
perf: Remove extra breakdown query for trends (#22885)
Browse files Browse the repository at this point in the history
* perf: Remove extra breakdown query for trends

* fix

* fix actor breakdown

* fix breakdown tests

* fix

* remove most breakdown_values_override calls

* fix tests

* fix baseline

* remove unnecessary test

* fix histogram

* fix: Don't allow window function with distinct as it's wrong

* fix tests

* fix

* Update query snapshots

* fix histogram

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
timgl and github-actions[bot] authored Jun 14, 2024
1 parent 919d017 commit ec59e6f
Show file tree
Hide file tree
Showing 15 changed files with 1,621 additions and 1,325 deletions.
22 changes: 9 additions & 13 deletions mypy-baseline.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,15 @@ ee/billing/quota_limiting.py:0: error: "object" has no attribute "get" [attr-de
ee/billing/quota_limiting.py:0: error: Unsupported target for indexed assignment ("object") [index]
ee/billing/quota_limiting.py:0: error: Unsupported target for indexed assignment ("object") [index]
posthog/tasks/email.py:0: error: Module "django.utils.timezone" does not explicitly export attribute "timedelta" [attr-defined]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_histogram_bin_count" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument 1 to "parse_expr" has incompatible type "str | float | list[str | float] | Any | None"; expected "str" [arg-type]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument "breakdown_field" to "get_properties_chain" has incompatible type "str | float | list[str | float] | Any | None"; expected "str" [arg-type]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_group_type_index" [union-attr]
posthog/hogql_queries/insights/trends/aggregation_operations.py:0: error: List item 1 has incompatible type "str | None"; expected "str" [list-item]
posthog/hogql_queries/insights/trends/aggregation_operations.py:0: error: Argument "chain" to "Field" has incompatible type "list[str]"; expected "list[str | int]" [arg-type]
posthog/hogql_queries/insights/trends/aggregation_operations.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance
Expand Down Expand Up @@ -298,19 +307,6 @@ posthog/hogql/query.py:0: error: Subclass of "SelectQuery" and "SelectUnionQuery
posthog/queries/person_query.py:0: error: Incompatible type for lookup 'pk': (got "str | int | list[str]", expected "str | int") [misc]
posthog/queries/event_query/event_query.py:0: error: Incompatible type for lookup 'pk': (got "str | int | list[str]", expected "str | int") [misc]
posthog/hogql_queries/sessions_timeline_query_runner.py:0: error: Statement is unreachable [unreachable]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_histogram_bin_count" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument "exprs" to "Or" has incompatible type "list[CompareOperation]"; expected "list[Expr]" [arg-type]
posthog/hogql_queries/insights/trends/breakdown.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance
posthog/hogql_queries/insights/trends/breakdown.py:0: note: Consider using "Sequence" instead, which is covariant
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument 1 to "parse_expr" has incompatible type "str | float | list[str | float] | Any | None"; expected "str" [arg-type]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument "breakdown_field" to "get_properties_chain" has incompatible type "str | float | list[str | float] | Any | None"; expected "str" [arg-type]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_group_type_index" [union-attr]
posthog/hogql_queries/hogql_query_runner.py:0: error: Statement is unreachable [unreachable]
posthog/hogql_queries/hogql_query_runner.py:0: error: Incompatible return value type (got "SelectQuery | SelectUnionQuery", expected "SelectQuery") [return-value]
posthog/hogql_queries/events_query_runner.py:0: error: Statement is unreachable [unreachable]
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql/functions/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,7 @@ def compare_types(arg_types: list[ConstantType], sig_arg_types: tuple[ConstantTy
"timeStampSub": HogQLFunctionMeta("timeStampSub", 2, 2),
"now": HogQLFunctionMeta("now64", 0, 1, tz_aware=True, case_sensitive=False),
"nowInBlock": HogQLFunctionMeta("nowInBlock", 1, 1),
"rowNumberInAllBlocks": HogQLFunctionMeta("rowNumberInAllBlocks", 0, 0),
"today": HogQLFunctionMeta("today"),
"yesterday": HogQLFunctionMeta("yesterday"),
"timeSlot": HogQLFunctionMeta("timeSlot", 1, 1),
Expand Down
5 changes: 4 additions & 1 deletion posthog/hogql/printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1145,7 +1145,10 @@ def visit_window_function(self, node: ast.WindowFunction):
identifier = self._print_identifier(node.name)
exprs = ", ".join(self.visit(expr) for expr in node.exprs or [])
args = "(" + (", ".join(self.visit(arg) for arg in node.args or [])) + ")" if node.args else ""
over = f"({self.visit(node.over_expr)})" if node.over_expr else self._print_identifier(node.over_identifier)
if node.over_expr or node.over_identifier:
over = f"({self.visit(node.over_expr)})" if node.over_expr else self._print_identifier(node.over_identifier)
else:
over = "()"
return f"{identifier}({exprs}){args} OVER {over}"

def visit_window_frame_expr(self, node: ast.WindowFrameExpr):
Expand Down
222 changes: 32 additions & 190 deletions posthog/hogql_queries/insights/trends/breakdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,6 @@
from posthog.hogql.constants import LimitContext
from posthog.hogql.parser import parse_expr
from posthog.hogql.timings import HogQLTimings
from posthog.hogql_queries.insights.trends.breakdown_values import (
BREAKDOWN_NULL_STRING_LABEL,
BREAKDOWN_OTHER_STRING_LABEL,
BreakdownValues,
)
from posthog.hogql_queries.insights.trends.display import TrendsDisplay
from posthog.hogql_queries.insights.trends.utils import (
get_properties_chain,
)
Expand All @@ -17,6 +11,11 @@
from posthog.models.team.team import Team
from posthog.schema import ActionsNode, EventsNode, DataWarehouseNode, HogQLQueryModifiers, InCohortVia, TrendsQuery

BREAKDOWN_OTHER_STRING_LABEL = "$$_posthog_breakdown_other_$$"
BREAKDOWN_NULL_STRING_LABEL = "$$_posthog_breakdown_null_$$"
BREAKDOWN_OTHER_DISPLAY = "Other (i.e. all remaining values)"
BREAKDOWN_NULL_DISPLAY = "None (i.e. no value)"


def hogql_to_string(expr: ast.Expr) -> ast.Call:
return ast.Call(name="toString", args=[expr])
Expand All @@ -30,7 +29,6 @@ class Breakdown:
timings: HogQLTimings
modifiers: HogQLQueryModifiers
events_filter: ast.Expr
breakdown_values_override: Optional[list[str | int]]
limit_context: LimitContext

def __init__(
Expand All @@ -42,7 +40,6 @@ def __init__(
timings: HogQLTimings,
modifiers: HogQLQueryModifiers,
events_filter: ast.Expr,
breakdown_values_override: Optional[list[str | int]] = None,
limit_context: LimitContext = LimitContext.QUERY,
):
self.team = team
Expand All @@ -52,34 +49,19 @@ def __init__(
self.timings = timings
self.modifiers = modifiers
self.events_filter = events_filter
self.breakdown_values_override = breakdown_values_override
self.limit_context = limit_context

@cached_property
def enabled(self) -> bool:
return (
self.query.breakdownFilter is not None
and self.query.breakdownFilter.breakdown is not None
and self.has_breakdown_values
)

@cached_property
def is_session_type(self) -> bool:
return self.enabled and self.query.breakdownFilter.breakdown_type == "session"
return self.query.breakdownFilter is not None and self.query.breakdownFilter.breakdown is not None

@cached_property
def is_histogram_breakdown(self) -> bool:
return self.enabled and self.query.breakdownFilter.breakdown_histogram_bin_count is not None

def placeholders(self) -> dict[str, ast.Expr]:
values = self._breakdown_buckets_ast if self.is_histogram_breakdown else self._breakdown_values_ast

return {"cross_join_breakdown_values": ast.Alias(alias="breakdown_value", expr=values)}

def column_expr(self) -> ast.Alias:
if self.is_histogram_breakdown:
return ast.Alias(alias="breakdown_value", expr=self._get_breakdown_histogram_multi_if())

return ast.Alias(alias="breakdown_value", expr=ast.Field(chain=self._properties_chain))
if self.query.breakdownFilter.breakdown_type == "cohort":
if self.modifiers.inCohortVia == InCohortVia.LEFTJOIN_CONJOINED:
return ast.Alias(
Expand All @@ -95,19 +77,15 @@ def column_expr(self) -> ast.Alias:
expr=hogql_to_string(ast.Constant(value=cohort_breakdown)),
)

return ast.Alias(alias="breakdown_value", expr=self._get_breakdown_transform_func)
return ast.Alias(alias="breakdown_value", expr=self._get_breakdown_expression)

def events_where_filter(self) -> ast.Expr | None:
def events_where_filter(self, breakdown_values_override: Optional[str | int] = None) -> ast.Expr | None:
if (
self.query.breakdownFilter is not None
and self.query.breakdownFilter.breakdown is not None
and self.query.breakdownFilter.breakdown_type == "cohort"
):
breakdown = (
self.breakdown_values_override
if self.breakdown_values_override
else self.query.breakdownFilter.breakdown
)
breakdown = breakdown_values_override if breakdown_values_override else self.query.breakdownFilter.breakdown

if breakdown == "all":
return None
Expand Down Expand Up @@ -136,60 +114,33 @@ def events_where_filter(self) -> ast.Expr | None:
right=ast.Constant(value=breakdown),
)

# No need to filter if we're showing the "other" bucket, as we need to look at all events anyway.
# Except when explicitly filtering
if (
self.query.breakdownFilter is not None
and not self.query.breakdownFilter.breakdown_hide_other_aggregation
and len(self.breakdown_values_override or []) == 0
):
return ast.Constant(value=True)

if (
self.query.breakdownFilter is not None
and self.query.breakdownFilter.breakdown is not None
and self.query.breakdownFilter.breakdown_type == "hogql"
and isinstance(self.query.breakdownFilter.breakdown, str)
):
left = parse_expr(self.query.breakdownFilter.breakdown)
else:
left = ast.Field(chain=self._properties_chain)

if not self.is_histogram_breakdown:
left = hogql_to_string(left)

compare_ops = []
for _value in self._breakdown_values:
value: Optional[str] = str(_value) # non-cohorts are always strings
# If the value is one of the "other" values, then use the `transform()` func
if breakdown_values_override:
if (
self.query.breakdownFilter is not None
and self.query.breakdownFilter.breakdown is not None
and self.query.breakdownFilter.breakdown_type == "hogql"
and isinstance(self.query.breakdownFilter.breakdown, str)
):
left = parse_expr(self.query.breakdownFilter.breakdown)
else:
left = ast.Field(chain=self._properties_chain)
value: Optional[str] = str(breakdown_values_override) # non-cohorts are always strings
if value == BREAKDOWN_OTHER_STRING_LABEL:
transform_func = self._get_breakdown_transform_func
compare_ops.append(
ast.CompareOperation(
left=transform_func, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=value)
)
)
# TODO: Fix breaking down by other
return ast.Constant(value=True)
elif value == BREAKDOWN_NULL_STRING_LABEL:
compare_ops.append(
ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=None))
)
compare_ops.append(
ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=""))
return ast.Or(
exprs=[
ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=None)),
ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value="")),
]
)
else:
compare_ops.append(
ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=value))
)

if len(compare_ops) == 1:
return compare_ops[0]
elif len(compare_ops) == 0:
return parse_expr("1 = 1")

return ast.Or(exprs=compare_ops)
return ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=value))
return ast.Constant(value=True)

@cached_property
def _get_breakdown_transform_func(self) -> ast.Call:
def _get_breakdown_expression(self) -> ast.Call:
if self.query.breakdownFilter.breakdown_type == "hogql":
return self._get_breakdown_values_transform(parse_expr(self.query.breakdownFilter.breakdown))
return self._get_breakdown_values_transform(ast.Field(chain=self._properties_chain))
Expand All @@ -202,127 +153,18 @@ def _get_breakdown_values_transform(self, node: ast.Expr) -> ast.Call:
return cast(
ast.Call,
parse_expr(
"transform(ifNull(nullIf(toString({node}), ''), {nil}), {values}, {values}, {other})",
"ifNull(nullIf(toString({node}), ''), {nil})",
placeholders={
"node": node,
"values": self._breakdown_values_ast,
"nil": ast.Constant(value=BREAKDOWN_NULL_STRING_LABEL),
"other": ast.Constant(value=BREAKDOWN_OTHER_STRING_LABEL),
},
),
)

@cached_property
def _breakdown_buckets_ast(self) -> ast.Array:
buckets = self._get_breakdown_histogram_buckets()
values = [f"[{t[0]},{t[1]}]" for t in buckets]
# TODO: add this only if needed
values.append('["",""]')

return ast.Array(exprs=[ast.Constant(value=v) for v in values])

@property
def _breakdown_values_ast(self) -> ast.Array:
exprs: list[ast.Expr] = []
for value in self._breakdown_values:
if isinstance(value, str):
exprs.append(ast.Constant(value=value))
else:
exprs.append(hogql_to_string(ast.Constant(value=value)))
return ast.Array(exprs=exprs)

@cached_property
def _all_breakdown_values(self) -> list[str | int | None]:
# Used in the actors query
if self.breakdown_values_override is not None:
return cast(list[str | int | None], self.breakdown_values_override)

if self.query.breakdownFilter is None:
return []

with self.timings.measure("breakdown_values_query"):
breakdown = BreakdownValues(
team=self.team,
series=self.series,
events_filter=self.events_filter,
chart_display_type=self._trends_display().display_type,
breakdown_filter=self.query.breakdownFilter,
query_date_range=self.query_date_range,
modifiers=self.modifiers,
limit_context=self.limit_context,
)
return cast(list[str | int | None], breakdown.get_breakdown_values())

@cached_property
def _breakdown_values(self) -> list[str | int]:
values = [BREAKDOWN_NULL_STRING_LABEL if v is None else v for v in self._all_breakdown_values]
return cast(list[str | int], values)

@cached_property
def has_breakdown_values(self) -> bool:
return len(self._breakdown_values) > 0

def _get_breakdown_histogram_buckets(self) -> list[tuple[float, float]]:
buckets = []
values = self._breakdown_values

if len(values) == 1:
values = [values[0], values[0]]

for i in range(len(values) - 1):
last_value = i == len(values) - 2

# Since we always `floor(x, 2)` the value, we add 0.01 to the last bucket
# to ensure it's always slightly greater than the maximum value
lower_bound = float(values[i])
upper_bound = float(values[i + 1]) + 0.01 if last_value else float(values[i + 1])
buckets.append((lower_bound, upper_bound))

return buckets

def _get_breakdown_histogram_multi_if(self) -> ast.Expr:
multi_if_exprs: list[ast.Expr] = []

buckets = self._get_breakdown_histogram_buckets()

for lower_bound, upper_bound in buckets:
multi_if_exprs.extend(
[
ast.And(
exprs=[
ast.CompareOperation(
left=ast.Field(chain=self._properties_chain),
op=ast.CompareOperationOp.GtEq,
right=ast.Constant(value=lower_bound),
),
ast.CompareOperation(
left=ast.Field(chain=self._properties_chain),
op=ast.CompareOperationOp.Lt,
right=ast.Constant(value=upper_bound),
),
]
),
ast.Constant(value=f"[{lower_bound},{upper_bound}]"),
]
)

# `else` block of the multi-if
multi_if_exprs.append(ast.Constant(value='["",""]'))

return ast.Call(name="multiIf", args=multi_if_exprs)

@cached_property
def _properties_chain(self):
return get_properties_chain(
breakdown_type=self.query.breakdownFilter.breakdown_type,
breakdown_field=self.query.breakdownFilter.breakdown,
group_type_index=self.query.breakdownFilter.breakdown_group_type_index,
)

def _trends_display(self) -> TrendsDisplay:
display = (
self.query.trendsFilter.display
if self.query.trendsFilter is not None and self.query.trendsFilter.display is not None
else None
)
return TrendsDisplay(display)
Loading

0 comments on commit ec59e6f

Please sign in to comment.