diff --git a/posthog/hogql/functions/mapping.py b/posthog/hogql/functions/mapping.py
index 3f062914308ea0..8da35817dcf56c 100644
--- a/posthog/hogql/functions/mapping.py
+++ b/posthog/hogql/functions/mapping.py
@@ -674,7 +674,7 @@ class HogQLFunctionMeta:
     "medianBFloat16If": HogQLFunctionMeta("medianBFloat16If", 2, 2, aggregate=True),
     "quantile": HogQLFunctionMeta("quantile", 1, 1, min_params=1, max_params=1, aggregate=True),
     "quantileIf": HogQLFunctionMeta("quantileIf", 2, 2, min_params=1, max_params=1, aggregate=True),
-    "quantiles": HogQLFunctionMeta("quantiles", 1, 1, min_params=1, max_params=1, aggregate=True),
+    "quantiles": HogQLFunctionMeta("quantiles", 1, None, aggregate=True),
     "quantilesIf": HogQLFunctionMeta("quantilesIf", 2, 2, min_params=1, max_params=1, aggregate=True),
     # "quantileExact": HogQLFunctionMeta("quantileExact", 1, 1, aggregate=True),
     # "quantileExactIf": HogQLFunctionMeta("quantileExactIf", 2, 2, aggregate=True),
diff --git a/posthog/hogql_queries/insights/trends/breakdown.py b/posthog/hogql_queries/insights/trends/breakdown.py
new file mode 100644
index 00000000000000..e0c8b598bf93f4
--- /dev/null
+++ b/posthog/hogql_queries/insights/trends/breakdown.py
@@ -0,0 +1,134 @@
+from typing import List, Tuple
+from posthog.hogql import ast
+from posthog.hogql_queries.insights.trends.breakdown_values import BreakdownValues
+from posthog.hogql_queries.insights.trends.utils import series_event_name
+from posthog.hogql_queries.utils.query_date_range import QueryDateRange
+from posthog.models.filters.mixins.utils import cached_property
+from posthog.models.team.team import Team
+from posthog.schema import ActionsNode, EventsNode, TrendsQuery
+
+
+class Breakdown:
+    """Builds the breakdown-specific HogQL AST fragments for one series of a trends query."""
+
+    query: TrendsQuery
+    team: Team
+    series: EventsNode | ActionsNode
+    query_date_range: QueryDateRange
+
+    def __init__(
+        self, team: Team, query: TrendsQuery, series: EventsNode | ActionsNode, query_date_range: QueryDateRange
+    ):
+        self.team = team
+        self.query = query
+        self.series = series
+        self.query_date_range = query_date_range
+
+    @cached_property
+    def enabled(self):
+        """True when the query names a property to break down by."""
+        return self.query.breakdown is not None and self.query.breakdown.breakdown is not None
+
+    @cached_property
+    def is_histogram_breakdown(self):
+        """True when a breakdown is enabled and a histogram bin count is set."""
+        return self.enabled and self.query.breakdown.breakdown_histogram_bin_count is not None
+
+    def placeholders(self):
+        """Return the placeholder feeding the breakdown cross join: bucket labels or raw values."""
+        values = self._get_breakdown_buckets_ast() if self.is_histogram_breakdown else self._get_breakdown_values_ast
+
+        return {"cross_join_breakdown_values": ast.Alias(alias="breakdown_value", expr=values)}
+
+    def events_select(self):
+        """Return the `breakdown_value` column: a bucket label for histograms, else the raw property."""
+        if self.is_histogram_breakdown:
+            return ast.Alias(alias="breakdown_value", expr=self._get_breakdown_histogram_multi_if())
+
+        return ast.Alias(alias="breakdown_value", expr=ast.Field(chain=["properties", self.query.breakdown.breakdown]))
+
+    def events_where_filter(self):
+        """Return a filter keeping events whose breakdown property is one of the breakdown values."""
+        return ast.CompareOperation(
+            left=ast.Field(chain=["properties", self.query.breakdown.breakdown]),
+            op=ast.CompareOperationOp.In,
+            right=self._get_breakdown_values_ast,
+        )
+
+    def _get_breakdown_buckets_ast(self) -> ast.Array:
+        """Return the histogram bucket labels, plus the catch-all `["",""]` label, as an AST array."""
+        labels = [f"[{lower},{upper}]" for lower, upper in self._get_breakdown_histogram_buckets()]
+        labels.append('["",""]')
+
+        return ast.Array(exprs=[ast.Constant(value=label) for label in labels])
+
+    @cached_property
+    def _get_breakdown_values_ast(self) -> ast.Array:
+        """The breakdown values wrapped as an AST array of constants."""
+        return ast.Array(exprs=[ast.Constant(value=value) for value in self._get_breakdown_values])
+
+    @cached_property
+    def _get_breakdown_values(self) -> List:
+        """The breakdown values for this series; numeric bucket boundaries when a histogram bin count is set."""
+        breakdown = BreakdownValues(
+            team=self.team,
+            event_name=series_event_name(self.series),
+            breakdown_field=self.query.breakdown.breakdown,
+            query_date_range=self.query_date_range,
+            histogram_bin_count=self.query.breakdown.breakdown_histogram_bin_count,
+        )
+        return breakdown.get_breakdown_values()
+
+    def _get_breakdown_histogram_buckets(self) -> List[Tuple[float, float]]:
+        """Pair up consecutive breakdown values into `(lower_bound, upper_bound)` buckets."""
+        buckets = []
+        values = self._get_breakdown_values
+
+        # A single boundary still produces one bucket: `[value, value + 0.01)`
+        if len(values) == 1:
+            values = [values[0], values[0]]
+
+        for i in range(len(values) - 1):
+            last_value = i == len(values) - 2
+            lower_bound = values[i]
+            # Upper bounds are exclusive (`<`) in the multi-if, so widen the last bucket past the top boundary
+            upper_bound = values[i + 1] + 0.01 if last_value else values[i + 1]
+            buckets.append((lower_bound, upper_bound))
+
+        return buckets
+
+    def _get_breakdown_histogram_multi_if(self) -> ast.Expr:
+        """Return an expression mapping the breakdown property to its bucket label, or `["",""]` if none match."""
+        multi_if_exprs: List[ast.Expr] = []
+
+        buckets = self._get_breakdown_histogram_buckets()
+
+        for lower_bound, upper_bound in buckets:
+            multi_if_exprs.extend(
+                [
+                    ast.And(
+                        exprs=[
+                            ast.CompareOperation(
+                                left=ast.Field(chain=["properties", self.query.breakdown.breakdown]),
+                                op=ast.CompareOperationOp.GtEq,
+                                right=ast.Constant(value=lower_bound),
+                            ),
+                            ast.CompareOperation(
+                                left=ast.Field(chain=["properties", self.query.breakdown.breakdown]),
+                                op=ast.CompareOperationOp.Lt,
+                                right=ast.Constant(value=upper_bound),
+                            ),
+                        ]
+                    ),
+                    ast.Constant(value=f"[{lower_bound},{upper_bound}]"),
+                ]
+            )
+
+        # `multiIf` needs at least one condition/result pair, so with no buckets return the `else` label directly
+        if not multi_if_exprs:
+            return ast.Constant(value='["",""]')
+
+        # `else` block of the multi-if
+        multi_if_exprs.append(ast.Constant(value='["",""]'))
+
+        return ast.Call(name="multiIf", args=multi_if_exprs)
diff --git a/posthog/hogql_queries/insights/trends/breakdown_values.py b/posthog/hogql_queries/insights/trends/breakdown_values.py
index e94e3dbe4a2a1a..f93d176ca61833 100644
--- a/posthog/hogql_queries/insights/trends/breakdown_values.py
+++ b/posthog/hogql_queries/insights/trends/breakdown_values.py
@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Optional
 from posthog.hogql import ast
 from posthog.hogql.parser import parse_expr, parse_select
 from posthog.hogql.query import execute_hogql_query
@@ -11,12 +11,21 @@ class BreakdownValues:
     event_name: str
     breakdown_field: str
     query_date_range: QueryDateRange
+    histogram_bin_count: Optional[int]
 
-    def __init__(self, team: Team, event_name: str, breakdown_field: str, query_date_range: QueryDateRange):
+    def __init__(
+        self,
+        team: Team,
+        event_name: str,
+        breakdown_field: str,
+        query_date_range: QueryDateRange,
+        histogram_bin_count: Optional[float] = None,
+    ):
         self.team = team
         self.event_name = event_name
         self.breakdown_field = breakdown_field
         self.query_date_range = query_date_range
+        self.histogram_bin_count = int(histogram_bin_count) if histogram_bin_count is not None else None
 
     def get_breakdown_values(self) -> List[str]:
         select_field = ast.Alias(alias="value", expr=ast.Field(chain=["properties", self.breakdown_field]))
@@ -36,8 +45,6 @@ def get_breakdown_values(self) -> List[str]:
                     ORDER BY
                         count DESC,
                         value DESC
-                    LIMIT 25
-                    OFFSET 0
                 )
             """,
             placeholders={
@@ -46,6 +53,10 @@ def get_breakdown_values(self) -> List[str]:
             },
         )
 
+        if self.histogram_bin_count is not None:
+            expr = self._to_bucketing_expression()
+            query.select = [expr]
+
         response = execute_hogql_query(
             query_type="TrendsQueryBreakdownValues",
             query=query,
@@ -53,7 +64,6 @@ def get_breakdown_values(self) -> List[str]:
         )
 
         values = response.results[0][0]
-
         return values
 
     def _where_filter(self) -> ast.Expr:
@@ -77,3 +87,22 @@ def _where_filter(self) -> ast.Expr:
             filters.append(parse_expr("event = {event}", placeholders={"event": ast.Constant(value=self.event_name)}))
 
         return ast.And(exprs=filters)
+
+    def _to_bucketing_expression(self) -> ast.Expr:
+        """Return an expression yielding the histogram bucket boundaries of `value`.
+
+        The boundaries are evenly spaced quantiles, floored to two decimals, with consecutive duplicates removed.
+        """
+        assert isinstance(self.histogram_bin_count, int)
+
+        if self.histogram_bin_count <= 1:
+            quantile_expression = "quantiles(0,1)(value)"
+        else:
+            quantiles = []
+            bin_size = 1.0 / self.histogram_bin_count
+            for i in range(self.histogram_bin_count + 1):
+                quantiles.append(i * bin_size)
+
+            quantile_expression = f"quantiles({','.join([f'{quantile:.2f}' for quantile in quantiles])})(value)"
+
+        return parse_expr(f"arrayCompact(arrayMap(x -> floor(x, 2), {quantile_expression}))")
diff --git a/posthog/hogql_queries/insights/trends/query_builder.py b/posthog/hogql_queries/insights/trends/query_builder.py
index 862f342ddc7ca6..4f38a78be72a98 100644
--- a/posthog/hogql_queries/insights/trends/query_builder.py
+++ b/posthog/hogql_queries/insights/trends/query_builder.py
@@ -2,7 +2,8 @@
 from posthog.hogql import ast
 from posthog.hogql.parser import parse_expr, parse_select
 from posthog.hogql.property import property_to_expr
-from posthog.hogql_queries.insights.trends.breakdown_values import BreakdownValues
+from posthog.hogql_queries.insights.trends.breakdown import Breakdown
+from posthog.hogql_queries.insights.trends.utils import series_event_name
 from posthog.hogql_queries.utils.query_date_range import QueryDateRange
 from posthog.models.filters.mixins.utils import cached_property
 from posthog.models.team.team import Team
@@ -35,7 +36,7 @@ def build_query(self) -> ast.SelectUnionQuery:
         return full_query
 
     def _get_date_subqueries(self) -> List[ast.SelectQuery]:
-        if not self._breakdown_enabled():
+        if not self._breakdown.enabled:
             return [
                 parse_select(
                     """
@@ -83,7 +84,7 @@ def _get_date_subqueries(self) -> List[ast.SelectQuery]:
                     CROSS JOIN (
                         SELECT breakdown_value
                         FROM (
-                            SELECT {breakdown_values}
+                            SELECT {cross_join_breakdown_values}
                         )
                         ARRAY JOIN breakdown_value as breakdown_value
                     ) as sec
@@ -91,7 +92,7 @@ def _get_date_subqueries(self) -> List[ast.SelectQuery]:
                 """,
                 placeholders={
                     **self.query_date_range.to_placeholders(),
-                    "breakdown_values": ast.Alias(alias="breakdown_value", expr=self._get_breakdown_values_ast),
+                    **self._breakdown.placeholders(),
                 },
             )
         ]
@@ -115,10 +116,8 @@ def _get_events_subquery(self) -> ast.SelectQuery:
             },
         )
 
-        if self._breakdown_enabled():
-            query.select.append(
-                ast.Alias(alias="breakdown_value", expr=ast.Field(chain=["properties", self.query.breakdown.breakdown]))
-            )
+        if self._breakdown.enabled:
+            query.select.append(self._breakdown.events_select())
             query.group_by.append(ast.Field(chain=["breakdown_value"]))
 
         return query
@@ -134,7 +133,7 @@ def _outer_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery:
             placeholders={"inner_query": inner_query},
         )
 
-        if self._breakdown_enabled():
+        if self._breakdown.enabled:
             query.select.append(ast.Field(chain=["breakdown_value"]))
             query.group_by = [ast.Field(chain=["breakdown_value"])]
             query.order_by = [ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC")]
@@ -154,7 +153,7 @@ def _inner_select_query(self, inner_query: ast.SelectUnionQuery) -> ast.SelectQu
             placeholders={"inner_query": inner_query},
         )
 
-        if self._breakdown_enabled():
+        if self._breakdown.enabled:
             query.select.append(ast.Field(chain=["breakdown_value"]))
             query.group_by.append(ast.Field(chain=["breakdown_value"]))
             query.order_by.append(ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC"))
@@ -180,9 +179,11 @@ def _events_filter(self) -> ast.Expr:
         )
 
         # Series
-        if self._series_event_name() is not None:
+        if series_event_name(self.series) is not None:
             filters.append(
-                parse_expr("event = {event}", placeholders={"event": ast.Constant(value=self._series_event_name())})
+                parse_expr(
+                    "event = {event}", placeholders={"event": ast.Constant(value=series_event_name(self.series))}
+                )
             )
 
         # Filter Test Accounts
@@ -203,14 +204,8 @@ def _events_filter(self) -> ast.Expr:
             filters.append(property_to_expr(series.properties, self.team))
 
         # Breakdown
-        if self._breakdown_enabled():
-            filters.append(
-                ast.CompareOperation(
-                    left=ast.Field(chain=["properties", self.query.breakdown.breakdown]),
-                    op=ast.CompareOperationOp.In,
-                    right=self._get_breakdown_values_ast,
-                )
-            )
+        if self._breakdown.enabled and not self._breakdown.is_histogram_breakdown:
+            filters.append(self._breakdown.events_where_filter())
 
         if len(filters) == 0:
             return ast.Constant(value=True)
@@ -232,19 +227,7 @@ def _sample_value(self) -> str:
 
         return f"SAMPLE {self.query.samplingFactor}"
 
-    def _series_event_name(self) -> str | None:
-        if isinstance(self.series, EventsNode):
-            return self.series.event
-        return None
-
-    def _breakdown_enabled(self):
-        return self.query.breakdown is not None and self.query.breakdown.breakdown is not None
-
     @cached_property
-    def _get_breakdown_values_ast(self) -> ast.Array:
-        breakdown = BreakdownValues(
-            self.team, self._series_event_name(), self.query.breakdown.breakdown, self.query_date_range
-        )
-        breakdown_values = breakdown.get_breakdown_values()
-
-        return ast.Array(exprs=list(map(lambda v: ast.Constant(value=v), breakdown_values)))
+    def _breakdown(self):
+        """The `Breakdown` helper built from this builder's team, query, series and date range."""
+        return Breakdown(team=self.team, query=self.query, series=self.series, query_date_range=self.query_date_range)
diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py
index c2c9d8d3b92548..e86355da044d6a 100644
--- a/posthog/hogql_queries/insights/trends/trends_query_runner.py
+++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py
@@ -19,6 +19,7 @@
 from posthog.hogql_queries.utils.query_previous_period_date_range import QueryPreviousPeriodDateRange
 from posthog.models import Team
 from posthog.models.filters.mixins.utils import cached_property
+from posthog.models.property_definition import PropertyDefinition
 from posthog.schema import ActionsNode, EventsNode, HogQLQueryResponse, TrendsQuery, TrendsQueryResponse
 
 
@@ -131,8 +132,13 @@ def build_series_response(self, response: HogQLQueryResponse, series: SeriesWith
 
             # Modifications for when breakdowns are active
             if self.query.breakdown is not None and self.query.breakdown.breakdown is not None:
-                series_object["breakdown_value"] = val[2]
-                series_object["label"] = "{} - {}".format(series_object["label"], val[2])
+                if self._is_breakdown_field_boolean():
+                    remapped_label = self._convert_boolean(val[2])
+                    series_object["label"] = "{} - {}".format(series_object["label"], remapped_label)
+                    series_object["breakdown_value"] = remapped_label
+                else:
+                    series_object["label"] = "{} - {}".format(series_object["label"], val[2])
+                    series_object["breakdown_value"] = val[2]
 
             res.append(series_object)
         return res
@@ -191,3 +197,26 @@ def apply_formula(self, formula: str, results: List[Dict[str, Any]]) -> List[Dic
         new_result["label"] = f"Formula ({formula})"
 
         return [new_result]
+
+    def _is_breakdown_field_boolean(self):
+        """Whether the breakdown property's recorded property type is `Boolean`."""
+        field_type = self._event_properties.get(self.query.breakdown.breakdown)
+        return field_type == "Boolean"
+
+    def _convert_boolean(self, value: Any):
+        """Map `1`/`0` to `"true"`/`"false"`; any other value is returned unchanged."""
+        bool_map = {1: "true", 0: "false", "": ""}
+        return bool_map.get(value) or value
+
+    @cached_property
+    def _event_properties(self):
+        """Map of property name to property type for this team's property definitions that have a type set."""
+        event_property_values = PropertyDefinition.objects.filter(
+            team_id=self.team.pk,
+            # NOTE(review): Django appears to drop `None` from `__in` lookups, so NULL-typed rows may be skipped - confirm
+            type__in=[None, PropertyDefinition.Type.EVENT],
+        ).values_list("name", "property_type")
+
+        event_properties = {name: property_type for name, property_type in event_property_values if property_type}
+
+        return event_properties
diff --git a/posthog/hogql_queries/insights/trends/utils.py b/posthog/hogql_queries/insights/trends/utils.py
new file mode 100644
index 00000000000000..64de859b164afc
--- /dev/null
+++ b/posthog/hogql_queries/insights/trends/utils.py
@@ -0,0 +1,8 @@
+from posthog.schema import ActionsNode, EventsNode
+
+
+def series_event_name(series: EventsNode | ActionsNode) -> str | None:
+    """Return the event name for an `EventsNode` series, or `None` for any other series type."""
+    if isinstance(series, EventsNode):
+        return series.event
+    return None