From 1ecf28953ae077898460910808f3d8b446bfe1b0 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Fri, 13 Oct 2023 15:20:31 +0100 Subject: [PATCH] feat: Added the first iteration of trend insghts breakdowns (#17891) * Added the first iteration of trend insghts breakdowns * Removed team id from all events filters * Added support for histogram breakdowns * PR fixes * Support group breakdowns * Abstract property chain into a cached prop --- posthog/hogql/functions/mapping.py | 2 +- .../insights/trends/breakdown.py | 135 ++++++++++ .../insights/trends/breakdown_values.py | 120 +++++++++ .../insights/trends/query_builder.py | 232 ++++++++++++++++++ .../insights/trends/series_with_extras.py | 11 + .../{ => trends}/trends_query_runner.py | 217 +++++----------- .../hogql_queries/insights/trends/utils.py | 21 ++ posthog/hogql_queries/query_runner.py | 2 +- 8 files changed, 586 insertions(+), 154 deletions(-) create mode 100644 posthog/hogql_queries/insights/trends/breakdown.py create mode 100644 posthog/hogql_queries/insights/trends/breakdown_values.py create mode 100644 posthog/hogql_queries/insights/trends/query_builder.py create mode 100644 posthog/hogql_queries/insights/trends/series_with_extras.py rename posthog/hogql_queries/insights/{ => trends}/trends_query_runner.py (53%) create mode 100644 posthog/hogql_queries/insights/trends/utils.py diff --git a/posthog/hogql/functions/mapping.py b/posthog/hogql/functions/mapping.py index 3f062914308ea..8da35817dcf56 100644 --- a/posthog/hogql/functions/mapping.py +++ b/posthog/hogql/functions/mapping.py @@ -674,7 +674,7 @@ class HogQLFunctionMeta: "medianBFloat16If": HogQLFunctionMeta("medianBFloat16If", 2, 2, aggregate=True), "quantile": HogQLFunctionMeta("quantile", 1, 1, min_params=1, max_params=1, aggregate=True), "quantileIf": HogQLFunctionMeta("quantileIf", 2, 2, min_params=1, max_params=1, aggregate=True), - "quantiles": HogQLFunctionMeta("quantiles", 1, 1, min_params=1, max_params=1, aggregate=True), + "quantiles": HogQLFunctionMeta("quantiles", 1, None, aggregate=True), "quantilesIf": HogQLFunctionMeta("quantilesIf", 2, 2, min_params=1, max_params=1, aggregate=True), # "quantileExact": HogQLFunctionMeta("quantileExact", 1, 1, aggregate=True), # "quantileExactIf": HogQLFunctionMeta("quantileExactIf", 2, 2, aggregate=True), diff --git a/posthog/hogql_queries/insights/trends/breakdown.py b/posthog/hogql_queries/insights/trends/breakdown.py new file mode 100644 index 0000000000000..d216a928afdd7 --- /dev/null +++ b/posthog/hogql_queries/insights/trends/breakdown.py @@ -0,0 +1,135 @@ +from typing import Dict, List, Tuple +from posthog.hogql import ast +from posthog.hogql_queries.insights.trends.breakdown_values import BreakdownValues +from posthog.hogql_queries.insights.trends.utils import get_properties_chain, series_event_name +from posthog.hogql_queries.utils.query_date_range import QueryDateRange +from posthog.models.filters.mixins.utils import cached_property +from posthog.models.team.team import Team +from posthog.schema import ActionsNode, EventsNode, TrendsQuery + + +class Breakdown: + query: TrendsQuery + team: Team + series: EventsNode | ActionsNode + query_date_range: QueryDateRange + + def __init__( + self, team: Team, query: TrendsQuery, series: EventsNode | ActionsNode, query_date_range: QueryDateRange + ): + self.team = team + self.query = query + self.series = series + self.query_date_range = query_date_range + + @cached_property + def enabled(self) -> bool: + return self.query.breakdown is not None and self.query.breakdown.breakdown is not None + + @cached_property + def is_histogram_breakdown(self) -> bool: + return self.enabled and self.query.breakdown.breakdown_histogram_bin_count is not None + + def placeholders(self) -> Dict[str, ast.Expr]: + values = self._breakdown_buckets_ast if self.is_histogram_breakdown else self._breakdown_values_ast + + return {"cross_join_breakdown_values": ast.Alias(alias="breakdown_value", expr=values)} + + def column_expr(self) -> ast.Expr: + if self.is_histogram_breakdown: + return ast.Alias(alias="breakdown_value", expr=self._get_breakdown_histogram_multi_if()) + + return ast.Alias( + alias="breakdown_value", + expr=ast.Field(chain=self._properties_chain), + ) + + def events_where_filter(self) -> ast.Expr: + return ast.CompareOperation( + left=ast.Field(chain=self._properties_chain), + op=ast.CompareOperationOp.In, + right=self._breakdown_values_ast, + ) + + @cached_property + def _breakdown_buckets_ast(self) -> ast.Array: + buckets = self._get_breakdown_histogram_buckets() + values = [f"[{t[0]},{t[1]}]" for t in buckets] + values.append('["",""]') + + return ast.Array(exprs=list(map(lambda v: ast.Constant(value=v), values))) + + @cached_property + def _breakdown_values_ast(self) -> ast.Array: + return ast.Array(exprs=[ast.Constant(value=v) for v in self._get_breakdown_values]) + + @cached_property + def _get_breakdown_values(self) -> ast.Array: + breakdown = BreakdownValues( + team=self.team, + event_name=series_event_name(self.series), + breakdown_field=self.query.breakdown.breakdown, + breakdown_type=self.query.breakdown.breakdown_type, + query_date_range=self.query_date_range, + histogram_bin_count=self.query.breakdown.breakdown_histogram_bin_count, + group_type_index=self.query.breakdown.breakdown_group_type_index, + ) + return breakdown.get_breakdown_values() + + def _get_breakdown_histogram_buckets(self) -> List[Tuple[float, float]]: + buckets = [] + values = self._get_breakdown_values + + if len(values) == 1: + values = [values[0], values[0]] + + for i in range(len(values) - 1): + last_value = i == len(values) - 2 + + # Since we always `floor(x, 2)` the value, we add 0.01 to the last bucket + # to ensure it's always slightly greater than the maximum value + lower_bound = values[i] + upper_bound = values[i + 1] + 0.01 if last_value else values[i + 1] + buckets.append((lower_bound, upper_bound)) + + return buckets + + def _get_breakdown_histogram_multi_if(self) -> ast.Expr: + multi_if_exprs: List[ast.Expr] = [] + + buckets = self._get_breakdown_histogram_buckets() + + for lower_bound, upper_bound in buckets: + + multi_if_exprs.extend( + [ + ast.And( + exprs=[ + ast.CompareOperation( + left=ast.Field(chain=self._properties_chain), + op=ast.CompareOperationOp.GtEq, + right=ast.Constant(value=lower_bound), + ), + ast.CompareOperation( + left=ast.Field(chain=self._properties_chain), + op=ast.CompareOperationOp.Lt, + right=ast.Constant(value=upper_bound), + ), + ] + ), + ast.Constant(value=f"[{lower_bound},{upper_bound}]"), + ] + ) + + # `else` block of the multi-if + multi_if_exprs.append(ast.Constant(value='["",""]')) + + return ast.Call(name="multiIf", args=multi_if_exprs) + + @cached_property + def _properties_chain(self): + return get_properties_chain( + breakdown_type=self.query.breakdown.breakdown_type, + breakdown_field=self.query.breakdown.breakdown, + group_type_index=self.query.breakdown.breakdown_group_type_index, + ) diff --git a/posthog/hogql_queries/insights/trends/breakdown_values.py b/posthog/hogql_queries/insights/trends/breakdown_values.py new file mode 100644 index 0000000000000..fc5244affc24d --- /dev/null +++ b/posthog/hogql_queries/insights/trends/breakdown_values.py @@ -0,0 +1,120 @@ +from typing import List, Optional +from posthog.hogql import ast +from posthog.hogql.parser import parse_expr, parse_select +from posthog.hogql.query import execute_hogql_query +from posthog.hogql_queries.insights.trends.utils import get_properties_chain +from posthog.hogql_queries.utils.query_date_range import QueryDateRange +from posthog.models.team.team import Team + + +class BreakdownValues: + team: Team + event_name: str + breakdown_field: str + breakdown_type: str + query_date_range: QueryDateRange + histogram_bin_count: Optional[int] + group_type_index: Optional[int] + + def __init__( + self, + team: Team, + event_name: str, + breakdown_field: str, + query_date_range: QueryDateRange, + breakdown_type: str, + histogram_bin_count: Optional[float] = None, + group_type_index: Optional[float] = None, + ): + self.team = team + self.event_name = event_name + self.breakdown_field = breakdown_field + self.query_date_range = query_date_range + self.breakdown_type = breakdown_type + self.histogram_bin_count = int(histogram_bin_count) if histogram_bin_count is not None else None + self.group_type_index = int(group_type_index) if group_type_index is not None else None + + def get_breakdown_values(self) -> List[str]: + select_field = ast.Alias( + alias="value", + expr=ast.Field( + chain=get_properties_chain( + breakdown_type=self.breakdown_type, + breakdown_field=self.breakdown_field, + group_type_index=self.group_type_index, + ) + ), + ) + + query = parse_select( + """ + SELECT groupArray(value) FROM ( + SELECT + {select_field}, + count(*) as count + FROM + events e + WHERE + {events_where} + GROUP BY + value + ORDER BY + count DESC, + value DESC + ) + """, + placeholders={ + "events_where": self._where_filter(), + "select_field": select_field, + }, + ) + + if self.histogram_bin_count is not None: + expr = self._to_bucketing_expression() + query.select = [expr] + + response = execute_hogql_query( + query_type="TrendsQueryBreakdownValues", + query=query, + team=self.team, + ) + + values = response.results[0][0] + return values + + def _where_filter(self) -> ast.Expr: + filters: List[ast.Expr] = [] + + filters.append(parse_expr("notEmpty(e.person_id)")) + filters.extend( + [ + parse_expr( + "timestamp >= {date_from}", + placeholders=self.query_date_range.to_placeholders(), + ), + parse_expr( + "timestamp <= {date_to}", + placeholders=self.query_date_range.to_placeholders(), + ), + ] + ) + + if self.event_name is not None: + filters.append(parse_expr("event = {event}", placeholders={"event": ast.Constant(value=self.event_name)})) + + return ast.And(exprs=filters) + + def _to_bucketing_expression(self) -> ast.Expr: + assert isinstance(self.histogram_bin_count, int) + + if self.histogram_bin_count <= 1: + qunatile_expression = "quantiles(0,1)(value)" + else: + quantiles = [] + bin_size = 1.0 / self.histogram_bin_count + for i in range(self.histogram_bin_count + 1): + quantiles.append(i * bin_size) + + qunatile_expression = f"quantiles({','.join([f'{quantile:.2f}' for quantile in quantiles])})(value)" + + return parse_expr(f"arrayCompact(arrayMap(x -> floor(x, 2), {qunatile_expression}))") diff --git a/posthog/hogql_queries/insights/trends/query_builder.py b/posthog/hogql_queries/insights/trends/query_builder.py new file mode 100644 index 0000000000000..a7b4f3f401fb5 --- /dev/null +++ b/posthog/hogql_queries/insights/trends/query_builder.py @@ -0,0 +1,232 @@ +from typing import List +from posthog.hogql import ast +from posthog.hogql.parser import parse_expr, parse_select +from posthog.hogql.property import property_to_expr +from posthog.hogql_queries.insights.trends.breakdown import Breakdown +from posthog.hogql_queries.insights.trends.utils import series_event_name +from posthog.hogql_queries.utils.query_date_range import QueryDateRange +from posthog.models.filters.mixins.utils import cached_property +from posthog.models.team.team import Team +from posthog.schema import ActionsNode, EventsNode, TrendsQuery + + +class TrendsQueryBuilder: + query: TrendsQuery + team: Team + query_date_range: QueryDateRange + series: EventsNode | ActionsNode + + def __init__( + self, trends_query: TrendsQuery, team: Team, query_date_range: QueryDateRange, series: EventsNode | ActionsNode + ): + self.query = trends_query + self.team = team + self.query_date_range = query_date_range + self.series = series + + def build_query(self) -> ast.SelectUnionQuery: + date_subqueries = self._get_date_subqueries() + event_query = self._get_events_subquery() + + date_events_union = ast.SelectUnionQuery(select_queries=[*date_subqueries, event_query]) + + inner_select = self._inner_select_query(date_events_union) + full_query = self._outer_select_query(inner_select) + + return full_query + + def _get_date_subqueries(self) -> List[ast.SelectQuery]: + if not self._breakdown.enabled: + return [ + parse_select( + """ + SELECT + 0 AS total, + dateTrunc({interval}, {date_to}) - {number_interval_period} AS day_start + FROM + numbers( + coalesce(dateDiff({interval}, {date_from}, {date_to}), 0) + ) + """, + placeholders={ + **self.query_date_range.to_placeholders(), + }, + ), + parse_select( + """ + SELECT + 0 AS total, + {date_from} AS day_start + """, + placeholders={ + **self.query_date_range.to_placeholders(), + }, + ), + ] + + return [ + parse_select( + """ + SELECT + 0 AS total, + ticks.day_start as day_start, + breakdown_value + FROM ( + SELECT + dateTrunc({interval}, {date_to}) - {number_interval_period} AS day_start + FROM + numbers( + coalesce(dateDiff({interval}, {date_from}, {date_to}), 0) + ) + UNION ALL + SELECT {date_from} AS day_start + ) as ticks + CROSS JOIN ( + SELECT breakdown_value + FROM ( + SELECT {cross_join_breakdown_values} + ) + ARRAY JOIN breakdown_value as breakdown_value + ) as sec + ORDER BY breakdown_value, day_start + """, + placeholders={ + **self.query_date_range.to_placeholders(), + **self._breakdown.placeholders(), + }, + ) + ] + + def _get_events_subquery(self) -> ast.SelectQuery: + query = parse_select( + """ + SELECT + {aggregation_operation} AS total, + dateTrunc({interval}, toTimeZone(toDateTime(timestamp), 'UTC')) AS day_start + FROM events AS e + %s + WHERE {events_filter} + GROUP BY day_start + """ + % (self._sample_value()), + placeholders={ + **self.query_date_range.to_placeholders(), + "events_filter": self._events_filter(), + "aggregation_operation": self._aggregation_operation(), + }, + ) + + if self._breakdown.enabled: + query.select.append(self._breakdown.column_expr()) + query.group_by.append(ast.Field(chain=["breakdown_value"])) + + return query + + def _outer_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: + query = parse_select( + """ + SELECT + groupArray(day_start) AS date, + groupArray(count) AS total + FROM {inner_query} + """, + placeholders={"inner_query": inner_query}, + ) + + if self._breakdown.enabled: + query.select.append(ast.Field(chain=["breakdown_value"])) + query.group_by = [ast.Field(chain=["breakdown_value"])] + query.order_by = [ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC")] + + return query + + def _inner_select_query(self, inner_query: ast.SelectUnionQuery) -> ast.SelectQuery: + query = parse_select( + """ + SELECT + sum(total) AS count, + day_start + FROM {inner_query} + GROUP BY day_start + ORDER BY day_start ASC + """, + placeholders={"inner_query": inner_query}, + ) + + if self._breakdown.enabled: + query.select.append(ast.Field(chain=["breakdown_value"])) + query.group_by.append(ast.Field(chain=["breakdown_value"])) + query.order_by.append(ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC")) + + return query + + def _events_filter(self) -> ast.Expr: + series = self.series + filters: List[ast.Expr] = [] + + # Dates + filters.extend( + [ + parse_expr( + "timestamp >= {date_from}", + placeholders=self.query_date_range.to_placeholders(), + ), + parse_expr( + "timestamp <= {date_to}", + placeholders=self.query_date_range.to_placeholders(), + ), + ] + ) + + # Series + if series_event_name(self.series) is not None: + filters.append( + parse_expr( + "event = {event}", placeholders={"event": ast.Constant(value=series_event_name(self.series))} + ) + ) + + # Filter Test Accounts + if ( + self.query.filterTestAccounts + and isinstance(self.team.test_account_filters, list) + and len(self.team.test_account_filters) > 0 + ): + for property in self.team.test_account_filters: + filters.append(property_to_expr(property, self.team)) + + # Properties + if self.query.properties is not None and self.query.properties != []: + filters.append(property_to_expr(self.query.properties, self.team)) + + # Series Filters + if series.properties is not None and series.properties != []: + filters.append(property_to_expr(series.properties, self.team)) + + # Breakdown + if self._breakdown.enabled and not self._breakdown.is_histogram_breakdown: + filters.append(self._breakdown.events_where_filter()) + + if len(filters) == 0: + return ast.Constant(value=True) + elif len(filters) == 1: + return filters[0] + else: + return ast.And(exprs=filters) + + def _aggregation_operation(self) -> ast.Expr: + if self.series.math == "hogql": + return parse_expr(self.series.math_hogql) + + return parse_expr("count(*)") + + # Using string interpolation for SAMPLE due to HogQL limitations with `UNION ALL` and `SAMPLE` AST nodes + def _sample_value(self) -> str: + if self.query.samplingFactor is None: + return "" + + return f"SAMPLE {self.query.samplingFactor}" + + @cached_property + def _breakdown(self): + return Breakdown(team=self.team, query=self.query, series=self.series, query_date_range=self.query_date_range) diff --git a/posthog/hogql_queries/insights/trends/series_with_extras.py b/posthog/hogql_queries/insights/trends/series_with_extras.py new file mode 100644 index 0000000000000..e95035fa907f0 --- /dev/null +++ b/posthog/hogql_queries/insights/trends/series_with_extras.py @@ -0,0 +1,11 @@ +from typing import Optional +from posthog.schema import ActionsNode, EventsNode + + +class SeriesWithExtras: + series: EventsNode | ActionsNode + is_previous_period_series: Optional[bool] + + def __init__(self, series: EventsNode | ActionsNode, is_previous_period_series: Optional[bool]): + self.series = series + self.is_previous_period_series = is_previous_period_series diff --git a/posthog/hogql_queries/insights/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py similarity index 53% rename from posthog/hogql_queries/insights/trends_query_runner.py rename to posthog/hogql_queries/insights/trends/trends_query_runner.py index 219c57e617a6f..4e8f556d1a6df 100644 --- a/posthog/hogql_queries/insights/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -9,28 +9,20 @@ from posthog.caching.utils import is_stale from posthog.hogql import ast -from posthog.hogql.parser import parse_expr, parse_select -from posthog.hogql.property import property_to_expr from posthog.hogql.query import execute_hogql_query from posthog.hogql.timings import HogQLTimings +from posthog.hogql_queries.insights.trends.query_builder import TrendsQueryBuilder +from posthog.hogql_queries.insights.trends.series_with_extras import SeriesWithExtras from posthog.hogql_queries.query_runner import QueryRunner from posthog.hogql_queries.utils.formula_ast import FormulaAST from posthog.hogql_queries.utils.query_date_range import QueryDateRange from posthog.hogql_queries.utils.query_previous_period_date_range import QueryPreviousPeriodDateRange from posthog.models import Team from posthog.models.filters.mixins.utils import cached_property +from posthog.models.property_definition import PropertyDefinition from posthog.schema import ActionsNode, EventsNode, HogQLQueryResponse, TrendsQuery, TrendsQueryResponse -class SeriesWithExtras: - series: EventsNode | ActionsNode - is_previous_period_series: Optional[bool] - - def __init__(self, series: EventsNode | ActionsNode, is_previous_period_series: Optional[bool]): - self.series = series - self.is_previous_period_series = is_previous_period_series - - class TrendsQueryRunner(QueryRunner): query: TrendsQuery query_type = TrendsQuery @@ -40,63 +32,6 @@ def __init__(self, query: TrendsQuery | Dict[str, Any], team: Team, timings: Opt super().__init__(query, team, timings) self.series = self.setup_series() - def to_query(self) -> List[ast.SelectQuery]: - queries = [] - with self.timings.measure("trends_query"): - for series in self.series: - if not series.is_previous_period_series: - date_placeholders = self.query_date_range.to_placeholders() - else: - date_placeholders = self.query_previous_date_range.to_placeholders() - - queries.append( - parse_select( - """ - SELECT - groupArray(day_start) AS date, - groupArray(count) AS total - FROM - ( - SELECT - sum(total) AS count, - day_start - FROM - ( - SELECT - 0 AS total, - dateTrunc({interval}, {date_to}) - {number_interval_period} AS day_start - FROM - numbers( - coalesce(dateDiff({interval}, {date_from}, {date_to}), 0) - ) - UNION ALL - SELECT - 0 AS total, - {date_from} - UNION ALL - SELECT - {aggregation_operation} AS total, - dateTrunc({interval}, toTimeZone(toDateTime(timestamp), 'UTC')) AS date - FROM events AS e - %s - WHERE {events_filter} - GROUP BY date - ) - GROUP BY day_start - ORDER BY day_start ASC - ) - """ - % (self.sample_value()), - placeholders={ - **date_placeholders, - "events_filter": self.events_filter(series), - "aggregation_operation": self.aggregation_operation(series.series), - }, - timings=self.timings, - ) - ) - return queries - def _is_stale(self, cached_result_package): date_to = self.query_date_range.date_to() interval = self.query_date_range.interval_name @@ -119,9 +54,19 @@ def _refresh_frequency(self): return refresh_frequency - def to_persons_query(self) -> str: - # TODO: add support for selecting and filtering by breakdowns - raise NotImplementedError() + def to_query(self) -> List[ast.SelectQuery]: + queries = [] + with self.timings.measure("trends_query"): + for series in self.series: + if not series.is_previous_period_series: + query_date_range = self.query_date_range + else: + query_date_range = self.query_previous_date_range + + query_builder = TrendsQueryBuilder(self.query, self.team, query_date_range, series.series) + queries.append(query_builder.build_query()) + + return queries def calculate(self): queries = self.to_query() @@ -156,10 +101,22 @@ def build_series_response(self, response: HogQLQueryResponse, series: SeriesWith for val in response.results: series_object = { "data": val[1], - "labels": [item.strftime("%-d-%b-%Y") for item in val[0]], # Add back in hour formatting - "days": [item.strftime("%Y-%m-%d") for item in val[0]], # Add back in hour formatting + "labels": [item.strftime("%-d-%b-%Y") for item in val[0]], # TODO: Add back in hour formatting + "days": [item.strftime("%Y-%m-%d") for item in val[0]], # TODO: Add back in hour formatting "count": float(sum(val[1])), "label": "All events" if self.series_event(series.series) is None else self.series_event(series.series), + "action": { # TODO: Populate missing props in `action` + "id": self.series_event(series.series), + "type": "events", + "order": 0, + "name": self.series_event(series.series) or "All events", + "custom_name": None, + "math": series.series.math, + "math_property": None, + "math_hogql": None, + "math_group_type_index": None, + "properties": {}, + }, } # Modifications for when comparing to previous period @@ -173,6 +130,16 @@ def build_series_response(self, response: HogQLQueryResponse, series: SeriesWith series_object["compare_label"] = "previous" if series.is_previous_period_series else "current" series_object["labels"] = labels + # Modifications for when breakdowns are active + if self.query.breakdown is not None and self.query.breakdown.breakdown is not None: + if self._is_breakdown_field_boolean(): + remapped_label = self._convert_boolean(val[2]) + series_object["label"] = "{} - {}".format(series_object["label"], remapped_label) + series_object["breakdown_value"] = remapped_label + else: + series_object["label"] = "{} - {}".format(series_object["label"], val[2]) + series_object["breakdown_value"] = val[2] + res.append(series_object) return res @@ -188,85 +155,6 @@ def query_previous_date_range(self): date_range=self.query.dateRange, team=self.team, interval=self.query.interval, now=datetime.now() ) - def aggregation_operation(self, series: EventsNode | ActionsNode) -> ast.Expr: - if series.math == "hogql": - return parse_expr(series.math_hogql) - - return parse_expr("count(*)") - - def events_filter(self, series_with_extra: SeriesWithExtras) -> ast.Expr: - series = series_with_extra.series - filters: List[ast.Expr] = [] - - # Team ID - filters.append(parse_expr("team_id = {team_id}", placeholders={"team_id": ast.Constant(value=self.team.pk)})) - - if not series_with_extra.is_previous_period_series: - # Dates (current period) - filters.extend( - [ - parse_expr( - "(toTimeZone(timestamp, 'UTC') >= {date_from})", - placeholders=self.query_date_range.to_placeholders(), - ), - parse_expr( - "(toTimeZone(timestamp, 'UTC') <= {date_to})", - placeholders=self.query_date_range.to_placeholders(), - ), - ] - ) - else: - # Date (previous period) - filters.extend( - [ - parse_expr( - "(toTimeZone(timestamp, 'UTC') >= {date_from})", - placeholders=self.query_previous_date_range.to_placeholders(), - ), - parse_expr( - "(toTimeZone(timestamp, 'UTC') <= {date_to})", - placeholders=self.query_previous_date_range.to_placeholders(), - ), - ] - ) - - # Series - if self.series_event(series) is not None: - filters.append( - parse_expr("event = {event}", placeholders={"event": ast.Constant(value=self.series_event(series))}) - ) - - # Filter Test Accounts - if ( - self.query.filterTestAccounts - and isinstance(self.team.test_account_filters, list) - and len(self.team.test_account_filters) > 0 - ): - for property in self.team.test_account_filters: - filters.append(property_to_expr(property, self.team)) - - # Properties - if self.query.properties is not None and self.query.properties != []: - filters.append(property_to_expr(self.query.properties, self.team)) - - # Series Filters - if series.properties is not None and series.properties != []: - filters.append(property_to_expr(series.properties, self.team)) - - if len(filters) == 0: - return ast.Constant(value=True) - elif len(filters) == 1: - return filters[0] - else: - return ast.And(exprs=filters) - - # Using string interpolation for SAMPLE due to HogQL limitations with `UNION ALL` and `SAMPLE` AST nodes - def sample_value(self) -> str: - if self.query.samplingFactor is None: - return "" - - return f"SAMPLE {self.query.samplingFactor}" - def series_event(self, series: EventsNode | ActionsNode) -> str | None: if isinstance(series, EventsNode): return series.event @@ -309,3 +197,28 @@ def apply_formula(self, formula: str, results: List[Dict[str, Any]]) -> List[Dic new_result["label"] = f"Formula ({formula})" return [new_result] + + def _is_breakdown_field_boolean(self): + if self.query.breakdown.breakdown_type == "person": + property_type = PropertyDefinition.Type.PERSON + elif self.query.breakdown.breakdown_type == "group": + property_type = PropertyDefinition.Type.GROUP + else: + property_type = PropertyDefinition.Type.EVENT + + field_type = self._event_property( + self.query.breakdown.breakdown, property_type, self.query.breakdown.breakdown_group_type_index + ) + return field_type == "Boolean" + + def _convert_boolean(self, value: any): + bool_map = {1: "true", 0: "false", "": ""} + return bool_map.get(value) or value + + def _event_property(self, field: str, field_type: PropertyDefinition.Type, group_type_index: Optional[int]): + return PropertyDefinition.objects.get( + name=field, + team=self.team, + type=field_type, + group_type_index=group_type_index if field_type == PropertyDefinition.Type.GROUP else None, + ).property_type diff --git a/posthog/hogql_queries/insights/trends/utils.py b/posthog/hogql_queries/insights/trends/utils.py new file mode 100644 index 0000000000000..cc1e906291c5a --- /dev/null +++ b/posthog/hogql_queries/insights/trends/utils.py @@ -0,0 +1,21 @@ +from typing import List, Optional +from posthog.schema import ActionsNode, EventsNode + + +def series_event_name(series: EventsNode | ActionsNode) -> str | None: + if isinstance(series, EventsNode): + return series.event + return None + + +def get_properties_chain( + breakdown_type: str, breakdown_field: str, group_type_index: Optional[float | int] +) -> List[str]: + if breakdown_type == "person": + return ["person", "properties", breakdown_field] + + if breakdown_type == "group" and group_type_index is not None: + group_type_index_int = int(group_type_index) + return [f"group_{group_type_index_int}", "properties", breakdown_field] + + return ["properties", breakdown_field] diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index 91b9c711c1e27..4677774a65615 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -93,7 +93,7 @@ def get_query_runner( return LifecycleQueryRunner(query=cast(LifecycleQuery | Dict[str, Any], query), team=team, timings=timings) if kind == "TrendsQuery": - from .insights.trends_query_runner import TrendsQueryRunner + from .insights.trends.trends_query_runner import TrendsQueryRunner return TrendsQueryRunner(query=cast(TrendsQuery | Dict[str, Any], query), team=team, timings=timings) if kind == "EventsQuery":