From 7a37a712ffa066ae32d6b0c8cb50ded277524432 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Mon, 30 Oct 2023 15:10:41 +0000 Subject: [PATCH 1/3] Added the final set of aggregation functions --- .../insights/trends/aggregation_operations.py | 178 ++++++++++++++++-- 1 file changed, 165 insertions(+), 13 deletions(-) diff --git a/posthog/hogql_queries/insights/trends/aggregation_operations.py b/posthog/hogql_queries/insights/trends/aggregation_operations.py index 3920344cbfd52..8b7d0cf71afda 100644 --- a/posthog/hogql_queries/insights/trends/aggregation_operations.py +++ b/posthog/hogql_queries/insights/trends/aggregation_operations.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional from posthog.hogql import ast from posthog.hogql.parser import parse_expr, parse_select from posthog.hogql_queries.utils.query_date_range import QueryDateRange @@ -13,13 +13,15 @@ class QueryAlternator: _group_bys: List[ast.Expr] _select_from: ast.JoinExpr | None - def __init__(self, query: ast.SelectQuery): + def __init__(self, query: ast.SelectQuery | ast.SelectUnionQuery): + assert isinstance(query, ast.SelectQuery) + self._query = query self._selects = [] self._group_bys = [] self._select_from = None - def build(self) -> ast.SelectQuery: + def build(self) -> ast.SelectQuery | ast.SelectUnionQuery: if len(self._selects) > 0: self._query.select.extend(self._selects) @@ -48,26 +50,120 @@ class AggregationOperations: series: EventsNode | ActionsNode query_date_range: QueryDateRange - def __init__(self, series: str, query_date_range: QueryDateRange) -> None: + def __init__(self, series: EventsNode | ActionsNode, query_date_range: QueryDateRange) -> None: self.series = series self.query_date_range = query_date_range def select_aggregation(self) -> ast.Expr: - if self.series.math == "hogql": + if self.series.math == "hogql" and self.series.math_hogql is not None: return parse_expr(self.series.math_hogql) elif self.series.math == "total": return parse_expr("count(e.uuid)") elif self.series.math == "dau": return parse_expr("count(DISTINCT e.person_id)") elif self.series.math == "weekly_active": - return ast.Field(chain=["counts"]) + return ast.Field(chain=["counts"]) # This gets replaced when doing query orchestration + elif self.series.math == "monthly_active": + return ast.Field(chain=["counts"]) # This gets replaced when doing query orchestration + elif self.series.math == "unique_session": + return parse_expr('count(DISTINCT e."$session_id")') + elif self.series.math == "unique_group" and self.series.math_group_type_index is not None: + return parse_expr(f'count(DISTINCT e."$group_{self.series.math_group_type_index}")') + elif self.series.math_property is not None: + if self.series.math == "avg": + return self._math_func("avg", None) + elif self.series.math == "sum": + return self._math_func("sum", None) + elif self.series.math == "min": + return self._math_func("min", None) + elif self.series.math == "max": + return self._math_func("max", None) + elif self.series.math == "median": + return self._math_func("median", None) + elif self.series.math == "p90": + return self._math_quantile(0.9, None) + elif self.series.math == "p95": + return self._math_quantile(0.95, None) + elif self.series.math == "p99": + return self._math_quantile(0.99, None) + else: + raise NotImplementedError() return parse_expr("count(e.uuid)") def requires_query_orchestration(self) -> bool: - return self.series.math == "weekly_active" + math_to_return_true = [ + "weekly_active", + "monthly_active", + ] + + return self._is_count_per_actor_variant() or self.series.math in math_to_return_true + + def _is_count_per_actor_variant(self): + return self.series.math in [ + "avg_count_per_actor", + "min_count_per_actor", + "max_count_per_actor", + "median_count_per_actor", + "p90_count_per_actor", + "p95_count_per_actor", + "p99_count_per_actor", + ] + + def _math_func(self, method: str, override_chain: Optional[List[str | int]]) -> ast.Call: + if override_chain is not None: + return ast.Call(name=method, args=[ast.Field(chain=override_chain)]) + + if self.series.math_property == "$time": + return ast.Call( + name=method, + args=[ + ast.Call( + name="toUnixTimestamp", + args=[ast.Field(chain=["properties", "$time"])], + ) + ], + ) + + if self.series.math_property == "$session_duration": + chain = ["session", "duration"] + else: + chain = ["properties", self.series.math_property] + + return ast.Call(name=method, args=[ast.Field(chain=chain)]) + + def _math_quantile(self, percentile: float, override_chain: Optional[List[str | int]]) -> ast.Call: + chain = ["properties", self.series.math_property] + + return ast.Call( + name="quantile", + params=[ast.Constant(value=percentile)], + args=[ast.Field(chain=override_chain or chain)], + ) + + def _interval_placeholders(self): + if self.series.math == "weekly_active": + return { + "exclusive_lookback": ast.Call(name="toIntervalDay", args=[ast.Constant(value=6)]), + "inclusive_lookback": ast.Call(name="toIntervalDay", args=[ast.Constant(value=7)]), + } + elif self.series.math == "monthly_active": + return { + "exclusive_lookback": ast.Call(name="toIntervalDay", args=[ast.Constant(value=29)]), + "inclusive_lookback": ast.Call(name="toIntervalDay", args=[ast.Constant(value=30)]), + } + + raise NotImplementedError() + + def _parent_select_query( + self, inner_query: ast.SelectQuery | ast.SelectUnionQuery + ) -> ast.SelectQuery | ast.SelectUnionQuery: + if self._is_count_per_actor_variant(): + return parse_select( + "SELECT total, day_start FROM {inner_query}", + placeholders={"inner_query": inner_query}, + ) - def _parent_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: return parse_select( """ SELECT @@ -82,7 +178,42 @@ def _parent_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: }, ) - def _inner_select_query(self, cross_join_select_query: ast.SelectQuery) -> ast.SelectQuery: + def _inner_select_query( + self, cross_join_select_query: ast.SelectQuery | ast.SelectUnionQuery + ) -> ast.SelectQuery | ast.SelectUnionQuery: + if self._is_count_per_actor_variant(): + if self.series.math == "avg_count_per_actor": + math_func = self._math_func("avg", ["total"]) + elif self.series.math == "min_count_per_actor": + math_func = self._math_func("min", ["total"]) + elif self.series.math == "max_count_per_actor": + math_func = self._math_func("max", ["total"]) + elif self.series.math == "median_count_per_actor": + math_func = self._math_func("median", ["total"]) + elif self.series.math == "p90_count_per_actor": + math_func = self._math_quantile(0.9, ["total"]) + elif self.series.math == "p95_count_per_actor": + math_func = self._math_quantile(0.95, ["total"]) + elif self.series.math == "p99_count_per_actor": + math_func = self._math_quantile(0.99, ["total"]) + else: + raise NotImplementedError() + + total_alias = ast.Alias(alias="total", expr=math_func) + + return parse_select( + """ + SELECT + {total_alias}, day_start + FROM {inner_query} + GROUP BY day_start + """, + placeholders={ + "inner_query": cross_join_select_query, + "total_alias": total_alias, + }, + ) + return parse_select( """ SELECT @@ -92,22 +223,43 @@ def _inner_select_query(self, cross_join_select_query: ast.SelectQuery) -> ast.S SELECT toStartOfDay({date_to}) - toIntervalDay(number) AS timestamp FROM - numbers(dateDiff('day', toStartOfDay({date_from} - INTERVAL 7 DAY), {date_to})) + numbers(dateDiff('day', toStartOfDay({date_from} - {inclusive_lookback}), {date_to})) ) d CROSS JOIN {cross_join_select_query} e WHERE e.timestamp <= d.timestamp + INTERVAL 1 DAY AND - e.timestamp > d.timestamp - INTERVAL 6 DAY + e.timestamp > d.timestamp - {exclusive_lookback} GROUP BY d.timestamp ORDER BY d.timestamp """, placeholders={ **self.query_date_range.to_placeholders(), + **self._interval_placeholders(), "cross_join_select_query": cross_join_select_query, }, ) - def _events_query(self, events_where_clause: ast.Expr, sample_value: ast.RatioExpr) -> ast.SelectQuery: + def _events_query( + self, events_where_clause: ast.Expr, sample_value: ast.RatioExpr + ) -> ast.SelectQuery | ast.SelectUnionQuery: + if self._is_count_per_actor_variant(): + return parse_select( + """ + SELECT + count(e.uuid) AS total, + dateTrunc({interval}, timestamp) AS day_start + FROM events AS e + SAMPLE {sample} + WHERE {events_where_clause} + GROUP BY e.person_id, day_start + """, + placeholders={ + **self.query_date_range.to_placeholders(), + "events_where_clause": events_where_clause, + "sample": sample_value, + }, + ) + return parse_select( """ SELECT @@ -127,7 +279,7 @@ def _events_query(self, events_where_clause: ast.Expr, sample_value: ast.RatioEx }, ) - def get_query_orchestrator(self, events_where_clause: ast.Expr, sample_value: str): + def get_query_orchestrator(self, events_where_clause: ast.Expr, sample_value: ast.RatioExpr): events_query = self._events_query(events_where_clause, sample_value) inner_select = self._inner_select_query(events_query) parent_select = self._parent_select_query(inner_select) From 3bf712dee14ceb2bf2cafb4cfbd2d2c3ef5f80e2 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Mon, 30 Oct 2023 16:14:58 +0000 Subject: [PATCH 2/3] Added the persons query to trends query --- posthog/hogql/printer.py | 6 +- .../insights/insight_persons_query_runner.py | 8 +- .../insights/trends/aggregation_operations.py | 12 +-- .../insights/trends/query_builder.py | 98 ++++++++++++------- .../insights/trends/trends_query_runner.py | 22 ++++- posthog/hogql_queries/query_runner.py | 2 +- 6 files changed, 102 insertions(+), 46 deletions(-) diff --git a/posthog/hogql/printer.py b/posthog/hogql/printer.py index fa55d34e586a8..3bb0139b4b6f1 100644 --- a/posthog/hogql/printer.py +++ b/posthog/hogql/printer.py @@ -854,7 +854,11 @@ def visit_field_type(self, type: ast.FieldType): return field_sql field_sql = f"{self.visit(type.table_type)}.{field_sql}" - elif isinstance(type.table_type, ast.SelectQueryType) or isinstance(type.table_type, ast.SelectQueryAliasType): + elif ( + isinstance(type.table_type, ast.SelectQueryType) + or isinstance(type.table_type, ast.SelectQueryAliasType) + or isinstance(type.table_type, ast.SelectUnionQueryType) + ): field_sql = self._print_identifier(type.name) if isinstance(type.table_type, ast.SelectQueryAliasType): field_sql = f"{self.visit(type.table_type)}.{field_sql}" diff --git a/posthog/hogql_queries/insights/insight_persons_query_runner.py b/posthog/hogql_queries/insights/insight_persons_query_runner.py index 9f03fa061d891..a6bc08c0d0849 100644 --- a/posthog/hogql_queries/insights/insight_persons_query_runner.py +++ b/posthog/hogql_queries/insights/insight_persons_query_runner.py @@ -5,6 +5,7 @@ from posthog.hogql.query import execute_hogql_query from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.insights.lifecycle_query_runner import LifecycleQueryRunner +from posthog.hogql_queries.insights.trends.trends_query_runner import TrendsQueryRunner from posthog.hogql_queries.query_runner import QueryRunner, get_query_runner from posthog.models import Team from posthog.models.filters.mixins.utils import cached_property @@ -32,16 +33,19 @@ def __init__( def source_runner(self) -> QueryRunner: return get_query_runner(self.query.source, self.team, self.timings, self.in_export_context) - def to_query(self) -> ast.SelectQuery: + def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: if isinstance(self.source_runner, LifecycleQueryRunner): lifecycle_runner = cast(LifecycleQueryRunner, self.source_runner) day = self.query.day status = self.query.status return lifecycle_runner.to_persons_query(day=day, status=status) + elif isinstance(self.source_runner, TrendsQueryRunner): + trends_runner = cast(TrendsQueryRunner, self.source_runner) + return trends_runner.to_persons_query() raise ValueError(f"Cannot convert source query of type {self.query.source.kind} to persons query") - def to_persons_query(self) -> ast.SelectQuery: + def to_persons_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: return self.to_query() def calculate(self) -> HogQLQueryResponse: diff --git a/posthog/hogql_queries/insights/trends/aggregation_operations.py b/posthog/hogql_queries/insights/trends/aggregation_operations.py index 8b7d0cf71afda..31432a76a4da2 100644 --- a/posthog/hogql_queries/insights/trends/aggregation_operations.py +++ b/posthog/hogql_queries/insights/trends/aggregation_operations.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import List, Optional, cast from posthog.hogql import ast from posthog.hogql.parser import parse_expr, parse_select from posthog.hogql_queries.utils.query_date_range import QueryDateRange @@ -13,7 +13,7 @@ class QueryAlternator: _group_bys: List[ast.Expr] _select_from: ast.JoinExpr | None - def __init__(self, query: ast.SelectQuery | ast.SelectUnionQuery): + def __init__(self, query: ast.SelectQuery): assert isinstance(query, ast.SelectQuery) self._query = query @@ -21,7 +21,7 @@ def __init__(self, query: ast.SelectQuery | ast.SelectUnionQuery): self._group_bys = [] self._select_from = None - def build(self) -> ast.SelectQuery | ast.SelectUnionQuery: + def build(self) -> ast.SelectQuery: if len(self._selects) > 0: self._query.select.extend(self._selects) @@ -280,9 +280,9 @@ def _events_query( ) def get_query_orchestrator(self, events_where_clause: ast.Expr, sample_value: ast.RatioExpr): - events_query = self._events_query(events_where_clause, sample_value) - inner_select = self._inner_select_query(events_query) - parent_select = self._parent_select_query(inner_select) + events_query = cast(ast.SelectQuery, self._events_query(events_where_clause, sample_value)) + inner_select = cast(ast.SelectQuery, self._inner_select_query(events_query)) + parent_select = cast(ast.SelectQuery, self._parent_select_query(inner_select)) class QueryOrchestrator: events_query_builder: QueryAlternator diff --git a/posthog/hogql_queries/insights/trends/query_builder.py b/posthog/hogql_queries/insights/trends/query_builder.py index 6c4ba3aa849ba..859f6a2f5e691 100644 --- a/posthog/hogql_queries/insights/trends/query_builder.py +++ b/posthog/hogql_queries/insights/trends/query_builder.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional, cast from posthog.hogql import ast from posthog.hogql.parser import parse_expr, parse_select from posthog.hogql.property import property_to_expr @@ -35,9 +35,9 @@ def __init__( self.series = series self.timings = timings - def build_query(self) -> ast.SelectUnionQuery: + def build_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: date_subqueries = self._get_date_subqueries() - event_query = self._get_events_subquery() + event_query = self._get_events_subquery(False) date_events_union = ast.SelectUnionQuery(select_queries=[*date_subqueries, event_query]) @@ -46,11 +46,21 @@ def build_query(self) -> ast.SelectUnionQuery: return full_query + def build_persons_query(self) -> ast.SelectQuery: + event_query = self._get_events_subquery(True) + + event_query.select = [ast.Alias(alias="person_id", expr=ast.Field(chain=["e", "person_id"]))] + event_query.group_by = None + + return event_query + def _get_date_subqueries(self) -> List[ast.SelectQuery]: if not self._breakdown.enabled: return [ - parse_select( - """ + cast( + ast.SelectQuery, + parse_select( + """ SELECT 0 AS total, dateTrunc({interval}, {date_to}) - {number_interval_period} AS day_start @@ -59,25 +69,31 @@ def _get_date_subqueries(self) -> List[ast.SelectQuery]: coalesce(dateDiff({interval}, {date_from}, {date_to}), 0) ) """, - placeholders={ - **self.query_date_range.to_placeholders(), - }, + placeholders={ + **self.query_date_range.to_placeholders(), + }, + ), ), - parse_select( - """ + cast( + ast.SelectQuery, + parse_select( + """ SELECT 0 AS total, {date_from} AS day_start """, - placeholders={ - **self.query_date_range.to_placeholders(), - }, + placeholders={ + **self.query_date_range.to_placeholders(), + }, + ), ), ] return [ - parse_select( - """ + cast( + ast.SelectQuery, + parse_select( + """ SELECT 0 AS total, ticks.day_start as day_start, @@ -101,16 +117,19 @@ def _get_date_subqueries(self) -> List[ast.SelectQuery]: ) as sec ORDER BY breakdown_value, day_start """, - placeholders={ - **self.query_date_range.to_placeholders(), - **self._breakdown.placeholders(), - }, + placeholders={ + **self.query_date_range.to_placeholders(), + **self._breakdown.placeholders(), + }, + ), ) ] - def _get_events_subquery(self) -> ast.SelectQuery: - default_query = parse_select( - """ + def _get_events_subquery(self, no_modifications: Optional[bool]) -> ast.SelectQuery: + default_query = cast( + ast.SelectQuery, + parse_select( + """ SELECT {aggregation_operation} AS total, dateTrunc({interval}, timestamp) AS day_start @@ -119,16 +138,19 @@ def _get_events_subquery(self) -> ast.SelectQuery: WHERE {events_filter} GROUP BY day_start """, - placeholders={ - **self.query_date_range.to_placeholders(), - "events_filter": self._events_filter(), - "aggregation_operation": self._aggregation_operation.select_aggregation(), - "sample": self._sample_value(), - }, + placeholders={ + **self.query_date_range.to_placeholders(), + "events_filter": self._events_filter(), + "aggregation_operation": self._aggregation_operation.select_aggregation(), + "sample": self._sample_value(), + }, + ), ) # No breakdowns and no complex series aggregation - if not self._breakdown.enabled and not self._aggregation_operation.requires_query_orchestration(): + if ( + not self._breakdown.enabled and not self._aggregation_operation.requires_query_orchestration() + ) or no_modifications is True: return default_query # Both breakdowns and complex series aggregation elif self._breakdown.enabled and self._aggregation_operation.requires_query_orchestration(): @@ -161,14 +183,17 @@ def _get_events_subquery(self) -> ast.SelectQuery: return default_query def _outer_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: - query = parse_select( - """ + query = cast( + ast.SelectQuery, + parse_select( + """ SELECT groupArray(day_start) AS date, groupArray(count) AS total FROM {inner_query} """, - placeholders={"inner_query": inner_query}, + placeholders={"inner_query": inner_query}, + ), ) if self._breakdown.enabled: @@ -179,8 +204,10 @@ def _outer_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: return query def _inner_select_query(self, inner_query: ast.SelectUnionQuery) -> ast.SelectQuery: - query = parse_select( - """ + query = cast( + ast.SelectQuery, + parse_select( + """ SELECT sum(total) AS count, day_start @@ -188,7 +215,8 @@ def _inner_select_query(self, inner_query: ast.SelectUnionQuery) -> ast.SelectQu GROUP BY day_start ORDER BY day_start ASC """, - placeholders={"inner_query": inner_query}, + placeholders={"inner_query": inner_query}, + ), ) if self._breakdown.enabled: diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index cfbcb60fdf28e..02bae1e35da6f 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -46,7 +46,7 @@ def __init__( query: TrendsQuery | Dict[str, Any], team: Team, timings: Optional[HogQLTimings] = None, - in_export_context: Optional[int] = None, + in_export_context: Optional[bool] = None, ): super().__init__(query, team, timings, in_export_context) self.series = self.setup_series() @@ -93,6 +93,26 @@ def to_query(self) -> List[ast.SelectQuery]: return queries + def to_persons_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: + queries = [] + with self.timings.measure("trends_persons_query"): + for series in self.series: + if not series.is_previous_period_series: + query_date_range = self.query_date_range + else: + query_date_range = self.query_previous_date_range + + query_builder = TrendsQueryBuilder( + trends_query=series.overriden_query or self.query, + team=self.team, + query_date_range=query_date_range, + series=series.series, + timings=self.timings, + ) + queries.append(query_builder.build_persons_query()) + + return ast.SelectUnionQuery(select_queries=queries) + def calculate(self): queries = self.to_query() diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index 8f0656e7922bc..13c59c6d51c88 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -232,7 +232,7 @@ def run(self, refresh_requested: Optional[bool] = None) -> CachedQueryResponse: def to_query(self) -> ast.SelectQuery: raise NotImplementedError() - def to_persons_query(self) -> ast.SelectQuery: + def to_persons_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: # TODO: add support for selecting and filtering by breakdowns raise NotImplementedError() From a4e91fecc91b0540056105302adb194c7844f749 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Tue, 31 Oct 2023 16:48:20 +0000 Subject: [PATCH 3/3] Added the ability to view all chart types on trends --- .../hogql_queries/insights/trends/display.py | 68 ++++++++++ .../insights/trends/query_builder.py | 21 ++- .../insights/trends/series_with_extras.py | 3 + .../trends/test/test_query_builder.py | 122 ++++++++++++++++++ .../insights/trends/trends_query_runner.py | 113 ++++++++++++---- 5 files changed, 299 insertions(+), 28 deletions(-) create mode 100644 posthog/hogql_queries/insights/trends/display.py create mode 100644 posthog/hogql_queries/insights/trends/test/test_query_builder.py diff --git a/posthog/hogql_queries/insights/trends/display.py b/posthog/hogql_queries/insights/trends/display.py new file mode 100644 index 0000000000000..db0fa29e0045e --- /dev/null +++ b/posthog/hogql_queries/insights/trends/display.py @@ -0,0 +1,68 @@ +from posthog.hogql import ast +from posthog.schema import ChartDisplayType + + +class TrendsDisplay: + display_type: ChartDisplayType + + def __init__(self, display_type: ChartDisplayType) -> None: + self.display_type = display_type + + def should_aggregate_values(self) -> bool: + return ( + self.display_type == ChartDisplayType.BoldNumber + or self.display_type == ChartDisplayType.ActionsPie + or self.display_type == ChartDisplayType.ActionsBarValue + or self.display_type == ChartDisplayType.WorldMap + ) + + def wrap_inner_query(self, inner_query: ast.SelectQuery, breakdown_enabled: bool) -> ast.SelectQuery: + if self.display_type == ChartDisplayType.ActionsLineGraphCumulative: + return self._get_cumulative_query(inner_query, breakdown_enabled) + + return inner_query + + def should_wrap_inner_query(self) -> bool: + return self.display_type == ChartDisplayType.ActionsLineGraphCumulative + + def modify_outer_query(self, outer_query: ast.SelectQuery, inner_query: ast.SelectQuery) -> ast.SelectQuery: + if ( + self.display_type == ChartDisplayType.BoldNumber + or self.display_type == ChartDisplayType.ActionsPie + or self.display_type == ChartDisplayType.WorldMap + ): + return ast.SelectQuery( + select=[ + ast.Alias( + alias="total", + expr=ast.Call(name="sum", args=[ast.Field(chain=["count"])]), + ) + ], + select_from=ast.JoinExpr(table=inner_query), + ) + + return outer_query + + def _get_cumulative_query(self, inner_query: ast.SelectQuery, breakdown_enabled: bool) -> ast.SelectQuery: + if breakdown_enabled: + window_expr = ast.WindowExpr( + order_by=[ast.OrderExpr(expr=ast.Field(chain=["day_start"]), order="ASC")], + partition_by=[ast.Field(chain=["breakdown_value"])], + ) + else: + window_expr = ast.WindowExpr(order_by=[ast.OrderExpr(expr=ast.Field(chain=["day_start"]), order="ASC")]) + + return ast.SelectQuery( + select=[ + ast.Field(chain=["day_start"]), + ast.Alias( + alias="count", + expr=ast.WindowFunction( + name="sum", + args=[ast.Field(chain=["count"])], + over_expr=window_expr, + ), + ), + ], + select_from=ast.JoinExpr(table=inner_query), + ) diff --git a/posthog/hogql_queries/insights/trends/query_builder.py b/posthog/hogql_queries/insights/trends/query_builder.py index 859f6a2f5e691..ddf873f10a0da 100644 --- a/posthog/hogql_queries/insights/trends/query_builder.py +++ b/posthog/hogql_queries/insights/trends/query_builder.py @@ -7,11 +7,12 @@ AggregationOperations, ) from posthog.hogql_queries.insights.trends.breakdown import Breakdown +from posthog.hogql_queries.insights.trends.display import TrendsDisplay from posthog.hogql_queries.insights.trends.utils import series_event_name from posthog.hogql_queries.utils.query_date_range import QueryDateRange from posthog.models.filters.mixins.utils import cached_property from posthog.models.team.team import Team -from posthog.schema import ActionsNode, EventsNode, TrendsQuery +from posthog.schema import ActionsNode, ChartDisplayType, EventsNode, TrendsQuery class TrendsQueryBuilder: @@ -196,6 +197,8 @@ def _outer_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: ), ) + query = self._trends_display.modify_outer_query(outer_query=query, inner_query=inner_query) + if self._breakdown.enabled: query.select.append(ast.Field(chain=["breakdown_value"])) query.group_by = [ast.Field(chain=["breakdown_value"])] @@ -224,6 +227,11 @@ def _inner_select_query(self, inner_query: ast.SelectUnionQuery) -> ast.SelectQu query.group_by.append(ast.Field(chain=["breakdown_value"])) query.order_by.append(ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC")) + if self._trends_display.should_wrap_inner_query(): + query = self._trends_display.wrap_inner_query(query, self._breakdown.enabled) + if self._breakdown.enabled: + query.select.append(ast.Field(chain=["breakdown_value"])) + return query def _events_filter(self) -> ast.Expr: @@ -298,5 +306,14 @@ def _breakdown(self): ) @cached_property - def _aggregation_operation(self): + def _aggregation_operation(self) -> AggregationOperations: return AggregationOperations(self.series, self.query_date_range) + + @cached_property + def _trends_display(self) -> TrendsDisplay: + if self.query.trendsFilter is None or self.query.trendsFilter.display is None: + display = ChartDisplayType.ActionsLineGraph + else: + display = self.query.trendsFilter.display + + return TrendsDisplay(display) diff --git a/posthog/hogql_queries/insights/trends/series_with_extras.py b/posthog/hogql_queries/insights/trends/series_with_extras.py index df8ff57fb0e7d..fb63a205f33d0 100644 --- a/posthog/hogql_queries/insights/trends/series_with_extras.py +++ b/posthog/hogql_queries/insights/trends/series_with_extras.py @@ -6,13 +6,16 @@ class SeriesWithExtras: series: EventsNode | ActionsNode is_previous_period_series: Optional[bool] overriden_query: Optional[TrendsQuery] + aggregate_values: Optional[bool] def __init__( self, series: EventsNode | ActionsNode, is_previous_period_series: Optional[bool], overriden_query: Optional[TrendsQuery], + aggregate_values: Optional[bool], ): self.series = series self.is_previous_period_series = is_previous_period_series self.overriden_query = overriden_query + self.aggregate_values = aggregate_values diff --git a/posthog/hogql_queries/insights/trends/test/test_query_builder.py b/posthog/hogql_queries/insights/trends/test/test_query_builder.py new file mode 100644 index 0000000000000..ce65a1605123c --- /dev/null +++ b/posthog/hogql_queries/insights/trends/test/test_query_builder.py @@ -0,0 +1,122 @@ +from datetime import datetime +from freezegun import freeze_time + +from posthog.hogql.query import execute_hogql_query +from posthog.hogql.timings import HogQLTimings +from posthog.hogql_queries.insights.trends.query_builder import TrendsQueryBuilder +from posthog.hogql_queries.utils.query_date_range import QueryDateRange +from posthog.schema import ( + BaseMathType, + BreakdownFilter, + BreakdownType, + ChartDisplayType, + DateRange, + EventsNode, + HogQLQueryResponse, + TrendsFilter, + TrendsQuery, +) +from posthog.test.base import BaseTest, _create_event, _create_person + + +class TestQueryBuilder(BaseTest): + def setUp(self): + super().setUp() + + with freeze_time("2023-02-01"): + _create_person( + distinct_ids=["some_id"], + team_id=self.team.pk, + properties={"$some_prop": "something", "$another_prop": "something"}, + ) + _create_event( + event="$pageview", + team=self.team, + distinct_id="some_id", + properties={"$geoip_country_code": "AU"}, + ) + + def get_response(self, trends_query: TrendsQuery) -> HogQLQueryResponse: + query_date_range = QueryDateRange( + date_range=trends_query.dateRange, + team=self.team, + interval=trends_query.interval, + now=datetime.now(), + ) + + timings = HogQLTimings() + + query_builder = TrendsQueryBuilder( + trends_query=trends_query, + team=self.team, + query_date_range=query_date_range, + series=trends_query.series[0], + timings=timings, + ) + + query = query_builder.build_query() + + return execute_hogql_query( + query_type="TrendsQuery", + query=query, + team=self.team, + timings=timings, + ) + + def test_column_names(self): + trends_query = TrendsQuery( + kind="TrendsQuery", + dateRange=DateRange(date_from="2023-01-01"), + series=[EventsNode(event="$pageview", math=BaseMathType.total)], + ) + + response = self.get_response(trends_query) + + assert response.columns is not None + assert set(response.columns).issubset({"date", "total", "breakdown_value"}) + + def assert_column_names_with_display_type(self, display_type: ChartDisplayType): + trends_query = TrendsQuery( + kind="TrendsQuery", + dateRange=DateRange(date_from="2023-01-01"), + series=[EventsNode(event="$pageview")], + trendsFilter=TrendsFilter(display=display_type), + ) + + response = self.get_response(trends_query) + + assert response.columns is not None + assert set(response.columns).issubset({"date", "total", "breakdown_value"}) + + def assert_column_names_with_display_type_and_breakdowns(self, display_type: ChartDisplayType): + trends_query = TrendsQuery( + kind="TrendsQuery", + dateRange=DateRange(date_from="2023-01-01"), + series=[EventsNode(event="$pageview")], + trendsFilter=TrendsFilter(display=display_type), + breakdown=BreakdownFilter(breakdown="$geoip_country_code", breakdown_type=BreakdownType.event), + ) + + response = self.get_response(trends_query) + + assert response.columns is not None + assert set(response.columns).issubset({"date", "total", "breakdown_value"}) + + def test_column_names_with_display_type(self): + self.assert_column_names_with_display_type(ChartDisplayType.ActionsAreaGraph) + self.assert_column_names_with_display_type(ChartDisplayType.ActionsBar) + self.assert_column_names_with_display_type(ChartDisplayType.ActionsBarValue) + self.assert_column_names_with_display_type(ChartDisplayType.ActionsLineGraph) + self.assert_column_names_with_display_type(ChartDisplayType.ActionsPie) + self.assert_column_names_with_display_type(ChartDisplayType.BoldNumber) + self.assert_column_names_with_display_type(ChartDisplayType.WorldMap) + self.assert_column_names_with_display_type(ChartDisplayType.ActionsLineGraphCumulative) + + def test_column_names_with_display_type_and_breakdowns(self): + self.assert_column_names_with_display_type_and_breakdowns(ChartDisplayType.ActionsAreaGraph) + self.assert_column_names_with_display_type_and_breakdowns(ChartDisplayType.ActionsBar) + self.assert_column_names_with_display_type_and_breakdowns(ChartDisplayType.ActionsBarValue) + self.assert_column_names_with_display_type_and_breakdowns(ChartDisplayType.ActionsLineGraph) + self.assert_column_names_with_display_type_and_breakdowns(ChartDisplayType.ActionsPie) + self.assert_column_names_with_display_type_and_breakdowns(ChartDisplayType.WorldMap) + self.assert_column_names_with_display_type_and_breakdowns(ChartDisplayType.ActionsLineGraphCumulative) diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index 02bae1e35da6f..067b06e3f0565 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -15,6 +15,7 @@ from posthog.hogql import ast from posthog.hogql.query import execute_hogql_query from posthog.hogql.timings import HogQLTimings +from posthog.hogql_queries.insights.trends.display import TrendsDisplay from posthog.hogql_queries.insights.trends.query_builder import TrendsQueryBuilder from posthog.hogql_queries.insights.trends.series_with_extras import SeriesWithExtras from posthog.hogql_queries.query_runner import QueryRunner @@ -29,6 +30,7 @@ from posthog.models.property_definition import PropertyDefinition from posthog.schema import ( ActionsNode, + ChartDisplayType, EventsNode, HogQLQueryResponse, TrendsQuery, @@ -146,28 +148,67 @@ def build_series_response(self, response: HogQLQueryResponse, series: SeriesWith if response.results is None: return [] + def get_value(name: str, val: Any): + if name not in ["date", "total", "breakdown_value"]: + raise Exception("Column not found in hogql results") + if response.columns is None: + raise Exception("No columns returned from hogql results") + + index = response.columns.index(name) + return val[index] + res = [] for val in response.results: - series_object = { - "data": val[1], - "labels": [item.strftime("%-d-%b-%Y") for item in val[0]], # TODO: Add back in hour formatting - "days": [item.strftime("%Y-%m-%d") for item in val[0]], # TODO: Add back in hour formatting - "count": float(sum(val[1])), - "label": "All events" if self.series_event(series.series) is None else self.series_event(series.series), - "filter": self._query_to_filter(), - "action": { # TODO: Populate missing props in `action` - "id": self.series_event(series.series), - "type": "events", - "order": 0, - "name": self.series_event(series.series) or "All events", - "custom_name": None, - "math": series.series.math, - "math_property": None, - "math_hogql": None, - "math_group_type_index": None, - "properties": {}, - }, - } + if series.aggregate_values: + series_object = { + "data": [], + "days": [], + "count": 0, + "aggregated_value": get_value("total", val), + "label": "All events" + if self.series_event(series.series) is None + else self.series_event(series.series), + "filter": self._query_to_filter(), + "action": { # TODO: Populate missing props in `action` + "id": self.series_event(series.series), + "type": "events", + "order": 0, + "name": self.series_event(series.series) or "All events", + "custom_name": None, + "math": series.series.math, + "math_property": None, + "math_hogql": None, + "math_group_type_index": None, + "properties": {}, + }, + } + else: + series_object = { + "data": get_value("total", val), + "labels": [ + item.strftime("%-d-%b-%Y") for item in get_value("date", val) + ], # TODO: Add back in hour formatting + "days": [ + item.strftime("%Y-%m-%d") for item in get_value("date", val) + ], # TODO: Add back in hour formatting + "count": float(sum(get_value("total", val))), + "label": "All events" + if self.series_event(series.series) is None + else self.series_event(series.series), + "filter": self._query_to_filter(), + "action": { # TODO: Populate missing props in `action` + "id": self.series_event(series.series), + "type": "events", + "order": 0, + "name": self.series_event(series.series) or "All events", + "custom_name": None, + "math": series.series.math, + "math_property": None, + "math_hogql": None, + "math_group_type_index": None, + "properties": {}, + }, + } # Modifications for when comparing to previous period if self.query.trendsFilter is not None and self.query.trendsFilter.compare: @@ -186,18 +227,18 @@ def build_series_response(self, response: HogQLQueryResponse, series: SeriesWith # Modifications for when breakdowns are active if self.query.breakdown is not None and self.query.breakdown.breakdown is not None: if self._is_breakdown_field_boolean(): - remapped_label = self._convert_boolean(val[2]) + remapped_label = self._convert_boolean(get_value("breakdown_value", val)) series_object["label"] = "{} - {}".format(series_object["label"], remapped_label) series_object["breakdown_value"] = remapped_label elif self.query.breakdown.breakdown_type == "cohort": - cohort_id = val[2] + cohort_id = get_value("breakdown_value", val) cohort_name = Cohort.objects.get(pk=cohort_id).name series_object["label"] = "{} - {}".format(series_object["label"], cohort_name) - series_object["breakdown_value"] = val[2] + series_object["breakdown_value"] = get_value("breakdown_value", val) else: - series_object["label"] = "{} - {}".format(series_object["label"], val[2]) - series_object["breakdown_value"] = val[2] + series_object["label"] = "{} - {}".format(series_object["label"], get_value("breakdown_value", val)) + series_object["breakdown_value"] = get_value("breakdown_value", val) res.append(series_object) return res @@ -226,7 +267,15 @@ def series_event(self, series: EventsNode | ActionsNode) -> str | None: return None def setup_series(self) -> List[SeriesWithExtras]: - series_with_extras = [SeriesWithExtras(series, None, None) for series in self.query.series] + series_with_extras = [ + SeriesWithExtras( + series, + None, + None, + self._trends_display.should_aggregate_values(), + ) + for series in self.query.series + ] if self.query.breakdown is not None and self.query.breakdown.breakdown_type == "cohort": updated_series = [] @@ -240,6 +289,7 @@ def setup_series(self) -> List[SeriesWithExtras]: series=series.series, is_previous_period_series=series.is_previous_period_series, overriden_query=copied_query, + aggregate_values=self._trends_display.should_aggregate_values(), ) ) series_with_extras = updated_series @@ -252,6 +302,7 @@ def setup_series(self) -> List[SeriesWithExtras]: series=series.series, is_previous_period_series=False, overriden_query=series.overriden_query, + aggregate_values=self._trends_display.should_aggregate_values(), ) ) updated_series.append( @@ -259,6 +310,7 @@ def setup_series(self) -> List[SeriesWithExtras]: series=series.series, is_previous_period_series=True, overriden_query=series.overriden_query, + aggregate_values=self._trends_display.should_aggregate_values(), ) ) series_with_extras = updated_series @@ -352,3 +404,12 @@ def _query_to_filter(self) -> Dict[str, any]: filter_dict.update(**self.query.breakdown.__dict__) return {k: v for k, v in filter_dict.items() if v is not None} + + @cached_property + def _trends_display(self) -> TrendsDisplay: + if self.query.trendsFilter is None or self.query.trendsFilter.display is None: + display = ChartDisplayType.ActionsLineGraph + else: + display = self.query.trendsFilter.display + + return TrendsDisplay(display)