From b75810d83b7dd176d8a9de63faf3ff7434249038 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Wed, 18 Oct 2023 14:13:11 +0100 Subject: [PATCH] feat: added session duration as a breakdown option (#18056) * Added session duration as a breakdown option * Update query snapshots * Fixed tests --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- posthog/hogql/database/schema/events.py | 1 + .../test/__snapshots__/test_database.ambr | 8 +++ posthog/hogql/test/test_resolver.py | 8 +++ .../insights/trends/breakdown.py | 12 +++++ .../insights/trends/breakdown_session.py | 52 +++++++++++++++++++ .../insights/trends/breakdown_values.py | 51 ++++++++++++------ .../insights/trends/query_builder.py | 18 ++++++- .../insights/trends/trends_query_runner.py | 12 +++-- 8 files changed, 140 insertions(+), 22 deletions(-) create mode 100644 posthog/hogql_queries/insights/trends/breakdown_session.py diff --git a/posthog/hogql/database/schema/events.py b/posthog/hogql/database/schema/events.py index 9934511ef59445..ba27ff7c5e1584 100644 --- a/posthog/hogql/database/schema/events.py +++ b/posthog/hogql/database/schema/events.py @@ -63,6 +63,7 @@ class EventsTable(Table): "distinct_id": StringDatabaseField(name="distinct_id"), "elements_chain": StringDatabaseField(name="elements_chain"), "created_at": DateTimeDatabaseField(name="created_at"), + "$session_id": StringDatabaseField(name="$session_id"), # Lazy table that adds a join to the persons table "pdi": LazyJoin( from_field="distinct_id", diff --git a/posthog/hogql/database/test/__snapshots__/test_database.ambr b/posthog/hogql/database/test/__snapshots__/test_database.ambr index 3cd02926282cf6..1a0efafd1a4c65 100644 --- a/posthog/hogql/database/test/__snapshots__/test_database.ambr +++ b/posthog/hogql/database/test/__snapshots__/test_database.ambr @@ -30,6 +30,10 @@ "key": "created_at", "type": "datetime" }, + { + "key": "$session_id", + "type": "string" + }, { "key": "pdi", "type": "lazy_table", @@ -812,6 +816,10 @@ "key": "created_at", "type": "datetime" }, + { + "key": "$session_id", + "type": "string" + }, { "key": "pdi", "type": "lazy_table", diff --git a/posthog/hogql/test/test_resolver.py b/posthog/hogql/test/test_resolver.py index 6c281d788624dd..80e6ad644be0c8 100644 --- a/posthog/hogql/test/test_resolver.py +++ b/posthog/hogql/test/test_resolver.py @@ -779,6 +779,7 @@ def test_asterisk_expander_table(self): chain=["elements_chain"], type=ast.FieldType(name="elements_chain", table_type=events_table_type) ), ast.Field(chain=["created_at"], type=ast.FieldType(name="created_at", table_type=events_table_type)), + ast.Field(chain=["$session_id"], type=ast.FieldType(name="$session_id", table_type=events_table_type)), ast.Field(chain=["$group_0"], type=ast.FieldType(name="$group_0", table_type=events_table_type)), ast.Field(chain=["$group_1"], type=ast.FieldType(name="$group_1", table_type=events_table_type)), ast.Field(chain=["$group_2"], type=ast.FieldType(name="$group_2", table_type=events_table_type)), @@ -816,6 +817,9 @@ def test_asterisk_expander_table_alias(self): ast.Field( chain=["created_at"], type=ast.FieldType(name="created_at", table_type=events_table_alias_type) ), + ast.Field( + chain=["$session_id"], type=ast.FieldType(name="$session_id", table_type=events_table_alias_type) + ), ast.Field(chain=["$group_0"], type=ast.FieldType(name="$group_0", table_type=events_table_alias_type)), ast.Field(chain=["$group_1"], type=ast.FieldType(name="$group_1", table_type=events_table_alias_type)), ast.Field(chain=["$group_2"], type=ast.FieldType(name="$group_2", table_type=events_table_alias_type)), @@ -892,6 +896,7 @@ def test_asterisk_expander_from_subquery_table(self): "distinct_id": ast.FieldType(name="distinct_id", table_type=events_table_type), "elements_chain": ast.FieldType(name="elements_chain", table_type=events_table_type), "created_at": ast.FieldType(name="created_at", table_type=events_table_type), + "$session_id": ast.FieldType(name="$session_id", table_type=events_table_type), "$group_0": ast.FieldType(name="$group_0", table_type=events_table_type), "$group_1": ast.FieldType(name="$group_1", table_type=events_table_type), "$group_2": ast.FieldType(name="$group_2", table_type=events_table_type), @@ -913,6 +918,7 @@ def test_asterisk_expander_from_subquery_table(self): type=ast.FieldType(name="elements_chain", table_type=inner_select_type), ), ast.Field(chain=["created_at"], type=ast.FieldType(name="created_at", table_type=inner_select_type)), + ast.Field(chain=["$session_id"], type=ast.FieldType(name="$session_id", table_type=inner_select_type)), ast.Field(chain=["$group_0"], type=ast.FieldType(name="$group_0", table_type=inner_select_type)), ast.Field(chain=["$group_1"], type=ast.FieldType(name="$group_1", table_type=inner_select_type)), ast.Field(chain=["$group_2"], type=ast.FieldType(name="$group_2", table_type=inner_select_type)), @@ -950,6 +956,7 @@ def test_asterisk_expander_select_union(self): "distinct_id": ast.FieldType(name="distinct_id", table_type=events_table_type), "elements_chain": ast.FieldType(name="elements_chain", table_type=events_table_type), "created_at": ast.FieldType(name="created_at", table_type=events_table_type), + "$session_id": ast.FieldType(name="$session_id", table_type=events_table_type), "$group_0": ast.FieldType(name="$group_0", table_type=events_table_type), "$group_1": ast.FieldType(name="$group_1", table_type=events_table_type), "$group_2": ast.FieldType(name="$group_2", table_type=events_table_type), @@ -974,6 +981,7 @@ def test_asterisk_expander_select_union(self): type=ast.FieldType(name="elements_chain", table_type=inner_select_type), ), ast.Field(chain=["created_at"], type=ast.FieldType(name="created_at", table_type=inner_select_type)), + ast.Field(chain=["$session_id"], type=ast.FieldType(name="$session_id", table_type=inner_select_type)), ast.Field(chain=["$group_0"], type=ast.FieldType(name="$group_0", table_type=inner_select_type)), ast.Field(chain=["$group_1"], type=ast.FieldType(name="$group_1", table_type=inner_select_type)), ast.Field(chain=["$group_2"], type=ast.FieldType(name="$group_2", table_type=inner_select_type)), diff --git a/posthog/hogql_queries/insights/trends/breakdown.py b/posthog/hogql_queries/insights/trends/breakdown.py index f58f27ff2e6b6c..403c5be4da5360 100644 --- a/posthog/hogql_queries/insights/trends/breakdown.py +++ b/posthog/hogql_queries/insights/trends/breakdown.py @@ -2,6 +2,7 @@ from posthog.hogql import ast from posthog.hogql.parser import parse_expr from posthog.hogql.timings import HogQLTimings +from posthog.hogql_queries.insights.trends.breakdown_session import BreakdownSession from posthog.hogql_queries.insights.trends.breakdown_values import BreakdownValues from posthog.hogql_queries.insights.trends.utils import get_properties_chain, series_event_name from posthog.hogql_queries.utils.query_date_range import QueryDateRange @@ -35,6 +36,10 @@ def __init__( def enabled(self) -> bool: return self.query.breakdown is not None and self.query.breakdown.breakdown is not None + @cached_property + def is_session_type(self) -> bool: + return self.enabled and self.query.breakdown.breakdown_type == "session" + @cached_property def is_histogram_breakdown(self) -> bool: return self.enabled and self.query.breakdown.breakdown_histogram_bin_count is not None @@ -166,8 +171,15 @@ def _get_breakdown_histogram_multi_if(self) -> ast.Expr: @cached_property def _properties_chain(self): + if self.is_session_type: + return self._breakdown_session.session_duration_property_chain() + return get_properties_chain( breakdown_type=self.query.breakdown.breakdown_type, breakdown_field=self.query.breakdown.breakdown, group_type_index=self.query.breakdown.breakdown_group_type_index, ) + + @cached_property + def _breakdown_session(self): + return BreakdownSession(self.query_date_range) diff --git a/posthog/hogql_queries/insights/trends/breakdown_session.py b/posthog/hogql_queries/insights/trends/breakdown_session.py new file mode 100644 index 00000000000000..1ab518cbec34eb --- /dev/null +++ b/posthog/hogql_queries/insights/trends/breakdown_session.py @@ -0,0 +1,52 @@ +from typing import List +from posthog.hogql import ast +from posthog.hogql.parser import parse_select +from posthog.hogql_queries.utils.query_date_range import QueryDateRange + + +class BreakdownSession: + query_date_range: QueryDateRange + + def __init__(self, query_date_range: QueryDateRange): + self.query_date_range = query_date_range + + def session_inner_join(self) -> ast.JoinExpr: + join = ast.JoinExpr( + table=ast.Field(chain=["events"]), + alias="e", + next_join=ast.JoinExpr( + join_type="INNER JOIN", + alias="sessions", + table=self._session_select_query(), + constraint=ast.JoinConstraint( + expr=ast.CompareOperation( + left=ast.Field(chain=["sessions", "$session_id"]), + op=ast.CompareOperationOp.Eq, + right=ast.Field(chain=["e", "$session_id"]), + ) + ), + ), + ) + + return join + + def session_duration_property_chain(self) -> List[str]: + return ["sessions", "session_duration"] + + def session_duration_field(self) -> ast.Field: + return ast.Field(chain=self.session_duration_property_chain()) + + def _session_select_query(self) -> ast.SelectQuery: + return parse_select( + """ + SELECT + "$session_id", dateDiff('second', min(timestamp), max(timestamp)) as session_duration + FROM events + WHERE + "$session_id" != '' AND + timestamp >= {date_from} - INTERVAL 24 HOUR AND + timestamp <= {date_to} + INTERVAL 24 HOUR + GROUP BY "$session_id" + """, + placeholders=self.query_date_range.to_placeholders(), + ) diff --git a/posthog/hogql_queries/insights/trends/breakdown_values.py b/posthog/hogql_queries/insights/trends/breakdown_values.py index 118bd0f1dc56a4..72ae54d0286be7 100644 --- a/posthog/hogql_queries/insights/trends/breakdown_values.py +++ b/posthog/hogql_queries/insights/trends/breakdown_values.py @@ -2,8 +2,10 @@ from posthog.hogql import ast from posthog.hogql.parser import parse_expr, parse_select from posthog.hogql.query import execute_hogql_query +from posthog.hogql_queries.insights.trends.breakdown_session import BreakdownSession from posthog.hogql_queries.insights.trends.utils import get_properties_chain from posthog.hogql_queries.utils.query_date_range import QueryDateRange +from posthog.models.filters.mixins.utils import cached_property from posthog.models.team.team import Team @@ -43,6 +45,8 @@ def get_breakdown_values(self) -> List[str]: alias="value", expr=parse_expr(self.breakdown_field), ) + elif self.breakdown_type == "session": + select_field = ast.Alias(alias="value", expr=self._breakdown_session.session_duration_field()) else: select_field = ast.Alias( alias="value", @@ -55,22 +59,20 @@ def get_breakdown_values(self) -> List[str]: ), ) - query = parse_select( + inner_events_query = parse_select( """ - SELECT groupArray(value) FROM ( - SELECT - {select_field}, - count(*) as count - FROM - events e - WHERE - {events_where} - GROUP BY - value - ORDER BY - count DESC, - value DESC - ) + SELECT + {select_field}, + count(e.uuid) as count + FROM + events e + WHERE + {events_where} + GROUP BY + value + ORDER BY + count DESC, + value DESC """, placeholders={ "events_where": self._where_filter(), @@ -78,9 +80,20 @@ def get_breakdown_values(self) -> List[str]: }, ) + query = parse_select( + """ + SELECT groupArray(value) FROM ({inner_events_query}) + """, + placeholders={ + "inner_events_query": inner_events_query, + }, + ) + if self.histogram_bin_count is not None: - expr = self._to_bucketing_expression() - query.select = [expr] + query.select = [self._to_bucketing_expression()] + + if self.breakdown_type == "session": + inner_events_query.select_from = self._breakdown_session.session_inner_join() response = execute_hogql_query( query_type="TrendsQueryBreakdownValues", @@ -127,3 +140,7 @@ def _to_bucketing_expression(self) -> ast.Expr: qunatile_expression = f"quantiles({','.join([f'{quantile:.2f}' for quantile in quantiles])})(value)" return parse_expr(f"arrayCompact(arrayMap(x -> floor(x, 2), {qunatile_expression}))") + + @cached_property + def _breakdown_session(self): + return BreakdownSession(self.query_date_range) diff --git a/posthog/hogql_queries/insights/trends/query_builder.py b/posthog/hogql_queries/insights/trends/query_builder.py index 99574f97d70376..ac1f4811d1cbf6 100644 --- a/posthog/hogql_queries/insights/trends/query_builder.py +++ b/posthog/hogql_queries/insights/trends/query_builder.py @@ -4,6 +4,7 @@ from posthog.hogql.property import property_to_expr from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.insights.trends.breakdown import Breakdown +from posthog.hogql_queries.insights.trends.breakdown_session import BreakdownSession from posthog.hogql_queries.insights.trends.utils import series_event_name from posthog.hogql_queries.utils.query_date_range import QueryDateRange from posthog.models.filters.mixins.utils import cached_property @@ -128,6 +129,9 @@ def _get_events_subquery(self) -> ast.SelectQuery: query.select.append(self._breakdown.column_expr()) query.group_by.append(ast.Field(chain=["breakdown_value"])) + if self._breakdown.is_session_type: + query.select_from = self._breakdown_session.session_inner_join() + return query def _outer_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: @@ -214,6 +218,14 @@ def _events_filter(self) -> ast.Expr: # Breakdown if self._breakdown.enabled and not self._breakdown.is_histogram_breakdown: filters.append(self._breakdown.events_where_filter()) + if self._breakdown.is_session_type: + filters.append( + ast.CompareOperation( + left=self._breakdown_session.session_duration_field(), + op=ast.CompareOperationOp.NotEq, + right=ast.Constant(value=None), + ) + ) if len(filters) == 0: return ast.Constant(value=True) @@ -226,7 +238,7 @@ def _aggregation_operation(self) -> ast.Expr: if self.series.math == "hogql": return parse_expr(self.series.math_hogql) - return parse_expr("count(*)") + return parse_expr("count(e.uuid)") # Using string interpolation for SAMPLE due to HogQL limitations with `UNION ALL` and `SAMPLE` AST nodes def _sample_value(self) -> str: @@ -244,3 +256,7 @@ def _breakdown(self): query_date_range=self.query_date_range, timings=self.timings, ) + + @cached_property + def _breakdown_session(self): + return BreakdownSession(self.query_date_range) diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index 672007501c8b6c..b3f31ad5f6a6fc 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -246,7 +246,11 @@ def apply_formula(self, formula: str, results: List[Dict[str, Any]]) -> List[Dic return [new_result] def _is_breakdown_field_boolean(self): - if self.query.breakdown.breakdown_type == "hogql" or self.query.breakdown.breakdown_type == "cohort": + if ( + self.query.breakdown.breakdown_type == "hogql" + or self.query.breakdown.breakdown_type == "cohort" + or self.query.breakdown.breakdown_type == "session" + ): return False if self.query.breakdown.breakdown_type == "person": @@ -286,10 +290,10 @@ def _query_to_filter(self) -> Dict[str, any]: "interval": self.query.interval, } - if self.query.breakdown is not None: - filter_dict.update(self.query.breakdown.__dict__) - if self.query.trendsFilter is not None: filter_dict.update(self.query.trendsFilter.__dict__) + if self.query.breakdown is not None: + filter_dict.update(**self.query.breakdown.__dict__) + return {k: v for k, v in filter_dict.items() if v is not None}