Skip to content

Commit

Permalink
feat: added session duration as a breakdown option (PostHog#18056)
Browse files Browse the repository at this point in the history
* Added session duration as a breakdown option

* Update query snapshots

* Fixed tests

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
2 people authored and Justicea83 committed Oct 25, 2023
1 parent 3ada6df commit b75810d
Show file tree
Hide file tree
Showing 8 changed files with 140 additions and 22 deletions.
1 change: 1 addition & 0 deletions posthog/hogql/database/schema/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class EventsTable(Table):
"distinct_id": StringDatabaseField(name="distinct_id"),
"elements_chain": StringDatabaseField(name="elements_chain"),
"created_at": DateTimeDatabaseField(name="created_at"),
"$session_id": StringDatabaseField(name="$session_id"),
# Lazy table that adds a join to the persons table
"pdi": LazyJoin(
from_field="distinct_id",
Expand Down
8 changes: 8 additions & 0 deletions posthog/hogql/database/test/__snapshots__/test_database.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
"key": "created_at",
"type": "datetime"
},
{
"key": "$session_id",
"type": "string"
},
{
"key": "pdi",
"type": "lazy_table",
Expand Down Expand Up @@ -812,6 +816,10 @@
"key": "created_at",
"type": "datetime"
},
{
"key": "$session_id",
"type": "string"
},
{
"key": "pdi",
"type": "lazy_table",
Expand Down
8 changes: 8 additions & 0 deletions posthog/hogql/test/test_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,7 @@ def test_asterisk_expander_table(self):
chain=["elements_chain"], type=ast.FieldType(name="elements_chain", table_type=events_table_type)
),
ast.Field(chain=["created_at"], type=ast.FieldType(name="created_at", table_type=events_table_type)),
ast.Field(chain=["$session_id"], type=ast.FieldType(name="$session_id", table_type=events_table_type)),
ast.Field(chain=["$group_0"], type=ast.FieldType(name="$group_0", table_type=events_table_type)),
ast.Field(chain=["$group_1"], type=ast.FieldType(name="$group_1", table_type=events_table_type)),
ast.Field(chain=["$group_2"], type=ast.FieldType(name="$group_2", table_type=events_table_type)),
Expand Down Expand Up @@ -816,6 +817,9 @@ def test_asterisk_expander_table_alias(self):
ast.Field(
chain=["created_at"], type=ast.FieldType(name="created_at", table_type=events_table_alias_type)
),
ast.Field(
chain=["$session_id"], type=ast.FieldType(name="$session_id", table_type=events_table_alias_type)
),
ast.Field(chain=["$group_0"], type=ast.FieldType(name="$group_0", table_type=events_table_alias_type)),
ast.Field(chain=["$group_1"], type=ast.FieldType(name="$group_1", table_type=events_table_alias_type)),
ast.Field(chain=["$group_2"], type=ast.FieldType(name="$group_2", table_type=events_table_alias_type)),
Expand Down Expand Up @@ -892,6 +896,7 @@ def test_asterisk_expander_from_subquery_table(self):
"distinct_id": ast.FieldType(name="distinct_id", table_type=events_table_type),
"elements_chain": ast.FieldType(name="elements_chain", table_type=events_table_type),
"created_at": ast.FieldType(name="created_at", table_type=events_table_type),
"$session_id": ast.FieldType(name="$session_id", table_type=events_table_type),
"$group_0": ast.FieldType(name="$group_0", table_type=events_table_type),
"$group_1": ast.FieldType(name="$group_1", table_type=events_table_type),
"$group_2": ast.FieldType(name="$group_2", table_type=events_table_type),
Expand All @@ -913,6 +918,7 @@ def test_asterisk_expander_from_subquery_table(self):
type=ast.FieldType(name="elements_chain", table_type=inner_select_type),
),
ast.Field(chain=["created_at"], type=ast.FieldType(name="created_at", table_type=inner_select_type)),
ast.Field(chain=["$session_id"], type=ast.FieldType(name="$session_id", table_type=inner_select_type)),
ast.Field(chain=["$group_0"], type=ast.FieldType(name="$group_0", table_type=inner_select_type)),
ast.Field(chain=["$group_1"], type=ast.FieldType(name="$group_1", table_type=inner_select_type)),
ast.Field(chain=["$group_2"], type=ast.FieldType(name="$group_2", table_type=inner_select_type)),
Expand Down Expand Up @@ -950,6 +956,7 @@ def test_asterisk_expander_select_union(self):
"distinct_id": ast.FieldType(name="distinct_id", table_type=events_table_type),
"elements_chain": ast.FieldType(name="elements_chain", table_type=events_table_type),
"created_at": ast.FieldType(name="created_at", table_type=events_table_type),
"$session_id": ast.FieldType(name="$session_id", table_type=events_table_type),
"$group_0": ast.FieldType(name="$group_0", table_type=events_table_type),
"$group_1": ast.FieldType(name="$group_1", table_type=events_table_type),
"$group_2": ast.FieldType(name="$group_2", table_type=events_table_type),
Expand All @@ -974,6 +981,7 @@ def test_asterisk_expander_select_union(self):
type=ast.FieldType(name="elements_chain", table_type=inner_select_type),
),
ast.Field(chain=["created_at"], type=ast.FieldType(name="created_at", table_type=inner_select_type)),
ast.Field(chain=["$session_id"], type=ast.FieldType(name="$session_id", table_type=inner_select_type)),
ast.Field(chain=["$group_0"], type=ast.FieldType(name="$group_0", table_type=inner_select_type)),
ast.Field(chain=["$group_1"], type=ast.FieldType(name="$group_1", table_type=inner_select_type)),
ast.Field(chain=["$group_2"], type=ast.FieldType(name="$group_2", table_type=inner_select_type)),
Expand Down
12 changes: 12 additions & 0 deletions posthog/hogql_queries/insights/trends/breakdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from posthog.hogql import ast
from posthog.hogql.parser import parse_expr
from posthog.hogql.timings import HogQLTimings
from posthog.hogql_queries.insights.trends.breakdown_session import BreakdownSession
from posthog.hogql_queries.insights.trends.breakdown_values import BreakdownValues
from posthog.hogql_queries.insights.trends.utils import get_properties_chain, series_event_name
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
Expand Down Expand Up @@ -35,6 +36,10 @@ def __init__(
def enabled(self) -> bool:
return self.query.breakdown is not None and self.query.breakdown.breakdown is not None

@cached_property
def is_session_type(self) -> bool:
return self.enabled and self.query.breakdown.breakdown_type == "session"

@cached_property
def is_histogram_breakdown(self) -> bool:
return self.enabled and self.query.breakdown.breakdown_histogram_bin_count is not None
Expand Down Expand Up @@ -166,8 +171,15 @@ def _get_breakdown_histogram_multi_if(self) -> ast.Expr:

@cached_property
def _properties_chain(self):
if self.is_session_type:
return self._breakdown_session.session_duration_property_chain()

return get_properties_chain(
breakdown_type=self.query.breakdown.breakdown_type,
breakdown_field=self.query.breakdown.breakdown,
group_type_index=self.query.breakdown.breakdown_group_type_index,
)

@cached_property
def _breakdown_session(self):
return BreakdownSession(self.query_date_range)
52 changes: 52 additions & 0 deletions posthog/hogql_queries/insights/trends/breakdown_session.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from typing import List
from posthog.hogql import ast
from posthog.hogql.parser import parse_select
from posthog.hogql_queries.utils.query_date_range import QueryDateRange


class BreakdownSession:
query_date_range: QueryDateRange

def __init__(self, query_date_range: QueryDateRange):
self.query_date_range = query_date_range

def session_inner_join(self) -> ast.JoinExpr:
join = ast.JoinExpr(
table=ast.Field(chain=["events"]),
alias="e",
next_join=ast.JoinExpr(
join_type="INNER JOIN",
alias="sessions",
table=self._session_select_query(),
constraint=ast.JoinConstraint(
expr=ast.CompareOperation(
left=ast.Field(chain=["sessions", "$session_id"]),
op=ast.CompareOperationOp.Eq,
right=ast.Field(chain=["e", "$session_id"]),
)
),
),
)

return join

def session_duration_property_chain(self) -> List[str]:
return ["sessions", "session_duration"]

def session_duration_field(self) -> ast.Field:
return ast.Field(chain=self.session_duration_property_chain())

def _session_select_query(self) -> ast.SelectQuery:
return parse_select(
"""
SELECT
"$session_id", dateDiff('second', min(timestamp), max(timestamp)) as session_duration
FROM events
WHERE
"$session_id" != '' AND
timestamp >= {date_from} - INTERVAL 24 HOUR AND
timestamp <= {date_to} + INTERVAL 24 HOUR
GROUP BY "$session_id"
""",
placeholders=self.query_date_range.to_placeholders(),
)
51 changes: 34 additions & 17 deletions posthog/hogql_queries/insights/trends/breakdown_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
from posthog.hogql import ast
from posthog.hogql.parser import parse_expr, parse_select
from posthog.hogql.query import execute_hogql_query
from posthog.hogql_queries.insights.trends.breakdown_session import BreakdownSession
from posthog.hogql_queries.insights.trends.utils import get_properties_chain
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.models.filters.mixins.utils import cached_property
from posthog.models.team.team import Team


Expand Down Expand Up @@ -43,6 +45,8 @@ def get_breakdown_values(self) -> List[str]:
alias="value",
expr=parse_expr(self.breakdown_field),
)
elif self.breakdown_type == "session":
select_field = ast.Alias(alias="value", expr=self._breakdown_session.session_duration_field())
else:
select_field = ast.Alias(
alias="value",
Expand All @@ -55,32 +59,41 @@ def get_breakdown_values(self) -> List[str]:
),
)

query = parse_select(
inner_events_query = parse_select(
"""
SELECT groupArray(value) FROM (
SELECT
{select_field},
count(*) as count
FROM
events e
WHERE
{events_where}
GROUP BY
value
ORDER BY
count DESC,
value DESC
)
SELECT
{select_field},
count(e.uuid) as count
FROM
events e
WHERE
{events_where}
GROUP BY
value
ORDER BY
count DESC,
value DESC
""",
placeholders={
"events_where": self._where_filter(),
"select_field": select_field,
},
)

query = parse_select(
"""
SELECT groupArray(value) FROM ({inner_events_query})
""",
placeholders={
"inner_events_query": inner_events_query,
},
)

if self.histogram_bin_count is not None:
expr = self._to_bucketing_expression()
query.select = [expr]
query.select = [self._to_bucketing_expression()]

if self.breakdown_type == "session":
inner_events_query.select_from = self._breakdown_session.session_inner_join()

response = execute_hogql_query(
query_type="TrendsQueryBreakdownValues",
Expand Down Expand Up @@ -127,3 +140,7 @@ def _to_bucketing_expression(self) -> ast.Expr:
qunatile_expression = f"quantiles({','.join([f'{quantile:.2f}' for quantile in quantiles])})(value)"

return parse_expr(f"arrayCompact(arrayMap(x -> floor(x, 2), {qunatile_expression}))")

@cached_property
def _breakdown_session(self):
return BreakdownSession(self.query_date_range)
18 changes: 17 additions & 1 deletion posthog/hogql_queries/insights/trends/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from posthog.hogql.property import property_to_expr
from posthog.hogql.timings import HogQLTimings
from posthog.hogql_queries.insights.trends.breakdown import Breakdown
from posthog.hogql_queries.insights.trends.breakdown_session import BreakdownSession
from posthog.hogql_queries.insights.trends.utils import series_event_name
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.models.filters.mixins.utils import cached_property
Expand Down Expand Up @@ -128,6 +129,9 @@ def _get_events_subquery(self) -> ast.SelectQuery:
query.select.append(self._breakdown.column_expr())
query.group_by.append(ast.Field(chain=["breakdown_value"]))

if self._breakdown.is_session_type:
query.select_from = self._breakdown_session.session_inner_join()

return query

def _outer_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery:
Expand Down Expand Up @@ -214,6 +218,14 @@ def _events_filter(self) -> ast.Expr:
# Breakdown
if self._breakdown.enabled and not self._breakdown.is_histogram_breakdown:
filters.append(self._breakdown.events_where_filter())
if self._breakdown.is_session_type:
filters.append(
ast.CompareOperation(
left=self._breakdown_session.session_duration_field(),
op=ast.CompareOperationOp.NotEq,
right=ast.Constant(value=None),
)
)

if len(filters) == 0:
return ast.Constant(value=True)
Expand All @@ -226,7 +238,7 @@ def _aggregation_operation(self) -> ast.Expr:
if self.series.math == "hogql":
return parse_expr(self.series.math_hogql)

return parse_expr("count(*)")
return parse_expr("count(e.uuid)")

# Using string interpolation for SAMPLE due to HogQL limitations with `UNION ALL` and `SAMPLE` AST nodes
def _sample_value(self) -> str:
Expand All @@ -244,3 +256,7 @@ def _breakdown(self):
query_date_range=self.query_date_range,
timings=self.timings,
)

@cached_property
def _breakdown_session(self):
return BreakdownSession(self.query_date_range)
12 changes: 8 additions & 4 deletions posthog/hogql_queries/insights/trends/trends_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,11 @@ def apply_formula(self, formula: str, results: List[Dict[str, Any]]) -> List[Dic
return [new_result]

def _is_breakdown_field_boolean(self):
if self.query.breakdown.breakdown_type == "hogql" or self.query.breakdown.breakdown_type == "cohort":
if (
self.query.breakdown.breakdown_type == "hogql"
or self.query.breakdown.breakdown_type == "cohort"
or self.query.breakdown.breakdown_type == "session"
):
return False

if self.query.breakdown.breakdown_type == "person":
Expand Down Expand Up @@ -286,10 +290,10 @@ def _query_to_filter(self) -> Dict[str, any]:
"interval": self.query.interval,
}

if self.query.breakdown is not None:
filter_dict.update(self.query.breakdown.__dict__)

if self.query.trendsFilter is not None:
filter_dict.update(self.query.trendsFilter.__dict__)

if self.query.breakdown is not None:
filter_dict.update(**self.query.breakdown.__dict__)

return {k: v for k, v in filter_dict.items() if v is not None}

0 comments on commit b75810d

Please sign in to comment.