Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: added session duration as a breakdown option #18056

Merged
merged 3 commits into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions posthog/hogql/database/schema/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class EventsTable(Table):
"distinct_id": StringDatabaseField(name="distinct_id"),
"elements_chain": StringDatabaseField(name="elements_chain"),
"created_at": DateTimeDatabaseField(name="created_at"),
"$session_id": StringDatabaseField(name="$session_id"),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is actually a materialized field. I think accessing properties.$session_id will direct it to access the field with this name. However fine to add it explicitly too.

# Lazy table that adds a join to the persons table
"pdi": LazyJoin(
from_field="distinct_id",
Expand Down
12 changes: 12 additions & 0 deletions posthog/hogql_queries/insights/trends/breakdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from posthog.hogql import ast
from posthog.hogql.parser import parse_expr
from posthog.hogql.timings import HogQLTimings
from posthog.hogql_queries.insights.trends.breakdown_session import BreakdownSession
from posthog.hogql_queries.insights.trends.breakdown_values import BreakdownValues
from posthog.hogql_queries.insights.trends.utils import get_properties_chain, series_event_name
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
Expand Down Expand Up @@ -35,6 +36,10 @@ def __init__(
def enabled(self) -> bool:
return self.query.breakdown is not None and self.query.breakdown.breakdown is not None

@cached_property
def is_session_type(self) -> bool:
return self.enabled and self.query.breakdown.breakdown_type == "session"

@cached_property
def is_histogram_breakdown(self) -> bool:
return self.enabled and self.query.breakdown.breakdown_histogram_bin_count is not None
Expand Down Expand Up @@ -166,8 +171,15 @@ def _get_breakdown_histogram_multi_if(self) -> ast.Expr:

@cached_property
def _properties_chain(self):
if self.is_session_type:
return self._breakdown_session.session_duration_property_chain()

return get_properties_chain(
breakdown_type=self.query.breakdown.breakdown_type,
breakdown_field=self.query.breakdown.breakdown,
group_type_index=self.query.breakdown.breakdown_group_type_index,
)

@cached_property
def _breakdown_session(self):
return BreakdownSession(self.query_date_range)
52 changes: 52 additions & 0 deletions posthog/hogql_queries/insights/trends/breakdown_session.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from typing import List
from posthog.hogql import ast
from posthog.hogql.parser import parse_select
from posthog.hogql_queries.utils.query_date_range import QueryDateRange


class BreakdownSession:
query_date_range: QueryDateRange

def __init__(self, query_date_range: QueryDateRange):
self.query_date_range = query_date_range

def session_inner_join(self) -> ast.JoinExpr:
join = ast.JoinExpr(
table=ast.Field(chain=["events"]),
alias="e",
next_join=ast.JoinExpr(
join_type="INNER JOIN",
alias="sessions",
table=self._session_select_query(),
constraint=ast.JoinConstraint(
expr=ast.CompareOperation(
left=ast.Field(chain=["sessions", "$session_id"]),
op=ast.CompareOperationOp.Eq,
right=ast.Field(chain=["e", "$session_id"]),
)
),
),
)

return join

def session_duration_property_chain(self) -> List[str]:
return ["sessions", "session_duration"]

def session_duration_field(self) -> ast.Field:
return ast.Field(chain=self.session_duration_property_chain())

def _session_select_query(self) -> ast.SelectQuery:
return parse_select(
"""
SELECT
"$session_id", dateDiff('second', min(timestamp), max(timestamp)) as session_duration
FROM events
WHERE
"$session_id" != '' AND
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the "materialized columns are bad with nulls" issue. If properties.$session is not set, the materialized column $session_id is ''. If it's set to an empty string, it's also '', but if it's set to null or undefined, it'll come back as null. 🤯

timestamp >= {date_from} - INTERVAL 24 HOUR AND
timestamp <= {date_to} + INTERVAL 24 HOUR
GROUP BY "$session_id"
""",
placeholders=self.query_date_range.to_placeholders(),
)
51 changes: 34 additions & 17 deletions posthog/hogql_queries/insights/trends/breakdown_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
from posthog.hogql import ast
from posthog.hogql.parser import parse_expr, parse_select
from posthog.hogql.query import execute_hogql_query
from posthog.hogql_queries.insights.trends.breakdown_session import BreakdownSession
from posthog.hogql_queries.insights.trends.utils import get_properties_chain
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.models.filters.mixins.utils import cached_property
from posthog.models.team.team import Team


Expand Down Expand Up @@ -43,6 +45,8 @@ def get_breakdown_values(self) -> List[str]:
alias="value",
expr=parse_expr(self.breakdown_field),
)
elif self.breakdown_type == "session":
select_field = ast.Alias(alias="value", expr=self._breakdown_session.session_duration_field())
else:
select_field = ast.Alias(
alias="value",
Expand All @@ -55,32 +59,41 @@ def get_breakdown_values(self) -> List[str]:
),
)

query = parse_select(
inner_events_query = parse_select(
"""
SELECT groupArray(value) FROM (
SELECT
{select_field},
count(*) as count
FROM
events e
WHERE
{events_where}
GROUP BY
value
ORDER BY
count DESC,
value DESC
)
SELECT
{select_field},
count(e.uuid) as count
FROM
events e
WHERE
{events_where}
GROUP BY
value
ORDER BY
count DESC,
value DESC
""",
placeholders={
"events_where": self._where_filter(),
"select_field": select_field,
},
)

query = parse_select(
"""
SELECT groupArray(value) FROM ({inner_events_query})
""",
placeholders={
"inner_events_query": inner_events_query,
},
)

if self.histogram_bin_count is not None:
expr = self._to_bucketing_expression()
query.select = [expr]
query.select = [self._to_bucketing_expression()]

if self.breakdown_type == "session":
inner_events_query.select_from = self._breakdown_session.session_inner_join()

response = execute_hogql_query(
query_type="TrendsQueryBreakdownValues",
Expand Down Expand Up @@ -127,3 +140,7 @@ def _to_bucketing_expression(self) -> ast.Expr:
qunatile_expression = f"quantiles({','.join([f'{quantile:.2f}' for quantile in quantiles])})(value)"

return parse_expr(f"arrayCompact(arrayMap(x -> floor(x, 2), {qunatile_expression}))")

@cached_property
def _breakdown_session(self):
return BreakdownSession(self.query_date_range)
18 changes: 17 additions & 1 deletion posthog/hogql_queries/insights/trends/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from posthog.hogql.property import property_to_expr
from posthog.hogql.timings import HogQLTimings
from posthog.hogql_queries.insights.trends.breakdown import Breakdown
from posthog.hogql_queries.insights.trends.breakdown_session import BreakdownSession
from posthog.hogql_queries.insights.trends.utils import series_event_name
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.models.filters.mixins.utils import cached_property
Expand Down Expand Up @@ -128,6 +129,9 @@ def _get_events_subquery(self) -> ast.SelectQuery:
query.select.append(self._breakdown.column_expr())
query.group_by.append(ast.Field(chain=["breakdown_value"]))

if self._breakdown.is_session_type:
query.select_from = self._breakdown_session.session_inner_join()

return query

def _outer_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery:
Expand Down Expand Up @@ -214,6 +218,14 @@ def _events_filter(self) -> ast.Expr:
# Breakdown
if self._breakdown.enabled and not self._breakdown.is_histogram_breakdown:
filters.append(self._breakdown.events_where_filter())
if self._breakdown.is_session_type:
filters.append(
ast.CompareOperation(
left=self._breakdown_session.session_duration_field(),
op=ast.CompareOperationOp.NotEq,
right=ast.Constant(value=None),
)
)

if len(filters) == 0:
return ast.Constant(value=True)
Expand All @@ -226,7 +238,7 @@ def _aggregation_operation(self) -> ast.Expr:
if self.series.math == "hogql":
return parse_expr(self.series.math_hogql)

return parse_expr("count(*)")
return parse_expr("count(e.uuid)")

# Using string interpolation for SAMPLE due to HogQL limitations with `UNION ALL` and `SAMPLE` AST nodes
def _sample_value(self) -> str:
Expand All @@ -244,3 +256,7 @@ def _breakdown(self):
query_date_range=self.query_date_range,
timings=self.timings,
)

@cached_property
def _breakdown_session(self):
return BreakdownSession(self.query_date_range)
12 changes: 8 additions & 4 deletions posthog/hogql_queries/insights/trends/trends_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,11 @@ def apply_formula(self, formula: str, results: List[Dict[str, Any]]) -> List[Dic
return [new_result]

def _is_breakdown_field_boolean(self):
if self.query.breakdown.breakdown_type == "hogql" or self.query.breakdown.breakdown_type == "cohort":
if (
self.query.breakdown.breakdown_type == "hogql"
or self.query.breakdown.breakdown_type == "cohort"
or self.query.breakdown.breakdown_type == "session"
):
return False

if self.query.breakdown.breakdown_type == "person":
Expand Down Expand Up @@ -286,10 +290,10 @@ def _query_to_filter(self) -> Dict[str, any]:
"interval": self.query.interval,
}

if self.query.breakdown is not None:
filter_dict.update(self.query.breakdown.__dict__)

if self.query.trendsFilter is not None:
filter_dict.update(self.query.trendsFilter.__dict__)

if self.query.breakdown is not None:
filter_dict.update(**self.query.breakdown.__dict__)

return {k: v for k, v in filter_dict.items() if v is not None}
Loading