Skip to content

Commit

Permalink
Add conversion support to session and entry bounce queries
Browse files Browse the repository at this point in the history
  • Loading branch information
rafaeelaudibert committed Dec 3, 2024
1 parent dfd15b5 commit 1e8365e
Showing 1 changed file with 42 additions and 150 deletions.
192 changes: 42 additions & 150 deletions posthog/hogql_queries/web_analytics/stats_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,11 @@ def to_query(self) -> ast.SelectQuery:
if self.query.includeBounceRate:
return self.to_entry_bounce_query()

if self._has_session_properties():
return self._to_main_query_with_session_properties()

return self.to_main_query()
return self.to_main_query(
self._counts_breakdown_value(), include_session_properties=self._has_session_properties()
)

def to_main_query(self) -> ast.SelectQuery:
def to_main_query(self, breakdown, *, include_session_properties=False) -> ast.SelectQuery:
with self.timings.measure("stats_table_query"):
# Base selects, always returns the breakdown value, and the total number of visitors
selects = [
Expand All @@ -87,150 +86,43 @@ def to_main_query(self) -> ast.SelectQuery:
if self._include_extra_aggregation_value():
selects.append(self._extra_aggregation_value())

query = ast.SelectQuery(
select=selects,
select_from=ast.JoinExpr(table=self._main_inner_query()),
group_by=[ast.Field(chain=["context.columns.breakdown_value"])],
order_by=[
ast.OrderExpr(expr=ast.Field(chain=["context.columns.visitors"]), order="DESC"),
ast.OrderExpr(
expr=ast.Field(
chain=[
"context.columns.views"
if self.query.conversionGoal is None
else "context.columns.total_conversions"
]
),
order="DESC",
query = ast.SelectQuery(
select=selects,
select_from=ast.JoinExpr(
table=self._main_inner_query(
breakdown,
include_session_properties=include_session_properties,
)
),
ast.OrderExpr(expr=ast.Field(chain=["context.columns.breakdown_value"]), order="ASC"),
],
)

return query

def _to_main_query_with_session_properties(self) -> ast.SelectQuery:
with self.timings.measure("stats_table_query"):
query = parse_select(
"""
WITH
start_timestamp >= {date_from} AND start_timestamp < {date_to} AS current_period_segment,
start_timestamp >= {date_from_previous_period} AND start_timestamp < {date_from} AS previous_period_segment
SELECT
{processed_breakdown_value} AS "context.columns.breakdown_value",
tuple(
uniqIf(filtered_person_id, current_period_segment),
uniqIf(filtered_person_id, previous_period_segment)
) AS "context.columns.visitors",
tuple(
sumIf(filtered_pageview_count, current_period_segment),
sumIf(filtered_pageview_count, previous_period_segment)
) AS "context.columns.views"
FROM (
SELECT
any(person_id) AS filtered_person_id,
count() AS filtered_pageview_count,
{breakdown_value} AS breakdown_value,
session.session_id AS session_id,
min(session.$start_timestamp) as start_timestamp
FROM events
WHERE and(
timestamp >= {date_from_previous_period},
timestamp < {date_to},
events.event == '$pageview',
{event_properties},
{session_properties},
{where_breakdown}
)
GROUP BY session_id, breakdown_value
)
GROUP BY "context.columns.breakdown_value"
ORDER BY "context.columns.visitors" DESC,
"context.columns.views" DESC,
"context.columns.breakdown_value" ASC
""",
timings=self.timings,
placeholders={
"breakdown_value": self._counts_breakdown_value(),
"processed_breakdown_value": self._processed_breakdown_value(),
"where_breakdown": self.where_breakdown(),
"event_properties": self._event_properties(),
"session_properties": self._session_properties(),
"date_from_previous_period": self._date_from_previous_period(),
"date_from": self._date_from(),
"date_to": self._date_to(),
},
group_by=[ast.Field(chain=["context.columns.breakdown_value"])],
order_by=[
ast.OrderExpr(expr=ast.Field(chain=["context.columns.visitors"]), order="DESC"),
ast.OrderExpr(
expr=ast.Field(
chain=[
"context.columns.views"
if self.query.conversionGoal is None
else "context.columns.total_conversions"
]
),
order="DESC",
),
ast.OrderExpr(expr=ast.Field(chain=["context.columns.breakdown_value"]), order="ASC"),
],
)
assert isinstance(query, ast.SelectQuery)

if self.query.breakdownBy == WebStatsBreakdown.LANGUAGE:
query.select.append(self._extra_aggregation_value())

return query

def to_entry_bounce_query(self) -> ast.SelectQuery:
with self.timings.measure("stats_table_query"):
query = parse_select(
"""
WITH
start_timestamp >= {date_from} AND start_timestamp < {date_to} AS current_period_segment,
start_timestamp >= {date_from_previous_period} AND start_timestamp < {date_from} AS previous_period_segment
SELECT
breakdown_value AS "context.columns.breakdown_value",
tuple(
uniqIf(filtered_person_id, current_period_segment),
uniqIf(filtered_person_id, previous_period_segment)
) AS "context.columns.visitors",
tuple(
sumIf(filtered_pageview_count, current_period_segment),
sumIf(filtered_pageview_count, previous_period_segment)
) AS "context.columns.views",
tuple(
avgIf(is_bounce, current_period_segment),
avgIf(is_bounce, previous_period_segment)
) AS "context.columns.bounce_rate",
FROM (
SELECT
{bounce_breakdown} AS breakdown_value,
any(person_id) AS filtered_person_id,
count() AS filtered_pageview_count,
any(session.$is_bounce) AS is_bounce,
session.session_id AS session_id,
min(session.$start_timestamp) as start_timestamp
FROM events
WHERE and(
timestamp >= {date_from_previous_period},
timestamp < {date_to},
events.event == '$pageview',
{event_properties},
{session_properties},
{where_breakdown}
)
GROUP BY session_id, breakdown_value
)
GROUP BY "context.columns.breakdown_value"
ORDER BY "context.columns.visitors" DESC,
"context.columns.views" DESC,
"context.columns.breakdown_value" ASC
""",
timings=self.timings,
placeholders={
"bounce_breakdown": self._bounce_entry_pathname_breakdown(),
"where_breakdown": self.where_breakdown(),
"session_properties": self._session_properties(),
"event_properties": self._event_properties(),
"date_from_previous_period": self._date_from_previous_period(),
"date_from": self._date_from(),
"date_to": self._date_to(),
},
)
assert isinstance(query, ast.SelectQuery)
query = self.to_main_query(self._bounce_entry_pathname_breakdown(), include_session_properties=True)

if self.query.conversionGoal is None:
query.select.append(self._period_comparison_tuple("is_bounce", "context.columns.bounce_rate", "avg"))

return query

# TODO: Support conversion goal
def to_path_scroll_bounce_query(self) -> ast.SelectQuery:
if self.query.breakdownBy != WebStatsBreakdown.PAGE:
raise NotImplementedError("Scroll depth is only supported for page breakdowns")

with self.timings.measure("stats_table_bounce_query"):
query = parse_select(
"""
Expand Down Expand Up @@ -349,6 +241,7 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery:
assert isinstance(query, ast.SelectQuery)
return query

# TODO: Support conversion goal
def to_path_bounce_query(self) -> ast.SelectQuery:
if self.query.breakdownBy not in [WebStatsBreakdown.INITIAL_PAGE, WebStatsBreakdown.PAGE]:
raise NotImplementedError("Bounce rate is only supported for page breakdowns")
Expand Down Expand Up @@ -435,27 +328,23 @@ def to_path_bounce_query(self) -> ast.SelectQuery:
assert isinstance(query, ast.SelectQuery)
return query

def _main_inner_query(self):
def _main_inner_query(self, breakdown, *, include_session_properties=False):
query = parse_select(
"""
SELECT
any(person_id) AS filtered_person_id,
count() AS filtered_pageview_count,
{breakdown_value} AS breakdown_value,
session.session_id AS session_id,
any(session.$is_bounce) AS is_bounce,
min(session.$start_timestamp) as start_timestamp
FROM events
WHERE and(
timestamp >= {date_from},
timestamp < {date_to},
events.event == '$pageview',
{all_properties},
{where_breakdown}
)
GROUP BY events.`$session_id`, breakdown_value
WHERE and(timestamp >= {date_from}, timestamp < {date_to}, events.event == '$pageview', {all_properties}, {where_breakdown})
GROUP BY session_id, breakdown_value
""",
timings=self.timings,
placeholders={
"breakdown_value": self._counts_breakdown_value(),
"breakdown_value": breakdown,
"date_from": self._date_from_previous_period(),
"date_to": self._date_to(),
"all_properties": self._all_properties(),
Expand All @@ -465,6 +354,9 @@ def _main_inner_query(self):

assert isinstance(query, ast.SelectQuery)

if include_session_properties:
query.where.args.append(self._session_properties()) # query.where is an `ast.Call`

if self.conversion_count_expr and self.conversion_person_id_expr:
query.select.append(ast.Alias(alias="conversion_count", expr=self.conversion_count_expr))
query.select.append(ast.Alias(alias="conversion_person_id", expr=self.conversion_person_id_expr))
Expand Down

0 comments on commit 1e8365e

Please sign in to comment.