diff --git a/posthog/hogql_queries/web_analytics/stats_table.py b/posthog/hogql_queries/web_analytics/stats_table.py index 198eb3c9b40cb..aeef63fe97e82 100644 --- a/posthog/hogql_queries/web_analytics/stats_table.py +++ b/posthog/hogql_queries/web_analytics/stats_table.py @@ -57,12 +57,11 @@ def to_query(self) -> ast.SelectQuery: if self.query.includeBounceRate: return self.to_entry_bounce_query() - if self._has_session_properties(): - return self._to_main_query_with_session_properties() - - return self.to_main_query() + return self.to_main_query( + self._counts_breakdown_value(), include_session_properties=self._has_session_properties() + ) - def to_main_query(self) -> ast.SelectQuery: + def to_main_query(self, breakdown, *, include_session_properties=False) -> ast.SelectQuery: with self.timings.measure("stats_table_query"): # Base selects, always returns the breakdown value, and the total number of visitors selects = [ @@ -87,150 +86,43 @@ def to_main_query(self) -> ast.SelectQuery: if self._include_extra_aggregation_value(): selects.append(self._extra_aggregation_value()) - query = ast.SelectQuery( - select=selects, - select_from=ast.JoinExpr(table=self._main_inner_query()), - group_by=[ast.Field(chain=["context.columns.breakdown_value"])], - order_by=[ - ast.OrderExpr(expr=ast.Field(chain=["context.columns.visitors"]), order="DESC"), - ast.OrderExpr( - expr=ast.Field( - chain=[ - "context.columns.views" - if self.query.conversionGoal is None - else "context.columns.total_conversions" - ] - ), - order="DESC", + query = ast.SelectQuery( + select=selects, + select_from=ast.JoinExpr( + table=self._main_inner_query( + breakdown, + include_session_properties=include_session_properties, + ) ), - ast.OrderExpr(expr=ast.Field(chain=["context.columns.breakdown_value"]), order="ASC"), - ], - ) - - return query - - def _to_main_query_with_session_properties(self) -> ast.SelectQuery: - with self.timings.measure("stats_table_query"): - query = parse_select( - """ -WITH - start_timestamp >= {date_from} AND start_timestamp < {date_to} AS current_period_segment, - start_timestamp >= {date_from_previous_period} AND start_timestamp < {date_from} AS previous_period_segment -SELECT - {processed_breakdown_value} AS "context.columns.breakdown_value", - tuple( - uniqIf(filtered_person_id, current_period_segment), - uniqIf(filtered_person_id, previous_period_segment) - ) AS "context.columns.visitors", - tuple( - sumIf(filtered_pageview_count, current_period_segment), - sumIf(filtered_pageview_count, previous_period_segment) - ) AS "context.columns.views" -FROM ( - SELECT - any(person_id) AS filtered_person_id, - count() AS filtered_pageview_count, - {breakdown_value} AS breakdown_value, - session.session_id AS session_id, - min(session.$start_timestamp) as start_timestamp - FROM events - WHERE and( - timestamp >= {date_from_previous_period}, - timestamp < {date_to}, - events.event == '$pageview', - {event_properties}, - {session_properties}, - {where_breakdown} - ) - GROUP BY session_id, breakdown_value -) -GROUP BY "context.columns.breakdown_value" -ORDER BY "context.columns.visitors" DESC, -"context.columns.views" DESC, -"context.columns.breakdown_value" ASC -""", - timings=self.timings, - placeholders={ - "breakdown_value": self._counts_breakdown_value(), - "processed_breakdown_value": self._processed_breakdown_value(), - "where_breakdown": self.where_breakdown(), - "event_properties": self._event_properties(), - "session_properties": self._session_properties(), - "date_from_previous_period": self._date_from_previous_period(), - "date_from": self._date_from(), - "date_to": self._date_to(), - }, + group_by=[ast.Field(chain=["context.columns.breakdown_value"])], + order_by=[ + ast.OrderExpr(expr=ast.Field(chain=["context.columns.visitors"]), order="DESC"), + ast.OrderExpr( + expr=ast.Field( + chain=[ + "context.columns.views" + if self.query.conversionGoal is None + else "context.columns.total_conversions" + ] + ), + order="DESC", + ), + ast.OrderExpr(expr=ast.Field(chain=["context.columns.breakdown_value"]), order="ASC"), + ], ) - assert isinstance(query, ast.SelectQuery) - - if self.query.breakdownBy == WebStatsBreakdown.LANGUAGE: - query.select.append(self._extra_aggregation_value()) return query def to_entry_bounce_query(self) -> ast.SelectQuery: - with self.timings.measure("stats_table_query"): - query = parse_select( - """ -WITH - start_timestamp >= {date_from} AND start_timestamp < {date_to} AS current_period_segment, - start_timestamp >= {date_from_previous_period} AND start_timestamp < {date_from} AS previous_period_segment -SELECT - breakdown_value AS "context.columns.breakdown_value", - tuple( - uniqIf(filtered_person_id, current_period_segment), - uniqIf(filtered_person_id, previous_period_segment) - ) AS "context.columns.visitors", - tuple( - sumIf(filtered_pageview_count, current_period_segment), - sumIf(filtered_pageview_count, previous_period_segment) - ) AS "context.columns.views", - tuple( - avgIf(is_bounce, current_period_segment), - avgIf(is_bounce, previous_period_segment) - ) AS "context.columns.bounce_rate", -FROM ( - SELECT - {bounce_breakdown} AS breakdown_value, - any(person_id) AS filtered_person_id, - count() AS filtered_pageview_count, - any(session.$is_bounce) AS is_bounce, - session.session_id AS session_id, - min(session.$start_timestamp) as start_timestamp - FROM events - WHERE and( - timestamp >= {date_from_previous_period}, - timestamp < {date_to}, - events.event == '$pageview', - {event_properties}, - {session_properties}, - {where_breakdown} - ) - GROUP BY session_id, breakdown_value -) -GROUP BY "context.columns.breakdown_value" -ORDER BY "context.columns.visitors" DESC, -"context.columns.views" DESC, -"context.columns.breakdown_value" ASC -""", - timings=self.timings, - placeholders={ - "bounce_breakdown": self._bounce_entry_pathname_breakdown(), - "where_breakdown": self.where_breakdown(), - "session_properties": self._session_properties(), - "event_properties": self._event_properties(), - "date_from_previous_period": self._date_from_previous_period(), - "date_from": self._date_from(), - "date_to": self._date_to(), - }, - ) - assert isinstance(query, ast.SelectQuery) + query = self.to_main_query(self._bounce_entry_pathname_breakdown(), include_session_properties=True) + + if self.query.conversionGoal is None: + query.select.append(self._period_comparison_tuple("is_bounce", "context.columns.bounce_rate", "avg")) + return query + # TODO: Support conversion goal def to_path_scroll_bounce_query(self) -> ast.SelectQuery: - if self.query.breakdownBy != WebStatsBreakdown.PAGE: - raise NotImplementedError("Scroll depth is only supported for page breakdowns") - with self.timings.measure("stats_table_bounce_query"): query = parse_select( """ @@ -349,6 +241,7 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: assert isinstance(query, ast.SelectQuery) return query + # TODO: Support conversion goal def to_path_bounce_query(self) -> ast.SelectQuery: if self.query.breakdownBy not in [WebStatsBreakdown.INITIAL_PAGE, WebStatsBreakdown.PAGE]: raise NotImplementedError("Bounce rate is only supported for page breakdowns") @@ -435,27 +328,23 @@ def to_path_bounce_query(self) -> ast.SelectQuery: assert isinstance(query, ast.SelectQuery) return query - def _main_inner_query(self): + def _main_inner_query(self, breakdown, *, include_session_properties=False): query = parse_select( """ SELECT any(person_id) AS filtered_person_id, count() AS filtered_pageview_count, {breakdown_value} AS breakdown_value, + session.session_id AS session_id, + any(session.$is_bounce) AS is_bounce, min(session.$start_timestamp) as start_timestamp FROM events -WHERE and( - timestamp >= {date_from}, - timestamp < {date_to}, - events.event == '$pageview', - {all_properties}, - {where_breakdown} -) -GROUP BY events.`$session_id`, breakdown_value +WHERE and(timestamp >= {date_from}, timestamp < {date_to}, events.event == '$pageview', {all_properties}, {where_breakdown}) +GROUP BY session_id, breakdown_value """, timings=self.timings, placeholders={ - "breakdown_value": self._counts_breakdown_value(), + "breakdown_value": breakdown, "date_from": self._date_from_previous_period(), "date_to": self._date_to(), "all_properties": self._all_properties(), @@ -465,6 +354,9 @@ def _main_inner_query(self): assert isinstance(query, ast.SelectQuery) + if include_session_properties: + query.where.args.append(self._session_properties()) # query.where is an `ast.Call` + if self.conversion_count_expr and self.conversion_person_id_expr: query.select.append(ast.Alias(alias="conversion_count", expr=self.conversion_count_expr)) query.select.append(ast.Alias(alias="conversion_person_id", expr=self.conversion_person_id_expr))