From 6948ee9ada06b0b5fbfe0def1691762582ce0e2b Mon Sep 17 00:00:00 2001 From: Rafa Audibert Date: Mon, 9 Dec 2024 17:20:52 -0300 Subject: [PATCH] perf: Remove session optimization See https://github.com/PostHog/posthog/pull/26586#discussion_r1875855669 --- .../web_analytics/stats_table.py | 44 +++---------------- 1 file changed, 5 insertions(+), 39 deletions(-) diff --git a/posthog/hogql_queries/web_analytics/stats_table.py b/posthog/hogql_queries/web_analytics/stats_table.py index 12bf790f7829e..0a0612b572aa7 100644 --- a/posthog/hogql_queries/web_analytics/stats_table.py +++ b/posthog/hogql_queries/web_analytics/stats_table.py @@ -57,11 +57,9 @@ def to_query(self) -> ast.SelectQuery: if self.query.includeBounceRate: return self.to_entry_bounce_query() - return self.to_main_query( - self._counts_breakdown_value(), include_session_properties=self._has_session_properties() - ) + return self.to_main_query(self._counts_breakdown_value()) - def to_main_query(self, breakdown, *, include_session_properties=False) -> ast.SelectQuery: + def to_main_query(self, breakdown) -> ast.SelectQuery: with self.timings.measure("stats_table_query"): # Base selects, always returns the breakdown value, and the total number of visitors selects = [ @@ -101,12 +99,7 @@ def to_main_query(self, breakdown, *, include_session_properties=False) -> ast.S query = ast.SelectQuery( select=selects, - select_from=ast.JoinExpr( - table=self._main_inner_query( - breakdown, - include_session_properties=include_session_properties, - ) - ), + select_from=ast.JoinExpr(table=self._main_inner_query(breakdown)), group_by=[ast.Field(chain=["context.columns.breakdown_value"])], order_by=[ ast.OrderExpr(expr=ast.Field(chain=["context.columns.visitors"]), order="DESC"), @@ -127,14 +120,13 @@ def to_main_query(self, breakdown, *, include_session_properties=False) -> ast.S return query def to_entry_bounce_query(self) -> ast.SelectQuery: - query = self.to_main_query(self._bounce_entry_pathname_breakdown(), include_session_properties=True) + query = self.to_main_query(self._bounce_entry_pathname_breakdown()) if self.query.conversionGoal is None: query.select.append(self._period_comparison_tuple("is_bounce", "context.columns.bounce_rate", "avg")) return query - # TODO: Support conversion goal def to_path_scroll_bounce_query(self) -> ast.SelectQuery: with self.timings.measure("stats_table_bounce_query"): query = parse_select( @@ -254,7 +246,6 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: assert isinstance(query, ast.SelectQuery) return query - # TODO: Support conversion goal def to_path_bounce_query(self) -> ast.SelectQuery: if self.query.breakdownBy not in [WebStatsBreakdown.INITIAL_PAGE, WebStatsBreakdown.PAGE]: raise NotImplementedError("Bounce rate is only supported for page breakdowns") @@ -341,7 +332,7 @@ def to_path_bounce_query(self) -> ast.SelectQuery: assert isinstance(query, ast.SelectQuery) return query - def _main_inner_query(self, breakdown, *, include_session_properties=False): + def _main_inner_query(self, breakdown): query = parse_select( """ SELECT @@ -368,15 +359,6 @@ def _main_inner_query(self, breakdown, *, include_session_properties=False): assert isinstance(query, ast.SelectQuery) - if include_session_properties: - # Append session properties to the where clause if it exists - # - # We know `query.where` is an `ast.Call` because it's a `WHERE` clause - # and `ast.Call` is the only kind of expression that `query.where` can be - # but we need to convince mypy - if query.where is not None and isinstance(query.where, ast.Call): - query.where.args.append(self._session_properties()) - if self.conversion_count_expr and self.conversion_person_id_expr: query.select.append(ast.Alias(alias="conversion_count", expr=self.conversion_count_expr)) query.select.append(ast.Alias(alias="conversion_person_id", expr=self.conversion_person_id_expr)) @@ -482,22 +464,6 @@ def map_scroll_property(property: Union[EventPropertyFilter, PersonPropertyFilte ] return property_to_expr(properties, team=self.team, scope="event") - def _has_session_properties(self) -> bool: - return any( - get_property_type(p) == "session" for p in self.query.properties + self._test_account_filters - ) or self.query.breakdownBy in { - WebStatsBreakdown.INITIAL_CHANNEL_TYPE, - WebStatsBreakdown.INITIAL_REFERRING_DOMAIN, - WebStatsBreakdown.INITIAL_UTM_SOURCE, - WebStatsBreakdown.INITIAL_UTM_CAMPAIGN, - WebStatsBreakdown.INITIAL_UTM_MEDIUM, - WebStatsBreakdown.INITIAL_UTM_TERM, - WebStatsBreakdown.INITIAL_UTM_CONTENT, - WebStatsBreakdown.INITIAL_PAGE, - WebStatsBreakdown.EXIT_PAGE, - WebStatsBreakdown.INITIAL_UTM_SOURCE_MEDIUM_CAMPAIGN, - } - def _session_properties(self) -> ast.Expr: properties = [ p for p in self.query.properties + self._test_account_filters if get_property_type(p) == "session"