From 5397435b4ea46fcbb9bda497f27817fce54660dc Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Thu, 20 Jun 2024 11:32:48 +0100 Subject: [PATCH] Web analytics queries and their tests working --- .../test_session_where_clause_extractor.py | 22 +-- .../web_analytics/stats_table.py | 71 ++++---- .../test/test_web_stats_table.py | 155 ++++++++++++------ .../web_analytics/web_overview.py | 35 ++-- 4 files changed, 166 insertions(+), 117 deletions(-) diff --git a/posthog/hogql/database/schema/util/test/test_session_where_clause_extractor.py b/posthog/hogql/database/schema/util/test/test_session_where_clause_extractor.py index ef20964e23a67f..64b6533989addd 100644 --- a/posthog/hogql/database/schema/util/test/test_session_where_clause_extractor.py +++ b/posthog/hogql/database/schema/util/test/test_session_where_clause_extractor.py @@ -329,14 +329,14 @@ def test_join_with_events(self): FROM events JOIN (SELECT - sessions.session_id AS session_id + raw_sessions.session_id AS session_id FROM - sessions + raw_sessions WHERE and(equals(sessions.team_id, {self.team.id}), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, %(hogql_val_0)s), toIntervalDay(3)), %(hogql_val_1)s), 0)) GROUP BY - sessions.session_id, - sessions.session_id) AS sessions ON equals(events.`$session_id`, sessions.session_id) + raw_sessions.session_id, + raw_sessions.session_id) AS sessions ON equals(events.`$session_id`, sessions.session_id) WHERE and(equals(events.team_id, {self.team.id}), greater(toTimeZone(events.timestamp, %(hogql_val_2)s), %(hogql_val_3)s)) GROUP BY @@ -363,17 +363,17 @@ def test_union(self): FROM events LEFT JOIN (SELECT - dateDiff(%(hogql_val_0)s, min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, - sessions.session_id AS session_id + dateDiff(%(hogql_val_0)s, min(raw_sessions.min_timestamp), max(raw_sessions.max_timestamp)) AS `$session_duration`, + raw_sessions.session_id_v7 AS session_id_v7 FROM - sessions + raw_sessions WHERE - 
and(equals(sessions.team_id, {self.team.id}), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, %(hogql_val_1)s), toIntervalDay(3)), today()), 0)) + and(equals(raw_sessions.team_id, {self.team.id}), ifNull(lessOrEquals(minus(toTimeZone(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), %(hogql_val_1)s), toIntervalDay(3)), today()), 0)) GROUP BY - sessions.session_id, - sessions.session_id) AS events__session ON equals(events.`$session_id`, events__session.session_id) + raw_sessions.session_id_v7, + raw_sessions.session_id_v7) AS events__session ON equals(toUInt128(accurateCastOrNull(events.`$session_id`, %(hogql_val_2)s)), events__session.session_id_v7) WHERE - and(equals(events.team_id, {self.team.id}), less(toTimeZone(events.timestamp, %(hogql_val_2)s), today())) + and(equals(events.team_id, {self.team.id}), less(toTimeZone(events.timestamp, %(hogql_val_3)s), today())) LIMIT {MAX_SELECT_RETURNED_ROWS}""" assert expected == actual diff --git a/posthog/hogql_queries/web_analytics/stats_table.py b/posthog/hogql_queries/web_analytics/stats_table.py index 92521454bdb7fd..144c2ef0d63190 100644 --- a/posthog/hogql_queries/web_analytics/stats_table.py +++ b/posthog/hogql_queries/web_analytics/stats_table.py @@ -101,10 +101,9 @@ def _to_main_query_with_session_properties(self) -> ast.SelectQuery: SELECT any(person_id) AS person_id, count() AS filtered_pageview_count, - {breakdown_value} AS breakdown_value + {breakdown_value} AS breakdown_value, + session.session_id AS session_id FROM events - JOIN sessions - ON events.`$session_id` = sessions.session_id WHERE and( timestamp >= {date_from}, timestamp < {date_to}, @@ -113,7 +112,7 @@ def _to_main_query_with_session_properties(self) -> ast.SelectQuery: {session_properties}, {where_breakdown} ) - GROUP BY events.`$session_id`, breakdown_value + GROUP BY session_id, breakdown_value ) GROUP BY "context.columns.breakdown_value" ORDER BY "context.columns.visitors" DESC, @@ -146,10 +145,9 @@ 
def to_entry_bounce_query(self) -> ast.SelectQuery: any(person_id) AS person_id, count() AS filtered_pageview_count, {bounce_breakdown} AS breakdown_value, - any(sessions.$is_bounce) AS is_bounce + any(session.$is_bounce) AS is_bounce, + session.session_id AS session_id FROM events - JOIN sessions - ON events.`$session_id` = sessions.session_id WHERE and( timestamp >= {date_from}, timestamp < {date_to}, @@ -158,7 +156,7 @@ def to_entry_bounce_query(self) -> ast.SelectQuery: {session_properties}, {where_breakdown} ) - GROUP BY events.`$session_id`, breakdown_value + GROUP BY session_id, breakdown_value ) GROUP BY "context.columns.breakdown_value" ORDER BY "context.columns.visitors" DESC, @@ -200,10 +198,9 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: SELECT any(person_id) AS person_id, count() AS filtered_pageview_count, - {breakdown_value} AS breakdown_value + {breakdown_value} AS breakdown_value, + session.session_id AS session_id FROM events - JOIN sessions - ON events.`$session_id` = sessions.session_id WHERE and( timestamp >= {date_from}, timestamp < {date_to}, @@ -212,7 +209,7 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: {session_properties}, breakdown_value IS NOT NULL ) - GROUP BY events.`$session_id`, breakdown_value + GROUP BY session_id, breakdown_value ) GROUP BY breakdown_value ) AS counts @@ -223,10 +220,9 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: FROM ( SELECT {bounce_breakdown_value} AS breakdown_value, -- use $entry_pathname to find the bounce rate for sessions that started on this pathname - any(session.`$is_bounce`) AS is_bounce + any(session.`$is_bounce`) AS is_bounce, + session.session_id AS session_id FROM events - JOIN sessions - ON events.`$session_id` = sessions.session_id WHERE and( timestamp >= {date_from}, timestamp < {date_to}, @@ -235,7 +231,7 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: {session_properties}, breakdown_value IS NOT NULL ) - GROUP BY events.`$session_id`, 
breakdown_value + GROUP BY session_id, breakdown_value ) GROUP BY breakdown_value ) AS bounce @@ -254,10 +250,9 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: ELSE 0 END ) AS scroll_gt80_percentage_state, - avgState(toFloat(events.properties.`$prev_pageview_max_scroll_percentage`)) as average_scroll_percentage_state + avgState(toFloat(events.properties.`$prev_pageview_max_scroll_percentage`)) as average_scroll_percentage_state, + session.session_id AS session_id FROM events - JOIN sessions - ON events.`$session_id` = sessions.session_id WHERE and( timestamp >= {date_from}, timestamp < {date_to}, @@ -266,7 +261,7 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: {session_properties}, breakdown_value IS NOT NULL ) - GROUP BY events.`$session_id`, breakdown_value + GROUP BY session_id, breakdown_value ) GROUP BY breakdown_value ) AS scroll @@ -310,10 +305,9 @@ def to_path_bounce_query(self) -> ast.SelectQuery: SELECT any(person_id) AS person_id, count() AS filtered_pageview_count, - {breakdown_value} AS breakdown_value + {breakdown_value} AS breakdown_value, + session.session_id AS session_id FROM events - JOIN sessions - ON events.`$session_id` = sessions.session_id WHERE and( timestamp >= {date_from}, timestamp < {date_to}, @@ -322,7 +316,7 @@ def to_path_bounce_query(self) -> ast.SelectQuery: {session_properties}, {where_breakdown} ) - GROUP BY events.`$session_id`, breakdown_value + GROUP BY session_id, breakdown_value ) GROUP BY breakdown_value ) as counts @@ -333,10 +327,9 @@ def to_path_bounce_query(self) -> ast.SelectQuery: FROM ( SELECT {bounce_breakdown_value} AS breakdown_value, -- use $entry_pathname to find the bounce rate for sessions that started on this pathname - any(session.`$is_bounce`) AS is_bounce + any(session.`$is_bounce`) AS is_bounce, + session.session_id AS session_id FROM events - JOIN sessions - ON events.`$session_id` = sessions.session_id WHERE and( timestamp >= {date_from}, timestamp < {date_to}, @@ -345,7 +338,7 
@@ def to_path_bounce_query(self) -> ast.SelectQuery: {session_properties}, breakdown_value IS NOT NULL ) - GROUP BY events.`$session_id`, breakdown_value + GROUP BY session_id, breakdown_value ) GROUP BY breakdown_value ) as bounce @@ -409,7 +402,7 @@ def _session_properties(self) -> ast.Expr: properties = [ p for p in self.query.properties + self._test_account_filters if get_property_type(p) == "session" ] - return property_to_expr(properties, team=self.team, scope="session") + return property_to_expr(properties, team=self.team, scope="event") def _all_properties(self) -> ast.Expr: properties = self.query.properties + self._test_account_filters @@ -456,23 +449,23 @@ def _counts_breakdown_value(self): case WebStatsBreakdown.PAGE: return self._apply_path_cleaning(ast.Field(chain=["events", "properties", "$pathname"])) case WebStatsBreakdown.INITIAL_PAGE: - return self._apply_path_cleaning(ast.Field(chain=["sessions", "$entry_pathname"])) + return self._apply_path_cleaning(ast.Field(chain=["session", "$entry_pathname"])) case WebStatsBreakdown.EXIT_PAGE: - return self._apply_path_cleaning(ast.Field(chain=["sessions", "$exit_pathname"])) + return self._apply_path_cleaning(ast.Field(chain=["session", "$exit_pathname"])) case WebStatsBreakdown.INITIAL_REFERRING_DOMAIN: - return ast.Field(chain=["sessions", "$entry_referring_domain"]) + return ast.Field(chain=["session", "$entry_referring_domain"]) case WebStatsBreakdown.INITIAL_UTM_SOURCE: - return ast.Field(chain=["sessions", "$entry_utm_source"]) + return ast.Field(chain=["session", "$entry_utm_source"]) case WebStatsBreakdown.INITIAL_UTM_CAMPAIGN: - return ast.Field(chain=["sessions", "$entry_utm_campaign"]) + return ast.Field(chain=["session", "$entry_utm_campaign"]) case WebStatsBreakdown.INITIAL_UTM_MEDIUM: - return ast.Field(chain=["sessions", "$entry_utm_medium"]) + return ast.Field(chain=["session", "$entry_utm_medium"]) case WebStatsBreakdown.INITIAL_UTM_TERM: - return ast.Field(chain=["sessions", 
"$entry_utm_term"]) + return ast.Field(chain=["session", "$entry_utm_term"]) case WebStatsBreakdown.INITIAL_UTM_CONTENT: - return ast.Field(chain=["sessions", "$entry_utm_content"]) + return ast.Field(chain=["session", "$entry_utm_content"]) case WebStatsBreakdown.INITIAL_CHANNEL_TYPE: - return ast.Field(chain=["sessions", "$channel_type"]) + return ast.Field(chain=["session", "$channel_type"]) case WebStatsBreakdown.BROWSER: return ast.Field(chain=["properties", "$browser"]) case WebStatsBreakdown.OS: @@ -515,7 +508,7 @@ def _scroll_prev_pathname_breakdown(self): return self._apply_path_cleaning(ast.Field(chain=["events", "properties", "$prev_pageview_pathname"])) def _bounce_entry_pathname_breakdown(self): - return self._apply_path_cleaning(ast.Field(chain=["sessions", "$entry_pathname"])) + return self._apply_path_cleaning(ast.Field(chain=["session", "$entry_pathname"])) def _apply_path_cleaning(self, path_expr: ast.Expr) -> ast.Expr: if not self.query.doPathCleaning or not self.team.path_cleaning_filters: diff --git a/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py b/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py index a3318f6f8ada8f..6ae35565412171 100644 --- a/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py +++ b/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py @@ -1,10 +1,17 @@ -import uuid - from freezegun import freeze_time from parameterized import parameterized from posthog.hogql_queries.web_analytics.stats_table import WebStatsTableQueryRunner -from posthog.schema import DateRange, WebStatsTableQuery, WebStatsBreakdown, EventPropertyFilter, PropertyOperator +from posthog.models.utils import uuid7 +from posthog.schema import ( + DateRange, + WebStatsTableQuery, + WebStatsBreakdown, + EventPropertyFilter, + PropertyOperator, + SessionTableVersion, + HogQLQueryModifiers, +) from posthog.test.base import ( APIBaseTest, ClickhouseTestMixin, @@ -49,7 +56,7 @@ def _create_pageviews(self, 
distinct_id: str, list_path_time_scroll: list[tuple[ **({"email": "test@posthog.com"} if distinct_id == "test" else {}), }, ) - session_id = str(uuid.uuid4()) + session_id = str(uuid7(person_time)) prev_path_time_scroll = None for path_time_scroll in list_path_time_scroll: pathname, time, scroll = path_time_scroll @@ -97,7 +104,9 @@ def _run_web_stats_table_query( include_bounce_rate=False, include_scroll_depth=False, properties=None, + session_table_version: SessionTableVersion = SessionTableVersion.V1, ): + modifiers = HogQLQueryModifiers(sessionTableVersion=session_table_version) query = WebStatsTableQuery( dateRange=DateRange(date_from=date_from, date_to=date_to), properties=properties or [], @@ -108,25 +117,31 @@ def _run_web_stats_table_query( includeScrollDepth=include_scroll_depth, ) self.team.path_cleaning_filters = path_cleaning_filters or [] - runner = WebStatsTableQueryRunner(team=self.team, query=query) + runner = WebStatsTableQueryRunner(team=self.team, query=query, modifiers=modifiers) return runner.calculate() - def test_no_crash_when_no_data(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_no_crash_when_no_data(self, session_table_version: SessionTableVersion): results = self._run_web_stats_table_query( - "2023-12-08", - "2023-12-15", + "2023-12-08", "2023-12-15", session_table_version=session_table_version ).results self.assertEqual([], results) - def test_increase_in_users(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_increase_in_users(self, session_table_version: SessionTableVersion): + s1a = str(uuid7("2023-12-02")) + s1b = str(uuid7("2023-12-13")) + s2 = str(uuid7("2023-12-10")) self._create_events( [ - ("p1", [("2023-12-02", "s1a", "/"), ("2023-12-03", "s1a", "/login"), ("2023-12-13", "s1b", "/docs")]), - ("p2", [("2023-12-10", "s2", "/")]), + ("p1", [("2023-12-02", s1a, "/"), ("2023-12-03", s1a, "/login"), ("2023-12-13", s1b, "/docs")]), + ("p2", 
[("2023-12-10", s2, "/")]), ] ) - results = self._run_web_stats_table_query("2023-12-01", "2023-12-11").results + results = self._run_web_stats_table_query( + "2023-12-01", "2023-12-11", session_table_version=session_table_version + ).results self.assertEqual( [ @@ -136,15 +151,21 @@ def test_increase_in_users(self): results, ) - def test_all_time(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_all_time(self, session_table_version: SessionTableVersion): + s1a = str(uuid7("2023-12-02")) + s1b = str(uuid7("2023-12-13")) + s2 = str(uuid7("2023-12-10")) self._create_events( [ - ("p1", [("2023-12-02", "s1a", "/"), ("2023-12-03", "s1a", "/login"), ("2023-12-13", "s1b", "/docs")]), - ("p2", [("2023-12-10", "s2", "/")]), + ("p1", [("2023-12-02", s1a, "/"), ("2023-12-03", s1a, "/login"), ("2023-12-13", s1b, "/docs")]), + ("p2", [("2023-12-10", s2, "/")]), ] ) - results = self._run_web_stats_table_query("all", "2023-12-15").results + results = self._run_web_stats_table_query( + "all", "2023-12-15", session_table_version=session_table_version + ).results self.assertEqual( [ @@ -155,23 +176,31 @@ def test_all_time(self): results, ) - def test_filter_test_accounts(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_filter_test_accounts(self, session_table_version: SessionTableVersion): + s1 = str(uuid7("2023-12-02")) # Create 1 test account - self._create_events([("test", [("2023-12-02", "s1", "/"), ("2023-12-03", "s1", "/login")])]) + self._create_events([("test", [("2023-12-02", s1, "/"), ("2023-12-03", s1, "/login")])]) - results = self._run_web_stats_table_query("2023-12-01", "2023-12-03").results + results = self._run_web_stats_table_query( + "2023-12-01", "2023-12-03", session_table_version=session_table_version + ).results self.assertEqual( [], results, ) - def test_breakdown_channel_type_doesnt_throw(self): + @parameterized.expand([[SessionTableVersion.V1], 
[SessionTableVersion.V2]]) + def test_breakdown_channel_type_doesnt_throw(self, session_table_version: SessionTableVersion): + s1a = str(uuid7("2023-12-02")) + s1b = str(uuid7("2023-12-13")) + s2 = str(uuid7("2023-12-10")) # not really testing the functionality yet, which is tested elsewhere, just that it runs self._create_events( [ - ("p1", [("2023-12-02", "s1a", "/"), ("2023-12-03", "s1a", "/login"), ("2023-12-13", "s1b", "/docs")]), - ("p2", [("2023-12-10", "s2", "/")]), + ("p1", [("2023-12-02", s1a, "/"), ("2023-12-03", s1a, "/login"), ("2023-12-13", s1b, "/docs")]), + ("p2", [("2023-12-10", s2, "/")]), ] ) @@ -179,6 +208,7 @@ def test_breakdown_channel_type_doesnt_throw(self): "2023-12-01", "2023-12-03", breakdown_by=WebStatsBreakdown.INITIAL_CHANNEL_TYPE, + session_table_version=session_table_version, ).results self.assertEqual( @@ -186,18 +216,19 @@ def test_breakdown_channel_type_doesnt_throw(self): len(results), ) - def test_limit(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_limit(self, session_table_version: SessionTableVersion): + s1 = str(uuid7("2023-12-02")) + s2 = str(uuid7("2023-12-10")) self._create_events( [ - ("p1", [("2023-12-02", "s1", "/"), ("2023-12-03", "s1", "/login")]), - ("p2", [("2023-12-10", "s2", "/")]), + ("p1", [("2023-12-02", s1, "/"), ("2023-12-03", s1, "/login")]), + ("p2", [("2023-12-10", s2, "/")]), ] ) response_1 = self._run_web_stats_table_query( - "all", - "2023-12-15", - limit=1, + "all", "2023-12-15", limit=1, session_table_version=session_table_version ) self.assertEqual( [ @@ -217,15 +248,20 @@ def test_limit(self): ) self.assertEqual(False, response_2.hasMore) - @parameterized.expand([(True,), (False,)]) - def test_path_filters(self, use_sessions_table): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_path_filters(self, session_table_version: SessionTableVersion): + s1 = str(uuid7("2023-12-02")) + s2 = str(uuid7("2023-12-10")) + s3 
= str(uuid7("2023-12-10")) + s4 = str(uuid7("2023-12-11")) + s5 = str(uuid7("2023-12-11")) self._create_events( [ - ("p1", [("2023-12-02", "s1", "/cleaned/123/path/456")]), - ("p2", [("2023-12-10", "s2", "/cleaned/123")]), - ("p3", [("2023-12-10", "s3", "/cleaned/456")]), - ("p4", [("2023-12-11", "s4", "/not-cleaned")]), - ("p5", [("2023-12-11", "s5", "/thing_a")]), + ("p1", [("2023-12-02", s1, "/cleaned/123/path/456")]), + ("p2", [("2023-12-10", s2, "/cleaned/123")]), + ("p3", [("2023-12-10", s3, "/cleaned/456")]), + ("p4", [("2023-12-11", s4, "/not-cleaned")]), + ("p5", [("2023-12-11", s5, "/thing_a")]), ] ) @@ -238,6 +274,7 @@ def test_path_filters(self, use_sessions_table): {"regex": "thing_a", "alias": "thing_b"}, {"regex": "thing_b", "alias": "thing_c"}, ], + session_table_version=session_table_version, ).results self.assertEqual( @@ -250,7 +287,8 @@ def test_path_filters(self, use_sessions_table): results, ) - def test_scroll_depth_bounce_rate_one_user(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_scroll_depth_bounce_rate_one_user(self, session_table_version: SessionTableVersion): self._create_pageviews( "p1", [ @@ -266,6 +304,7 @@ def test_scroll_depth_bounce_rate_one_user(self): breakdown_by=WebStatsBreakdown.PAGE, include_scroll_depth=True, include_bounce_rate=True, + session_table_version=session_table_version, ).results self.assertEqual( @@ -277,7 +316,8 @@ def test_scroll_depth_bounce_rate_one_user(self): results, ) - def test_scroll_depth_bounce_rate(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_scroll_depth_bounce_rate(self, session_table_version: SessionTableVersion): self._create_pageviews( "p1", [ @@ -308,6 +348,7 @@ def test_scroll_depth_bounce_rate(self): breakdown_by=WebStatsBreakdown.PAGE, include_scroll_depth=True, include_bounce_rate=True, + session_table_version=session_table_version, ).results self.assertEqual( @@ -319,7 +360,8 @@ def 
test_scroll_depth_bounce_rate(self): results, ) - def test_scroll_depth_bounce_rate_with_filter(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_scroll_depth_bounce_rate_with_filter(self, session_table_version: SessionTableVersion): self._create_pageviews( "p1", [ @@ -351,6 +393,7 @@ def test_scroll_depth_bounce_rate_with_filter(self): include_scroll_depth=True, include_bounce_rate=True, properties=[EventPropertyFilter(key="$pathname", operator=PropertyOperator.EXACT, value="/a")], + session_table_version=session_table_version, ).results self.assertEqual( @@ -360,7 +403,8 @@ def test_scroll_depth_bounce_rate_with_filter(self): results, ) - def test_scroll_depth_bounce_rate_path_cleaning(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_scroll_depth_bounce_rate_path_cleaning(self, session_table_version: SessionTableVersion): self._create_pageviews( "p1", [ @@ -381,6 +425,7 @@ def test_scroll_depth_bounce_rate_path_cleaning(self): {"regex": "\\/b\\/\\d+", "alias": "/b/:id"}, {"regex": "\\/c\\/\\d+", "alias": "/c/:id"}, ], + session_table_version=session_table_version, ).results self.assertEqual( @@ -392,7 +437,8 @@ def test_scroll_depth_bounce_rate_path_cleaning(self): results, ) - def test_bounce_rate_one_user(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_bounce_rate_one_user(self, session_table_version: SessionTableVersion): self._create_pageviews( "p1", [ @@ -407,6 +453,7 @@ def test_bounce_rate_one_user(self): "2023-12-15", breakdown_by=WebStatsBreakdown.PAGE, include_bounce_rate=True, + session_table_version=session_table_version, ).results self.assertEqual( @@ -418,7 +465,8 @@ def test_bounce_rate_one_user(self): results, ) - def test_bounce_rate(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_bounce_rate(self, session_table_version: SessionTableVersion): 
self._create_pageviews( "p1", [ @@ -448,6 +496,7 @@ def test_bounce_rate(self): "2023-12-15", breakdown_by=WebStatsBreakdown.PAGE, include_bounce_rate=True, + session_table_version=session_table_version, ).results self.assertEqual( @@ -459,7 +508,8 @@ def test_bounce_rate(self): results, ) - def test_bounce_rate_with_property(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_bounce_rate_with_property(self, session_table_version: SessionTableVersion): self._create_pageviews( "p1", [ @@ -490,6 +540,7 @@ def test_bounce_rate_with_property(self): breakdown_by=WebStatsBreakdown.PAGE, include_bounce_rate=True, properties=[EventPropertyFilter(key="$pathname", operator=PropertyOperator.EXACT, value="/a")], + session_table_version=session_table_version, ).results self.assertEqual( @@ -499,7 +550,8 @@ def test_bounce_rate_with_property(self): results, ) - def test_bounce_rate_path_cleaning(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_bounce_rate_path_cleaning(self, session_table_version: SessionTableVersion): self._create_pageviews( "p1", [ @@ -519,6 +571,7 @@ def test_bounce_rate_path_cleaning(self): {"regex": "\\/b\\/\\d+", "alias": "/b/:id"}, {"regex": "\\/c\\/\\d+", "alias": "/c/:id"}, ], + session_table_version=session_table_version, ).results self.assertEqual( @@ -530,7 +583,8 @@ def test_bounce_rate_path_cleaning(self): results, ) - def test_entry_bounce_rate_one_user(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_entry_bounce_rate_one_user(self, session_table_version: SessionTableVersion): self._create_pageviews( "p1", [ @@ -545,6 +599,7 @@ def test_entry_bounce_rate_one_user(self): "2023-12-15", breakdown_by=WebStatsBreakdown.INITIAL_PAGE, include_bounce_rate=True, + session_table_version=session_table_version, ).results self.assertEqual( @@ -554,7 +609,8 @@ def test_entry_bounce_rate_one_user(self): results, ) - def 
test_entry_bounce_rate(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_entry_bounce_rate(self, session_table_version: SessionTableVersion): self._create_pageviews( "p1", [ @@ -584,6 +640,7 @@ def test_entry_bounce_rate(self): "2023-12-15", breakdown_by=WebStatsBreakdown.INITIAL_PAGE, include_bounce_rate=True, + session_table_version=session_table_version, ).results self.assertEqual( @@ -593,7 +650,8 @@ def test_entry_bounce_rate(self): results, ) - def test_entry_bounce_rate_with_property(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_entry_bounce_rate_with_property(self, session_table_version: SessionTableVersion): self._create_pageviews( "p1", [ @@ -624,6 +682,7 @@ def test_entry_bounce_rate_with_property(self): breakdown_by=WebStatsBreakdown.INITIAL_PAGE, include_bounce_rate=True, properties=[EventPropertyFilter(key="$pathname", operator=PropertyOperator.EXACT, value="/a")], + session_table_version=session_table_version, ).results self.assertEqual( @@ -633,7 +692,8 @@ def test_entry_bounce_rate_with_property(self): results, ) - def test_entry_bounce_rate_path_cleaning(self): + @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) + def test_entry_bounce_rate_path_cleaning(self, session_table_version: SessionTableVersion): self._create_pageviews( "p1", [ @@ -653,6 +713,7 @@ def test_entry_bounce_rate_path_cleaning(self): {"regex": "\\/b\\/\\d+", "alias": "/b/:id"}, {"regex": "\\/c\\/\\d+", "alias": "/c/:id"}, ], + session_table_version=session_table_version, ).results self.assertEqual( diff --git a/posthog/hogql_queries/web_analytics/web_overview.py b/posthog/hogql_queries/web_analytics/web_overview.py index fa4fb5b60b29d9..01508965d191ce 100644 --- a/posthog/hogql_queries/web_analytics/web_overview.py +++ b/posthog/hogql_queries/web_analytics/web_overview.py @@ -42,24 +42,21 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: FROM ( 
SELECT any(events.person_id) as person_id, - events.`$session_id` as session_id, - min(sessions.$start_timestamp) as start_timestamp, - any(sessions.$session_duration) as session_duration, + session.session_id as session_id, + min(session.$start_timestamp) as start_timestamp, + any(session.$session_duration) as session_duration, count() as filtered_pageview_count, - any(sessions.$is_bounce) as is_bounce - + any(session.$is_bounce) as is_bounce FROM events - JOIN sessions - ON events.`$session_id` = sessions.session_id WHERE and( - `$session_id` IS NOT NULL, + events.`$session_id` IS NOT NULL, event = '$pageview', timestamp >= {start}, timestamp < {end}, {event_properties}, {session_properties} ) - GROUP BY `$session_id` + GROUP BY session_id HAVING and( start_timestamp >= {start}, start_timestamp < {end} @@ -92,26 +89,24 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: FROM ( SELECT any(events.person_id) as person_id, - events.`$session_id` as session_id, - min(sessions.$start_timestamp) as $start_timestamp, - any(sessions.$session_duration) as session_duration, + session.session_id as session_id, + min(session.$start_timestamp) as start_timestamp, + any(session.$session_duration) as session_duration, count() as filtered_pageview_count, - any(sessions.$is_bounce) as is_bounce + any(session.$is_bounce) as is_bounce FROM events - JOIN sessions - ON events.`$session_id` = sessions.session_id WHERE and( - `$session_id` IS NOT NULL, + events.`$session_id` IS NOT NULL, event = '$pageview', timestamp >= {mid}, timestamp < {end}, {event_properties}, {session_properties} ) - GROUP BY `$session_id` + GROUP BY session_id HAVING and( - $start_timestamp >= {mid}, - $start_timestamp < {end} + start_timestamp >= {mid}, + start_timestamp < {end} ) ) """, @@ -173,7 +168,7 @@ def session_properties(self) -> ast.Expr: properties = [ p for p in self.query.properties + self._test_account_filters if get_property_type(p) == "session" ] - return 
property_to_expr(properties, team=self.team, scope="session") + return property_to_expr(properties, team=self.team, scope="event") def to_data(