diff --git a/posthog/clickhouse/migrations/0083_recreate_sessions_v1_after_limiting_teams.py b/posthog/clickhouse/migrations/0083_recreate_sessions_v1_after_limiting_teams.py new file mode 100644 index 0000000000000..d0da2eb158b6d --- /dev/null +++ b/posthog/clickhouse/migrations/0083_recreate_sessions_v1_after_limiting_teams.py @@ -0,0 +1,8 @@ +from posthog.clickhouse.client.migration_tools import run_sql_with_exceptions +from posthog.models.sessions.sql import DROP_SESSION_MATERIALIZED_VIEW_SQL, SESSIONS_TABLE_MV_SQL + +operations = [ + # drop the mv, and recreate it with the new part of the WHERE clause + run_sql_with_exceptions(DROP_SESSION_MATERIALIZED_VIEW_SQL()), + run_sql_with_exceptions(SESSIONS_TABLE_MV_SQL()), +] diff --git a/posthog/clickhouse/test/__snapshots__/test_schema.ambr b/posthog/clickhouse/test/__snapshots__/test_schema.ambr index ac13d60ef2d66..1be2abee933c7 100644 --- a/posthog/clickhouse/test/__snapshots__/test_schema.ambr +++ b/posthog/clickhouse/test/__snapshots__/test_schema.ambr @@ -2086,7 +2086,7 @@ sumIf(1, event='$autocapture') as autocapture_count FROM posthog_test.sharded_events - WHERE `$session_id` IS NOT NULL AND `$session_id` != '' + WHERE `$session_id` IS NOT NULL AND `$session_id` != '' AND team_id IN (1, 2, 13610, 19279, 21173, 29929, 32050, 9910, 11775, 21129, 31490) GROUP BY `$session_id`, team_id diff --git a/posthog/clickhouse/test/test_sessions_model.py b/posthog/clickhouse/test/test_sessions_model.py index 0e3631e0ab3cd..0042456a03d95 100644 --- a/posthog/clickhouse/test/test_sessions_model.py +++ b/posthog/clickhouse/test/test_sessions_model.py @@ -1,8 +1,10 @@ from posthog.clickhouse.client import sync_execute, query_with_columns +from posthog.models import Team from posthog.test.base import ( _create_event, ClickhouseTestMixin, BaseTest, + ClickhouseDestroyTablesMixin, ) distinct_id_counter = 0 @@ -21,7 +23,12 @@ def create_session_id(): return f"s{session_id_counter}" -class TestSessionsModel(ClickhouseTestMixin, 
BaseTest): +# only certain team ids can insert events into this legacy sessions table, see sessions/sql.py for more info +TEAM_ID = 2 +TEAM = Team(id=TEAM_ID) + + +class TestSessionsModel(ClickhouseDestroyTablesMixin, ClickhouseTestMixin, BaseTest): def select_by_session_id(self, session_id): return query_with_columns( """ @@ -34,7 +41,7 @@ def select_by_session_id(self, session_id): """, { "session_id": session_id, - "team_id": self.team.id, + "team_id": TEAM_ID, }, ) @@ -42,7 +49,7 @@ def test_it_creates_session_when_creating_event(self): distinct_id = create_distinct_id() session_id = create_session_id() _create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id, properties={"$current_url": "/", "$session_id": session_id}, @@ -60,7 +67,7 @@ def test_it_creates_session_when_creating_event(self): """, { "distinct_id": distinct_id, - "team_id": self.team.id, + "team_id": TEAM_ID, }, ) @@ -72,14 +79,14 @@ def test_handles_different_distinct_id_across_same_session(self): session_id = create_session_id() _create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id1, properties={"$session_id": session_id}, timestamp="2024-03-08", ) _create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id2, properties={"$session_id": session_id}, @@ -96,28 +103,28 @@ def test_handles_entry_and_exit_urls(self): session_id = create_session_id() _create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id, properties={"$current_url": "/entry", "$session_id": session_id}, timestamp="2024-03-08:01", ) _create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id, properties={"$current_url": "/middle", "$session_id": session_id}, timestamp="2024-03-08:02", ) _create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id, properties={"$current_url": "/middle", "$session_id": session_id}, timestamp="2024-03-08:03", ) 
_create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id, properties={"$current_url": "/exit", "$session_id": session_id}, @@ -136,14 +143,14 @@ def test_handles_initial_utm_properties(self): session_id = create_session_id() _create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id, properties={"$session_id": session_id, "utm_source": "source"}, timestamp="2024-03-08", ) _create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id, properties={"$session_id": session_id, "utm_source": "other_source"}, @@ -159,35 +166,35 @@ def test_counts_pageviews_autocaptures_and_events(self): session_id = create_session_id() _create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id, properties={"$session_id": session_id}, timestamp="2024-03-08", ) _create_event( - team=self.team, + team=TEAM, event="$autocapture", distinct_id=distinct_id, properties={"$session_id": session_id}, timestamp="2024-03-08", ) _create_event( - team=self.team, + team=TEAM, event="$autocapture", distinct_id=distinct_id, properties={"$session_id": session_id}, timestamp="2024-03-08", ) _create_event( - team=self.team, + team=TEAM, event="other event", distinct_id=distinct_id, properties={"$session_id": session_id}, timestamp="2024-03-08", ) _create_event( - team=self.team, + team=TEAM, event="$pageleave", distinct_id=distinct_id, properties={"$session_id": session_id}, @@ -209,14 +216,14 @@ def test_separates_sessions_across_same_user(self): session_id3 = create_session_id() _create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id, properties={"$session_id": session_id1}, timestamp="2024-03-08", ) _create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id, properties={"$session_id": session_id2}, @@ -235,7 +242,7 @@ def test_select_from_sessions(self): distinct_id = create_distinct_id() session_id = create_session_id() 
_create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id, properties={"$session_id": session_id}, @@ -260,7 +267,7 @@ def test_select_from_sessions(self): """, { "session_id": session_id, - "team_id": self.team.id, + "team_id": TEAM_ID, }, ) self.assertEqual(len(responses), 1) @@ -270,7 +277,7 @@ def test_select_from_sessions_mv(self): distinct_id = create_distinct_id() session_id = create_session_id() _create_event( - team=self.team, + team=TEAM, event="$pageview", distinct_id=distinct_id, properties={"$session_id": session_id}, @@ -295,7 +302,7 @@ def test_select_from_sessions_mv(self): """, { "session_id": session_id, - "team_id": self.team.id, + "team_id": TEAM_ID, }, ) self.assertEqual(len(responses), 1) diff --git a/posthog/hogql/database/schema/test/test_sessions_v1.py b/posthog/hogql/database/schema/test/test_sessions_v1.py index eefd04197deab..77f41fd2f6bbc 100644 --- a/posthog/hogql/database/schema/test/test_sessions_v1.py +++ b/posthog/hogql/database/schema/test/test_sessions_v1.py @@ -8,6 +8,7 @@ ) from posthog.hogql.parser import parse_select from posthog.hogql.query import execute_hogql_query +from posthog.models import Team from posthog.models.property_definition import PropertyType from posthog.models.utils import uuid7 from posthog.schema import HogQLQueryModifiers, BounceRatePageViewMode, SessionTableVersion @@ -15,16 +16,20 @@ APIBaseTest, ClickhouseTestMixin, _create_event, - _create_person, + ClickhouseDestroyTablesMixin, ) +# only certain team ids can insert events into this legacy sessions table, see sessions/sql.py for more info +TEAM_ID = 2 +TEAM = Team(id=TEAM_ID, pk=TEAM_ID) -class TestSessionsV1(ClickhouseTestMixin, APIBaseTest): + +class TestSessionsV1(ClickhouseDestroyTablesMixin, ClickhouseTestMixin, APIBaseTest): def __execute(self, query): modifiers = HogQLQueryModifiers(sessionTableVersion=SessionTableVersion.V1) return execute_hogql_query( query=query, - team=self.team, + team=TEAM, 
modifiers=modifiers, ) @@ -33,7 +38,7 @@ def test_select_star(self): _create_event( event="$pageview", - team=self.team, + team=TEAM, distinct_id="d1", properties={"$current_url": "https://example.com", "$session_id": session_id}, ) @@ -56,7 +61,7 @@ def test_select_event_sessions_star(self): _create_event( event="$pageview", - team=self.team, + team=TEAM, distinct_id="d1", properties={"$current_url": "https://example.com", "$session_id": session_id}, ) @@ -93,7 +98,7 @@ def test_channel_type(self): _create_event( event="$pageview", - team=self.team, + team=TEAM, distinct_id="d1", properties={"gad_source": "1", "$session_id": session_id}, ) @@ -116,7 +121,7 @@ def test_event_dot_session_dot_channel_type(self): _create_event( event="$pageview", - team=self.team, + team=TEAM, distinct_id="d1", properties={"gad_source": "1", "$session_id": session_id}, ) @@ -139,7 +144,7 @@ def test_events_session_dot_channel_type(self): _create_event( event="$pageview", - team=self.team, + team=TEAM, distinct_id="d1", properties={"gad_source": "1", "$session_id": session_id}, ) @@ -157,57 +162,26 @@ def test_events_session_dot_channel_type(self): "Paid Search", ) - def test_persons_and_sessions_on_events(self): - p1 = _create_person(distinct_ids=["d1"], team=self.team) - p2 = _create_person(distinct_ids=["d2"], team=self.team) - - s1 = "session_test_persons_and_sessions_on_events_1" - s2 = "session_test_persons_and_sessions_on_events_2" - - _create_event( - event="$pageview", - team=self.team, - distinct_id="d1", - properties={"$session_id": s1, "utm_source": "source1"}, - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="d2", - properties={"$session_id": s2, "utm_source": "source2"}, - ) - - response = self.__execute( - parse_select( - "select events.person_id, session.$entry_utm_source from events where $session_id = {session_id} or $session_id = {session_id2} order by 2 asc", - placeholders={"session_id": ast.Constant(value=s1), "session_id2": 
ast.Constant(value=s2)}, - ), - ) - - [row1, row2] = response.results or [] - self.assertEqual(row1, (p1.uuid, "source1")) - self.assertEqual(row2, (p2.uuid, "source2")) - @parameterized.expand([(BounceRatePageViewMode.UNIQ_URLS,), (BounceRatePageViewMode.COUNT_PAGEVIEWS,)]) def test_bounce_rate(self, bounceRatePageViewMode): # person with 2 different sessions _create_event( event="$pageview", - team=self.team, + team=TEAM, distinct_id="d1", properties={"$session_id": "s1a", "$current_url": "https://example.com/1"}, timestamp="2023-12-02", ) _create_event( event="$pageview", - team=self.team, + team=TEAM, distinct_id="d1", properties={"$session_id": "s1a", "$current_url": "https://example.com/2"}, timestamp="2023-12-03", ) _create_event( event="$pageview", - team=self.team, + team=TEAM, distinct_id="d1", properties={"$session_id": "s1b", "$current_url": "https://example.com/3"}, timestamp="2023-12-12", @@ -215,7 +189,7 @@ def test_bounce_rate(self, bounceRatePageViewMode): # session with 1 pageview _create_event( event="$pageview", - team=self.team, + team=TEAM, distinct_id="d2", properties={"$session_id": "s2", "$current_url": "https://example.com/4"}, timestamp="2023-12-11", @@ -223,14 +197,14 @@ def test_bounce_rate(self, bounceRatePageViewMode): # session with 1 pageview and 1 autocapture _create_event( event="$pageview", - team=self.team, + team=TEAM, distinct_id="d3", properties={"$session_id": "s3", "$current_url": "https://example.com/5"}, timestamp="2023-12-11", ) _create_event( event="$autocapture", - team=self.team, + team=TEAM, distinct_id="d3", properties={"$session_id": "s3", "$current_url": "https://example.com/5"}, timestamp="2023-12-11", @@ -238,14 +212,14 @@ def test_bounce_rate(self, bounceRatePageViewMode): # short session with a pageleave _create_event( event="$pageview", - team=self.team, + team=TEAM, distinct_id="d4", properties={"$session_id": "s4", "$current_url": "https://example.com/6"}, timestamp="2023-12-11T12:00:00", ) _create_event( 
event="$pageleave", - team=self.team, + team=TEAM, distinct_id="d4", properties={"$session_id": "s4", "$current_url": "https://example.com/6"}, timestamp="2023-12-11T12:00:01", @@ -253,14 +227,14 @@ def test_bounce_rate(self, bounceRatePageViewMode): # long session with a pageleave _create_event( event="$pageview", - team=self.team, + team=TEAM, distinct_id="d5", properties={"$session_id": "s5", "$current_url": "https://example.com/7"}, timestamp="2023-12-11T12:00:00", ) _create_event( event="$pageleave", - team=self.team, + team=TEAM, distinct_id="d5", properties={"$session_id": "s5", "$current_url": "https://example.com/7"}, timestamp="2023-12-11T12:00:11", @@ -269,7 +243,7 @@ def test_bounce_rate(self, bounceRatePageViewMode): parse_select( "select $is_bounce, session_id from sessions ORDER BY session_id", ), - self.team, + TEAM, modifiers=HogQLQueryModifiers( bounceRatePageViewMode=bounceRatePageViewMode, sessionTableVersion=SessionTableVersion.V1 ), @@ -291,7 +265,7 @@ def test_can_use_v1_and_v2_fields(self): _create_event( event="$pageview", - team=self.team, + team=TEAM, distinct_id="d1", properties={ "$current_url": "https://example.com/pathname", @@ -372,4 +346,4 @@ def test_entry_utm(self): def test_can_get_values_for_all(self): results = get_lazy_session_table_properties_v1(None) for prop in results: - get_lazy_session_table_values_v1(key=prop["id"], team=self.team, search_term=None) + get_lazy_session_table_values_v1(key=prop["id"], team=TEAM, search_term=None) diff --git a/posthog/hogql_queries/web_analytics/test/test_session_attribution_explorer_query_runner.py b/posthog/hogql_queries/web_analytics/test/test_session_attribution_explorer_query_runner.py index d285ff2aa25f3..06ba1ddfeabbe 100644 --- a/posthog/hogql_queries/web_analytics/test/test_session_attribution_explorer_query_runner.py +++ b/posthog/hogql_queries/web_analytics/test/test_session_attribution_explorer_query_runner.py @@ -1,6 +1,5 @@ from typing import Optional -from parameterized 
import parameterized from posthog.hogql.constants import LimitContext from posthog.hogql_queries.web_analytics.session_attribution_explorer_query_runner import ( @@ -80,7 +79,7 @@ def _run_session_attribution_query( self, date_from: Optional[str] = None, date_to: Optional[str] = None, - session_table_version: SessionTableVersion = SessionTableVersion.V1, + session_table_version: SessionTableVersion = SessionTableVersion.V2, group_by: Optional[list[SessionAttributionGroupBy]] = None, limit_context: Optional[LimitContext] = None, properties: Optional[list[SessionPropertyFilter]] = None, @@ -94,20 +93,14 @@ def _run_session_attribution_query( runner = SessionAttributionExplorerQueryRunner(team=self.team, query=query, limit_context=limit_context) return runner.calculate() - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_no_crash_when_no_data(self, session_table_version: SessionTableVersion): - results = self._run_session_attribution_query( - session_table_version=session_table_version, - ).results + def test_no_crash_when_no_data(self): + results = self._run_session_attribution_query().results assert results == [(0, [], [], [], [], [], [], [])] - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_group_by_nothing(self, session_table_version: SessionTableVersion): + def test_group_by_nothing(self): self._create_data() - results = self._run_session_attribution_query( - session_table_version=session_table_version, - ).results + results = self._run_session_attribution_query().results assert results == [ ( @@ -122,12 +115,10 @@ def test_group_by_nothing(self, session_table_version: SessionTableVersion): ) ] - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_group_by_initial_url(self, session_table_version: SessionTableVersion): + def test_group_by_initial_url(self): self._create_data() results = self._run_session_attribution_query( - 
session_table_version=session_table_version, group_by=[SessionAttributionGroupBy.INITIAL_URL], ).results @@ -164,12 +155,10 @@ def test_group_by_initial_url(self, session_table_version: SessionTableVersion): ), ] - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_group_channel_medium_source(self, session_table_version: SessionTableVersion): + def test_group_channel_medium_source(self): self._create_data() results = self._run_session_attribution_query( - session_table_version=session_table_version, group_by=[ SessionAttributionGroupBy.CHANNEL_TYPE, SessionAttributionGroupBy.MEDIUM, @@ -191,12 +180,10 @@ def test_group_channel_medium_source(self, session_table_version: SessionTableVe (1, "Referral", ["referring_domain2"], "source2", "medium2", ["campaign2"], [], ["http://example.com/2"]), ] - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_filters(self, session_table_version: SessionTableVersion): + def test_filters(self): self._create_data() results = self._run_session_attribution_query( - session_table_version=session_table_version, group_by=[ SessionAttributionGroupBy.CHANNEL_TYPE, SessionAttributionGroupBy.MEDIUM, diff --git a/posthog/hogql_queries/web_analytics/test/test_web_overview.py b/posthog/hogql_queries/web_analytics/test/test_web_overview.py index bc41d4d0a6785..3e9b570f57b9e 100644 --- a/posthog/hogql_queries/web_analytics/test/test_web_overview.py +++ b/posthog/hogql_queries/web_analytics/test/test_web_overview.py @@ -2,7 +2,6 @@ from unittest.mock import MagicMock, patch from freezegun import freeze_time -from parameterized import parameterized from posthog.clickhouse.client.execute import sync_execute from posthog.hogql.constants import LimitContext @@ -72,7 +71,7 @@ def _run_web_overview_query( self, date_from: str, date_to: str, - session_table_version: SessionTableVersion = SessionTableVersion.V1, + session_table_version: SessionTableVersion = SessionTableVersion.V2, 
compare: bool = True, limit_context: Optional[LimitContext] = None, filter_test_accounts: Optional[bool] = False, @@ -97,19 +96,16 @@ def _run_web_overview_query( runner = WebOverviewQueryRunner(team=self.team, query=query, limit_context=limit_context) return runner.calculate() - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_no_crash_when_no_data(self, session_table_version: SessionTableVersion): + def test_no_crash_when_no_data(self): results = self._run_web_overview_query( "2023-12-08", "2023-12-15", - session_table_version=session_table_version, ).results assert [item.key for item in results] == ["visitors", "views", "sessions", "session duration", "bounce rate"] results = self._run_web_overview_query( "2023-12-08", "2023-12-15", - session_table_version=session_table_version, includeLCPScore=True, ).results assert [item.key for item in results] == [ @@ -132,9 +128,7 @@ def test_no_crash_when_no_data(self, session_table_version: SessionTableVersion) } ], ) - results = self._run_web_overview_query( - "2023-12-08", "2023-12-15", session_table_version=session_table_version, action=action - ).results + results = self._run_web_overview_query("2023-12-08", "2023-12-15", action=action).results assert [item.key for item in results] == [ "visitors", @@ -143,8 +137,7 @@ def test_no_crash_when_no_data(self, session_table_version: SessionTableVersion) "conversion rate", ] - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_increase_in_users(self, session_table_version: SessionTableVersion): + def test_increase_in_users(self): s1a = str(uuid7("2023-12-02")) s1b = str(uuid7("2023-12-12")) s2 = str(uuid7("2023-12-11")) @@ -159,7 +152,6 @@ def test_increase_in_users(self, session_table_version: SessionTableVersion): results = self._run_web_overview_query( "2023-12-08", "2023-12-15", - session_table_version=session_table_version, ).results visitors = results[0] @@ -192,8 +184,7 @@ def 
test_increase_in_users(self, session_table_version: SessionTableVersion): self.assertEqual(0, bounce.previous) self.assertEqual(None, bounce.changeFromPreviousPct) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_all_time(self, session_table_version: SessionTableVersion): + def test_all_time(self): s1a = str(uuid7("2023-12-02")) s1b = str(uuid7("2023-12-12")) s2 = str(uuid7("2023-12-11")) @@ -208,7 +199,6 @@ def test_all_time(self, session_table_version: SessionTableVersion): "all", "2023-12-15", compare=False, - session_table_version=session_table_version, ).results visitors = results[0] @@ -241,15 +231,12 @@ def test_all_time(self, session_table_version: SessionTableVersion): self.assertEqual(None, bounce.previous) self.assertEqual(None, bounce.changeFromPreviousPct) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_filter_test_accounts(self, session_table_version: SessionTableVersion): + def test_filter_test_accounts(self): s1 = str(uuid7("2023-12-02")) # Create 1 test account self._create_events([("test", [("2023-12-02", s1), ("2023-12-03", s1)])]) - results = self._run_web_overview_query( - "2023-12-01", "2023-12-03", session_table_version=session_table_version, filter_test_accounts=True - ).results + results = self._run_web_overview_query("2023-12-01", "2023-12-03", filter_test_accounts=True).results visitors = results[0] self.assertEqual(0, visitors.value) @@ -267,21 +254,17 @@ def test_filter_test_accounts(self, session_table_version: SessionTableVersion): self.assertEqual("bounce rate", bounce.key) self.assertEqual(None, bounce.value) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_dont_filter_test_accounts(self, session_table_version: SessionTableVersion): + def test_dont_filter_test_accounts(self): s1 = str(uuid7("2023-12-02")) # Create 1 test account self._create_events([("test", [("2023-12-02", s1), ("2023-12-03", s1)])]) - results = 
self._run_web_overview_query( - "2023-12-01", "2023-12-03", session_table_version=session_table_version, filter_test_accounts=False - ).results + results = self._run_web_overview_query("2023-12-01", "2023-12-03", filter_test_accounts=False).results visitors = results[0] self.assertEqual(1, visitors.value) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_correctly_counts_pageviews_in_long_running_session(self, session_table_version: SessionTableVersion): + def test_correctly_counts_pageviews_in_long_running_session(self): # this test is important when using the v1 sessions table as the raw sessions table will have 3 entries, one per day s1 = str(uuid7("2023-12-01")) self._create_events( @@ -293,7 +276,6 @@ def test_correctly_counts_pageviews_in_long_running_session(self, session_table_ results = self._run_web_overview_query( "2023-12-01", "2023-12-03", - session_table_version=session_table_version, ).results visitors = results[0] diff --git a/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py b/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py index 10ce9ab1ebcb4..865424f5bc862 100644 --- a/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py +++ b/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py @@ -1,7 +1,6 @@ from typing import Optional from freezegun import freeze_time -from parameterized import parameterized from posthog.hogql_queries.web_analytics.stats_table import WebStatsTableQueryRunner from posthog.models import Cohort @@ -108,7 +107,7 @@ def _run_web_stats_table_query( include_bounce_rate=False, include_scroll_depth=False, properties=None, - session_table_version: SessionTableVersion = SessionTableVersion.V1, + session_table_version: SessionTableVersion = SessionTableVersion.V2, filter_test_accounts: Optional[bool] = False, ): modifiers = HogQLQueryModifiers(sessionTableVersion=session_table_version) @@ -126,15 +125,14 @@ def _run_web_stats_table_query( 
runner = WebStatsTableQueryRunner(team=self.team, query=query, modifiers=modifiers) return runner.calculate() - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_no_crash_when_no_data(self, session_table_version: SessionTableVersion): + def test_no_crash_when_no_data(self): results = self._run_web_stats_table_query( - "2023-12-08", "2023-12-15", session_table_version=session_table_version + "2023-12-08", + "2023-12-15", ).results self.assertEqual([], results) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_increase_in_users(self, session_table_version: SessionTableVersion): + def test_increase_in_users(self): s1a = str(uuid7("2023-12-02")) s1b = str(uuid7("2023-12-13")) s2 = str(uuid7("2023-12-10")) @@ -145,9 +143,7 @@ def test_increase_in_users(self, session_table_version: SessionTableVersion): ] ) - results = self._run_web_stats_table_query( - "2023-12-01", "2023-12-11", session_table_version=session_table_version - ).results + results = self._run_web_stats_table_query("2023-12-01", "2023-12-11").results self.assertEqual( [ @@ -157,8 +153,7 @@ def test_increase_in_users(self, session_table_version: SessionTableVersion): results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_all_time(self, session_table_version: SessionTableVersion): + def test_all_time(self): s1a = str(uuid7("2023-12-02")) s1b = str(uuid7("2023-12-13")) s2 = str(uuid7("2023-12-10")) @@ -169,9 +164,7 @@ def test_all_time(self, session_table_version: SessionTableVersion): ] ) - results = self._run_web_stats_table_query( - "all", "2023-12-15", session_table_version=session_table_version - ).results + results = self._run_web_stats_table_query("all", "2023-12-15").results self.assertEqual( [ @@ -182,38 +175,31 @@ def test_all_time(self, session_table_version: SessionTableVersion): results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def 
test_filter_test_accounts(self, session_table_version: SessionTableVersion): + def test_filter_test_accounts(self): s1 = str(uuid7("2023-12-02")) # Create 1 test account self._create_events([("test", [("2023-12-02", s1, "/"), ("2023-12-03", s1, "/login")])]) - results = self._run_web_stats_table_query( - "2023-12-01", "2023-12-03", session_table_version=session_table_version, filter_test_accounts=True - ).results + results = self._run_web_stats_table_query("2023-12-01", "2023-12-03", filter_test_accounts=True).results self.assertEqual( [], results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_dont_filter_test_accounts(self, session_table_version: SessionTableVersion): + def test_dont_filter_test_accounts(self): s1 = str(uuid7("2023-12-02")) # Create 1 test account self._create_events([("test", [("2023-12-02", s1, "/"), ("2023-12-03", s1, "/login")])]) - results = self._run_web_stats_table_query( - "2023-12-01", "2023-12-03", session_table_version=session_table_version, filter_test_accounts=False - ).results + results = self._run_web_stats_table_query("2023-12-01", "2023-12-03", filter_test_accounts=False).results self.assertEqual( [["/", 1, 1], ["/login", 1, 1]], results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_breakdown_channel_type_doesnt_throw(self, session_table_version: SessionTableVersion): + def test_breakdown_channel_type_doesnt_throw(self): s1a = str(uuid7("2023-12-02")) s1b = str(uuid7("2023-12-13")) s2 = str(uuid7("2023-12-10")) @@ -229,7 +215,6 @@ def test_breakdown_channel_type_doesnt_throw(self, session_table_version: Sessio "2023-12-01", "2023-12-03", breakdown_by=WebStatsBreakdown.INITIAL_CHANNEL_TYPE, - session_table_version=session_table_version, ).results self.assertEqual( @@ -237,8 +222,7 @@ def test_breakdown_channel_type_doesnt_throw(self, session_table_version: Sessio len(results), ) - @parameterized.expand([[SessionTableVersion.V1], 
[SessionTableVersion.V2]]) - def test_limit(self, session_table_version: SessionTableVersion): + def test_limit(self): s1 = str(uuid7("2023-12-02")) s2 = str(uuid7("2023-12-10")) self._create_events( @@ -248,9 +232,7 @@ def test_limit(self, session_table_version: SessionTableVersion): ] ) - response_1 = self._run_web_stats_table_query( - "all", "2023-12-15", limit=1, session_table_version=session_table_version - ) + response_1 = self._run_web_stats_table_query("all", "2023-12-15", limit=1) self.assertEqual( [ ["/", 2, 2], @@ -269,8 +251,7 @@ def test_limit(self, session_table_version: SessionTableVersion): ) self.assertEqual(False, response_2.hasMore) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_path_filters(self, session_table_version: SessionTableVersion): + def test_path_filters(self): s1 = str(uuid7("2023-12-02")) s2 = str(uuid7("2023-12-10")) s3 = str(uuid7("2023-12-10")) @@ -295,7 +276,6 @@ def test_path_filters(self, session_table_version: SessionTableVersion): {"regex": "thing_a", "alias": "thing_b"}, {"regex": "thing_b", "alias": "thing_c"}, ], - session_table_version=session_table_version, ).results self.assertEqual( @@ -308,8 +288,7 @@ def test_path_filters(self, session_table_version: SessionTableVersion): results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_scroll_depth_bounce_rate_one_user(self, session_table_version: SessionTableVersion): + def test_scroll_depth_bounce_rate_one_user(self): self._create_pageviews( "p1", [ @@ -325,7 +304,6 @@ def test_scroll_depth_bounce_rate_one_user(self, session_table_version: SessionT breakdown_by=WebStatsBreakdown.PAGE, include_scroll_depth=True, include_bounce_rate=True, - session_table_version=session_table_version, ).results self.assertEqual( @@ -337,8 +315,7 @@ def test_scroll_depth_bounce_rate_one_user(self, session_table_version: SessionT results, ) - @parameterized.expand([[SessionTableVersion.V1], 
[SessionTableVersion.V2]]) - def test_scroll_depth_bounce_rate(self, session_table_version: SessionTableVersion): + def test_scroll_depth_bounce_rate(self): self._create_pageviews( "p1", [ @@ -369,7 +346,6 @@ def test_scroll_depth_bounce_rate(self, session_table_version: SessionTableVersi breakdown_by=WebStatsBreakdown.PAGE, include_scroll_depth=True, include_bounce_rate=True, - session_table_version=session_table_version, ).results self.assertEqual( @@ -381,8 +357,7 @@ def test_scroll_depth_bounce_rate(self, session_table_version: SessionTableVersi results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_scroll_depth_bounce_rate_with_filter(self, session_table_version: SessionTableVersion): + def test_scroll_depth_bounce_rate_with_filter(self): self._create_pageviews( "p1", [ @@ -414,7 +389,6 @@ def test_scroll_depth_bounce_rate_with_filter(self, session_table_version: Sessi include_scroll_depth=True, include_bounce_rate=True, properties=[EventPropertyFilter(key="$pathname", operator=PropertyOperator.EXACT, value="/a")], - session_table_version=session_table_version, ).results self.assertEqual( @@ -424,8 +398,7 @@ def test_scroll_depth_bounce_rate_with_filter(self, session_table_version: Sessi results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_scroll_depth_bounce_rate_path_cleaning(self, session_table_version: SessionTableVersion): + def test_scroll_depth_bounce_rate_path_cleaning(self): self._create_pageviews( "p1", [ @@ -446,7 +419,6 @@ def test_scroll_depth_bounce_rate_path_cleaning(self, session_table_version: Ses {"regex": "\\/b\\/\\d+", "alias": "/b/:id"}, {"regex": "\\/c\\/\\d+", "alias": "/c/:id"}, ], - session_table_version=session_table_version, ).results self.assertEqual( @@ -458,8 +430,7 @@ def test_scroll_depth_bounce_rate_path_cleaning(self, session_table_version: Ses results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - 
def test_bounce_rate_one_user(self, session_table_version: SessionTableVersion): + def test_bounce_rate_one_user(self): self._create_pageviews( "p1", [ @@ -474,7 +445,6 @@ def test_bounce_rate_one_user(self, session_table_version: SessionTableVersion): "2023-12-15", breakdown_by=WebStatsBreakdown.PAGE, include_bounce_rate=True, - session_table_version=session_table_version, ).results self.assertEqual( @@ -486,8 +456,7 @@ def test_bounce_rate_one_user(self, session_table_version: SessionTableVersion): results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_bounce_rate(self, session_table_version: SessionTableVersion): + def test_bounce_rate(self): self._create_pageviews( "p1", [ @@ -517,7 +486,6 @@ def test_bounce_rate(self, session_table_version: SessionTableVersion): "2023-12-15", breakdown_by=WebStatsBreakdown.PAGE, include_bounce_rate=True, - session_table_version=session_table_version, ).results self.assertEqual( @@ -529,8 +497,7 @@ def test_bounce_rate(self, session_table_version: SessionTableVersion): results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_bounce_rate_with_property(self, session_table_version: SessionTableVersion): + def test_bounce_rate_with_property(self): self._create_pageviews( "p1", [ @@ -561,7 +528,6 @@ def test_bounce_rate_with_property(self, session_table_version: SessionTableVers breakdown_by=WebStatsBreakdown.PAGE, include_bounce_rate=True, properties=[EventPropertyFilter(key="$pathname", operator=PropertyOperator.EXACT, value="/a")], - session_table_version=session_table_version, ).results self.assertEqual( @@ -571,8 +537,7 @@ def test_bounce_rate_with_property(self, session_table_version: SessionTableVers results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_bounce_rate_path_cleaning(self, session_table_version: SessionTableVersion): + def test_bounce_rate_path_cleaning(self): 
self._create_pageviews( "p1", [ @@ -592,7 +557,6 @@ def test_bounce_rate_path_cleaning(self, session_table_version: SessionTableVers {"regex": "\\/b\\/\\d+", "alias": "/b/:id"}, {"regex": "\\/c\\/\\d+", "alias": "/c/:id"}, ], - session_table_version=session_table_version, ).results self.assertEqual( @@ -604,8 +568,7 @@ def test_bounce_rate_path_cleaning(self, session_table_version: SessionTableVers results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_entry_bounce_rate_one_user(self, session_table_version: SessionTableVersion): + def test_entry_bounce_rate_one_user(self): self._create_pageviews( "p1", [ @@ -620,7 +583,6 @@ def test_entry_bounce_rate_one_user(self, session_table_version: SessionTableVer "2023-12-15", breakdown_by=WebStatsBreakdown.INITIAL_PAGE, include_bounce_rate=True, - session_table_version=session_table_version, ).results self.assertEqual( @@ -630,8 +592,7 @@ def test_entry_bounce_rate_one_user(self, session_table_version: SessionTableVer results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_entry_bounce_rate(self, session_table_version: SessionTableVersion): + def test_entry_bounce_rate(self): self._create_pageviews( "p1", [ @@ -661,7 +622,6 @@ def test_entry_bounce_rate(self, session_table_version: SessionTableVersion): "2023-12-15", breakdown_by=WebStatsBreakdown.INITIAL_PAGE, include_bounce_rate=True, - session_table_version=session_table_version, ).results self.assertEqual( @@ -671,8 +631,7 @@ def test_entry_bounce_rate(self, session_table_version: SessionTableVersion): results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_entry_bounce_rate_with_property(self, session_table_version: SessionTableVersion): + def test_entry_bounce_rate_with_property(self): self._create_pageviews( "p1", [ @@ -703,7 +662,6 @@ def test_entry_bounce_rate_with_property(self, session_table_version: SessionTab 
breakdown_by=WebStatsBreakdown.INITIAL_PAGE, include_bounce_rate=True, properties=[EventPropertyFilter(key="$pathname", operator=PropertyOperator.EXACT, value="/a")], - session_table_version=session_table_version, ).results self.assertEqual( @@ -713,8 +671,7 @@ def test_entry_bounce_rate_with_property(self, session_table_version: SessionTab results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_entry_bounce_rate_path_cleaning(self, session_table_version: SessionTableVersion): + def test_entry_bounce_rate_path_cleaning(self): self._create_pageviews( "p1", [ @@ -734,7 +691,6 @@ def test_entry_bounce_rate_path_cleaning(self, session_table_version: SessionTab {"regex": "\\/b\\/\\d+", "alias": "/b/:id"}, {"regex": "\\/c\\/\\d+", "alias": "/c/:id"}, ], - session_table_version=session_table_version, ).results self.assertEqual( @@ -744,8 +700,7 @@ def test_entry_bounce_rate_path_cleaning(self, session_table_version: SessionTab results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_source_medium_campaign(self, session_table_version: SessionTableVersion): + def test_source_medium_campaign(self): d1 = "d1" s1 = str(uuid7("2024-06-26")) @@ -785,7 +740,6 @@ def test_source_medium_campaign(self, session_table_version: SessionTableVersion "all", "2024-06-27", breakdown_by=WebStatsBreakdown.INITIAL_UTM_SOURCE_MEDIUM_CAMPAIGN, - session_table_version=session_table_version, ).results self.assertEqual( @@ -793,8 +747,7 @@ def test_source_medium_campaign(self, session_table_version: SessionTableVersion results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_null_in_utm_tags(self, session_table_version: SessionTableVersion): + def test_null_in_utm_tags(self): d1 = "d1" s1 = str(uuid7("2024-06-26")) @@ -836,7 +789,6 @@ def test_null_in_utm_tags(self, session_table_version: SessionTableVersion): "all", "2024-06-27", 
breakdown_by=WebStatsBreakdown.INITIAL_UTM_SOURCE, - session_table_version=session_table_version, ).results self.assertEqual( @@ -844,8 +796,7 @@ def test_null_in_utm_tags(self, session_table_version: SessionTableVersion): results, ) - @parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]]) - def test_is_not_set_filter(self, session_table_version: SessionTableVersion): + def test_is_not_set_filter(self): d1 = "d1" s1 = str(uuid7("2024-06-26")) @@ -888,7 +839,6 @@ def test_is_not_set_filter(self, session_table_version: SessionTableVersion): "2024-06-27", breakdown_by=WebStatsBreakdown.INITIAL_UTM_SOURCE, properties=[EventPropertyFilter(key="utm_source", operator=PropertyOperator.IS_NOT_SET)], - session_table_version=session_table_version, ).results self.assertEqual( diff --git a/posthog/models/sessions/sql.py b/posthog/models/sessions/sql.py index 680e1f7ff6f7b..93d3083a37763 100644 --- a/posthog/models/sessions/sql.py +++ b/posthog/models/sessions/sql.py @@ -7,6 +7,7 @@ AggregatingMergeTree, ) +# V1 Sessions table TABLE_BASE_NAME = "sessions" SESSIONS_DATA_TABLE = lambda: f"sharded_{TABLE_BASE_NAME}" @@ -21,6 +22,32 @@ ) DROP_SESSION_VIEW_SQL = lambda: f"DROP VIEW IF EXISTS {TABLE_BASE_NAME}_v ON CLUSTER '{settings.CLICKHOUSE_CLUSTER}'" +# Only teams that were grandfathered into the V1 sessions table are allowed to use it. Everyone else should use V2, +# i.e. raw_sessions. 
These are the teams that were seen to have changed their session table version in these Metabase +# queries: +# US: https://metabase.prod-us.posthog.dev/question#eyJkYXRhc2V0X3F1ZXJ5Ijp7InR5cGUiOiJuYXRpdmUiLCJuYXRpdmUiOnsicXVlcnkiOiJTRUxFQ1QgdGVhbV9pZCwgc1xuRlJPTSAoXG4gICAgU0VMRUNUIG1vZGlmaWVycy0-PidzZXNzaW9uVGFibGVWZXJzaW9uJyBBUyBzLCBpZCBhcyB0ZWFtX2lkXG4gICAgRlJPTSBwb3N0aG9nX3RlYW1cbikgc3ViXG5XSEVSRSBzICE9ICcnIiwidGVtcGxhdGUtdGFncyI6e319LCJkYXRhYmFzZSI6MzR9LCJkaXNwbGF5IjoidGFibGUiLCJwYXJhbWV0ZXJzIjpbXSwidmlzdWFsaXphdGlvbl9zZXR0aW5ncyI6e319 +# EU: https://metabase.prod-eu.posthog.dev/question#eyJkYXRhc2V0X3F1ZXJ5Ijp7InR5cGUiOiJuYXRpdmUiLCJuYXRpdmUiOnsicXVlcnkiOiJTRUxFQ1QgdGVhbV9pZCwgc1xuRlJPTSAoXG4gICAgU0VMRUNUIG1vZGlmaWVycy0-PidzZXNzaW9uVGFibGVWZXJzaW9uJyBBUyBzLCBpZCBhcyB0ZWFtX2lkXG4gICAgRlJPTSBwb3N0aG9nX3RlYW1cbikgc3ViXG5XSEVSRSBzICE9ICcnIiwidGVtcGxhdGUtdGFncyI6e319LCJkYXRhYmFzZSI6MzR9LCJkaXNwbGF5IjoidGFibGUiLCJwYXJhbWV0ZXJzIjpbXSwidmlzdWFsaXphdGlvbl9zZXR0aW5ncyI6e319 +# or that had contacted support about an issue. +# This list exists because we want to reduce the number of writes happening to this table, so we don't write to it +# for any team not in this list. Adding a team to this list is possible if needed, but would require changing this MV in +# production and backfilling this table with the management command backfill_sessions_table.
+ALLOWED_TEAM_IDS = [ + # posthog + 1, + 2, + # US query + 13610, # zendesk: https://posthoghelp.zendesk.com/agent/tickets/18001 + 19279, + 21173, + 29929, + 32050, + # EU query + 9910, + 11775, + 21129, + 31490, +] +# Comma-separated rendering of ALLOWED_TEAM_IDS; passed as `allowed_team_ids` to the SQL template below, where it fills the `team_id IN (...)` filter. +ALLOWED_TEAM_IDS_SQL = ", ".join(str(team_id) for team_id in ALLOWED_TEAM_IDS) # if updating these column definitions # you'll need to update the explicit column definitions in the materialized view creation statement below @@ -144,7 +171,7 @@ def source_column(column_name: str) -> str: sumIf(1, event='$autocapture') as autocapture_count FROM {database}.sharded_events -WHERE `$session_id` IS NOT NULL AND `$session_id` != '' +WHERE `$session_id` IS NOT NULL AND `$session_id` != '' AND team_id IN ({allowed_team_ids}) GROUP BY `$session_id`, team_id """.format( database=settings.CLICKHOUSE_DATABASE, @@ -168,6 +195,7 @@ def source_column(column_name: str) -> str: mc_cid_property=source_column("mc_cid"), igshid_property=source_column("igshid"), ttclid_property=source_column("ttclid"), + allowed_team_ids=ALLOWED_TEAM_IDS_SQL, ) )