From d16aab49f711a04885f33fe74f70f6636ce03448 Mon Sep 17 00:00:00 2001 From: James Greenhill Date: Thu, 4 Jan 2024 04:14:39 -0800 Subject: [PATCH] fix: Don't use mat cols for session recordings events query (#19491) * fix: Don't use mat cols for session recordings events query * test fix * Update query snapshots * Update query snapshots * add comment for the future traveller * fix --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Paul D'Ambra --- posthog/queries/event_query/event_query.py | 3 ++- .../queries/session_recording_list_from_replay_summary.py | 7 ++++++- .../test_session_recording_list_from_session_replay.ambr | 4 ++-- .../test_session_recording_list_from_session_replay.py | 4 +++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/posthog/queries/event_query/event_query.py b/posthog/queries/event_query/event_query.py index 5018892060873..544afee2a2a10 100644 --- a/posthog/queries/event_query/event_query.py +++ b/posthog/queries/event_query/event_query.py @@ -259,6 +259,7 @@ def _get_prop_groups( person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN, person_id_joined_alias="person_id", prepend="global", + allow_denormalized_props=True, ) -> Tuple[str, Dict]: if not prop_group: return "", {} @@ -276,7 +277,7 @@ def _get_prop_groups( property_group=props_to_filter, prepend=prepend, table_name=self.EVENT_TABLE_ALIAS, - allow_denormalized_props=True, + allow_denormalized_props=allow_denormalized_props, person_properties_mode=person_properties_mode, person_id_joined_alias=person_id_joined_alias, hogql_context=self._filter.hogql_context, diff --git a/posthog/session_recordings/queries/session_recording_list_from_replay_summary.py b/posthog/session_recordings/queries/session_recording_list_from_replay_summary.py index 57c7a8864193f..0b94cda977a0a 100644 --- a/posthog/session_recordings/queries/session_recording_list_from_replay_summary.py +++ b/posthog/session_recordings/queries/session_recording_list_from_replay_summary.py @@ -118,7 +118,7 @@ def _get_events_timestamp_clause(self) -> Tuple[str, Dict[str, Any]]: @staticmethod def _get_console_log_clause( - console_logs_filter: List[Literal["error", "warn", "log"]] + console_logs_filter: List[Literal["error", "warn", "log"]], ) -> Tuple[str, Dict[str, Any]]: return ( ( @@ -425,6 +425,11 @@ def get_query(self, select_event_ids: bool = False) -> Tuple[str, Dict[str, Any] ], ), person_id_joined_alias=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id", + # TRICKY: we saw unusual memory usage behavior in EU clickhouse cluster + # when allowing use of denormalized properties in this query + # it is likely this can be returned to the default of True in future + # but would need careful monitoring + allow_denormalized_props=False, ) ( diff --git a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_session_replay.ambr b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_session_replay.ambr index 745b5a4e444c7..45667c11eb6b3 100644 --- a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_session_replay.ambr +++ b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_session_replay.ambr @@ -2574,7 +2574,7 @@ AND timestamp <= '2021-01-22 08:00:00' WHERE notEmpty(`$session_id`) AND (event = '$pageview') - AND (has(['false'], "mat_is_internal_user") + AND (has(['false'], replaceRegexpAll(JSONExtractRaw(e.properties, 'is_internal_user'), '^"|"$', '')) AND ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Chrome'), 0)) AND e.distinct_id in (select distinct_id @@ -3842,7 +3842,7 @@ AND timestamp >= '2021-01-13 12:00:00' AND timestamp <= '2021-01-22 08:00:00' WHERE notEmpty(`$session_id`) - AND (has(['false'], "mat_is_internal_user")) + AND (has(['false'], replaceRegexpAll(JSONExtractRaw(e.properties, 'is_internal_user'), '^"|"$', ''))) GROUP BY `$session_id` HAVING 1=1) as session_events_sub_query) GROUP BY session_id diff --git a/posthog/session_recordings/queries/test/test_session_recording_list_from_session_replay.py b/posthog/session_recordings/queries/test/test_session_recording_list_from_session_replay.py index b3bc8bf1c03f8..20447722f0b67 100644 --- a/posthog/session_recordings/queries/test/test_session_recording_list_from_session_replay.py +++ b/posthog/session_recordings/queries/test/test_session_recording_list_from_session_replay.py @@ -2698,7 +2698,9 @@ def test_event_filter_with_hogql_event_properties_test_accounts_excluded(self): (session_recordings, _) = session_recording_list_instance.run() self.assertEqual(len(session_recordings), 1) - @also_test_with_materialized_columns(event_properties=["is_internal_user"]) + # TRICKY: we had to disable use of materialized columns for part of the query generation + # due to RAM usage issues on the EU cluster + @also_test_with_materialized_columns(event_properties=["is_internal_user"], verify_no_jsonextract=False) @freeze_time("2021-01-21T20:00:00.000Z") @snapshot_clickhouse_queries def test_top_level_event_property_test_account_filter(self):