From 56b91d11ddd610c4e1fb6a2f75164a5d3f52fd3a Mon Sep 17 00:00:00 2001 From: David Newell Date: Wed, 28 Aug 2024 09:29:40 +0100 Subject: [PATCH] chore: remove event properties from subquery (#24616) --- .../filters/mixins/session_recordings.py | 7 - .../session_recording_list_from_filters.py | 3 +- ...t_session_recording_list_from_filters.ambr | 150 +++++++++++++++++- ...est_session_recording_list_from_filters.py | 97 ++++++++++- 4 files changed, 246 insertions(+), 11 deletions(-) diff --git a/posthog/models/filters/mixins/session_recordings.py b/posthog/models/filters/mixins/session_recordings.py index aad2202dfdafe..e2df650f4a082 100644 --- a/posthog/models/filters/mixins/session_recordings.py +++ b/posthog/models/filters/mixins/session_recordings.py @@ -20,13 +20,6 @@ class SessionRecordingsMixin(PropertyMixin, BaseParamMixin): def order(self) -> str: return self._data.get("order", "start_time") - # Supports a legacy use case where events were ORed not ANDed - # Can be removed and replaced with ast_operand once the new universal replay filtering is out - @cached_property - def events_operand(self) -> type[Union[ast.And, ast.Or]]: - operand = self._data.get("operand", "OR") - return ast.And if operand == "AND" else ast.Or - @cached_property def console_log_filters(self) -> PropertyGroup: property_group = self._parse_data(key="console_log_filters") diff --git a/posthog/session_recordings/queries/session_recording_list_from_filters.py b/posthog/session_recordings/queries/session_recording_list_from_filters.py index 92d868e83f0b2..0885d97a38b3c 100644 --- a/posthog/session_recordings/queries/session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/session_recording_list_from_filters.py @@ -488,7 +488,8 @@ def _where_predicates(self) -> ast.Expr: (event_where_exprs, _) = self._event_predicates if event_where_exprs: - exprs.append(self._filter.events_operand(exprs=event_where_exprs)) + # we OR all events in the where and use hasAll / hasAny in the HAVING clause + exprs.append(ast.Or(exprs=event_where_exprs)) if self._team.person_on_events_mode and self.person_properties: exprs.append(property_to_expr(self.person_properties, team=self._team, scope="event")) diff --git a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr index 9dd57f56763e8..bb30391e92f19 100644 --- a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr +++ b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr @@ -3388,6 +3388,154 @@ max_bytes_before_external_group_by=0 ''' # --- +# name: TestSessionRecordingsListFromFilters.test_multiple_event_filters.2 + ''' + SELECT s.session_id AS session_id, + any(s.team_id), + any(s.distinct_id), + min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, + max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, + dateDiff('SECOND', start_time, end_time) AS duration, + argMinMerge(s.first_url) AS first_url, + sum(s.click_count), + sum(s.keypress_count), + sum(s.mouse_activity_count), + divide(sum(s.active_milliseconds), 1000) AS active_seconds, + minus(duration, active_seconds) AS inactive_seconds, + sum(s.console_log_count) AS console_log_count, + sum(s.console_warn_count) AS console_warn_count, + sum(s.console_error_count) AS console_error_count + FROM session_replay_events AS s + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-25 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), in(s.session_id, + (SELECT events.`$session_id` AS session_id + FROM events + WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-24 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), or(and(equals(events.event, '$pageview'), 1), and(equals(events.event, 'new-event2'), 1))) + GROUP BY events.`$session_id` + HAVING hasAny(groupUniqArray(events.event), ['$pageview', 'new-event2'])))) + GROUP BY s.session_id + HAVING 1 + ORDER BY start_time DESC + LIMIT 51 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_multiple_event_filters.3 + ''' + SELECT s.session_id AS session_id, + any(s.team_id), + any(s.distinct_id), + min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, + max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, + dateDiff('SECOND', start_time, end_time) AS duration, + argMinMerge(s.first_url) AS first_url, + sum(s.click_count), + sum(s.keypress_count), + sum(s.mouse_activity_count), + divide(sum(s.active_milliseconds), 1000) AS active_seconds, + minus(duration, active_seconds) AS inactive_seconds, + sum(s.console_log_count) AS console_log_count, + sum(s.console_warn_count) AS console_warn_count, + sum(s.console_error_count) AS console_error_count + FROM session_replay_events AS s + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-25 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), in(s.session_id, + (SELECT events.`$session_id` AS session_id + FROM events + WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-24 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), or(and(equals(events.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'foo'), ''), 'null'), '^"|"$', ''), 'bar'), 0)), and(equals(events.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'bar'), ''), 'null'), '^"|"$', ''), 'foo'), 0)))) + GROUP BY events.`$session_id` + HAVING hasAll(groupUniqArray(events.event), ['$pageview'])))) + GROUP BY s.session_id + HAVING 1 + ORDER BY start_time DESC + LIMIT 51 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_multiple_event_filters.4 + ''' + SELECT s.session_id AS session_id, + any(s.team_id), + any(s.distinct_id), + min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, + max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, + dateDiff('SECOND', start_time, end_time) AS duration, + argMinMerge(s.first_url) AS first_url, + sum(s.click_count), + sum(s.keypress_count), + sum(s.mouse_activity_count), + divide(sum(s.active_milliseconds), 1000) AS active_seconds, + minus(duration, active_seconds) AS inactive_seconds, + sum(s.console_log_count) AS console_log_count, + sum(s.console_warn_count) AS console_warn_count, + sum(s.console_error_count) AS console_error_count + FROM session_replay_events AS s + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-25 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), in(s.session_id, + (SELECT events.`$session_id` AS session_id + FROM events + WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-24 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), or(and(equals(events.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'foo'), ''), 'null'), '^"|"$', ''), 'bar'), 0)), and(equals(events.event, 'new-event'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'foo'), ''), 'null'), '^"|"$', ''), 'bar'), 0)))) + GROUP BY events.`$session_id` + HAVING hasAll(groupUniqArray(events.event), ['$pageview', 'new-event'])))) + GROUP BY s.session_id + HAVING 1 + ORDER BY start_time DESC + LIMIT 51 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_multiple_event_filters.5 + ''' + SELECT s.session_id AS session_id, + any(s.team_id), + any(s.distinct_id), + min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, + max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, + dateDiff('SECOND', start_time, end_time) AS duration, + argMinMerge(s.first_url) AS first_url, + sum(s.click_count), + sum(s.keypress_count), + sum(s.mouse_activity_count), + divide(sum(s.active_milliseconds), 1000) AS active_seconds, + minus(duration, active_seconds) AS inactive_seconds, + sum(s.console_log_count) AS console_log_count, + sum(s.console_warn_count) AS console_warn_count, + sum(s.console_error_count) AS console_error_count + FROM session_replay_events AS s + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-25 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), in(s.session_id, + (SELECT events.`$session_id` AS session_id + FROM events + WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-24 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), or(and(equals(events.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'foo'), ''), 'null'), '^"|"$', ''), 'bar'), 0)), and(equals(events.event, 'new-event'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'foo'), ''), 'null'), '^"|"$', ''), 'bar'), 0)))) + GROUP BY events.`$session_id` + HAVING hasAny(groupUniqArray(events.event), ['$pageview', 'new-event'])))) + GROUP BY s.session_id + HAVING 1 + ORDER BY start_time DESC + LIMIT 51 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- # name: TestSessionRecordingsListFromFilters.test_operand_or_event_filters ''' SELECT s.session_id AS session_id, @@ -3409,7 +3557,7 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-25 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), in(s.session_id, (SELECT events.`$session_id` AS session_id FROM events - WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-24 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), and(and(equals(events.event, '$pageview'), 1), and(equals(events.event, 'custom_event'), 1))) + WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-24 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), or(and(equals(events.event, '$pageview'), 1), and(equals(events.event, 'custom_event'), 1))) GROUP BY events.`$session_id` HAVING hasAll(groupUniqArray(events.event), ['$pageview', 'custom_event'])))) GROUP BY s.session_id diff --git a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py index 5f57396df1a7a..aeb434cb89ce9 100644 --- a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py @@ -1028,12 +1028,17 @@ def test_multiple_event_filters(self): self.create_event( user, self.an_hour_ago, - properties={"$session_id": session_id, "$window_id": "1"}, + properties={"$session_id": session_id, "$window_id": "1", "foo": "bar"}, ) self.create_event( user, self.an_hour_ago, - properties={"$session_id": session_id, "$window_id": "1"}, + properties={"$session_id": session_id, "$window_id": "1", "bar": "foo"}, + ) + self.create_event( + user, + self.an_hour_ago, + properties={"$session_id": session_id, "$window_id": "1", "bar": "foo"}, event_name="new-event", ) produce_replay_summary( @@ -1085,6 +1090,94 @@ def test_multiple_event_filters(self): ) assert session_recordings == [] + # it uses hasAny instead of hasAll because of the OR filter + (session_recordings, _, _) = self._filter_recordings_by( + { + "events": [ + { + "id": "$pageview", + "type": "events", + "order": 0, + "name": "$pageview", + }, + { + "id": "new-event2", + "type": "events", + "order": 0, + "name": "new-event2", + }, + ], + "operand": "OR", + } + ) + assert len(session_recordings) == 1 + + # two events with the same name + (session_recordings, _, _) = self._filter_recordings_by( + { + "events": [ + { + "id": "$pageview", + "type": "events", + "name": "$pageview", + "properties": [{"key": "foo", "value": ["bar"], "operator": "exact", "type": "event"}], + }, + { + "id": "$pageview", + "type": "events", + "name": "$pageview", + "properties": [{"key": "bar", "value": ["foo"], "operator": "exact", "type": "event"}], + }, + ], + "operand": "AND", + } + ) + assert len(session_recordings) == 1 + + # two events with different names + (session_recordings, _, _) = self._filter_recordings_by( + { + "events": [ + { + "id": "$pageview", + "type": "events", + "name": "$pageview", + "properties": [{"key": "foo", "value": ["bar"], "operator": "exact", "type": "event"}], + }, + { + "id": "new-event", + "type": "events", + "name": "new-event", + "properties": [{"key": "foo", "value": ["bar"], "operator": "exact", "type": "event"}], + }, + ], + "operand": "AND", + } + ) + assert len(session_recordings) == 0 + + # two events with different names + (session_recordings, _, _) = self._filter_recordings_by( + { + "events": [ + { + "id": "$pageview", + "type": "events", + "name": "$pageview", + "properties": [{"key": "foo", "value": ["bar"], "operator": "exact", "type": "event"}], + }, + { + "id": "new-event", + "type": "events", + "name": "new-event", + "properties": [{"key": "foo", "value": ["bar"], "operator": "exact", "type": "event"}], + }, + ], + "operand": "OR", + } + ) + assert len(session_recordings) == 1 + @snapshot_clickhouse_queries @also_test_with_materialized_columns(["$session_id", "$browser"], person_properties=["email"]) @freeze_time("2023-01-04")