From c6dc12792398bc15ded447d00ad63e19e16627c2 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Mon, 21 Oct 2024 11:27:58 +0200 Subject: [PATCH] fix: smaller cohort resource usage for replay filters (#25699) Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- .../session_recording_list_from_filters.py | 73 ++++- ...t_session_recording_list_from_filters.ambr | 253 +++++++++++++++--- ...est_session_recording_list_from_filters.py | 139 +++++++++- posthog/test/base.py | 9 + 4 files changed, 428 insertions(+), 46 deletions(-) diff --git a/posthog/session_recordings/queries/session_recording_list_from_filters.py b/posthog/session_recordings/queries/session_recording_list_from_filters.py index 354e4232cee84..38e06c4e4837d 100644 --- a/posthog/session_recordings/queries/session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/session_recording_list_from_filters.py @@ -2,6 +2,8 @@ from typing import Any, NamedTuple, cast, Optional, Union from datetime import datetime, timedelta +import posthoganalytics + from posthog.hogql import ast from posthog.hogql.ast import CompareOperation from posthog.hogql.parser import parse_select @@ -33,12 +35,22 @@ def is_group_property(p: Property) -> bool: return p.type == "group" +def is_cohort_property(p: Property) -> bool: + return "cohort" in p.type + + class SessionRecordingQueryResult(NamedTuple): results: list has_more_recording: bool timings: list[QueryTiming] | None = None +class UnexpectedQueryProperties(Exception): + def __init__(self, remaining_properties: PropertyGroup | None): + self.remaining_properties = remaining_properties + super().__init__(f"Unexpected properties in query: {remaining_properties}") + + class SessionRecordingListFromFilters: SESSION_RECORDINGS_DEFAULT_LIMIT = 50 @@ -224,11 +236,19 @@ def _where_predicates(self) -> Union[ast.And, ast.Or]: ) ) - remaining_properties = self._strip_person_and_event_properties(self._filter.property_groups) - if remaining_properties: - logger.info( - "session_replay_query_builder has unhandled properties", unhandled_properties=remaining_properties + cohort_subquery = CohortPropertyGroupsSubQuery(self._team, self._filter, self.ttl_days).get_query() + if cohort_subquery: + optional_exprs.append( + ast.CompareOperation( + op=ast.CompareOperationOp.In, + left=ast.Field(chain=["s", "distinct_id"]), + right=cohort_subquery, + ) ) + + remaining_properties = self._strip_person_and_event_and_cohort_properties(self._filter.property_groups) + if remaining_properties: + posthoganalytics.capture_exception(UnexpectedQueryProperties(remaining_properties)) optional_exprs.append(property_to_expr(remaining_properties, team=self._team, scope="replay")) if self._filter.console_log_filters.values: @@ -267,11 +287,14 @@ def _where_predicates(self) -> Union[ast.And, ast.Or]: def _having_predicates(self) -> ast.Expr: return property_to_expr(self._filter.having_predicates, team=self._team, scope="replay") - def _strip_person_and_event_properties(self, property_group: PropertyGroup) -> PropertyGroup | None: + def _strip_person_and_event_and_cohort_properties(self, property_group: PropertyGroup) -> PropertyGroup | None: property_groups_to_keep = [ g for g in property_group.flat - if not is_event_property(g) and not is_person_property(g) and not is_group_property(g) + if not is_event_property(g) + and not is_person_property(g) + and not is_group_property(g) + and not is_cohort_property(g) ] return ( @@ -334,6 +357,44 @@ def _where_predicates(self) -> ast.Expr: ) +class CohortPropertyGroupsSubQuery: + _team: Team + _filter: SessionRecordingsFilter + _ttl_days: int + + raw_cohort_to_distinct_id = """ + select distinct_id + from person_distinct_ids + where {cohort_predicate} + """ + + def __init__(self, team: Team, filter: SessionRecordingsFilter, ttl_days: int): + self._team = team + self._filter = filter + self._ttl_days = ttl_days + + def get_query(self) -> ast.SelectQuery | ast.SelectUnionQuery | None: + if self.cohort_properties: + return parse_select( + self.raw_cohort_to_distinct_id, + {"cohort_predicate": property_to_expr(self.cohort_properties, team=self._team, scope="replay")}, + ) + + return None + + @cached_property + def cohort_properties(self) -> PropertyGroup | None: + cohort_property_groups = [g for g in self._filter.property_groups.flat if is_cohort_property(g)] + return ( + PropertyGroup( + type=self._filter.property_operand, + values=cohort_property_groups, + ) + if cohort_property_groups + else None + ) + + class PersonsIdCompareOperation: _team: Team _filter: SessionRecordingsFilter diff --git a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr index 72161f92f2223..a0fc5699f9ee7 100644 --- a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr +++ b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr @@ -3161,17 +3161,18 @@ sum(s.console_error_count) AS console_error_count, ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing FROM session_replay_events AS s - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS s__pdi ON equals(s.distinct_id, s__pdi.distinct_id) - WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), ifNull(in(s__pdi.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)) + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), in(s.distinct_id, + (SELECT person_distinct_ids.distinct_id AS distinct_id + FROM + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids + WHERE ifNull(in(person_distinct_ids.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -3224,22 +3225,23 @@ sum(s.console_error_count) AS console_error_count, ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing FROM session_replay_events AS s - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS s__pdi ON equals(s.distinct_id, s__pdi.distinct_id) WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), and(in(s.session_id, (SELECT events.`$session_id` AS session_id FROM events WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-13 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), and(equals(events.event, '$pageview'), 1)) GROUP BY events.`$session_id` - HAVING hasAll(groupUniqArray(events.event), ['$pageview']))), ifNull(in(s__pdi.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))) + HAVING hasAll(groupUniqArray(events.event), ['$pageview']))), in(s.distinct_id, + (SELECT person_distinct_ids.distinct_id AS distinct_id + FROM + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids + WHERE ifNull(in(person_distinct_ids.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -3272,22 +3274,23 @@ sum(s.console_error_count) AS console_error_count, ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing FROM session_replay_events AS s - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS s__pdi ON equals(s.distinct_id, s__pdi.distinct_id) WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), and(in(s.session_id, (SELECT events.`$session_id` AS session_id FROM events WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-13 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), and(equals(events.event, 'custom_event'), 1)) GROUP BY events.`$session_id` - HAVING hasAll(groupUniqArray(events.event), ['custom_event']))), ifNull(in(s__pdi.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))) + HAVING hasAll(groupUniqArray(events.event), ['custom_event']))), in(s.distinct_id, + (SELECT person_distinct_ids.distinct_id AS distinct_id + FROM + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids + WHERE ifNull(in(person_distinct_ids.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -3407,6 +3410,190 @@ max_bytes_before_external_group_by=0 ''' # --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties + ''' + + SELECT count(DISTINCT person_id) + FROM person_static_cohort + WHERE team_id = 2 + AND cohort_id = 2 + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.1 + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.2 + ''' + /* cohort_calculation: */ + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.3 + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.4 + ''' + /* cohort_calculation: */ + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.5 + ''' + SELECT s.session_id AS session_id, + any(s.team_id), + any(s.distinct_id), + min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, + max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, + dateDiff('SECOND', start_time, end_time) AS duration, + argMinMerge(s.first_url) AS first_url, + sum(s.click_count) AS click_count, + sum(s.keypress_count) AS keypress_count, + sum(s.mouse_activity_count) AS mouse_activity_count, + divide(sum(s.active_milliseconds), 1000) AS active_seconds, + minus(duration, active_seconds) AS inactive_seconds, + sum(s.console_log_count) AS console_log_count, + sum(s.console_warn_count) AS console_warn_count, + sum(s.console_error_count) AS console_error_count, + ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing + FROM session_replay_events AS s + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), in(s.distinct_id, + (SELECT person_distinct_ids.distinct_id AS distinct_id + FROM + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids + WHERE ifNull(in(person_distinct_ids.person_id, + (SELECT person_static_cohort.person_id AS person_id + FROM person_static_cohort + WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 2)))), 0)))) + GROUP BY s.session_id + HAVING 1 + ORDER BY start_time DESC + LIMIT 51 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.6 + ''' + SELECT s.session_id AS session_id, + any(s.team_id), + any(s.distinct_id), + min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, + max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, + dateDiff('SECOND', start_time, end_time) AS duration, + argMinMerge(s.first_url) AS first_url, + sum(s.click_count) AS click_count, + sum(s.keypress_count) AS keypress_count, + sum(s.mouse_activity_count) AS mouse_activity_count, + divide(sum(s.active_milliseconds), 1000) AS active_seconds, + minus(duration, active_seconds) AS inactive_seconds, + sum(s.console_log_count) AS console_log_count, + sum(s.console_warn_count) AS console_warn_count, + sum(s.console_error_count) AS console_error_count, + ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing + FROM session_replay_events AS s + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), in(s.distinct_id, + (SELECT person_distinct_ids.distinct_id AS distinct_id + FROM + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids + WHERE ifNull(in(person_distinct_ids.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)))) + GROUP BY s.session_id + HAVING 1 + ORDER BY start_time DESC + LIMIT 51 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.7 + ''' + SELECT s.session_id AS session_id, + any(s.team_id), + any(s.distinct_id), + min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, + max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, + dateDiff('SECOND', start_time, end_time) AS duration, + argMinMerge(s.first_url) AS first_url, + sum(s.click_count) AS click_count, + sum(s.keypress_count) AS keypress_count, + sum(s.mouse_activity_count) AS mouse_activity_count, + divide(sum(s.active_milliseconds), 1000) AS active_seconds, + minus(duration, active_seconds) AS inactive_seconds, + sum(s.console_log_count) AS console_log_count, + sum(s.console_warn_count) AS console_warn_count, + sum(s.console_error_count) AS console_error_count, + ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing + FROM session_replay_events AS s + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), in(s.distinct_id, + (SELECT person_distinct_ids.distinct_id AS distinct_id + FROM + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids + WHERE and(ifNull(in(person_distinct_ids.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0), ifNull(in(person_distinct_ids.person_id, + (SELECT person_static_cohort.person_id AS person_id + FROM person_static_cohort + WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 2)))), 0))))) + GROUP BY s.session_id + HAVING 1 + ORDER BY start_time DESC + LIMIT 51 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- # name: TestSessionRecordingsListFromFilters.test_multiple_event_filters ''' SELECT s.session_id AS session_id, diff --git a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py index eec4a40f74b75..e18cb0941d27a 100644 --- a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py @@ -2161,15 +2161,141 @@ def test_filter_with_cohort_properties(self): { "key": "id", "value": cohort.pk, - "operator": None, + "operator": "in", "type": "cohort", } ] } ) - assert len(session_recordings) == 1 - assert session_recordings[0]["session_id"] == session_id_two + assert [x["session_id"] for x in session_recordings] == [session_id_two] + + @snapshot_clickhouse_queries + @also_test_with_materialized_columns(person_properties=["$some_prop"]) + def test_filter_with_static_and_dynamic_cohort_properties(self): + with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): + with freeze_time("2021-08-21T20:00:00.000Z"): + user_one = "test_filter_with_cohort_properties-user-in-static-cohort" + user_two = "test_filter_with_cohort_properties-user2-in-dynamic-cohort" + user_three = "test_filter_with_cohort_properties-user3-in-both-cohort" + + session_id_one = ( + f"in-static-cohort-test_filter_with_static_and_dynamic_cohort_properties-1-{str(uuid4())}" + ) + session_id_two = ( + f"in-dynamic-cohort-test_filter_with_static_and_dynamic_cohort_properties-2-{str(uuid4())}" + ) + session_id_three = ( + f"in-both-cohort-test_filter_with_static_and_dynamic_cohort_properties-3-{str(uuid4())}" + ) + + Person.objects.create(team=self.team, distinct_ids=[user_one], properties={"email": "in@static.cohort"}) + Person.objects.create( + team=self.team, + distinct_ids=[user_two], + properties={"email": "in@dynamic.cohort", "$some_prop": "some_val"}, + ) + Person.objects.create( + team=self.team, + distinct_ids=[user_three], + properties={"email": "in@both.cohorts", "$some_prop": "some_val"}, + ) + + dynamic_cohort = Cohort.objects.create( + team=self.team, + name="cohort1", + groups=[ + { + "properties": [ + { + "key": "$some_prop", + "value": "some_val", + "type": "person", + } + ] + } + ], + ) + + static_cohort = Cohort.objects.create(team=self.team, name="a static cohort", groups=[], is_static=True) + static_cohort.insert_users_by_list([user_one, user_three]) + + dynamic_cohort.calculate_people_ch(pending_version=0) + static_cohort.calculate_people_ch(pending_version=0) + + replay_summaries = [ + (user_one, session_id_one), + (user_two, session_id_two), + (user_three, session_id_three), + ] + for distinct_id, session_id in replay_summaries: + produce_replay_summary( + distinct_id=distinct_id, + session_id=session_id, + first_timestamp=self.an_hour_ago, + team_id=self.team.id, + ) + produce_replay_summary( + distinct_id=distinct_id, + session_id=session_id, + first_timestamp=self.an_hour_ago + relativedelta(seconds=30), + team_id=self.team.id, + ) + + (session_recordings, _, _) = self._filter_recordings_by( + { + "properties": [ + { + "key": "id", + "value": static_cohort.pk, + "operator": "in", + "type": "cohort", + }, + ] + } + ) + + assert sorted([x["session_id"] for x in session_recordings]) == sorted( + [session_id_one, session_id_three] + ) + + (session_recordings, _, _) = self._filter_recordings_by( + { + "properties": [ + { + "key": "id", + "value": dynamic_cohort.pk, + "operator": "in", + "type": "cohort", + }, + ] + } + ) + + assert sorted([x["session_id"] for x in session_recordings]) == sorted( + [session_id_two, session_id_three] + ) + + (session_recordings, _, _) = self._filter_recordings_by( + { + "properties": [ + { + "key": "id", + "value": dynamic_cohort.pk, + "operator": "in", + "type": "cohort", + }, + { + "key": "id", + "value": static_cohort.pk, + "operator": "in", + "type": "cohort", + }, + ] + } + ) + + assert sorted([x["session_id"] for x in session_recordings]) == [session_id_three] @snapshot_clickhouse_queries @also_test_with_materialized_columns(person_properties=["$some_prop"]) @@ -2251,7 +2377,7 @@ def test_filter_with_events_and_cohorts(self): { "key": "id", "value": cohort.pk, - "operator": None, + "operator": "in", "type": "cohort", } ], @@ -2274,7 +2400,7 @@ def test_filter_with_events_and_cohorts(self): { "key": "id", "value": cohort.pk, - "operator": None, + "operator": "in", "type": "cohort", } ], @@ -2289,8 +2415,7 @@ def test_filter_with_events_and_cohorts(self): } ) - assert len(session_recordings) == 1 - assert session_recordings[0]["session_id"] == session_id_two + assert [x["session_id"] for x in session_recordings] == [session_id_two] @snapshot_clickhouse_queries @also_test_with_materialized_columns(["$current_url"]) diff --git a/posthog/test/base.py b/posthog/test/base.py index ba2f5ea2f460f..385da83ba11ab 100644 --- a/posthog/test/base.py +++ b/posthog/test/base.py @@ -576,6 +576,15 @@ def assertQueryMatchesSnapshot(self, query, params=None, replace_all_numbers=Fal r"_condition_X_level", query, ) + + # replace cohort tuples + # like (tuple(cohortpeople.cohort_id, cohortpeople.version), [(35, 0)]) + query = re.sub( + r"\(tuple\((.*)\.cohort_id, (.*)\.version\), \[\(\d+, \d+\)\]\)", + r"(tuple(\1.cohort_id, \2.version), [(2, 0)])", + query, + ) + #### Cohort replacements end # Replace organization_id and notebook_id lookups, for postgres