From 62f2a63a6591eed3fe6e9862b215ba3ff19c723e Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Sun, 24 Nov 2024 11:14:13 +0000 Subject: [PATCH] fiddling --- .../session_recording_list_from_query.py | 30 ++- ...est_session_recording_list_from_filters.py | 167 ++++++++------- .../test_session_recording_list_from_query.py | 196 +++++++++--------- 3 files changed, 203 insertions(+), 190 deletions(-) diff --git a/posthog/session_recordings/queries/session_recording_list_from_query.py b/posthog/session_recordings/queries/session_recording_list_from_query.py index 1fed3e7bbb699..2a5f72ba4f2e6 100644 --- a/posthog/session_recordings/queries/session_recording_list_from_query.py +++ b/posthog/session_recordings/queries/session_recording_list_from_query.py @@ -11,6 +11,7 @@ from posthog.hogql.property import property_to_expr, action_to_expr from posthog.hogql.query import execute_hogql_query from posthog.hogql_queries.insights.paginators import HogQLHasMorePaginator +from posthog.hogql_queries.legacy_compatibility.clean_properties import clean_global_properties from posthog.hogql_queries.legacy_compatibility.filter_to_query import MathAvailability, legacy_entity_to_node from posthog.hogql_queries.utils.query_date_range import QueryDateRange from posthog.models import Team, Property, Entity, Action @@ -30,22 +31,24 @@ import structlog +from posthog.types import AnyPropertyFilter + logger = structlog.get_logger(__name__) -def is_event_property(p: Property) -> bool: +def is_event_property(p: AnyPropertyFilter) -> bool: return p.type == "event" or (p.type == "hogql" and bool(re.search(r"(? bool: +def is_person_property(p: AnyPropertyFilter) -> bool: return p.type == "person" or (p.type == "hogql" and "person.properties" in p.key) -def is_group_property(p: Property) -> bool: +def is_group_property(p: AnyPropertyFilter) -> bool: return p.type == "group" -def is_cohort_property(p: Property) -> bool: +def is_cohort_property(p: AnyPropertyFilter) -> bool: return "cohort" in p.type @@ -147,7 +150,7 @@ def __init__( @cached_property def _test_account_filters(self) -> list[Property]: - return [Property(**p) for p in self._team.test_account_filters] + return [clean_global_properties(p) for p in self._team.test_account_filters] @property def ttl_days(self): @@ -194,7 +197,7 @@ def get_query(self): ) def _order_by_clause(self) -> ast.Field: - return ast.Field(chain=[self._query.order]) + return ast.Field(chain=[self._query.order.value]) @cached_property def query_date_range(self): @@ -327,12 +330,12 @@ def ast_operand(self) -> type[Union[ast.And, ast.Or]]: return ast.And if self.property_operand == "AND" else ast.Or def _strip_person_and_event_and_cohort_properties( - self, properties: list[Property] | list[PropertyGroup] | None - ) -> PropertyGroup | None: + self, properties: list[AnyPropertyFilter] | None + ) -> list[AnyPropertyFilter] | None: if not properties: return None - property_groups_to_keep = [ + properties_to_keep = [ g for g in properties if not is_event_property(g) @@ -341,14 +344,7 @@ def _strip_person_and_event_and_cohort_properties( and not is_cohort_property(g) ] - return ( - PropertyGroup( - type=self.property_operand, - values=property_groups_to_keep, - ) - if property_groups_to_keep - else None - ) + return properties_to_keep def poe_is_active(team: Team) -> bool: diff --git a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py index 94187ec9fc1b0..61f95283e963a 100644 --- a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py @@ -1,10 +1,13 @@ from datetime import datetime +from typing import Literal from unittest.mock import ANY from uuid import uuid4 from dateutil.relativedelta import relativedelta from django.utils.timezone import now from freezegun import freeze_time +from parameterized import parameterized + from posthog.clickhouse.client import sync_execute from posthog.clickhouse.log_entries import TRUNCATE_LOG_ENTRIES_TABLE_SQL from posthog.constants import AvailableFeature @@ -1613,8 +1616,53 @@ def test_operand_or_event_filters(self): assert len(session_recordings) == 2 assert sorted([r["session_id"] for r in session_recordings]) == sorted([session_id_two, session_id_one]) + @parameterized.expand( + [ + # Case 1: Neither has WARN and message "random" + ( + '[{"key": "level", "value": ["warn"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', + "AND", + 0, + [], + ), + # Case 2: AND only matches one recording + ( + '[{"key": "level", "value": ["info"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', + "AND", + 1, + ["both_log_filters"], + ), + # Case 3: Only one is WARN level + ( + '[{"key": "level", "value": ["warn"], "operator": "exact", "type": "log_entry"}]', + "AND", + 1, + ["one_log_filter"], + ), + # Case 4: Only one has message "random" + ( + '[{"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', + "AND", + 1, + ["both_log_filters"], + ), + # Case 5: OR matches both (update assertion if TODO behavior changes) + ( + '[{"key": "level", "value": ["warn"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', + "OR", + 0, # Adjust this to match the correct behavior once implemented + [], + ), + ] + ) @snapshot_clickhouse_queries - def test_operand_or_filters(self): + def test_operand_or_filters( + self, + console_log_filters: str, + operand: Literal["AND", "OR"], + expected_count: int, + expected_session_ids: list[str], + ) -> None: user = "test_operand_or_filter-user" Person.objects.create(team=self.team, distinct_ids=[user], properties={"email": "bla"}) @@ -1624,13 +1672,10 @@ def test_operand_or_filters(self): session_id=session_with_both_log_filters, first_timestamp=self.an_hour_ago, team_id=self.team.id, - console_warn_count=1, - log_messages={ - "warn": [ - "random", - ], - }, + console_log_count=1, + log_messages={"info": ["random"]}, ) + session_with_one_log_filter = "one_log_filter" produce_replay_summary( distinct_id="user", @@ -1638,29 +1683,15 @@ def test_operand_or_filters(self): first_timestamp=self.an_hour_ago, team_id=self.team.id, console_warn_count=1, - log_messages={ - "warn": [ - "warn", - ], - }, + log_messages={"warn": ["warn"]}, ) - (session_recordings, _, _) = self._filter_recordings_by( - { - "console_log_filters": '[{"key": "level", "value": ["warn"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', - "operand": "AND", - } + session_recordings, _, _ = self._filter_recordings_by( + {"console_log_filters": console_log_filters, "operand": operand} ) - assert len(session_recordings) == 1 - assert session_recordings[0]["session_id"] == session_with_both_log_filters - (session_recordings, _, _) = self._filter_recordings_by( - { - "console_log_filters": '[{"key": "level", "value": ["warn"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', - "operand": "OR", - } - ) - assert len(session_recordings) == 2 + assert len(session_recordings) == expected_count + assert sorted([rec["session_id"] for rec in session_recordings]) == sorted(expected_session_ids) @snapshot_clickhouse_queries def test_operand_or_mandatory_filters(self): @@ -3030,18 +3061,40 @@ def test_filter_for_recordings_with_mixed_console_counts(self): @snapshot_clickhouse_queries @freeze_time("2021-01-21T20:00:00.000Z") - def test_filter_for_recordings_by_console_text(self): + @parameterized.expand( + [ + # Case 1: OR operand, message 4 matches in warn and error + ( + '[{"key": "level", "value": ["warn", "error"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "message 4", "operator": "icontains", "type": "log_entry"}]', + "OR", + ["with-errors-session", "with-two-session", "with-warns-session", "with-logs-session"], + ), + # Case 2: AND operand, message 5 matches only in warn + ( + '[{"key": "level", "value": ["warn", "error"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "message 5", "operator": "icontains", "type": "log_entry"}]', + "AND", + ["with-warns-session"], + ), + # Case 3: AND operand, message 5 does not match log level "info" + ( + '[{"key": "level", "value": ["info"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "message 5", "operator": "icontains", "type": "log_entry"}]', + "AND", + [], + ), + ] + ) + def test_filter_for_recordings_by_console_text( + self, + console_log_filters: str, + operand: Literal["AND", "OR"], + expected_session_ids: list[str], + ) -> None: Person.objects.create(team=self.team, distinct_ids=["user"], properties={"email": "bla"}) - with_logs_session_id = "with-logs-session" - with_warns_session_id = "with-warns-session" - with_errors_session_id = "with-errors-session" - with_two_session_id = "with-two-session" - with_no_matches_session_id = "with-no-matches-session" - + # Create sessions produce_replay_summary( distinct_id="user", - session_id=with_logs_session_id, + session_id="with-logs-session", first_timestamp=self.an_hour_ago, team_id=self.team.id, console_log_count=4, @@ -3056,7 +3109,7 @@ def test_filter_for_recordings_by_console_text(self): ) produce_replay_summary( distinct_id="user", - session_id=with_warns_session_id, + session_id="with-warns-session", first_timestamp=self.an_hour_ago, team_id=self.team.id, console_warn_count=5, @@ -3072,7 +3125,7 @@ def test_filter_for_recordings_by_console_text(self): ) produce_replay_summary( distinct_id="user", - session_id=with_errors_session_id, + session_id="with-errors-session", first_timestamp=self.an_hour_ago, team_id=self.team.id, console_error_count=4, @@ -3087,7 +3140,7 @@ def test_filter_for_recordings_by_console_text(self): ) produce_replay_summary( distinct_id="user", - session_id=with_two_session_id, + session_id="with-two-session", first_timestamp=self.an_hour_ago, team_id=self.team.id, console_error_count=4, @@ -3097,13 +3150,14 @@ def test_filter_for_recordings_by_console_text(self): "error message 1", "error message 2", "error message 3", + "error message 4", ], "info": ["log message 1", "log message 2", "log message 3"], }, ) produce_replay_summary( distinct_id="user", - session_id=with_no_matches_session_id, + session_id="with-no-matches-session", first_timestamp=self.an_hour_ago, team_id=self.team.id, console_error_count=4, @@ -3113,41 +3167,12 @@ def test_filter_for_recordings_by_console_text(self): }, ) - (session_recordings, _, _) = self._filter_recordings_by( - { - # there are 5 warn and 4 error logs, message 4 matches in both - "console_log_filters": '[{"key": "level", "value": ["warn", "error"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "message 4", "operator": "icontains", "type": "log_entry"}]', - "operand": "OR", - } - ) - - assert sorted([sr["session_id"] for sr in session_recordings]) == sorted( - [with_errors_session_id, with_two_session_id, with_warns_session_id, with_logs_session_id] - ) - - (session_recordings, _, _) = self._filter_recordings_by( - { - # there are 5 warn and 4 error logs, message 5 matches only matches in warn - "console_log_filters": '[{"key": "level", "value": ["warn", "error"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "message 5", "operator": "icontains", "type": "log_entry"}]', - "operand": "AND", - } - ) - - assert sorted([sr["session_id"] for sr in session_recordings]) == sorted( - [ - with_warns_session_id, - ] - ) - - (session_recordings, _, _) = self._filter_recordings_by( - { - # message 5 does not match log level "info" - "console_log_filters": '[{"key": "level", "value": ["info"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "message 5", "operator": "icontains", "type": "log_entry"}]', - "operand": "AND", - } + # Perform the filtering and validate results + session_recordings, _, _ = self._filter_recordings_by( + {"console_log_filters": console_log_filters, "operand": operand} ) - assert sorted([sr["session_id"] for sr in session_recordings]) == [] + assert sorted([sr["session_id"] for sr in session_recordings]) == sorted(expected_session_ids) @snapshot_clickhouse_queries def test_filter_for_recordings_by_snapshot_source(self): diff --git a/posthog/session_recordings/queries/test/test_session_recording_list_from_query.py b/posthog/session_recordings/queries/test/test_session_recording_list_from_query.py index 0770b66554b56..fa63d032fe07b 100644 --- a/posthog/session_recordings/queries/test/test_session_recording_list_from_query.py +++ b/posthog/session_recordings/queries/test/test_session_recording_list_from_query.py @@ -1,10 +1,13 @@ from datetime import datetime +from typing import Literal from unittest.mock import ANY from uuid import uuid4 from dateutil.relativedelta import relativedelta from django.utils.timezone import now from freezegun import freeze_time +from parameterized import parameterized + from posthog.clickhouse.client import sync_execute from posthog.clickhouse.log_entries import TRUNCATE_LOG_ENTRIES_TABLE_SQL from posthog.constants import AvailableFeature @@ -1603,8 +1606,53 @@ def test_operand_or_event_filters(self): assert len(session_recordings) == 2 assert sorted([r["session_id"] for r in session_recordings]) == sorted([session_id_two, session_id_one]) + @parameterized.expand( + [ + # Case 1: Neither has WARN and message "random" + ( + '[{"key": "level", "value": ["warn"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', + "AND", + 0, + [], + ), + # Case 2: AND only matches one recording + ( + '[{"key": "level", "value": ["info"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', + "AND", + 1, + ["both_log_filters"], + ), + # Case 3: Only one is WARN level + ( + '[{"key": "level", "value": ["warn"], "operator": "exact", "type": "log_entry"}]', + "AND", + 1, + ["one_log_filter"], + ), + # Case 4: Only one has message "random" + ( + '[{"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', + "AND", + 1, + ["both_log_filters"], + ), + # Case 5: OR matches both (update assertion if TODO behavior changes) + ( + '[{"key": "level", "value": ["warn"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', + "OR", + 0, # Adjust this to match the correct behavior once implemented + [], + ), + ] + ) @snapshot_clickhouse_queries - def test_operand_or_filters(self): + def test_operand_or_filters( + self, + console_log_filters: str, + operand: Literal["AND", "OR"], + expected_count: int, + expected_session_ids: list[str], + ) -> None: user = "test_operand_or_filter-user" Person.objects.create(team=self.team, distinct_ids=[user], properties={"email": "bla"}) @@ -1615,12 +1663,9 @@ def test_operand_or_filters(self): first_timestamp=self.an_hour_ago, team_id=self.team.id, console_log_count=1, - log_messages={ - "info": [ - "random", - ], - }, + log_messages={"info": ["random"]}, ) + session_with_one_log_filter = "one_log_filter" produce_replay_summary( distinct_id="user", @@ -1628,60 +1673,15 @@ def test_operand_or_filters(self): first_timestamp=self.an_hour_ago, team_id=self.team.id, console_warn_count=1, - log_messages={ - "warn": [ - "warn", - ], - }, + log_messages={"warn": ["warn"]}, ) - # neither has WARN and message random - (session_recordings, _, _) = self._filter_recordings_by( - { - "console_log_filters": '[{"key": "level", "value": ["warn"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', - "operand": "AND", - } + session_recordings, _, _ = self._filter_recordings_by( + {"console_log_filters": console_log_filters, "operand": operand} ) - assert len(session_recordings) == 0 - # AND only matches one recording - (session_recordings, _, _) = self._filter_recordings_by( - { - "console_log_filters": '[{"key": "level", "value": ["info"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', - "operand": "AND", - } - ) - assert len(session_recordings) == 1 - assert session_recordings[0]["session_id"] == session_with_both_log_filters - - # only one is warn level - (session_recordings, _, _) = self._filter_recordings_by( - { - "console_log_filters": '[{"key": "level", "value": ["warn"], "operator": "exact", "type": "log_entry"}]', - "operand": "AND", - } - ) - assert len(session_recordings) == 1 - assert session_recordings[0]["session_id"] == session_with_one_log_filter - - # only one has message RANDOM - (session_recordings, _, _) = self._filter_recordings_by( - { - "console_log_filters": '[{"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', - "operand": "AND", - } - ) - assert len(session_recordings) == 1 - assert session_recordings[0]["session_id"] == session_with_both_log_filters - - (session_recordings, _, _) = self._filter_recordings_by( - { - "console_log_filters": '[{"key": "level", "value": ["warn"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "random", "operator": "exact", "type": "log_entry"}]', - "operand": "OR", - } - ) - # TODO this should match both but I want to compare the generated SQL - assert len(session_recordings) == 0 + assert len(session_recordings) == expected_count + assert sorted([rec["session_id"] for rec in session_recordings]) == sorted(expected_session_ids) @snapshot_clickhouse_queries def test_operand_or_mandatory_filters(self): @@ -3051,18 +3051,40 @@ def test_filter_for_recordings_with_mixed_console_counts(self): @snapshot_clickhouse_queries @freeze_time("2021-01-21T20:00:00.000Z") - def test_filter_for_recordings_by_console_text(self): + @parameterized.expand( + [ + # Case 1: OR operand, message 4 matches in warn and error + ( + '[{"key": "level", "value": ["warn", "error"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "message 4", "operator": "icontains", "type": "log_entry"}]', + "OR", + ["with-errors-session", "with-two-session", "with-warns-session", "with-logs-session"], + ), + # Case 2: AND operand, message 5 matches only in warn + ( + '[{"key": "level", "value": ["warn", "error"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "message 5", "operator": "icontains", "type": "log_entry"}]', + "AND", + ["with-warns-session"], + ), + # Case 3: AND operand, message 5 does not match log level "info" + ( + '[{"key": "level", "value": ["info"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "message 5", "operator": "icontains", "type": "log_entry"}]', + "AND", + [], + ), + ] + ) + def test_filter_for_recordings_by_console_text( + self, + console_log_filters: str, + operand: Literal["AND", "OR"], + expected_session_ids: list[str], + ) -> None: Person.objects.create(team=self.team, distinct_ids=["user"], properties={"email": "bla"}) - with_logs_session_id = "with-logs-session" - with_warns_session_id = "with-warns-session" - with_errors_session_id = "with-errors-session" - with_two_session_id = "with-two-session" - with_no_matches_session_id = "with-no-matches-session" - + # Create sessions produce_replay_summary( distinct_id="user", - session_id=with_logs_session_id, + session_id="with-logs-session", first_timestamp=self.an_hour_ago, team_id=self.team.id, console_log_count=4, @@ -3077,7 +3099,7 @@ def test_filter_for_recordings_by_console_text(self): ) produce_replay_summary( distinct_id="user", - session_id=with_warns_session_id, + session_id="with-warns-session", first_timestamp=self.an_hour_ago, team_id=self.team.id, console_warn_count=5, @@ -3093,7 +3115,7 @@ def test_filter_for_recordings_by_console_text(self): ) produce_replay_summary( distinct_id="user", - session_id=with_errors_session_id, + session_id="with-errors-session", first_timestamp=self.an_hour_ago, team_id=self.team.id, console_error_count=4, @@ -3108,7 +3130,7 @@ def test_filter_for_recordings_by_console_text(self): ) produce_replay_summary( distinct_id="user", - session_id=with_two_session_id, + session_id="with-two-session", first_timestamp=self.an_hour_ago, team_id=self.team.id, console_error_count=4, @@ -3123,10 +3145,9 @@ def test_filter_for_recordings_by_console_text(self): "info": ["log message 1", "log message 2", "log message 3"], }, ) - produce_replay_summary( distinct_id="user", - session_id=with_no_matches_session_id, + session_id="with-no-matches-session", first_timestamp=self.an_hour_ago, team_id=self.team.id, console_error_count=4, @@ -3136,41 +3157,12 @@ def test_filter_for_recordings_by_console_text(self): }, ) - (session_recordings, _, _) = self._filter_recordings_by( - { - # there are 5 warn and 4 error logs, message 4 matches in both - "console_log_filters": '[{"key": "level", "value": ["warn", "error"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "message 4", "operator": "icontains", "type": "log_entry"}]', - "operand": "OR", - } - ) - - assert sorted([sr["session_id"] for sr in session_recordings]) == sorted( - [with_errors_session_id, with_two_session_id, with_warns_session_id, with_logs_session_id] - ) - - (session_recordings, _, _) = self._filter_recordings_by( - { - # there are 5 warn and 4 error logs, message 5 matches only matches in warn - "console_log_filters": '[{"key": "level", "value": ["warn", "error"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "message 5", "operator": "icontains", "type": "log_entry"}]', - "operand": "AND", - } - ) - - assert sorted([sr["session_id"] for sr in session_recordings]) == sorted( - [ - with_warns_session_id, - ] - ) - - (session_recordings, _, _) = self._filter_recordings_by( - { - # message 5 does not match log level "info" - "console_log_filters": '[{"key": "level", "value": ["info"], "operator": "exact", "type": "log_entry"}, {"key": "message", "value": "message 5", "operator": "icontains", "type": "log_entry"}]', - "operand": "AND", - } + # Perform the filtering and validate results + session_recordings, _, _ = self._filter_recordings_by( + {"console_log_filters": console_log_filters, "operand": operand} ) - assert sorted([sr["session_id"] for sr in session_recordings]) == [] + assert sorted([sr["session_id"] for sr in session_recordings]) == sorted(expected_session_ids) @snapshot_clickhouse_queries def test_filter_for_recordings_by_snapshot_source(self):