diff --git a/frontend/__snapshots__/scenes-app-errortracking--group-page--dark.png b/frontend/__snapshots__/scenes-app-errortracking--group-page--dark.png index 9ba7eb52e5ca6..6fb52cdc929ca 100644 Binary files a/frontend/__snapshots__/scenes-app-errortracking--group-page--dark.png and b/frontend/__snapshots__/scenes-app-errortracking--group-page--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-errortracking--group-page--light.png b/frontend/__snapshots__/scenes-app-errortracking--group-page--light.png index 6e01906523ab6..5e00f6a00e9dc 100644 Binary files a/frontend/__snapshots__/scenes-app-errortracking--group-page--light.png and b/frontend/__snapshots__/scenes-app-errortracking--group-page--light.png differ diff --git a/frontend/__snapshots__/scenes-app-errortracking--list-page--dark.png b/frontend/__snapshots__/scenes-app-errortracking--list-page--dark.png index 70c8fa237a9ba..dfe2045cf6ebe 100644 Binary files a/frontend/__snapshots__/scenes-app-errortracking--list-page--dark.png and b/frontend/__snapshots__/scenes-app-errortracking--list-page--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-errortracking--list-page--light.png b/frontend/__snapshots__/scenes-app-errortracking--list-page--light.png index d23dbf662cde5..30308194e52de 100644 Binary files a/frontend/__snapshots__/scenes-app-errortracking--list-page--light.png and b/frontend/__snapshots__/scenes-app-errortracking--list-page--light.png differ diff --git a/frontend/src/scenes/error-tracking/ErrorTrackingFilters.tsx b/frontend/src/scenes/error-tracking/ErrorTrackingFilters.tsx index c741a480e91e4..629a7a97d0ff0 100644 --- a/frontend/src/scenes/error-tracking/ErrorTrackingFilters.tsx +++ b/frontend/src/scenes/error-tracking/ErrorTrackingFilters.tsx @@ -18,12 +18,23 @@ export const FilterGroup = (): JSX.Element => { return (
+ diff --git a/frontend/src/scenes/error-tracking/queries.test.ts b/frontend/src/scenes/error-tracking/queries.test.ts index 9f05cc21e472e..406a5837f4b10 100644 --- a/frontend/src/scenes/error-tracking/queries.test.ts +++ b/frontend/src/scenes/error-tracking/queries.test.ts @@ -1,4 +1,9 @@ -import { generateSparklineProps, parseSparklineSelection, SPARKLINE_CONFIGURATIONS } from './queries' +import { + generateSparklineProps, + parseSparklineSelection, + SPARKLINE_CONFIGURATIONS, + stringifyFingerprints, +} from './queries' describe('generateSparklineProps', () => { beforeAll(() => { @@ -131,3 +136,23 @@ describe('parseSparklineSelection', () => { expect(parseSparklineSelection('6w')).toEqual({ value: 6, displayAs: 'week' }) }) }) + +describe('stringifyFingerprints', () => { + it('works for basic case', async () => { + expect(stringifyFingerprints([['a', 'b', 'c']])).toEqual("[['a','b','c']]") + expect(stringifyFingerprints([['a']])).toEqual("[['a']]") + expect(stringifyFingerprints([])).toEqual('[]') + }) + + it('escapes single quotes correctly', async () => { + expect(stringifyFingerprints([["a'"]])).toEqual("[['a\\'']]") + expect(stringifyFingerprints([["a'", "b'"]])).toEqual("[['a\\'','b\\'']]") + expect(stringifyFingerprints([["a'", "b'"], ["c'"]])).toEqual("[['a\\'','b\\''],['c\\'']]") + }) + + it('escapes double quotes correctly', async () => { + expect(stringifyFingerprints([['a"']])).toEqual("[['a\"']]") + expect(stringifyFingerprints([['a"', 'b"']])).toEqual("[['a\"','b\"']]") + expect(stringifyFingerprints([['a"', 'b"'], ['c"']])).toEqual("[['a\"','b\"'],['c\"']]") + }) +}) diff --git a/frontend/src/scenes/error-tracking/queries.ts b/frontend/src/scenes/error-tracking/queries.ts index b506b5d40e310..0358e9096fd6d 100644 --- a/frontend/src/scenes/error-tracking/queries.ts +++ b/frontend/src/scenes/error-tracking/queries.ts @@ -184,12 +184,13 @@ export const errorTrackingGroupEventsQuery = ({ } // JSON.stringify wraps strings in double quotes and HogQL only supports single quote strings -const stringifyFingerprints = (fingerprints: ErrorTrackingGroup['fingerprint'][]): string => { - const stringifiedFingerprints = fingerprints.map((fp) => { - const stringifiedParts = fp.map((s) => `'${s}'`) - return `[${stringifiedParts.join(',')}]` - }) - return `[${stringifiedFingerprints.join(',')}]` +export const stringifyFingerprints = (fingerprints: ErrorTrackingGroup['fingerprint'][]): string => { + // so we escape all single quoted strings and replace double quotes with single quotes, unless they're already escaped. + // Also replace escaped double quotes with regular double quotes - this isn't valid JSON, but we aren't trying to generate JSON so its ok. + return JSON.stringify(fingerprints) + .replace(/'/g, "\\'") + .replace(/(? 
10: + raise ValueError("Too many search tokens") + + for token in tokens: + if not token: + continue + + or_exprs: list[ast.Expr] = [] + props_to_search = [ + "$exception_list", + "$exception_stack_trace_raw", + "$exception_type", + "$exception_message", + ] + for prop in props_to_search: + or_exprs.append( + ast.CompareOperation( + op=ast.CompareOperationOp.Gt, + left=ast.Call( + name="position", + args=[ + ast.Call(name="lower", args=[ast.Field(chain=["properties", prop])]), + ast.Call(name="lower", args=[ast.Constant(value=token)]), + ], + ), + right=ast.Constant(value=0), + ) ) - ) - exprs.append( - ast.Or( - exprs=or_exprs, + and_exprs.append( + ast.Or( + exprs=or_exprs, + ) ) - ) + exprs.append(ast.And(exprs=and_exprs)) return ast.And(exprs=exprs) @@ -254,7 +273,6 @@ def error_tracking_groups(self): queryset = ErrorTrackingGroup.objects.filter(team=self.team) # :TRICKY: Ideally we'd have no null characters in the fingerprint, but if something made it into the pipeline with null characters # (because rest of the system supports it), try cleaning it up here. Make sure this cleaning is consistent with the rest of the system. - # This does mean we'll not match with this ErrorTrackingGroup cleaned_fingerprint = [part.replace("\x00", "\ufffd") for part in self.query.fingerprint or []] queryset = ( queryset.filter(fingerprint=cleaned_fingerprint) @@ -264,3 +282,14 @@ def error_tracking_groups(self): queryset = queryset.filter(assignee=self.query.assignee) if self.query.assignee else queryset groups = queryset.values("fingerprint", "merged_fingerprints", "status", "assignee") return {str(item["fingerprint"]): item for item in groups} + + +def search_tokenizer(query: str) -> list[str]: + # parse the search query to split it into words, except for quoted strings. Strip quotes from quoted strings. + # Example: 'This is a "quoted string" and this is \'another one\' with some words' + # Output: ['This', 'is', 'a', 'quoted string', 'and', 'this', 'is', 'another one', 'with', 'some', 'words'] + # This doesn't handle nested quotes, and some complex edge cases, but we don't really need that for now. 
+ # If requirements do change, consider using a proper parser like `pyparsing` + pattern = r'"[^"]*"|\'[^\']*\'|\S+' + tokens = re.findall(pattern, query) + return [token.strip("'\"") for token in tokens] diff --git a/posthog/hogql_queries/test/__snapshots__/test_error_tracking_query_runner.ambr b/posthog/hogql_queries/test/__snapshots__/test_error_tracking_query_runner.ambr index 1d44cadb223c5..14ffed468c757 100644 --- a/posthog/hogql_queries/test/__snapshots__/test_error_tracking_query_runner.ambr +++ b/posthog/hogql_queries/test/__snapshots__/test_error_tracking_query_runner.ambr @@ -205,7 +205,7 @@ FROM "posthog_errortrackinggroup" WHERE ("posthog_errortrackinggroup"."team_id" = 2 AND "posthog_errortrackinggroup"."fingerprint" = (ARRAY['SyntaxError', - 'Cannot use ''in'' operator to search for ''wireframes'' in ‹�” ýf�ì½é–"¹’0ø*Lö¹SY A�Ξ÷ԝf + 'Cannot use ''in'' operator to search for ''wireframes'' in ‹�” ýf�ì½é–"¹’0ø*Lö¹SY A�Ξ÷ԝf ˆ�Ø'])::text[]) ''' # --- @@ -518,6 +518,46 @@ max_bytes_before_external_group_by=0 ''' # --- +# name: TestErrorTrackingQueryRunner.test_search_query_with_multiple_search_items + ''' + SELECT count(DISTINCT events.uuid) AS occurrences, + count(DISTINCT events.`$session_id`) AS sessions, + count(DISTINCT events.distinct_id) AS users, + max(toTimeZone(events.timestamp, 'UTC')) AS last_seen, + min(toTimeZone(events.timestamp, 'UTC')) AS first_seen, + any(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_message'), ''), 'null'), '^"|"$', '')) AS description, + any(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_type'), ''), 'null'), '^"|"$', '')) AS exception_type, + JSONExtract(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_fingerprint'), ''), 'null'), '^"|"$', ''), '[]'), 'Array(String)') AS fingerprint + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) + WHERE and(equals(events.team_id, 2), equals(events.event, '$exception'), ifNull(notILike(events__person.properties___email, '%@posthog.com%'), 1), and(or(ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_list'), ''), 'null'), '^"|"$', '')), lower('databasenotfoundX')), 0), 0), 
ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_stack_trace_raw'), ''), 'null'), '^"|"$', '')), lower('databasenotfoundX')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_type'), ''), 'null'), '^"|"$', '')), lower('databasenotfoundX')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_message'), ''), 'null'), '^"|"$', '')), lower('databasenotfoundX')), 0), 0)), or(ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_list'), ''), 'null'), '^"|"$', '')), lower('clickhouse/client/execute.py')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_stack_trace_raw'), ''), 'null'), '^"|"$', '')), lower('clickhouse/client/execute.py')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_type'), ''), 'null'), '^"|"$', '')), lower('clickhouse/client/execute.py')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_message'), ''), 'null'), '^"|"$', '')), lower('clickhouse/client/execute.py')), 0), 0)))) + GROUP BY fingerprint + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- # name: TestErrorTrackingQueryRunner.test_search_query_with_null_characters ''' SELECT count(DISTINCT events.uuid) AS occurrences, diff --git a/posthog/hogql_queries/test/test_error_tracking_query_runner.py b/posthog/hogql_queries/test/test_error_tracking_query_runner.py index d14854bb97509..0a17c061f3f50 100644 --- a/posthog/hogql_queries/test/test_error_tracking_query_runner.py +++ b/posthog/hogql_queries/test/test_error_tracking_query_runner.py @@ -1,6 +1,7 @@ +from unittest import TestCase from freezegun import freeze_time -from posthog.hogql_queries.error_tracking_query_runner import ErrorTrackingQueryRunner +from posthog.hogql_queries.error_tracking_query_runner import ErrorTrackingQueryRunner, search_tokenizer from posthog.schema import ( ErrorTrackingQuery, DateRange, @@ -22,6 +23,164 @@ from datetime import datetime from zoneinfo import ZoneInfo +SAMPLE_STACK_TRACE = [ + { + "abs_path": "/code/posthog/clickhouse/client/execute.py", + "context_line": " result = client.execute(", + "filename": "posthog/clickhouse/client/execute.py", + "function": "sync_execute", + "in_app": True, + "lineno": 142, + "module": "posthog.clickhouse.client.execute", + "post_context": [ + " prepared_sql,", + " params=prepared_args,", + " settings=settings,", + " with_column_types=with_column_types,", + " query_id=query_id,", + ], + "pre_context": [ + " **core_settings,", + ' "log_comment": json.dumps(tags, separators=(",", ":")),', + " }", + "", + " try:", + ], + }, + { + "abs_path": "/python-runtime/clickhouse_driver/client.py", + "context_line": " rv = self.process_ordinary_query(", + "filename": "clickhouse_driver/client.py", + "function": "execute", + "lineno": 382, + "module": "clickhouse_driver.client", + "post_context": [ + " query, params=params, with_column_types=with_column_types,", + " external_tables=external_tables,", + " query_id=query_id, types_check=types_check,", + " columnar=columnar", + " )", 
+ ], + "pre_context": [ + " query, params, external_tables=external_tables,", + " query_id=query_id, types_check=types_check,", + " columnar=columnar", + " )", + " else:", + ], + }, + { + "abs_path": "/python-runtime/clickhouse_driver/client.py", + "context_line": " return self.receive_result(with_column_types=with_column_types,", + "filename": "clickhouse_driver/client.py", + "function": "process_ordinary_query", + "lineno": 580, + "module": "clickhouse_driver.client", + "post_context": [ + " columnar=columnar)", + "", + " def iter_process_ordinary_query(", + " self, query, params=None, with_column_types=False,", + " external_tables=None, query_id=None,", + ], + "pre_context": [ + " query, params, self.connection.context", + " )", + " self.connection.send_query(query, query_id=query_id, params=params)", + " self.connection.send_external_tables(external_tables,", + " types_check=types_check)", + ], + }, + { + "abs_path": "/python-runtime/clickhouse_driver/client.py", + "context_line": " return result.get_result()", + "filename": "clickhouse_driver/client.py", + "function": "receive_result", + "lineno": 213, + "module": "clickhouse_driver.client", + "post_context": [ + "", + " def iter_receive_result(self, with_column_types=False):", + " gen = self.packet_generator()", + "", + " result = self.iter_query_result_cls(", + ], + "pre_context": [ + "", + " else:", + " result = self.query_result_cls(", + " gen, with_column_types=with_column_types, columnar=columnar", + " )", + ], + }, + { + "abs_path": "/python-runtime/clickhouse_driver/result.py", + "context_line": " for packet in self.packet_generator:", + "filename": "clickhouse_driver/result.py", + "function": "get_result", + "lineno": 50, + "module": "clickhouse_driver.result", + "post_context": [ + " self.store(packet)", + "", + " data = self.data", + " if self.columnar:", + " data = [tuple(c) for c in self.data]", + ], + "pre_context": [ + " def get_result(self):", + ' """', + " :return: stored query result.", + ' """', + "", + ], + }, + { + "abs_path": "/python-runtime/clickhouse_driver/client.py", + "context_line": " packet = self.receive_packet()", + "filename": "clickhouse_driver/client.py", + "function": "packet_generator", + "lineno": 229, + "module": "clickhouse_driver.client", + "post_context": [ + " if not packet:", + " break", + "", + " if packet is True:", + " continue", + ], + "pre_context": [ + " yield row", + "", + " def packet_generator(self):", + " while True:", + " try:", + ], + }, + { + "abs_path": "/python-runtime/clickhouse_driver/client.py", + "context_line": " raise packet.exception", + "filename": "clickhouse_driver/client.py", + "function": "receive_packet", + "lineno": 246, + "module": "clickhouse_driver.client", + "post_context": [ + "", + " elif packet.type == ServerPacketTypes.PROGRESS:", + " self.last_query.store_progress(packet.progress)", + " return packet", + "", + ], + "pre_context": [ + "", + " def receive_packet(self):", + " packet = self.connection.receive_packet()", + "", + " if packet.type == ServerPacketTypes.EXCEPTION:", + ], + }, +] + class TestErrorTrackingQueryRunner(ClickhouseTestMixin, APIBaseTest): distinct_id_one = "user_1" @@ -217,6 +376,53 @@ def test_empty_search_query(self): self.assertEqual(len(results), 0) + @snapshot_clickhouse_queries + def test_search_query_with_multiple_search_items(self): + with freeze_time("2022-01-10 12:11:00"): + _create_event( + distinct_id=self.distinct_id_one, + event="$exception", + team=self.team, + properties={ + "$exception_fingerprint": 
["DatabaseNotFoundX"], + "$exception_type": "DatabaseNotFoundX", + "$exception_message": "this is the same error message", + "$exception_list": [{"stack_trace": {"frames": SAMPLE_STACK_TRACE}}], + }, + ) + + _create_event( + distinct_id=self.distinct_id_two, + event="$exception", + team=self.team, + properties={ + "$exception_fingerprint": ["DatabaseNotFoundY"], + "$exception_type": "DatabaseNotFoundY", + "$exception_message": "this is the same error message", + "$exception_list": [{"stack_trace": {"frames": SAMPLE_STACK_TRACE}}], + }, + ) + flush_persons_and_events() + + runner = ErrorTrackingQueryRunner( + team=self.team, + query=ErrorTrackingQuery( + kind="ErrorTrackingQuery", + fingerprint=None, + dateRange=DateRange(), + filterTestAccounts=True, + searchQuery="databasenotfoundX clickhouse/client/execute.py", + ), + ) + + results = self._calculate(runner)["results"] + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["fingerprint"], ["DatabaseNotFoundX"]) + self.assertEqual(results[0]["occurrences"], 1) + self.assertEqual(results[0]["sessions"], 1) + self.assertEqual(results[0]["users"], 1) + @snapshot_clickhouse_queries def test_search_query_with_null_characters(self): fingerprint_with_null_bytes = [ @@ -462,3 +668,38 @@ def test_assignee_groups(self): results = self._calculate(runner)["results"] self.assertEqual(sorted([x["fingerprint"] for x in results]), [["SyntaxError"], ["custom_fingerprint"]]) + + +class TestSearchTokenizer(TestCase): + test_cases = [ + ( + "This is a \"quoted string\" and this is 'another one' with some words", + ["This", "is", "a", "quoted string", "and", "this", "is", "another one", "with", "some", "words"], + ), + ( + "Empty quotes: \"\" and '' should be preserved", + ["Empty", "quotes:", "", "and", "", "should", "be", "preserved"], + ), + ("Nested \"quotes 'are' tricky\" to handle", ["Nested", "quotes 'are' tricky", "to", "handle"]), + ( + "Unmatched quotes: \"open quote and 'partial quote", + ["Unmatched", "quotes:", "open", "quote", "and", "partial", "quote"], + ), + ("Multiple spaces between words", ["Multiple", "spaces", "between", "words"]), + ( + "Special characters: @#$% should be treated as words", + ["Special", "characters:", "@#$%", "should", "be", "treated", "as", "words"], + ), + ( + "Single quotes at \"start\" and 'end' of string", + ["Single", "quotes", "at", "start", "and", "end", "of", "string"], + ), + ('"Entire string is quoted"', ["Entire string is quoted"]), + ('Escaped quotes: "He said "Hello" to me"', ["Escaped", "quotes:", "He said ", "Hello", "to", "me"]), + ] + + def test_tokenizer(self): + for case, output in self.test_cases: + with self.subTest(case=case): + tokens = search_tokenizer(case) + self.assertEqual(tokens, output) diff --git a/posthog/hogql_queries/test/test_events_query_runner.py b/posthog/hogql_queries/test/test_events_query_runner.py index e0e3a82cd1c0d..073d082ca2f3b 100644 --- a/posthog/hogql_queries/test/test_events_query_runner.py +++ b/posthog/hogql_queries/test/test_events_query_runner.py @@ -186,3 +186,58 @@ def test_big_int(self): response = runner.run() assert isinstance(response, CachedEventsQueryResponse) assert response.results[0][0]["properties"]["bigInt"] == float(BIG_INT) + + def test_escaped_single_quotes_in_where_clause(self): + SINGLE_QUOTE = "I'm a string with a ' in it" + DOUBLE_QUOTE = 'I"m a string with a " in it' + self._create_events( + data=[ + ( + "p_null", + "2020-01-11T12:00:04Z", + {"boolean_field": None, "arr_field": [SINGLE_QUOTE]}, + ), + ( + "p_one", + 
"2020-01-11T12:00:14Z", + {"boolean_field": None, "arr_field": [DOUBLE_QUOTE]}, + ), + ] + ) + + flush_persons_and_events() + + with freeze_time("2020-01-11T12:01:00"): + query = EventsQuery( + after="-24h", + event="$pageview", + kind="EventsQuery", + where=[ + "has(JSONExtract(ifNull(properties.arr_field,'[]'),'Array(String)'), 'I\\'m a string with a \\' in it')" + ], + orderBy=["timestamp ASC"], + select=["*"], + ) + + runner = EventsQueryRunner(query=query, team=self.team) + response = runner.run() + assert isinstance(response, CachedEventsQueryResponse) + assert len(response.results) == 1 + assert response.results[0][0]["properties"]["arr_field"] == [SINGLE_QUOTE] + + query = EventsQuery( + after="-24h", + event="$pageview", + kind="EventsQuery", + where=[ + "has(JSONExtract(ifNull(properties.arr_field,'[]'),'Array(String)'), 'I\"m a string with a \" in it')" + ], + orderBy=["timestamp ASC"], + select=["*"], + ) + + runner = EventsQueryRunner(query=query, team=self.team) + response = runner.run() + assert isinstance(response, CachedEventsQueryResponse) + assert len(response.results) == 1 + assert response.results[0][0]["properties"]["arr_field"] == [DOUBLE_QUOTE]