-
+
+
diff --git a/frontend/src/scenes/error-tracking/queries.test.ts b/frontend/src/scenes/error-tracking/queries.test.ts
index 9f05cc21e472e..406a5837f4b10 100644
--- a/frontend/src/scenes/error-tracking/queries.test.ts
+++ b/frontend/src/scenes/error-tracking/queries.test.ts
@@ -1,4 +1,9 @@
-import { generateSparklineProps, parseSparklineSelection, SPARKLINE_CONFIGURATIONS } from './queries'
+import {
+ generateSparklineProps,
+ parseSparklineSelection,
+ SPARKLINE_CONFIGURATIONS,
+ stringifyFingerprints,
+} from './queries'
describe('generateSparklineProps', () => {
beforeAll(() => {
@@ -131,3 +136,23 @@ describe('parseSparklineSelection', () => {
expect(parseSparklineSelection('6w')).toEqual({ value: 6, displayAs: 'week' })
})
})
+
+describe('stringifyFingerprints', () => {
+ it('works for basic case', async () => {
+ expect(stringifyFingerprints([['a', 'b', 'c']])).toEqual("[['a','b','c']]")
+ expect(stringifyFingerprints([['a']])).toEqual("[['a']]")
+ expect(stringifyFingerprints([])).toEqual('[]')
+ })
+
+ it('escapes single quotes correctly', async () => {
+ expect(stringifyFingerprints([["a'"]])).toEqual("[['a\\'']]")
+ expect(stringifyFingerprints([["a'", "b'"]])).toEqual("[['a\\'','b\\'']]")
+ expect(stringifyFingerprints([["a'", "b'"], ["c'"]])).toEqual("[['a\\'','b\\''],['c\\'']]")
+ })
+
+ it('escapes double quotes correctly', async () => {
+ expect(stringifyFingerprints([['a"']])).toEqual("[['a\"']]")
+ expect(stringifyFingerprints([['a"', 'b"']])).toEqual("[['a\"','b\"']]")
+ expect(stringifyFingerprints([['a"', 'b"'], ['c"']])).toEqual("[['a\"','b\"'],['c\"']]")
+ })
+})
diff --git a/frontend/src/scenes/error-tracking/queries.ts b/frontend/src/scenes/error-tracking/queries.ts
index b506b5d40e310..0358e9096fd6d 100644
--- a/frontend/src/scenes/error-tracking/queries.ts
+++ b/frontend/src/scenes/error-tracking/queries.ts
@@ -184,12 +184,13 @@ export const errorTrackingGroupEventsQuery = ({
}
// JSON.stringify wraps strings in double quotes and HogQL only supports single quote strings
-const stringifyFingerprints = (fingerprints: ErrorTrackingGroup['fingerprint'][]): string => {
- const stringifiedFingerprints = fingerprints.map((fp) => {
- const stringifiedParts = fp.map((s) => `'${s}'`)
- return `[${stringifiedParts.join(',')}]`
- })
- return `[${stringifiedFingerprints.join(',')}]`
+export const stringifyFingerprints = (fingerprints: ErrorTrackingGroup['fingerprint'][]): string => {
+ // so we escape all single quoted strings and replace double quotes with single quotes, unless they're already escaped.
+ // Also replace escaped double quotes with regular double quotes - this isn't valid JSON, but we aren't trying to generate JSON so its ok.
+ return JSON.stringify(fingerprints)
+ .replace(/'/g, "\\'")
+        .replace(/(?<!\\)"/g, "'")
+        .replace(/\\"/g, '"')
+}
diff --git a/posthog/hogql_queries/error_tracking_query_runner.py b/posthog/hogql_queries/error_tracking_query_runner.py
[NOTE(review): this hunk header and its leading lines were lost to tag-stripping (text between "<" and ">" was eaten, starting inside the regex lookbehind above). From the surviving lines, the original change added `import re`, replaced the single-search-term logic with tokenization of `self.query.searchQuery` via `search_tokenizer`, and capped the token count — reconstruct against the upstream repository before applying:]
+            tokens = search_tokenizer(self.query.searchQuery)
+            and_exprs: list[ast.Expr] = []
+
+            if len(tokens) > 10:
+                raise ValueError("Too many search tokens")
+
+ for token in tokens:
+ if not token:
+ continue
+
+ or_exprs: list[ast.Expr] = []
+ props_to_search = [
+ "$exception_list",
+ "$exception_stack_trace_raw",
+ "$exception_type",
+ "$exception_message",
+ ]
+ for prop in props_to_search:
+ or_exprs.append(
+ ast.CompareOperation(
+ op=ast.CompareOperationOp.Gt,
+ left=ast.Call(
+ name="position",
+ args=[
+ ast.Call(name="lower", args=[ast.Field(chain=["properties", prop])]),
+ ast.Call(name="lower", args=[ast.Constant(value=token)]),
+ ],
+ ),
+ right=ast.Constant(value=0),
+ )
)
- )
- exprs.append(
- ast.Or(
- exprs=or_exprs,
+ and_exprs.append(
+ ast.Or(
+ exprs=or_exprs,
+ )
)
- )
+ exprs.append(ast.And(exprs=and_exprs))
return ast.And(exprs=exprs)
@@ -254,7 +273,6 @@ def error_tracking_groups(self):
queryset = ErrorTrackingGroup.objects.filter(team=self.team)
# :TRICKY: Ideally we'd have no null characters in the fingerprint, but if something made it into the pipeline with null characters
# (because rest of the system supports it), try cleaning it up here. Make sure this cleaning is consistent with the rest of the system.
- # This does mean we'll not match with this ErrorTrackingGroup
cleaned_fingerprint = [part.replace("\x00", "\ufffd") for part in self.query.fingerprint or []]
queryset = (
queryset.filter(fingerprint=cleaned_fingerprint)
@@ -264,3 +282,14 @@ def error_tracking_groups(self):
queryset = queryset.filter(assignee=self.query.assignee) if self.query.assignee else queryset
groups = queryset.values("fingerprint", "merged_fingerprints", "status", "assignee")
return {str(item["fingerprint"]): item for item in groups}
+
+
+def search_tokenizer(query: str) -> list[str]:
+ # parse the search query to split it into words, except for quoted strings. Strip quotes from quoted strings.
+ # Example: 'This is a "quoted string" and this is \'another one\' with some words'
+ # Output: ['This', 'is', 'a', 'quoted string', 'and', 'this', 'is', 'another one', 'with', 'some', 'words']
+ # This doesn't handle nested quotes, and some complex edge cases, but we don't really need that for now.
+ # If requirements do change, consider using a proper parser like `pyparsing`
+ pattern = r'"[^"]*"|\'[^\']*\'|\S+'
+ tokens = re.findall(pattern, query)
+ return [token.strip("'\"") for token in tokens]
diff --git a/posthog/hogql_queries/test/__snapshots__/test_error_tracking_query_runner.ambr b/posthog/hogql_queries/test/__snapshots__/test_error_tracking_query_runner.ambr
index 1d44cadb223c5..14ffed468c757 100644
--- a/posthog/hogql_queries/test/__snapshots__/test_error_tracking_query_runner.ambr
+++ b/posthog/hogql_queries/test/__snapshots__/test_error_tracking_query_runner.ambr
@@ -205,7 +205,7 @@
FROM "posthog_errortrackinggroup"
WHERE ("posthog_errortrackinggroup"."team_id" = 2
AND "posthog_errortrackinggroup"."fingerprint" = (ARRAY['SyntaxError',
- 'Cannot use ''in'' operator to search for ''wireframes'' in � ýf�ì½é"¹0ø*Lö¹SY A�Î÷Ôf
+ 'Cannot use ''in'' operator to search for ''wireframes'' in � ýf�ì½é"¹0ø*Lö¹SY A�Î÷Ôf
�Ø'])::text[])
'''
# ---
@@ -518,6 +518,46 @@
max_bytes_before_external_group_by=0
'''
# ---
+# name: TestErrorTrackingQueryRunner.test_search_query_with_multiple_search_items
+ '''
+ SELECT count(DISTINCT events.uuid) AS occurrences,
+ count(DISTINCT events.`$session_id`) AS sessions,
+ count(DISTINCT events.distinct_id) AS users,
+ max(toTimeZone(events.timestamp, 'UTC')) AS last_seen,
+ min(toTimeZone(events.timestamp, 'UTC')) AS first_seen,
+ any(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_message'), ''), 'null'), '^"|"$', '')) AS description,
+ any(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_type'), ''), 'null'), '^"|"$', '')) AS exception_type,
+ JSONExtract(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_fingerprint'), ''), 'null'), '^"|"$', ''), '[]'), 'Array(String)') AS fingerprint
+ FROM events
+ LEFT OUTER JOIN
+ (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
+ person_distinct_id_overrides.distinct_id AS distinct_id
+ FROM person_distinct_id_overrides
+ WHERE equals(person_distinct_id_overrides.team_id, 2)
+ GROUP BY person_distinct_id_overrides.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id)
+ LEFT JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id)
+ WHERE and(equals(events.team_id, 2), equals(events.event, '$exception'), ifNull(notILike(events__person.properties___email, '%@posthog.com%'), 1), and(or(ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_list'), ''), 'null'), '^"|"$', '')), lower('databasenotfoundX')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_stack_trace_raw'), ''), 'null'), '^"|"$', '')), lower('databasenotfoundX')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_type'), ''), 'null'), '^"|"$', '')), lower('databasenotfoundX')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_message'), ''), 'null'), '^"|"$', '')), lower('databasenotfoundX')), 0), 0)), or(ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_list'), ''), 'null'), '^"|"$', '')), lower('clickhouse/client/execute.py')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_stack_trace_raw'), ''), 'null'), '^"|"$', '')), lower('clickhouse/client/execute.py')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_type'), ''), 'null'), '^"|"$', '')), lower('clickhouse/client/execute.py')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_message'), ''), 'null'), '^"|"$', '')), lower('clickhouse/client/execute.py')), 0), 0))))
+ GROUP BY fingerprint
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=0
+ '''
+# ---
# name: TestErrorTrackingQueryRunner.test_search_query_with_null_characters
'''
SELECT count(DISTINCT events.uuid) AS occurrences,
diff --git a/posthog/hogql_queries/test/test_error_tracking_query_runner.py b/posthog/hogql_queries/test/test_error_tracking_query_runner.py
index d14854bb97509..0a17c061f3f50 100644
--- a/posthog/hogql_queries/test/test_error_tracking_query_runner.py
+++ b/posthog/hogql_queries/test/test_error_tracking_query_runner.py
@@ -1,6 +1,7 @@
+from unittest import TestCase
from freezegun import freeze_time
-from posthog.hogql_queries.error_tracking_query_runner import ErrorTrackingQueryRunner
+from posthog.hogql_queries.error_tracking_query_runner import ErrorTrackingQueryRunner, search_tokenizer
from posthog.schema import (
ErrorTrackingQuery,
DateRange,
@@ -22,6 +23,164 @@
from datetime import datetime
from zoneinfo import ZoneInfo
+SAMPLE_STACK_TRACE = [
+ {
+ "abs_path": "/code/posthog/clickhouse/client/execute.py",
+ "context_line": " result = client.execute(",
+ "filename": "posthog/clickhouse/client/execute.py",
+ "function": "sync_execute",
+ "in_app": True,
+ "lineno": 142,
+ "module": "posthog.clickhouse.client.execute",
+ "post_context": [
+ " prepared_sql,",
+ " params=prepared_args,",
+ " settings=settings,",
+ " with_column_types=with_column_types,",
+ " query_id=query_id,",
+ ],
+ "pre_context": [
+ " **core_settings,",
+ ' "log_comment": json.dumps(tags, separators=(",", ":")),',
+ " }",
+ "",
+ " try:",
+ ],
+ },
+ {
+ "abs_path": "/python-runtime/clickhouse_driver/client.py",
+ "context_line": " rv = self.process_ordinary_query(",
+ "filename": "clickhouse_driver/client.py",
+ "function": "execute",
+ "lineno": 382,
+ "module": "clickhouse_driver.client",
+ "post_context": [
+ " query, params=params, with_column_types=with_column_types,",
+ " external_tables=external_tables,",
+ " query_id=query_id, types_check=types_check,",
+ " columnar=columnar",
+ " )",
+ ],
+ "pre_context": [
+ " query, params, external_tables=external_tables,",
+ " query_id=query_id, types_check=types_check,",
+ " columnar=columnar",
+ " )",
+ " else:",
+ ],
+ },
+ {
+ "abs_path": "/python-runtime/clickhouse_driver/client.py",
+ "context_line": " return self.receive_result(with_column_types=with_column_types,",
+ "filename": "clickhouse_driver/client.py",
+ "function": "process_ordinary_query",
+ "lineno": 580,
+ "module": "clickhouse_driver.client",
+ "post_context": [
+ " columnar=columnar)",
+ "",
+ " def iter_process_ordinary_query(",
+ " self, query, params=None, with_column_types=False,",
+ " external_tables=None, query_id=None,",
+ ],
+ "pre_context": [
+ " query, params, self.connection.context",
+ " )",
+ " self.connection.send_query(query, query_id=query_id, params=params)",
+ " self.connection.send_external_tables(external_tables,",
+ " types_check=types_check)",
+ ],
+ },
+ {
+ "abs_path": "/python-runtime/clickhouse_driver/client.py",
+ "context_line": " return result.get_result()",
+ "filename": "clickhouse_driver/client.py",
+ "function": "receive_result",
+ "lineno": 213,
+ "module": "clickhouse_driver.client",
+ "post_context": [
+ "",
+ " def iter_receive_result(self, with_column_types=False):",
+ " gen = self.packet_generator()",
+ "",
+ " result = self.iter_query_result_cls(",
+ ],
+ "pre_context": [
+ "",
+ " else:",
+ " result = self.query_result_cls(",
+ " gen, with_column_types=with_column_types, columnar=columnar",
+ " )",
+ ],
+ },
+ {
+ "abs_path": "/python-runtime/clickhouse_driver/result.py",
+ "context_line": " for packet in self.packet_generator:",
+ "filename": "clickhouse_driver/result.py",
+ "function": "get_result",
+ "lineno": 50,
+ "module": "clickhouse_driver.result",
+ "post_context": [
+ " self.store(packet)",
+ "",
+ " data = self.data",
+ " if self.columnar:",
+ " data = [tuple(c) for c in self.data]",
+ ],
+ "pre_context": [
+ " def get_result(self):",
+ ' """',
+ " :return: stored query result.",
+ ' """',
+ "",
+ ],
+ },
+ {
+ "abs_path": "/python-runtime/clickhouse_driver/client.py",
+ "context_line": " packet = self.receive_packet()",
+ "filename": "clickhouse_driver/client.py",
+ "function": "packet_generator",
+ "lineno": 229,
+ "module": "clickhouse_driver.client",
+ "post_context": [
+ " if not packet:",
+ " break",
+ "",
+ " if packet is True:",
+ " continue",
+ ],
+ "pre_context": [
+ " yield row",
+ "",
+ " def packet_generator(self):",
+ " while True:",
+ " try:",
+ ],
+ },
+ {
+ "abs_path": "/python-runtime/clickhouse_driver/client.py",
+ "context_line": " raise packet.exception",
+ "filename": "clickhouse_driver/client.py",
+ "function": "receive_packet",
+ "lineno": 246,
+ "module": "clickhouse_driver.client",
+ "post_context": [
+ "",
+ " elif packet.type == ServerPacketTypes.PROGRESS:",
+ " self.last_query.store_progress(packet.progress)",
+ " return packet",
+ "",
+ ],
+ "pre_context": [
+ "",
+ " def receive_packet(self):",
+ " packet = self.connection.receive_packet()",
+ "",
+ " if packet.type == ServerPacketTypes.EXCEPTION:",
+ ],
+ },
+]
+
class TestErrorTrackingQueryRunner(ClickhouseTestMixin, APIBaseTest):
distinct_id_one = "user_1"
@@ -217,6 +376,53 @@ def test_empty_search_query(self):
self.assertEqual(len(results), 0)
+ @snapshot_clickhouse_queries
+ def test_search_query_with_multiple_search_items(self):
+ with freeze_time("2022-01-10 12:11:00"):
+ _create_event(
+ distinct_id=self.distinct_id_one,
+ event="$exception",
+ team=self.team,
+ properties={
+ "$exception_fingerprint": ["DatabaseNotFoundX"],
+ "$exception_type": "DatabaseNotFoundX",
+ "$exception_message": "this is the same error message",
+ "$exception_list": [{"stack_trace": {"frames": SAMPLE_STACK_TRACE}}],
+ },
+ )
+
+ _create_event(
+ distinct_id=self.distinct_id_two,
+ event="$exception",
+ team=self.team,
+ properties={
+ "$exception_fingerprint": ["DatabaseNotFoundY"],
+ "$exception_type": "DatabaseNotFoundY",
+ "$exception_message": "this is the same error message",
+ "$exception_list": [{"stack_trace": {"frames": SAMPLE_STACK_TRACE}}],
+ },
+ )
+ flush_persons_and_events()
+
+ runner = ErrorTrackingQueryRunner(
+ team=self.team,
+ query=ErrorTrackingQuery(
+ kind="ErrorTrackingQuery",
+ fingerprint=None,
+ dateRange=DateRange(),
+ filterTestAccounts=True,
+ searchQuery="databasenotfoundX clickhouse/client/execute.py",
+ ),
+ )
+
+ results = self._calculate(runner)["results"]
+
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]["fingerprint"], ["DatabaseNotFoundX"])
+ self.assertEqual(results[0]["occurrences"], 1)
+ self.assertEqual(results[0]["sessions"], 1)
+ self.assertEqual(results[0]["users"], 1)
+
@snapshot_clickhouse_queries
def test_search_query_with_null_characters(self):
fingerprint_with_null_bytes = [
@@ -462,3 +668,38 @@ def test_assignee_groups(self):
results = self._calculate(runner)["results"]
self.assertEqual(sorted([x["fingerprint"] for x in results]), [["SyntaxError"], ["custom_fingerprint"]])
+
+
+class TestSearchTokenizer(TestCase):
+ test_cases = [
+ (
+ "This is a \"quoted string\" and this is 'another one' with some words",
+ ["This", "is", "a", "quoted string", "and", "this", "is", "another one", "with", "some", "words"],
+ ),
+ (
+ "Empty quotes: \"\" and '' should be preserved",
+ ["Empty", "quotes:", "", "and", "", "should", "be", "preserved"],
+ ),
+ ("Nested \"quotes 'are' tricky\" to handle", ["Nested", "quotes 'are' tricky", "to", "handle"]),
+ (
+ "Unmatched quotes: \"open quote and 'partial quote",
+ ["Unmatched", "quotes:", "open", "quote", "and", "partial", "quote"],
+ ),
+ ("Multiple spaces between words", ["Multiple", "spaces", "between", "words"]),
+ (
+ "Special characters: @#$% should be treated as words",
+ ["Special", "characters:", "@#$%", "should", "be", "treated", "as", "words"],
+ ),
+ (
+ "Single quotes at \"start\" and 'end' of string",
+ ["Single", "quotes", "at", "start", "and", "end", "of", "string"],
+ ),
+ ('"Entire string is quoted"', ["Entire string is quoted"]),
+ ('Escaped quotes: "He said "Hello" to me"', ["Escaped", "quotes:", "He said ", "Hello", "to", "me"]),
+ ]
+
+ def test_tokenizer(self):
+ for case, output in self.test_cases:
+ with self.subTest(case=case):
+ tokens = search_tokenizer(case)
+ self.assertEqual(tokens, output)
diff --git a/posthog/hogql_queries/test/test_events_query_runner.py b/posthog/hogql_queries/test/test_events_query_runner.py
index e0e3a82cd1c0d..073d082ca2f3b 100644
--- a/posthog/hogql_queries/test/test_events_query_runner.py
+++ b/posthog/hogql_queries/test/test_events_query_runner.py
@@ -186,3 +186,58 @@ def test_big_int(self):
response = runner.run()
assert isinstance(response, CachedEventsQueryResponse)
assert response.results[0][0]["properties"]["bigInt"] == float(BIG_INT)
+
+ def test_escaped_single_quotes_in_where_clause(self):
+ SINGLE_QUOTE = "I'm a string with a ' in it"
+ DOUBLE_QUOTE = 'I"m a string with a " in it'
+ self._create_events(
+ data=[
+ (
+ "p_null",
+ "2020-01-11T12:00:04Z",
+ {"boolean_field": None, "arr_field": [SINGLE_QUOTE]},
+ ),
+ (
+ "p_one",
+ "2020-01-11T12:00:14Z",
+ {"boolean_field": None, "arr_field": [DOUBLE_QUOTE]},
+ ),
+ ]
+ )
+
+ flush_persons_and_events()
+
+ with freeze_time("2020-01-11T12:01:00"):
+ query = EventsQuery(
+ after="-24h",
+ event="$pageview",
+ kind="EventsQuery",
+ where=[
+ "has(JSONExtract(ifNull(properties.arr_field,'[]'),'Array(String)'), 'I\\'m a string with a \\' in it')"
+ ],
+ orderBy=["timestamp ASC"],
+ select=["*"],
+ )
+
+ runner = EventsQueryRunner(query=query, team=self.team)
+ response = runner.run()
+ assert isinstance(response, CachedEventsQueryResponse)
+ assert len(response.results) == 1
+ assert response.results[0][0]["properties"]["arr_field"] == [SINGLE_QUOTE]
+
+ query = EventsQuery(
+ after="-24h",
+ event="$pageview",
+ kind="EventsQuery",
+ where=[
+ "has(JSONExtract(ifNull(properties.arr_field,'[]'),'Array(String)'), 'I\"m a string with a \" in it')"
+ ],
+ orderBy=["timestamp ASC"],
+ select=["*"],
+ )
+
+ runner = EventsQueryRunner(query=query, team=self.team)
+ response = runner.run()
+ assert isinstance(response, CachedEventsQueryResponse)
+ assert len(response.results) == 1
+ assert response.results[0][0]["properties"]["arr_field"] == [DOUBLE_QUOTE]