Skip to content

Commit

Permalink
feat(errors): Search power up, filter on event props (#25425)
Browse files Browse the repository at this point in the history
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
neilkakkar and github-actions[bot] authored Oct 9, 2024
1 parent b32fb5c commit cac1266
Show file tree
Hide file tree
Showing 11 changed files with 436 additions and 34 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
17 changes: 14 additions & 3 deletions frontend/src/scenes/error-tracking/ErrorTrackingFilters.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,23 @@ export const FilterGroup = (): JSX.Element => {

return (
<div className="flex flex-1 items-center justify-between space-x-2">
<div className="flex items-center gap-2">
<LemonInput type="search" placeholder="Search..." value={searchQuery} onChange={setSearchQuery} />
<div className="flex flex-1 items-center gap-2 mx-2">
<LemonInput
type="search"
placeholder="Search..."
value={searchQuery}
onChange={setSearchQuery}
className="flex-grow max-w-none"
/>
<UniversalFilters
rootKey="error-tracking"
group={filterGroup}
taxonomicGroupTypes={[TaxonomicFilterGroupType.PersonProperties, TaxonomicFilterGroupType.Cohorts]}
// TODO: Probably makes sense to create a new taxonomic group for exception-specific event property filters only, keep it clean.
taxonomicGroupTypes={[
TaxonomicFilterGroupType.EventProperties,
TaxonomicFilterGroupType.PersonProperties,
TaxonomicFilterGroupType.Cohorts,
]}
onChange={setFilterGroup}
>
<RecordingsUniversalFilterGroup />
Expand Down
27 changes: 26 additions & 1 deletion frontend/src/scenes/error-tracking/queries.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import { generateSparklineProps, parseSparklineSelection, SPARKLINE_CONFIGURATIONS } from './queries'
import {
generateSparklineProps,
parseSparklineSelection,
SPARKLINE_CONFIGURATIONS,
stringifyFingerprints,
} from './queries'

describe('generateSparklineProps', () => {
beforeAll(() => {
Expand Down Expand Up @@ -131,3 +136,23 @@ describe('parseSparklineSelection', () => {
expect(parseSparklineSelection('6w')).toEqual({ value: 6, displayAs: 'week' })
})
})

// Pins the exact text stringifyFingerprints produces for a list of fingerprints
// (each fingerprint being a list of string parts).
describe('stringifyFingerprints', () => {
// Plain parts: every part is wrapped in single quotes, with no whitespace between
// elements; an empty list serialises to '[]'.
it('works for basic case', async () => {
expect(stringifyFingerprints([['a', 'b', 'c']])).toEqual("[['a','b','c']]")
expect(stringifyFingerprints([['a']])).toEqual("[['a']]")
expect(stringifyFingerprints([])).toEqual('[]')
})

// A single quote inside a part must come back preceded by a backslash so it does not
// terminate the surrounding single-quoted string.
it('escapes single quotes correctly', async () => {
expect(stringifyFingerprints([["a'"]])).toEqual("[['a\\'']]")
expect(stringifyFingerprints([["a'", "b'"]])).toEqual("[['a\\'','b\\'']]")
expect(stringifyFingerprints([["a'", "b'"], ["c'"]])).toEqual("[['a\\'','b\\''],['c\\'']]")
})

// A double quote inside a part is expected verbatim (no backslash) in the output:
// the JS literal "[['a\"']]" below is the text [['a"']].
it('escapes double quotes correctly', async () => {
expect(stringifyFingerprints([['a"']])).toEqual("[['a\"']]")
expect(stringifyFingerprints([['a"', 'b"']])).toEqual("[['a\"','b\"']]")
expect(stringifyFingerprints([['a"', 'b"'], ['c"']])).toEqual("[['a\"','b\"'],['c\"']]")
})
})
13 changes: 7 additions & 6 deletions frontend/src/scenes/error-tracking/queries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,13 @@ export const errorTrackingGroupEventsQuery = ({
}

// JSON.stringify wraps strings in double quotes and HogQL only supports single quote strings
const stringifyFingerprints = (fingerprints: ErrorTrackingGroup['fingerprint'][]): string => {
const stringifiedFingerprints = fingerprints.map((fp) => {
const stringifiedParts = fp.map((s) => `'${s}'`)
return `[${stringifiedParts.join(',')}]`
})
return `[${stringifiedFingerprints.join(',')}]`
/**
 * Serialises a list of fingerprints into an array-of-arrays literal that uses
 * single-quoted strings, e.g. `[['a','b'],['c']]`.
 *
 * The text is derived from `JSON.stringify` so that its escaping of backslashes and
 * control characters is kept, then adjusted for single-quoted strings:
 *  - the double quotes that delimit each JSON string become single quotes,
 *  - single quotes inside a part are backslash-escaped,
 *  - escaped double quotes (`\"`) inside a part become plain double quotes.
 *
 * The result is intentionally not valid JSON.
 *
 * @param fingerprints - fingerprints to serialise; each one is a list of string parts
 * @returns the single-quoted array literal as a string
 */
export const stringifyFingerprints = (fingerprints: ErrorTrackingGroup['fingerprint'][]): string => {
    // Consume the JSON text one token at a time: either a complete backslash escape
    // (`\\.`) or a bare quote character. Treating escapes as a unit is what keeps a
    // part ending in a backslash (`"a\\"`) from having its closing quote mistaken for
    // an escaped quote, which would produce an unterminated string.
    return JSON.stringify(fingerprints).replace(/\\.|["']/g, (token) => {
        switch (token) {
            case '\\"':
                // Escaped double quote inside a part: no escaping needed in single quotes.
                return '"'
            case '"':
                // Bare double quote: always a JSON string delimiter.
                return "'"
            case "'":
                // Single quote inside a part: must not close the new delimiter.
                return "\\'"
            default:
                // Any other escape sequence (\\, \n, \uXXXX, ...) is kept verbatim.
                return token
        }
    })
}

export const errorTrackingGroupBreakdownQuery = ({
Expand Down
73 changes: 51 additions & 22 deletions posthog/hogql_queries/error_tracking_query_runner.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from posthog.hogql import ast
from posthog.hogql.constants import LimitContext
from posthog.hogql_queries.insights.paginators import HogQLHasMorePaginator
Expand Down Expand Up @@ -130,31 +131,49 @@ def where(self):

if self.query.searchQuery:
# TODO: Refine this so it only searches the frames inside $exception_list
# TODO: Split out spaces and search for each word separately
# TODO: Add support for searching for specific properties
# TODO: We'd eventually need a more efficient searching strategy
# TODO: Add fuzzy search support
props_to_search = ["$exception_list", "$exception_stack_trace_raw", "$exception_type", "$exception_message"]
or_exprs: list[ast.Expr] = []
for prop in props_to_search:
or_exprs.append(
ast.CompareOperation(
op=ast.CompareOperationOp.Gt,
left=ast.Call(
name="position",
args=[
ast.Call(name="lower", args=[ast.Field(chain=["properties", prop])]),
ast.Call(name="lower", args=[ast.Constant(value=self.query.searchQuery)]),
],
),
right=ast.Constant(value=0),

# first parse the search query to split it into words, except for quoted strings
# then search for each word in the exception properties
tokens = search_tokenizer(self.query.searchQuery)
and_exprs: list[ast.Expr] = []

if len(tokens) > 10:
raise ValueError("Too many search tokens")

for token in tokens:
if not token:
continue

or_exprs: list[ast.Expr] = []
props_to_search = [
"$exception_list",
"$exception_stack_trace_raw",
"$exception_type",
"$exception_message",
]
for prop in props_to_search:
or_exprs.append(
ast.CompareOperation(
op=ast.CompareOperationOp.Gt,
left=ast.Call(
name="position",
args=[
ast.Call(name="lower", args=[ast.Field(chain=["properties", prop])]),
ast.Call(name="lower", args=[ast.Constant(value=token)]),
],
),
right=ast.Constant(value=0),
)
)
)

exprs.append(
ast.Or(
exprs=or_exprs,
and_exprs.append(
ast.Or(
exprs=or_exprs,
)
)
)
exprs.append(ast.And(exprs=and_exprs))

return ast.And(exprs=exprs)

Expand Down Expand Up @@ -254,7 +273,6 @@ def error_tracking_groups(self):
queryset = ErrorTrackingGroup.objects.filter(team=self.team)
# :TRICKY: Ideally we'd have no null characters in the fingerprint, but if something made it into the pipeline with null characters
# (because rest of the system supports it), try cleaning it up here. Make sure this cleaning is consistent with the rest of the system.
# This does mean we'll not match with this ErrorTrackingGroup
cleaned_fingerprint = [part.replace("\x00", "\ufffd") for part in self.query.fingerprint or []]
queryset = (
queryset.filter(fingerprint=cleaned_fingerprint)
Expand All @@ -264,3 +282,14 @@ def error_tracking_groups(self):
queryset = queryset.filter(assignee=self.query.assignee) if self.query.assignee else queryset
groups = queryset.values("fingerprint", "merged_fingerprints", "status", "assignee")
return {str(item["fingerprint"]): item for item in groups}


def search_tokenizer(query: str) -> list[str]:
    """Break a search query into tokens, keeping quoted phrases together.

    A token is, in order of preference, a double-quoted span, a single-quoted span,
    or a run of non-whitespace characters. Leading and trailing quote characters
    (single or double) are then stripped from every token, quoted or not.

    Example:
        'This is a "quoted string" and this is \\'another one\\' with some words'
        -> ['This', 'is', 'a', 'quoted string', 'and', 'this', 'is', 'another one',
            'with', 'some', 'words']

    Nested quotes and other complex edge cases are not handled. If requirements
    change, consider a proper parser such as `pyparsing`.
    """
    quoted_or_word = r'"[^"]*"|\'[^\']*\'|\S+'
    stripped_tokens: list[str] = []
    for match in re.finditer(quoted_or_word, query):
        # An empty quoted phrase ("" or '') ends up as an empty token here.
        stripped_tokens.append(match.group(0).strip("'\""))
    return stripped_tokens
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@
FROM "posthog_errortrackinggroup"
WHERE ("posthog_errortrackinggroup"."team_id" = 2
AND "posthog_errortrackinggroup"."fingerprint" = (ARRAY['SyntaxError',
'Cannot use ''in'' operator to search for ''wireframes'' in ‹�” ýf�ì½é–"¹’0ø*Lö¹SY A�Ξ÷ԝf
'Cannot use ''in'' operator to search for ''wireframes'' in ‹�” ýf�ì½é–"¹’0ø*Lö¹SY A�Ξ÷ԝf
ˆ�Ø'])::text[])
'''
# ---
Expand Down Expand Up @@ -518,6 +518,46 @@
max_bytes_before_external_group_by=0
'''
# ---
# name: TestErrorTrackingQueryRunner.test_search_query_with_multiple_search_items
'''
SELECT count(DISTINCT events.uuid) AS occurrences,
count(DISTINCT events.`$session_id`) AS sessions,
count(DISTINCT events.distinct_id) AS users,
max(toTimeZone(events.timestamp, 'UTC')) AS last_seen,
min(toTimeZone(events.timestamp, 'UTC')) AS first_seen,
any(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_message'), ''), 'null'), '^"|"$', '')) AS description,
any(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_type'), ''), 'null'), '^"|"$', '')) AS exception_type,
JSONExtract(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_fingerprint'), ''), 'null'), '^"|"$', ''), '[]'), 'Array(String)') AS fingerprint
FROM events
LEFT OUTER JOIN
(SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
person_distinct_id_overrides.distinct_id AS distinct_id
FROM person_distinct_id_overrides
WHERE equals(person_distinct_id_overrides.team_id, 2)
GROUP BY person_distinct_id_overrides.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id)
LEFT JOIN
(SELECT person.id AS id,
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email
FROM person
WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
(SELECT person.id AS id, max(person.version) AS version
FROM person
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id)
WHERE and(equals(events.team_id, 2), equals(events.event, '$exception'), ifNull(notILike(events__person.properties___email, '%@posthog.com%'), 1), and(or(ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_list'), ''), 'null'), '^"|"$', '')), lower('databasenotfoundX')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_stack_trace_raw'), ''), 'null'), '^"|"$', '')), lower('databasenotfoundX')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_type'), ''), 'null'), '^"|"$', '')), lower('databasenotfoundX')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_message'), ''), 'null'), '^"|"$', '')), lower('databasenotfoundX')), 0), 0)), or(ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_list'), ''), 'null'), '^"|"$', '')), lower('clickhouse/client/execute.py')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_stack_trace_raw'), ''), 'null'), '^"|"$', '')), lower('clickhouse/client/execute.py')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_type'), ''), 'null'), '^"|"$', '')), lower('clickhouse/client/execute.py')), 0), 0), ifNull(greater(position(lower(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$exception_message'), ''), 'null'), '^"|"$', '')), lower('clickhouse/client/execute.py')), 0), 0))))
GROUP BY fingerprint
LIMIT 101
OFFSET 0 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=0
'''
# ---
# name: TestErrorTrackingQueryRunner.test_search_query_with_null_characters
'''
SELECT count(DISTINCT events.uuid) AS occurrences,
Expand Down
Loading

0 comments on commit cac1266

Please sign in to comment.