From 2e3a412715ba9cc711784fc868462b46f26f0e16 Mon Sep 17 00:00:00 2001 From: Jodi Jang <116035587+jangjodi@users.noreply.github.com> Date: Mon, 25 Nov 2024 13:02:06 -0800 Subject: [PATCH] chore(similarity): Do not send > 30 system frames to seer (#81259) Do not send stacktraces with > 30 system frames and no in-app frames to seer --- src/sentry/grouping/ingest/seer.py | 12 +++- .../group_similar_issues_embeddings.py | 8 ++- src/sentry/seer/similarity/utils.py | 41 ++++++++++++ src/sentry/tasks/embeddings_grouping/utils.py | 9 ++- tests/sentry/grouping/ingest/test_seer.py | 49 ++++++++++++++ tests/sentry/seer/similarity/test_utils.py | 42 ++++++++++-- .../test_backfill_seer_grouping_records.py | 66 ++++++++++++++++++- 7 files changed, 212 insertions(+), 15 deletions(-) diff --git a/src/sentry/grouping/ingest/seer.py b/src/sentry/grouping/ingest/seer.py index 52c6eaadca5e6d..c5b2d23a52e531 100644 --- a/src/sentry/grouping/ingest/seer.py +++ b/src/sentry/grouping/ingest/seer.py @@ -17,9 +17,10 @@ from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer from sentry.seer.similarity.types import SimilarIssuesEmbeddingsRequest from sentry.seer.similarity.utils import ( + ReferrerOptions, event_content_is_seer_eligible, filter_null_from_string, - get_stacktrace_string, + get_stacktrace_string_with_metrics, killswitch_enabled, ) from sentry.utils import metrics @@ -182,7 +183,9 @@ def _circuit_breaker_broken(event: Event, project: Project) -> bool: def _has_empty_stacktrace_string(event: Event, variants: Mapping[str, BaseVariant]) -> bool: - stacktrace_string = get_stacktrace_string(get_grouping_info_from_variants(variants)) + stacktrace_string = get_stacktrace_string_with_metrics( + get_grouping_info_from_variants(variants), event.platform, ReferrerOptions.INGEST + ) if stacktrace_string == "": metrics.incr( "grouping.similarity.did_call_seer", @@ -217,7 +220,10 @@ def get_seer_similar_issues( "hash": event_hash, "project_id": event.project.id, "stacktrace": event.data.get( - "stacktrace_string", get_stacktrace_string(get_grouping_info_from_variants(variants)) + "stacktrace_string", + get_stacktrace_string_with_metrics( + get_grouping_info_from_variants(variants), event.platform, ReferrerOptions.INGEST + ), ), "exception_type": filter_null_from_string(exception_type) if exception_type else None, "k": num_neighbors, diff --git a/src/sentry/issues/endpoints/group_similar_issues_embeddings.py b/src/sentry/issues/endpoints/group_similar_issues_embeddings.py index 55cdda74aa4f19..fff3dcff065ade 100644 --- a/src/sentry/issues/endpoints/group_similar_issues_embeddings.py +++ b/src/sentry/issues/endpoints/group_similar_issues_embeddings.py @@ -18,6 +18,7 @@ from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer from sentry.seer.similarity.types import SeerSimilarIssueData, SimilarIssuesEmbeddingsRequest from sentry.seer.similarity.utils import ( + TooManyOnlySystemFramesException, event_content_has_stacktrace, get_stacktrace_string, killswitch_enabled, @@ -82,9 +83,12 @@ def get(self, request: Request, group) -> Response: stacktrace_string = "" if latest_event and event_content_has_stacktrace(latest_event): grouping_info = get_grouping_info(None, project=group.project, event=latest_event) - stacktrace_string = get_stacktrace_string(grouping_info) + try: + stacktrace_string = get_stacktrace_string(grouping_info) + except TooManyOnlySystemFramesException: + stacktrace_string = "" - if stacktrace_string == "" or not latest_event: + if not stacktrace_string or not latest_event: return Response([]) # No exception, stacktrace or in-app frames, or event similar_issues_params: SimilarIssuesEmbeddingsRequest = { diff --git a/src/sentry/seer/similarity/utils.py b/src/sentry/seer/similarity/utils.py index 29c8b3810c28dd..a50dad738a99f9 100644 --- a/src/sentry/seer/similarity/utils.py +++ b/src/sentry/seer/similarity/utils.py @@ -1,4 +1,5 @@ import logging +from enum import StrEnum from typing import Any, TypeVar from sentry import options @@ -151,6 +152,15 @@ ] +class ReferrerOptions(StrEnum): + INGEST = "ingest" + BACKFILL = "backfill" + + +class TooManyOnlySystemFramesException(Exception): + pass + + def _get_value_if_exists(exception_value: dict[str, Any]) -> str: return exception_value["values"][0] if exception_value.get("values") else "" @@ -177,6 +187,7 @@ def get_stacktrace_string(data: dict[str, Any]) -> str: frame_count = 0 html_frame_count = 0 # for a temporary metric + is_frames_truncated = False stacktrace_str = "" found_non_snipped_context_line = False @@ -185,12 +196,15 @@ def get_stacktrace_string(data: dict[str, Any]) -> str: def _process_frames(frames: list[dict[str, Any]]) -> list[str]: nonlocal frame_count nonlocal html_frame_count + nonlocal is_frames_truncated nonlocal found_non_snipped_context_line frame_strings = [] contributing_frames = [ frame for frame in frames if frame.get("id") == "frame" and frame.get("contributes") ] + if len(contributing_frames) + frame_count > MAX_FRAME_COUNT: + is_frames_truncated = True contributing_frames = _discard_excess_frames( contributing_frames, MAX_FRAME_COUNT, frame_count ) @@ -255,6 +269,8 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]: exc_value = _get_value_if_exists(exception_value) elif exception_value.get("id") == "stacktrace" and frame_count < MAX_FRAME_COUNT: frame_strings = _process_frames(exception_value["values"]) + if is_frames_truncated and not app_hash: + raise TooManyOnlySystemFramesException # Only exceptions have the type and value properties, so we don't need to handle the threads # case here header = f"{exc_type}: {exc_value}\n" if exception["id"] == "exception" else "" @@ -290,6 +306,31 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]: return stacktrace_str.strip() +def get_stacktrace_string_with_metrics( + data: dict[str, Any], platform: str | None, referrer: ReferrerOptions +) -> str | None: + try: + stacktrace_string = get_stacktrace_string(data) + except TooManyOnlySystemFramesException: + platform = platform if platform else "unknown" + metrics.incr( + "grouping.similarity.over_threshold_only_system_frames", + sample_rate=options.get("seer.similarity.metrics_sample_rate"), + tags={"platform": platform, "referrer": referrer}, + ) + if referrer == ReferrerOptions.INGEST: + metrics.incr( + "grouping.similarity.did_call_seer", + sample_rate=options.get("seer.similarity.metrics_sample_rate"), + tags={ + "call_made": False, + "blocker": "over-threshold-only-system-frames", + }, + ) + stacktrace_string = None + return stacktrace_string + + def event_content_has_stacktrace(event: Event) -> bool: # If an event has no stacktrace, there's no data for Seer to analyze, so no point in making the # API call. If we ever start analyzing message-only events, we'll need to add `event.title in diff --git a/src/sentry/tasks/embeddings_grouping/utils.py b/src/sentry/tasks/embeddings_grouping/utils.py index 7dd7b95c129981..b4606837d64c9e 100644 --- a/src/sentry/tasks/embeddings_grouping/utils.py +++ b/src/sentry/tasks/embeddings_grouping/utils.py @@ -32,9 +32,10 @@ SimilarHashNotFoundError, ) from sentry.seer.similarity.utils import ( + ReferrerOptions, event_content_has_stacktrace, filter_null_from_string, - get_stacktrace_string, + get_stacktrace_string_with_metrics, ) from sentry.snuba.dataset import Dataset from sentry.snuba.referrer import Referrer @@ -355,8 +356,10 @@ def get_events_from_nodestore( event._project_cache = project if event and event.data and event_content_has_stacktrace(event): grouping_info = get_grouping_info(None, project=project, event=event) - stacktrace_string = get_stacktrace_string(grouping_info) - if stacktrace_string == "": + stacktrace_string = get_stacktrace_string_with_metrics( + grouping_info, event.platform, ReferrerOptions.BACKFILL + ) + if not stacktrace_string: invalid_event_group_ids.append(group_id) continue primary_hash = event.get_primary_hash() diff --git a/tests/sentry/grouping/ingest/test_seer.py b/tests/sentry/grouping/ingest/test_seer.py index 33c872f07325a3..1477175ad31d9f 100644 --- a/tests/sentry/grouping/ingest/test_seer.py +++ b/tests/sentry/grouping/ingest/test_seer.py @@ -8,6 +8,7 @@ from sentry.grouping.ingest.seer import get_seer_similar_issues, should_call_seer_for_grouping from sentry.models.grouphash import GroupHash from sentry.seer.similarity.types import SeerSimilarIssueData +from sentry.seer.similarity.utils import MAX_FRAME_COUNT from sentry.testutils.cases import TestCase from sentry.testutils.helpers.eventprocessing import save_new_event from sentry.testutils.helpers.options import override_options @@ -306,3 +307,51 @@ def test_returns_no_grouphash_and_empty_metadata_if_no_similar_group_found(self) expected_metadata, None, ) + + @patch("sentry.seer.similarity.utils.metrics") + def test_too_many_only_system_frames(self, mock_metrics: Mock) -> None: + type = "FailedToFetchError" + value = "Charlie didn't bring the ball back" + context_line = f"raise {type}('{value}')" + new_event = Event( + project_id=self.project.id, + event_id="22312012112120120908201304152013", + data={ + "title": f"{type}('{value}')", + "exception": { + "values": [ + { + "type": type, + "value": value, + "stacktrace": { + "frames": [ + { + "function": f"play_fetch_{i}", + "filename": f"dogpark{i}.py", + "context_line": context_line, + } + for i in range(MAX_FRAME_COUNT + 1) + ] + }, + } + ] + }, + "platform": "python", + }, + ) + get_seer_similar_issues(new_event, new_event.get_grouping_variants()) + + sample_rate = options.get("seer.similarity.metrics_sample_rate") + mock_metrics.incr.assert_any_call( + "grouping.similarity.over_threshold_only_system_frames", + sample_rate=sample_rate, + tags={"platform": "python", "referrer": "ingest"}, + ) + mock_metrics.incr.assert_any_call( + "grouping.similarity.did_call_seer", + sample_rate=1.0, + tags={ + "call_made": False, + "blocker": "over-threshold-only-system-frames", + }, + ) diff --git a/tests/sentry/seer/similarity/test_utils.py b/tests/sentry/seer/similarity/test_utils.py index 2bbe76601e432f..353d81db1d25a5 100644 --- a/tests/sentry/seer/similarity/test_utils.py +++ b/tests/sentry/seer/similarity/test_utils.py @@ -3,10 +3,14 @@ from typing import Any, Literal, cast from uuid import uuid1 +import pytest + from sentry.eventstore.models import Event from sentry.seer.similarity.utils import ( BASE64_ENCODED_PREFIXES, + MAX_FRAME_COUNT, SEER_ELIGIBLE_PLATFORMS, + TooManyOnlySystemFramesException, _is_snipped_context_line, event_content_is_seer_eligible, filter_null_from_string, @@ -670,7 +674,7 @@ def test_chained_too_many_frames_minified_js_frame_limit(self): ) def test_chained_too_many_exceptions(self): - """Test that we restrict number of chained exceptions to 30.""" + """Test that we restrict number of chained exceptions to MAX_FRAME_COUNT.""" data_chained_exception = copy.deepcopy(self.CHAINED_APP_DATA) data_chained_exception["app"]["component"]["values"][0]["values"] = [ self.create_exception( @@ -678,10 +682,10 @@ def test_chained_too_many_exceptions(self): exception_value=f"exception {i} message!", frames=self.create_frames(num_frames=1, context_line_factory=lambda i: f"line {i}"), ) - for i in range(1, 32) + for i in range(1, MAX_FRAME_COUNT + 2) ] stacktrace_str = get_stacktrace_string(data_chained_exception) - for i in range(2, 32): + for i in range(2, MAX_FRAME_COUNT + 2): assert f"exception {i} message!" in stacktrace_str assert "exception 1 message!" not in stacktrace_str @@ -710,9 +714,35 @@ def test_no_app_no_system(self): stacktrace_str = get_stacktrace_string(data) assert stacktrace_str == "" - def test_over_30_contributing_frames(self): - """Check that when there are over 30 contributing frames, the last 30 are included.""" + def test_too_many_system_frames_single_exception(self): + data_system = copy.deepcopy(self.BASE_APP_DATA) + data_system["system"] = data_system.pop("app") + data_system["system"]["component"]["values"][0]["values"][0][ + "values" + ] += self.create_frames(MAX_FRAME_COUNT + 1, True) + + with pytest.raises(TooManyOnlySystemFramesException): + get_stacktrace_string(data_system) + + def test_too_many_system_frames_chained_exception(self): + data_system = copy.deepcopy(self.CHAINED_APP_DATA) + data_system["system"] = data_system.pop("app") + # Split MAX_FRAME_COUNT across the two exceptions + data_system["system"]["component"]["values"][0]["values"][0]["values"][0][ + "values" + ] += self.create_frames(MAX_FRAME_COUNT // 2, True) + data_system["system"]["component"]["values"][0]["values"][1]["values"][0][ + "values" + ] += self.create_frames(MAX_FRAME_COUNT // 2, True) + + with pytest.raises(TooManyOnlySystemFramesException): + get_stacktrace_string(data_system) + def test_too_many_in_app_contributing_frames(self): + """ + Check that when there are over MAX_FRAME_COUNT contributing frames, the last MAX_FRAME_COUNT + are included. + """ data_frames = copy.deepcopy(self.BASE_APP_DATA) # Create 30 contributing frames, 1-20 -> last 10 should be included data_frames["app"]["component"]["values"][0]["values"][0]["values"] = self.create_frames( @@ -739,7 +769,7 @@ def test_over_30_contributing_frames(self): for i in range(41, 61): num_frames += 1 assert ("test = " + str(i) + "!") in stacktrace_str - assert num_frames == 30 + assert num_frames == MAX_FRAME_COUNT def test_too_many_frames_minified_js_frame_limit(self): """Test that we restrict fully-minified stacktraces to 20 frames, and all other stacktraces to 30 frames.""" diff --git a/tests/sentry/tasks/test_backfill_seer_grouping_records.py b/tests/sentry/tasks/test_backfill_seer_grouping_records.py index efad864ac4809d..617657ee7cd68f 100644 --- a/tests/sentry/tasks/test_backfill_seer_grouping_records.py +++ b/tests/sentry/tasks/test_backfill_seer_grouping_records.py @@ -25,6 +25,7 @@ from sentry.models.grouphash import GroupHash from sentry.seer.similarity.grouping_records import CreateGroupingRecordData from sentry.seer.similarity.types import RawSeerSimilarIssueData +from sentry.seer.similarity.utils import MAX_FRAME_COUNT from sentry.snuba.dataset import Dataset from sentry.snuba.referrer import Referrer from sentry.tasks.embeddings_grouping.backfill_seer_grouping_records_for_project import ( @@ -47,7 +48,7 @@ from sentry.utils.safe import get_path from sentry.utils.snuba import QueryTooManySimultaneous, RateLimitExceeded, bulk_snuba_queries -EXCEPTION = { +EXCEPTION: dict[str, Any] = { "values": [ { "stacktrace": { @@ -365,6 +366,69 @@ def test_lookup_group_data_stacktrace_bulk_no_stacktrace_exception(self): assert bulk_group_data_stacktraces["data"] == expected_group_data assert bulk_group_data_stacktraces["stacktrace_list"] == expected_stacktraces + @patch("sentry.seer.similarity.utils.metrics") + def test_lookup_group_data_stacktrace_bulk_invalid_stacktrace_exception(self, mock_metrics): + """ + Test that if a group has over MAX_FRAME_COUNT only system frames, its data is not included in + the bulk lookup result + """ + # Use 2 events + rows, events, hashes = self.bulk_rows[:2], self.bulk_events[:2], {} + group_ids = [row["group_id"] for row in rows] + for group_id in group_ids: + hashes.update({group_id: self.group_hashes[group_id]}) + # Create one event where the stacktrace has over MAX_FRAME_COUNT system only frames + exception = copy.deepcopy(EXCEPTION) + exception["values"][0]["stacktrace"]["frames"] += [ + { + "function": f"divide_by_zero_{i}", + "module": "__main__", + "filename": "python_onboarding_{i}.py", + "abs_path": "/Users/user/python_onboarding/python_onboarding_{i}.py", + "lineno": i, + "in_app": True, + } + for i in range(MAX_FRAME_COUNT + 1) + ] + event = self.store_event( + data={ + "platform": "python", + "exception": exception, + "title": "title", + "timestamp": before_now(seconds=10).isoformat(), + }, + project_id=self.project.id, + assert_no_errors=False, + ) + rows.append({"event_id": event.event_id, "group_id": event.group_id}) + group_hash = GroupHash.objects.filter(group_id=event.group.id).first() + assert group_hash + hashes.update({event.group_id: group_hash.hash}) + + bulk_group_data_stacktraces, _ = get_events_from_nodestore(self.project, rows, group_ids) + expected_group_data = [ + CreateGroupingRecordData( + group_id=event.group.id, + hash=hashes[event.group.id], + project_id=self.project.id, + exception_type=get_path(event.data, "exception", "values", -1, "type"), + ) + for event in events + ] + expected_stacktraces = [ + f'Error{i}: error with value\n File "function_{i}.py", function function_{i}' + for i in range(2) + ] + assert bulk_group_data_stacktraces["data"] == expected_group_data + assert bulk_group_data_stacktraces["stacktrace_list"] == expected_stacktraces + + sample_rate = options.get("seer.similarity.metrics_sample_rate") + mock_metrics.incr.assert_called_with( + "grouping.similarity.over_threshold_only_system_frames", + sample_rate=sample_rate, + tags={"platform": "python", "referrer": "backfill"}, + ) + def test_lookup_group_data_stacktrace_bulk_with_fallback_success(self): """Test successful bulk lookup with fallback, where the fallback isn't used""" rows, events, hashes = (