diff --git a/src/sentry/grouping/ingest/seer.py b/src/sentry/grouping/ingest/seer.py index 52c6eaadca5e6d..369848f5cc4712 100644 --- a/src/sentry/grouping/ingest/seer.py +++ b/src/sentry/grouping/ingest/seer.py @@ -182,7 +182,12 @@ def _circuit_breaker_broken(event: Event, project: Project) -> bool: def _has_empty_stacktrace_string(event: Event, variants: Mapping[str, BaseVariant]) -> bool: - stacktrace_string = get_stacktrace_string(get_grouping_info_from_variants(variants)) + # Temporarily add project and event id to this for logging purposes + # TODO: Remove when grouping.similarity.over_threshold_system_only_frames is removed + grouping_info = get_grouping_info_from_variants(variants) + grouping_info["project_id"] = event.project.id # type: ignore[assignment] + grouping_info["event_id"] = event.event_id # type: ignore[assignment] + stacktrace_string = get_stacktrace_string(grouping_info) if stacktrace_string == "": metrics.incr( "grouping.similarity.did_call_seer", diff --git a/src/sentry/seer/similarity/utils.py b/src/sentry/seer/similarity/utils.py index 29c8b3810c28dd..faf54791795428 100644 --- a/src/sentry/seer/similarity/utils.py +++ b/src/sentry/seer/similarity/utils.py @@ -179,6 +179,7 @@ def get_stacktrace_string(data: dict[str, Any]) -> str: html_frame_count = 0 # for a temporary metric stacktrace_str = "" found_non_snipped_context_line = False + is_frames_truncated = False metrics.distribution("seer.grouping.exceptions.length", len(exceptions)) @@ -186,11 +187,14 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]: nonlocal frame_count nonlocal html_frame_count nonlocal found_non_snipped_context_line + nonlocal is_frames_truncated frame_strings = [] contributing_frames = [ frame for frame in frames if frame.get("id") == "frame" and frame.get("contributes") ] + if len(contributing_frames) + frame_count > MAX_FRAME_COUNT: + is_frames_truncated = True contributing_frames = _discard_excess_frames( contributing_frames, MAX_FRAME_COUNT, 
frame_count ) @@ -287,6 +291,14 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]: }, ) + if is_frames_truncated and not app_hash: + logger_extra = { + "project_id": data.get("project_id", ""), + "event_id": data.get("event_id", ""), + "hash": system_hash, + } + logger.info("grouping.similarity.over_threshold_system_only_frames", extra=logger_extra) + return stacktrace_str.strip() diff --git a/src/sentry/tasks/embeddings_grouping/utils.py b/src/sentry/tasks/embeddings_grouping/utils.py index 7dd7b95c129981..7c30b4bd44551e 100644 --- a/src/sentry/tasks/embeddings_grouping/utils.py +++ b/src/sentry/tasks/embeddings_grouping/utils.py @@ -355,6 +355,10 @@ def get_events_from_nodestore( event._project_cache = project if event and event.data and event_content_has_stacktrace(event): grouping_info = get_grouping_info(None, project=project, event=event) + # Temporarily add project and event id to this for logging purposes + # TODO: Remove when grouping.similarity.over_threshold_system_only_frames is removed + grouping_info["project_id"] = project.id + grouping_info["event_id"] = event.event_id # type: ignore[assignment] stacktrace_string = get_stacktrace_string(grouping_info) if stacktrace_string == "": invalid_event_group_ids.append(group_id) diff --git a/tests/sentry/grouping/ingest/test_seer.py b/tests/sentry/grouping/ingest/test_seer.py index 33c872f07325a3..0a044449582416 100644 --- a/tests/sentry/grouping/ingest/test_seer.py +++ b/tests/sentry/grouping/ingest/test_seer.py @@ -5,9 +5,11 @@ from sentry import options from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION from sentry.eventstore.models import Event +from sentry.grouping.grouping_info import get_grouping_info_from_variants from sentry.grouping.ingest.seer import get_seer_similar_issues, should_call_seer_for_grouping from sentry.models.grouphash import GroupHash from sentry.seer.similarity.types import SeerSimilarIssueData +from sentry.seer.similarity.utils import MAX_FRAME_COUNT from 
sentry.testutils.cases import TestCase from sentry.testutils.helpers.eventprocessing import save_new_event from sentry.testutils.helpers.options import override_options @@ -306,3 +308,47 @@ def test_returns_no_grouphash_and_empty_metadata_if_no_similar_group_found(self) expected_metadata, None, ) + + @patch("sentry.seer.similarity.utils.logger") + def test_too_many_only_system_frames(self, mock_logger: Mock) -> None: + type = "FailedToFetchError" + value = "Charlie didn't bring the ball back" + context_line = f"raise {type}('{value}')" + new_event = Event( + project_id=self.project.id, + event_id="22312012112120120908201304152013", + data={ + "title": f"{type}('{value}')", + "exception": { + "values": [ + { + "type": type, + "value": value, + "stacktrace": { + "frames": [ + { + "function": f"play_fetch_{i}", + "filename": f"dogpark{i}.py", + "context_line": context_line, + } + for i in range(MAX_FRAME_COUNT + 1) + ] + }, + } + ] + }, + "platform": "python", + }, + ) + variants = new_event.get_grouping_variants() + get_seer_similar_issues(new_event, variants) + + grouping_info = get_grouping_info_from_variants(variants) + mock_logger.info.assert_called_with( + "grouping.similarity.over_threshold_system_only_frames", + extra={ + "project_id": "", + "event_id": "", + "hash": grouping_info["system"]["hash"], + }, + ) diff --git a/tests/sentry/seer/similarity/test_utils.py b/tests/sentry/seer/similarity/test_utils.py index 2bbe76601e432f..54d94c736be474 100644 --- a/tests/sentry/seer/similarity/test_utils.py +++ b/tests/sentry/seer/similarity/test_utils.py @@ -1,11 +1,13 @@ import copy from collections.abc import Callable from typing import Any, Literal, cast +from unittest.mock import patch from uuid import uuid1 from sentry.eventstore.models import Event from sentry.seer.similarity.utils import ( BASE64_ENCODED_PREFIXES, + MAX_FRAME_COUNT, SEER_ELIGIBLE_PLATFORMS, _is_snipped_context_line, event_content_is_seer_eligible, @@ -710,7 +712,53 @@ def 
test_no_app_no_system(self): stacktrace_str = get_stacktrace_string(data) assert stacktrace_str == "" - def test_over_30_contributing_frames(self): + @patch("sentry.seer.similarity.utils.logger") + def test_too_many_system_frames_single_exception(self, mock_logger): + data_system = copy.deepcopy(self.BASE_APP_DATA) + data_system["system"] = data_system.pop("app") + data_system["system"]["component"]["values"][0]["values"][0][ + "values" + ] += self.create_frames(MAX_FRAME_COUNT + 1, True) + data_system["project_id"] = self.project.id + data_system["event_id"] = "39485673049520" + + get_stacktrace_string(data_system) + + mock_logger.info.assert_called_with( + "grouping.similarity.over_threshold_system_only_frames", + extra={ + "project_id": self.project.id, + "event_id": data_system["event_id"], + "hash": data_system["system"]["hash"], + }, + ) + + @patch("sentry.seer.similarity.utils.logger") + def test_too_many_system_frames_chained_exception(self, mock_logger): + data_system = copy.deepcopy(self.CHAINED_APP_DATA) + data_system["system"] = data_system.pop("app") + data_system["project_id"] = self.project.id + data_system["event_id"] = "39485673049520" + # Split MAX_FRAME_COUNT across the two exceptions + data_system["system"]["component"]["values"][0]["values"][0]["values"][0][ + "values" + ] += self.create_frames(MAX_FRAME_COUNT // 2, True) + data_system["system"]["component"]["values"][0]["values"][1]["values"][0][ + "values" + ] += self.create_frames(MAX_FRAME_COUNT // 2, True) + + get_stacktrace_string(data_system) + + mock_logger.info.assert_called_with( + "grouping.similarity.over_threshold_system_only_frames", + extra={ + "project_id": self.project.id, + "event_id": data_system["event_id"], + "hash": data_system["system"]["hash"], + }, + ) + + def test_too_many_in_app_contributing_frames(self): """Check that when there are over 30 contributing frames, the last 30 are included.""" data_frames = copy.deepcopy(self.BASE_APP_DATA) diff --git 
a/tests/sentry/tasks/test_backfill_seer_grouping_records.py b/tests/sentry/tasks/test_backfill_seer_grouping_records.py index efad864ac4809d..3cdf4c36d232a1 100644 --- a/tests/sentry/tasks/test_backfill_seer_grouping_records.py +++ b/tests/sentry/tasks/test_backfill_seer_grouping_records.py @@ -25,6 +25,7 @@ from sentry.models.grouphash import GroupHash from sentry.seer.similarity.grouping_records import CreateGroupingRecordData from sentry.seer.similarity.types import RawSeerSimilarIssueData +from sentry.seer.similarity.utils import MAX_FRAME_COUNT from sentry.snuba.dataset import Dataset from sentry.snuba.referrer import Referrer from sentry.tasks.embeddings_grouping.backfill_seer_grouping_records_for_project import ( @@ -365,6 +366,69 @@ def test_lookup_group_data_stacktrace_bulk_no_stacktrace_exception(self): assert bulk_group_data_stacktraces["data"] == expected_group_data assert bulk_group_data_stacktraces["stacktrace_list"] == expected_stacktraces + @patch("sentry.seer.similarity.utils.logger") + def test_lookup_group_data_stacktrace_bulk_too_many_system_frames(self, mock_logger): + """ + Test that if a group has over MAX_FRAME_COUNT only system frames, it is logged + and included in results + """ + # Use 2 events + rows, events, hashes = self.bulk_rows[:2], self.bulk_events[:2], {} + group_ids = [row["group_id"] for row in rows] + for group_id in group_ids: + hashes.update({group_id: self.group_hashes[group_id]}) + # Create one event where the stacktrace has over MAX_FRAME_COUNT system only frames + exception: Any = copy.deepcopy(EXCEPTION) + exception["values"][0]["stacktrace"]["frames"] += [ + { + "function": f"divide_by_zero_{i}", + "module": "__main__", + "filename": f"python_onboarding_{i}.py", + "abs_path": f"/Users/user/python_onboarding/python_onboarding_{i}.py", + "lineno": i, + "in_app": True, + } + for i in range(MAX_FRAME_COUNT + 1) + ] + event = self.store_event( + data={ + "platform": "python", + "exception": exception, + "title": "title", + 
"timestamp": before_now(seconds=10).isoformat(), + }, + project_id=self.project.id, + assert_no_errors=False, + ) + rows.append({"event_id": event.event_id, "group_id": event.group_id}) + group_hash = GroupHash.objects.filter(group_id=event.group.id).first() + assert group_hash + hashes.update({event.group_id: group_hash.hash}) + events.append(event) + + bulk_group_data_stacktraces, _ = get_events_from_nodestore(self.project, rows, group_ids) + expected_group_data = [ + CreateGroupingRecordData( + group_id=event.group.id, + hash=hashes[event.group.id], + project_id=self.project.id, + exception_type=get_path(event.data, "exception", "values", -1, "type"), + ) + for event in events + ] + + assert bulk_group_data_stacktraces["data"] == expected_group_data + assert len(bulk_group_data_stacktraces["stacktrace_list"]) == len(events) + + mock_logger.info.assert_called_with( + "grouping.similarity.over_threshold_system_only_frames", + extra={ + "project_id": self.project.id, + "event_id": event.event_id, + "hash": group_hash.hash, + }, + ) + def test_lookup_group_data_stacktrace_bulk_with_fallback_success(self): """Test successful bulk lookup with fallback, where the fallback isn't used""" rows, events, hashes = (