chore(similarity): Add logging for over 30 system frames (#81130)
Add logging for events whose stacktraces contain over 30 system-only frames (see the sketch after the file summary below).
jangjodi authored Nov 22, 2024
1 parent ef7abd1 commit a3dba87
Showing 6 changed files with 181 additions and 2 deletions.
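
For orientation, here is a minimal sketch of the check this commit introduces. It is an illustration only: the standalone helper and logger are hypothetical stand-ins, while MAX_FRAME_COUNT, the grouping.similarity.over_threshold_system_only_frames message, and the extra fields are taken from the diff below. The real logic lives inside get_stacktrace_string in src/sentry/seer/similarity/utils.py.

# A minimal sketch, assuming a hypothetical standalone helper; the real logic
# lives inside get_stacktrace_string in src/sentry/seer/similarity/utils.py.
from __future__ import annotations

import logging
from typing import Any

logger = logging.getLogger(__name__)

MAX_FRAME_COUNT = 30  # the cap the real code imports from its own module


def maybe_log_system_only_truncation(
    grouping_info: dict[str, Any],
    contributing_system_frame_count: int,
    app_hash: str | None,
    system_hash: str | None,
) -> None:
    # Log only when the system frames had to be truncated and the event has no
    # app hash, i.e. the stacktrace consists purely of system frames.
    if contributing_system_frame_count > MAX_FRAME_COUNT and not app_hash:
        logger.info(
            "grouping.similarity.over_threshold_system_only_frames",
            extra={
                # project_id/event_id are attached to grouping_info by the
                # callers temporarily, purely for this log (see the TODOs below).
                "project_id": grouping_info.get("project_id", ""),
                "event_id": grouping_info.get("event_id", ""),
                "hash": system_hash,
            },
        )

The project_id/event_id fields are read with .get() defaults because the callers only attach them temporarily, as the TODO comments in the diff note.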
7 changes: 6 additions & 1 deletion src/sentry/grouping/ingest/seer.py
@@ -182,7 +182,12 @@ def _circuit_breaker_broken(event: Event, project: Project) -> bool:


def _has_empty_stacktrace_string(event: Event, variants: Mapping[str, BaseVariant]) -> bool:
stacktrace_string = get_stacktrace_string(get_grouping_info_from_variants(variants))
# Temporarily add project and event id to this for logging purposes
# TODO: Remove when grouping.similarity.over_threshold_system_only_frames is removed
grouping_info = get_grouping_info_from_variants(variants)
grouping_info["project_id"] = event.project.id # type: ignore[assignment]
grouping_info["event_id"] = event.event_id # type: ignore[assignment]
stacktrace_string = get_stacktrace_string(grouping_info)
if stacktrace_string == "":
metrics.incr(
"grouping.similarity.did_call_seer",
12 changes: 12 additions & 0 deletions src/sentry/seer/similarity/utils.py
@@ -179,18 +179,22 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
html_frame_count = 0 # for a temporary metric
stacktrace_str = ""
found_non_snipped_context_line = False
is_frames_truncated = False

metrics.distribution("seer.grouping.exceptions.length", len(exceptions))

def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
nonlocal frame_count
nonlocal html_frame_count
nonlocal found_non_snipped_context_line
nonlocal is_frames_truncated
frame_strings = []

contributing_frames = [
frame for frame in frames if frame.get("id") == "frame" and frame.get("contributes")
]
if len(contributing_frames) + frame_count > MAX_FRAME_COUNT:
is_frames_truncated = True
contributing_frames = _discard_excess_frames(
contributing_frames, MAX_FRAME_COUNT, frame_count
)
@@ -287,6 +291,14 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
},
)

if is_frames_truncated and not app_hash:
logger_extra = {
"project_id": data.get("project_id", ""),
"event_id": data.get("event_id", ""),
"hash": system_hash,
}
logger.info("grouping.similarity.over_threshold_system_only_frames", extra=logger_extra)

return stacktrace_str.strip()


4 changes: 4 additions & 0 deletions src/sentry/tasks/embeddings_grouping/utils.py
@@ -355,6 +355,10 @@ def get_events_from_nodestore(
event._project_cache = project
if event and event.data and event_content_has_stacktrace(event):
grouping_info = get_grouping_info(None, project=project, event=event)
# Temporarily add project and event id to this for logging purposes
# TODO: Remove when grouping.similarity.over_threshold_system_only_frames is removed
grouping_info["project_id"] = project.id
grouping_info["event_id"] = event.event_id # type: ignore[assignment]
stacktrace_string = get_stacktrace_string(grouping_info)
if stacktrace_string == "":
invalid_event_group_ids.append(group_id)
46 changes: 46 additions & 0 deletions tests/sentry/grouping/ingest/test_seer.py
@@ -5,9 +5,11 @@
from sentry import options
from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
from sentry.eventstore.models import Event
from sentry.grouping.grouping_info import get_grouping_info_from_variants
from sentry.grouping.ingest.seer import get_seer_similar_issues, should_call_seer_for_grouping
from sentry.models.grouphash import GroupHash
from sentry.seer.similarity.types import SeerSimilarIssueData
from sentry.seer.similarity.utils import MAX_FRAME_COUNT
from sentry.testutils.cases import TestCase
from sentry.testutils.helpers.eventprocessing import save_new_event
from sentry.testutils.helpers.options import override_options
@@ -306,3 +308,47 @@ def test_returns_no_grouphash_and_empty_metadata_if_no_similar_group_found(self)
expected_metadata,
None,
)

@patch("sentry.seer.similarity.utils.logger")
def test_too_many_only_system_frames(self, mock_logger: Mock) -> None:
type = "FailedToFetchError"
value = "Charlie didn't bring the ball back"
context_line = f"raise {type}('{value}')"
new_event = Event(
project_id=self.project.id,
event_id="22312012112120120908201304152013",
data={
"title": f"{type}('{value}')",
"exception": {
"values": [
{
"type": type,
"value": value,
"stacktrace": {
"frames": [
{
"function": f"play_fetch_{i}",
"filename": f"dogpark{i}.py",
"context_line": context_line,
}
for i in range(MAX_FRAME_COUNT + 1)
]
},
}
]
},
"platform": "python",
},
)
variants = new_event.get_grouping_variants()
get_seer_similar_issues(new_event, variants)

grouping_info = get_grouping_info_from_variants(variants)
mock_logger.info.assert_called_with(
"grouping.similarity.over_threshold_system_only_frames",
extra={
"project_id": "",
"event_id": "",
"hash": grouping_info["system"]["hash"],
},
)
50 changes: 49 additions & 1 deletion tests/sentry/seer/similarity/test_utils.py
@@ -1,11 +1,13 @@
import copy
from collections.abc import Callable
from typing import Any, Literal, cast
from unittest.mock import patch
from uuid import uuid1

from sentry.eventstore.models import Event
from sentry.seer.similarity.utils import (
BASE64_ENCODED_PREFIXES,
MAX_FRAME_COUNT,
SEER_ELIGIBLE_PLATFORMS,
_is_snipped_context_line,
event_content_is_seer_eligible,
@@ -710,7 +712,53 @@ def test_no_app_no_system(self):
stacktrace_str = get_stacktrace_string(data)
assert stacktrace_str == ""

def test_over_30_contributing_frames(self):
@patch("sentry.seer.similarity.utils.logger")
def test_too_many_system_frames_single_exception(self, mock_logger):
data_system = copy.deepcopy(self.BASE_APP_DATA)
data_system["system"] = data_system.pop("app")
data_system["system"]["component"]["values"][0]["values"][0][
"values"
] += self.create_frames(MAX_FRAME_COUNT + 1, True)
data_system["project_id"] = self.project.id
data_system["event_id"] = "39485673049520"

get_stacktrace_string(data_system)

mock_logger.info.assert_called_with(
"grouping.similarity.over_threshold_system_only_frames",
extra={
"project_id": self.project.id,
"event_id": data_system["event_id"],
"hash": data_system["system"]["hash"],
},
)

@patch("sentry.seer.similarity.utils.logger")
def test_too_many_system_frames_chained_exception(self, mock_logger):
data_system = copy.deepcopy(self.CHAINED_APP_DATA)
data_system["system"] = data_system.pop("app")
data_system["project_id"] = self.project.id
data_system["event_id"] = "39485673049520"
# Split MAX_FRAME_COUNT across the two exceptions
data_system["system"]["component"]["values"][0]["values"][0]["values"][0][
"values"
] += self.create_frames(MAX_FRAME_COUNT // 2, True)
data_system["system"]["component"]["values"][0]["values"][1]["values"][0][
"values"
] += self.create_frames(MAX_FRAME_COUNT // 2, True)

get_stacktrace_string(data_system)

mock_logger.info.assert_called_with(
"grouping.similarity.over_threshold_system_only_frames",
extra={
"project_id": self.project.id,
"event_id": data_system["event_id"],
"hash": data_system["system"]["hash"],
},
)

def test_too_many_in_app_contributing_frames(self):
"""Check that when there are over 30 contributing frames, the last 30 are included."""

data_frames = copy.deepcopy(self.BASE_APP_DATA)
64 changes: 64 additions & 0 deletions tests/sentry/tasks/test_backfill_seer_grouping_records.py
@@ -25,6 +25,7 @@
from sentry.models.grouphash import GroupHash
from sentry.seer.similarity.grouping_records import CreateGroupingRecordData
from sentry.seer.similarity.types import RawSeerSimilarIssueData
from sentry.seer.similarity.utils import MAX_FRAME_COUNT
from sentry.snuba.dataset import Dataset
from sentry.snuba.referrer import Referrer
from sentry.tasks.embeddings_grouping.backfill_seer_grouping_records_for_project import (
@@ -365,6 +366,69 @@ def test_lookup_group_data_stacktrace_bulk_no_stacktrace_exception(self):
assert bulk_group_data_stacktraces["data"] == expected_group_data
assert bulk_group_data_stacktraces["stacktrace_list"] == expected_stacktraces

@patch("sentry.seer.similarity.utils.logger")
def test_lookup_group_data_stacktrace_bulk_too_many_system_frames(self, mock_logger):
"""
Test that if a group has over MAX_FRAME_COUNT system-only frames, it is logged
and included in the results
"""
# Use 2 events
rows, events, hashes = self.bulk_rows[:2], self.bulk_events[:2], {}
group_ids = [row["group_id"] for row in rows]
for group_id in group_ids:
hashes.update({group_id: self.group_hashes[group_id]})
# Create one event where the stacktrace has over MAX_FRAME_COUNT system only frames
exception: Any = copy.deepcopy(EXCEPTION)
exception["values"][0]["stacktrace"]["frames"] += [
{
"function": f"divide_by_zero_{i}",
"module": "__main__",
"filename": "python_onboarding_{i}.py",
"abs_path": "/Users/user/python_onboarding/python_onboarding_{i}.py",
"lineno": i,
"in_app": True,
}
for i in range(MAX_FRAME_COUNT + 1)
]
event = self.store_event(
data={
"platform": "python",
"exception": exception,
"title": "title",
"timestamp": before_now(seconds=10).isoformat(),
},
project_id=self.project.id,
assert_no_errors=False,
)
rows.append({"event_id": event.event_id, "group_id": event.group_id})
group_hash = GroupHash.objects.filter(group_id=event.group.id).first()
assert group_hash
hashes.update({event.group_id: group_hash.hash})
events.append(event)

bulk_group_data_stacktraces, _ = get_events_from_nodestore(self.project, rows, group_ids)
expected_group_data = [
CreateGroupingRecordData(
group_id=event.group.id,
hash=hashes[event.group.id],
project_id=self.project.id,
exception_type=get_path(event.data, "exception", "values", -1, "type"),
)
for event in events
]

assert bulk_group_data_stacktraces["data"] == expected_group_data
assert len(bulk_group_data_stacktraces["stacktrace_list"]) == len(events)

mock_logger.info.assert_called_with(
"grouping.similarity.over_threshold_system_only_frames",
extra={
"project_id": self.project.id,
"event_id": event.event_id,
"hash": group_hash.hash,
},
)

def test_lookup_group_data_stacktrace_bulk_with_fallback_success(self):
"""Test successful bulk lookup with fallback, where the fallback isn't used"""
rows, events, hashes = (
