Skip to content

Commit

Permalink
chore(similarity): Handle no filename and module (#81272)
Browse files Browse the repository at this point in the history
Replace the filename with the module if a frame has no filename.
Do not send the stacktrace to Seer if a frame has neither a module nor a
filename.
  • Loading branch information
jangjodi authored Nov 27, 2024
1 parent a6a3f54 commit fc8225b
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 4 deletions.
26 changes: 25 additions & 1 deletion src/sentry/seer/similarity/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ class TooManyOnlySystemFramesException(Exception):
pass


class NoFilenameOrModuleException(Exception):
    """Raised when a stacktrace frame has neither a filename nor a module."""


def _get_value_if_exists(exception_value: dict[str, Any]) -> str:
return exception_value["values"][0] if exception_value.get("values") else ""

Expand Down Expand Up @@ -188,6 +192,7 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
frame_count = 0
html_frame_count = 0 # for a temporary metric
is_frames_truncated = False
has_no_filename_or_module = False
stacktrace_str = ""
found_non_snipped_context_line = False

Expand All @@ -197,6 +202,7 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
nonlocal frame_count
nonlocal html_frame_count
nonlocal is_frames_truncated
nonlocal has_no_filename_or_module
nonlocal found_non_snipped_context_line
frame_strings = []

Expand All @@ -211,14 +217,19 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
frame_count += len(contributing_frames)

for frame in contributing_frames:
frame_dict = {"filename": "", "function": "", "context-line": ""}
frame_dict = {"filename": "", "function": "", "context-line": "", "module": ""}
for frame_values in frame.get("values", []):
if frame_values.get("id") in frame_dict:
frame_dict[frame_values["id"]] = _get_value_if_exists(frame_values)

if not _is_snipped_context_line(frame_dict["context-line"]):
found_non_snipped_context_line = True

if frame_dict["filename"] == "" and frame_dict["module"] == "":
has_no_filename_or_module = True
elif frame_dict["filename"] == "":
frame_dict["filename"] = frame_dict["module"]

# Not an exhaustive list of tests we could run to detect HTML, but this is only
# meant to be a temporary, quick-and-dirty metric
# TODO: Don't let this, and the metric below, hang around forever. It's only to
Expand Down Expand Up @@ -271,6 +282,8 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
frame_strings = _process_frames(exception_value["values"])
if is_frames_truncated and not app_hash:
raise TooManyOnlySystemFramesException
if has_no_filename_or_module:
raise NoFilenameOrModuleException
# Only exceptions have the type and value properties, so we don't need to handle the threads
# case here
header = f"{exc_type}: {exc_value}\n" if exception["id"] == "exception" else ""
Expand Down Expand Up @@ -328,6 +341,17 @@ def get_stacktrace_string_with_metrics(
},
)
stacktrace_string = None
except NoFilenameOrModuleException:
if referrer == ReferrerOptions.INGEST:
metrics.incr(
"grouping.similarity.did_call_seer",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={
"call_made": False,
"blocker": "no-module-or-filename",
},
)
stacktrace_string = None
return stacktrace_string


Expand Down
47 changes: 44 additions & 3 deletions tests/sentry/seer/similarity/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
import copy
from collections.abc import Callable
from typing import Any, Literal, cast
from unittest.mock import patch
from uuid import uuid1

import pytest

from sentry import options
from sentry.eventstore.models import Event
from sentry.seer.similarity.utils import (
BASE64_ENCODED_PREFIXES,
MAX_FRAME_COUNT,
SEER_ELIGIBLE_PLATFORMS,
NoFilenameOrModuleException,
ReferrerOptions,
TooManyOnlySystemFramesException,
_is_snipped_context_line,
event_content_is_seer_eligible,
filter_null_from_string,
get_stacktrace_string,
get_stacktrace_string_with_metrics,
)
from sentry.testutils.cases import TestCase

Expand Down Expand Up @@ -331,14 +336,14 @@ class GetStacktraceStringTest(TestCase):
"name": None,
"contributes": True,
"hint": None,
"values": [],
"values": ["module"],
},
{
"id": "filename",
"name": None,
"contributes": True,
"hint": None,
"values": [],
"values": ["filename"],
},
{
"id": "function",
Expand Down Expand Up @@ -691,7 +696,7 @@ def test_chained_too_many_exceptions(self):

def test_thread(self):
    """The string built from ``MOBILE_THREAD_DATA`` is a single frame line naming the frame's filename."""
    expected = 'File "filename", function TestHandler'
    actual = get_stacktrace_string(self.MOBILE_THREAD_DATA)
    assert actual == expected

def test_system(self):
data_system = copy.deepcopy(self.BASE_APP_DATA)
Expand Down Expand Up @@ -814,6 +819,42 @@ def test_only_stacktrace_frames(self):
stacktrace_str = get_stacktrace_string(self.ONLY_STACKTRACE)
assert stacktrace_str == 'File "index.php", function \n $server->emit($server->run());'

def test_replace_file_with_module(self):
    """A frame without a filename component is rendered with its module in the ``File`` slot."""
    event_data = copy.deepcopy(self.BASE_APP_DATA)
    first_frame_components = event_data["app"]["component"]["values"][0]["values"][0]["values"][
        0
    ]["values"]
    # Index 1 is the filename component; the module component at index 0 stays in place.
    del first_frame_components[1]

    result = get_stacktrace_string_with_metrics(event_data, "python", ReferrerOptions.INGEST)

    expected = (
        'ZeroDivisionError: division by zero\n File "__main__", function divide_by_zero\n divide = 1/0'
    )
    assert result == expected

@patch("sentry.seer.similarity.utils.metrics")
def test_no_filename_or_module(self, mock_metrics):
    """A frame lacking both module and filename raises, and the metrics wrapper returns ``None``.

    The wrapper is also expected to record a ``did_call_seer`` metric tagged
    with the ``no-module-or-filename`` blocker when called with the ingest referrer.
    """
    event_data = copy.deepcopy(self.BASE_APP_DATA)
    first_frame_components = event_data["app"]["component"]["values"][0]["values"][0]["values"][
        0
    ]["values"]
    # Drop the module component first ...
    del first_frame_components[0]
    # ... which shifts the filename component to index 0, so drop that too.
    del first_frame_components[0]

    # The low-level builder surfaces the problem as an exception.
    with pytest.raises(NoFilenameOrModuleException):
        get_stacktrace_string(event_data)

    # The metrics wrapper swallows the exception and reports why Seer was not called.
    result = get_stacktrace_string_with_metrics(event_data, "python", ReferrerOptions.INGEST)
    expected_sample_rate = options.get("seer.similarity.metrics_sample_rate")
    assert result is None

    expected_tags = {
        "call_made": False,
        "blocker": "no-module-or-filename",
    }
    mock_metrics.incr.assert_called_with(
        "grouping.similarity.did_call_seer",
        sample_rate=expected_sample_rate,
        tags=expected_tags,
    )


class EventContentIsSeerEligibleTest(TestCase):
def get_eligible_event_data(self) -> dict[str, Any]:
Expand Down

0 comments on commit fc8225b

Please sign in to comment.