Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(similarity): Limit > 30 system frame check to Java #81385

Merged
merged 1 commit into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ def get(self, request: Request, group) -> Response:
if latest_event and event_content_has_stacktrace(latest_event):
grouping_info = get_grouping_info(None, project=group.project, event=latest_event)
try:
stacktrace_string = get_stacktrace_string(grouping_info)
stacktrace_string = get_stacktrace_string(
grouping_info, platform=latest_event.platform
)
except TooManyOnlySystemFramesException:
stacktrace_string = ""

Expand Down
19 changes: 16 additions & 3 deletions src/sentry/seer/similarity/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,19 @@
"ruby-rails",
]
)
SYSTEM_FRAME_CHECK_PLATFORMS = frozenset(
[
"java",
"java-android",
"java-appengine",
"java-log4j",
"java-log4j2",
"java-logging",
"java-logback",
"java-spring",
"java-spring-boot",
]
)
BASE64_ENCODED_PREFIXES = [
"data:text/html;base64",
"data:text/javascript;base64",
Expand All @@ -169,7 +182,7 @@ def _get_value_if_exists(exception_value: dict[str, Any]) -> str:
return exception_value["values"][0] if exception_value.get("values") else ""


def get_stacktrace_string(data: dict[str, Any]) -> str:
def get_stacktrace_string(data: dict[str, Any], platform: str | None = None) -> str:
"""Format a stacktrace string from the grouping information."""
app_hash = get_path(data, "app", "hash")
app_component = get_path(data, "app", "component", "values")
Expand Down Expand Up @@ -280,7 +293,7 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
exc_value = _get_value_if_exists(exception_value)
elif exception_value.get("id") == "stacktrace" and frame_count < MAX_FRAME_COUNT:
frame_strings = _process_frames(exception_value["values"])
if is_frames_truncated and not app_hash:
if platform in SYSTEM_FRAME_CHECK_PLATFORMS and is_frames_truncated and not app_hash:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

only raise this exception if the platform is in SYSTEM_FRAME_CHECK_PLATFORMS

raise TooManyOnlySystemFramesException
if has_no_filename_or_module:
raise NoFilenameOrModuleException
Expand Down Expand Up @@ -323,7 +336,7 @@ def get_stacktrace_string_with_metrics(
data: dict[str, Any], platform: str | None, referrer: ReferrerOptions
) -> str | None:
try:
stacktrace_string = get_stacktrace_string(data)
stacktrace_string = get_stacktrace_string(data, platform)
except TooManyOnlySystemFramesException:
platform = platform if platform else "unknown"
metrics.incr(
Expand Down
119 changes: 114 additions & 5 deletions tests/sentry/grouping/ingest/test_seer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import asdict
from time import time
from unittest.mock import MagicMock, Mock, patch
from unittest.mock import ANY, MagicMock, Mock, call, patch

from sentry import options
from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
Expand Down Expand Up @@ -368,7 +368,7 @@ def test_too_many_only_system_frames(self, mock_metrics: Mock) -> None:
}
]
},
"platform": "python",
"platform": "java",
},
)
expected_metadata = {
Expand All @@ -384,7 +384,7 @@ def test_too_many_only_system_frames(self, mock_metrics: Mock) -> None:
mock_metrics.incr.assert_any_call(
"grouping.similarity.over_threshold_only_system_frames",
sample_rate=sample_rate,
tags={"platform": "python", "referrer": "ingest"},
tags={"platform": "java", "referrer": "ingest"},
)
mock_metrics.incr.assert_any_call(
"grouping.similarity.did_call_seer",
Expand All @@ -395,6 +395,58 @@ def test_too_many_only_system_frames(self, mock_metrics: Mock) -> None:
},
)

@patch("sentry.seer.similarity.utils.metrics")
def test_too_many_only_system_frames_invalid_platform(self, mock_metrics: Mock) -> None:
type = "FailedToFetchError"
value = "Charlie didn't bring the ball back"
context_line = f"raise {type}('{value}')"
new_event = Event(
project_id=self.project.id,
event_id="22312012112120120908201304152013",
data={
"title": f"{type}('{value}')",
"exception": {
"values": [
{
"type": type,
"value": value,
"stacktrace": {
"frames": [
{
"function": f"play_fetch_{i}",
"filename": f"dogpark{i}.py",
"context_line": context_line,
}
for i in range(MAX_FRAME_COUNT + 1)
]
},
}
]
},
"platform": "python",
},
)
expected_metadata = {
"similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
"results": [],
}
assert get_seer_similar_issues(new_event, new_event.get_grouping_variants()) == (
expected_metadata,
None,
)

assert (
call(
"grouping.similarity.did_call_seer",
sample_rate=1.0,
tags={
"call_made": False,
"blocker": "over-threshold-only-system-frames",
},
)
not in mock_metrics.incr.call_args_list
)


class TestMaybeCheckSeerForMatchingGroupHash(TestCase):

Expand Down Expand Up @@ -486,7 +538,7 @@ def test_too_many_only_system_frames_maybe_check_seer_for_matching_group_hash(
}
]
},
"platform": "python",
"platform": "java",
},
)

Expand All @@ -502,7 +554,7 @@ def test_too_many_only_system_frames_maybe_check_seer_for_matching_group_hash(
mock_metrics.incr.assert_any_call(
"grouping.similarity.over_threshold_only_system_frames",
sample_rate=sample_rate,
tags={"platform": "python", "referrer": "ingest"},
tags={"platform": "java", "referrer": "ingest"},
)
mock_metrics.incr.assert_any_call(
"grouping.similarity.did_call_seer",
Expand All @@ -514,3 +566,60 @@ def test_too_many_only_system_frames_maybe_check_seer_for_matching_group_hash(
)

mock_get_similar_issues.assert_not_called()

@patch("sentry.grouping.ingest.seer.get_similarity_data_from_seer", return_value=[])
def test_too_many_only_system_frames_maybe_check_seer_for_matching_group_hash_invalid_platform(
self, mock_get_similarity_data: MagicMock
) -> None:
self.project.update_option("sentry:similarity_backfill_completed", int(time()))

type = "FailedToFetchError"
value = "Charlie didn't bring the ball back"
context_line = f"raise {type}('{value}')"
new_event = Event(
project_id=self.project.id,
event_id="22312012112120120908201304152013",
data={
"title": f"{type}('{value}')",
"exception": {
"values": [
{
"type": type,
"value": value,
"stacktrace": {
"frames": [
{
"function": f"play_fetch_{i}",
"filename": f"dogpark{i}.py",
"context_line": context_line,
}
for i in range(MAX_FRAME_COUNT + 1)
]
},
}
]
},
"platform": "python",
},
)

GroupHash.objects.create(
project=self.project, group=new_event.group, hash=new_event.get_primary_hash()
)
group_hashes = list(GroupHash.objects.filter(project_id=self.project.id))
maybe_check_seer_for_matching_grouphash(
new_event, new_event.get_grouping_variants(), group_hashes
)

mock_get_similarity_data.assert_called_with(
{
"event_id": new_event.event_id,
"hash": new_event.get_primary_hash(),
"project_id": self.project.id,
"stacktrace": ANY,
"exception_type": "FailedToFetchError",
"k": 1,
"referrer": "ingest",
"use_reranking": True,
}
)
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,7 @@ def test_too_many_system_frames(self):
}
]
},
"platform": "python",
"platform": "java",
}
new_event = self.store_event(data=error_data, project_id=self.project)
assert new_event.group
Expand Down
34 changes: 32 additions & 2 deletions tests/sentry/seer/similarity/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,7 +727,20 @@ def test_too_many_system_frames_single_exception(self):
] += self.create_frames(MAX_FRAME_COUNT + 1, True)

with pytest.raises(TooManyOnlySystemFramesException):
get_stacktrace_string(data_system)
get_stacktrace_string(data_system, platform="java")

def test_too_many_system_frames_single_exception_invalid_platform(self):
data_system = copy.deepcopy(self.BASE_APP_DATA)
data_system["system"] = data_system.pop("app")
data_system["system"]["component"]["values"][0]["values"][0][
"values"
] += self.create_frames(MAX_FRAME_COUNT + 1, True)

stacktrace_string = get_stacktrace_string(data_system)
assert stacktrace_string is not None and stacktrace_string != ""

stacktrace_string = get_stacktrace_string(data_system, platform="python")
assert stacktrace_string is not None and stacktrace_string != ""

def test_too_many_system_frames_chained_exception(self):
data_system = copy.deepcopy(self.CHAINED_APP_DATA)
Expand All @@ -741,7 +754,24 @@ def test_too_many_system_frames_chained_exception(self):
] += self.create_frames(MAX_FRAME_COUNT // 2, True)

with pytest.raises(TooManyOnlySystemFramesException):
get_stacktrace_string(data_system)
get_stacktrace_string(data_system, platform="java")

def test_too_many_system_frames_chained_exception_invalid_platform(self):
data_system = copy.deepcopy(self.CHAINED_APP_DATA)
data_system["system"] = data_system.pop("app")
# Split MAX_FRAME_COUNT across the two exceptions
data_system["system"]["component"]["values"][0]["values"][0]["values"][0][
"values"
] += self.create_frames(MAX_FRAME_COUNT // 2, True)
data_system["system"]["component"]["values"][0]["values"][1]["values"][0][
"values"
] += self.create_frames(MAX_FRAME_COUNT // 2, True)

stacktrace_string = get_stacktrace_string(data_system)
assert stacktrace_string is not None and stacktrace_string != ""

stacktrace_string = get_stacktrace_string(data_system, platform="python")
assert stacktrace_string is not None and stacktrace_string != ""

def test_too_many_in_app_contributing_frames(self):
"""
Expand Down
4 changes: 2 additions & 2 deletions tests/sentry/tasks/test_backfill_seer_grouping_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def test_lookup_group_data_stacktrace_bulk_invalid_stacktrace_exception(self, mo
]
event = self.store_event(
data={
"platform": "python",
"platform": "java",
"exception": exception,
"title": "title",
"timestamp": before_now(seconds=10).isoformat(),
Expand Down Expand Up @@ -426,7 +426,7 @@ def test_lookup_group_data_stacktrace_bulk_invalid_stacktrace_exception(self, mo
mock_metrics.incr.assert_called_with(
"grouping.similarity.over_threshold_only_system_frames",
sample_rate=sample_rate,
tags={"platform": "python", "referrer": "backfill"},
tags={"platform": "java", "referrer": "backfill"},
)

def test_lookup_group_data_stacktrace_bulk_with_fallback_success(self):
Expand Down
Loading