From 35847fc597665cf0cbe57dfb2d14b6a7a3ea5dc6 Mon Sep 17 00:00:00 2001 From: David Newell Date: Thu, 5 Dec 2024 16:04:30 +0000 Subject: [PATCH] feat: merging & splitting issues (#26612) --- posthog/api/error_tracking.py | 39 +++-- posthog/models/__init__.py | 8 +- .../models/error_tracking/error_tracking.py | 120 +++++++++++----- posthog/models/error_tracking/sql.py | 4 + .../test/test_error_tracking.py | 136 ++++++++++-------- posthog/models/team/util.py | 2 + 6 files changed, 194 insertions(+), 115 deletions(-) diff --git a/posthog/api/error_tracking.py b/posthog/api/error_tracking.py index 9cdfd10f09122..7f554880ff2cf 100644 --- a/posthog/api/error_tracking.py +++ b/posthog/api/error_tracking.py @@ -10,8 +10,8 @@ from posthog.api.forbid_destroy_model import ForbidDestroyModel from posthog.api.routing import TeamAndOrgViewSetMixin -from posthog.models import ErrorTrackingSymbolSet -from posthog.models.error_tracking import ErrorTrackingStackFrame +from posthog.api.utils import action +from posthog.models.error_tracking import ErrorTrackingIssue, ErrorTrackingSymbolSet, ErrorTrackingStackFrame from posthog.models.utils import uuid7 from posthog.storage import object_storage @@ -28,29 +28,26 @@ class ObjectStorageUnavailable(Exception): pass -# class ErrorTrackingGroupSerializer(serializers.ModelSerializer): -# class Meta: -# model = ErrorTrackingGroup -# fields = ["assignee", "status"] +class ErrorTrackingIssueSerializer(serializers.ModelSerializer): + class Meta: + model = ErrorTrackingIssue + fields = ["assignee", "status"] -# class ErrorTrackingGroupViewSet(TeamAndOrgViewSetMixin, ForbidDestroyModel, viewsets.ModelViewSet): -# scope_object = "INTERNAL" -# queryset = ErrorTrackingGroup.objects.all() -# serializer_class = ErrorTrackingGroupSerializer +class ErrorTrackingGroupViewSet(TeamAndOrgViewSetMixin, ForbidDestroyModel, viewsets.ModelViewSet): + scope_object = "INTERNAL" + queryset = ErrorTrackingIssue.objects.all() + serializer_class = ErrorTrackingIssueSerializer -# def safely_get_object(self, queryset) -> QuerySet: -# stringified_fingerprint = self.kwargs["pk"] -# fingerprint = json.loads(urlsafe_base64_decode(stringified_fingerprint)) -# group, _ = queryset.get_or_create(fingerprint=fingerprint, team=self.team) -# return group + def safely_get_queryset(self, queryset): + return queryset.filter(team_id=self.team.id) -# @action(methods=["POST"], detail=True) -# def merge(self, request, **kwargs): -# group: ErrorTrackingGroup = self.get_object() -# merging_fingerprints: list[list[str]] = request.data.get("merging_fingerprints", []) -# group.merge(merging_fingerprints) -# return Response({"success": True}) + @action(methods=["POST"], detail=True) + def merge(self, request, **kwargs): + issue: ErrorTrackingIssue = self.get_object() + ids: list[str] = request.data.get("ids", []) + issue.merge(issue_ids=ids) + return Response({"success": True}) class ErrorTrackingStackFrameSerializer(serializers.ModelSerializer): diff --git a/posthog/models/__init__.py b/posthog/models/__init__.py index 54d21e2758544..593d60f95934f 100644 --- a/posthog/models/__init__.py +++ b/posthog/models/__init__.py @@ -30,7 +30,12 @@ from .element import Element from .element_group import ElementGroup from .entity import Entity -from .error_tracking import ErrorTrackingIssue, ErrorTrackingStackFrame, ErrorTrackingSymbolSet +from .error_tracking import ( + ErrorTrackingIssue, + ErrorTrackingIssueFingerprintV2, + ErrorTrackingStackFrame, + ErrorTrackingSymbolSet, +) from .event.event import Event from .event_buffer import EventBuffer from .event_definition import EventDefinition @@ -103,6 +108,7 @@ "ElementGroup", "Entity", "ErrorTrackingIssue", + "ErrorTrackingIssueFingerprintV2", "ErrorTrackingStackFrame", "ErrorTrackingSymbolSet", "Event", diff --git a/posthog/models/error_tracking/error_tracking.py b/posthog/models/error_tracking/error_tracking.py index b003e29b3b8a5..47ae96334b5f7 100644 --- a/posthog/models/error_tracking/error_tracking.py +++ b/posthog/models/error_tracking/error_tracking.py @@ -1,10 +1,14 @@ -from django.db import models +from django.db import models, transaction from django.contrib.postgres.fields import ArrayField + from posthog.models.utils import UUIDModel from posthog.models.team import Team from posthog.models.user import User -from django.db import transaction -from django.db.models import Q, QuerySet +from posthog.models.error_tracking.sql import INSERT_ERROR_TRACKING_ISSUE_FINGERPRINT_OVERRIDES + +from posthog.kafka_client.client import ClickhouseProducer +from posthog.kafka_client.topics import KAFKA_ERROR_TRACKING_ISSUE_FINGERPRINT +from uuid import UUID class ErrorTrackingIssue(UUIDModel): @@ -20,6 +24,30 @@ class Status(models.TextChoices): name = models.TextField(null=True, blank=True) description = models.TextField(null=True, blank=True) + def merge(self, issue_ids: list[str]) -> None: + fingerprints = resolve_fingerprints_for_issues(team_id=self.team.pk, issue_ids=issue_ids) + + with transaction.atomic(): + overrides = update_error_tracking_issue_fingerprints( + team_id=self.team.pk, issue_id=self.id, fingerprints=fingerprints + ) + ErrorTrackingIssue.objects.filter(team=self.team, id__in=issue_ids).delete() + update_error_tracking_issue_fingerprint_overrides(team_id=self.team.pk, overrides=overrides) + + def split(self, fingerprints: list[str]) -> None: + overrides: list[ErrorTrackingIssueFingerprintV2] = [] + + for fingerprint in fingerprints: + with transaction.atomic(): + new_issue = ErrorTrackingIssue.objects.create(team=self.team) + overrides.extend( + update_error_tracking_issue_fingerprints( + team_id=self.team.pk, issue_id=new_issue.id, fingerprints=[fingerprint] + ) + ) + + update_error_tracking_issue_fingerprint_overrides(team_id=self.team.pk, overrides=overrides) + class ErrorTrackingIssueAssignment(UUIDModel): issue = models.ForeignKey(ErrorTrackingIssue, on_delete=models.CASCADE) @@ -114,36 +142,6 @@ class Status(models.TextChoices): blank=True, ) - @classmethod - def filter_fingerprints(cls, queryset, fingerprints: list[list]) -> QuerySet: - query = Q(fingerprint__in=fingerprints) - - for fp in fingerprints: - query |= Q(merged_fingerprints__contains=fp) - - return queryset.filter(query) - - @transaction.atomic - def merge(self, fingerprints: list[list[str]]) -> None: - if not fingerprints: - return - - # sets don't like lists so we're converting fingerprints to tuples - def convert_fingerprints_to_tuples(fps: list[list[str]]): - return [tuple(f) for f in fps] - - merged_fingerprints = set(convert_fingerprints_to_tuples(self.merged_fingerprints)) - merged_fingerprints.update(convert_fingerprints_to_tuples(fingerprints)) - - merging_groups = ErrorTrackingGroup.objects.filter(team=self.team, fingerprint__in=fingerprints) - for group in merging_groups: - merged_fingerprints |= set(convert_fingerprints_to_tuples(group.merged_fingerprints)) - - merging_groups.delete() - # converting back to list of lists before saving - self.merged_fingerprints = [list(f) for f in merged_fingerprints] - self.save() - # DEPRECATED: Use ErrorTrackingIssueFingerprintV2 instead class ErrorTrackingIssueFingerprint(models.Model): @@ -155,3 +153,59 @@ class ErrorTrackingIssueFingerprint(models.Model): class Meta: constraints = [models.UniqueConstraint(fields=["team", "fingerprint"], name="unique fingerprint for team")] + + +def resolve_fingerprints_for_issues(team_id: int, issue_ids: list[str]) -> list[str]: + return list( + ErrorTrackingIssueFingerprintV2.objects.filter(team_id=team_id, issue_id__in=issue_ids).values_list( + "fingerprint", flat=True + ) + ) + + +def update_error_tracking_issue_fingerprints( + team_id: int, issue_id: str, fingerprints: list[str] +) -> list[ErrorTrackingIssueFingerprintV2]: + return list( + ErrorTrackingIssueFingerprintV2.objects.raw( + """ + UPDATE posthog_errortrackingissuefingerprintv2 + SET version = version + 1, issue_id = %s + WHERE team_id = %s AND fingerprint = ANY(%s) + RETURNING fingerprint, version, issue_id, id + """, + [issue_id, team_id, fingerprints], + ) + ) + + +def update_error_tracking_issue_fingerprint_overrides( + team_id: int, overrides: list[ErrorTrackingIssueFingerprintV2] +) -> None: + for override in overrides: + override_error_tracking_issue_fingerprint( + team_id=team_id, fingerprint=override.fingerprint, issue_id=override.issue_id, version=override.version + ) + + +def override_error_tracking_issue_fingerprint( + team_id: int, + fingerprint: str, + issue_id: UUID, + version=0, + is_deleted: bool = False, + sync: bool = False, +) -> None: + p = ClickhouseProducer() + p.produce( + topic=KAFKA_ERROR_TRACKING_ISSUE_FINGERPRINT, + sql=INSERT_ERROR_TRACKING_ISSUE_FINGERPRINT_OVERRIDES, + data={ + "team_id": team_id, + "fingerprint": fingerprint, + "issue_id": str(issue_id), + "version": version, + "is_deleted": int(is_deleted), + }, + sync=sync, + ) diff --git a/posthog/models/error_tracking/sql.py b/posthog/models/error_tracking/sql.py index 1700a6c7c4cd9..2bd96fb8956af 100644 --- a/posthog/models/error_tracking/sql.py +++ b/posthog/models/error_tracking/sql.py @@ -77,3 +77,7 @@ TRUNCATE_ERROR_TRACKING_ISSUE_FINGERPRINT_OVERRIDES_TABLE_SQL = ( f"TRUNCATE TABLE IF EXISTS {ERROR_TRACKING_ISSUE_FINGERPRINT_OVERRIDES_TABLE} ON CLUSTER '{CLICKHOUSE_CLUSTER}'" ) + +INSERT_ERROR_TRACKING_ISSUE_FINGERPRINT_OVERRIDES = """ +INSERT INTO error_tracking_issue_fingerprint_overrides (fingerprint, issue_id, team_id, is_deleted, version, _timestamp, _offset, _partition) SELECT %(fingerprint)s, %(issue_id)s, %(team_id)s, %(is_deleted)s, %(version)s, now(), 0, 0 VALUES +""" diff --git a/posthog/models/error_tracking/test/test_error_tracking.py b/posthog/models/error_tracking/test/test_error_tracking.py index 8ccb762c8eda8..91db5c014c090 100644 --- a/posthog/models/error_tracking/test/test_error_tracking.py +++ b/posthog/models/error_tracking/test/test_error_tracking.py @@ -1,60 +1,76 @@ -# class TestErrorTracking(BaseTest): -# def test_defaults(self): -# group = ErrorTrackingGroup.objects.create(status="active", team=self.team, fingerprint=["a_fingerprint"]) - -# assert group.fingerprint == ["a_fingerprint"] -# assert group.merged_fingerprints == [] -# assert group.assignee is None - -# def test_filtering(self): -# ErrorTrackingGroup.objects.bulk_create( -# [ -# ErrorTrackingGroup(team=self.team, fingerprint=["first_error"]), -# ErrorTrackingGroup( -# team=self.team, fingerprint=["second_error"], merged_fingerprints=[["previously_merged"]] -# ), -# ErrorTrackingGroup(team=self.team, fingerprint=["third_error"]), -# ] -# ) - -# matching_groups = ErrorTrackingGroup.objects.filter(fingerprint__in=[["first_error"], ["second_error"]]) -# assert matching_groups.count() == 2 - -# matching_groups = ErrorTrackingGroup.objects.filter(merged_fingerprints__contains=["previously_merged"]) -# assert matching_groups.count() == 1 - -# matching_groups = ErrorTrackingGroup.filter_fingerprints( -# queryset=ErrorTrackingGroup.objects, fingerprints=[["first_error"], ["previously_merged"]] -# ) -# assert matching_groups.count() == 2 - -# def test_merge(self): -# primary_group = ErrorTrackingGroup.objects.create( -# status="active", -# team=self.team, -# fingerprint=["a_fingerprint"], -# merged_fingerprints=[["already_merged_fingerprint"]], -# ) -# merge_group_1 = ErrorTrackingGroup.objects.create( -# status="active", team=self.team, fingerprint=["new_fingerprint"] -# ) -# merge_group_2 = ErrorTrackingGroup.objects.create( -# status="active", -# team=self.team, -# fingerprint=["another_fingerprint"], -# merged_fingerprints=[["merged_fingerprint"]], -# ) - -# merging_fingerprints = [merge_group_1.fingerprint, merge_group_2.fingerprint, ["no_group_fingerprint"]] -# primary_group.merge(merging_fingerprints) - -# assert sorted(primary_group.merged_fingerprints) == [ -# ["already_merged_fingerprint"], -# ["another_fingerprint"], -# ["merged_fingerprint"], -# ["new_fingerprint"], -# ["no_group_fingerprint"], -# ] - -# # deletes the old groups -# assert ErrorTrackingGroup.objects.count() == 1 +from posthog.test.base import BaseTest +from posthog.models.error_tracking import ErrorTrackingIssue, ErrorTrackingIssueFingerprintV2 + + +class TestErrorTracking(BaseTest): + def create_issue(self, fingerprints) -> ErrorTrackingIssue: + issue = ErrorTrackingIssue.objects.create(team=self.team) + for fingerprint in fingerprints: + ErrorTrackingIssueFingerprintV2.objects.create(team=self.team, issue=issue, fingerprint=fingerprint) + return issue + + def test_defaults(self): + issue = ErrorTrackingIssue.objects.create(team=self.team) + + assert issue.status == "active" + assert issue.name is None + + def test_basic_merge(self): + issue_one = self.create_issue(["fingerprint_one"]) + issue_two = self.create_issue(["fingerprint_two"]) + + issue_two.merge(issue_ids=[issue_one.id]) + + # remaps the first fingerprint to the second issue + assert ErrorTrackingIssueFingerprintV2.objects.filter(issue_id=issue_two.id).count() == 2 + # bumps the version + override = ErrorTrackingIssueFingerprintV2.objects.filter(fingerprint="fingerprint_one").first() + assert override + assert override.version == 1 + + # deletes issue one + assert ErrorTrackingIssue.objects.count() == 1 + + def test_merge_multiple_times(self): + issue_one = self.create_issue(["fingerprint_one"]) + issue_two = self.create_issue(["fingerprint_two"]) + issue_three = self.create_issue(["fingerprint_three"]) + + issue_two.merge(issue_ids=[issue_one.id]) + issue_three.merge(issue_ids=[issue_two.id]) + + # only the third issue remains + assert ErrorTrackingIssue.objects.count() == 1 + # all fingerprints point to the third issue + assert ErrorTrackingIssueFingerprintV2.objects.filter(issue_id=issue_three.id).count() == 3 + + # bumps versions of the merged issues correct number of times + override = ErrorTrackingIssueFingerprintV2.objects.filter(fingerprint="fingerprint_one").first() + assert override + assert override.version == 2 + override = ErrorTrackingIssueFingerprintV2.objects.filter(fingerprint="fingerprint_two").first() + assert override + assert override.version == 1 + + def test_merging_multiple_issues_at_once(self): + issue_one = self.create_issue(["fingerprint_one"]) + issue_two = self.create_issue(["fingerprint_two"]) + issue_three = self.create_issue(["fingerprint_three"]) + + issue_three.merge(issue_ids=[issue_one.id, issue_two.id]) + + assert ErrorTrackingIssueFingerprintV2.objects.filter(issue_id=issue_three.id).count() == 3 + + def test_splitting_fingerprints(self): + issue = self.create_issue(["fingerprint_one", "fingerprint_two", "fingerprint_three"]) + + issue.split(fingerprints=["fingerprint_one", "fingerprint_two"]) + + # creates two new issues + assert ErrorTrackingIssue.objects.count() == 3 + + # bumps the version but no longer points to the old issue + override = ErrorTrackingIssueFingerprintV2.objects.filter(fingerprint="fingerprint_one").first() + assert override + assert override.issue_id != issue.id + assert override.version == 1 diff --git a/posthog/models/team/util.py b/posthog/models/team/util.py index 5756c8da211aa..5cb103b154b84 100644 --- a/posthog/models/team/util.py +++ b/posthog/models/team/util.py @@ -14,10 +14,12 @@ def delete_bulky_postgres_data(team_ids: list[int]): from posthog.models.feature_flag.feature_flag import FeatureFlagHashKeyOverride from posthog.models.insight_caching_state import InsightCachingState from posthog.models.person import Person, PersonDistinctId + from posthog.models.error_tracking import ErrorTrackingIssueFingerprintV2 from posthog.models.early_access_feature import EarlyAccessFeature _raw_delete(EarlyAccessFeature.objects.filter(team_id__in=team_ids)) _raw_delete(PersonDistinctId.objects.filter(team_id__in=team_ids)) + _raw_delete(ErrorTrackingIssueFingerprintV2.objects.filter(team_id__in=team_ids)) _raw_delete(CohortPeople.objects.filter(cohort__team_id__in=team_ids)) _raw_delete(FeatureFlagHashKeyOverride.objects.filter(team_id__in=team_ids)) _raw_delete(Person.objects.filter(team_id__in=team_ids))