diff --git a/posthog/api/person.py b/posthog/api/person.py index b623d6a39bc66..b410de3d7da68 100644 --- a/posthog/api/person.py +++ b/posthog/api/person.py @@ -353,12 +353,15 @@ def list(self, request: request.Request, *args: Any, **kwargs: Any) -> response. OpenApiParameter( "delete_events", OpenApiTypes.BOOL, - description="If true, a task to delete all events associated with this person will be created and queued. The task does not run immediately and instead is batched together and at 5AM UTC every Sunday (controlled by environment variable CLEAR_CLICKHOUSE_REMOVED_DATA_SCHEDULE_CRON)", + description="If true, a task to delete all events associated with this person will be created and queued. The task does not run immediately and instead is batched together and at 5AM UTC every Sunday", default=False, ), ], ) def destroy(self, request: request.Request, pk=None, **kwargs): + """ + Use this endpoint to delete individual persons. For bulk deletion, use the bulk_delete endpoint instead. + """ try: person = self.get_object() person_id = person.id @@ -391,6 +394,70 @@ def destroy(self, request: request.Request, pk=None, **kwargs): except Person.DoesNotExist: raise NotFound(detail="Person not found.") + @extend_schema( + parameters=[ + OpenApiParameter( + "delete_events", + OpenApiTypes.BOOL, + description="If true, a task to delete all events associated with this person will be created and queued. The task does not run immediately and instead is batched together and at 5AM UTC every Sunday", + default=False, + ), + OpenApiParameter( + "distinct_ids", + OpenApiTypes.OBJECT, + description="A list of distinct IDs, up to 100 of them. We'll delete all persons associated with those distinct IDs.", + ), + OpenApiParameter( + "ids", + OpenApiTypes.OBJECT, + description="A list of PostHog person IDs, up to 100 of them. We'll delete all the persons listed.", + ), + ], + ) + @action(methods=["POST"], detail=False, required_scopes=["person:write"]) + def bulk_delete(self, request: request.Request, pk=None, **kwargs): + """ + This endpoint allows you to bulk delete persons, either by the PostHog person IDs or by distinct IDs. You can pass in a maximum of 100 IDs per call. + """ + if distinct_ids := request.data.get("distinct_ids"): + if len(distinct_ids) > 100: + raise ValidationError("You can only pass 100 distinct_ids in one call") + persons = self.get_queryset().filter(persondistinctid__distinct_id__in=distinct_ids) + elif ids := request.data.get("ids"): + if len(ids) > 100: + raise ValidationError("You can only pass 100 ids in one call") + persons = self.get_queryset().filter(uuid__in=ids) + else: + raise ValidationError("You need to specify either distinct_ids or ids") + + for person in persons: + delete_person(person=person) + self.perform_destroy(person) + log_activity( + organization_id=self.organization.id, + team_id=self.team_id, + user=cast(User, request.user), + was_impersonated=is_impersonated_session(request), + item_id=person.id, + scope="Person", + activity="deleted", + detail=Detail(name=str(person.uuid)), + ) + # Once the person is deleted, queue deletion of associated data, if that was requested + if request.data.get("delete_events"): + AsyncDeletion.objects.bulk_create( + [ + AsyncDeletion( + deletion_type=DeletionType.Person, + team_id=self.team_id, + key=str(person.uuid), + created_by=cast(User, self.request.user), + ) + ], + ignore_conflicts=True, + ) + return response.Response(status=202) + @action(methods=["GET"], detail=False, required_scopes=["person:read"]) def values(self, request: request.Request, **kwargs) -> response.Response: key = request.GET.get("key") @@ -636,16 +703,17 @@ def _set_properties(self, properties, user): }, ) - log_activity( - organization_id=self.organization.id, - team_id=self.team.id, - user=user, - was_impersonated=is_impersonated_session(self.request), - item_id=instance.pk, - scope="Person", - activity="updated", - detail=Detail(changes=[Change(type="Person", action="changed", field="properties")]), - ) + if self.organization.id: # should always be true, but mypy... + log_activity( + organization_id=self.organization.id, + team_id=self.team.id, + user=user, + was_impersonated=is_impersonated_session(self.request), + item_id=instance.pk, + scope="Person", + activity="updated", + detail=Detail(changes=[Change(type="Person", action="changed", field="properties")]), + ) # PRAGMA: Methods for getting Persons via clickhouse queries def _respond_with_cached_results( diff --git a/posthog/api/test/test_person.py b/posthog/api/test/test_person.py index 22895191c1f74..fd2c6e042a15f 100644 --- a/posthog/api/test/test_person.py +++ b/posthog/api/test/test_person.py @@ -378,6 +378,87 @@ def test_delete_person_and_events(self): self.assertEqual(async_deletion.key, str(person.uuid)) self.assertIsNone(async_deletion.delete_verified_at) + @freeze_time("2021-08-25T22:09:14.252Z") + def test_bulk_delete_ids(self): + person = _create_person( + team=self.team, + distinct_ids=["person_1", "anonymous_id"], + properties={"$os": "Chrome"}, + immediate=True, + ) + person2 = _create_person( + team=self.team, + distinct_ids=["person_2", "anonymous_id_2"], + properties={"$os": "Chrome"}, + immediate=True, + ) + _create_event(event="test", team=self.team, distinct_id="person_1") + _create_event(event="test", team=self.team, distinct_id="anonymous_id") + _create_event(event="test", team=self.team, distinct_id="someone_else") + + response = self.client.post( + f"/api/person/bulk_delete/", {"ids": [person.uuid, person2.uuid], "delete_events": True} + ) + + self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED, response.content) + self.assertEqual(response.content, b"") # Empty response + self.assertEqual(Person.objects.filter(team=self.team).count(), 0) + + response = self.client.delete(f"/api/person/{person.uuid}/") + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + + ch_persons = sync_execute( + "SELECT version, is_deleted, properties FROM person FINAL WHERE team_id = %(team_id)s and id = %(uuid)s", + {"team_id": self.team.pk, "uuid": person.uuid}, + ) + self.assertEqual([(100, 1, "{}")], ch_persons) + + # async deletion scheduled and executed + async_deletion = cast(AsyncDeletion, AsyncDeletion.objects.filter(team_id=self.team.id).first()) + self.assertEqual(async_deletion.deletion_type, DeletionType.Person) + self.assertEqual(async_deletion.key, str(person.uuid)) + self.assertIsNone(async_deletion.delete_verified_at) + + @freeze_time("2021-08-25T22:09:14.252Z") + def test_bulk_delete_distinct_id(self): + person = _create_person( + team=self.team, + distinct_ids=["person_1", "anonymous_id"], + properties={"$os": "Chrome"}, + immediate=True, + ) + _create_person( + team=self.team, + distinct_ids=["person_2", "anonymous_id_2"], + properties={"$os": "Chrome"}, + immediate=True, + ) + _create_event(event="test", team=self.team, distinct_id="person_1") + _create_event(event="test", team=self.team, distinct_id="anonymous_id") + _create_event(event="test", team=self.team, distinct_id="someone_else") + + response = self.client.post(f"/api/person/bulk_delete/", {"distinct_ids": ["anonymous_id", "person_2"]}) + + self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED, response.content) + self.assertEqual(response.content, b"") # Empty response + self.assertEqual(Person.objects.filter(team=self.team).count(), 0) + + response = self.client.delete(f"/api/person/{person.uuid}/") + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + + ch_persons = sync_execute( + "SELECT version, is_deleted, properties FROM person FINAL WHERE team_id = %(team_id)s and id = %(uuid)s", + {"team_id": self.team.pk, "uuid": person.uuid}, + ) + self.assertEqual([(100, 1, "{}")], ch_persons) + # No async deletion is scheduled + self.assertEqual(AsyncDeletion.objects.filter(team_id=self.team.id).count(), 0) + ch_events = sync_execute( + "SELECT count() FROM events WHERE team_id = %(team_id)s", + {"team_id": self.team.pk}, + )[0][0] + self.assertEqual(ch_events, 3) + @freeze_time("2021-08-25T22:09:14.252Z") def test_split_people_keep_props(self) -> None: # created first diff --git a/posthog/models/person/util.py b/posthog/models/person/util.py index 0e1efa7bdb2c9..4dbf3117a1a46 100644 --- a/posthog/models/person/util.py +++ b/posthog/models/person/util.py @@ -232,9 +232,9 @@ def get_persons_by_uuids(team: Team, uuids: list[str]) -> QuerySet: def delete_person(person: Person, sync: bool = False) -> None: # This is racy https://github.com/PostHog/posthog/issues/11590 distinct_ids_to_version = _get_distinct_ids_with_version(person) - _delete_person(person.team.id, person.uuid, int(person.version or 0), person.created_at, sync) + _delete_person(person.team_id, person.uuid, int(person.version or 0), person.created_at, sync) for distinct_id, version in distinct_ids_to_version.items(): - _delete_ch_distinct_id(person.team.id, person.uuid, distinct_id, version, sync) + _delete_ch_distinct_id(person.team_id, person.uuid, distinct_id, version, sync) def _delete_person(