Skip to content

Commit

Permalink
feat: Add bulk deletion endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
timgl committed Sep 5, 2024
1 parent acda90e commit bf66822
Show file tree
Hide file tree
Showing 2 changed files with 148 additions and 0 deletions.
67 changes: 67 additions & 0 deletions posthog/api/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,9 @@ def list(self, request: request.Request, *args: Any, **kwargs: Any) -> response.
],
)
def destroy(self, request: request.Request, pk=None, **kwargs):
"""
Use this endpoint to delete individual persons. If you wnat to bulk delete, use the bulk_delete endpoint.
"""
try:
person = self.get_object()
person_id = person.id
Expand Down Expand Up @@ -391,6 +394,70 @@ def destroy(self, request: request.Request, pk=None, **kwargs):
except Person.DoesNotExist:
raise NotFound(detail="Person not found.")

@extend_schema(
parameters=[
OpenApiParameter(
"delete_events",
OpenApiTypes.BOOL,
description="If true, a task to delete all events associated with this person will be created and queued. The task does not run immediately and instead is batched together and at 5AM UTC every Sunday (controlled by environment variable CLEAR_CLICKHOUSE_REMOVED_DATA_SCHEDULE_CRON)",
default=False,
),
OpenApiParameter(
"distinct_ids",
OpenApiTypes.OBJECT,
description="A list of distinct ids. We'll delete all persons associated with those distinct ids. The maximum amount of ids you can pass in one call is 100.",
),
OpenApiParameter(
"ids",
OpenApiTypes.OBJECT,
description="A list of PostHog person ids. We'll automatically fetch all related persons and delete those. The maximum amount of ids you can pass in one call is 100.",
),
],
)
@action(methods=["POST"], detail=False, required_scopes=["person:write"])
def bulk_delete(self, request: request.Request, pk=None, **kwargs):
"""
This endpoint allows you to bulk delete persons, either by the PostHog persons ID or by distinct IDs. You can pass through a maximum of 100 ids per call.
"""
if request.data.get("distinct_ids"):
if len(request.data["distinct_ids"]) > 100:
raise ValidationError("You can only pass 100 distinct_ids in one call")
persons = self.get_queryset().filter(persondistinctid__distinct_id__in=request.data.get("distinct_ids"))
elif request.data.get("ids"):
if len(request.data["ids"]) > 100:
raise ValidationError("You can only pass 100 ids in one call")
persons = self.get_queryset().filter(uuid__in=request.data["ids"])
else:
raise ValidationError("You need to specify either distinct_ids or ids")

for person in persons:
delete_person(person=person)
self.perform_destroy(person)
log_activity(
organization_id=self.organization.id,
team_id=self.team_id,
user=cast(User, request.user),
was_impersonated=is_impersonated_session(request),
item_id=person.id,
scope="Person",
activity="deleted",
detail=Detail(name=str(person.uuid)),
)
# Once the person is deleted, queue deletion of associated data, if that was requested
if request.data.get("delete_events"):
AsyncDeletion.objects.bulk_create(
[
AsyncDeletion(
deletion_type=DeletionType.Person,
team_id=self.team_id,
key=str(person.uuid),
created_by=cast(User, self.request.user),
)
],
ignore_conflicts=True,
)
return response.Response(status=202)

@action(methods=["GET"], detail=False, required_scopes=["person:read"])
def values(self, request: request.Request, **kwargs) -> response.Response:
key = request.GET.get("key")
Expand Down
81 changes: 81 additions & 0 deletions posthog/api/test/test_person.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,87 @@ def test_delete_person_and_events(self):
self.assertEqual(async_deletion.key, str(person.uuid))
self.assertIsNone(async_deletion.delete_verified_at)

@freeze_time("2021-08-25T22:09:14.252Z")
def test_bulk_delete_ids(self):
person = _create_person(
team=self.team,
distinct_ids=["person_1", "anonymous_id"],
properties={"$os": "Chrome"},
immediate=True,
)
person2 = _create_person(
team=self.team,
distinct_ids=["person_2", "anonymous_id_2"],
properties={"$os": "Chrome"},
immediate=True,
)
_create_event(event="test", team=self.team, distinct_id="person_1")
_create_event(event="test", team=self.team, distinct_id="anonymous_id")
_create_event(event="test", team=self.team, distinct_id="someone_else")

response = self.client.post(
f"/api/person/bulk_delete/", {"ids": [person.uuid, person2.uuid], "delete_events": True}
)

self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED, response.content)
self.assertEqual(response.content, b"") # Empty response
self.assertEqual(Person.objects.filter(team=self.team).count(), 0)

response = self.client.delete(f"/api/person/{person.uuid}/")
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)

ch_persons = sync_execute(
"SELECT version, is_deleted, properties FROM person FINAL WHERE team_id = %(team_id)s and id = %(uuid)s",
{"team_id": self.team.pk, "uuid": person.uuid},
)
self.assertEqual([(100, 1, "{}")], ch_persons)

# async deletion scheduled and executed
async_deletion = cast(AsyncDeletion, AsyncDeletion.objects.filter(team_id=self.team.id).first())
self.assertEqual(async_deletion.deletion_type, DeletionType.Person)
self.assertEqual(async_deletion.key, str(person.uuid))
self.assertIsNone(async_deletion.delete_verified_at)

@freeze_time("2021-08-25T22:09:14.252Z")
def test_bulk_delete_distinct_id(self):
person = _create_person(
team=self.team,
distinct_ids=["person_1", "anonymous_id"],
properties={"$os": "Chrome"},
immediate=True,
)
_create_person(
team=self.team,
distinct_ids=["person_2", "anonymous_id_2"],
properties={"$os": "Chrome"},
immediate=True,
)
_create_event(event="test", team=self.team, distinct_id="person_1")
_create_event(event="test", team=self.team, distinct_id="anonymous_id")
_create_event(event="test", team=self.team, distinct_id="someone_else")

response = self.client.post(f"/api/person/bulk_delete/", {"distinct_ids": ["anonymous_id", "person_2"]})

self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED, response.content)
self.assertEqual(response.content, b"") # Empty response
self.assertEqual(Person.objects.filter(team=self.team).count(), 0)

response = self.client.delete(f"/api/person/{person.uuid}/")
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)

ch_persons = sync_execute(
"SELECT version, is_deleted, properties FROM person FINAL WHERE team_id = %(team_id)s and id = %(uuid)s",
{"team_id": self.team.pk, "uuid": person.uuid},
)
self.assertEqual([(100, 1, "{}")], ch_persons)
# No async deletion is scheduled
self.assertEqual(AsyncDeletion.objects.filter(team_id=self.team.id).count(), 0)
ch_events = sync_execute(
"SELECT count() FROM events WHERE team_id = %(team_id)s",
{"team_id": self.team.pk},
)[0][0]
self.assertEqual(ch_events, 3)

@freeze_time("2021-08-25T22:09:14.252Z")
def test_split_people_keep_props(self) -> None:
# created first
Expand Down

0 comments on commit bf66822

Please sign in to comment.