-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: add command to delete persons with no distinct id's (#25657)
- Loading branch information
1 parent
2599db1
commit 1f305bb
Showing
1 changed file
with
70 additions
and
0 deletions.
There are no files selected for viewing
70 changes: 70 additions & 0 deletions
70
posthog/management/commands/delete_persons_with_no_distinct_ids.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
from django.core.management.base import BaseCommand, CommandError | ||
from django.db import connection | ||
|
||
|
||
class Command(BaseCommand):
    """Delete a team's person rows that have no persondistinctid rows.

    The command is a dry run unless ``--live-run`` is passed: by default it
    only reports how many persons would be removed.
    """

    help = "Delete person rows that have no associated persondistinctid rows, by team"

    def add_arguments(self, parser):
        """Register the ``--team-id``, ``--dry-run`` and ``--live-run`` options."""
        parser.add_argument(
            "--team-id",
            default=None,
            type=int,
            help="ID of the team whose persons without distinct IDs should be deleted",
        )
        # Previously this flag used ``store_false``, so passing ``--dry-run``
        # actually performed the deletion. It is now a real (and default)
        # dry-run switch; deleting requires the explicit ``--live-run`` flag.
        parser.add_argument(
            "--dry-run",
            action="store_true",
            default=True,
            help="Only report how many persons would be deleted (default: true)",
        )
        # Shares the ``dry_run`` destination so ``handle`` and programmatic
        # callers (``call_command(..., dry_run=False)``) keep the same option key.
        parser.add_argument(
            "--live-run",
            action="store_false",
            dest="dry_run",
            help="Actually delete the persons instead of only counting them",
        )

    def handle(self, **options):
        """Run the dry-run count or the real deletion for the requested team.

        Raises:
            CommandError: if no team ID was supplied.
        """
        team_id = options["team_id"]
        dry_run = options["dry_run"]

        if not team_id:
            raise CommandError("source Team ID is required")

        print("Deleting persons with no distinct ids for team", team_id)  # noqa: T201

        if dry_run:
            delete_persons_without_distinct_ids_raw_sql_dry_run(team_id)
        else:
            delete_persons_without_distinct_ids_raw_sql(team_id)
|
||
|
||
def delete_persons_without_distinct_ids_raw_sql(team_id: int) -> int:
    """Delete every person of ``team_id`` that has no persondistinctid row.

    Args:
        team_id: ID of the team whose persons are examined.

    Returns:
        The number of ``posthog_person`` rows deleted.
    """
    with connection.cursor() as cursor:
        cursor.execute(
            """
            WITH persons_to_delete AS (
                SELECT p.id
                FROM posthog_person p
                LEFT JOIN posthog_persondistinctid pd ON p.id = pd.person_id AND p.team_id = pd.team_id
                WHERE p.team_id = %s AND pd.id IS NULL
            )
            DELETE FROM posthog_person
            WHERE id IN (SELECT id FROM persons_to_delete);
            """,
            [team_id],
        )

        # Only the count is needed, so read the driver's affected-row count
        # instead of returning every deleted id and materialising them in memory.
        deleted_count = cursor.rowcount

    print(f"Deleted {deleted_count} Person objects with no PersonDistinctIds for team {team_id}.")  # noqa: T201
    return deleted_count
|
||
|
||
def delete_persons_without_distinct_ids_raw_sql_dry_run(team_id):
    """Count, without deleting, the persons of ``team_id`` that have no persondistinctid row.

    Prints the count and returns it.
    """
    params = [team_id]

    with connection.cursor() as cur:
        cur.execute(
            """
            WITH persons_to_delete AS (
                SELECT p.id
                FROM posthog_person p
                LEFT JOIN posthog_persondistinctid pd ON p.id = pd.person_id AND p.team_id = pd.team_id
                WHERE p.team_id = %s AND pd.id IS NULL
            )
            SELECT COUNT(*) FROM persons_to_delete;
            """,
            params,
        )
        row = cur.fetchone()

        # Fall back to zero if the query yields no row at all.
        if row:
            would_delete = row[0]
        else:
            would_delete = 0

        print(f"Would have deleted {would_delete} Person objects with no PersonDistinctIds for team {team_id}.")  # noqa: T201
        return would_delete