Skip to content

Commit

Permalink
fix(persons): make cleanup command allow you to work in batches (#26455)
Browse files Browse the repository at this point in the history
  • Loading branch information
oliverb123 authored Nov 27, 2024
1 parent 0b9d5f7 commit aae7f7d
Showing 1 changed file with 15 additions and 9 deletions.
24 changes: 15 additions & 9 deletions posthog/management/commands/delete_persons_with_no_distinct_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,15 @@ class Command(BaseCommand):

def add_arguments(self, parser):
    """Register the command-line options for this management command.

    Options:
        --team-id: integer team ID; defaults to None when omitted.
        --dry-run / --dry_run: boolean flag stored as ``dry_run``. It is
            False unless the flag is passed, so the command only runs in
            dry-run mode when explicitly asked to.
        --max-delete: integer cap on rows handled in one invocation
            (default 1000).
    """
    parser.add_argument("--team-id", default=None, type=int, help="Team ID to migrate from (on this instance)")
    # NOTE: this used to be action="store_false", which made ``dry_run``
    # default to True and flip to False when the flag was given, i.e.
    # passing --dry-run selected the destructive path. "store_true" makes
    # the flag match its name and help text. Both the hyphenated and the
    # legacy underscore spelling are accepted so existing invocations keep
    # parsing.
    parser.add_argument(
        "--dry-run",
        "--dry_run",
        dest="dry_run",
        action="store_true",
        help="Dry run mode (no changes will be made)",
    )
    parser.add_argument(
        "--max-delete", default=1000, type=int, help="Max number of rows to delete in one go (default 1000)"
    )

def handle(self, **options):
team_id = options["team_id"]
dry_run = options["dry_run"]
max_delete = options["max_delete"]

if not team_id:
raise CommandError("source Team ID is required")
Expand All @@ -21,28 +25,29 @@ def handle(self, **options):

if dry_run:
print("Dry run mode enabled. No changes will be made.") # noqa: T201
delete_persons_without_distinct_ids_raw_sql_dry_run(team_id)
delete_persons_without_distinct_ids_raw_sql_dry_run(team_id, max_delete)
else:
print("This is for real. Changes will be made. Sleeping for 10 seconds") # noqa: T201
time.sleep(10)
delete_persons_without_distinct_ids_raw_sql(team_id)
delete_persons_without_distinct_ids_raw_sql(team_id, max_delete)


def delete_persons_without_distinct_ids_raw_sql(team_id):
def delete_persons_without_distinct_ids_raw_sql(team_id, max_delete):
with connection.cursor() as cursor:
cursor.execute(
"""
WITH persons_to_delete AS (
SELECT p.id
FROM posthog_person p
LEFT JOIN posthog_persondistinctid pd ON p.id = pd.person_id AND p.team_id = pd.team_id
WHERE p.team_id = %s AND pd.id IS NULL
WHERE p.team_id = %(team_id)s AND pd.id IS NULL
LIMIT %(max_delete)s
)
DELETE FROM posthog_person
WHERE id IN (SELECT id FROM persons_to_delete)
RETURNING id;
""",
[team_id],
{"team_id": team_id, "max_delete": max_delete},
)

deleted_ids = cursor.fetchall()
Expand All @@ -52,19 +57,20 @@ def delete_persons_without_distinct_ids_raw_sql(team_id):
return deleted_count


def delete_persons_without_distinct_ids_raw_sql_dry_run(team_id):
def delete_persons_without_distinct_ids_raw_sql_dry_run(team_id, max_delete):
with connection.cursor() as cursor:
cursor.execute(
"""
WITH persons_to_delete AS (
SELECT p.id
FROM posthog_person p
LEFT JOIN posthog_persondistinctid pd ON p.id = pd.person_id AND p.team_id = pd.team_id
WHERE p.team_id = %s AND pd.id IS NULL
WHERE p.team_id = %(team_id)s AND pd.id IS NULL
LIMIT %(max_delete)s
)
SELECT COUNT(*) FROM persons_to_delete;
""",
[team_id],
{"team_id": team_id, "max_delete": max_delete},
)

deleted_count = cursor.fetchone()
Expand Down

0 comments on commit aae7f7d

Please sign in to comment.