Skip to content

Commit

Permalink
fix: skip now deleted teams in backfill_personless_distinct_ids (#23415)
Browse files Browse the repository at this point in the history
  • Loading branch information
bretthoerner authored Jul 2, 2024
1 parent c62058c commit d1d0320
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions posthog/management/commands/backfill_personless_distinct_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ def batch_insert_personless_distinct_ids(data, batch_size=1000):
ON CONFLICT (team_id, distinct_id) DO NOTHING
"""

team_ids = {d[0] for d in data}
existing_team_ids = set(Team.objects.filter(id__in=team_ids).values_list("id", flat=True))
missing_team_ids = team_ids - existing_team_ids
original_len = len(data)
data = [d for d in data if d[0] in existing_team_ids]
if missing_team_ids:
logger.info(
f"Skipping team ids {missing_team_ids!r} because they no longer exist, skipping {original_len - len(data)} records"
)

def chunks(lst, n):
    """Yield consecutive slices of ``lst`` holding at most ``n`` items each.

    Slices are produced in order; the final slice is shorter when
    ``len(lst)`` is not a multiple of ``n``. An empty ``lst`` yields nothing.

    Args:
        lst: A sliceable sequence that supports ``len()``.
        n: Maximum number of items per slice; must be a positive integer.

    Yields:
        ``lst[i : i + n]`` for ``i`` in ``0, n, 2n, ...``.

    Raises:
        ValueError: If ``n`` is not positive. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    # A negative step makes range() empty, which would silently drop every
    # item; a zero step fails with an unrelated-looking range() error.
    # Reject both explicitly so a bad batch size is loud.
    if n <= 0:
        raise ValueError(f"chunk size must be a positive integer, got {n!r}")
    for i in range(0, len(lst), n):
        yield lst[i : i + n]
Expand Down Expand Up @@ -69,7 +79,7 @@ def execute(self, dry_run: bool = False) -> None:
else:
distinct_ids = ch_execute(query, parameters, settings=settings)
batch_insert_personless_distinct_ids(distinct_ids)
logger.info("Completed %r!", self)
logger.info("Completed %r (%d rows)!", self, len(distinct_ids))


@dataclass
Expand Down Expand Up @@ -103,7 +113,7 @@ def execute(self, dry_run: bool = False) -> None:
else:
distinct_ids = ch_execute(query, parameters, settings=settings)
batch_insert_personless_distinct_ids(distinct_ids)
logger.info("Completed %r!", self)
logger.info("Completed %r (%d rows)!", self, len(distinct_ids))


class Command(BaseCommand):
Expand Down

0 comments on commit d1d0320

Please sign in to comment.