Skip to content

Commit

Permalink
feat: Use our own soft deletes for event deletes
Browse files (browse the repository at this point in the history)
  • Loading branch information
fuziontech committed Aug 26, 2024
1 parent d71d24d commit d070c7d
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions posthog/models/async_deletion/delete_events.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -59,7 +59,13 @@ def process(self, deletions: list[AsyncDeletion]):

# Get estimated byte size of the query
str_predicate = " OR ".join(conditions)
query = f"DELETE FROM sharded_events ON CLUSTER '{CLICKHOUSE_CLUSTER}' WHERE {str_predicate}"
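# Soft delete: re-insert every matching event as a row with is_deleted set to True,
# copying only the columns named in the INSERT below and omitting all other columns.
# Columns that matter: team_id, toDate(timestamp), event, cityHash64(distinct_id), cityHash64(uuid), _timestamp, is_deleted
# (presumably the table's sorting-key expressions plus its version and deletion-flag columns; TODO confirm against the schema)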
query = f"""
INSERT INTO posthog.events (team_id, timestamp, event, distinct_id, uuid, _timestamp, is_deleted)
SELECT team_id, timestamp, event, distinct_id, uuid, now(), True
FROM posthog.events
WHERE NOT is_deleted AND {str_predicate}"""
query_size = len(query.encode("utf-8"))

logger.debug(f"Query size: {query_size}")
Expand Down Expand Up @@ -126,7 +132,7 @@ def _verify_by_column(self, distinct_columns: str, async_deletions: list[AsyncDe
f"""
SELECT DISTINCT {distinct_columns}
FROM events
WHERE {" OR ".join(conditions)}
WHERE NOT is_deleted AND {" OR ".join(conditions)}
""",
args,
settings={"max_execution_time": MAX_SELECT_EXECUTION_TIME},
Expand Down

0 comments on commit d070c7d

Please sign in to comment.