From b32b0140512ef523e5122653d195261feb234fee Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Thu, 2 Jan 2025 17:52:50 +0100 Subject: [PATCH] fix(data-warehouse): Clear the delta lake table cache after a pipeline has run (#27215) --- .../temporal/data_imports/pipelines/pipeline/pipeline.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/posthog/temporal/data_imports/pipelines/pipeline/pipeline.py b/posthog/temporal/data_imports/pipelines/pipeline/pipeline.py index 4aa82b5eea3f3..68a9f715631a2 100644 --- a/posthog/temporal/data_imports/pipelines/pipeline/pipeline.py +++ b/posthog/temporal/data_imports/pipelines/pipeline/pipeline.py @@ -100,8 +100,14 @@ def run(self): self._post_run_operations(row_count=row_count) finally: # Help reduce the memory footprint of each job + delta_table = self._delta_table_helper.get_delta_table() + self._delta_table_helper.get_delta_table.cache_clear() + if delta_table: + del delta_table + del self._resource del self._delta_table_helper + if "buffer" in locals() and buffer is not None: del buffer if "py_table" in locals() and py_table is not None: