From 3e370a08e9c92a507a1891c4234ce6e2404d29cf Mon Sep 17 00:00:00 2001 From: Tom Forbes Date: Sun, 20 Oct 2024 10:50:35 +0100 Subject: [PATCH] Extract fill_buffer batch size into FILL_BUFFER_COUNT constant --- src/pypi_data/combine_parquet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pypi_data/combine_parquet.py b/src/pypi_data/combine_parquet.py index 71b35774..3e4c4f54 100644 --- a/src/pypi_data/combine_parquet.py +++ b/src/pypi_data/combine_parquet.py @@ -12,6 +12,7 @@ log = structlog.get_logger() TARGET_SIZE = 1024 * 1024 * 1024 * 1.8 # 1.8 GB +FILL_BUFFER_COUNT = 4 # Download this many datasets at once def append_buffer(writer: pq.ParquetWriter, batch: RecordBatch, roll_up_path: Path) -> bool: @@ -27,7 +28,7 @@ def append_buffer(writer: pq.ParquetWriter, batch: RecordBatch, roll_up_path: Pa async def fill_buffer(buffer: list[tuple[tuple[int, str], RecordBatch]], client: httpx.AsyncClient, repositories: list[CodeRepository], path: Path) -> bool: - for _ in range(4): + for _ in range(FILL_BUFFER_COUNT): if not repositories: break repo = repositories.pop(0)