Skip to content

Commit

Permalink
try this
Browse files Browse the repository at this point in the history
  • Loading branch information
orf committed Oct 20, 2024
1 parent 4521a73 commit 3e370a0
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/pypi_data/combine_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
log = structlog.get_logger()

TARGET_SIZE = 1024 * 1024 * 1024 * 1.8 # 1.8 GiB (~1.93 GB)
FILL_BUFFER_COUNT = 4 # Download this many datasets at once


def append_buffer(writer: pq.ParquetWriter, batch: RecordBatch, roll_up_path: Path) -> bool:
Expand All @@ -27,7 +28,7 @@ def append_buffer(writer: pq.ParquetWriter, batch: RecordBatch, roll_up_path: Pa
async def fill_buffer(buffer: list[tuple[tuple[int, str], RecordBatch]], client: httpx.AsyncClient,
repositories: list[CodeRepository],
path: Path) -> bool:
for _ in range(4):
for _ in range(FILL_BUFFER_COUNT):
if not repositories:
break
repo = repositories.pop(0)
Expand Down

0 comments on commit 3e370a0

Please sign in to comment.