Skip to content

Commit

Permalink
Debug failures
Browse files: browse the repository at this point in the history
  • Loading branch information
orf committed Oct 20, 2024
1 parent 9d2473c commit 5c9be60
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
3 changes: 2 additions & 1 deletion src/pypi_data/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ def merge_datasets(
repos = Repos.model_validate_json(fd.read()).root
max_buffer_size = pydantic.RootModel[ByteSize].model_validate(max_buffer_size).root
target_size = pydantic.RootModel[ByteSize].model_validate(target_size).root
asyncio.run(combine_parquet(repos, output, max_buffer_size, target_size))
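# Debug failures: only process repos from index 200 onward, skipping the first 200 (presumably a temporary debugging aid; TODO confirm and revert).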
asyncio.run(combine_parquet(repos[200:], output, max_buffer_size, target_size))


async def resolve_dataset_redirects(
Expand Down
8 changes: 6 additions & 2 deletions src/pypi_data/combine_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ async def fill_buffer(
max_buffer_size: int,
client: httpx.AsyncClient,
repositories: Deque[CodeRepository],
directory: Path,
) -> bool:
while repositories:
time_hashing_ns = 0
Expand Down Expand Up @@ -139,6 +140,7 @@ async def fill_buffer(
f"iter={time_iterating_ns // 1_000_000} ms "
f"hash={time_hashing_ns // 1_000_000} ms"
)
log_system_stats(directory)

return bool(buffer)

Expand Down Expand Up @@ -175,7 +177,9 @@ async def combine_parquet(
async with httpx.AsyncClient(follow_redirects=True) as client:
while repositories:
if (
await fill_buffer(buffer, max_buffer_size, client, repositories)
await fill_buffer(
buffer, max_buffer_size, client, repositories, directory
)
is False
):
continue
Expand All @@ -200,7 +204,7 @@ async def combine_parquet(
while buffer or repositories:
if not buffer:
res = await fill_buffer(
buffer, max_buffer_size, client, repositories
buffer, max_buffer_size, client, repositories, directory
)
if res is None:
continue
Expand Down

0 comments on commit 5c9be60

Please sign in to comment.