From 9611632b438f67a37777839831eef7a2a0496e94 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 22 Nov 2024 21:20:11 -0500 Subject: [PATCH 1/2] RF: download - do raise exception if any download fails Otherwise it is hard-to-impossible to script using "dandi download" reliably --- dandi/download.py | 13 +++++++++++-- dandi/tests/test_download.py | 9 ++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index 0973d956f..10e7ab568 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -153,6 +153,13 @@ def download( gen_ = (r for dl in downloaders for r in dl.download_generator()) + errors = [] + + def p4e(out): + if out.get("status") == "error": + errors.append(out) + return out + # TODOs: # - redo frontends similarly to how command_ls did it # - have a single loop with analysis of `rec` to either any file @@ -161,11 +168,11 @@ def download( # if format is DownloadFormat.DEBUG: for rec in gen_: - print(rec, flush=True) + print(p4e(rec), flush=True) elif format is DownloadFormat.PYOUT: with out: for rec in gen_: - out(rec) + out(p4e(rec)) else: raise AssertionError(f"Unhandled DownloadFormat member: {format!r}") @@ -191,6 +198,8 @@ def download( break else: break + if errors: + raise RuntimeError(f"Encountered {len(errors)} errors while downloading.") @dataclass diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index 61d3a9e65..6e89d0dec 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -1,6 +1,7 @@ from __future__ import annotations from collections.abc import Callable +from contextlib import nullcontext from glob import glob import json import logging @@ -70,8 +71,9 @@ def test_download_000027( with pytest.raises(FileExistsError): download(url, tmp_path, format=DownloadFormat.DEBUG) assert "FileExistsError" not in capsys.readouterr().out - # but no exception is raised, and rather it gets output to pyout otherwise - download(url, tmp_path) + # Generic error should be raised if we are using pyout/parallelization + with pytest.raises(RuntimeError): + download(url, tmp_path) assert "FileExistsError" in capsys.readouterr().out # TODO: somehow get that status report about what was downloaded and what not @@ -147,7 +149,8 @@ def test_download_000027_resume( with (dldir / "checksum").open("w") as fp: json.dump(digests, fp) - download(url, tmp_path, get_metadata=False) + with pytest.raises(RuntimeError) if break_download else nullcontext(): + download(url, tmp_path, get_metadata=False) assert list_paths(dsdir, dirs=True) == [ dsdir / "sub-RAT123", dsdir / "sub-RAT123" / "sub-RAT123.nwb", From 9b3f2fb9691ae413108638397a27ec449ff2a71a Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 2 Dec 2024 12:14:01 -0500 Subject: [PATCH 2/2] BF: we need to also wrap callbacks to capture errors in their returns --- dandi/download.py | 20 ++++++++++++++++++-- dandi/tests/test_download.py | 3 ++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index 10e7ab568..235381420 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -7,6 +7,7 @@ from enum import Enum from functools import partial import hashlib +import inspect import json import os import os.path as op @@ -153,11 +154,24 @@ def download( gen_ = (r for dl in downloaders for r in dl.download_generator()) + # Constructs to capture errors and handle them at the end errors = [] + def p4e_gen(callback): + for v in callback: + yield p4e(v) + def p4e(out): if out.get("status") == "error": - errors.append(out) + if out not in errors: + errors.append(out) + else: + # If generator was yielded, we need to wrap it also with + # our handling + for k, v in out.items(): + if inspect.isgenerator(v): + rec[k] = p4e_gen(v) + return out # TODOs: @@ -199,7 +213,9 @@ def p4e(out): else: break if errors: - raise RuntimeError(f"Encountered {len(errors)} errors while downloading.") + raise RuntimeError( + f"Encountered {pluralize(len(errors), 'error')} while downloading." + ) @dataclass diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index 6e89d0dec..a06d94aec 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -72,9 +72,10 @@ def test_download_000027( download(url, tmp_path, format=DownloadFormat.DEBUG) assert "FileExistsError" not in capsys.readouterr().out # Generic error should be raised if we are using pyout/parallelization - with pytest.raises(RuntimeError): + with pytest.raises(RuntimeError) as exc: download(url, tmp_path) assert "FileExistsError" in capsys.readouterr().out + assert "Encountered 1 error while downloading" in str(exc) # TODO: somehow get that status report about what was downloaded and what not download(url, tmp_path, existing=DownloadExisting.SKIP) # TODO: check that skipped