From 1286878febc7c4e08f79f03c8e45bc9104978caa Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 12 Sep 2024 19:21:52 -0400 Subject: [PATCH 01/17] Log download directory path and also either we are entering __exit__ with or without exception etc --- dandi/download.py | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index f67e95454..e1ab4b60b 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -703,6 +703,11 @@ def _download_file( downloaded = dldir.offset resuming = downloaded > 0 if size is not None and downloaded == size: + lgr.debug( + "%s - downloaded size matches target size of %d, exiting the loop", + path, + size, + ) # Exit early when downloaded == size, as making a Range # request in such a case results in a 416 error from S3. # Problems will result if `size` is None but we've already @@ -829,16 +834,22 @@ def __enter__(self) -> DownloadDirectory: ): # Pick up where we left off, writing to the end of the file lgr.debug( - "Download directory exists and has matching checksum; resuming download" + "%s - download directory exists and has matching checksum(s) %s; resuming download", + self.dirpath, + matching_algs, ) self.fp = self.writefile.open("ab") else: # Delete the file (if it even exists) and start anew if not chkpath.exists(): - lgr.debug("Starting new download in new download directory") + lgr.debug( + "%s - starting new download in new download directory", self.dirpath + ) else: lgr.debug( - "Download directory found, but digests do not match; starting new download" + "%s - download directory found, but digests do not match;" + " starting new download", + self.dirpath, ) try: self.writefile.unlink() @@ -856,6 +867,22 @@ def __exit__( exc_val: BaseException | None, exc_tb: TracebackType | None, ) -> None: + if exc_type is not None or exc_val is not None or exc_tb is not None: + lgr.debug( + "%s - entered __exit__ with current position %d with exception: " + "%s, %s, %s", + self.dirpath, + self.fp.tell(), + exc_type, + exc_val, + exc_tb, + ) + else: + lgr.debug( + "%s - entered __exit__ with current position %d without any exception", + self.dirpath, + self.fp.tell(), + ) assert self.fp is not None self.fp.close() try: @@ -863,6 +890,10 @@ def __exit__( try: self.writefile.replace(self.filepath) except IsADirectoryError: + lgr.debug( + "Destination path %s is a directory; removing it and retrying", + self.filepath, + ) rmtree(self.filepath) self.writefile.replace(self.filepath) finally: From 79f0f48c484a26ce5b1a0591b0eba699cc56e7ec Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 12 Sep 2024 19:27:54 -0400 Subject: [PATCH 02/17] Fix typing check - move assert before use of fp.tell() + shorten the message while at it --- dandi/download.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index e1ab4b60b..e14a7353a 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -867,10 +867,10 @@ def __exit__( exc_val: BaseException | None, exc_tb: TracebackType | None, ) -> None: + assert self.fp is not None if exc_type is not None or exc_val is not None or exc_tb is not None: lgr.debug( - "%s - entered __exit__ with current position %d with exception: " - "%s, %s, %s", + "%s - entered __exit__ with position %d with exception: " "%s, %s, %s", self.dirpath, self.fp.tell(), exc_type, @@ -879,11 +879,10 @@ def __exit__( ) else: lgr.debug( - "%s - entered __exit__ with current position %d without any 
exception", + "%s - entered __exit__ with position %d without any exception", self.dirpath, self.fp.tell(), ) - assert self.fp is not None self.fp.close() try: if exc_type is None: From d04335992a704613554a4d0623bb058ea2a676eb Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 13 Sep 2024 10:54:42 -0400 Subject: [PATCH 03/17] Add unit-testing for DownloadDirectory to ensure expected operation Also shortened the log line to not include traceback --- dandi/download.py | 3 +- dandi/tests/test_download.py | 82 ++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index e14a7353a..f1df091f8 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -870,12 +870,11 @@ def __exit__( assert self.fp is not None if exc_type is not None or exc_val is not None or exc_tb is not None: lgr.debug( - "%s - entered __exit__ with position %d with exception: " "%s, %s, %s", + "%s - entered __exit__ with position %d with exception: %s, %s", self.dirpath, self.fp.tell(), exc_type, exc_val, - exc_tb, ) else: lgr.debug( diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index cd74c1609..19f36bd15 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -1,7 +1,10 @@ from __future__ import annotations from collections.abc import Callable +from glob import glob import json +import logging +from multiprocessing import Manager, Process import os import os.path from pathlib import Path @@ -21,6 +24,7 @@ from ..consts import DRAFT, dandiset_metadata_file from ..dandiarchive import DandisetURL from ..download import ( + DownloadDirectory, Downloader, DownloadExisting, DownloadFormat, @@ -1038,3 +1042,81 @@ def test_pyouthelper_time_remaining_1339(): assert len(done) == 2 else: assert done[-1] == f"ETA: {10 - i} seconds<" + + +def test_DownloadDirectory_basic(tmp_path: Path) -> None: + with DownloadDirectory(tmp_path, digests={}) as dl: + assert dl.dirpath.exists() + assert dl.writefile.exists() + assert dl.writefile.stat().st_size == 0 + assert dl.offset == 0 + + dl.append(b"123") + assert dl.fp is not None + dl.fp.flush() # appends are not flushed automatically + assert dl.writefile.stat().st_size == 3 + assert dl.offset == 0 # doesn't change + + dl.append(b"456") + # but after we are done - should be a full file! 
+ assert tmp_path.stat().st_size == 6 + assert tmp_path.read_bytes() == b"123456" + + # no problem with overwriting with new content + with DownloadDirectory(tmp_path, digests={}) as dl: + dl.append(b"789") + assert tmp_path.read_bytes() == b"789" + + # even if path is a directory which we "overwrite" + tmp_path.unlink() + tmp_path.mkdir() + (tmp_path / "somedata.dat").write_text("content") + with DownloadDirectory(tmp_path, digests={}) as dl: + assert set(glob(f"{tmp_path}*")) == {str(tmp_path), str(dl.dirpath)} + dl.append(b"123") + assert tmp_path.read_bytes() == b"123" + + # no temp .dandidownload folder is left behind + assert set(glob(f"{tmp_path}*")) == {str(tmp_path)} + + # test locking + def subproc(path, results): + try: + with DownloadDirectory(path, digests={}): + results.append("re-entered fine") + except Exception as exc: + results.append(str(exc)) + + with Manager() as manager: + results = manager.list() + with DownloadDirectory(tmp_path, digests={}) as dl: + dl.append(b"123") + p1 = Process(target=subproc, args=(tmp_path, results)) + p1.start() + p1.join() + assert len(results) == 1 + assert results[0] == f"Could not acquire download lock for {tmp_path}" + assert tmp_path.read_bytes() == b"123" + + +def test_DownloadDirectory_exc( + tmp_path: Path, caplog: pytest.LogCaptureFixture +) -> None: + caplog.set_level(logging.DEBUG, logger="dandi") + # and now let's exit with exception + with pytest.raises(RuntimeError): + with DownloadDirectory(tmp_path, digests={}) as dl: + dl.append(b"456") + raise RuntimeError("Boom") + assert ( + "dandi", + 10, + f"{dl.dirpath} - entered __exit__ with position 3 with exception: " + ", Boom", + ) == caplog.record_tuples[-1] + # and we left without cleanup but closed things up after ourselves + assert tmp_path.exists() + assert tmp_path.is_dir() + assert dl.dirpath.exists() + assert dl.fp is None + assert dl.writefile.read_bytes() == b"456" From 15fde440874880513797628a21254f9e9506df76 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 13 Sep 2024 11:39:18 -0400 Subject: [PATCH 04/17] Add a check that DownloadDirectory moves the file instead of copying (via inode matching) --- dandi/tests/test_download.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index 19f36bd15..200b3e344 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -1058,9 +1058,14 @@ def test_DownloadDirectory_basic(tmp_path: Path) -> None: assert dl.offset == 0 # doesn't change dl.append(b"456") + inode_number = dl.writefile.stat().st_ino + assert inode_number != tmp_path.stat().st_ino + # but after we are done - should be a full file! 
assert tmp_path.stat().st_size == 6 assert tmp_path.read_bytes() == b"123456" + # we moved the file, didn't copy (expensive) + assert inode_number == tmp_path.stat().st_ino # no problem with overwriting with new content with DownloadDirectory(tmp_path, digests={}) as dl: From f436b0cb85460c2798f4884d63977d11e62f187e Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 13 Sep 2024 11:47:32 -0400 Subject: [PATCH 05/17] Move helper function to module level so it could be pickled not sure why was not failing for me locally but fails on CI --- dandi/tests/test_download.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index 200b3e344..5d1d5f45f 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -1085,18 +1085,11 @@ def test_DownloadDirectory_basic(tmp_path: Path) -> None: assert set(glob(f"{tmp_path}*")) == {str(tmp_path)} # test locking - def subproc(path, results): - try: - with DownloadDirectory(path, digests={}): - results.append("re-entered fine") - except Exception as exc: - results.append(str(exc)) - with Manager() as manager: results = manager.list() with DownloadDirectory(tmp_path, digests={}) as dl: dl.append(b"123") - p1 = Process(target=subproc, args=(tmp_path, results)) + p1 = Process(target=_download_directory_subproc, args=(tmp_path, results)) p1.start() p1.join() assert len(results) == 1 @@ -1104,6 +1097,15 @@ def subproc(path, results): assert tmp_path.read_bytes() == b"123" +# needs to be a top-level function for pickling +def _download_directory_subproc(path, results): + try: + with DownloadDirectory(path, digests={}): + results.append("re-entered fine") + except Exception as exc: + results.append(str(exc)) + + def test_DownloadDirectory_exc( tmp_path: Path, caplog: pytest.LogCaptureFixture ) -> None: From f7fadeb74022057de25c8dc5dbb008e4a25ff59c Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 13 Sep 2024 11:48:04 -0400 Subject: [PATCH 06/17] Add handling of PermissionError on Windows for an existing directory That is my guess for what is happening in ________________________ test_DownloadDirectory_basic _________________________ dandi\tests\test_download.py:1048: in test_DownloadDirectory_basic with DownloadDirectory(tmp_path, digests={}) as dl: dandi\download.py:889: in __exit__ self.writefile.replace(self.filepath) C:\hostedtoolcache\windows\Python\3.10.11\x64\lib\pathlib.py:1247: in replace self._accessor.replace(self, target) E PermissionError: [WinError 5] Access is denied: 'C:\\Users\\runneradmin\\AppData\\Local\\Temp\\pytest-of-runneradmin\\pytest-0\\test_DownloadDirectory_basic0.dandidownload\\file' -> 'C:\\Users\\runneradmin\\AppData\\Local\\Temp\\pytest-of-runneradmin\\pytest-0\\test_DownloadDirectory_basic0' --- dandi/download.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dandi/download.py b/dandi/download.py index f1df091f8..fbb0a79b1 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -13,6 +13,7 @@ from pathlib import Path import random from shutil import rmtree +import sys from threading import Lock import time from types import TracebackType @@ -887,7 +888,12 @@ def __exit__( if exc_type is None: try: self.writefile.replace(self.filepath) - except IsADirectoryError: + except (IsADirectoryError, PermissionError) as exc: + if isinstance(exc, PermissionError): + if not ( + sys.platform.startswith("win") and self.filepath.is_dir() + ): + raise lgr.debug( "Destination path %s is a 
directory; removing it and retrying", self.filepath, From 028060e667b89adb64bbd31313581b5dba8cf918 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 16 Sep 2024 19:32:22 -0400 Subject: [PATCH 07/17] Log details on how much was actually downloaded and in how many chunks --- dandi/dandiapi.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index 5ea504de7..88951a2cd 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -1592,10 +1592,23 @@ def downloader(start_at: int = 0) -> Iterator[bytes]: # TODO: apparently we might need retries here as well etc # if result.status_code not in (200, 201): result.raise_for_status() + nbytes, nchunks = 0, 0 for chunk in result.iter_content(chunk_size=chunk_size): + nchunks += 1 if chunk: # could be some "keep alive"? + nbytes += len(chunk) yield chunk - lgr.info("Asset %s successfully downloaded", self.identifier) + else: + lgr.debug("'Empty' chunk downloaded for %s", url) + lgr.info( + "Asset %s (%d bytes in %d chunks starting from %d) successfully " + "downloaded from %s", + self.identifier, + nbytes, + nchunks, + start_at, + url, + ) return downloader From d3ce763f3d19812e7daab41344a40982d37695f1 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 17 Sep 2024 16:44:46 -0400 Subject: [PATCH 08/17] Make message more "factual" Very unlikely but it could be that directory already existed but without checksum file for some reason. --- dandi/download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/download.py b/dandi/download.py index fbb0a79b1..73be08138 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -844,7 +844,7 @@ def __enter__(self) -> DownloadDirectory: # Delete the file (if it even exists) and start anew if not chkpath.exists(): lgr.debug( - "%s - starting new download in new download directory", self.dirpath + "%s - no prior digests found; starting new download", self.dirpath ) else: lgr.debug( From ddcab48ad41a56ffbaf2b5fad3602412d026763c Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Sep 2024 14:07:09 -0400 Subject: [PATCH 09/17] Log also versions of requests and urllib3 --- dandi/cli/command.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dandi/cli/command.py b/dandi/cli/command.py index 27f104af0..c49542fe1 100644 --- a/dandi/cli/command.py +++ b/dandi/cli/command.py @@ -119,9 +119,11 @@ def main(ctx, log_level, pdb=False): "dandi v%s, dandischema v%s, hdmf v%s, pynwb v%s, h5py v%s", __version__, get_module_version("dandischema"), + get_module_version("h5py"), get_module_version("hdmf"), get_module_version("pynwb"), - get_module_version("h5py"), + get_module_version("requests"), + get_module_version("urllib3"), extra={"file_only": True}, ) lgr.info("sys.argv = %r", sys.argv, extra={"file_only": True}) From fb2e782d65b7791d279fe5ab7be5f20cd568e2d3 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Sep 2024 14:12:38 -0400 Subject: [PATCH 10/17] RF+ENH: also report requests and urllib3 versions, do not prefix with v no v prefix since is not providing any information on top; sorting for deterministic order --- dandi/cli/command.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/dandi/cli/command.py b/dandi/cli/command.py index c49542fe1..aa1871c45 100644 --- a/dandi/cli/command.py +++ b/dandi/cli/command.py @@ -114,16 +114,19 @@ def main(ctx, log_level, pdb=False): lambda r: r.name != "pyout" and not r.name.startswith("pyout.") ) 
root.addHandler(handler) + exts = ( + "dandischema", + "h5py", + "hdmf", + "pynwb", + "requests", + "urllib3", + ) lgr.info( - "dandi v%s, dandischema v%s, hdmf v%s, pynwb v%s, h5py v%s", + "dandi %s, " + + ", ".join("%s %s" % (e, get_module_version(e)) for e in sorted(exts)), __version__, - get_module_version("dandischema"), - get_module_version("h5py"), - get_module_version("hdmf"), - get_module_version("pynwb"), - get_module_version("requests"), - get_module_version("urllib3"), extra={"file_only": True}, ) lgr.info("sys.argv = %r", sys.argv, extra={"file_only": True}) From 5a78454889c13bb6c6502bcffbcec892e9b730bc Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Sep 2024 14:14:59 -0400 Subject: [PATCH 11/17] Also report python version --- dandi/cli/command.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dandi/cli/command.py b/dandi/cli/command.py index aa1871c45..37c4818fc 100644 --- a/dandi/cli/command.py +++ b/dandi/cli/command.py @@ -124,8 +124,9 @@ def main(ctx, log_level, pdb=False): ) lgr.info( - "dandi %s, " + "python %s, dandi %s, " + ", ".join("%s %s" % (e, get_module_version(e)) for e in sorted(exts)), + sys.version.split()[0], __version__, extra={"file_only": True}, ) From a4db7bc04bd83a9e248b5e3c70e9605a7d807898 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Sep 2024 14:28:26 -0400 Subject: [PATCH 12/17] Demand urllib3 to be no less than 2.0.0 in which enforce_content_length was set to default to True --- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.cfg b/setup.cfg index 28ca0318e..37b90e492 100644 --- a/setup.cfg +++ b/setup.cfg @@ -55,6 +55,8 @@ install_requires = ruamel.yaml >=0.15, <1 semantic-version tenacity + # possibly silently incomplete downloads: https://github.com/dandi/dandi-cli/issues/1500 + urllib3 >= 2.0.0 yarl ~= 1.9 zarr ~= 2.10 zarr_checksum ~= 0.4.0 From f52a3634bc474965677f4d829408c19dc3cbfa33 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 20 Sep 2024 14:57:56 -0400 Subject: [PATCH 13/17] ENH: respect/log separately Retry-After + support DANDI_DEVEL_AGGRESSIVE_RETRY mode This is all to address that odd case with 000026 where connection keeps interrupting. Unclear why so adding more specific cases handling and allowing for such an aggressive retrying where we would proceed as long as we are getting something (but sleep would also increase) --- dandi/download.py | 88 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 15 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index 73be08138..2295c017f 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -693,7 +693,10 @@ def _download_file( # TODO: how do we discover the total size???? 
# TODO: do not do it in-place, but rather into some "hidden" file resuming = False - for attempt in range(3): + attempt = 0 + nattempts = 3 # number to do, could be incremented if we downloaded a little + while attempt <= nattempts: + attempt += 1 try: if digester: downloaded_digest = digester() # start empty @@ -701,6 +704,7 @@ def _download_file( # I wonder if we could make writing async with downloader with DownloadDirectory(path, digests or {}) as dldir: assert dldir.offset is not None + downloaded_in_attempt = 0 downloaded = dldir.offset resuming = downloaded > 0 if size is not None and downloaded == size: @@ -719,6 +723,7 @@ def _download_file( assert downloaded_digest is not None downloaded_digest.update(block) downloaded += len(block) + downloaded_in_attempt += len(block) # TODO: yield progress etc out: dict[str, Any] = {"done": downloaded} if size: @@ -744,30 +749,83 @@ def _download_file( # Catching RequestException lets us retry on timeout & connection # errors (among others) in addition to HTTP status errors. except requests.RequestException as exc: + sleep_amount = random.random() * 5 * attempt + if os.environ.get("DANDI_DEVEL_AGGRESSIVE_RETRY"): + # in such a case if we downloaded a little more -- + # consider it a successful attempt + if downloaded_in_attempt > 0: + lgr.debug( + "%s - download failed on attempt #%d: %s, " + "but did download %d bytes, so considering " + "it a success and incrementing number of allowed attempts.", + path, + attempt, + exc, + downloaded_in_attempt, + ) + nattempts += 1 # TODO: actually we should probably retry only on selected codes, - # and also respect Retry-After - if attempt >= 2 or ( - exc.response is not None - and exc.response.status_code - not in ( + if exc.response is not None: + if exc.response.status_code not in ( 400, # Bad Request, but happened with gider: # https://github.com/dandi/dandi-cli/issues/87 *RETRY_STATUSES, + ): + lgr.debug( + "%s - download failed due to response %d: %s", + path, + exc.response.status_code, + exc, + ) + yield {"status": "error", "message": str(exc)} + return + elif retry_after := exc.response.headers.get("Retry-After"): + # playing safe + if not str(retry_after).isdigit(): + # our code is wrong, do not crash but issue warning so + # we might get report/fix it up + lgr.warning( + "%s - download failed due to response %d with non-integer" + " Retry-After=%r: %s", + path, + exc.response.status_code, + retry_after, + exc, + ) + yield {"status": "error", "message": str(exc)} + return + sleep_amount = int(retry_after) + lgr.debug( + "%s - download failed due to response %d with " + "Retry-After=%d: %s, will sleep and retry", + path, + exc.response.status_code, + sleep_amount, + exc, + ) + else: + lgr.debug("%s - download failed: %s", path, exc) + yield {"status": "error", "message": str(exc)} + return + elif attempt >= nattempts: + lgr.debug( + "%s - download failed after %d attempts: %s", path, attempt, exc ) - ): - lgr.debug("%s - download failed: %s", path, exc) yield {"status": "error", "message": str(exc)} return # if is_access_denied(exc) or attempt >= 2: # raise # sleep a little and retry - lgr.debug( - "%s - failed to download on attempt #%d: %s, will sleep a bit and retry", - path, - attempt, - exc, - ) - time.sleep(random.random() * 5) + else: + lgr.debug( + "%s - download failed on attempt #%d: %s, will sleep a bit and retry", + path, + attempt, + exc, + ) + time.sleep(sleep_amount) + else: + lgr.warning("downloader logic: We should not be here!") if downloaded_digest and not resuming: assert 
downloaded_digest is not None From a9226f0d672ef722fdb5cc9f8e1e8ea71f17ff0e Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 24 Oct 2024 16:29:08 -0400 Subject: [PATCH 14/17] Rename and document that DANDI_DOWNLOAD_AGGRESSIVE_RETRY "option" --- DEVELOPMENT.md | 4 ++++ dandi/download.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 293586ae3..80bdd3d0c 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -85,6 +85,10 @@ development command line options. function will patch `requests` to log the results of calls to `requests.utils.super_len()` +- `DANDI_DOWNLOAD_AGGRESSIVE_RETRY` -- When set, would make `download()` retry + very aggressively - it would keep trying if at least some bytes are downloaded + on each attempt. Typically is not needed and could be a sign of network issues. + ## Sourcegraph The [Sourcegraph](https://sourcegraph.com) browser extension can be used to diff --git a/dandi/download.py b/dandi/download.py index 2295c017f..249732e1b 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -750,7 +750,7 @@ def _download_file( # errors (among others) in addition to HTTP status errors. except requests.RequestException as exc: sleep_amount = random.random() * 5 * attempt - if os.environ.get("DANDI_DEVEL_AGGRESSIVE_RETRY"): + if os.environ.get("DANDI_DOWNLOAD_AGGRESSIVE_RETRY"): # in such a case if we downloaded a little more -- # consider it a successful attempt if downloaded_in_attempt > 0: From ad069c8d01ceaa9d8ed446ce07eeb4bdc8e17ae6 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 12 Nov 2024 10:29:41 -0500 Subject: [PATCH 15/17] State minimal version of duecredit to be 0.6.0 pip fails to deal with metadata of 0.5.0 due to missing __version__ --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 37b90e492..d51ac6fd2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -78,7 +78,7 @@ include = dandi* extensions = allensdk extras = - duecredit + duecredit >= 0.6.0 fsspec[http] style = flake8 From 08450edb987e46de63cb627235c3082331584bd2 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 14 Nov 2024 17:04:10 -0500 Subject: [PATCH 16/17] Use vcrpy only for python >= 3.10 See more info at - https://github.com/urllib3/urllib3/issues/3017 - https://github.com/kevin1024/vcrpy/issues/688 --- dandi/cli/tests/test_service_scripts.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dandi/cli/tests/test_service_scripts.py b/dandi/cli/tests/test_service_scripts.py index 790d298a3..e1f3a85d3 100644 --- a/dandi/cli/tests/test_service_scripts.py +++ b/dandi/cli/tests/test_service_scripts.py @@ -11,7 +11,6 @@ from click.testing import CliRunner from dandischema.consts import DANDI_SCHEMA_VERSION import pytest -import vcr from dandi import __version__ from dandi.tests.fixtures import SampleDandiset @@ -76,9 +75,14 @@ def test_update_dandiset_from_doi( dandiset_id = new_dandiset.dandiset_id repository = new_dandiset.api.instance.gui monkeypatch.setenv("DANDI_API_KEY", new_dandiset.api.api_key) - if os.environ.get("DANDI_TESTS_NO_VCR", ""): + if os.environ.get("DANDI_TESTS_NO_VCR", "") or sys.version_info <= (3, 10): + # Older vcrpy has an issue with Python 3.9 and newer urllib2 >= 2 + # But we require newer urllib2 for more correct operation, and + # do still support 3.9. 
Remove when 3.9 support is dropped ctx = nullcontext() else: + import vcr + ctx = vcr.use_cassette( str(DATA_DIR / "update_dandiset_from_doi" / f"{name}.vcr.yaml"), before_record_request=record_only_doi_requests, From 7f9739457c684b915faaba4333e632791e76c96f Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 14 Nov 2024 17:52:51 -0500 Subject: [PATCH 17/17] fix(workaround,test): completely skip running test_DownloadDirectory_basic on windows Somehow that causes "indigestion" to pytest process later in its life cycle --- dandi/tests/test_download.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index 5d1d5f45f..a4ad631c5 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -1044,6 +1044,7 @@ def test_pyouthelper_time_remaining_1339(): assert done[-1] == f"ETA: {10 - i} seconds<" +@mark.skipif_on_windows # https://github.com/pytest-dev/pytest/issues/12964 def test_DownloadDirectory_basic(tmp_path: Path) -> None: with DownloadDirectory(tmp_path, digests={}) as dl: assert dl.dirpath.exists()
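
A minimal usage sketch of DownloadDirectory as exercised by the new tests in PATCH 03/04; this is an illustration only, not part of the patch series, and the target path below is hypothetical:

    import tempfile
    from pathlib import Path

    from dandi.download import DownloadDirectory

    target = Path(tempfile.mkdtemp()) / "file.dat"  # hypothetical destination
    with DownloadDirectory(target, digests={}) as dl:
        # partial content is written into dl.writefile inside dl.dirpath, a
        # sibling working directory suffixed with ".dandidownload", not into target
        dl.append(b"123")
    # on a clean __exit__ the temporary file is moved (not copied) over target
    assert target.read_bytes() == b"123"

A second process entering DownloadDirectory for the same path while the first still holds it fails with "Could not acquire download lock for <path>", which is what the new locking test asserts.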
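
A condensed sketch of the retry policy added in PATCH 13 and renamed/documented in PATCH 14; only the environment variable name and the backoff / Retry-After rules come from the patches, while the helper functions below are hypothetical:

    from __future__ import annotations

    import os
    import random

    def next_sleep(attempt: int, retry_after: str | None) -> float:
        # default backoff: jitter that grows with the attempt number
        sleep_amount = random.random() * 5 * attempt
        # an integer Retry-After header from the server overrides the jitter
        if retry_after is not None and str(retry_after).isdigit():
            sleep_amount = int(retry_after)
        return sleep_amount

    def allowed_attempts(nattempts: int, downloaded_in_attempt: int) -> int:
        # with DANDI_DOWNLOAD_AGGRESSIVE_RETRY set, any attempt that fetched
        # some bytes earns one more attempt, so a flaky but progressing
        # download is not given up on
        if os.environ.get("DANDI_DOWNLOAD_AGGRESSIVE_RETRY") and downloaded_in_attempt > 0:
            nattempts += 1
        return nattempts

The download loop starts with nattempts = 3 and retries only on HTTP 400 and the statuses in RETRY_STATUSES; any other response status, or exhausting the attempt budget, still yields an error result as before.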