From e538d46a052d67b3f729a08c0645a804ef6c6fd8 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 28 Nov 2023 14:50:01 -0500 Subject: [PATCH] Set 30-second connect & read timeout when downloading files --- dandi/consts.py | 2 ++ dandi/dandiapi.py | 9 +++++++-- dandi/download.py | 15 +++++++++++---- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/dandi/consts.py b/dandi/consts.py index 7a0a8522e..14eceab4f 100644 --- a/dandi/consts.py +++ b/dandi/consts.py @@ -201,3 +201,5 @@ def urls(self) -> Iterator[str]: } REQUEST_RETRIES = 12 + +DOWNLOAD_TIMEOUT = 30 diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index 2f644fd22..7e9ab1178 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -24,6 +24,7 @@ from . import get_logger from .consts import ( + DOWNLOAD_TIMEOUT, DRAFT, MAX_CHUNK_SIZE, REQUEST_RETRIES, @@ -1472,7 +1473,9 @@ def downloader(start_at: int = 0) -> Iterator[bytes]: headers = None if start_at > 0: headers = {"Range": f"bytes={start_at}-"} - result = self.client.session.get(url, stream=True, headers=headers) + result = self.client.session.get( + url, stream=True, headers=headers, timeout=DOWNLOAD_TIMEOUT + ) # TODO: apparently we might need retries here as well etc # if result.status_code not in (200, 201): result.raise_for_status() @@ -1902,7 +1905,9 @@ def downloader(start_at: int = 0) -> Iterator[bytes]: headers = None if start_at > 0: headers = {"Range": f"bytes={start_at}-"} - result = self.client.session.get(url, stream=True, headers=headers) + result = self.client.session.get( + url, stream=True, headers=headers, timeout=DOWNLOAD_TIMEOUT + ) # TODO: apparently we might need retries here as well etc # if result.status_code not in (200, 201): result.raise_for_status() diff --git a/dandi/download.py b/dandi/download.py index 228b3acd7..7049a4ae9 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -702,9 +702,16 @@ def _download_file( yield out dldir.append(block) break - except requests.exceptions.HTTPError as exc: - # TODO: actually we should probably retry only on selected codes, and also - # respect Retry-After + except ValueError: + # When `requests` raises a ValueError, it's because the caller + # provided invalid parameters (e.g., an invalid URL), and so + # retrying won't change anything. + raise + # Catching RequestException lets us retry on timeout & connection + # errors (among others) in addition to HTTP status errors. + except requests.RequestException as exc: + # TODO: actually we should probably retry only on selected codes, + # and also respect Retry-After if attempt >= 2 or ( exc.response is not None and exc.response.status_code @@ -721,7 +728,7 @@ def _download_file( # raise # sleep a little and retry lgr.debug( - "Failed to download on attempt#%d: %s, will sleep a bit and retry", + "Failed to download on attempt #%d: %s, will sleep a bit and retry", attempt, exc, )