Skip to content

Commit

Permalink
Merge pull request #1369 from dandi/gh-1366
Browse files Browse the repository at this point in the history
Set 30-second connect & read timeout when downloading files
  • Loading branch information
yarikoptic authored Nov 29, 2023
2 parents eea1411 + e538d46 commit 036df85
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 6 deletions.
2 changes: 2 additions & 0 deletions dandi/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,5 @@ def urls(self) -> Iterator[str]:
}

REQUEST_RETRIES = 12
+
+DOWNLOAD_TIMEOUT = 30
9 changes: 7 additions & 2 deletions dandi/dandiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from . import get_logger
from .consts import (
+DOWNLOAD_TIMEOUT,
DRAFT,
MAX_CHUNK_SIZE,
REQUEST_RETRIES,
Expand Down Expand Up @@ -1472,7 +1473,9 @@ def downloader(start_at: int = 0) -> Iterator[bytes]:
headers = None
if start_at > 0:
headers = {"Range": f"bytes={start_at}-"}
-result = self.client.session.get(url, stream=True, headers=headers)
+result = self.client.session.get(
+url, stream=True, headers=headers, timeout=DOWNLOAD_TIMEOUT
+)
# TODO: apparently we might need retries here as well etc
# if result.status_code not in (200, 201):
result.raise_for_status()
Expand Down Expand Up @@ -1902,7 +1905,9 @@ def downloader(start_at: int = 0) -> Iterator[bytes]:
headers = None
if start_at > 0:
headers = {"Range": f"bytes={start_at}-"}
-result = self.client.session.get(url, stream=True, headers=headers)
+result = self.client.session.get(
+url, stream=True, headers=headers, timeout=DOWNLOAD_TIMEOUT
+)
# TODO: apparently we might need retries here as well etc
# if result.status_code not in (200, 201):
result.raise_for_status()
Expand Down
15 changes: 11 additions & 4 deletions dandi/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,9 +702,16 @@ def _download_file(
yield out
dldir.append(block)
break
-except requests.exceptions.HTTPError as exc:
-# TODO: actually we should probably retry only on selected codes, and also
-# respect Retry-After
+except ValueError:
+# When `requests` raises a ValueError, it's because the caller
+# provided invalid parameters (e.g., an invalid URL), and so
+# retrying won't change anything.
+raise
+# Catching RequestException lets us retry on timeout & connection
+# errors (among others) in addition to HTTP status errors.
+except requests.RequestException as exc:
+# TODO: actually we should probably retry only on selected codes,
+# and also respect Retry-After
if attempt >= 2 or (
exc.response is not None
and exc.response.status_code
Expand All @@ -721,7 +728,7 @@ def _download_file(
# raise
# sleep a little and retry
lgr.debug(
-"Failed to download on attempt#%d: %s, will sleep a bit and retry",
+"Failed to download on attempt #%d: %s, will sleep a bit and retry",
attempt,
exc,
)
Expand Down

0 comments on commit 036df85

Please sign in to comment.