Skip to content

Commit

Permalink
dev(narugo): add lazy download mode
Browse files Browse the repository at this point in the history
  • Loading branch information
narugo1992 committed Sep 1, 2024
1 parent c6786f4 commit 6749c41
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
11 changes: 10 additions & 1 deletion hfutils/index/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ def hf_tar_file_download(repo_id: str, archive_in_repo: str, file_in_archive: st
idx_repo_type: Optional[RepoTypeTyping] = None, idx_revision: Optional[str] = None,
proxies: Optional[Dict] = None, user_agent: Union[Dict, str, None] = None,
headers: Optional[Dict[str, str]] = None, endpoint: Optional[str] = None,
hf_token: Optional[str] = None):
force_download: bool = False, hf_token: Optional[str] = None):
"""
Download a file from a tar archive file in a Hugging Face repository.
Expand Down Expand Up @@ -470,6 +470,8 @@ def hf_tar_file_download(repo_id: str, archive_in_repo: str, file_in_archive: st
:type headers: Dict[str, str], optional
:param endpoint: The Hugging Face API endpoint.
:type endpoint: str, optional
:param force_download: Force re-downloading the file to the destination path, even if a local file with matching size (and SHA-256, when available) already exists. Defaults to `False`.
:type force_download: bool
:param hf_token: The Hugging Face access token.
:type hf_token: str, optional
:raises FileNotFoundError: Raised when the file does not exist in the tar archive.
Expand Down Expand Up @@ -533,6 +535,13 @@ def hf_tar_file_download(repo_id: str, archive_in_repo: str, file_in_archive: st
end_bytes = info['offset'] + info['size'] - 1
headers['Range'] = f'bytes={start_bytes}-{end_bytes}'

if not force_download and os.path.exists(local_file) and \
os.path.isfile(local_file) and os.path.getsize(local_file) == info['size']:
_expected_sha256 = info.get('sha256')
if not _expected_sha256 or _f_sha256(local_file) == _expected_sha256:

Check warning on line 541 in hfutils/index/fetch.py

View check run for this annotation

Codecov / codecov/patch

hfutils/index/fetch.py#L540-L541

Added lines #L540 - L541 were not covered by tests
# file already ready, no need to download it again
return

Check warning on line 543 in hfutils/index/fetch.py

View check run for this annotation

Codecov / codecov/patch

hfutils/index/fetch.py#L543

Added line #L543 was not covered by tests

if os.path.dirname(local_file):
os.makedirs(os.path.dirname(local_file), exist_ok=True)
try:
Expand Down
11 changes: 10 additions & 1 deletion hfutils/index/local_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def tar_file_size(archive_file: str, file_in_archive: str, idx_file: Optional[st


def tar_file_download(archive_file: str, file_in_archive: str, local_file: str,
idx_file: Optional[str] = None, chunk_size: int = 1 << 20):
idx_file: Optional[str] = None, chunk_size: int = 1 << 20, force_download: bool = False):
"""
Extract and download a specific file from the tar archive to a local file.
Expand All @@ -190,6 +190,8 @@ def tar_file_download(archive_file: str, file_in_archive: str, local_file: str,
:type idx_file: Optional[str]
:param chunk_size: The size of chunks to read and write, in bytes. Default is 1MB.
:type chunk_size: int
:param force_download: Force re-extracting the file to the destination path, even if a local file with matching size (and SHA-256, when available) already exists. Defaults to `False`.
:type force_download: bool
:raises FileNotFoundError: If the specified file is not found in the archive.
:raises ArchiveStandaloneFileIncompleteDownload: If the downloaded file size doesn't match the expected size.
Expand All @@ -212,6 +214,13 @@ def tar_file_download(archive_file: str, file_in_archive: str, local_file: str,

info = files[_n_path(file_in_archive)]

if not force_download and os.path.exists(local_file) and \
os.path.isfile(local_file) and os.path.getsize(local_file) == info['size']:
_expected_sha256 = info.get('sha256')
if not _expected_sha256 or _f_sha256(local_file) == _expected_sha256:

Check warning on line 220 in hfutils/index/local_fetch.py

View check run for this annotation

Codecov / codecov/patch

hfutils/index/local_fetch.py#L219-L220

Added lines #L219 - L220 were not covered by tests
# file already ready, no need to download it again
return

Check warning on line 222 in hfutils/index/local_fetch.py

View check run for this annotation

Codecov / codecov/patch

hfutils/index/local_fetch.py#L222

Added line #L222 was not covered by tests

if os.path.dirname(local_file):
os.makedirs(os.path.dirname(local_file), exist_ok=True)
try:
Expand Down

0 comments on commit 6749c41

Please sign in to comment.