-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1ce0c59
commit 9326294
Showing
5 changed files
with
250 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
from .fetch import hf_tar_list_files, hf_tar_file_download, hf_tar_get_index, hf_tar_file_exists, \ | ||
ArchiveStandaloneFileIncompleteDownload, ArchiveStandaloneFileHashNotMatch, hf_tar_file_size, hf_tar_file_info | ||
from .local_fetch import tar_get_index, tar_file_info, tar_file_download, tar_file_size, tar_file_exists, tar_list_files | ||
from .make import tar_create_index, hf_tar_create_index, tar_get_index_info, hf_tar_create_from_directory | ||
from .validate import hf_tar_item_validate, hf_tar_validate |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import json | ||
import os | ||
from typing import Optional, List | ||
|
||
|
||
def tar_get_index(archive_file: str, idx_file: Optional[str] = None):
    """
    Load the JSON index that describes a local tar archive.

    :param archive_file: Path of the tar archive on the local disk.
    :param idx_file: Path of the index file. When omitted (or empty), the index
        is looked up next to the archive, using the archive path with its
        extension replaced by ``.json``.
    :return: The parsed JSON content of the index file.
    :raises FileNotFoundError: If the index file does not exist.
    """
    if idx_file:
        index_path = idx_file
    else:
        body, _ = os.path.splitext(archive_file)
        index_path = f'{body}.json'

    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so indexes listing non-ASCII member names load everywhere.
    with open(index_path, 'r', encoding='utf-8') as f:
        return json.load(f)
|
||
|
||
def tar_list_files(archive_file: str, idx_file: Optional[str] = None) -> List[str]:
    """
    List the member names recorded in the index of a local tar archive.

    :param archive_file: Path of the tar archive on the local disk.
    :param idx_file: Optional path of the index file, forwarded to
        :func:`tar_get_index`.
    :return: The keys of the index's ``files`` mapping, as a list.
    """
    entries = tar_get_index(archive_file=archive_file, idx_file=idx_file)['files']
    return [*entries.keys()]
|
||
|
||
def tar_file_exists(archive_file: str, file_in_archive: str, idx_file: Optional[str] = None) -> bool:
    """
    Check whether a file is listed in the index of a local tar archive.

    :param archive_file: Path of the tar archive on the local disk.
    :param file_in_archive: Path of the file inside the archive. It is passed
        through ``_n_path`` before the lookup (presumably a path normalizer;
        see ``.fetch`` for the exact rules).
    :param idx_file: Optional path of the index file, forwarded to
        :func:`tar_get_index`.
    :return: ``True`` if the processed index contains the file, otherwise ``False``.
    """
    from .fetch import _hf_files_process, _n_path

    index_data = tar_get_index(archive_file=archive_file, idx_file=idx_file)
    known_files = _hf_files_process(index_data['files'])
    wanted = _n_path(file_in_archive)
    return wanted in known_files
|
||
|
||
def tar_file_info(archive_file: str, file_in_archive: str, idx_file: Optional[str] = None) -> dict:
    """
    Fetch the index entry of one file stored in a local tar archive.

    :param archive_file: Path of the tar archive on the local disk.
    :param file_in_archive: Path of the file inside the archive; it is passed
        through ``_n_path`` before the lookup.
    :param idx_file: Optional path of the index file, forwarded to
        :func:`tar_get_index`.
    :return: The entry stored for the file in the processed index.
    :raises FileNotFoundError: If the index has no entry for the file.
    """
    from .fetch import _hf_files_process, _n_path

    index_data = tar_get_index(archive_file=archive_file, idx_file=idx_file)
    entries = _hf_files_process(index_data['files'])
    key = _n_path(file_in_archive)

    # Guard clause: bail out early when the index does not know the file.
    if key not in entries:
        raise FileNotFoundError(f'File {file_in_archive!r} not found '
                                f'in local archive {archive_file!r}.')
    return entries[key]
|
||
|
||
def tar_file_size(archive_file: str, file_in_archive: str, idx_file: Optional[str] = None) -> int:
    """
    Return the ``size`` field recorded in the index for one archived file.

    :param archive_file: Path of the tar archive on the local disk.
    :param file_in_archive: Path of the file inside the archive.
    :param idx_file: Optional path of the index file.
    :return: The value of the ``size`` key of the file's index entry.
    :raises FileNotFoundError: Propagated from :func:`tar_file_info` when the
        file is not listed in the index.
    """
    entry = tar_file_info(
        archive_file=archive_file,
        file_in_archive=file_in_archive,
        idx_file=idx_file,
    )
    return entry['size']
|
||
|
||
def tar_file_download(archive_file: str, file_in_archive: str, local_file: str,
                      idx_file: Optional[str] = None, chunk_size: int = 1 << 20):
    """
    Copy a single file out of a local tar archive, using the archive's index.

    The index entry supplies the ``offset`` and ``size`` of the file's bytes
    inside the archive, so the bytes are copied directly from that range.

    :param archive_file: Path of the tar archive on the local disk.
    :param file_in_archive: Path of the file inside the archive; it is passed
        through ``_n_path`` before the index lookup.
    :param local_file: Destination path. Its parent directory is created when
        it does not exist yet.
    :param idx_file: Optional path of the index file, forwarded to
        :func:`tar_get_index`.
    :param chunk_size: Maximum number of bytes read from the archive per
        iteration. Defaults to 1 MiB.
    :raises FileNotFoundError: If the index has no entry for the file.
    :raises ArchiveStandaloneFileIncompleteDownload: If the number of bytes
        written differs from the ``size`` recorded in the index (for example
        when the archive is truncated).
    :raises ArchiveStandaloneFileHashNotMatch: If the index entry carries a
        ``sha256`` and the written file's hash differs from it.

    On any exception raised while writing or verifying, ``local_file`` is
    removed before the exception is re-raised.
    """
    from .fetch import _hf_files_process, _n_path, _f_sha256, \
        ArchiveStandaloneFileIncompleteDownload, ArchiveStandaloneFileHashNotMatch

    index = tar_get_index(
        archive_file=archive_file,
        idx_file=idx_file,
    )
    files = _hf_files_process(index['files'])
    if _n_path(file_in_archive) not in files:
        raise FileNotFoundError(f'File {file_in_archive!r} not found '
                                f'in local archive {archive_file!r}.')

    info = files[_n_path(file_in_archive)]

    if os.path.dirname(local_file):
        os.makedirs(os.path.dirname(local_file), exist_ok=True)
    try:
        # The destination is always created, so empty files yield an empty file.
        with open(local_file, 'wb') as wf:
            if info['size'] > 0:
                with open(archive_file, 'rb') as rf:
                    rf.seek(info['offset'])
                    remaining = info['size']
                    while remaining > 0:
                        chunk = rf.read(min(remaining, chunk_size))
                        if not chunk:
                            # EOF reached before the indexed range ended (e.g. a
                            # truncated archive). Stop here instead of spinning
                            # forever; the size check below reports the problem.
                            break
                        wf.write(chunk)
                        remaining -= len(chunk)

        actual_size = os.path.getsize(local_file)
        if actual_size != info['size']:
            raise ArchiveStandaloneFileIncompleteDownload(
                f'Expected size is {info["size"]}, but actually {actual_size} downloaded.'
            )

        # The hash is only verified when the index provides one.
        if info.get('sha256'):
            _sha256 = _f_sha256(local_file)
            if _sha256 != info['sha256']:
                raise ArchiveStandaloneFileHashNotMatch(
                    f'Expected hash is {info["sha256"]!r}, but actually {_sha256!r} found.'
                )

    except Exception:
        # Never leave a partial or corrupt output file behind.
        if os.path.exists(local_file):
            os.remove(local_file)
        raise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import pytest | ||
from hbutils.system import TemporaryDirectory | ||
|
||
from hfutils.operate import download_directory_as_directory | ||
|
||
|
||
@pytest.fixture(scope='module')
def local_narugo_test_cos5t_tars():
    """
    Module-scoped fixture yielding a temporary directory that holds a local
    copy of the ``narugo/test_cos5t_tars`` dataset repository.
    """
    with TemporaryDirectory() as workdir:
        # Fetch the whole dataset repository into the temporary directory.
        download_directory_as_directory(
            repo_id='narugo/test_cos5t_tars',
            repo_type='dataset',
            local_directory=workdir,
        )
        yield workdir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
import os.path | ||
|
||
import pytest | ||
from hbutils.testing import isolated_directory | ||
from natsort import natsorted | ||
|
||
from hfutils.index import tar_list_files, tar_file_exists, tar_file_download, tar_file_info, \ | ||
tar_file_size | ||
from test.testings import get_testfile, file_compare | ||
|
||
|
||
@pytest.mark.unittest
class TestIndexLocalFetch:
    """Tests for the local tar index helpers, run against the downloaded fixture archives."""

    def test_tar_list_files(self, local_narugo_test_cos5t_tars):
        """All 17 indexed members of ``mashu_skins.tar`` are listed."""
        archive = os.path.join(local_narugo_test_cos5t_tars, 'mashu_skins.tar')
        names = tar_list_files(archive_file=archive)
        assert len(names) == 17
        assert natsorted(names) == [
            '.meta.json', 'Bright_Voyager.png', 'Grail_League_1星.png', 'Grail_League_2星.png', 'Grail_League_3星.png',
            'Grail_League_4星.png', 'Grail_League_5星.png', '奥特瑙斯.png', '奥特瑙斯_改建型.png', '常夏的泳装.png',
            '常夏的泳装Ver_02.png', '愚人节.png', '愚人节_奥特瑙斯.png', '第1阶段.png', '第2阶段.png', '第3阶段.png',
            '第4阶段.png'
        ]

    def test_tar_file_exists(self, local_narugo_test_cos5t_tars):
        """Indexed names (with or without a leading ``./``) exist; an unknown name does not."""
        archive = os.path.join(local_narugo_test_cos5t_tars, 'mashu_skins.tar')
        assert tar_file_exists(archive_file=archive, file_in_archive='.meta.json')
        assert tar_file_exists(archive_file=archive, file_in_archive='愚人节_奥特瑙斯.png')
        assert tar_file_exists(archive_file=archive, file_in_archive='./愚人节_奥特瑙斯.png')
        assert not tar_file_exists(archive_file=archive, file_in_archive='愚人节奥特瑙斯.png')

    def test_tar_file_info(self, local_narugo_test_cos5t_tars):
        """Index entries carry the expected offset, hash and size; an unknown name raises."""
        archive = os.path.join(local_narugo_test_cos5t_tars, 'mashu_skins.tar')
        meta_info = {
            'offset': 2725376,
            'sha256': '4585b01c251a496b73cb231d29fc711cfb1d682a84334d95f6f5b6c1cc5b5222',
            'size': 8968
        }
        png_info = {
            'offset': 3954176,
            'sha256': '991497fa586f6f4529827e0f8f1f228c20ec9fb507c314ee9d20d47c46f26e89',
            'size': 255276
        }
        assert tar_file_info(archive_file=archive, file_in_archive='.meta.json') == meta_info
        assert tar_file_info(archive_file=archive, file_in_archive='愚人节_奥特瑙斯.png') == png_info
        assert tar_file_info(archive_file=archive, file_in_archive='./愚人节_奥特瑙斯.png') == png_info
        with pytest.raises(FileNotFoundError):
            _ = tar_file_info(archive_file=archive, file_in_archive='愚人节奥特瑙斯.png')

    def test_tar_file_size(self, local_narugo_test_cos5t_tars):
        """Sizes match the index; an unknown name raises."""
        archive = os.path.join(local_narugo_test_cos5t_tars, 'mashu_skins.tar')
        assert tar_file_size(archive_file=archive, file_in_archive='.meta.json') == 8968
        assert tar_file_size(archive_file=archive, file_in_archive='愚人节_奥特瑙斯.png') == 255276
        assert tar_file_size(archive_file=archive, file_in_archive='./愚人节_奥特瑙斯.png') == 255276
        with pytest.raises(FileNotFoundError):
            _ = tar_file_size(archive_file=archive, file_in_archive='愚人节奥特瑙斯.png')

    def test_tar_file_download_small(self, local_narugo_test_cos5t_tars):
        """A small member is extracted and matches the reference test file."""
        archive = os.path.join(local_narugo_test_cos5t_tars, 'mashu_skins.tar')
        with isolated_directory():
            tar_file_download(
                archive_file=archive,
                file_in_archive='.meta.json',
                local_file='.meta.json'
            )
            file_compare(get_testfile('skin_mashu', '.meta.json'), '.meta.json')

    def test_tar_file_download_lfs(self, local_narugo_test_cos5t_tars):
        """A larger member, addressed with a leading ``./``, is extracted and matches."""
        archive = os.path.join(local_narugo_test_cos5t_tars, 'mashu_skins.tar')
        with isolated_directory():
            tar_file_download(
                archive_file=archive,
                file_in_archive='./愚人节_奥特瑙斯.png',
                local_file='愚人节_奥特瑙斯.png'
            )
            file_compare(get_testfile('skin_mashu', '愚人节_奥特瑙斯.png'), '愚人节_奥特瑙斯.png')

    def test_tar_file_download_not_found(self, local_narugo_test_cos5t_tars):
        """Requesting a member that is not in the index raises ``FileNotFoundError``."""
        archive = os.path.join(local_narugo_test_cos5t_tars, 'mashu_skins.tar')
        with isolated_directory(), pytest.raises(FileNotFoundError):
            tar_file_download(
                archive_file=archive,
                file_in_archive='./愚人节奥特瑙斯.png',
                local_file='愚人节_奥特瑙斯.png'
            )

    def test_tar_file_download_subdir(self, local_narugo_test_cos5t_tars):
        """A member inside a sub-directory can be extracted into a new local directory."""
        archive = os.path.join(local_narugo_test_cos5t_tars, 'ex3.tar')
        with isolated_directory():
            tar_file_download(
                archive_file=archive,
                file_in_archive='artoria_caster_third_ascension_fate/sankaku_21305298.jpg',
                local_file='f/ac.jpg'
            )
            file_compare(get_testfile('sankaku_21305298.jpg'), 'f/ac.jpg')

    def test_tar_file_download_empty(self, local_narugo_test_cos5t_tars):
        """An empty member is extracted as a zero-byte file."""
        archive = os.path.join(local_narugo_test_cos5t_tars, 'empty_file.tar')
        with isolated_directory():
            tar_file_download(
                archive_file=archive,
                file_in_archive='empty_file',
                local_file='empty_file',
            )
            assert os.path.getsize('empty_file') == 0