diff --git a/hfutils/operate/base.py b/hfutils/operate/base.py index 5ddc8e8487..6744808e39 100644 --- a/hfutils/operate/base.py +++ b/hfutils/operate/base.py @@ -196,7 +196,7 @@ def list_all_with_pattern( except HfHubHTTPError as err: if err.response.status_code == 413: new_batch_size = max(1, int(round(batch_size * batch_factor))) - logging.info(f'Reducing batch size {batch_size} --> {new_batch_size} ...') + logging.warning(f'Reducing batch size {batch_size} --> {new_batch_size} ...') batch_size = new_batch_size continue raise diff --git a/test/operate/test_base.py b/test/operate/test_base.py index 8c1dbd62db..1597947d09 100644 --- a/test/operate/test_base.py +++ b/test/operate/test_base.py @@ -1,7 +1,7 @@ import pytest from natsort import natsorted -from hfutils.operate import list_files_in_repository +from hfutils.operate import list_files_in_repository, list_all_with_pattern should_not_exists = [ '.gitignore', @@ -120,3 +120,32 @@ def test_list_files_in_repository_large(self): def test_list_files_in_repository_repo_not_exist(self): assert list_files_in_repository('deepghs/highres_datasets', repo_type='model') == [] + + def test_list_all_with_pattern(self): + vs = natsorted([ + item.path for item in + list_all_with_pattern( + 'deepghs/danbooru_newest', + repo_type='dataset', + pattern='images/*', + ) + ]) + assert vs == natsorted([ + *[f'images/0{i:03d}.tar' for i in range(1000)], + *[f'images/0{i:03d}.json' for i in range(1000)], + ]) + + def test_list_all_with_pattern_with_large_startup(self): + vs = natsorted([ + item.path for item in + list_all_with_pattern( + 'deepghs/danbooru_newest', + repo_type='dataset', + pattern='images/*', + startup_batch=1500, + ) + ]) + assert vs == natsorted([ + *[f'images/0{i:03d}.tar' for i in range(1000)], + *[f'images/0{i:03d}.json' for i in range(1000)], + ])