From 1e03e04cdb36686e2d8ed65fb2f747ae53bb31bd Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Tue, 20 Aug 2024 01:07:41 +0800 Subject: [PATCH] dev(narugo): add unittest for tree cli --- hfutils/entry/download.py | 8 +- test/entry/test_tree.py | 183 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 test/entry/test_tree.py diff --git a/hfutils/entry/download.py b/hfutils/entry/download.py index 0ffabed4f0..7069af0aec 100644 --- a/hfutils/entry/download.py +++ b/hfutils/entry/download.py @@ -7,7 +7,7 @@ from .base import CONTEXT_SETTINGS, command_wrap, ClickErrorException from ..operate import download_file_to_file, download_archive_as_directory, download_directory_as_directory -from ..operate.base import REPO_TYPES, RepoTypeTyping +from ..operate.base import REPO_TYPES, RepoTypeTyping, _IGNORE_PATTERN_UNSET from ..utils import get_requests_session @@ -55,12 +55,15 @@ def _add_download_subcommand(cli: click.Group) -> click.Group: help='Just check the file size when validating the downloaded files.', show_default=True) @click.option('--tmpdir', 'tmpdir', type=str, default=None, help='Use custom temporary Directory.', show_default=True) + @click.option('--all', 'show_all', is_flag=True, type=bool, default=False, + help='Show all files, including hidden files.', show_default=True) @command_wrap() def download( repo_id: str, repo_type: RepoTypeTyping, file_in_repo: Optional[str], archive_in_repo: Optional[str], dir_in_repo: Optional[str], output_path: str, revision: str, max_workers: int, - password: Optional[str], wildcard: Optional[str], soft_mode_when_check: bool, tmpdir: Optional[str] + password: Optional[str], wildcard: Optional[str], soft_mode_when_check: bool, tmpdir: Optional[str], + show_all: bool = False, ): """ Download data from HuggingFace repositories. @@ -135,6 +138,7 @@ def download( silent=False, max_workers=max_workers, soft_mode_when_check=soft_mode_when_check, + ignore_patterns=_IGNORE_PATTERN_UNSET if not show_all else [], ) else: diff --git a/test/entry/test_tree.py b/test/entry/test_tree.py new file mode 100644 index 0000000000..8d8b9f961c --- /dev/null +++ b/test/entry/test_tree.py @@ -0,0 +1,183 @@ +import click +import pytest +from hbutils.testing import simulate_entry + +from hfutils.entry import hfutilscli + + +@pytest.mark.unittest +class TestEntryTree: + def test_simple_tree_1(self): + result = simulate_entry(hfutilscli, [ + 'hfutils', 'tree', + '-r', 'deepghs/test_nested_dataset', + ]) + assert result.exitcode == 0 + lines = click.unstyle(result.stdout).strip().splitlines(keepends=False) + assert lines == [ + 'datasets/deepghs/test_nested_dataset@main/.', + '├── README.md', + '├── data.parquet', + '├── images', + '│ ├── 20240808', + '│ │ ├── 20240808015751528545_642b8ce09a5b1543e88cf95e359d39218d6b3ac5__narugo.json', + '│ │ ├── 20240808015751528545_642b8ce09a5b1543e88cf95e359d39218d6b3ac5__narugo.tar', + '│ │ ├── 20240808091226009067_be359abd170ee3e1a37d3bda7cdf9ff2490f5380__narugo.json', + '│ │ └── 20240808091226009067_be359abd170ee3e1a37d3bda7cdf9ff2490f5380__narugo.tar', + '│ ├── 20240810', + '│ │ ├── 20240810025407329132_7fbe690d6dca73e971036fbb884eba67d11c68d7__narugo.json', + '│ │ ├── 20240810025407329132_7fbe690d6dca73e971036fbb884eba67d11c68d7__narugo.tar', + '│ │ ├── 20240810025642281532_4c13dc63689d93e25a5de44bc9add04ea7d56162__narugo.json', + '│ │ ├── 20240810025642281532_4c13dc63689d93e25a5de44bc9add04ea7d56162__narugo.tar', + '│ │ ├── 20240810220450715507_f95017bb0ff97ee35cd878ba11e6c3d5b4eb6e1f__narugo.json', + '│ │ ├── 20240810220450715507_f95017bb0ff97ee35cd878ba11e6c3d5b4eb6e1f__narugo.tar', + '│ │ ├── 20240810222438167877_c60911f8922933991a20d190175c9eada582af7b__narugo.json', + '│ │ └── 20240810222438167877_c60911f8922933991a20d190175c9eada582af7b__narugo.tar', + '│ └── 20240811', + '│ ├── 20240811011334412620_ce548cb70673e563ad46a37a75b6c1f933b17292__narugo.json', + '│ └── 20240811011334412620_ce548cb70673e563ad46a37a75b6c1f933b17292__narugo.tar', + '├── meta.json', + '├── samples', + '│ ├── colored', + '│ │ ├── 0.webp', + '│ │ ├── 1.webp', + '│ │ ├── 2.webp', + '│ │ ├── 3.webp', + '│ │ ├── 4.webp', + '│ │ ├── 5.webp', + '│ │ ├── 6.webp', + '│ │ └── 7.webp', + '│ └── monochrome', + '│ ├── 0.webp', + '│ ├── 1.webp', + '│ ├── 2.webp', + '│ └── 3.webp', + '└── unarchived', + ' └── 20240810222438167877_c60911f8922933991a20d190175c9eada582af7b__narugo.parquet', + ] + + def test_simple_tree_all(self): + result = simulate_entry(hfutilscli, [ + 'hfutils', 'tree', + '-r', 'deepghs/test_nested_dataset', + '--all' + ]) + assert result.exitcode == 0 + lines = click.unstyle(result.stdout).strip().splitlines(keepends=False) + assert lines == [ + 'datasets/deepghs/test_nested_dataset@main/.', + '├── .gitattributes', + '├── README.md', + '├── data.parquet', + '├── images', + '│ ├── 20240808', + '│ │ ├── 20240808015751528545_642b8ce09a5b1543e88cf95e359d39218d6b3ac5__narugo.json', + '│ │ ├── 20240808015751528545_642b8ce09a5b1543e88cf95e359d39218d6b3ac5__narugo.tar', + '│ │ ├── 20240808091226009067_be359abd170ee3e1a37d3bda7cdf9ff2490f5380__narugo.json', + '│ │ └── 20240808091226009067_be359abd170ee3e1a37d3bda7cdf9ff2490f5380__narugo.tar', + '│ ├── 20240810', + '│ │ ├── 20240810025407329132_7fbe690d6dca73e971036fbb884eba67d11c68d7__narugo.json', + '│ │ ├── 20240810025407329132_7fbe690d6dca73e971036fbb884eba67d11c68d7__narugo.tar', + '│ │ ├── 20240810025642281532_4c13dc63689d93e25a5de44bc9add04ea7d56162__narugo.json', + '│ │ ├── 20240810025642281532_4c13dc63689d93e25a5de44bc9add04ea7d56162__narugo.tar', + '│ │ ├── 20240810220450715507_f95017bb0ff97ee35cd878ba11e6c3d5b4eb6e1f__narugo.json', + '│ │ ├── 20240810220450715507_f95017bb0ff97ee35cd878ba11e6c3d5b4eb6e1f__narugo.tar', + '│ │ ├── 20240810222438167877_c60911f8922933991a20d190175c9eada582af7b__narugo.json', + '│ │ └── 20240810222438167877_c60911f8922933991a20d190175c9eada582af7b__narugo.tar', + '│ └── 20240811', + '│ ├── 20240811011334412620_ce548cb70673e563ad46a37a75b6c1f933b17292__narugo.json', + '│ └── 20240811011334412620_ce548cb70673e563ad46a37a75b6c1f933b17292__narugo.tar', + '├── meta.json', + '├── samples', + '│ ├── colored', + '│ │ ├── 0.webp', + '│ │ ├── 1.webp', + '│ │ ├── 2.webp', + '│ │ ├── 3.webp', + '│ │ ├── 4.webp', + '│ │ ├── 5.webp', + '│ │ ├── 6.webp', + '│ │ └── 7.webp', + '│ └── monochrome', + '│ ├── 0.webp', + '│ ├── 1.webp', + '│ ├── 2.webp', + '│ └── 3.webp', + '└── unarchived', + ' └── 20240810222438167877_c60911f8922933991a20d190175c9eada582af7b__narugo.parquet' + ] + + def test_tree_subdir_1(self): + result = simulate_entry(hfutilscli, [ + 'hfutils', 'tree', + '-r', 'deepghs/test_nested_dataset', + '-d', 'images' + ]) + assert result.exitcode == 0 + lines = click.unstyle(result.stdout).strip().splitlines(keepends=False) + assert lines == [ + "datasets/deepghs/test_nested_dataset@main/images", + "├── 20240808", + "│ ├── 20240808015751528545_642b8ce09a5b1543e88cf95e359d39218d6b3ac5__narugo.json", + "│ ├── 20240808015751528545_642b8ce09a5b1543e88cf95e359d39218d6b3ac5__narugo.tar", + "│ ├── 20240808091226009067_be359abd170ee3e1a37d3bda7cdf9ff2490f5380__narugo.json", + "│ └── 20240808091226009067_be359abd170ee3e1a37d3bda7cdf9ff2490f5380__narugo.tar", + "├── 20240810", + "│ ├── 20240810025407329132_7fbe690d6dca73e971036fbb884eba67d11c68d7__narugo.json", + "│ ├── 20240810025407329132_7fbe690d6dca73e971036fbb884eba67d11c68d7__narugo.tar", + "│ ├── 20240810025642281532_4c13dc63689d93e25a5de44bc9add04ea7d56162__narugo.json", + "│ ├── 20240810025642281532_4c13dc63689d93e25a5de44bc9add04ea7d56162__narugo.tar", + "│ ├── 20240810220450715507_f95017bb0ff97ee35cd878ba11e6c3d5b4eb6e1f__narugo.json", + "│ ├── 20240810220450715507_f95017bb0ff97ee35cd878ba11e6c3d5b4eb6e1f__narugo.tar", + "│ ├── 20240810222438167877_c60911f8922933991a20d190175c9eada582af7b__narugo.json", + "│ └── 20240810222438167877_c60911f8922933991a20d190175c9eada582af7b__narugo.tar", + "└── 20240811", + " ├── 20240811011334412620_ce548cb70673e563ad46a37a75b6c1f933b17292__narugo.json", + " └── 20240811011334412620_ce548cb70673e563ad46a37a75b6c1f933b17292__narugo.tar" + ] + + def test_tree_subdir_2(self): + result = simulate_entry(hfutilscli, [ + 'hfutils', 'tree', + '-r', 'deepghs/test_nested_dataset', + '-d', 'samples' + ]) + assert result.exitcode == 0 + lines = click.unstyle(result.stdout).strip().splitlines(keepends=False) + assert lines == [ + "datasets/deepghs/test_nested_dataset@main/samples", + "├── colored", + "│ ├── 0.webp", + "│ ├── 1.webp", + "│ ├── 2.webp", + "│ ├── 3.webp", + "│ ├── 4.webp", + "│ ├── 5.webp", + "│ ├── 6.webp", + "│ └── 7.webp", + "└── monochrome", + " ├── 0.webp", + " ├── 1.webp", + " ├── 2.webp", + " └── 3.webp" + ] + + def test_tree_subdir_3(self): + result = simulate_entry(hfutilscli, [ + 'hfutils', 'tree', + '-r', 'deepghs/test_nested_dataset', + '-d', 'samples/colored' + ]) + assert result.exitcode == 0 + lines = click.unstyle(result.stdout).strip().splitlines(keepends=False) + assert lines == [ + "datasets/deepghs/test_nested_dataset@main/samples/colored", + "├── 0.webp", + "├── 1.webp", + "├── 2.webp", + "├── 3.webp", + "├── 4.webp", + "├── 5.webp", + "├── 6.webp", + "└── 7.webp" + ]