Skip to content

Commit

Permalink
removed __bytes_repr__ implementation from fileset and mock, pydra ca…
Browse files Browse the repository at this point in the history
…n call byte_chunks directly
  • Loading branch information
tclose committed Sep 8, 2024
1 parent a713d29 commit 894cd2a
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 28 deletions.
43 changes: 17 additions & 26 deletions fileformats/core/fileset.py
Original file line number Diff line number Diff line change
Expand Up @@ -887,30 +887,6 @@ def hash_files(
file_hashes[str(path)] = crypto_obj.hexdigest()
return file_hashes

def __bytes_repr__(
    self, cache: ty.Dict[ty.Any, str]  # pylint: disable=unused-argument
) -> ty.Iterable[bytes]:
    """Stream a byte representation of the file-set for Pydra's hashing function

    The stream starts with the fully-qualified class name followed by a colon,
    then, for every ``(key, chunk_iter)`` pair produced by ``self.byte_chunks()``,
    a ``,'<key>'=`` separator followed by each chunk taken from ``chunk_iter``.

    Parameters
    ----------
    cache : dict[Any, str]
        an object passed around by Pydra's hashing function to store cached
        versions of previously hashed objects; it is not used here

    Yields
    ------
    bytes
        the class-name header, then alternating key separators and the chunks
        returned by ``byte_chunks()`` (chunk sizes are whatever that method emits)
    """
    klass = type(self)
    header = f"{klass.__module__}.{klass.__name__}:"
    yield header.encode()
    for name, chunks in self.byte_chunks():
        separator = ",'" + name + "'="
        yield separator.encode()
        for chunk in chunks:
            yield chunk

@classmethod
def referenced_types(cls) -> ty.Set[ty.Type[Classifier]]:
"""Returns a flattened list of nested types referenced within the fileset type
Expand Down Expand Up @@ -1701,8 +1677,23 @@ def type_name(cls) -> str:
assert class_name.endswith("Mock")
return class_name[: -len("Mock")]

def __bytes_repr__(self, cache: ty.Dict[str, ty.Any]) -> ty.Iterable[bytes]:
    """Yield every path in ``self.fspaths``, stringified and encoded to bytes

    ``cache`` is accepted for signature compatibility but is not used here.
    """
    for mock_path in self.fspaths:
        yield str(mock_path).encode()
def byte_chunks(
    self,
    mtime: bool = False,
    chunk_len=FILE_CHUNK_LEN_DEFAULT,
    relative_to: ty.Optional[os.PathLike] = None,
    ignore_hidden_files: bool = False,
    ignore_hidden_dirs: bool = False,
):
    """Yield a ``(key, chunk_iterator)`` pair for every path in the mock file-set

    No file is opened: each key's own encoded bytes are yielded in place of
    file contents. Keys are produced in sorted order.

    Parameters
    ----------
    mtime : bool, optional
        accepted but not used by this implementation
    chunk_len : int, optional
        accepted but not used by this implementation
    relative_to : os.PathLike, optional
        prefix whose length is stripped from the front of each stringified path
        to form its key; when omitted, ``os.path.commonpath`` of the paths is used
    ignore_hidden_files : bool, optional
        accepted but not used by this implementation
    ignore_hidden_dirs : bool, optional
        accepted but not used by this implementation

    Yields
    ------
    tuple[str, Iterator[bytes]]
        the key and a single-item iterator holding the UTF-8 encoded key
    """
    # NOTE(review): the unused parameters presumably mirror the signature of the
    # non-mock ``byte_chunks`` so callers can treat both alike -- confirm.
    fspaths = list(self.fspaths)
    if not fspaths:
        # os.path.commonpath raises ValueError on an empty sequence; a file-set
        # without paths simply has nothing to yield
        return
    if relative_to is None:
        prefix = os.path.commonpath(fspaths)
    else:
        prefix = str(relative_to)
    # The prefix is removed purely by length; paths are not checked to actually
    # start with it
    strip_len = len(prefix)
    for key in sorted(str(p)[strip_len:] for p in fspaths):
        # one-item iterator holding the encoded key, used in place of file contents
        yield (key, iter([key.encode()]))

@classproperty
def namespace(cls) -> str:
Expand Down
17 changes: 15 additions & 2 deletions fileformats/core/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import shutil
import time
import pytest
from fileformats.core import FileSet
from fileformats.generic import File, Directory, FsObject
from fileformats.core import FileSet, MockMixin
from fileformats.generic import File, Directory, FsObject, SetOf
from fileformats.text import TextFile
from fileformats.core.mixin import WithSeparateHeader
from fileformats.core.exceptions import UnsatisfiableCopyModeError
from fileformats.core.utils import mtime_cached_property
Expand Down Expand Up @@ -54,6 +55,11 @@ def fsobject(luigi_file, bowser_dir, request):
assert False


@pytest.fixture
def mock_fileset():
    """Provide a mocked ``SetOf[TextFile]`` built from two placeholder paths"""
    mocked = SetOf[TextFile].mock("/path/to/a/mock", "/path/to/another/mock")
    return mocked


@pytest.fixture
def dest_dir(work_dir):
dest_dir = work_dir / "new-dir"
Expand Down Expand Up @@ -407,3 +413,10 @@ def test_mtime_cached_property_force_clear(tmp_path: Path):
file.flag = 1
MtimeTestFile.cached_prop.clear(file)
assert file.cached_prop == 1


def test_hash_mock_files(mock_fileset: MockMixin, work_dir: Path, dest_dir: Path):
    """Hashing a mocked file-set should return one entry per mocked path"""
    file_hashes = mock_fileset.hash_files(relative_to="")
    # with an empty ``relative_to`` nothing is stripped, so the keys are the paths
    hashed_paths = sorted(Path(key) for key in file_hashes)
    expected_paths = sorted(mock_fileset.fspaths)
    assert hashed_paths == expected_paths

0 comments on commit 894cd2a

Please sign in to comment.