Skip to content

Commit

Permalink
Merge pull request #12 from justindujardin/feature/to_local_folders
Browse files (browse the repository at this point in the history)
feat(to_local): support caching folders
  • Loading branch information
repo-ranger[bot] authored Apr 15, 2020
2 parents 1b5b054 + cc56f6e commit 1f97f6b
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 10 deletions.
17 changes: 12 additions & 5 deletions gcspath/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,18 @@ def to_local(cls, blob_path: Union["GCSPath", str]) -> Path:

# If the file isn't in the cache, download it
if not cache_blob.exists():
dest_folder = cache_blob.parent if cache_blob.suffix != "" else cache_blob
dest_folder.mkdir(exist_ok=True, parents=True)
cache_blob.write_bytes(blob_path.read_bytes())
blob_stat: BucketStat = blob_path.stat()
cache_time.write_text(str(blob_stat.last_modified))
# Is a blob
if cache_blob.suffix != "":
dest_folder = cache_blob.parent
dest_folder.mkdir(exist_ok=True, parents=True)
cache_blob.write_bytes(blob_path.read_bytes())
blob_stat: BucketStat = blob_path.stat()
cache_time.write_text(str(blob_stat.last_modified))
else:
# If not a specific blob, enumerate all the blobs under
# the path and cache them, then return the cache folder
for blob in blob_path.rglob("*"):
GCSPath.to_local(blob)
return cache_blob

def stat(self):
Expand Down
29 changes: 24 additions & 5 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,35 @@ def test_is_path_instance(with_adapter):

@pytest.mark.parametrize("adapter", TEST_ADAPTERS)
def test_path_to_local(with_adapter):
path = GCSPath(f"/{bucket}/directory/foo.txt")
path.write_text("---")
assert isinstance(path, GCSPath)
root: GCSPath = GCSPath.from_bucket(bucket) / "to_local"
foo_blob: GCSPath = root / "foo.txt"
foo_blob.write_text("---")
assert isinstance(foo_blob, GCSPath)
use_fs_cache()
cached: Path = GCSPath.to_local(path)
second_cached: Path = GCSPath.to_local(path)

# Cache a folder with blobs
sub_folder: GCSPath = root / "subfolder"
bar_blob: GCSPath = sub_folder / "bar.txt"
bar_blob.write_text("---")
baz_blob: GCSPath = sub_folder / "baz.txt"
baz_blob.write_text("---")

cached_folder: Path = GCSPath.to_local(sub_folder)
assert isinstance(cached_folder, Path)
assert cached_folder.exists() and cached_folder.is_dir()
bar_file = cached_folder / "bar.txt"
baz_file = cached_folder / "baz.txt"
assert bar_file.exists() and bar_file.is_file()
assert baz_file.exists() and baz_file.is_file()

# Cache a blob
cached: Path = GCSPath.to_local(foo_blob)
second_cached: Path = GCSPath.to_local(foo_blob)
assert isinstance(cached, Path)
assert cached.exists() and cached.is_file(), "local file should exist"
assert second_cached == cached, "must be the same path"
assert second_cached.stat() == cached.stat(), "must have the same stat"

clear_fs_cache()
assert not cached.exists(), "cache clear should delete file"

Expand Down

0 comments on commit 1f97f6b

Please sign in to comment.