
Commit

to_file -> write_reference_file_system
magland committed Apr 19, 2024
1 parent f4f9a02 commit 0b35dae
Showing 4 changed files with 79 additions and 14 deletions.
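
In short, LindiH5ZarrStore.to_file becomes write_reference_file_system: the file_type argument is dropped and the output name must now end with .lindi.json rather than .zarr.json. A rough before/after for call sites (file names are placeholders, not from this commit):

import lindi

store = lindi.LindiH5ZarrStore.from_file('example.h5', url='example.h5')

# before this commit:
#   store.to_file('example.zarr.json')

# after this commit (raises unless the name ends with .lindi.json):
store.write_reference_file_system('example.lindi.json')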
11 changes: 6 additions & 5 deletions lindi/LindiH5ZarrStore/LindiH5ZarrStore.py
@@ -1,6 +1,6 @@
 import json
 import base64
-from typing import Union, List, IO, Any, Dict, Literal
+from typing import Union, List, IO, Any, Dict
 from dataclasses import dataclass
 import numpy as np
 import zarr
@@ -460,16 +460,17 @@ def listdir(self, path: str = "") -> List[str]:
         else:
             return []

-    def to_file(self, file_name: str, *, file_type: Literal["zarr.json"] = "zarr.json"):
+    def write_reference_file_system(self, output_file_name: str):
         """Write a reference file system corresponding to this store to a file.
         This can then be loaded using LindiH5pyFile.from_reference_file_system(file_name)
         """
-        if file_type != "zarr.json":
-            raise Exception(f"Unsupported file type: {file_type}")
+
+        if not output_file_name.endswith(".lindi.json"):
+            raise Exception("The output file name must end with .lindi.json")

         ret = self.to_reference_file_system()
-        with open(file_name, "w") as f:
+        with open(output_file_name, "w") as f:
             json.dump(ret, f, indent=2)

     def to_reference_file_system(self) -> dict:
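
Per the docstring, a file written this way can be loaded back with LindiH5pyFile.from_reference_file_system, as the updated tests below do. A minimal load-side sketch (example.lindi.json is a placeholder produced as above, and the dataset name X is an assumption):

import lindi

# open the reference file system in read mode and access data through the h5py-like API
client = lindi.LindiH5pyFile.from_reference_file_system('example.lindi.json')
X = client['X']  # a lindi.LindiH5pyDataset, assuming the source file has a dataset named X
print(X[:])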
12 changes: 4 additions & 8 deletions tests/test_core.py
@@ -304,8 +304,8 @@ def test_reference_file_system_to_file():
         with h5py.File(filename, "w") as f:
             f.create_dataset("X", data=[1, 2, 3])
         with LindiH5ZarrStore.from_file(filename, url=filename) as store:
-            rfs_fname = f'{tmpdir}/test.zarr.json'
-            store.to_file(rfs_fname)
+            rfs_fname = f'{tmpdir}/test.lindi.json'
+            store.write_reference_file_system(rfs_fname)
         client = lindi.LindiH5pyFile.from_reference_file_system(rfs_fname)
         X = client["X"]
         assert isinstance(X, lindi.LindiH5pyDataset)
@@ -423,7 +423,7 @@ def test_lindi_h5_zarr_store():
         with pytest.raises(Exception, match=store_is_closed_msg):
             store.to_reference_file_system()
         with pytest.raises(Exception, match=store_is_closed_msg):
-            store.to_file("test.json")
+            store.write_reference_file_system("test.lindi.json")
         with pytest.raises(Exception, match=store_is_closed_msg):
             store._get_chunk_file_bytes_data("dataset1", "0")

@@ -443,17 +443,13 @@ def test_lindi_h5_zarr_store():
             store["nonexistent/0"]

         # Key error
-        store = LindiH5ZarrStore.from_file(filename)
+        store = LindiH5ZarrStore.from_file(filename, url='.')
         with pytest.raises(KeyError):
             store['']
         assert '' not in store
         with pytest.raises(KeyError):
             store["nonexistent/.zattrs"]

-        # Unsupported file type
-        with pytest.raises(Exception, match="Unsupported file type: zarr"):
-            store.to_file("test.json", file_type="zarr")  # type: ignore
-
         # URL is not set
         store = LindiH5ZarrStore.from_file(filename, url=None)
         with pytest.raises(Exception, match="You must specify a url to create a reference file system"):
68 changes: 68 additions & 0 deletions tests/test_staging_area.py
@@ -0,0 +1,68 @@
+import tempfile
+import os
+import numpy as np
+import lindi
+import shutil
+
+
+def test_staging_area():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        staging_area = lindi.StagingArea.create(tmpdir + '/staging_area')
+        empty_rfs = {'refs': {'.zgroup': {'zarr_format': 2}}}
+        client = lindi.LindiH5pyFile.from_reference_file_system(empty_rfs, mode='r+', staging_area=staging_area)
+        X = np.random.randn(1000, 1000).astype(np.float32)
+        client.create_dataset('large_array', data=X, chunks=(400, 400))
+        total_size = _get_total_size_of_directory(tmpdir)
+        assert total_size >= X.nbytes * 0.5, f'{total_size} < {X.nbytes} * 0.5'  # take into consideration compression
+        rfs = client.to_reference_file_system()
+        client2 = lindi.LindiH5pyFile.from_reference_file_system(rfs, mode='r')
+        assert isinstance(client2, lindi.LindiH5pyFile)
+        X1 = client['large_array']
+        assert isinstance(X1, lindi.LindiH5pyDataset)
+        X2 = client2['large_array']
+        assert isinstance(X2, lindi.LindiH5pyDataset)
+        assert np.allclose(X1[:], X2[:])
+
+        upload_dir = f'{tmpdir}/upload_dir'
+        os.makedirs(upload_dir, exist_ok=True)
+        output_fname = f'{tmpdir}/output.lindi.json'
+
+        def on_upload_blob(fname: str):
+            random_fname = f'{upload_dir}/{_random_string(10)}'
+            shutil.copy(fname, random_fname)
+            return random_fname
+
+        def on_upload_main(fname: str):
+            shutil.copy(fname, output_fname)
+            return output_fname
+
+        assert client.staging_store
+        client.staging_store.upload(
+            on_upload_blob=on_upload_blob,
+            on_upload_main=on_upload_main,
+            consolidate_chunks=True
+        )
+
+        client3 = lindi.LindiH5pyFile.from_reference_file_system(output_fname, mode='r')
+        X3 = client3['large_array']
+        assert isinstance(X3, lindi.LindiH5pyDataset)
+        assert np.allclose(X1[:], X3[:])
+
+
+def _get_total_size_of_directory(directory):
+    total_size = 0
+    for dirpath, dirnames, filenames in os.walk(directory):
+        for f in filenames:
+            fp = os.path.join(dirpath, f)
+            total_size += os.path.getsize(fp)
+    return total_size
+
+
+def _random_string(n):
+    import random
+    import string
+    return ''.join(random.choices(string.ascii_uppercase + string.digits, k=n))
+
+
+if __name__ == '__main__':
+    test_staging_area()
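
The new test above exercises the staging area: datasets written through a LindiH5pyFile opened with a staging_area first land as local files, and client.staging_store.upload then hands each staged file to user-supplied callbacks (on_upload_blob for chunk blobs, on_upload_main for the consolidated .lindi.json); the path or URL a callback returns is what the resulting reference file system points at, which is why client3 can read large_array from output.lindi.json afterward. A hypothetical adaptation of those callbacks that uploads to S3 instead of copying locally (the bucket name, key scheme, and use of boto3 are assumptions, not part of this commit):

import uuid
import boto3

s3 = boto3.client('s3')
bucket = 'my-bucket'  # hypothetical bucket name

def on_upload_blob(fname: str):
    # upload one staged chunk file and return the URL to record in the reference file system
    key = f'blobs/{uuid.uuid4().hex}'
    s3.upload_file(fname, bucket, key)
    return f'https://{bucket}.s3.amazonaws.com/{key}'

def on_upload_main(fname: str):
    # upload the consolidated .lindi.json and return its final URL
    key = 'output.lindi.json'
    s3.upload_file(fname, bucket, key)
    return f'https://{bucket}.s3.amazonaws.com/{key}'

client.staging_store.upload(
    on_upload_blob=on_upload_blob,
    on_upload_main=on_upload_main,
    consolidate_chunks=True,
)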
2 changes: 1 addition & 1 deletion tests/test_store.py
@@ -13,7 +13,7 @@ def test_store():
             group1.create_group("group2")
             group1.create_dataset("dataset2", data=[4, 5, 6])
         with lindi.LindiH5ZarrStore.from_file(filename, url=filename) as store:
-            store.to_file(f"{tmpdir}/test.zarr.json")  # for coverage
+            store.write_reference_file_system(f"{tmpdir}/test.lindi.json")  # for coverage
             a = store.listdir('')
             assert _lists_are_equal_as_sets(a, ['dataset1', 'group1'])
             b = store.listdir('group1')
