From 347856227ca01bbfdf5ce30c6c6c2373e11457f0 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Fri, 28 Jun 2019 17:22:08 -0400 Subject: [PATCH 01/28] Add h5py_like to requirements --- requirements_dev.txt | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index 8aec543..e7d3e22 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -11,3 +11,4 @@ setuptools_rust==0.10.6 numpy==1.16.4 pytest==4.6.3 pytest-runner==5.1 +h5py_like==0.2.2 diff --git a/setup.py b/setup.py index ca7f862..4ce6f74 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ with open("HISTORY.rst") as history_file: history = history_file.read() -requirements = ["numpy"] +requirements = ["numpy", "h5py_like>=0.2.2"] setup_requirements = [] test_requirements = [] From cbff525f7e65b026106d6bb44f4c464a4c825f4d Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Fri, 28 Jun 2019 17:22:26 -0400 Subject: [PATCH 02/28] Add tests for h5py-like interface --- tests/conftest.py | 6 ++++++ tests/test_h5_like.py | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 tests/test_h5_like.py diff --git a/tests/conftest.py b/tests/conftest.py index 16bbfe6..5f41289 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,3 +19,9 @@ def ds_dtype(request, tmp_path): pyn5.create_dataset(n5_path, ds_name, DS_SIZE, BLOCKSIZE, dtype) yield pyn5.open(n5_path, ds_name, dtype, False), np.dtype(dtype.lower()) + + +@pytest.fixture +def file_(tmp_path): + f = pyn5.File(tmp_path / "test.n5") + yield f diff --git a/tests/test_h5_like.py b/tests/test_h5_like.py new file mode 100644 index 0000000..faf7e1a --- /dev/null +++ b/tests/test_h5_like.py @@ -0,0 +1,41 @@ +import shutil +import tempfile +from copy import deepcopy +from pathlib import Path + +from h5py_like import Mode, FileMixin +from h5py_like.test_utils import FileTestBase, DatasetTestBase, GroupTestBase, ModeTestBase +from pyn5 import 
File + +ds_kwargs = deepcopy(DatasetTestBase.dataset_kwargs) +ds_kwargs["chunks"] = (5, 5, 5) + + +class TestFile(FileTestBase): + dataset_kwargs = ds_kwargs + pass + + +class TestGroup(GroupTestBase): + dataset_kwargs = ds_kwargs + pass + + +class TestDataset(DatasetTestBase): + dataset_kwargs = ds_kwargs + pass + + +class TestMode(ModeTestBase): + def setup_method(self): + self.tmp_dir = Path(tempfile.mkdtemp()) + + def teardown_method(self): + try: + shutil.rmtree(self.tmp_dir) + except FileNotFoundError: + pass + + def factory(self, mode: Mode) -> FileMixin: + fpath = self.tmp_dir / "test.n5" + return File(fpath, mode) From 5900886fe4483a2524ee55889fccd3d7a7ca5275 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Fri, 28 Jun 2019 17:22:40 -0400 Subject: [PATCH 03/28] Add implementation for h5py-like interface --- pyn5/__init__.py | 4 + pyn5/attributes.py | 132 ++++++++++++++++++++++++++++++ pyn5/dataset.py | 113 ++++++++++++++++++++++++++ pyn5/file_group.py | 197 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 446 insertions(+) create mode 100644 pyn5/attributes.py create mode 100644 pyn5/dataset.py create mode 100644 pyn5/file_group.py diff --git a/pyn5/__init__.py b/pyn5/__init__.py index ba85413..ac8d0da 100644 --- a/pyn5/__init__.py +++ b/pyn5/__init__.py @@ -20,6 +20,9 @@ DatasetFLOAT64, create_dataset, ) +from .attributes import AttributeManager +from .dataset import Dataset +from .file_group import File, Group __all__ = [ "open", @@ -36,4 +39,5 @@ "DatasetINT64", "DatasetFLOAT32", "DatasetFLOAT64", + "File" ] diff --git a/pyn5/attributes.py b/pyn5/attributes.py new file mode 100644 index 0000000..958dc81 --- /dev/null +++ b/pyn5/attributes.py @@ -0,0 +1,132 @@ +from __future__ import annotations +import errno +import json +from contextlib import contextmanager +from copy import deepcopy +from pathlib import Path + +from typing import Iterator, Any, Dict + +import numpy as np + +from h5py_like import AttributeManagerBase, mutation, Mode 
+from h5py_like.base import H5ObjectLike + + +class NumpyEncoder(json.JSONEncoder): + """JSON encoder object which converts numpy arrays to lists.""" + + def default(self, obj): + if isinstance(obj, np.ndarray): + return obj.tolist() + return super().default(obj) + + +class AttributeManager(AttributeManagerBase): + """Object which reads and writes group attributes as JSON. + + The ``_encoder`` member variable (default ``NumpyEncoder``) can be set on + the class or the instance to change how attributes are serialised. + + The ``_dump_kwargs`` member variable is passed as kwargs to ``json.dump`` on write. + By default, it is an empty dict. + New instances make a deep copy of the class variable. + """ + + _dataset_keys = {"dimensions", "blockSize", "dataType", "compression"} + _encoder = NumpyEncoder + _dump_kwargs = dict() + + def __init__(self, dpath: Path, mode=Mode.default()): + """ + :param dpath: Path of the directory in which the attributes file resides. + :param mode: Mode + """ + self._path = Path(dpath) / "attributes.json" + self._dump_kwargs = deepcopy(self._dump_kwargs) + super().__init__(mode) + + @classmethod + def from_parent(cls, parent: H5ObjectLike) -> AttributeManager: + """ + Create AttributeManager for a File, Group or Dataset. + + :param parent: File, Group or Dataset to which the attributes belong. 
+ :return: AttributeManager instance + """ + return cls(parent._path, parent.mode) + + @mutation + def __setitem__(self, k, v) -> None: + with self._open_attributes(True) as attrs: + attrs[k] = v + + @mutation + def __delitem__(self, v) -> None: + with self._open_attributes(True) as attrs: + del attrs[v] + + def __getitem__(self, k): + with self._open_attributes() as attrs: + return attrs[k] + + def __len__(self) -> int: + with self._open_attributes() as attrs: + return len(attrs) + + def __iter__(self) -> Iterator: + yield from self.keys() + + def keys(self): + with self._open_attributes() as attrs: + return attrs.keys() + + def values(self): + """Mutations are not written back to the attributes file""" + with self._open_attributes() as attrs: + return attrs.values() + + def items(self): + """Mutations are not written back to the attributes file""" + with self._open_attributes() as attrs: + return attrs.items() + + def __contains__(self, item): + with self._open_attributes() as attrs: + return item in attrs + + def _is_dataset(self): + with self._open_attributes() as attrs: + return self._dataset_keys.issubset(attrs) + + @contextmanager + def _open_attributes(self, write: bool = False) -> Dict[str, Any]: + """Return attributes as a context manager. + + :param write: Whether to write changes to the attributes dict. 
+ :return: attributes as a dict (including N5 metadata) + """ + attributes = self._read_attributes() + yield attributes + if write: + self._write_attributes(attributes) + + def _read_attributes(self): + """Return attributes or an empty dict if they do not exist""" + try: + with open(self._path, "r") as f: + attributes = json.load(f) + except ValueError: + attributes = {} + except IOError as e: + if e.errno == errno.ENOENT: + attributes = {} + else: + raise + + return attributes + + def _write_attributes(self, attrs): + """Write dict to attributes file, using AttributeManager's encoder and kwargs.""" + with open(self._path, "w") as f: + json.dump(attrs, f, cls=self._encoder, **self._dump_kwargs) diff --git a/pyn5/dataset.py b/pyn5/dataset.py new file mode 100644 index 0000000..47817f9 --- /dev/null +++ b/pyn5/dataset.py @@ -0,0 +1,113 @@ +from typing import Union, Tuple, Optional, Any + +import numpy as np + +from h5py_like import DatasetBase, AttributeManagerBase, mutation, Name +from pyn5.attributes import AttributeManager +from .pyn5 import ( + DatasetUINT8, + DatasetUINT16, + DatasetUINT32, + DatasetUINT64, + DatasetINT8, + DatasetINT16, + DatasetINT32, + DatasetINT64, + DatasetFLOAT32, + DatasetFLOAT64, +) + + +dataset_types = { + np.dtype("uint8"): DatasetUINT8, + np.dtype("uint16"): DatasetUINT16, + np.dtype("uint32"): DatasetUINT32, + np.dtype("uint64"): DatasetUINT64, + np.dtype("int8"): DatasetINT8, + np.dtype("int16"): DatasetINT16, + np.dtype("int32"): DatasetINT32, + np.dtype("int64"): DatasetINT64, + np.dtype("float32"): DatasetFLOAT32, + np.dtype("float64"): DatasetFLOAT64, +} + + +class Dataset(DatasetBase): + def __init__(self, name: str, parent: "Group"): + """ + + :param name: basename of the dataset + :param parent: group to which the dataset belongs + """ + super().__init__(parent.mode) + self._name = name + self._parent = parent + self._path = self.parent._path / name + self._attrs = AttributeManager.from_parent(self) + + with 
self._attrs._open_attributes() as attrs: + self._shape = tuple(attrs["dimensions"][::-1]) + self._dtype = np.dtype(self.attrs["dataType"].lower()) + self._chunks = tuple(self.attrs["blockSize"][::-1]) + + self._impl = dataset_types[self.dtype]( + str(self.file._path), + self.name[1:], + True, # raise error if dataset does not exist on disk + ) + + @property + def dims(self): + raise NotImplementedError() + + @property + def shape(self) -> Tuple[int, ...]: + return self._shape + + @property + def dtype(self) -> np.dtype: + return self._dtype + + @property + def maxshape(self) -> Tuple[int, ...]: + raise NotImplementedError() + + @property + def fillvalue(self) -> Any: + return 0 + + @property + def chunks(self) -> Optional[Tuple[int, ...]]: + return self._chunks + + def resize(self, size: Union[int, Tuple[int, ...]], axis: Optional[int] = None): + raise NotImplementedError() + + def __getitem__(self, args) -> np.ndarray: + def fn(translation, dimensions): + return self._impl.read_ndarray( + translation[::-1], dimensions[::-1] + ).transpose() + + return self._getitem(args, fn, self._astype) + + @mutation + def __setitem__(self, args, val): + def fn(offset, arr): + return self._impl.write_ndarray( + offset[::-1], arr.transpose(), self.fillvalue + ) + + return self._setitem(args, val, fn) + + @property + def attrs(self) -> AttributeManagerBase: + return self._attrs + + @property + def name(self) -> str: + return str(Name(self.parent.name) / self._name) + + @property + def parent(self): + return self._parent diff --git a/pyn5/file_group.py b/pyn5/file_group.py new file mode 100644 index 0000000..2518b95 --- /dev/null +++ b/pyn5/file_group.py @@ -0,0 +1,197 @@ +from __future__ import annotations +import shutil +import warnings +from pathlib import Path +from typing import Iterator + +import numpy as np + +from h5py_like import GroupBase, FileMixin, AttributeManagerBase, Mode, mutation +from h5py_like.common import Name +from h5py_like.base import H5ObjectLike +from pyn5 
import Dataset +from pyn5.attributes import AttributeManager +from .pyn5 import create_dataset + +N5_VERSION = "2.0.2" + + +class Group(GroupBase): + def __init__(self, name: str, parent: Group): + """ + + :param name: basename of the group + :param parent: group to which the group belongs + """ + self._name = name + self._parent = parent + self._path = self.parent._path / name + + self._attrs = AttributeManager.from_parent(self) + super().__init__(self.mode) + + def _create_child_group(self, name) -> GroupBase: + dpath = self._path / name + + try: + obj = self._get_child(name) + except KeyError: + pass + else: + if isinstance(obj, Group): + raise FileExistsError(f"Group already exists at {dpath}") + else: + raise TypeError(f"Dataset found at {dpath}") + + dpath.mkdir() + return Group(name, self) + + def _create_child_dataset( + self, name, shape=None, dtype=None, data=None, chunks=None, **kwds + ): + if chunks is None: + raise ValueError("'chunks' must be given") + + for key in kwds: + warnings.warn( + f"pyn5 does not implement '{key}' argument for create_dataset; it will be ignored" + ) + + if data is not None: + data = np.asarray(data, dtype=dtype) + dtype = data.dtype + shape = data.shape + + dtype = np.dtype(dtype) + + dpath = self._path / name + + try: + obj = self._get_child(name) + except KeyError: + pass + else: + if isinstance(obj, Dataset): + raise TypeError(f"Dataset already exists at {dpath}") + elif isinstance(obj, Group): + raise FileExistsError(f"Group found at {dpath}") + + file_path = str(self.file.filename) + create_dataset( + file_path, + str(Name(self.name) / name)[1:], + list(shape)[::-1], + list(chunks)[::-1], + dtype.name.upper(), + ) + + ds = Dataset(name, self) + if data is not None: + ds[...] 
= data + return ds + + def _get_child(self, name) -> H5ObjectLike: + dpath = self._path / name + if not dpath.is_dir(): + raise KeyError() + attrs = AttributeManager(dpath) + if attrs._is_dataset(): + return Dataset(name, self) + else: + return Group(name, self) + + @mutation + def __setitem__(self, name, obj): + """Not implemented""" + raise NotImplementedError() + + def copy( + self, + source, + dest, + name=None, + shallow=False, + expand_soft=False, + expand_external=False, + expand_refs=False, + without_attrs=False, + ): + """Not implemented""" + raise NotImplementedError() + + @property + def attrs(self) -> AttributeManager: + return self._attrs + + @property + def name(self) -> str: + return str(Name(self.parent.name) / self._name) + + @property + def parent(self): + return self._parent + + @mutation + def __delitem__(self, v) -> None: + shutil.rmtree(self[v]._path) + + def __len__(self) -> int: + return len(list(self)) + + def __iter__(self) -> Iterator: + for path in self._path.iterdir(): + if path.is_dir(): + yield path.name + + +class File(FileMixin, Group): + def __init__(self, name, mode=Mode.READ_WRITE_CREATE): + super().__init__(name, mode) + self._require_dir(self.filename) + self._path = self.filename + self._attrs = AttributeManager.from_parent(self) + + def __setitem__(self, name, obj): + """Not implemented""" + raise NotImplementedError() + + def copy( + self, + source, + dest, + name=None, + shallow=False, + expand_soft=False, + expand_external=False, + expand_refs=False, + without_attrs=False, + ): + """Not implemented""" + raise NotImplementedError() + + def _require_dir(self, dpath: Path): + if dpath.is_file(): + raise FileExistsError("File found at desired location of directory") + created = False + if dpath.is_dir(): + if self.mode == Mode.CREATE: + raise FileExistsError() + elif self.mode == Mode.CREATE_TRUNCATE: + shutil.rmtree(dpath) + dpath.mkdir() + created = True + else: + if self.mode in (Mode.READ_ONLY, Mode.READ_WRITE): + raise 
FileNotFoundError() + else: + dpath.mkdir(parents=True) + created = True + + attrs = AttributeManager(dpath, self.mode) + if created: + attrs["n5"] = N5_VERSION + else: + version = attrs.get("n5") + if version != N5_VERSION: + raise ValueError(f"Expected N5 version '{N5_VERSION}', got {version}") + return created From 8cabda926e9ebb194d15c5f7ce82b0fe16c6e7e2 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 13:52:27 -0400 Subject: [PATCH 04/28] chunk-guessing (requires h5py_like 0.3) --- pyn5/file_group.py | 16 ++++++++++++---- requirements_dev.txt | 2 +- setup.py | 2 +- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/pyn5/file_group.py b/pyn5/file_group.py index 2518b95..28ef958 100644 --- a/pyn5/file_group.py +++ b/pyn5/file_group.py @@ -6,9 +6,10 @@ import numpy as np -from h5py_like import GroupBase, FileMixin, AttributeManagerBase, Mode, mutation +from h5py_like import GroupBase, FileMixin, Mode, mutation from h5py_like.common import Name from h5py_like.base import H5ObjectLike +from h5py_like.shape_utils import guess_chunks from pyn5 import Dataset from pyn5.attributes import AttributeManager from .pyn5 import create_dataset @@ -49,9 +50,6 @@ def _create_child_group(self, name) -> GroupBase: def _create_child_dataset( self, name, shape=None, dtype=None, data=None, chunks=None, **kwds ): - if chunks is None: - raise ValueError("'chunks' must be given") - for key in kwds: warnings.warn( f"pyn5 does not implement '{key}' argument for create_dataset; it will be ignored" @@ -64,6 +62,16 @@ def _create_child_dataset( dtype = np.dtype(dtype) + if chunks is None: + warnings.warn( + "chunks not set: entire dataset will be a single chunk. " + "This will be slow and inefficient. " + "Set chunks=True to guess reasonable chunk sizes." 
+ ) + chunks = shape + elif chunks == True: + chunks = guess_chunks(shape, dtype.itemsize) + dpath = self._path / name try: diff --git a/requirements_dev.txt b/requirements_dev.txt index e7d3e22..8106b62 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -11,4 +11,4 @@ setuptools_rust==0.10.6 numpy==1.16.4 pytest==4.6.3 pytest-runner==5.1 -h5py_like==0.2.2 +h5py_like==0.3.0 diff --git a/setup.py b/setup.py index 4ce6f74..5e46379 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ with open("HISTORY.rst") as history_file: history = history_file.read() -requirements = ["numpy", "h5py_like>=0.2.2"] +requirements = ["numpy", "h5py_like>=0.3.0"] setup_requirements = [] test_requirements = [] From 61e0ac20cb20304044d80f76b670eedf8c3fe2d2 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 14:35:38 -0400 Subject: [PATCH 05/28] restrict version to 3.7 --- .travis.yml | 25 ++++++++++++------------- setup.py | 4 ++-- tox.ini | 7 ++++--- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.travis.yml b/.travis.yml index 37066db..5bc2256 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ language: python +dist: xenial python: - - 3.5 + - 3.7 stages: - test @@ -36,20 +37,18 @@ deploy_template: &deploy_template jobs: include: - - <<: *test_template - python: 3.5 - - <<: *test_template - python: 3.6 +# - <<: *test_template +# python: 3.5 +# - <<: *test_template +# python: 3.6 - <<: *test_template python: 3.7 - dist: xenial - sudo: required - - <<: *deploy_template - env: - - CIBW_BUILD=cp35-* - - <<: *deploy_template - env: - - CIBW_BUILD=cp36-* +# - <<: *deploy_template +# env: +# - CIBW_BUILD=cp35-* +# - <<: *deploy_template +# env: +# - CIBW_BUILD=cp36-* - <<: *deploy_template env: - CIBW_BUILD=cp37-* diff --git a/setup.py b/setup.py index 5e46379..48117a2 100644 --- a/setup.py +++ b/setup.py @@ -42,8 +42,8 @@ "License :: OSI Approved :: MIT License", "Natural Language :: English", "Programming Language :: Rust", - 
"Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", + # "Programming Language :: Python :: 3.5", + # "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", ], description="Python wrapper around rust-n5.", diff --git a/tox.ini b/tox.ini index 0b25a75..e3a9e16 100644 --- a/tox.ini +++ b/tox.ini @@ -1,11 +1,12 @@ [tox] -envlist = py35, py36, py37 +;envlist = py35, py36, py37 +envlist = py37 [travis] python = 3.7: py37 - 3.6: py36 - 3.5: py35 +; 3.6: py36 +; 3.5: py35 [flake8] max-line-length = 98 From d0d7da5a2ab153374a8cc52af19e16fe9bbf5ddb Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 14:40:42 -0400 Subject: [PATCH 06/28] align editorconfig with flake8 --- .editorconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/.editorconfig b/.editorconfig index d4a2c44..a2008e4 100644 --- a/.editorconfig +++ b/.editorconfig @@ -9,6 +9,7 @@ trim_trailing_whitespace = true insert_final_newline = true charset = utf-8 end_of_line = lf +max_line_length = 98 [*.bat] indent_style = tab From 2c48699dff1ae61598c28b261f9408a8446ccb80 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 14:40:51 -0400 Subject: [PATCH 07/28] fix lint issues --- pyn5/__init__.py | 5 ++++- pyn5/dataset.py | 2 +- pyn5/file_group.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pyn5/__init__.py b/pyn5/__init__.py index ac8d0da..2e608c0 100644 --- a/pyn5/__init__.py +++ b/pyn5/__init__.py @@ -39,5 +39,8 @@ "DatasetINT64", "DatasetFLOAT32", "DatasetFLOAT64", - "File" + "File", + "Group", + "Dataset", + "AttributeManager" ] diff --git a/pyn5/dataset.py b/pyn5/dataset.py index 47817f9..2c9cb46 100644 --- a/pyn5/dataset.py +++ b/pyn5/dataset.py @@ -33,7 +33,7 @@ class Dataset(DatasetBase): - def __init__(self, name: str, parent: "Group"): + def __init__(self, name: str, parent: "Group"): # noqa would need circular imports """ :param name: basename of the dataset diff --git a/pyn5/file_group.py 
b/pyn5/file_group.py index 28ef958..c2e609c 100644 --- a/pyn5/file_group.py +++ b/pyn5/file_group.py @@ -69,7 +69,7 @@ def _create_child_dataset( "Set chunks=True to guess reasonable chunk sizes." ) chunks = shape - elif chunks == True: + elif chunks is True: chunks = guess_chunks(shape, dtype.itemsize) dpath = self._path / name From d407689aaee32937dac4ba4657862d05f13f418b Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 17:39:38 -0400 Subject: [PATCH 08/28] safer N5 dataset metadata handling --- pyn5/attributes.py | 46 ++++++++++++++++++++++++++++++++++++------- pyn5/dataset.py | 12 ++++++++--- tests/test_h5_like.py | 28 +++++++++++++++++++++++++- 3 files changed, 75 insertions(+), 11 deletions(-) diff --git a/pyn5/attributes.py b/pyn5/attributes.py index 958dc81..c058b56 100644 --- a/pyn5/attributes.py +++ b/pyn5/attributes.py @@ -3,6 +3,8 @@ import json from contextlib import contextmanager from copy import deepcopy +from functools import wraps + from pathlib import Path from typing import Iterator, Any, Dict @@ -22,6 +24,16 @@ def default(self, obj): return super().default(obj) +def restrict_metadata(fn): + """Decorator for AttributeManager methods which prevents mutation of N5 metadata""" + @wraps(fn) + def wrapped(obj: AttributeManager, key, *args, **kwargs): + if obj._is_dataset() and key in obj._dataset_keys: + raise RuntimeError(f"N5 metadata (key '{key}') cannot be mutated") + return fn(obj, key, *args, **kwargs) + return mutation(wrapped) + + class AttributeManager(AttributeManagerBase): """Object which reads and writes group attributes as JSON. 
@@ -44,6 +56,7 @@ def __init__(self, dpath: Path, mode=Mode.default()): """ self._path = Path(dpath) / "attributes.json" self._dump_kwargs = deepcopy(self._dump_kwargs) + self._has_dataset_keys_ = None super().__init__(mode) @classmethod @@ -56,12 +69,12 @@ def from_parent(cls, parent: H5ObjectLike) -> AttributeManager: """ return cls(parent._path, parent.mode) - @mutation + @restrict_metadata def __setitem__(self, k, v) -> None: with self._open_attributes(True) as attrs: attrs[k] = v - @mutation + @restrict_metadata def __delitem__(self, v) -> None: with self._open_attributes(True) as attrs: del attrs[v] @@ -95,21 +108,40 @@ def __contains__(self, item): with self._open_attributes() as attrs: return item in attrs - def _is_dataset(self): - with self._open_attributes() as attrs: - return self._dataset_keys.issubset(attrs) + def _is_dataset(self) -> bool: + if self._has_dataset_keys_ is None: + try: + with open(self._path, "r") as f: + self._has_dataset_keys_ = self._dataset_keys.issubset(json.load(f)) + except ValueError: + self._has_dataset_keys_ = False + except IOError as e: + if e.errno == errno.ENOENT: + self._has_dataset_keys_ = False + else: + raise + return self._has_dataset_keys_ @contextmanager def _open_attributes(self, write: bool = False) -> Dict[str, Any]: """Return attributes as a context manager. + N5 metadata keys are stripped from the dict. + :param write: Whether to write changes to the attributes dict. 
- :return: attributes as a dict (including N5 metadata) + :return: attributes as a dict """ attributes = self._read_attributes() + + if self._is_dataset(): + hidden_attrs = {k: attributes.pop(k) for k in self._dataset_keys} + else: + hidden_attrs = dict() + yield attributes if write: - self._write_attributes(attributes) + hidden_attrs.update(attributes) + self._write_attributes(hidden_attrs) def _read_attributes(self): """Return attributes or an empty dict if they do not exist""" diff --git a/pyn5/dataset.py b/pyn5/dataset.py index 2c9cb46..3468489 100644 --- a/pyn5/dataset.py +++ b/pyn5/dataset.py @@ -45,10 +45,16 @@ def __init__(self, name: str, parent: "Group"): # noqa would need circular impo self._path = self.parent._path / name self._attrs = AttributeManager.from_parent(self) - with self._attrs._open_attributes() as attrs: + attrs = self._attrs._read_attributes() + + try: self._shape = tuple(attrs["dimensions"][::-1]) - self._dtype = np.dtype(self.attrs["dataType"].lower()) - self._chunks = tuple(self.attrs["blockSize"][::-1]) + self._dtype = np.dtype(attrs["dataType"].lower()) + self._chunks = tuple(attrs["blockSize"][::-1]) + except KeyError: + raise ValueError( + f"Not a dataset (missing metadata key): " + str(self._path) + ) self._impl = dataset_types[self.dtype]( str(self.file._path), diff --git a/tests/test_h5_like.py b/tests/test_h5_like.py index faf7e1a..894d7f3 100644 --- a/tests/test_h5_like.py +++ b/tests/test_h5_like.py @@ -1,3 +1,6 @@ +import json + +import pytest import shutil import tempfile from copy import deepcopy @@ -23,7 +26,30 @@ class TestGroup(GroupTestBase): class TestDataset(DatasetTestBase): dataset_kwargs = ds_kwargs - pass + + def test_has_metadata(self, file_): + ds = self.dataset(file_) + with open(ds.attrs._path) as f: + attrs = json.load(f) + for key in ds.attrs._dataset_keys: + assert key in attrs + + def test_no_return_metadata(self, file_): + ds = self.dataset(file_) + + for key in ds.attrs._dataset_keys: + assert key not in 
ds.attrs + assert key not in dict(ds.attrs) + + def test_no_mutate_metadata(self, file_): + ds = self.dataset(file_) + + for key in ds.attrs._dataset_keys: + with pytest.raises(RuntimeError): + ds.attrs[key] = "not a datatype" + + with pytest.raises(RuntimeError): + del ds.attrs[key] class TestMode(ModeTestBase): From 8f656ef057757c0a61571dcb3ff4086a16407b0a Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 19:06:39 -0400 Subject: [PATCH 09/28] dataset maxshape = shape --- pyn5/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyn5/dataset.py b/pyn5/dataset.py index 3468489..38f7124 100644 --- a/pyn5/dataset.py +++ b/pyn5/dataset.py @@ -76,7 +76,7 @@ def dtype(self) -> np.dtype: @property def maxshape(self) -> Tuple[int, ...]: - raise NotImplementedError() + return self.shape @property def fillvalue(self) -> Any: From a1a7a1de253accf162562eb77078598a306caccc Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 19:09:05 -0400 Subject: [PATCH 10/28] make block ordering explicit with tests --- tests/conftest.py | 19 +++++++++++++++++++ tests/test_h5_like.py | 31 +++++++++++++++++++++++++++++++ tests/test_pyn5.py | 30 ++++++++++++++++++++++++++++-- 3 files changed, 78 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 5f41289..4886a51 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,11 @@ import pyn5 +try: + import h5py +except ImportError: + h5py = None + DS_SIZE = (10, 10, 10) BLOCKSIZE = (2, 2, 2) @@ -11,6 +16,11 @@ FLOAT_DTYPES = ["FLOAT32", "FLOAT64"] +@pytest.fixture +def random(): + return np.random.RandomState(1991) + + @pytest.fixture(params=INT_DTYPES + FLOAT_DTYPES) def ds_dtype(request, tmp_path): dtype = request.param @@ -25,3 +35,12 @@ def ds_dtype(request, tmp_path): def file_(tmp_path): f = pyn5.File(tmp_path / "test.n5") yield f + + +@pytest.fixture +def h5_file(tmp_path): + if not h5py: + pytest.skip("h5py not installed") + + with 
h5py.File(tmp_path / "test.hdf5") as f: + yield f diff --git a/tests/test_h5_like.py b/tests/test_h5_like.py index 894d7f3..803f730 100644 --- a/tests/test_h5_like.py +++ b/tests/test_h5_like.py @@ -1,5 +1,6 @@ import json +import numpy as np import pytest import shutil import tempfile @@ -65,3 +66,33 @@ def teardown_method(self): def factory(self, mode: Mode) -> FileMixin: fpath = self.tmp_dir / "test.n5" return File(fpath, mode) + + +def test_data_ordering(file_, h5_file, random): + data = random.random_sample((11, 12, 13)) + ds_kwargs = {"shape": data.shape, "dtype": data.dtype, "chunks": (5, 5, 5)} + + for f in (file_, h5_file): + n5_ds = f.create_dataset("ds", **ds_kwargs) + n5_ds[:] = data + + assert np.allclose(file_["ds"][:], h5_file["ds"][:]) + + +def test_created_dirs(file_): + shape = (10, 20) + data = np.ones(shape) + + ds = file_.create_dataset("ds", data=data, chunks=(10, 10)) + + created = { + str(path.relative_to(ds._path)) + for path in ds._path.glob('**/*') + if path.suffix != ".json" + } + + assert created == {"0", "1", "0/0", "1/0"} + + attrs = ds.attrs._read_attributes() + + assert list(ds.shape) == attrs["dimensions"][::-1] diff --git a/tests/test_pyn5.py b/tests/test_pyn5.py index f85a32f..e828052 100644 --- a/tests/test_pyn5.py +++ b/tests/test_pyn5.py @@ -2,8 +2,7 @@ # -*- coding: utf-8 -*- """Tests for `pyn5` package.""" - - +import json import unittest from pathlib import Path import shutil @@ -94,6 +93,33 @@ def test_read_write_wrong_dtype(ds_dtype): np.testing.assert_equal(ds.read_ndarray([4, 4, 4], BLOCKSIZE), np.zeros(BLOCKSIZE)) +def test_data_ordering(tmp_path): + root = tmp_path / "test.n5" + + shape = (10, 20) + chunks = (10, 10) + + pyn5.create_dataset(str(root), "ds", shape, chunks, "UINT8") + ds = pyn5.DatasetUINT8(str(root), "ds", False) + arr = np.array(ds.read_ndarray((0, 0), shape)) + arr += 1 + ds.write_ndarray((0, 0), arr, 0) + + ds_path = root / "ds" + created = { + str(path.relative_to(ds_path)) + for path in 
ds_path.glob('**/*') + if path.suffix != ".json" + } + + assert created == {"0", "0/0", "0/1"} + + with open(ds_path / "attributes.json") as f: + attrs = json.load(f) + + assert list(shape) == attrs["dimensions"] + + class TestPythonReadWrite(unittest.TestCase): def setUp(self): self.root = "test.n5" From 7a26024e6210be0a7e9641e2008ca9d4757571e5 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 19:43:40 -0400 Subject: [PATCH 11/28] add h5py to requirements, for testing --- requirements_dev.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements_dev.txt b/requirements_dev.txt index 8106b62..2393d0a 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -12,3 +12,4 @@ numpy==1.16.4 pytest==4.6.3 pytest-runner==5.1 h5py_like==0.3.0 +h5py==2.9.0 From 65724e12f63b75694de55529ded785363dbd8162 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 19:44:00 -0400 Subject: [PATCH 12/28] add tests to compare with z5 --- tests/common.py | 13 +++++++++++++ tests/conftest.py | 13 +++++++++++++ tests/test_h5_like.py | 34 ++++++++++++++++++++++++---------- tests/test_pyn5.py | 32 ++++++++++++++++++++++++++------ 4 files changed, 76 insertions(+), 16 deletions(-) create mode 100644 tests/common.py diff --git a/tests/common.py b/tests/common.py new file mode 100644 index 0000000..81731d3 --- /dev/null +++ b/tests/common.py @@ -0,0 +1,13 @@ +import json + + +def blocks_in(dpath): + return { + str(path.relative_to(dpath)) + for path in dpath.glob('**/*') + if path.suffix != ".json" + } + + +def attrs_in(dpath): + return json.loads((dpath / "attributes.json").read_text()) diff --git a/tests/conftest.py b/tests/conftest.py index 4886a51..31c7c6f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,6 +8,11 @@ except ImportError: h5py = None +try: + import z5py +except ImportError: + z5py = None + DS_SIZE = (10, 10, 10) BLOCKSIZE = (2, 2, 2) @@ -44,3 +49,11 @@ def h5_file(tmp_path): with h5py.File(tmp_path / "test.hdf5") as f: 
yield f + + +@pytest.fixture +def z5_file(tmp_path): + if not z5py: + pytest.skip("z5py not installed") + + yield z5py.N5File(tmp_path / "test_z5.n5") diff --git a/tests/test_h5_like.py b/tests/test_h5_like.py index 803f730..7de2224 100644 --- a/tests/test_h5_like.py +++ b/tests/test_h5_like.py @@ -11,6 +11,8 @@ from h5py_like.test_utils import FileTestBase, DatasetTestBase, GroupTestBase, ModeTestBase from pyn5 import File +from .common import blocks_in, attrs_in + ds_kwargs = deepcopy(DatasetTestBase.dataset_kwargs) ds_kwargs["chunks"] = (5, 5, 5) @@ -68,9 +70,10 @@ def factory(self, mode: Mode) -> FileMixin: return File(fpath, mode) -def test_data_ordering(file_, h5_file, random): - data = random.random_sample((11, 12, 13)) - ds_kwargs = {"shape": data.shape, "dtype": data.dtype, "chunks": (5, 5, 5)} +def test_data_ordering(file_, h5_file): + shape = (5, 10, 15) + data = np.arange(np.product(shape)).reshape(shape) + ds_kwargs = {"shape": data.shape, "dtype": data.dtype, "chunks": (4, 4, 4)} for f in (file_, h5_file): n5_ds = f.create_dataset("ds", **ds_kwargs) @@ -85,14 +88,25 @@ def test_created_dirs(file_): ds = file_.create_dataset("ds", data=data, chunks=(10, 10)) - created = { - str(path.relative_to(ds._path)) - for path in ds._path.glob('**/*') - if path.suffix != ".json" - } - - assert created == {"0", "1", "0/0", "1/0"} + assert blocks_in(ds._path) == {"0", "1", "0/0", "1/0"} attrs = ds.attrs._read_attributes() assert list(ds.shape) == attrs["dimensions"][::-1] + + +def test_vs_z5(file_, z5_file): + z5_path = Path(z5_file.path) + shape = (10, 20) + data = np.arange(np.product(shape)).reshape(shape) + + for f in (file_, z5_file): + f.create_dataset("ds", data=data, chunks=(6, 7)) + + assert np.allclose(file_["ds"][:], z5_file["ds"][:]) + assert blocks_in(file_["ds"]._path) == blocks_in(z5_path / "ds") + + attrs = attrs_in(file_["ds"]._path) + z5_attrs = attrs_in(z5_path / "ds") + for key in ("blockSize", "dimensions", "dataType"): + assert attrs[key] == 
z5_attrs[key] diff --git a/tests/test_pyn5.py b/tests/test_pyn5.py index e828052..38e36f0 100644 --- a/tests/test_pyn5.py +++ b/tests/test_pyn5.py @@ -11,6 +11,7 @@ import pyn5 +from .common import blocks_in, attrs_in from .conftest import BLOCKSIZE @@ -106,13 +107,8 @@ def test_data_ordering(tmp_path): ds.write_ndarray((0, 0), arr, 0) ds_path = root / "ds" - created = { - str(path.relative_to(ds_path)) - for path in ds_path.glob('**/*') - if path.suffix != ".json" - } - assert created == {"0", "0/0", "0/1"} + assert blocks_in(ds_path) == {"0", "0/0", "0/1"} with open(ds_path / "attributes.json") as f: attrs = json.load(f) @@ -120,6 +116,30 @@ def test_data_ordering(tmp_path): assert list(shape) == attrs["dimensions"] +def test_vs_z5(tmp_path, z5_file): + root = tmp_path / "test.n5" + + z5_path = Path(z5_file.path) + shape = (10, 20) + data = np.arange(np.product(shape)).reshape(shape) + chunks = (6, 7) + + pyn5.create_dataset(str(root), "ds", shape, chunks, data.dtype.name.upper()) + ds = pyn5.DatasetINT64(str(root), "ds", False) + ds.write_ndarray((0, 0), data, 0) + + z5_file.create_dataset("ds", data=data, chunks=chunks) + + assert np.allclose(ds.read_ndarray((0, 0), shape), z5_file["ds"][:]) + assert blocks_in(root / "ds") != blocks_in(z5_path / "ds") + + attrs = attrs_in(root / "ds") + z5_attrs = attrs_in(z5_path / "ds") + for key in ("blockSize", "dimensions"): + assert attrs[key] != z5_attrs[key] + assert attrs["dataType"] == z5_attrs["dataType"] + + class TestPythonReadWrite(unittest.TestCase): def setUp(self): self.root = "test.n5" From f746c0d3771ea77e2b856271d638aea953aa22c3 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 20:54:38 -0400 Subject: [PATCH 13/28] use same compression in z5/pyn5 --- tests/test_h5_like.py | 6 +++--- tests/test_pyn5.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test_h5_like.py b/tests/test_h5_like.py index 7de2224..3162f23 100644 --- a/tests/test_h5_like.py +++ 
b/tests/test_h5_like.py @@ -100,13 +100,13 @@ def test_vs_z5(file_, z5_file): shape = (10, 20) data = np.arange(np.product(shape)).reshape(shape) - for f in (file_, z5_file): - f.create_dataset("ds", data=data, chunks=(6, 7)) + file_.create_dataset("ds", data=data, chunks=(6, 7)) + z5_file.create_dataset("ds", data=data, chunks=(6, 7), compression="gzip", level=-1) assert np.allclose(file_["ds"][:], z5_file["ds"][:]) assert blocks_in(file_["ds"]._path) == blocks_in(z5_path / "ds") attrs = attrs_in(file_["ds"]._path) z5_attrs = attrs_in(z5_path / "ds") - for key in ("blockSize", "dimensions", "dataType"): + for key in ("blockSize", "dimensions", "dataType", "compression"): assert attrs[key] == z5_attrs[key] diff --git a/tests/test_pyn5.py b/tests/test_pyn5.py index 38e36f0..4e8c95b 100644 --- a/tests/test_pyn5.py +++ b/tests/test_pyn5.py @@ -128,7 +128,7 @@ def test_vs_z5(tmp_path, z5_file): ds = pyn5.DatasetINT64(str(root), "ds", False) ds.write_ndarray((0, 0), data, 0) - z5_file.create_dataset("ds", data=data, chunks=chunks) + z5_file.create_dataset("ds", data=data, chunks=(6, 7), compression="gzip", level=-1) assert np.allclose(ds.read_ndarray((0, 0), shape), z5_file["ds"][:]) assert blocks_in(root / "ds") != blocks_in(z5_path / "ds") @@ -137,7 +137,9 @@ def test_vs_z5(tmp_path, z5_file): z5_attrs = attrs_in(z5_path / "ds") for key in ("blockSize", "dimensions"): assert attrs[key] != z5_attrs[key] - assert attrs["dataType"] == z5_attrs["dataType"] + + for key in ("dataType", "compression"): + assert attrs[key] == z5_attrs[key] class TestPythonReadWrite(unittest.TestCase): From 114b67591c22e30fc8f556e87e5386c227540620 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 20:54:57 -0400 Subject: [PATCH 14/28] test for block hashsum identity --- tests/common.py | 39 +++++++++++++++++++++++++++++++++++++++ tests/test_h5_like.py | 4 +++- tests/test_pyn5.py | 4 +++- 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/tests/common.py 
b/tests/common.py index 81731d3..e1cf192 100644 --- a/tests/common.py +++ b/tests/common.py @@ -1,4 +1,6 @@ import json +import hashlib +from typing import NamedTuple, Optional, Tuple def blocks_in(dpath): @@ -9,5 +11,42 @@ def blocks_in(dpath): } +def blocks_hash(dpath): + md5 = hashlib.md5() + for fpath in sorted( + fpath for fpath in dpath.glob('**/*') + if fpath.is_file() and fpath.suffix != ".json" + ): + md5.update(str(fpath.relative_to(dpath)).encode()) + md5.update(fpath.read_bytes()) + + return md5.hexdigest() + + def attrs_in(dpath): return json.loads((dpath / "attributes.json").read_text()) + + +class BlockContents(NamedTuple): + mode: int + ndim: int + shape: Tuple[int, ...] + num_elem: Optional[int] + compressed_data: bytes + + @classmethod + def from_block(cls, fpath): + with open(fpath, 'rb') as f: + mode = int.from_bytes(f.read(2), "big", signed=False) + ndim = int.from_bytes(f.read(2), "big", signed=False) + shape = tuple( + int.from_bytes(f.read(4), "big", signed=False) + for _ in range(ndim) + ) + if mode: + num_elem = int.from_bytes(f.read(4), "big", signed=False) + else: + num_elem = None + data = f.read() + + return BlockContents(mode, ndim, shape, num_elem, data) diff --git a/tests/test_h5_like.py b/tests/test_h5_like.py index 3162f23..20a76ed 100644 --- a/tests/test_h5_like.py +++ b/tests/test_h5_like.py @@ -11,7 +11,7 @@ from h5py_like.test_utils import FileTestBase, DatasetTestBase, GroupTestBase, ModeTestBase from pyn5 import File -from .common import blocks_in, attrs_in +from .common import blocks_in, attrs_in, blocks_hash ds_kwargs = deepcopy(DatasetTestBase.dataset_kwargs) ds_kwargs["chunks"] = (5, 5, 5) @@ -110,3 +110,5 @@ def test_vs_z5(file_, z5_file): z5_attrs = attrs_in(z5_path / "ds") for key in ("blockSize", "dimensions", "dataType", "compression"): assert attrs[key] == z5_attrs[key] + + assert blocks_hash(file_._path) == blocks_hash(z5_path) diff --git a/tests/test_pyn5.py b/tests/test_pyn5.py index 4e8c95b..b404ce4 100644 --- 
a/tests/test_pyn5.py +++ b/tests/test_pyn5.py @@ -11,7 +11,7 @@ import pyn5 -from .common import blocks_in, attrs_in +from .common import blocks_in, attrs_in, blocks_hash from .conftest import BLOCKSIZE @@ -141,6 +141,8 @@ def test_vs_z5(tmp_path, z5_file): for key in ("dataType", "compression"): assert attrs[key] == z5_attrs[key] + assert blocks_hash(root) != blocks_hash(z5_path) + class TestPythonReadWrite(unittest.TestCase): def setUp(self): From 7620ec0b53c3263cea3008055aa2fbecaa08bfd1 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 21:37:44 -0400 Subject: [PATCH 15/28] n5 version handling - ValueError if mismatch in major versions - UserWarning if mismatch in minor versions - Nothing if mismatch in patch versions --- pyn5/file_group.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pyn5/file_group.py b/pyn5/file_group.py index c2e609c..dbf036d 100644 --- a/pyn5/file_group.py +++ b/pyn5/file_group.py @@ -15,6 +15,7 @@ from .pyn5 import create_dataset N5_VERSION = "2.0.2" +N5_VERSION_INFO = tuple(int(i) for i in N5_VERSION.split('.')) class Group(GroupBase): @@ -200,6 +201,15 @@ def _require_dir(self, dpath: Path): attrs["n5"] = N5_VERSION else: version = attrs.get("n5") - if version != N5_VERSION: + if not version: raise ValueError(f"Expected N5 version '{N5_VERSION}', got {version}") + + version_info = tuple(int(i) for i in version.split('.')) + + if version_info[0] != N5_VERSION_INFO[0]: + raise ValueError(f"Expected N5 version '{N5_VERSION}', got {version}") + elif version_info[1] != N5_VERSION_INFO[1]: + warnings.warn(f"Expected N5 version '{N5_VERSION}', got {version};" + f" trying to open anyway") + return created From dcd0492489e4ff9eb5a74df0703caa302536ef9b Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 21:38:54 -0400 Subject: [PATCH 16/28] Cross-check with z5 --- tests/common.py | 14 ++++++++++++-- tests/conftest.py | 5 +---- tests/test_h5_like.py | 10 ++++++++-- tests/test_pyn5.py 
| 10 ++++++++-- 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/tests/common.py b/tests/common.py index e1cf192..76d312b 100644 --- a/tests/common.py +++ b/tests/common.py @@ -2,6 +2,11 @@ import hashlib from typing import NamedTuple, Optional, Tuple +try: + import z5py +except ImportError: + z5py = None + def blocks_in(dpath): return { @@ -11,12 +16,17 @@ def blocks_in(dpath): } -def blocks_hash(dpath): - md5 = hashlib.md5() +def iter_block_paths(dpath): for fpath in sorted( fpath for fpath in dpath.glob('**/*') if fpath.is_file() and fpath.suffix != ".json" ): + yield fpath + + +def blocks_hash(dpath): + md5 = hashlib.md5() + for fpath in iter_block_paths(dpath): md5.update(str(fpath.relative_to(dpath)).encode()) md5.update(fpath.read_bytes()) diff --git a/tests/conftest.py b/tests/conftest.py index 31c7c6f..e5dd3c5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,10 +8,7 @@ except ImportError: h5py = None -try: - import z5py -except ImportError: - z5py = None +from .common import z5py DS_SIZE = (10, 10, 10) BLOCKSIZE = (2, 2, 2) diff --git a/tests/test_h5_like.py b/tests/test_h5_like.py index 20a76ed..16c612a 100644 --- a/tests/test_h5_like.py +++ b/tests/test_h5_like.py @@ -7,11 +7,13 @@ from copy import deepcopy from pathlib import Path +import pyn5 from h5py_like import Mode, FileMixin from h5py_like.test_utils import FileTestBase, DatasetTestBase, GroupTestBase, ModeTestBase from pyn5 import File -from .common import blocks_in, attrs_in, blocks_hash +from .common import z5py +from .common import blocks_in, attrs_in ds_kwargs = deepcopy(DatasetTestBase.dataset_kwargs) ds_kwargs["chunks"] = (5, 5, 5) @@ -111,4 +113,8 @@ def test_vs_z5(file_, z5_file): for key in ("blockSize", "dimensions", "dataType", "compression"): assert attrs[key] == z5_attrs[key] - assert blocks_hash(file_._path) == blocks_hash(z5_path) + data2 = pyn5.File(z5_path)["ds"][:] + data3 = z5py.N5File(file_._path)["ds"][:] + + assert np.array_equal(data, data2) + 
assert np.array_equal(data2, data3) diff --git a/tests/test_pyn5.py b/tests/test_pyn5.py index b404ce4..140ba65 100644 --- a/tests/test_pyn5.py +++ b/tests/test_pyn5.py @@ -11,7 +11,7 @@ import pyn5 -from .common import blocks_in, attrs_in, blocks_hash +from .common import blocks_in, attrs_in, blocks_hash, z5py from .conftest import BLOCKSIZE @@ -141,7 +141,13 @@ def test_vs_z5(tmp_path, z5_file): for key in ("dataType", "compression"): assert attrs[key] == z5_attrs[key] - assert blocks_hash(root) != blocks_hash(z5_path) + data2 = pyn5.DatasetINT64(str(z5_path), "ds", False).read_ndarray((0, 0), shape) + data3 = z5py.N5File(root)["ds"][:] + + assert not all([ + np.array_equal(data, data2), + np.array_equal(data, data3) + ]) class TestPythonReadWrite(unittest.TestCase): From 31e25837f5b4f1b58d3a7b380b81bff589e96dd2 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Tue, 16 Jul 2019 11:37:21 -0400 Subject: [PATCH 17/28] revert some travis changes --- .travis.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5bc2256..afd2cce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ language: python -dist: xenial python: - - 3.7 + - 3.5 stages: - test @@ -42,7 +41,9 @@ jobs: # - <<: *test_template # python: 3.6 - <<: *test_template + dist: xenial python: 3.7 + sudo: required # - <<: *deploy_template # env: # - CIBW_BUILD=cp35-* From 951049ae3a88bc8c6e4c7656ca7977c4ecae5432 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Tue, 16 Jul 2019 16:32:07 -0400 Subject: [PATCH 18/28] Add configurable compression --- Cargo.lock | 243 +++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 3 +- src/lib.rs | 15 ++- tests/conftest.py | 11 ++ tests/test_pyn5.py | 39 ++++++++ 5 files changed, 309 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 44a3024..8ddda83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,16 +13,76 @@ dependencies = [ "memchr 2.2.0 
(registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "backtrace" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "backtrace-sys 0.1.30 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "backtrace-sys" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "bitflags" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "build_const" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "bytecount" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "byteorder" version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "bzip2" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bzip2-sys 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cargo_metadata" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + 
"error-chain 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", + "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "cc" version = "1.0.25" @@ -55,6 +115,15 @@ name = "either" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "error-chain" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "backtrace 0.3.32 (registry+https://github.com/rust-lang/crates.io-index)", + "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "flate2" version = "1.0.4" @@ -73,6 +142,11 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "ghost" version = "0.1.0" @@ -83,6 +157,11 @@ dependencies = [ "syn 0.15.35 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "glob" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "inventory" version = "0.1.3" @@ -134,6 +213,35 @@ name = "libc" version = "0.2.58" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "lz4" +version = "1.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)", + "lz4-sys 1.8.3 (registry+https://github.com/rust-lang/crates.io-index)", + "skeptic 0.13.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "lz4-sys" +version = "1.8.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "lzma-sys" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)", + "pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "mashup" version = "0.1.9" @@ -190,10 +298,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "bzip2 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "flate2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "fs2 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lz4 1.23.1 (registry+https://github.com/rust-lang/crates.io-index)", "ndarray 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)", "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.7 (registry+https://github.com/rust-lang/crates.io-index)", @@ -201,6 +311,7 @@ dependencies = [ "serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)", "smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", "walkdir 2.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "xz2 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -240,6 +351,11 @@ dependencies = [ "pyo3 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "pkg-config" +version = "0.3.14" 
+source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "proc-macro-hack" version = "0.4.1" @@ -261,6 +377,14 @@ dependencies = [ "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "pulldown-cmark" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "pyn5" version = "0.1.0" @@ -268,6 +392,7 @@ dependencies = [ "n5 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "numpy 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", "pyo3 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -314,11 +439,44 @@ dependencies = [ "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rand" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_core" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "rawpointer" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "rdrand" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "regex" version = "1.1.7" @@ -339,6 +497,19 @@ dependencies = [ "ucd-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "remove_dir_all" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "ryu" version = "0.2.6" @@ -352,6 +523,20 @@ dependencies = [ "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "serde" version = "1.0.80" @@ -380,6 +565,21 @@ dependencies = [ "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "skeptic" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bytecount 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cargo_metadata 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "error-chain 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", + "glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "pulldown-cmark 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.39 
(registry+https://github.com/rust-lang/crates.io-index)", + "tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.2.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "smallvec" version = "0.6.9" @@ -403,6 +603,15 @@ dependencies = [ "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "tempdir" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "thread_local" version = "0.3.6" @@ -468,19 +677,37 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "xz2" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lzma-sys 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + [metadata] "checksum adler32 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7e522997b529f05601e05166c07ed17789691f562762c7f3b987263d2dedee5c" "checksum aho-corasick 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e6f484ae0c99fec2e858eb6134949117399f222608d84cadb3f58c1f97c2364c" +"checksum backtrace 0.3.32 (registry+https://github.com/rust-lang/crates.io-index)" = "18b50f5258d1a9ad8396d2d345827875de4261b158124d4c819d9b351454fae5" +"checksum backtrace-sys 0.1.30 (registry+https://github.com/rust-lang/crates.io-index)" = "5b3a000b9c543553af61bc01cbfc403b04b5caa9e421033866f2e98061eb3e61" +"checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd" "checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39" +"checksum bytecount 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b92204551573580e078dc80017f36a213eb77a0450e4ddd8cfa0f3f2d1f0178f" "checksum byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "94f88df23a25417badc922ab0f5716cc1330e87f71ddd9203b3a3ccd9cedf75d" +"checksum bzip2 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "42b7c3cbf0fa9c1b82308d57191728ca0256cb821220f4e2fd410a72ade26e3b" +"checksum bzip2-sys 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "6584aa36f5ad4c9247f5323b0a42f37802b37a836f0ad87084d7a33961abe25f" +"checksum cargo_metadata 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "e5d1b4d380e1bab994591a24c2bdd1b054f64b60bef483a8c598c7c345bc3bbe" "checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" "checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" "checksum crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" "checksum ctor 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9a43db2bba5cafdc6aa068c892a518e477ee0df3705e53ec70247a9ff93546d5" "checksum either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3be565ca5c557d7f59e7cfcf1844f9e3033650c929c6566f511e8005f205c1d0" +"checksum error-chain 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3ab49e9dcb602294bc42f9a7dfc9bc6e936fca4418ea300dbfb84fe16de0b7d9" "checksum flate2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3b0c7353385f92079524de3b7116cf99d73947c08a7472774e9b3b04bff3b901" "checksum fs2 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = 
"9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" "checksum ghost 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5297b71943dc9fea26a3241b178c140ee215798b7f79f7773fd61683e25bca74" +"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" "checksum inventory 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "21df85981fe094480bc2267723d3dc0fd1ae0d1f136affc659b7398be615d922" "checksum inventory-impl 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8a877ae8bce77402d5e9ed870730939e097aad827b2a932b361958fa9d6e75aa" "checksum itertools 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)" = "f58856976b776fedd95533137617a02fb25719f40e7d9b01c7043cd65474f450" @@ -488,6 +715,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" "checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" "checksum libc 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)" = "6281b86796ba5e4366000be6e9e18bf35580adf9e63fbe2294aadb587613a319" +"checksum lz4 1.23.1 (registry+https://github.com/rust-lang/crates.io-index)" = "43c94a9f09a60017f373020cc93d4291db4cd92b0db64ff25927f27d09dc23d5" +"checksum lz4-sys 1.8.3 (registry+https://github.com/rust-lang/crates.io-index)" = "20ab022822e9331c58d373acdd6b98085bace058ac6837b8266f213a2fccdafe" +"checksum lzma-sys 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "16b5c59c57cc4d39e7999f50431aa312ea78af7c93b23fbb0c3567bd672e7f35" "checksum mashup 0.1.9 
(registry+https://github.com/rust-lang/crates.io-index)" = "f2d82b34c7fb11bb41719465c060589e291d505ca4735ea30016a91f6fc79c3b" "checksum mashup-impl 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "aa607bfb674b4efb310512527d64266b065de3f894fc52f84efcbf7eaa5965fb" "checksum matrixmultiply 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "dcad67dcec2d58ff56f6292582377e6921afdf3bfbd533e26fb8900ae575e002" @@ -499,24 +729,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum num-complex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "107b9be86cd2481930688277b675b0114578227f034674726605b8a482d8baf8" "checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" "checksum numpy 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b8aee6953fb9165b93853f82033ec9ab6ce23129eb864c4f8a709a86a242b075" +"checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c" "checksum proc-macro-hack 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2c725b36c99df7af7bf9324e9c999b9e37d92c8f8caf106d82e1d7953218d2d8" "checksum proc-macro-hack-impl 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2b753ad9ed99dd8efeaa7d2fb8453c8f6bc3e54b97966d35f1bc77ca6865254a" "checksum proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)" = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" +"checksum pulldown-cmark 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "eef52fac62d0ea7b9b4dc7da092aa64ea7ec3d90af6679422d3d7e0e14b6ee15" "checksum pyo3 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d09e6e2d3fa5ae1a8af694f865e03e763e730768b16e3097851ff0b7f2276086" "checksum pyo3-derive-backend 0.7.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "d9d7ae8ab3017515cd7c82d88ce49b55e12a56c602dc69993e123da45c91b186" "checksum pyo3cls 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c494f8161f5b73096cc50f00fbb90fe670f476cde5e59c1decff39b546d54f40" "checksum quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "faf4799c5d274f3868a4aae320a0a182cbd2baee377b378f080e16a23e9d80db" +"checksum rand 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" +"checksum rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +"checksum rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0e7a549d590831370895ab7ba4ea0c1b6b011d106b5ff2da6eee112615e6dc0" "checksum rawpointer 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ebac11a9d2e11f2af219b8b8d833b76b1ea0e054aa0e8d8e9e4cbde353bdf019" +"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" "checksum regex 1.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "0b2f0808e7d7e4fb1cb07feb6ff2f4bc827938f24f8c2e6a3beb7370af544bdd" "checksum regex-syntax 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9d76410686f9e3a17f06128962e0ecc5755870bb890c34820c7af7f1db2e1d48" +"checksum remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" +"checksum rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f4dccf6f4891ebcc0c39f9b6eb1a83b9bf5d747cb439ec6fba4f3b977038af" "checksum ryu 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7153dd96dade874ab973e098cb62fcdbb89a03682e46b144fd09550998d4a4a7" "checksum same-file 1.0.4 
(registry+https://github.com/rust-lang/crates.io-index)" = "8f20c4be53a8a1ff4c1f1b2bd14570d2f634628709752f0702ecdd2b3f9a5267" +"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" "checksum serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef" "checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" "checksum serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)" = "5a23aa71d4a4d43fdbfaac00eff68ba8a06a51759a89ac3304323e800c4dd40d" +"checksum skeptic 0.13.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6fb8ed853fdc19ce09752d63f3a2e5b5158aeb261520cd75eb618bd60305165" "checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be" "checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" "checksum syn 0.15.35 (registry+https://github.com/rust-lang/crates.io-index)" = "641e117d55514d6d918490e47102f7e08d096fdde360247e4a10f7a91a8478d3" +"checksum tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" "checksum ucd-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d0f8bfa9ff0cadcd210129ad9d2c5f145c13e9ced3d3e5d948a6213487d52444" "checksum unicode-xid 0.1.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" @@ -527,3 +769,4 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "afc5508759c5bf4285e61feb862b6083c8480aec864fa17a81fdec6f69b461ab" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +"checksum xz2 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c179869f34fc7c01830d3ce7ea2086bc3a07e0d35289b667d0a8bf910258926c" diff --git a/Cargo.toml b/Cargo.toml index 9ee6be8..ea772f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,5 +9,6 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.7", features = ["extension-module"] } -n5 = { version = "0.4", default-features = false, features = ["filesystem", "gzip", "use_ndarray"]} +n5 = { version = "0.4", default-features = false, features = ["filesystem", "bzip", "gzip", "lz", "xz", "use_ndarray"]} numpy = "0.6" +serde_json = "1.0.39" diff --git a/src/lib.rs b/src/lib.rs index c815a4c..193e455 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,6 +19,7 @@ fn create_dataset( dimensions: Vec, block_size: Vec, dtype: &str, + compression: Option<&str>, ) -> PyResult<()> { let dtype = match dtype { "UINT8" => DataType::UINT8, @@ -45,11 +46,23 @@ fn create_dataset( let n = N5Filesystem::open_or_create(root_path)?; if !n.exists(path_name) { + let compression_type: CompressionType = match compression { + None => CompressionType::new::(), + Some(s) => { + match serde_json::from_str(s) { + Ok(c) => c, + Err(_e) => return Err( + exceptions::ValueError::py_err("Could not deserialize compression") + ) + } + } + 
}; + let data_attrs = DatasetAttributes::new( dimensions.into(), block_size.into(), dtype, - CompressionType::new::(), + compression_type, ); n.create_dataset(path_name, &data_attrs)?; Ok(()) diff --git a/tests/conftest.py b/tests/conftest.py index 16bbfe6..ff96fd0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,3 +19,14 @@ def ds_dtype(request, tmp_path): pyn5.create_dataset(n5_path, ds_name, DS_SIZE, BLOCKSIZE, dtype) yield pyn5.open(n5_path, ds_name, dtype, False), np.dtype(dtype.lower()) + + +@pytest.fixture(params=[ + {"type": "raw"}, + {"type": "bzip2", "blockSize": 5}, + {"type": "gzip", "level": 5}, + {"type": "lz4", "blockSize": 32768}, + {"type": "xz", "preset": 3}, +], ids=lambda d: d.get("type", "raw")) +def compression_dict(request): + yield request.param diff --git a/tests/test_pyn5.py b/tests/test_pyn5.py index f85a32f..3ccc81d 100644 --- a/tests/test_pyn5.py +++ b/tests/test_pyn5.py @@ -7,6 +7,8 @@ import unittest from pathlib import Path import shutil +import json + import numpy as np import pytest @@ -94,6 +96,43 @@ def test_read_write_wrong_dtype(ds_dtype): np.testing.assert_equal(ds.read_ndarray([4, 4, 4], BLOCKSIZE), np.zeros(BLOCKSIZE)) +def test_compression(tmp_path, compression_dict): + root = tmp_path / "test.n5" + + data = np.arange(100, dtype=np.uint8).reshape((10, 10)) + + pyn5.create_dataset( + str(root), "ds", data.shape, (5, 5), data.dtype.name.upper(), + json.dumps(compression_dict) + ) + + with open(root / "ds" / "attributes.json") as f: + attrs = json.load(f) + + assert attrs["compression"] == compression_dict + + ds = pyn5.DatasetUINT8(str(root), "ds", True) + ds.write_ndarray((0, 0), data, 0) + + +def test_default_compression(tmp_path): + root = tmp_path / "test.n5" + + data = np.arange(100, dtype=np.uint8).reshape((10, 10)) + + pyn5.create_dataset( + str(root), "ds", data.shape, (5, 5), data.dtype.name.upper(), + ) + + with open(root / "ds" / "attributes.json") as f: + attrs = json.load(f) + + assert 
attrs["compression"] == {"type": "gzip", "level": -1} + + ds = pyn5.DatasetUINT8(str(root), "ds", True) + ds.write_ndarray((0, 0), data, 0) + + class TestPythonReadWrite(unittest.TestCase): def setUp(self): self.root = "test.n5" From d0670df978e98dccd8f54482c58d2fb1e9d794da Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Wed, 17 Jul 2019 10:39:26 -0400 Subject: [PATCH 19/28] h5like compression handling --- pyn5/file_group.py | 33 ++++++++++++++++++++++++++++++++- tests/conftest.py | 11 ++++++++++- tests/test_h5_like.py | 16 ++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/pyn5/file_group.py b/pyn5/file_group.py index dbf036d..d9b968b 100644 --- a/pyn5/file_group.py +++ b/pyn5/file_group.py @@ -1,4 +1,7 @@ from __future__ import annotations + +import json + import shutil import warnings from pathlib import Path @@ -18,6 +21,15 @@ N5_VERSION_INFO = tuple(int(i) for i in N5_VERSION.split('.')) +compression_args = { + "raw": None, + "bzip2": "blockSize", + "gzip": "level", + "lz4": "blockSize", + "xz": "preset", +} + + class Group(GroupBase): def __init__(self, name: str, parent: Group): """ @@ -49,7 +61,8 @@ def _create_child_group(self, name) -> GroupBase: return Group(name, self) def _create_child_dataset( - self, name, shape=None, dtype=None, data=None, chunks=None, **kwds + self, name, shape=None, dtype=None, data=None, chunks=None, + compression=None, compression_opts=None, **kwds ): for key in kwds: warnings.warn( @@ -85,6 +98,23 @@ def _create_child_dataset( elif isinstance(obj, Group): raise FileExistsError(f"Group found at {dpath}") + if compression: + try: + opt_name = compression_args[compression] + except KeyError: + raise ValueError( + f"Unknown compression type '{compression}': " + f"use one of {sorted(compression_args)}" + ) + + compression_dict = {"type": compression} + if compression_opts is not None: + compression_dict[opt_name] = compression_opts + + compression_str = json.dumps(compression_dict) + else: + 
compression_str = None + file_path = str(self.file.filename) create_dataset( file_path, @@ -92,6 +122,7 @@ def _create_child_dataset( list(shape)[::-1], list(chunks)[::-1], dtype.name.upper(), + compression_str, ) ds = Dataset(name, self) diff --git a/tests/conftest.py b/tests/conftest.py index 72d3f5c..5d3577d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,5 @@ +from copy import deepcopy + import pytest import numpy as np @@ -41,7 +43,14 @@ def ds_dtype(request, tmp_path): {"type": "xz", "preset": 3}, ], ids=lambda d: d.get("type", "raw")) def compression_dict(request): - yield request.param + yield deepcopy(request.param) + + +@pytest.fixture +def compression_name_opt(compression_dict): + name = compression_dict.pop("type") + arg = list(compression_dict.values()).pop() if compression_dict else None + yield name, arg @pytest.fixture diff --git a/tests/test_h5_like.py b/tests/test_h5_like.py index 16c612a..6af0813 100644 --- a/tests/test_h5_like.py +++ b/tests/test_h5_like.py @@ -118,3 +118,19 @@ def test_vs_z5(file_, z5_file): assert np.array_equal(data, data2) assert np.array_equal(data2, data3) + + +def test_compression_opts(file_, compression_name_opt): + compression, opt = compression_name_opt + + shape = (10, 20) + data = np.arange(np.product(shape)).reshape(shape) + + ds = file_.create_dataset( + "ds", data=data, chunks=(10, 10), compression=compression, compression_opts=opt + ) + + compression_dict = ds.attrs._read_attributes()["compression"] + assert compression_dict.pop("type") == compression + if opt is not None: + assert list(compression_dict.values()).pop() == opt From a67d9f5685db832d09faae371a2ca95be3f380ae Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Wed, 17 Jul 2019 11:41:36 -0400 Subject: [PATCH 20/28] test for block hash identity with z5 --- tests/test_h5_like.py | 17 ++++++++++++----- tests/test_pyn5.py | 23 ++++++++++++++++++++++- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/tests/test_h5_like.py 
b/tests/test_h5_like.py index 6af0813..4584d4d 100644 --- a/tests/test_h5_like.py +++ b/tests/test_h5_like.py @@ -12,7 +12,7 @@ from h5py_like.test_utils import FileTestBase, DatasetTestBase, GroupTestBase, ModeTestBase from pyn5 import File -from .common import z5py +from .common import z5py, blocks_hash from .common import blocks_in, attrs_in ds_kwargs = deepcopy(DatasetTestBase.dataset_kwargs) @@ -98,6 +98,7 @@ def test_created_dirs(file_): def test_vs_z5(file_, z5_file): + """Check same as z5""" z5_path = Path(z5_file.path) shape = (10, 20) data = np.arange(np.product(shape)).reshape(shape) @@ -113,11 +114,17 @@ def test_vs_z5(file_, z5_file): for key in ("blockSize", "dimensions", "dataType", "compression"): assert attrs[key] == z5_attrs[key] - data2 = pyn5.File(z5_path)["ds"][:] - data3 = z5py.N5File(file_._path)["ds"][:] - assert np.array_equal(data, data2) - assert np.array_equal(data2, data3) +def test_vs_z5_hash(file_, z5_file): + """Check identical block contents to z5""" + z5_path = Path(z5_file.path) + shape = (10, 20) + data = np.arange(np.product(shape)).reshape(shape) + + file_.create_dataset("ds", data=data, chunks=(6, 7), compression="raw") + z5_file.create_dataset("ds", data=data, chunks=(6, 7), compression="raw") + + assert blocks_hash(file_._path) == blocks_hash(z5_path) def test_compression_opts(file_, compression_name_opt): diff --git a/tests/test_pyn5.py b/tests/test_pyn5.py index 57d577f..2fff681 100644 --- a/tests/test_pyn5.py +++ b/tests/test_pyn5.py @@ -12,7 +12,7 @@ import pyn5 -from .common import blocks_in, attrs_in, z5py +from .common import blocks_in, attrs_in, z5py, blocks_hash from .conftest import BLOCKSIZE @@ -155,6 +155,7 @@ def test_data_ordering(tmp_path): def test_vs_z5(tmp_path, z5_file): + """Check different dimensions, same dtype/compression as z5""" root = tmp_path / "test.n5" z5_path = Path(z5_file.path) @@ -188,6 +189,26 @@ def test_vs_z5(tmp_path, z5_file): ]) +def test_vs_z5_hash(tmp_path, z5_file): + """Check 
different block hashes to z5""" + root = tmp_path / "test.n5" + + z5_path = Path(z5_file.path) + shape = (10, 20) + data = np.arange(np.product(shape)).reshape(shape) + chunks = (6, 7) + + pyn5.create_dataset( + str(root), "ds", shape, chunks, data.dtype.name.upper(), json.dumps({"type": "raw"}) + ) + ds = pyn5.DatasetINT64(str(root), "ds", False) + ds.write_ndarray((0, 0), data, 0) + + z5_file.create_dataset("ds", data=data, chunks=(6, 7), compression="raw") + + assert blocks_hash(root) != blocks_hash(z5_path) + + class TestPythonReadWrite(unittest.TestCase): def setUp(self): self.root = "test.n5" From b687427da75cec7d2b5682dd5e39a3e887ed11a5 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Wed, 17 Jul 2019 11:55:08 -0400 Subject: [PATCH 21/28] fix lint issues --- tests/test_h5_like.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_h5_like.py b/tests/test_h5_like.py index 4584d4d..d5ca19b 100644 --- a/tests/test_h5_like.py +++ b/tests/test_h5_like.py @@ -7,12 +7,11 @@ from copy import deepcopy from pathlib import Path -import pyn5 from h5py_like import Mode, FileMixin from h5py_like.test_utils import FileTestBase, DatasetTestBase, GroupTestBase, ModeTestBase from pyn5 import File -from .common import z5py, blocks_hash +from .common import blocks_hash from .common import blocks_in, attrs_in ds_kwargs = deepcopy(DatasetTestBase.dataset_kwargs) From f29b01238262a2f0eaee7a01ce94fd2992d17ad6 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Wed, 17 Jul 2019 11:57:23 -0400 Subject: [PATCH 22/28] python 3.6 compatibility --- .travis.yml | 16 +++++----------- pyn5/attributes.py | 5 ++--- pyn5/file_group.py | 4 +--- requirements_dev.txt | 2 +- setup.py | 2 +- tox.ini | 3 +-- 6 files changed, 11 insertions(+), 21 deletions(-) diff --git a/.travis.yml b/.travis.yml index 409c6b6..f181f6d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,20 +36,15 @@ deploy_template: &deploy_template jobs: include: -# - <<: *test_template -# python: 3.5 -# - 
<<: *test_template -# python: 3.6 + - <<: *test_template + python: 3.6 - <<: *test_template dist: xenial python: 3.7 sudo: required -# - <<: *deploy_template -# env: -# - CIBW_BUILD=cp35-* -# - <<: *deploy_template -# env: -# - CIBW_BUILD=cp36-* + - <<: *deploy_template + env: + - CIBW_BUILD=cp36-* - <<: *deploy_template env: - CIBW_BUILD=cp37-* @@ -62,4 +57,3 @@ jobs: secure: VeZNGpUs5ne3ZlCeZLPbT+3O6yRDM9sv8emg6m90ZwLuZAlXV0t2dGHKccMTBMbS2jWSy2q4TY2IkN0SOWrOmi53Klt3K5Y461Ra8dT+XdmXK8g+36HJEJKWfFvVpYVuIw72yoUWHZsr2iNxi7tiAc/AjBvbnbgSXuAayuVm+8K7tQ85kkfbdBErDQnhziEiqtIrjak3hwBgjWpm0UEuAKG/eTBFk0BAN9wqRajCS58WLaLVnF4FtAHT4QAxo33j99njB7cz8PLjXNd3BxT2BpMbjqmg8krVW7ayMJKdLvWdICezPB4nlsnL9jBlXMaRvI0ijSl59QkVCDbkrERUCR7IdJZqAX3IFSe+9X1cwzJsJeXYOfQjYMX+ZyqR8qcmQKS6M1u3uYMXhoj+TU9uO0sK4dNxrS0DRhg22TdjAcpqnz0UDVVWfFapltroE0ePVPs8aOOqdpJewRDPDI0ghRg/nzrSIhEI+85XnSTcjm4if5hwiEFchIFlV5d/ZIHtPn+b0fCRTDMq4kjObxD9uBbVvda1+CESCNrE91oB1erlrjygsDnpdRWi5dzaOVe5DJmAxT/V5mPFlskOPJLZr3lzZOQm1FGVJbvTN766plDCWozTAG8wT58hq/nJTBIiRjHlQnhFjlHLkaWTEq/jJ8mA++KXvbofCy833V0OMrw= provider: pypi user: pattonw - diff --git a/pyn5/attributes.py b/pyn5/attributes.py index c058b56..d15d223 100644 --- a/pyn5/attributes.py +++ b/pyn5/attributes.py @@ -1,4 +1,3 @@ -from __future__ import annotations import errno import json from contextlib import contextmanager @@ -27,7 +26,7 @@ def default(self, obj): def restrict_metadata(fn): """Decorator for AttributeManager methods which prevents mutation of N5 metadata""" @wraps(fn) - def wrapped(obj: AttributeManager, key, *args, **kwargs): + def wrapped(obj: "AttributeManager", key, *args, **kwargs): if obj._is_dataset() and key in obj._dataset_keys: raise RuntimeError(f"N5 metadata (key '{key}') cannot be mutated") return fn(obj, key, *args, **kwargs) @@ -60,7 +59,7 @@ def __init__(self, dpath: Path, mode=Mode.default()): super().__init__(mode) @classmethod - def from_parent(cls, parent: H5ObjectLike) -> AttributeManager: + def from_parent(cls, parent: 
H5ObjectLike) -> "AttributeManager": """ Create AttributeManager for a File, Group or Dataset. diff --git a/pyn5/file_group.py b/pyn5/file_group.py index d9b968b..1e8abdd 100644 --- a/pyn5/file_group.py +++ b/pyn5/file_group.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import json import shutil @@ -31,7 +29,7 @@ class Group(GroupBase): - def __init__(self, name: str, parent: Group): + def __init__(self, name: str, parent: "Group"): """ :param name: basename of the group diff --git a/requirements_dev.txt b/requirements_dev.txt index 2393d0a..114b51e 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -11,5 +11,5 @@ setuptools_rust==0.10.6 numpy==1.16.4 pytest==4.6.3 pytest-runner==5.1 -h5py_like==0.3.0 +h5py_like==0.4.0 h5py==2.9.0 diff --git a/setup.py b/setup.py index 48117a2..bc4938f 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ with open("HISTORY.rst") as history_file: history = history_file.read() -requirements = ["numpy", "h5py_like>=0.3.0"] +requirements = ["numpy", "h5py_like>=0.4.0"] setup_requirements = [] test_requirements = [] diff --git a/tox.ini b/tox.ini index e3a9e16..b99daca 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,5 @@ [tox] -;envlist = py35, py36, py37 -envlist = py37 +envlist = py36, py37 [travis] python = From 135ead97ab1e03ccbe6a4459e4da90a9ba9e1a8d Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Wed, 17 Jul 2019 13:29:03 -0400 Subject: [PATCH 23/28] enum for supported compression types --- pyn5/__init__.py | 7 ++++++- pyn5/common.py | 40 ++++++++++++++++++++++++++++++++++++++++ pyn5/file_group.py | 15 ++++----------- 3 files changed, 50 insertions(+), 12 deletions(-) create mode 100644 pyn5/common.py diff --git a/pyn5/__init__.py b/pyn5/__init__.py index 2e608c0..27f9a7b 100644 --- a/pyn5/__init__.py +++ b/pyn5/__init__.py @@ -6,6 +6,8 @@ __email__ = "pattonw@hhmi.org" __version__ = "0.1.0" +from h5py_like import Mode + from .python_wrappers import open, read, write from .pyn5 import ( DatasetUINT8, @@ 
-23,6 +25,7 @@ from .attributes import AttributeManager from .dataset import Dataset from .file_group import File, Group +from .common import CompressionType __all__ = [ "open", @@ -42,5 +45,7 @@ "File", "Group", "Dataset", - "AttributeManager" + "AttributeManager", + "CompressionType", + "Mode", ] diff --git a/pyn5/common.py b/pyn5/common.py new file mode 100644 index 0000000..958e929 --- /dev/null +++ b/pyn5/common.py @@ -0,0 +1,40 @@ +import enum + + +class StrEnum(str, enum.Enum): + def __new__(cls, *args): + for arg in args: + if not isinstance(arg, (str, enum.auto)): + raise TypeError( + "Values of StrEnums must be strings: {} is a {}".format( + repr(arg), type(arg) + ) + ) + return super().__new__(cls, *args) + + def __str__(self): + return self.value + + # pylint: disable=no-self-argument + # The first argument to this function is documented to be the name of the + # enum member, not `self`: + # https://docs.python.org/3.6/library/enum.html#using-automatic-values + def _generate_next_value_(name, *_): + return name + + +class CompressionType(StrEnum): + RAW = "raw" + BZIP2 = "bzip2" + GZIP = "gzip" + LZ4 = "lz4" + XZ = "xz" + + +compression_args = { + CompressionType.RAW: None, + CompressionType.BZIP2: "blockSize", + CompressionType.GZIP: "level", + CompressionType.LZ4: "blockSize", + CompressionType.XZ: "preset", +} diff --git a/pyn5/file_group.py b/pyn5/file_group.py index 1e8abdd..629ab71 100644 --- a/pyn5/file_group.py +++ b/pyn5/file_group.py @@ -11,23 +11,16 @@ from h5py_like.common import Name from h5py_like.base import H5ObjectLike from h5py_like.shape_utils import guess_chunks + from pyn5 import Dataset -from pyn5.attributes import AttributeManager +from .common import compression_args +from .attributes import AttributeManager from .pyn5 import create_dataset N5_VERSION = "2.0.2" N5_VERSION_INFO = tuple(int(i) for i in N5_VERSION.split('.')) -compression_args = { - "raw": None, - "bzip2": "blockSize", - "gzip": "level", - "lz4": "blockSize", - 
"xz": "preset", -} - - class Group(GroupBase): def __init__(self, name: str, parent: "Group"): """ @@ -105,7 +98,7 @@ def _create_child_dataset( f"use one of {sorted(compression_args)}" ) - compression_dict = {"type": compression} + compression_dict = {"type": str(compression)} if compression_opts is not None: compression_dict[opt_name] = compression_opts From 686b7a5c843abcf6d621b678885f6e1a2711ad9b Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Wed, 17 Jul 2019 13:34:11 -0400 Subject: [PATCH 24/28] test for non-default compression, default option --- tests/test_pyn5.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_pyn5.py b/tests/test_pyn5.py index 2fff681..173a559 100644 --- a/tests/test_pyn5.py +++ b/tests/test_pyn5.py @@ -132,6 +132,26 @@ def test_default_compression(tmp_path): ds.write_ndarray((0, 0), data, 0) +def test_default_compression_opts(tmp_path, compression_name_opt): + name, _ = compression_name_opt + root = tmp_path / "test.n5" + + data = np.arange(100, dtype=np.uint8).reshape((10, 10)) + + pyn5.create_dataset( + str(root), "ds", data.shape, (5, 5), data.dtype.name.upper(), + json.dumps({"type": name}) + ) + + with open(root / "ds" / "attributes.json") as f: + attrs = json.load(f) + + assert attrs["compression"]["type"] == name + + ds = pyn5.DatasetUINT8(str(root), "ds", True) + ds.write_ndarray((0, 0), data, 0) + + def test_data_ordering(tmp_path): root = tmp_path / "test.n5" From edd328e70cdcfaff9e943c5ae76486cab663710f Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Wed, 17 Jul 2019 14:16:24 -0400 Subject: [PATCH 25/28] better error when no n5 version found --- pyn5/file_group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyn5/file_group.py b/pyn5/file_group.py index 629ab71..6ebe1f0 100644 --- a/pyn5/file_group.py +++ b/pyn5/file_group.py @@ -224,7 +224,7 @@ def _require_dir(self, dpath: Path): else: version = attrs.get("n5") if not version: - raise ValueError(f"Expected N5 
version '{N5_VERSION}', got {version}") + raise ValueError(f"No N5 version found in {attrs._path}") version_info = tuple(int(i) for i in version.split('.')) From 5c4d5dbc578460d1e5dfceea6ff9bbd28ea6cf98 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Wed, 17 Jul 2019 14:16:44 -0400 Subject: [PATCH 26/28] documentation --- README.rst | 36 +++++++++++++++++++++++++++++++++--- docs/usage.rst | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index e7f907f..8daecf9 100644 --- a/README.rst +++ b/README.rst @@ -6,6 +6,9 @@ pyn5 .. image:: https://img.shields.io/pypi/v/pyn5.svg :target: https://pypi.python.org/pypi/pyn5 +.. image:: https://img.shields.io/pypi/pyversions/pyn5.svg + :target: https://pypi.python.org/pypi/pyn5 + .. image:: https://travis-ci.org/pattonw/rust-pyn5.svg?branch=master :target: https://travis-ci.org/pattonw/rust-pyn5 @@ -14,19 +17,38 @@ pyn5 :alt: Documentation Status - - Python wrapper around rust-n5. * Free software: MIT license * Documentation: https://rust-pyn5.readthedocs.io. +Installation +------------ + +``pip install pyn5`` installs pre-compiled wheels. +To build from source, you need + +* `setuptools-rust`_ +* a rust_ compiler + + - >= 1.34.0-nightly 2019-02-06 + - <= 1.36.0-nightly 2019-07-01 Features -------- -* TODO +* h5py_ -like interface + +Related projects +---------------- + +* N5_ (file system format spec and reference implementation in java) +* `rust-n5`_ (implementation in rust, used in pyn5) +* zarr_ (similar chunked array storage library and format, supports some N5 features) +* z5_ (C++ implementation of zarr and N5 with python bindings, depends on conda) +* h5py_ (hierarchical array storage) +* `h5py_like`_ (ABCs for APIs like h5py) Credits ------- @@ -35,3 +57,11 @@ This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypack .. _Cookiecutter: https://github.com/audreyr/cookiecutter .. 
_`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage +.. _N5: https://github.com/saalfeldlab/n5/ +.. _rust-n5: https://github.com/aschampion/rust-n5/ +.. _zarr: https://zarr-developers.github.io/ +.. _z5: https://github.com/constantinpape/z5/ +.. _setuptools-rust: https://github.com/PyO3/setuptools-rust +.. _rust: https://www.rust-lang.org/tools/install +.. _h5py: https://www.h5py.org/ +.. _h5py_like: https://github.com/clbarnes/h5py_like diff --git a/docs/usage.rst b/docs/usage.rst index 444523e..5c5ff64 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -5,3 +5,52 @@ Usage To use pyn5 in a project:: import pyn5 + +pyn5 exposes an API largely compatible with h5py_. +There are additionally some enums defined to optionally help manage open modes and compression types. + + +.. code-block:: python + + import numpy as np + + from pyn5 import File, Mode, CompressionType + + f = File("path/to/test.n5", mode=Mode.READ_WRITE_CREATE) # default mode 'a' + + g1 = f.create_group("group1") + g2 = f.require_group("/group1/group2") + ds1 = g2.create_dataset( + "dataset1", + data=np.random.random((10, 10)), + chunks=(5, 5), + compression=CompressionType.GZIP, + compression_opts=-1 + ) # default compression + + # indexing supports slices, integers, ellipses, and newaxes + arr = ds1[:, 5, np.newaxis] + + ds1.attrs["key"] = "value" + + +Differences from h5py +--------------------- + +* The HDF5_ format is different to N5_; refer to their specifications +* No files are held open, so there is no need to use a context manager (``with``) to open a ``File`` + + - But you can use one if you want, for compatibility + +* Attributes must be JSON-serializable + + - The default encoder will convert numpy arrays to nested lists; they will remain lists when read + +* Compression types are as described in the N5_ spec +* Group and Dataset copying and linking are not supported +* Non-zero fill values, dataset resizing, and named dimensions are not supported + + +..
_HDF5: https://support.hdfgroup.org/HDF5/doc/H5.format.html +.. _h5py: http://docs.h5py.org/en/stable/ +.. _N5: https://github.com/saalfeldlab/n5/#file-system-specification-version-203-snapshot From 4ce43b75cb4c019c90e6cbf61408cc819bf7ae64 Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Wed, 17 Jul 2019 14:24:43 -0400 Subject: [PATCH 27/28] add 3.6 --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index bc4938f..2436020 100644 --- a/setup.py +++ b/setup.py @@ -42,8 +42,7 @@ "License :: OSI Approved :: MIT License", "Natural Language :: English", "Programming Language :: Rust", - # "Programming Language :: Python :: 3.5", - # "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", ], description="Python wrapper around rust-n5.", From fdc3989ff7684d786e97e077994505fdb2ff9abb Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Thu, 18 Jul 2019 11:14:53 -0400 Subject: [PATCH 28/28] add threading and tests --- pyn5/dataset.py | 36 ++++++++++++++++++++++++++++++++++-- requirements_dev.txt | 2 +- setup.py | 2 +- tests/test_h5_like.py | 10 ++++++---- 4 files changed, 42 insertions(+), 8 deletions(-) diff --git a/pyn5/dataset.py b/pyn5/dataset.py index 38f7124..07bd6e6 100644 --- a/pyn5/dataset.py +++ b/pyn5/dataset.py @@ -1,8 +1,11 @@ +from copy import copy + from typing import Union, Tuple, Optional, Any import numpy as np from h5py_like import DatasetBase, AttributeManagerBase, mutation, Name +from h5py_like.shape_utils import thread_read_fn, thread_write_fn from pyn5.attributes import AttributeManager from .pyn5 import ( DatasetUINT8, @@ -33,6 +36,8 @@ class Dataset(DatasetBase): + threads = None + def __init__(self, name: str, parent: "Group"): # noqa would need circular imports """ @@ -44,6 +49,7 @@ def __init__(self, name: str, parent: "Group"): # noqa would need circular impo self._parent = parent self._path = self.parent._path / name self._attrs = 
AttributeManager.from_parent(self) + self.threads = copy(self.threads) attrs = self._attrs._read_attributes() @@ -90,20 +96,46 @@ def resize(self, size: Union[int, Tuple[int, ...]], axis: Optional[int] = None): raise NotImplementedError() def __getitem__(self, args) -> np.ndarray: - def fn(translation, dimensions): + def inner_fn(translation, dimensions): return self._impl.read_ndarray( translation[::-1], dimensions[::-1] ).transpose() + if self.threads: + def fn(translation, dimensions): + return thread_read_fn( + translation, + dimensions, + self.chunks, + self.shape, + inner_fn, + self.threads + ) + else: + fn = inner_fn + return self._getitem(args, fn, self._astype) @mutation def __setitem__(self, args, val): - def fn(offset, arr): + def inner_fn(offset, arr): return self._impl.write_ndarray( offset[::-1], arr.transpose(), self.fillvalue ) + if self.threads: + def fn(offset, arr): + return thread_write_fn( + offset, + arr, + self.chunks, + self.shape, + inner_fn, + self.threads + ) + else: + fn = inner_fn + return self._setitem(args, val, fn) @property diff --git a/requirements_dev.txt b/requirements_dev.txt index 114b51e..55468ae 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -11,5 +11,5 @@ setuptools_rust==0.10.6 numpy==1.16.4 pytest==4.6.3 pytest-runner==5.1 -h5py_like==0.4.0 +h5py_like==0.5.2 h5py==2.9.0 diff --git a/setup.py b/setup.py index 2436020..e8f2163 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ with open("HISTORY.rst") as history_file: history = history_file.read() -requirements = ["numpy", "h5py_like>=0.4.0"] +requirements = ["numpy", "h5py_like>=0.5.2"] setup_requirements = [] test_requirements = [] diff --git a/tests/test_h5_like.py b/tests/test_h5_like.py index d5ca19b..c9276b8 100644 --- a/tests/test_h5_like.py +++ b/tests/test_h5_like.py @@ -8,13 +8,15 @@ from pathlib import Path from h5py_like import Mode, FileMixin -from h5py_like.test_utils import FileTestBase, DatasetTestBase, GroupTestBase, ModeTestBase +from 
h5py_like.test_utils import ( + FileTestBase, ThreadedDatasetTestBase, GroupTestBase, ModeTestBase, +) from pyn5 import File from .common import blocks_hash from .common import blocks_in, attrs_in -ds_kwargs = deepcopy(DatasetTestBase.dataset_kwargs) +ds_kwargs = deepcopy(ThreadedDatasetTestBase.dataset_kwargs) ds_kwargs["chunks"] = (5, 5, 5) @@ -28,8 +30,8 @@ class TestGroup(GroupTestBase): pass -class TestDataset(DatasetTestBase): - dataset_kwargs = ds_kwargs +class TestDataset(ThreadedDatasetTestBase): + dataset_kwargs = deepcopy(ThreadedDatasetTestBase.dataset_kwargs) def test_has_metadata(self, file_): ds = self.dataset(file_)