From 0bdb36daf40dff5cfb058ab7028ddca2013d0ecc Mon Sep 17 00:00:00 2001 From: Chris Barnes Date: Mon, 15 Jul 2019 17:39:38 -0400 Subject: [PATCH] safer N5 dataset metadata handling --- pyn5/attributes.py | 46 ++++++++++++++++++++++++++++++++++++------- pyn5/dataset.py | 14 ++++++++++--- tests/test_h5_like.py | 28 +++++++++++++++++++++++++- 3 files changed, 77 insertions(+), 11 deletions(-) diff --git a/pyn5/attributes.py b/pyn5/attributes.py index 958dc81..c058b56 100644 --- a/pyn5/attributes.py +++ b/pyn5/attributes.py @@ -3,6 +3,8 @@ import json from contextlib import contextmanager from copy import deepcopy +from functools import wraps + from pathlib import Path from typing import Iterator, Any, Dict @@ -22,6 +24,16 @@ def default(self, obj): return super().default(obj) +def restrict_metadata(fn): + """Decorator for AttributeManager methods which prevents mutation of N5 metadata""" + @wraps(fn) + def wrapped(obj: AttributeManager, key, *args, **kwargs): + if obj._is_dataset() and key in obj._dataset_keys: + raise RuntimeError(f"N5 metadata (key '{key}') cannot be mutated") + return fn(obj, key, *args, **kwargs) + return mutation(wrapped) + + class AttributeManager(AttributeManagerBase): """Object which reads and writes group attributes as JSON. @@ -44,6 +56,7 @@ def __init__(self, dpath: Path, mode=Mode.default()): """ self._path = Path(dpath) / "attributes.json" self._dump_kwargs = deepcopy(self._dump_kwargs) + self._has_dataset_keys_ = None super().__init__(mode) @classmethod @@ -56,12 +69,12 @@ def from_parent(cls, parent: H5ObjectLike) -> AttributeManager: """ return cls(parent._path, parent.mode) - @mutation + @restrict_metadata def __setitem__(self, k, v) -> None: with self._open_attributes(True) as attrs: attrs[k] = v - @mutation + @restrict_metadata def __delitem__(self, v) -> None: with self._open_attributes(True) as attrs: del attrs[v] @@ -95,21 +108,40 @@ def __contains__(self, item): with self._open_attributes() as attrs: return item in attrs - def _is_dataset(self): - with self._open_attributes() as attrs: - return self._dataset_keys.issubset(attrs) + def _is_dataset(self) -> bool: + if self._has_dataset_keys_ is None: + try: + with open(self._path, "r") as f: + self._has_dataset_keys_ = self._dataset_keys.issubset(json.load(f)) + except ValueError: + self._has_dataset_keys_ = False + except IOError as e: + if e.errno == errno.ENOENT: + self._has_dataset_keys_ = False + else: + raise + return self._has_dataset_keys_ @contextmanager def _open_attributes(self, write: bool = False) -> Dict[str, Any]: """Return attributes as a context manager. + N5 metadata keys are stripped from the dict. + :param write: Whether to write changes to the attributes dict. - :return: attributes as a dict (including N5 metadata) + :return: attributes as a dict """ attributes = self._read_attributes() + + if self._is_dataset(): + hidden_attrs = {k: attributes.pop(k) for k in self._dataset_keys} + else: + hidden_attrs = dict() + yield attributes if write: - self._write_attributes(attributes) + hidden_attrs.update(attributes) + self._write_attributes(hidden_attrs) def _read_attributes(self): """Return attributes or an empty dict if they do not exist""" diff --git a/pyn5/dataset.py b/pyn5/dataset.py index 2c9cb46..0ef920e 100644 --- a/pyn5/dataset.py +++ b/pyn5/dataset.py @@ -1,3 +1,5 @@ +import json + from typing import Union, Tuple, Optional, Any import numpy as np @@ -45,10 +47,16 @@ def __init__(self, name: str, parent: "Group"): # noqa would need circular impo self._path = self.parent._path / name self._attrs = AttributeManager.from_parent(self) - with self._attrs._open_attributes() as attrs: + attrs = self._attrs._read_attributes() + + try: self._shape = tuple(attrs["dimensions"][::-1]) - self._dtype = np.dtype(self.attrs["dataType"].lower()) - self._chunks = tuple(self.attrs["blockSize"][::-1]) + self._dtype = np.dtype(attrs["dataType"].lower()) + self._chunks = tuple(attrs["blockSize"][::-1]) + except KeyError: + raise ValueError( + f"Not a dataset (missing metadata key): " + str(self._path) + ) self._impl = dataset_types[self.dtype]( str(self.file._path), diff --git a/tests/test_h5_like.py b/tests/test_h5_like.py index faf7e1a..894d7f3 100644 --- a/tests/test_h5_like.py +++ b/tests/test_h5_like.py @@ -1,3 +1,6 @@ +import json + +import pytest import shutil import tempfile from copy import deepcopy @@ -23,7 +26,30 @@ class TestGroup(GroupTestBase): class TestDataset(DatasetTestBase): dataset_kwargs = ds_kwargs - pass + + def test_has_metadata(self, file_): + ds = self.dataset(file_) + with open(ds.attrs._path) as f: + attrs = json.load(f) + for key in ds.attrs._dataset_keys: + assert key in attrs + + def test_no_return_metadata(self, file_): + ds = self.dataset(file_) + + for key in ds.attrs._dataset_keys: + assert key not in ds.attrs + assert key not in dict(ds.attrs) + + def test_no_mutate_metadata(self, file_): + ds = self.dataset(file_) + + for key in ds.attrs._dataset_keys: + with pytest.raises(RuntimeError): + ds.attrs[key] = "not a datatype" + + with pytest.raises(RuntimeError): + del ds.attrs[key] class TestMode(ModeTestBase):