Skip to content
This repository has been archived by the owner on Oct 1, 2024. It is now read-only.

Commit

Permalink
safer N5 dataset metadata handling
Browse files Browse the repository at this point in the history
  • Loading branch information
clbarnes committed Jul 15, 2019
1 parent 2c48699 commit d407689
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 11 deletions.
46 changes: 39 additions & 7 deletions pyn5/attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import json
from contextlib import contextmanager
from copy import deepcopy
from functools import wraps

from pathlib import Path

from typing import Iterator, Any, Dict
Expand All @@ -22,6 +24,16 @@ def default(self, obj):
return super().default(obj)


def restrict_metadata(fn):
"""Decorator for AttributeManager methods which prevents mutation of N5 metadata"""
@wraps(fn)
def wrapped(obj: AttributeManager, key, *args, **kwargs):
if obj._is_dataset() and key in obj._dataset_keys:
raise RuntimeError(f"N5 metadata (key '{key}') cannot be mutated")
return fn(obj, key, *args, **kwargs)
return mutation(wrapped)


class AttributeManager(AttributeManagerBase):
"""Object which reads and writes group attributes as JSON.
Expand All @@ -44,6 +56,7 @@ def __init__(self, dpath: Path, mode=Mode.default()):
"""
self._path = Path(dpath) / "attributes.json"
self._dump_kwargs = deepcopy(self._dump_kwargs)
self._has_dataset_keys_ = None
super().__init__(mode)

@classmethod
Expand All @@ -56,12 +69,12 @@ def from_parent(cls, parent: H5ObjectLike) -> AttributeManager:
"""
return cls(parent._path, parent.mode)

@mutation
@restrict_metadata
def __setitem__(self, k, v) -> None:
with self._open_attributes(True) as attrs:
attrs[k] = v

@mutation
@restrict_metadata
def __delitem__(self, v) -> None:
with self._open_attributes(True) as attrs:
del attrs[v]
Expand Down Expand Up @@ -95,21 +108,40 @@ def __contains__(self, item):
with self._open_attributes() as attrs:
return item in attrs

def _is_dataset(self):
with self._open_attributes() as attrs:
return self._dataset_keys.issubset(attrs)
def _is_dataset(self) -> bool:
if self._has_dataset_keys_ is None:
try:
with open(self._path, "r") as f:
self._has_dataset_keys_ = self._dataset_keys.issubset(json.load(f))
except ValueError:
self._has_dataset_keys_ = False
except IOError as e:
if e.errno == errno.ENOENT:
self._has_dataset_keys_ = False
else:
raise
return self._has_dataset_keys_

@contextmanager
def _open_attributes(self, write: bool = False) -> Dict[str, Any]:
"""Return attributes as a context manager.
N5 metadata keys are stripped from the dict.
:param write: Whether to write changes to the attributes dict.
:return: attributes as a dict (including N5 metadata)
:return: attributes as a dict
"""
attributes = self._read_attributes()

if self._is_dataset():
hidden_attrs = {k: attributes.pop(k) for k in self._dataset_keys}
else:
hidden_attrs = dict()

yield attributes
if write:
self._write_attributes(attributes)
hidden_attrs.update(attributes)
self._write_attributes(hidden_attrs)

def _read_attributes(self):
"""Return attributes or an empty dict if they do not exist"""
Expand Down
12 changes: 9 additions & 3 deletions pyn5/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,16 @@ def __init__(self, name: str, parent: "Group"): # noqa would need circular impo
self._path = self.parent._path / name
self._attrs = AttributeManager.from_parent(self)

with self._attrs._open_attributes() as attrs:
attrs = self._attrs._read_attributes()

try:
self._shape = tuple(attrs["dimensions"][::-1])
self._dtype = np.dtype(self.attrs["dataType"].lower())
self._chunks = tuple(self.attrs["blockSize"][::-1])
self._dtype = np.dtype(attrs["dataType"].lower())
self._chunks = tuple(attrs["blockSize"][::-1])
except KeyError:
raise ValueError(
f"Not a dataset (missing metadata key): " + str(self._path)
)

self._impl = dataset_types[self.dtype](
str(self.file._path),
Expand Down
28 changes: 27 additions & 1 deletion tests/test_h5_like.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import json

import pytest
import shutil
import tempfile
from copy import deepcopy
Expand All @@ -23,7 +26,30 @@ class TestGroup(GroupTestBase):

class TestDataset(DatasetTestBase):
dataset_kwargs = ds_kwargs
pass

def test_has_metadata(self, file_):
ds = self.dataset(file_)
with open(ds.attrs._path) as f:
attrs = json.load(f)
for key in ds.attrs._dataset_keys:
assert key in attrs

def test_no_return_metadata(self, file_):
ds = self.dataset(file_)

for key in ds.attrs._dataset_keys:
assert key not in ds.attrs
assert key not in dict(ds.attrs)

def test_no_mutate_metadata(self, file_):
ds = self.dataset(file_)

for key in ds.attrs._dataset_keys:
with pytest.raises(RuntimeError):
ds.attrs[key] = "not a datatype"

with pytest.raises(RuntimeError):
del ds.attrs[key]


class TestMode(ModeTestBase):
Expand Down

0 comments on commit d407689

Please sign in to comment.