Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add set_data_io to Data #1013

Merged
merged 17 commits into from
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# HDMF Changelog

## HDMF 3.11.1 (Upcoming)
## HDMF 3.12.0 (Upcoming)

### Enhancements
- Add `Data.set_data_io()`, which allows for setting a `DataIO` on a data object after the fact. @bendichter and @CodyCBakerPhD [#1013](https://github.com/hdmf-dev/hdmf/pull/1013)

### Minor Improvements
- Updated `__gather_columns` to ignore the order of bases when generating columns from the super class. @mavaylon1 [#991](https://github.com/hdmf-dev/hdmf/pull/991)
Expand Down
36 changes: 33 additions & 3 deletions src/hdmf/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,22 +747,34 @@ def __smart_str_dict(d, num_indent):
out += '\n' + indent + right_br
return out

def set_data_io(
    self,
    dataset_name: str,
    data_io_class: Type[DataIO],
    data_io_kwargs: dict = None,
    **kwargs
) -> None:
    """
    Apply DataIO object to a dataset field of the Container.

    Parameters
    ----------
    dataset_name: str
        Name of dataset to wrap in DataIO
    data_io_class: Type[DataIO]
        Class to use for DataIO, e.g. H5DataIO or ZarrDataIO
    data_io_kwargs: dict
        keyword arguments passed to the constructor of the DataIO class.
    **kwargs:
        DEPRECATED. Use data_io_kwargs instead.
        kwargs are passed to the constructor of the DataIO class. If a key is given both here and in
        data_io_kwargs, the value from data_io_kwargs is used.

    Raises
    ------
    ValueError
        If the field named by dataset_name is None or has not been set.
    """
    # Work on a copy so the caller's dictionary is never aliased or modified.
    io_kwargs = {} if data_io_kwargs is None else dict(data_io_kwargs)
    if kwargs:
        # Warn only when the deprecated calling convention is actually used.
        warn(
            "Use of **kwargs in Container.set_data_io() is deprecated. Please pass the DataIO kwargs as a "
            "dictionary to the `data_io_kwargs` parameter instead.",
            DeprecationWarning,
            stacklevel=2
        )
        # Keep honoring legacy kwargs without discarding an explicitly supplied data_io_kwargs.
        io_kwargs = {**kwargs, **io_kwargs}
    data = self.fields.get(dataset_name)
    if data is None:
        raise ValueError(f"{dataset_name} is None and cannot be wrapped in a DataIO class")
    self.fields[dataset_name] = data_io_class(data=data, **io_kwargs)


class Data(AbstractContainer):
Expand Down Expand Up @@ -797,10 +809,28 @@ def set_dataio(self, **kwargs):
"""
Apply DataIO object to the data held by this Data object
"""
warn(
"Data.set_dataio() is deprecated. Please use Data.set_data_io() instead.",
DeprecationWarning,
stacklevel=2,
)
dataio = getargs('dataio', kwargs)
dataio.data = self.__data
self.__data = dataio

def set_data_io(self, data_io_class: Type[DataIO], data_io_kwargs: dict) -> None:
    """
    Apply DataIO object to the data held by this Data object.

    Parameters
    ----------
    data_io_class: Type[DataIO]
        The DataIO class to apply to the data held by this Data.
    data_io_kwargs: dict
        The keyword arguments to pass to the DataIO.
    """
    # The current data is handed to the DataIO constructor; the resulting wrapper replaces it.
    self.__data = data_io_class(data=self.__data, **data_io_kwargs)

@docval({'name': 'func', 'type': types.FunctionType, 'doc': 'a function to transform *data*'})
def transform(self, **kwargs):
"""
Expand Down
23 changes: 0 additions & 23 deletions tests/unit/test_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from uuid import uuid4, UUID
import os

from hdmf.backends.hdf5 import H5DataIO
from hdmf.container import AbstractContainer, Container, Data, HERDManager
from hdmf.common.resources import HERD
from hdmf.testing import TestCase
Expand Down Expand Up @@ -397,28 +396,6 @@ def test_get_ancestors(self):
self.assertTupleEqual(parent_obj.get_ancestors(), (grandparent_obj, ))
self.assertTupleEqual(child_obj.get_ancestors(), (parent_obj, grandparent_obj))

def test_set_data_io(self):
    """Check that set_data_io wraps a populated field in a DataIO and rejects a field that is None."""

    class ContainerWithData(Container):
        __fields__ = ('data1', 'data2')

        @docval(
            {"name": "name", "doc": "name", "type": str},
            {'name': 'data1', 'doc': 'field1 doc', 'type': list},
            {'name': 'data2', 'doc': 'field2 doc', 'type': list, 'default': None}
        )
        def __init__(self, **kwargs):
            super().__init__(name=kwargs["name"])
            self.data1 = kwargs["data1"]
            self.data2 = kwargs["data2"]

    obj = ContainerWithData("name", [1, 2, 3, 4, 5], None)
    obj.set_data_io("data1", H5DataIO, chunks=True)
    # TestCase assertion instead of a bare assert, which is stripped under `python -O`.
    self.assertIsInstance(obj.data1, H5DataIO)

    # data2 was constructed as None, so there is nothing to wrap.
    with self.assertRaises(ValueError):
        obj.set_data_io("data2", H5DataIO, chunks=True)


class TestHTMLRepr(TestCase):

Expand Down
56 changes: 55 additions & 1 deletion tests/unit/test_io_hdf5_h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from hdmf.backends.errors import UnsupportedOperation
from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, OrphanContainerBuildError, LinkBuilder
from hdmf.container import Container
from hdmf import Data
from hdmf import Data, docval
from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError
from hdmf.spec.catalog import SpecCatalog
from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace
Expand Down Expand Up @@ -3671,3 +3671,57 @@ def test_hdf5io_can_read():
assert not HDF5IO.can_read("not_a_file")
assert HDF5IO.can_read("tests/unit/back_compat_tests/1.0.5.h5")
assert not HDF5IO.can_read(__file__) # this file is not an HDF5 file


class TestContainerSetDataIO(TestCase):
    """Tests for Container.set_data_io, using H5DataIO as the DataIO class."""

    def setUp(self) -> None:
        # A minimal Container with one populated list field (data1) and one field left as None (data2).
        class ContainerWithData(Container):
            __fields__ = ('data1', 'data2')

            @docval(
                {"name": "name", "doc": "name", "type": str},
                {'name': 'data1', 'doc': 'field1 doc', 'type': list},
                {'name': 'data2', 'doc': 'field2 doc', 'type': list, 'default': None}
            )
            def __init__(self, **kwargs):
                super().__init__(name=kwargs["name"])
                self.data1 = kwargs["data1"]
                self.data2 = kwargs["data2"]

        self.obj = ContainerWithData("name", [1, 2, 3, 4, 5], None)

    def test_set_data_io(self):
        """Test that options passed via data_io_kwargs reach the DataIO wrapping the field."""
        self.obj.set_data_io("data1", H5DataIO, data_io_kwargs=dict(chunks=True))
        self.assertIsInstance(self.obj.data1, H5DataIO)
        self.assertTrue(self.obj.data1.io_settings["chunks"])

    def test_fail_set_data_io(self):
        """Attempt to set a DataIO for a dataset that is missing."""
        with self.assertRaisesWith(ValueError, "data2 is None and cannot be wrapped in a DataIO class"):
            self.obj.set_data_io("data2", H5DataIO, data_io_kwargs=dict(chunks=True))

    def test_set_data_io_old_api(self):
        """Test that using the kwargs still works but throws a warning."""
        msg = (
            "Use of **kwargs in Container.set_data_io() is deprecated. Please pass the DataIO kwargs as a dictionary to"
            " the `data_io_kwargs` parameter instead."
        )
        with self.assertWarnsWith(DeprecationWarning, msg):
            self.obj.set_data_io("data1", H5DataIO, chunks=True)
        self.assertIsInstance(self.obj.data1, H5DataIO)
        self.assertTrue(self.obj.data1.io_settings["chunks"])


class TestDataSetDataIO(TestCase):
    """Tests for Data.set_data_io, using H5DataIO as the DataIO class."""

    def setUp(self):
        # A trivial Data subclass holding a short list.
        class MyData(Data):
            pass

        self.data = MyData("my_data", [1, 2, 3])

    def test_set_data_io(self):
        """Test that the held data is replaced by an H5DataIO carrying the requested settings."""
        self.data.set_data_io(H5DataIO, dict(chunks=True))
        self.assertIsInstance(self.data.data, H5DataIO)
        self.assertTrue(self.data.data.io_settings["chunks"])