diff --git a/CHANGELOG.md b/CHANGELOG.md index ce3a84cee..9b1e3c476 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ # HDMF Changelog -## HDMF 3.11.1 (Upcoming) +## HDMF 3.12.0 (Upcoming) + +### Enhancements +- Add Data.set_data_io(), which allows for setting a DataIO to a data object after-the-fact. @bendichter and @CodyCBakerPhD [#1013](https://github.com/hdmf-dev/hdmf/pull/1013) ### Minor Improvements - Updated `__gather_columns` to ignore the order of bases when generating columns from the super class. @mavaylon1 [#991](https://github.com/hdmf-dev/hdmf/pull/991) diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 7c0b5fdf4..229e20083 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -747,8 +747,9 @@ def __smart_str_dict(d, num_indent): out += '\n' + indent + right_br return out - def set_data_io(self, dataset_name: str, data_io_class: Type[DataIO], **kwargs) -> None: + def set_data_io(self, dataset_name: str, data_io_class: Type[DataIO], data_io_kwargs: dict = None, **kwargs): """ + Apply DataIO object to a dataset field of the Container. Parameters ---------- @@ -756,13 +757,24 @@ def set_data_io(self, dataset_name: str, data_io_class: Type[DataIO], **kwargs) Name of dataset to wrap in DataIO data_io_class: Type[DataIO] Class to use for DataIO, e.g. H5DataIO or ZarrDataIO + data_io_kwargs: dict + keyword arguments passed to the constructor of the DataIO class. **kwargs: - kwargs are passed to the constructor of the DataIO class + DEPRECATED. Use data_io_kwargs instead. + kwargs are passed to the constructor of the DataIO class. """ + if kwargs or (data_io_kwargs is None): + warn( + "Use of **kwargs in Container.set_data_io() is deprecated. 
Please pass the DataIO kwargs as a " + "dictionary to the `data_io_kwargs` parameter instead.", + DeprecationWarning, + stacklevel=2 + ) + data_io_kwargs = kwargs data = self.fields.get(dataset_name) if data is None: raise ValueError(f"{dataset_name} is None and cannot be wrapped in a DataIO class") - self.fields[dataset_name] = data_io_class(data=data, **kwargs) + self.fields[dataset_name] = data_io_class(data=data, **data_io_kwargs) class Data(AbstractContainer): @@ -797,10 +809,28 @@ def set_dataio(self, **kwargs): """ Apply DataIO object to the data held by this Data object """ + warn( + "Data.set_dataio() is deprecated. Please use Data.set_data_io() instead.", + DeprecationWarning, + stacklevel=2, + ) dataio = getargs('dataio', kwargs) dataio.data = self.__data self.__data = dataio + def set_data_io(self, data_io_class: Type[DataIO], data_io_kwargs: dict) -> None: + """ + Apply DataIO object to the data held by this Data object. + + Parameters + ---------- + data_io_class: Type[DataIO] + The DataIO to apply to the data held by this Data. + data_io_kwargs: dict + The keyword arguments to pass to the DataIO. 
+ """ + self.__data = data_io_class(data=self.__data, **data_io_kwargs) + @docval({'name': 'func', 'type': types.FunctionType, 'doc': 'a function to transform *data*'}) def transform(self, **kwargs): """ diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 0e3fbc9eb..b5a2d87e8 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -2,7 +2,6 @@ from uuid import uuid4, UUID import os -from hdmf.backends.hdf5 import H5DataIO from hdmf.container import AbstractContainer, Container, Data, HERDManager from hdmf.common.resources import HERD from hdmf.testing import TestCase @@ -397,28 +396,6 @@ def test_get_ancestors(self): self.assertTupleEqual(parent_obj.get_ancestors(), (grandparent_obj, )) self.assertTupleEqual(child_obj.get_ancestors(), (parent_obj, grandparent_obj)) - def test_set_data_io(self): - - class ContainerWithData(Container): - __fields__ = ('data1', 'data2') - - @docval( - {"name": "name", "doc": "name", "type": str}, - {'name': 'data1', 'doc': 'field1 doc', 'type': list}, - {'name': 'data2', 'doc': 'field2 doc', 'type': list, 'default': None} - ) - def __init__(self, **kwargs): - super().__init__(name=kwargs["name"]) - self.data1 = kwargs["data1"] - self.data2 = kwargs["data2"] - - obj = ContainerWithData("name", [1, 2, 3, 4, 5], None) - obj.set_data_io("data1", H5DataIO, chunks=True) - assert isinstance(obj.data1, H5DataIO) - - with self.assertRaises(ValueError): - obj.set_data_io("data2", H5DataIO, chunks=True) - class TestHTMLRepr(TestCase): diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 90934df94..5a4fd5a32 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -20,7 +20,7 @@ from hdmf.backends.errors import UnsupportedOperation from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, OrphanContainerBuildError, LinkBuilder from hdmf.container import Container -from hdmf import Data +from hdmf import 
Data, docval from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError from hdmf.spec.catalog import SpecCatalog from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace @@ -3671,3 +3671,57 @@ def test_hdf5io_can_read(): assert not HDF5IO.can_read("not_a_file") assert HDF5IO.can_read("tests/unit/back_compat_tests/1.0.5.h5") assert not HDF5IO.can_read(__file__) # this file is not an HDF5 file + + +class TestContainerSetDataIO(TestCase): + + def setUp(self) -> None: + class ContainerWithData(Container): + __fields__ = ('data1', 'data2') + + @docval( + {"name": "name", "doc": "name", "type": str}, + {'name': 'data1', 'doc': 'field1 doc', 'type': list}, + {'name': 'data2', 'doc': 'field2 doc', 'type': list, 'default': None} + ) + def __init__(self, **kwargs): + super().__init__(name=kwargs["name"]) + self.data1 = kwargs["data1"] + self.data2 = kwargs["data2"] + + self.obj = ContainerWithData("name", [1, 2, 3, 4, 5], None) + + def test_set_data_io(self): + self.obj.set_data_io("data1", H5DataIO, data_io_kwargs=dict(chunks=True)) + assert isinstance(self.obj.data1, H5DataIO) + assert self.obj.data1.io_settings["chunks"] + + def test_fail_set_data_io(self): + """Attempt to set a DataIO for a dataset that is missing.""" + with self.assertRaisesWith(ValueError, "data2 is None and cannot be wrapped in a DataIO class"): + self.obj.set_data_io("data2", H5DataIO, data_io_kwargs=dict(chunks=True)) + + def test_set_data_io_old_api(self): + """Test that using the kwargs still works but throws a warning.""" + msg = ( + "Use of **kwargs in Container.set_data_io() is deprecated. Please pass the DataIO kwargs as a dictionary to" + " the `data_io_kwargs` parameter instead." 
+ ) + with self.assertWarnsWith(DeprecationWarning, msg): + self.obj.set_data_io("data1", H5DataIO, chunks=True) + self.assertIsInstance(self.obj.data1, H5DataIO) + self.assertTrue(self.obj.data1.io_settings["chunks"]) + + +class TestDataSetDataIO(TestCase): + + def setUp(self): + class MyData(Data): + pass + + self.data = MyData("my_data", [1, 2, 3]) + + def test_set_data_io(self): + self.data.set_data_io(H5DataIO, dict(chunks=True)) + assert isinstance(self.data.data, H5DataIO) + assert self.data.data.io_settings["chunks"]