Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backend Configuration IIIb] Configure DynamicTable router for set_data_io #700

Merged
merged 17 commits into from
Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* Added a `from_nwbfile` class method constructor to all `BackendConfiguration` models. [PR #673](https://github.com/catalystneuro/neuroconv/pull/673)
* Added compression to `FicTracDataInterface`. [PR #678](https://github.com/catalystneuro/neuroconv/pull/678)
* Exposed `block_index` to all OpenEphys interfaces. [PR #695](https://github.com/catalystneuro/neuroconv/pull/695)
* Added support for `DynamicTable` columns in the `configure_backend` tool function. [PR #700](https://github.com/catalystneuro/neuroconv/pull/700)



Expand Down
2 changes: 1 addition & 1 deletion requirements-minimal.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ jsonschema>=3.2.0
PyYAML>=5.4
scipy>=1.4.1
h5py>=3.9.0
hdmf>=3.11.0
hdmf @ git+https://github.com/hdmf-dev/hdmf.git@dev
CodyCBakerPhD marked this conversation as resolved.
Show resolved Hide resolved
hdmf_zarr>=0.4.0
pynwb>=2.3.2;python_version>='3.8'
pydantic>=1.10.13,<2.0.0
Expand Down
12 changes: 10 additions & 2 deletions src/neuroconv/tools/nwb_helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
"""Collection of Pydantic models and helper functions for configuring dataset IO parameters for different backends."""
# Mark these imports as private to avoid polluting the namespace; only used in global BACKEND_NWB_IO mapping
from hdmf_zarr import NWBZarrIO as _NWBZarrIO
from pynwb import NWBHDF5IO as _NWBHDF5IO

from ._backend_configuration import get_default_backend_configuration
from ._configuration_models._base_backend import BackendConfiguration
from ._configuration_models._base_dataset_io import DatasetInfo, DatasetIOConfiguration
Expand All @@ -24,13 +28,17 @@

BACKEND_CONFIGURATIONS = dict(hdf5=HDF5BackendConfiguration, zarr=ZarrBackendConfiguration)
DATASET_IO_CONFIGURATIONS = dict(hdf5=HDF5DatasetIOConfiguration, zarr=ZarrDatasetIOConfiguration)
BACKEND_NWB_IO = dict(hdf5=_NWBHDF5IO, zarr=_NWBZarrIO)

__all__ = [
"AVAILABLE_HDF5_COMPRESSION_METHODS",
"AVAILABLE_ZARR_COMPRESSION_METHODS",
"BACKEND_CONFIGURATIONS",
"DATASET_IO_CONFIGURATIONS",
"BACKEND_NWB_IO",
"get_default_backend_configuration",
"get_default_dataset_io_configurations",
"configure_backend",
"AVAILABLE_HDF5_COMPRESSION_METHODS",
"AVAILABLE_ZARR_COMPRESSION_METHODS",
"BackendConfiguration",
"DatasetIOConfiguration",
"get_default_dataset_io_configurations",
Expand Down
14 changes: 12 additions & 2 deletions src/neuroconv/tools/nwb_helpers/_configure_backend.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""Collection of helper functions related to configuration of datasets dependent on backend."""
from typing import Union

from pynwb import NWBFile
from hdmf.common import Data
from pynwb import NWBFile, TimeSeries

from ._configuration_models._hdf5_backend import HDF5BackendConfiguration
from ._configuration_models._zarr_backend import ZarrBackendConfiguration
Expand All @@ -21,4 +22,13 @@ def configure_backend(

# TODO: update buffer shape in iterator, if present

nwbfile_objects[object_id].set_data_io(dataset_name=dataset_name, data_io_class=data_io_class, **data_io_kwargs)
nwbfile_object = nwbfile_objects[object_id]
if isinstance(nwbfile_object, Data):
nwbfile_object.set_data_io(data_io_class=data_io_class, data_io_kwargs=data_io_kwargs)
elif isinstance(nwbfile_object, TimeSeries):
nwbfile_object.set_data_io(dataset_name=dataset_name, data_io_class=data_io_class, **data_io_kwargs)
else: # Strictly speaking, it would be odd if a backend_configuration led to this, but might as well be safe
raise NotImplementedError(
f"Unsupported object type {type(nwbfile_object)} for backend "
f"configuration of {nwbfile_object.name}!"
)
1 change: 1 addition & 0 deletions src/neuroconv/tools/spikeinterface/spikeinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import warnings
from collections import defaultdict
from numbers import Real
from pathlib import Path
from typing import List, Literal, Optional, Union

import numpy as np
Expand Down
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we could add a ragged array test. Does chunking make sense in those cases? I guess it is very difficult to define a good chunking pattern unless your entries are homogeneous.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would; I think we do have general configuration tests for this, just not for the configure_backend call since in principle there's nothing at all different about those special columns that have _index in the name (they are still just VectorData at the end of the day)

Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
import numcodecs
import numpy as np
import pytest
from hdmf.common import DynamicTable, VectorData
from hdmf.data_utils import DataChunkIterator
from hdmf_zarr import NWBZarrIO
from pynwb import NWBHDF5IO
from pynwb.testing.mock.base import mock_TimeSeries
from pynwb.testing.mock.file import mock_NWBFile

from neuroconv.tools.hdmf import SliceableDataChunkIterator
from neuroconv.tools.nwb_helpers import (
BACKEND_NWB_IO,
configure_backend,
get_default_backend_configuration,
)
Expand All @@ -31,8 +31,6 @@
def test_simple_time_series(
tmpdir: Path, case_name: str, iterator: callable, iterator_options: dict, backend: Literal["hdf5", "zarr"]
):
BACKEND_NWB_IO = dict(hdf5=NWBHDF5IO, zarr=NWBZarrIO)

array = np.zeros(shape=(30_000 * 5, 384), dtype="int16")
data = iterator(array, **iterator_options)

Expand All @@ -44,7 +42,7 @@ def test_simple_time_series(
dataset_configuration = backend_configuration.dataset_configurations["acquisition/TestTimeSeries/data"]
configure_backend(nwbfile=nwbfile, backend_configuration=backend_configuration)

nwbfile_path = str(tmpdir / f"test_configure_{backend}_defaults_{case_name}_data.nwb.h5")
nwbfile_path = str(tmpdir / f"test_configure_defaults_{case_name}_time_series.nwb.{backend}")
with BACKEND_NWB_IO[backend](path=nwbfile_path, mode="w") as io:
io.write(nwbfile)

Expand All @@ -58,3 +56,34 @@ def test_simple_time_series(
assert written_data.compression == "gzip"
elif backend == "zarr":
assert written_data.compressor == numcodecs.GZip(level=1)


@pytest.mark.parametrize("backend", ["hdf5", "zarr"])
def test_simple_dynamic_table(tmpdir: Path, backend: Literal["hdf5", "zarr"]):
    """Write a `DynamicTable` column with the default backend configuration and verify the result on disk.

    The column is configured through `configure_backend`, written with the IO class
    registered for `backend`, and read back to check its chunking and gzip compression.
    """
    column_values = np.zeros(shape=(30_000 * 5, 384), dtype="int16")

    nwbfile = mock_NWBFile()
    test_column = VectorData(name="TestColumn", description="", data=column_values)
    table = DynamicTable(name="TestDynamicTable", description="", columns=[test_column])
    nwbfile.add_acquisition(table)

    default_configuration = get_default_backend_configuration(nwbfile=nwbfile, backend=backend)
    column_configuration = default_configuration.dataset_configurations[
        "acquisition/TestDynamicTable/TestColumn/data"
    ]
    configure_backend(nwbfile=nwbfile, backend_configuration=default_configuration)

    io_class = BACKEND_NWB_IO[backend]
    output_path = str(tmpdir / f"test_configure_defaults_dynamic_table.nwb.{backend}")
    with io_class(path=output_path, mode="w") as writer:
        writer.write(nwbfile)

    # Assertions stay inside the read context so the dataset handle is still open.
    with io_class(path=output_path, mode="r") as reader:
        roundtrip_nwbfile = reader.read()
        roundtrip_table = roundtrip_nwbfile.acquisition["TestDynamicTable"]
        roundtrip_data = roundtrip_table["TestColumn"].data

        assert roundtrip_data.chunks == column_configuration.chunk_shape

        if backend == "hdf5":
            assert roundtrip_data.compression == "gzip"
        elif backend == "zarr":
            assert roundtrip_data.compressor == numcodecs.GZip(level=1)
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
import numcodecs
import numpy as np
import pytest
from hdmf.common import DynamicTable, VectorData
from hdmf.data_utils import DataChunkIterator
from hdmf_zarr import NWBZarrIO
from pynwb import NWBHDF5IO
from pynwb.testing.mock.base import mock_TimeSeries
from pynwb.testing.mock.file import mock_NWBFile

from neuroconv.tools.hdmf import SliceableDataChunkIterator
from neuroconv.tools.nwb_helpers import (
BACKEND_NWB_IO,
configure_backend,
get_default_backend_configuration,
)
Expand All @@ -31,8 +31,6 @@
def test_simple_time_series_override(
tmpdir: Path, case_name: str, iterator: callable, iterator_options: dict, backend: Literal["hdf5", "zarr"]
):
BACKEND_NWB_IO = dict(hdf5=NWBHDF5IO, zarr=NWBZarrIO)

array = np.zeros(shape=(30_000 * 5, 384), dtype="int16")
data = iterator(array, **iterator_options)

Expand All @@ -59,7 +57,7 @@ def test_simple_time_series_override(
if case_name != "unwrapped": # TODO: eventually, even this case will be buffered automatically
assert nwbfile.acquisition["TestTimeSeries"].data

nwbfile_path = str(tmpdir / f"test_configure_{backend}_defaults_{case_name}_data.nwb.h5")
nwbfile_path = str(tmpdir / f"test_configure_defaults_{case_name}_data.nwb.{backend}")
with BACKEND_NWB_IO[backend](path=nwbfile_path, mode="w") as io:
io.write(nwbfile)

Expand All @@ -74,3 +72,45 @@ def test_simple_time_series_override(
assert written_data.compression_opts == higher_gzip_level
elif backend == "zarr":
assert written_data.compressor == numcodecs.GZip(level=5)


@pytest.mark.parametrize("backend", ["hdf5", "zarr"])
def test_simple_dynamic_table_override(tmpdir: Path, backend: Literal["hdf5", "zarr"]):
    """Override the default chunking and gzip level of a `DynamicTable` column and verify the result on disk.

    The default dataset configuration for the column is modified in place (smaller chunk
    shape, higher gzip level) before `configure_backend` is called; the file is then
    written and read back to confirm the overrides were applied.
    """
    data = np.zeros(shape=(30_000 * 5, 384), dtype="int16")

    nwbfile = mock_NWBFile()
    dynamic_table = DynamicTable(
        name="TestDynamicTable", description="", columns=[VectorData(name="TestColumn", description="", data=data)]
    )
    nwbfile.add_acquisition(dynamic_table)

    backend_configuration = get_default_backend_configuration(nwbfile=nwbfile, backend=backend)
    dataset_configuration = backend_configuration.dataset_configurations["acquisition/TestDynamicTable/TestColumn/data"]

    smaller_chunk_shape = (30_000, 64)
    dataset_configuration.chunk_shape = smaller_chunk_shape

    # Both backends take the gzip level through the same `compression_options` field,
    # so no per-backend branching is needed here.
    higher_gzip_level = 5
    dataset_configuration.compression_options = dict(level=higher_gzip_level)

    configure_backend(nwbfile=nwbfile, backend_configuration=backend_configuration)

    nwbfile_path = str(tmpdir / f"test_configure_defaults_dynamic_table.nwb.{backend}")
    NWB_IO = BACKEND_NWB_IO[backend]
    with NWB_IO(path=nwbfile_path, mode="w") as io:
        io.write(nwbfile)

    # Assertions stay inside the read context so the dataset handle is still open.
    with NWB_IO(path=nwbfile_path, mode="r") as io:
        written_nwbfile = io.read()
        written_data = written_nwbfile.acquisition["TestDynamicTable"]["TestColumn"].data

        assert written_data.chunks == smaller_chunk_shape

        if backend == "hdf5":
            assert written_data.compression == "gzip"
            assert written_data.compression_opts == higher_gzip_level
        elif backend == "zarr":
            assert written_data.compressor == numcodecs.GZip(level=higher_gzip_level)
Loading