From 258b53653547f93a1518d06205ee880935c2adb3 Mon Sep 17 00:00:00 2001
From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com>
Date: Tue, 20 Feb 2024 14:49:24 -0500
Subject: [PATCH] [Bug] Fix configure_backend to account for data or timestamp
 links (#732)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Heberto Mayorquin <h.mayorquin@gmail.com>
---
 .gitignore                                  |  3 +
 CHANGELOG.md                                |  3 +-
 .../tools/nwb_helpers/_configure_backend.py | 14 +++-
 .../test_configure_backend_defaults.py      | 82 +++++++++++++++++++
 4 files changed, 97 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5c00276a5..04504e9e0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -124,3 +124,6 @@ venv.bak/
 # NWB
 .nwb
 .pre-commit-config.yaml
+
+# Spyder
+.spyproject
diff --git a/CHANGELOG.md b/CHANGELOG.md
index dda694fc5..ec70cbd3f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,8 @@
 ### Bug fixes
 * LocalPathExpander matches only `folder_paths` or `file_paths` if that is indicated in the passed specification. [PR #679](https://github.com/catalystneuro/neuroconv/pull/675) and [PR #675](https://github.com/catalystneuro/neuroconv/pull/679
 * Fixed depth consideration in partial chunking pattern for the ROI data buffer. [PR #677](https://github.com/catalystneuro/neuroconv/pull/677)
-* Fix mapping between channel names and the electrode table when writing more than one `ElectricalSeries` to the NWBFile. This fixes an issue when the converter pipeline of `SpikeGLXConverterPipe` was writing the electrode table region of the NIDQ stream incorrectly [PR #678](https://github.com/catalystneuro/neuroconv/pull/678)
+* Fix mapping between channel names and the electrode table when writing more than one `ElectricalSeries` to the NWBFile. This fixes an issue when the converter pipeline of `SpikeGLXConverterPipe` was writing the electrode table region of the NIDQ stream incorrectly. [PR #678](https://github.com/catalystneuro/neuroconv/pull/678)
+* Fix `configure_backend` when applied to `TimeSeries` contents that leverage internal links for `data` or `timestamps`. [PR #732](https://github.com/catalystneuro/neuroconv/pull/732)
 
 ### Features
 * Changed the `Suite2pSegmentationInterface` to support multiple plane segmentation outputs. The interface now has a `plane_name` and `channel_name` arguments to determine which plane output and channel trace add to the NWBFile. [PR #601](https://github.com/catalystneuro/neuroconv/pull/601)
diff --git a/src/neuroconv/tools/nwb_helpers/_configure_backend.py b/src/neuroconv/tools/nwb_helpers/_configure_backend.py
index 6f1abfb55..a2dcaec69 100644
--- a/src/neuroconv/tools/nwb_helpers/_configure_backend.py
+++ b/src/neuroconv/tools/nwb_helpers/_configure_backend.py
@@ -24,12 +24,18 @@ def configure_backend(
         # TODO: update buffer shape in iterator, if present
 
         nwbfile_object = nwbfile_objects[object_id]
+        is_dataset_linked = isinstance(nwbfile_object.fields.get(dataset_name), TimeSeries)
+
+        # Table columns
         if isinstance(nwbfile_object, Data):
             nwbfile_object.set_data_io(data_io_class=data_io_class, data_io_kwargs=data_io_kwargs)
-        elif isinstance(nwbfile_object, TimeSeries):
+        # TimeSeries data or timestamps
+        elif isinstance(nwbfile_object, TimeSeries) and not is_dataset_linked:
             nwbfile_object.set_data_io(dataset_name=dataset_name, data_io_class=data_io_class, **data_io_kwargs)
-        else:  # Strictly speaking, it would be odd if a backend_configuration led to this, but might as well be safe
+        # Skip the setting of a DataIO when target dataset is a link (assume it will be found in parent)
+        elif isinstance(nwbfile_object, TimeSeries) and is_dataset_linked:
+            continue
+        # Strictly speaking, it would be odd if a backend_configuration led to this, but might as well be safe
+        else:
             raise NotImplementedError(
-                f"Unsupported object type {type(nwbfile_object)} for backend "
-                f"configuration of {nwbfile_object.name}!"
+                f"Unsupported object type {type(nwbfile_object)} for backend configuration of {nwbfile_object.name}!"
             )
diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_configure_backend_defaults.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_configure_backend_defaults.py
index e3bcb4146..693d8e4d0 100644
--- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_configure_backend_defaults.py
+++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_configure_backend_defaults.py
@@ -115,3 +115,85 @@ def test_simple_dynamic_table(tmpdir: Path, integer_array: np.ndarray, backend:
             assert written_data.compressor == numcodecs.GZip(level=1)
 
         assert_array_equal(x=integer_array, y=written_data[:])
+
+
+@pytest.mark.parametrize(
+    "case_name,iterator,data_iterator_options,timestamps_iterator_options",
+    [
+        ("unwrapped", lambda x: x, dict(), dict()),
+        ("generic", SliceableDataChunkIterator, dict(), dict()),
+        ("classic", DataChunkIterator, dict(iter_axis=1, buffer_size=30_000), dict(buffer_size=30_000)),
+        # Need to hardcode buffer size in classic case or else it takes forever...
+    ],
+)
+@pytest.mark.parametrize("backend", ["hdf5", "zarr"])
+def test_time_series_timestamps_linkage(
+    tmpdir: Path,
+    integer_array: np.ndarray,
+    case_name: str,
+    iterator: callable,
+    data_iterator_options: dict,
+    timestamps_iterator_options: dict,
+    backend: Literal["hdf5", "zarr"],
+):
+    data_1 = iterator(integer_array, **data_iterator_options)
+    data_2 = iterator(integer_array, **data_iterator_options)
+
+    timestamps_array = np.linspace(start=0.0, stop=1.0, num=integer_array.shape[0])
+    timestamps = iterator(timestamps_array, **timestamps_iterator_options)
+
+    nwbfile = mock_NWBFile()
+    time_series_1 = mock_TimeSeries(name="TestTimeSeries1", data=data_1, timestamps=timestamps, rate=None)
+    nwbfile.add_acquisition(time_series_1)
+
+    time_series_2 = mock_TimeSeries(name="TestTimeSeries2", data=data_2, timestamps=time_series_1, rate=None)
+    nwbfile.add_acquisition(time_series_2)
+
+    # Note that the field will still show up in the configuration display
+    backend_configuration = get_default_backend_configuration(nwbfile=nwbfile, backend=backend)
+    # print(backend_configuration)
+    dataset_configuration_1 = backend_configuration.dataset_configurations["acquisition/TestTimeSeries1/data"]
+    dataset_configuration_2 = backend_configuration.dataset_configurations["acquisition/TestTimeSeries2/data"]
+    timestamps_configuration_1 = backend_configuration.dataset_configurations["acquisition/TestTimeSeries1/timestamps"]
+    timestamps_configuration_2 = backend_configuration.dataset_configurations["acquisition/TestTimeSeries2/timestamps"]
+    configure_backend(nwbfile=nwbfile, backend_configuration=backend_configuration)
+
+    if case_name != "unwrapped":  # TODO: eventually, even this case will be buffered automatically
+        assert nwbfile.acquisition["TestTimeSeries1"].data
+        assert nwbfile.acquisition["TestTimeSeries2"].data
+        assert nwbfile.acquisition["TestTimeSeries1"].timestamps
+        assert nwbfile.acquisition["TestTimeSeries2"].timestamps
+
+    nwbfile_path = str(tmpdir / f"test_time_series_timestamps_linkage_{case_name}_data.nwb.{backend}")
+    with BACKEND_NWB_IO[backend](path=nwbfile_path, mode="w") as io:
+        io.write(nwbfile)
+
+    with BACKEND_NWB_IO[backend](path=nwbfile_path, mode="r") as io:
+        written_nwbfile = io.read()
+
+        written_data_1 = written_nwbfile.acquisition["TestTimeSeries1"].data
+        assert written_data_1.chunks == dataset_configuration_1.chunk_shape
+        if backend == "hdf5":
+            assert written_data_1.compression == "gzip"
+        elif backend == "zarr":
+            assert written_data_1.compressor == numcodecs.GZip(level=1)
+        assert_array_equal(x=integer_array, y=written_data_1[:])
+
+        written_data_2 = written_nwbfile.acquisition["TestTimeSeries2"].data
+        assert written_data_2.chunks == dataset_configuration_2.chunk_shape
+        if backend == "hdf5":
+            assert written_data_2.compression == "gzip"
+        elif backend == "zarr":
+            assert written_data_2.compressor == numcodecs.GZip(level=1)
+        assert_array_equal(x=integer_array, y=written_data_2[:])
+
+        written_timestamps_1 = written_nwbfile.acquisition["TestTimeSeries1"].timestamps
+        assert written_timestamps_1.chunks == timestamps_configuration_1.chunk_shape
+        if backend == "hdf5":
+            assert written_timestamps_1.compression == "gzip"
+        elif backend == "zarr":
+            assert written_timestamps_1.compressor == numcodecs.GZip(level=1)
+        assert_array_equal(x=timestamps_array, y=written_timestamps_1[:])
+
+        written_timestamps_2 = written_nwbfile.acquisition["TestTimeSeries2"].timestamps
+        assert written_timestamps_2 == written_timestamps_1
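
Editorial note (not part of the committed patch): the sketch below illustrates the timestamp-linkage scenario this fix addresses. It assumes a neuroconv installation that already contains this change and pynwb's mock object helpers; the object names, array sizes, and output path are illustrative only, not taken from the patch.

import numpy as np
from pynwb import NWBHDF5IO
from pynwb.testing.mock.base import mock_TimeSeries
from pynwb.testing.mock.file import mock_NWBFile

from neuroconv.tools.nwb_helpers import configure_backend, get_default_backend_configuration

# Build two TimeSeries; the second re-uses (links to) the timestamps of the first.
demo_array = np.arange(2_000, dtype="int16").reshape(-1, 2)
demo_timestamps = np.linspace(start=0.0, stop=1.0, num=demo_array.shape[0])

nwbfile = mock_NWBFile()
series_1 = mock_TimeSeries(name="Series1", data=demo_array, timestamps=demo_timestamps, rate=None)
nwbfile.add_acquisition(series_1)
series_2 = mock_TimeSeries(name="Series2", data=demo_array + 1, timestamps=series_1, rate=None)
nwbfile.add_acquisition(series_2)

# As the new test notes, the linked timestamps may still appear in the configuration display,
# but configure_backend now skips that linked dataset instead of trying to wrap it in a DataIO,
# so the internal link from Series2 to Series1 is preserved when the file is written.
backend_configuration = get_default_backend_configuration(nwbfile=nwbfile, backend="hdf5")
configure_backend(nwbfile=nwbfile, backend_configuration=backend_configuration)

with NWBHDF5IO(path="demo_linked_timestamps.nwb", mode="w") as io:
    io.write(nwbfile)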