From 258b53653547f93a1518d06205ee880935c2adb3 Mon Sep 17 00:00:00 2001
From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com>
Date: Tue, 20 Feb 2024 14:49:24 -0500
Subject: [PATCH] [Bug] Fix configure_backend to account for data or timestamp
 links (#732)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Heberto Mayorquin <h.mayorquin@gmail.com>
---
 .gitignore                                    |  3 +
 CHANGELOG.md                                  |  3 +-
 .../tools/nwb_helpers/_configure_backend.py   | 14 +++-
 .../test_configure_backend_defaults.py        | 82 +++++++++++++++++++
 4 files changed, 97 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5c00276a5..04504e9e0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -124,3 +124,6 @@ venv.bak/
 # NWB
 .nwb
 .pre-commit-config.yaml
+
+# Spyder
+.spyproject
diff --git a/CHANGELOG.md b/CHANGELOG.md
index dda694fc5..ec70cbd3f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,8 @@
 ### Bug fixes
 * LocalPathExpander matches only `folder_paths` or `file_paths` if that is indicated in the passed specification. [PR #675](https://github.com/catalystneuro/neuroconv/pull/675) and [PR #679](https://github.com/catalystneuro/neuroconv/pull/679)
 * Fixed depth consideration in partial chunking pattern for the ROI data buffer. [PR #677](https://github.com/catalystneuro/neuroconv/pull/677)
-* Fix mapping between channel names and the electrode table when writing more than one `ElectricalSeries` to the NWBFile. This fixes an issue when the converter pipeline of `SpikeGLXConverterPipe` was writing the electrode table region of the NIDQ stream incorrectly [PR #678](https://github.com/catalystneuro/neuroconv/pull/678)
+* Fix mapping between channel names and the electrode table when writing more than one `ElectricalSeries` to the NWBFile. This fixes an issue when the converter pipeline of `SpikeGLXConverterPipe` was writing the electrode table region of the NIDQ stream incorrectly. [PR #678](https://github.com/catalystneuro/neuroconv/pull/678)
+* Fix `configure_backend` when applied to `TimeSeries` contents that leverage internal links for `data` or `timestamps`. [PR #732](https://github.com/catalystneuro/neuroconv/pull/732)
 
 ### Features
 * Changed the `Suite2pSegmentationInterface` to support multiple plane segmentation outputs. The interface now has `plane_name` and `channel_name` arguments to determine which plane output and channel trace to add to the NWBFile. [PR #601](https://github.com/catalystneuro/neuroconv/pull/601)
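
For context on the `configure_backend` entry above: the situation being fixed is a `TimeSeries` whose `data` or `timestamps` is an internal link to another series. A minimal sketch of how such a file might be built and configured (illustrative names only, not code from this patch):

```python
import numpy as np
from pynwb.testing.mock.base import mock_TimeSeries
from pynwb.testing.mock.file import mock_NWBFile

from neuroconv.tools.nwb_helpers import configure_backend, get_default_backend_configuration

nwbfile = mock_NWBFile()

# An ordinary TimeSeries with its own data and explicit timestamps.
series_a = mock_TimeSeries(
    name="SeriesA",
    data=np.arange(100, dtype="int16").reshape(50, 2),
    timestamps=np.linspace(start=0.0, stop=1.0, num=50),
    rate=None,
)
nwbfile.add_acquisition(series_a)

# A second TimeSeries that links its timestamps to the first by passing the
# TimeSeries object itself instead of an array.
series_b = mock_TimeSeries(
    name="SeriesB",
    data=np.arange(100, dtype="int16").reshape(50, 2),
    timestamps=series_a,
    rate=None,
)
nwbfile.add_acquisition(series_b)

# With this patch, the linked timestamps dataset is skipped rather than
# re-wrapped, and compression/chunking is applied through SeriesA.
backend_configuration = get_default_backend_configuration(nwbfile=nwbfile, backend="hdf5")
configure_backend(nwbfile=nwbfile, backend_configuration=backend_configuration)
```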
diff --git a/src/neuroconv/tools/nwb_helpers/_configure_backend.py b/src/neuroconv/tools/nwb_helpers/_configure_backend.py
index 6f1abfb55..a2dcaec69 100644
--- a/src/neuroconv/tools/nwb_helpers/_configure_backend.py
+++ b/src/neuroconv/tools/nwb_helpers/_configure_backend.py
@@ -24,12 +24,18 @@ def configure_backend(
         # TODO: update buffer shape in iterator, if present
 
         nwbfile_object = nwbfile_objects[object_id]
+        is_dataset_linked = isinstance(nwbfile_object.fields.get(dataset_name), TimeSeries)
+        # Table columns
         if isinstance(nwbfile_object, Data):
             nwbfile_object.set_data_io(data_io_class=data_io_class, data_io_kwargs=data_io_kwargs)
-        elif isinstance(nwbfile_object, TimeSeries):
+        # TimeSeries data or timestamps
+        elif isinstance(nwbfile_object, TimeSeries) and not is_dataset_linked:
             nwbfile_object.set_data_io(dataset_name=dataset_name, data_io_class=data_io_class, **data_io_kwargs)
-        else:  # Strictly speaking, it would be odd if a backend_configuration led to this, but might as well be safe
+        # Skip the setting of a DataIO when target dataset is a link (assume it will be found in parent)
+        elif isinstance(nwbfile_object, TimeSeries) and is_dataset_linked:
+            continue
+        # Strictly speaking, it would be odd if a backend_configuration led to this, but might as well be safe
+        else:
             raise NotImplementedError(
-                f"Unsupported object type {type(nwbfile_object)} for backend "
-                f"configuration of {nwbfile_object.name}!"
+                f"Unsupported object type {type(nwbfile_object)} for backend configuration of {nwbfile_object.name}!"
             )
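
A brief illustration of why the new `is_dataset_linked` check works (a sketch based on PyNWB's link behavior, not code from this patch): when one `TimeSeries` re-uses another's `data` or `timestamps`, the raw field stores the other `TimeSeries` object rather than an array, so an `isinstance` check on `fields.get(dataset_name)` identifies the link.

```python
import numpy as np
from pynwb import TimeSeries

timestamps = np.linspace(0.0, 1.0, num=10)
parent = TimeSeries(name="Parent", data=np.arange(10), unit="a.u.", timestamps=timestamps)

# Passing the parent TimeSeries as `timestamps` creates an internal timestamp link.
child = TimeSeries(name="Child", data=np.arange(10), unit="a.u.", timestamps=parent)

# The `timestamps` property transparently resolves the link to the parent's array ...
assert np.array_equal(child.timestamps, timestamps)

# ... but the underlying field holds the parent TimeSeries itself, which is what
# `configure_backend` now detects (and skips) via `fields.get(dataset_name)`.
assert isinstance(child.fields.get("timestamps"), TimeSeries)
```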
diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_configure_backend_defaults.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_configure_backend_defaults.py
index e3bcb4146..693d8e4d0 100644
--- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_configure_backend_defaults.py
+++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_configure_backend_defaults.py
@@ -115,3 +115,85 @@ def test_simple_dynamic_table(tmpdir: Path, integer_array: np.ndarray, backend:
             assert written_data.compressor == numcodecs.GZip(level=1)
 
         assert_array_equal(x=integer_array, y=written_data[:])
+
+
+@pytest.mark.parametrize(
+    "case_name,iterator,data_iterator_options,timestamps_iterator_options",
+    [
+        ("unwrapped", lambda x: x, dict(), dict()),
+        ("generic", SliceableDataChunkIterator, dict(), dict()),
+        ("classic", DataChunkIterator, dict(iter_axis=1, buffer_size=30_000), dict(buffer_size=30_000)),
+        # Need to hardcode buffer size in classic case or else it takes forever...
+    ],
+)
+@pytest.mark.parametrize("backend", ["hdf5", "zarr"])
+def test_time_series_timestamps_linkage(
+    tmpdir: Path,
+    integer_array: np.ndarray,
+    case_name: str,
+    iterator: callable,
+    data_iterator_options: dict,
+    timestamps_iterator_options: dict,
+    backend: Literal["hdf5", "zarr"],
+):
+    data_1 = iterator(integer_array, **data_iterator_options)
+    data_2 = iterator(integer_array, **data_iterator_options)
+
+    timestamps_array = np.linspace(start=0.0, stop=1.0, num=integer_array.shape[0])
+    timestamps = iterator(timestamps_array, **timestamps_iterator_options)
+
+    nwbfile = mock_NWBFile()
+    time_series_1 = mock_TimeSeries(name="TestTimeSeries1", data=data_1, timestamps=timestamps, rate=None)
+    nwbfile.add_acquisition(time_series_1)
+
+    time_series_2 = mock_TimeSeries(name="TestTimeSeries2", data=data_2, timestamps=time_series_1, rate=None)
+    nwbfile.add_acquisition(time_series_2)
+
+    # Note that the linked timestamps dataset will still show up in the backend configuration display
+    backend_configuration = get_default_backend_configuration(nwbfile=nwbfile, backend=backend)
+    # print(backend_configuration)
+    dataset_configuration_1 = backend_configuration.dataset_configurations["acquisition/TestTimeSeries1/data"]
+    dataset_configuration_2 = backend_configuration.dataset_configurations["acquisition/TestTimeSeries2/data"]
+    timestamps_configuration_1 = backend_configuration.dataset_configurations["acquisition/TestTimeSeries1/timestamps"]
+    timestamps_configuration_2 = backend_configuration.dataset_configurations["acquisition/TestTimeSeries2/timestamps"]
+    configure_backend(nwbfile=nwbfile, backend_configuration=backend_configuration)
+
+    if case_name != "unwrapped":  # TODO: eventually, even this case will be buffered automatically
+        assert nwbfile.acquisition["TestTimeSeries1"].data
+        assert nwbfile.acquisition["TestTimeSeries2"].data
+        assert nwbfile.acquisition["TestTimeSeries1"].timestamps
+        assert nwbfile.acquisition["TestTimeSeries2"].timestamps
+
+    nwbfile_path = str(tmpdir / f"test_time_series_timestamps_linkage_{case_name}_data.nwb.{backend}")
+    with BACKEND_NWB_IO[backend](path=nwbfile_path, mode="w") as io:
+        io.write(nwbfile)
+
+    with BACKEND_NWB_IO[backend](path=nwbfile_path, mode="r") as io:
+        written_nwbfile = io.read()
+
+        written_data_1 = written_nwbfile.acquisition["TestTimeSeries1"].data
+        assert written_data_1.chunks == dataset_configuration_1.chunk_shape
+        if backend == "hdf5":
+            assert written_data_1.compression == "gzip"
+        elif backend == "zarr":
+            assert written_data_1.compressor == numcodecs.GZip(level=1)
+        assert_array_equal(x=integer_array, y=written_data_1[:])
+
+        written_data_2 = written_nwbfile.acquisition["TestTimeSeries2"].data
+        assert written_data_2.chunks == dataset_configuration_2.chunk_shape
+        if backend == "hdf5":
+            assert written_data_2.compression == "gzip"
+        elif backend == "zarr":
+            assert written_data_2.compressor == numcodecs.GZip(level=1)
+        assert_array_equal(x=integer_array, y=written_data_2[:])
+
+        written_timestamps_1 = written_nwbfile.acquisition["TestTimeSeries1"].timestamps
+        assert written_timestamps_1.chunks == timestamps_configuration_1.chunk_shape
+        if backend == "hdf5":
+            assert written_timestamps_1.compression == "gzip"
+        elif backend == "zarr":
+            assert written_timestamps_1.compressor == numcodecs.GZip(level=1)
+        assert_array_equal(x=timestamps_array, y=written_timestamps_1[:])
+
+        written_timestamps_2 = written_nwbfile.acquisition["TestTimeSeries2"].timestamps
+        assert written_timestamps_2 == written_timestamps_1