From c4bca8ac37f56b68f228be37b274dc1dbc1fcbee Mon Sep 17 00:00:00 2001
From: Cody Baker
Date: Sun, 17 Sep 2023 21:51:31 -0400
Subject: [PATCH 01/27] port over tool function for defaults

---
 .../nwb_helpers/_dataset_configuration.py      | 213 +++++++++++
 .../test_get_default_backend_configuration.py  | 205 ++++++++++
 ...test_get_default_dataset_configurations.py  | 349 ++++++++++++++++++
 ...t_dataset_configurations_appended_files.py  | 146 ++++++++
 .../test_dataset_configuration_model.py        |   0
 .../test_dataset_info_model.py                 |   0
 .../test_hdf5_backend_configuration_model.py   |   0
 .../test_hdf5_dataset_configuration_model.py   |   0
 .../test_zarr_backend_configuration_model.py   |   0
 .../test_zarr_dataset_configuration_model.py   |   0
 10 files changed, 913 insertions(+)
 create mode 100644 src/neuroconv/tools/nwb_helpers/_dataset_configuration.py
 create mode 100644 tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_backend_configuration.py
 create mode 100644 tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py
 create mode 100644 tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py
 rename tests/test_minimal/test_tools/test_backend_and_dataset_configuration/{ => test_models}/test_dataset_configuration_model.py (100%)
 rename tests/test_minimal/test_tools/test_backend_and_dataset_configuration/{ => test_models}/test_dataset_info_model.py (100%)
 rename tests/test_minimal/test_tools/test_backend_and_dataset_configuration/{ => test_models}/test_hdf5_backend_configuration_model.py (100%)
 rename tests/test_minimal/test_tools/test_backend_and_dataset_configuration/{ => test_models}/test_hdf5_dataset_configuration_model.py (100%)
 rename tests/test_minimal/test_tools/test_backend_and_dataset_configuration/{ => test_models}/test_zarr_backend_configuration_model.py (100%)
 rename tests/test_minimal/test_tools/test_backend_and_dataset_configuration/{ => test_models}/test_zarr_dataset_configuration_model.py (100%)

diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py
new file mode 100644
index 000000000..011e16a99
--- /dev/null
+++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py
@@ -0,0 +1,213 @@
+"""Collection of helper functions related to configuration of datasets dependent on backend."""
+from typing import Iterable, Literal, Union
+
+import h5py
+import numpy as np
+import zarr
+from hdmf import Container
+from hdmf.data_utils import DataChunkIterator, DataIO, GenericDataChunkIterator
+from hdmf.utils import get_data_shape
+from hdmf_zarr import NWBZarrIO
+from pynwb import NWBHDF5IO, NWBFile, TimeSeries
+from pynwb.base import DynamicTable
+
+from ._dataset_and_backend_models import (
+    BACKEND_TO_CONFIGURATION,
+    BACKEND_TO_DATASET_CONFIGURATION,
+    DatasetConfiguration,
+    DatasetInfo,
+    HDF5BackendConfiguration,
+    HDF5DatasetConfiguration,
+    ZarrBackendConfiguration,
+    ZarrDatasetConfiguration,
+)
+from ..hdmf import SliceableDataChunkIterator
+
+
+def _get_mode(io: Union[NWBHDF5IO, NWBZarrIO]) -> str:
+    """NWBHDF5IO and NWBZarrIO have different ways of storing the mode they used on a path."""
+    if isinstance(io, NWBHDF5IO):
+        return io.mode
+    elif isinstance(io, NWBZarrIO):
+        return io._ZarrIO__mode
+
+
+def _is_value_already_written_to_file(
+    candidate_dataset: Union[h5py.Dataset, zarr.Array],
+    backend: Literal["hdf5", "zarr"],
+    existing_file: Union[h5py.File, zarr.Group, None],
+) -> bool:
+    """
+    Determine if the neurodata object is already written to the file on disk.
+
+    This object should then be skipped by the `get_default_dataset_configurations` function when working in append mode.
+    """
+    return (
+        isinstance(candidate_dataset, h5py.Dataset)  # If the source data is an HDF5 Dataset
+        and backend == "hdf5"  # If working in append mode
+        and candidate_dataset.file == existing_file  # If the source HDF5 Dataset is the appending NWBFile
+    ) or (
+        isinstance(candidate_dataset, zarr.Array)  # If the source data is a Zarr Array
+        and backend == "zarr"  # If working in append mode
+        and candidate_dataset.store == existing_file  # If the source Zarr 'file' is the appending NWBFile
+    )
+
+
+def _parse_location_in_memory_nwbfile(current_location: str, neurodata_object: Container) -> str:
+    parent = neurodata_object.parent
+    if isinstance(parent, NWBFile):
+        # Items in defined top-level places like acquisition, intervals, etc. do not act as 'containers'
+        # in the .parent sense; ask if object is in their in-memory dictionaries instead
+        for outer_field_name, outer_field_value in parent.fields.items():
+            if isinstance(outer_field_value, dict) and neurodata_object.name in outer_field_value:
+                return outer_field_name + "/" + neurodata_object.name + "/" + current_location
+        return neurodata_object.name + "/" + current_location
+    return _parse_location_in_memory_nwbfile(
+        current_location=neurodata_object.name + "/" + current_location, neurodata_object=parent
+    )
+
+
+def _get_dataset_metadata(
+    neurodata_object: Union[TimeSeries, DynamicTable], field_name: str, backend: Literal["hdf5", "zarr"]
+) -> Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration]:
+    """Fill in the Dataset model with as many values as can be automatically detected or inferred."""
+    DatasetConfigurationClass = BACKEND_TO_DATASET_CONFIGURATION[backend]
+
+    candidate_dataset = getattr(neurodata_object, field_name)
+    # For now, skip over datasets already wrapped in DataIO
+    # Could maybe eventually support modifying chunks in place
+    # But setting buffer shape only possible if iterator was wrapped first
+    if not isinstance(candidate_dataset, DataIO):
+        # DataChunkIterator has best generic dtype inference, though logic is hard to peel out of it
+        # And it can fail in rare cases but not essential to our default configuration
+        try:
+            dtype = str(DataChunkIterator(candidate_dataset).dtype)  # string cast to be JSON friendly
+        except Exception as exception:
+            if str(exception) != "Data type could not be determined. Please specify dtype in DataChunkIterator init.":
+                raise exception
+            else:
+                dtype = "unknown"
+
+        maxshape = get_data_shape(data=candidate_dataset)
+
+        if isinstance(candidate_dataset, GenericDataChunkIterator):
+            chunk_shape = candidate_dataset.chunk_shape
+            buffer_shape = candidate_dataset.buffer_shape
+        elif dtype != "unknown":
+            # TODO: eventually replace this with staticmethods on hdmf.data_utils.GenericDataChunkIterator
+            chunk_shape = SliceableDataChunkIterator.estimate_default_chunk_shape(
+                chunk_mb=10.0, maxshape=maxshape, dtype=np.dtype(dtype)
+            )
+            buffer_shape = SliceableDataChunkIterator.estimate_default_buffer_shape(
+                buffer_gb=0.5, chunk_shape=chunk_shape, maxshape=maxshape, dtype=np.dtype(dtype)
+            )
+        else:
+            pass  # TODO: think on this; perhaps zarr's standalone estimator?
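+
+    # Descriptive note (added for clarity, not part of the original patch): the DatasetInfo built next
+    # records the dataset's identity and static properties (location, maxshape, dtype), while the
+    # chunk/buffer shapes estimated above become the adjustable write defaults for this dataset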
+ + dataset_info = DatasetInfo( + object_id=neurodata_object.object_id, + object_name=neurodata_object.name, + location=_parse_location_in_memory_nwbfile(current_location=field_name, neurodata_object=neurodata_object), + field=field_name, + maxshape=maxshape, + dtype=dtype, + ) + dataset_configuration = DatasetConfigurationClass( + dataset_info=dataset_info, chunk_shape=chunk_shape, buffer_shape=buffer_shape + ) + return dataset_configuration + + +def get_default_dataset_configurations( + nwbfile: NWBFile, + backend: Union[None, Literal["hdf5", "zarr"]] = None, # None for auto-detect from append mode, otherwise required +) -> Iterable[DatasetConfiguration]: + """ + Method for automatically detecting all objects in the file that could be wrapped in a DataIO. + + Parameters + ---------- + nwbfile : pynwb.NWBFile + An in-memory NWBFile object, either generated from the base class or read from an existing file of any backend. + backend : "hdf5" or "zarr" + Which backend format type you would like to use in configuring each datasets compression methods and options. + + Yields + ------ + DatasetConfiguration + A summary of each detected object that can be wrapped in a DataIO. + """ + if backend is None and nwbfile.read_io is None: + raise ValueError( + "Keyword argument `backend` (either 'hdf5' or 'zarr') must be specified if the `nwbfile` was not " + "read from an existing file!" + ) + if backend is None and nwbfile.read_io is not None and nwbfile.read_io.mode not in ("r+", "a"): + raise ValueError( + "Keyword argument `backend` (either 'hdf5' or 'zarr') must be specified if the `nwbfile` is being appended." + ) + + detected_backend = None + existing_file = None + if isinstance(nwbfile.read_io, NWBHDF5IO) and _get_mode(io=nwbfile.read_io) in ("r+", "a"): + detected_backend = "hdf5" + existing_file = nwbfile.read_io._file + elif isinstance(nwbfile.read_io, NWBZarrIO) and _get_mode(io=nwbfile.read_io) in ("r+", "a"): + detected_backend = "zarr" + existing_file = nwbfile.read_io.file.store + backend = backend or detected_backend + + if detected_backend is not None and detected_backend != backend: + raise ValueError( + f"Detected backend '{detected_backend}' for appending file, but specified `backend` " + f"({backend}) does not match! Set `backend=None` or remove the keyword argument to allow it to auto-detect." 
+ ) + + for neurodata_object in nwbfile.objects.values(): + if isinstance(neurodata_object, TimeSeries): + time_series = neurodata_object # for readability + + for field_name in ("data", "timestamps"): + if field_name not in time_series.fields: # timestamps is optional + continue + + candidate_dataset = getattr(time_series, field_name) + if _is_value_already_written_to_file( + candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file + ): + continue # skip + + # Edge case of in-memory ImageSeries with external mode; data is in fields and is empty array + if isinstance(candidate_dataset, np.ndarray) and not np.any(candidate_dataset): + continue # skip + + yield _get_dataset_metadata(neurodata_object=time_series, field_name=field_name, backend=backend) + elif isinstance(neurodata_object, DynamicTable): + dynamic_table = neurodata_object # for readability + + for column_name in dynamic_table.colnames: + candidate_dataset = dynamic_table[column_name].data # VectorData object + if _is_value_already_written_to_file( + candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file + ): + continue # skip + + yield _get_dataset_metadata( + neurodata_object=dynamic_table[column_name], field_name="data", backend=backend + ) + + +def get_default_backend_configuration( + nwbfile: NWBFile, backend: Literal["hdf5", "zarr"] +) -> Union[HDF5BackendConfiguration, ZarrBackendConfiguration]: + """Fill a default backend configuration to serve as a starting point for further customization.""" + BackendConfigurationClass = BACKEND_TO_CONFIGURATION[backend] + + default_dataset_configurations = get_default_dataset_configurations(nwbfile=nwbfile, backend=backend) + dataset_configurations = { + default_dataset_configuration.dataset_info.location: default_dataset_configuration + for default_dataset_configuration in default_dataset_configurations + } + + backend_configuration = BackendConfigurationClass(dataset_configurations=dataset_configurations) + return backend_configuration \ No newline at end of file diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_backend_configuration.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_backend_configuration.py new file mode 100644 index 000000000..45878e472 --- /dev/null +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_backend_configuration.py @@ -0,0 +1,205 @@ +"""Integration tests for `get_default_backend_configuration`.""" +from io import StringIO +from pathlib import Path +from unittest.mock import patch + +import numpy as np +import pytest +from hdmf_zarr import NWBZarrIO +from pynwb import NWBHDF5IO, NWBFile +from pynwb.testing.mock.base import mock_TimeSeries +from pynwb.testing.mock.file import mock_NWBFile + +from neuroconv.tools.nwb_helpers import ( + HDF5BackendConfiguration, + ZarrBackendConfiguration, + get_default_backend_configuration, + get_module, +) + + +def generate_complex_nwbfile() -> NWBFile: + nwbfile = mock_NWBFile() + + raw_array = np.array([[1, 2, 3], [4, 5, 6]]) + raw_time_series = mock_TimeSeries(name="RawTimeSeries", data=raw_array) + nwbfile.add_acquisition(raw_time_series) + + number_of_trials = 10 + for start_time, stop_time in zip( + np.linspace(start=0.0, stop=10.0, num=number_of_trials), np.linspace(start=1.0, stop=11.0, num=number_of_trials) + ): + nwbfile.add_trial(start_time=start_time, stop_time=stop_time) + + ecephys_module = 
get_module(nwbfile=nwbfile, name="ecephys") + processed_array = np.array([[7.0, 8.0], [9.0, 10.0], [11.0, 12.0], [13.0, 14.0]]) + processed_time_series = mock_TimeSeries(name="ProcessedTimeSeries", data=processed_array) + ecephys_module.add(processed_time_series) + + return nwbfile + + +@pytest.fixture(scope="session") +def hdf5_nwbfile_path(tmpdir_factory): + nwbfile_path = tmpdir_factory.mktemp("data").join("test_default_backend_configuration_hdf5_nwbfile.nwb.h5") + if not Path(nwbfile_path).exists(): + nwbfile = generate_complex_nwbfile() + with NWBHDF5IO(path=str(nwbfile_path), mode="w") as io: + io.write(nwbfile) + return str(nwbfile_path) + + +@pytest.fixture(scope="session") +def zarr_nwbfile_path(tmpdir_factory): + nwbfile_path = tmpdir_factory.mktemp("data").join("test_default_backend_configuration_hdf5_nwbfile.nwb.zarr") + if not Path(nwbfile_path).exists(): + nwbfile = generate_complex_nwbfile() + with NWBZarrIO(path=str(nwbfile_path), mode="w") as io: + io.write(nwbfile) + return str(nwbfile_path) + + +def test_complex_hdf5(hdf5_nwbfile_path): + with NWBHDF5IO(path=hdf5_nwbfile_path, mode="a") as io: + nwbfile = io.read() + + raw_array = np.array([[11, 21, 31], [41, 51, 61]]) + raw_time_series = mock_TimeSeries(name="NewRawTimeSeries", data=raw_array) + nwbfile.add_acquisition(raw_time_series) + + number_of_epochs = 5 + for start_time, stop_time in zip( + np.linspace(start=0.0, stop=10.0, num=number_of_epochs), + np.linspace(start=1.0, stop=11.0, num=number_of_epochs), + ): + nwbfile.add_epoch(start_time=start_time, stop_time=stop_time) + + ecephys_module = get_module(nwbfile=nwbfile, name="ecephys") + processed_array = np.array([[7.1, 8.1], [9.1, 10.1], [11.1, 12.1], [13.1, 14.1]]) + processed_time_series = mock_TimeSeries(name="NewProcessedTimeSeries", data=processed_array) + ecephys_module.add(processed_time_series) + + backend_configuration = get_default_backend_configuration(nwbfile=nwbfile, backend="hdf5") + + assert isinstance(backend_configuration, HDF5BackendConfiguration) + + dataset_configurations = backend_configuration.dataset_configurations + assert len(dataset_configurations) == 4 + assert "acquisition/NewRawTimeSeries/data" in dataset_configurations + assert "epochs/start_time/data" in dataset_configurations + assert "epochs/stop_time/data" in dataset_configurations + assert "processing/ecephys/NewProcessedTimeSeries/data" in dataset_configurations + + # Best summary test of expected output is the printout + with patch("sys.stdout", new=StringIO()) as stdout: + print(backend_configuration) + + expected_print = """Configurable datasets identified using the hdf5 backend +------------------------------------------------------- +epochs/start_time/data + maxshape : (5,) + dtype : float64 + chunk shape : (5,) + buffer shape : (5,) + compression method : gzip + compression options : None +epochs/stop_time/data + maxshape : (5,) + dtype : float64 + chunk shape : (5,) + buffer shape : (5,) + compression method : gzip + compression options : None +acquisition/NewRawTimeSeries/data + maxshape : (2, 3) + dtype : int32 + chunk shape : (2, 3) + buffer shape : (2, 3) + compression method : gzip + compression options : None +processing/ecephys/NewProcessedTimeSeries/data + maxshape : (4, 2) + dtype : float64 + chunk shape : (4, 2) + buffer shape : (4, 2) + compression method : gzip + compression options : None +""" + assert stdout.getvalue() == expected_print + + +def test_complex_zarr(zarr_nwbfile_path): + with NWBZarrIO(path=zarr_nwbfile_path, mode="a") as io: + nwbfile = 
io.read() + + raw_array = np.array([[11, 21, 31], [41, 51, 61]]) + raw_time_series = mock_TimeSeries(name="NewRawTimeSeries", data=raw_array) + nwbfile.add_acquisition(raw_time_series) + + number_of_epochs = 5 + for start_time, stop_time in zip( + np.linspace(start=0.0, stop=10.0, num=number_of_epochs), + np.linspace(start=1.0, stop=11.0, num=number_of_epochs), + ): + nwbfile.add_epoch(start_time=start_time, stop_time=stop_time) + + ecephys_module = get_module(nwbfile=nwbfile, name="ecephys") + processed_array = np.array([[7.1, 8.1], [9.1, 10.1], [11.1, 12.1], [13.1, 14.1]]) + processed_time_series = mock_TimeSeries(name="NewProcessedTimeSeries", data=processed_array) + ecephys_module.add(processed_time_series) + + backend_configuration = get_default_backend_configuration(nwbfile=nwbfile, backend="zarr") + + assert isinstance(backend_configuration, ZarrBackendConfiguration) + + dataset_configurations = backend_configuration.dataset_configurations + assert len(dataset_configurations) == 4 + assert "acquisition/NewRawTimeSeries/data" in dataset_configurations + assert "epochs/start_time/data" in dataset_configurations + assert "epochs/stop_time/data" in dataset_configurations + assert "processing/ecephys/NewProcessedTimeSeries/data" in dataset_configurations + + # Best summary test of expected output is the printout + with patch("sys.stdout", new=StringIO()) as stdout: + print(backend_configuration) + + expected_print = """Configurable datasets identified using the zarr backend +------------------------------------------------------- +epochs/start_time/data + maxshape : (5,) + dtype : float64 + chunk shape : (5,) + buffer shape : (5,) + compression method : gzip + compression options : None + filter methods : None + filter options : None +epochs/stop_time/data + maxshape : (5,) + dtype : float64 + chunk shape : (5,) + buffer shape : (5,) + compression method : gzip + compression options : None + filter methods : None + filter options : None +acquisition/NewRawTimeSeries/data + maxshape : (2, 3) + dtype : int32 + chunk shape : (2, 3) + buffer shape : (2, 3) + compression method : gzip + compression options : None + filter methods : None + filter options : None +processing/ecephys/NewProcessedTimeSeries/data + maxshape : (4, 2) + dtype : float64 + chunk shape : (4, 2) + buffer shape : (4, 2) + compression method : gzip + compression options : None + filter methods : None + filter options : None +""" + assert stdout.getvalue() == expected_print diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py new file mode 100644 index 000000000..f08c85167 --- /dev/null +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py @@ -0,0 +1,349 @@ +"""Unit tests for `get_default_dataset_configurations`.""" +import numpy as np +from hdmf.common import VectorData +from hdmf.data_utils import DataChunkIterator +from pynwb.base import DynamicTable +from pynwb.image import ImageSeries +from pynwb.testing.mock.base import mock_TimeSeries +from pynwb.testing.mock.file import mock_NWBFile + +from neuroconv.tools.hdmf import SliceableDataChunkIterator +from neuroconv.tools.nwb_helpers import ( + HDF5DatasetConfiguration, + ZarrDatasetConfiguration, + get_default_dataset_configurations, +) + + +def test_unwrapped_time_series_hdf5(): + array 
= np.array([[1, 2, 3], [4, 5, 6]]) + + nwbfile = mock_NWBFile() + time_series = mock_TimeSeries(name="TestTimeSeries", data=array) + nwbfile.add_acquisition(time_series) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, HDF5DatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == time_series.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" + assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + +def test_unwrapped_time_series_zarr(): + array = np.array([[1, 2, 3], [4, 5, 6]]) + + nwbfile = mock_NWBFile() + time_series = mock_TimeSeries(name="TestTimeSeries", data=array) + nwbfile.add_acquisition(time_series) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, ZarrDatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == time_series.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" + assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + +def test_generic_iterator_wrapped_time_series_hdf5(): + array = np.array([[1, 2, 3], [4, 5, 6]]) + + nwbfile = mock_NWBFile() + time_series = mock_TimeSeries(name="TestTimeSeries", data=SliceableDataChunkIterator(data=array)) + nwbfile.add_acquisition(time_series) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, HDF5DatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == time_series.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" + assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + +def test_classic_iterator_wrapped_simple_time_series_zarr(): + array = np.array([[1, 2, 3], [4, 5, 6]]) + + nwbfile = mock_NWBFile() + time_series = mock_TimeSeries(name="TestTimeSeries", data=DataChunkIterator(data=array)) + nwbfile.add_acquisition(time_series) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, 
backend="zarr")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, ZarrDatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == time_series.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" + assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + +def test_classic_iterator_wrapped_time_series_hdf5(): + array = np.array([[1, 2, 3], [4, 5, 6]]) + + nwbfile = mock_NWBFile() + time_series = mock_TimeSeries(name="TestTimeSeries", data=DataChunkIterator(data=array)) + nwbfile.add_acquisition(time_series) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, HDF5DatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == time_series.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" + assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + +def test_generic_iterator_wrapped_simple_time_series_zarr(): + array = np.array([[1, 2, 3], [4, 5, 6]]) + + nwbfile = mock_NWBFile() + time_series = mock_TimeSeries(name="TestTimeSeries", data=SliceableDataChunkIterator(data=array)) + nwbfile.add_acquisition(time_series) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, ZarrDatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == time_series.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" + assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + +def test_external_image_series_hdf5(): + nwbfile = mock_NWBFile() + image_series = ImageSeries(name="TestImageSeries", external_file=[""], rate=1.0) + nwbfile.add_acquisition(image_series) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + + assert len(dataset_configurations) == 0 + + +def test_external_image_series_zarr(): + nwbfile = mock_NWBFile() + image_series = 
ImageSeries(name="TestImageSeries", external_file=[""], rate=1.0) + nwbfile.add_acquisition(image_series) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) + + assert len(dataset_configurations) == 0 + + +def test_unwrapped_dynamic_table_hdf5(): + array = np.array([0.1, 0.2, 0.3]) + + nwbfile = mock_NWBFile() + column = VectorData(name="TestColumn", description="", data=array.squeeze()) + dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column]) + nwbfile.add_acquisition(dynamic_table) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, HDF5DatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == column.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" + assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + +def test_unwrapped_dynamic_table_zarr(): + array = np.array([0.1, 0.2, 0.3]) + + nwbfile = mock_NWBFile() + column = VectorData(name="TestColumn", description="", data=array.squeeze()) + dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column]) + nwbfile.add_acquisition(dynamic_table) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, ZarrDatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == column.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" + assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + +def test_generic_iterator_wrapped_dynamic_table_hdf5(): + array = np.array([0.1, 0.2, 0.3]) + + nwbfile = mock_NWBFile() + column = VectorData(name="TestColumn", description="", data=SliceableDataChunkIterator(data=array.squeeze())) + dynamic_table = DynamicTable( + name="TestDynamicTable", + description="", + id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO + columns=[column], + ) + nwbfile.add_acquisition(dynamic_table) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, HDF5DatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == column.object_id + assert dataset_configuration.dataset_info.location == 
"acquisition/TestDynamicTable/TestColumn/data" + assert dataset_configuration.dataset_info.maxshape == (array.shape[0],) + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + +def test_generic_iterator_wrapped_dynamic_table_zarr(): + array = np.array([0.1, 0.2, 0.3]) + + nwbfile = mock_NWBFile() + column = VectorData(name="TestColumn", description="", data=SliceableDataChunkIterator(data=array.squeeze())) + dynamic_table = DynamicTable( + name="TestDynamicTable", + description="", + id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO + columns=[column], + ) + nwbfile.add_acquisition(dynamic_table) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, ZarrDatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == column.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" + assert dataset_configuration.dataset_info.maxshape == (array.shape[0],) + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + +def test_classic_iterator_wrapped_dynamic_table_hdf5(): + array = np.array([0.1, 0.2, 0.3]) + + nwbfile = mock_NWBFile() + column = VectorData(name="TestColumn", description="", data=DataChunkIterator(data=array.squeeze())) + dynamic_table = DynamicTable( + name="TestDynamicTable", + description="", + id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO + columns=[column], + ) + nwbfile.add_acquisition(dynamic_table) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, HDF5DatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == column.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" + assert dataset_configuration.dataset_info.maxshape == (array.shape[0],) + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + +def test_classic_iterator_wrapped_dynamic_table_zarr(): + array = np.array([0.1, 0.2, 0.3]) + + nwbfile = mock_NWBFile() + column = VectorData(name="TestColumn", description="", data=DataChunkIterator(data=array.squeeze())) + dynamic_table = DynamicTable( + name="TestDynamicTable", + description="", + id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO 
+ columns=[column], + ) + nwbfile.add_acquisition(dynamic_table) + + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, ZarrDatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == column.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" + assert dataset_configuration.dataset_info.maxshape == (array.shape[0],) + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None \ No newline at end of file diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py new file mode 100644 index 000000000..dcda5d8a0 --- /dev/null +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py @@ -0,0 +1,146 @@ +""" +Unit tests for `get_default_dataset_configurations` operating on already written files open in append mode. +Mostly testing that the right objects are skipped from identification as candidates for configuration. +""" +from pathlib import Path + +import numpy as np +import pytest +from hdmf.common import VectorData +from hdmf_zarr import NWBZarrIO +from pynwb import NWBHDF5IO, NWBFile +from pynwb.base import DynamicTable +from pynwb.testing.mock.base import mock_TimeSeries +from pynwb.testing.mock.file import mock_NWBFile + +from neuroconv.tools.nwb_helpers import ( + HDF5DatasetConfiguration, + ZarrDatasetConfiguration, + get_default_dataset_configurations, +) + + +def generate_nwbfile_with_existing_time_series() -> NWBFile: + nwbfile = mock_NWBFile() + array = np.array([[1, 2, 3], [4, 5, 6]]) + time_series = mock_TimeSeries(name="ExistingTimeSeries", data=array) + nwbfile.add_acquisition(time_series) + return nwbfile + + +@pytest.fixture(scope="session") +def hdf5_nwbfile_path(tmpdir_factory): + nwbfile_path = tmpdir_factory.mktemp("data").join("test_default_dataset_configurations_hdf5_nwbfile_.nwb.h5") + if not Path(nwbfile_path).exists(): + nwbfile = generate_nwbfile_with_existing_time_series() + with NWBHDF5IO(path=str(nwbfile_path), mode="w") as io: + io.write(nwbfile) + return str(nwbfile_path) + + +@pytest.fixture(scope="session") +def zarr_nwbfile_path(tmpdir_factory): + nwbfile_path = tmpdir_factory.mktemp("data").join("test_default_dataset_configurations_zarr_nwbfile.nwb.zarr") + if not Path(nwbfile_path).exists(): + nwbfile = generate_nwbfile_with_existing_time_series() + with NWBZarrIO(path=str(nwbfile_path), mode="w") as io: + io.write(nwbfile) + return str(nwbfile_path) + + +def test_unwrapped_time_series_hdf5(hdf5_nwbfile_path): + array = np.array([[1, 2, 3], [4, 5, 6]]) + + with NWBHDF5IO(path=hdf5_nwbfile_path, mode="a") as io: + nwbfile = io.read() + new_time_series = mock_TimeSeries(name="NewTimeSeries", data=array) + 
nwbfile.add_acquisition(new_time_series) + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, HDF5DatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == new_time_series.object_id + assert dataset_configuration.dataset_info.location == "acquisition/NewTimeSeries/data" + assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + +def test_unwrapped_time_series_zarr(zarr_nwbfile_path): + array = np.array([[1, 2, 3], [4, 5, 6]]) + + with NWBZarrIO(path=zarr_nwbfile_path, mode="a") as io: + nwbfile = io.read() + new_time_series = mock_TimeSeries(name="NewTimeSeries", data=array) + nwbfile.add_acquisition(new_time_series) + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, ZarrDatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == new_time_series.object_id + assert dataset_configuration.dataset_info.location == "acquisition/NewTimeSeries/data" + assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + +def test_unwrapped_dynamic_table_hdf5(hdf5_nwbfile_path): + array = np.array([0.1, 0.2, 0.3]) + + with NWBHDF5IO(path=hdf5_nwbfile_path, mode="a") as io: + nwbfile = io.read() + column = VectorData(name="TestColumn", description="", data=array.squeeze()) + dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column]) + nwbfile.add_acquisition(dynamic_table) + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, HDF5DatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == column.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" + assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + +def test_unwrapped_dynamic_table_zarr(zarr_nwbfile_path): + array = np.array([0.1, 0.2, 0.3]) + + with NWBZarrIO(path=zarr_nwbfile_path, mode="a") as io: + nwbfile = io.read() + column = VectorData(name="TestColumn", description="", data=array.squeeze()) + dynamic_table = 
DynamicTable(name="TestDynamicTable", description="", columns=[column]) + nwbfile.add_acquisition(dynamic_table) + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, ZarrDatasetConfiguration) + assert dataset_configuration.dataset_info.object_id == column.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" + assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None \ No newline at end of file diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_dataset_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_configuration_model.py similarity index 100% rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_dataset_configuration_model.py rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_configuration_model.py diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_dataset_info_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_info_model.py similarity index 100% rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_dataset_info_model.py rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_info_model.py diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_hdf5_backend_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_backend_configuration_model.py similarity index 100% rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_hdf5_backend_configuration_model.py rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_backend_configuration_model.py diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_hdf5_dataset_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_configuration_model.py similarity index 100% rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_hdf5_dataset_configuration_model.py rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_configuration_model.py diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_zarr_backend_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_backend_configuration_model.py similarity index 100% rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_zarr_backend_configuration_model.py rename to 
tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_backend_configuration_model.py diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_zarr_dataset_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_configuration_model.py similarity index 100% rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_zarr_dataset_configuration_model.py rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_configuration_model.py From 38a1fa3722c1ba23bd5f0f906dc1262a8a46055f Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 17 Sep 2023 21:53:37 -0400 Subject: [PATCH 02/27] modify iterator as well --- src/neuroconv/tools/hdmf.py | 80 ++++++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 19 deletions(-) diff --git a/src/neuroconv/tools/hdmf.py b/src/neuroconv/tools/hdmf.py index 6ad712d51..b82b12e1f 100644 --- a/src/neuroconv/tools/hdmf.py +++ b/src/neuroconv/tools/hdmf.py @@ -1,35 +1,77 @@ """Collection of modifications of HDMF functions that are to be tested/used on this repo until propagation upstream.""" +import math from typing import Tuple import numpy as np from hdmf.data_utils import GenericDataChunkIterator as HDMFGenericDataChunkIterator +from pydantic import Field +from typing_extensions import Annotated class GenericDataChunkIterator(HDMFGenericDataChunkIterator): def _get_default_buffer_shape(self, buffer_gb: float = 1.0) -> Tuple[int]: - num_axes = len(self.maxshape) - chunk_bytes = np.prod(self.chunk_shape) * self.dtype.itemsize + return self.estimate_default_buffer_shape( + buffer_gb=buffer_gb, chunk_shape=self.chunk_shape, maxshape=self.maxshape, dtype=self.dtype + ) + + # TODO: move this to the core iterator in HDMF so it can be easily swapped out as well as run on its own + @staticmethod + def estimate_default_chunk_shape( + chunk_mb: Annotated[float, Field(gt=0.0)], + maxshape: Tuple[int, ...], + dtype: np.dtype, + ) -> Tuple[int, ...]: + """ + Select chunk shape with size in MB less than the threshold of chunk_mb. + + Keeps the dimensional ratios of the original data. + """ + assert chunk_mb > 0.0, f"chunk_mb ({chunk_mb}) must be greater than zero!" + # Eventually, Pydantic validation can handle this validation for us + + n_dims = len(maxshape) + itemsize = dtype.itemsize + chunk_bytes = chunk_mb * 1e6 + + min_maxshape = min(maxshape) + v = tuple(math.floor(maxshape_axis / min_maxshape) for maxshape_axis in maxshape) + prod_v = math.prod(v) + while prod_v * itemsize > chunk_bytes and prod_v != 1: + non_unit_min_v = min(x for x in v if x != 1) + v = tuple(math.floor(x / non_unit_min_v) if x != 1 else x for x in v) + prod_v = math.prod(v) + k = math.floor((chunk_bytes / (prod_v * itemsize)) ** (1 / n_dims)) + return tuple([min(k * x, maxshape[dim]) for dim, x in enumerate(v)]) + + # TODO: move this to the core iterator in HDMF so it can be easily swapped out as well as run on its own + @staticmethod + def estimate_default_buffer_shape( + buffer_gb: Annotated[float, Field(gt=0.0)], + chunk_shape: Tuple[int, ...], + maxshape: Tuple[int, ...], + dtype: np.dtype, + ) -> Tuple[int]: + num_axes = len(maxshape) + chunk_bytes = math.prod(chunk_shape) * dtype.itemsize assert buffer_gb > 0, f"buffer_gb ({buffer_gb}) must be greater than zero!" 
assert ( buffer_gb >= chunk_bytes / 1e9 ), f"buffer_gb ({buffer_gb}) must be greater than the chunk size ({chunk_bytes / 1e9})!" - assert all( - np.array(self.chunk_shape) > 0 - ), f"Some dimensions of chunk_shape ({self.chunk_shape}) are less than zero!" + assert all(np.array(chunk_shape) > 0), f"Some dimensions of chunk_shape ({chunk_shape}) are less than zero!" - maxshape = np.array(self.maxshape) + maxshape = np.array(maxshape) # Early termination condition - if np.prod(maxshape) * self.dtype.itemsize / 1e9 < buffer_gb: - return tuple(self.maxshape) + if math.prod(maxshape) * dtype.itemsize / 1e9 < buffer_gb: + return tuple(maxshape) buffer_bytes = chunk_bytes - axis_sizes_bytes = maxshape * self.dtype.itemsize - smallest_chunk_axis, second_smallest_chunk_axis, *_ = np.argsort(self.chunk_shape) + axis_sizes_bytes = maxshape * dtype.itemsize + smallest_chunk_axis, second_smallest_chunk_axis, *_ = np.argsort(chunk_shape) target_buffer_bytes = buffer_gb * 1e9 # If the smallest full axis does not fit within the buffer size, form a square along the two smallest axes - sub_square_buffer_shape = np.array(self.chunk_shape) + sub_square_buffer_shape = np.array(chunk_shape) if min(axis_sizes_bytes) > target_buffer_bytes: k1 = np.floor((target_buffer_bytes / chunk_bytes) ** 0.5) for axis in [smallest_chunk_axis, second_smallest_chunk_axis]: @@ -40,32 +82,32 @@ def _get_default_buffer_shape(self, buffer_gb: float = 1.0) -> Tuple[int]: chunk_to_buffer_ratio = buffer_gb * 1e9 / chunk_bytes chunk_scaling_factor = np.floor(chunk_to_buffer_ratio ** (1 / num_axes)) unpadded_buffer_shape = [ - np.clip(a=int(x), a_min=self.chunk_shape[j], a_max=self.maxshape[j]) - for j, x in enumerate(chunk_scaling_factor * np.array(self.chunk_shape)) + np.clip(a=int(x), a_min=chunk_shape[j], a_max=maxshape[j]) + for j, x in enumerate(chunk_scaling_factor * np.array(chunk_shape)) ] - unpadded_buffer_bytes = np.prod(unpadded_buffer_shape) * self.dtype.itemsize + unpadded_buffer_bytes = math.prod(unpadded_buffer_shape) * dtype.itemsize # Method that starts by filling the smallest axis completely or calculates best partial fill - padded_buffer_shape = np.array(self.chunk_shape) - chunks_per_axis = np.ceil(maxshape / self.chunk_shape) + padded_buffer_shape = np.array(chunk_shape) + chunks_per_axis = np.ceil(maxshape / chunk_shape) small_axis_fill_size = chunk_bytes * min(chunks_per_axis) full_axes_used = np.zeros(shape=num_axes, dtype=bool) if small_axis_fill_size <= target_buffer_bytes: buffer_bytes = small_axis_fill_size - padded_buffer_shape[smallest_chunk_axis] = self.maxshape[smallest_chunk_axis] + padded_buffer_shape[smallest_chunk_axis] = maxshape[smallest_chunk_axis] full_axes_used[smallest_chunk_axis] = True for axis, chunks_on_axis in enumerate(chunks_per_axis): if full_axes_used[axis]: # If the smallest axis, skip since already used continue if chunks_on_axis * buffer_bytes <= target_buffer_bytes: # If multiple axes can be used together buffer_bytes *= chunks_on_axis - padded_buffer_shape[axis] = self.maxshape[axis] + padded_buffer_shape[axis] = maxshape[axis] else: # Found an axis that is too large to use with the rest of the buffer; calculate how much can be used k3 = np.floor(target_buffer_bytes / buffer_bytes) padded_buffer_shape[axis] *= k3 break - padded_buffer_bytes = np.prod(padded_buffer_shape) * self.dtype.itemsize + padded_buffer_bytes = math.prod(padded_buffer_shape) * dtype.itemsize if padded_buffer_bytes >= unpadded_buffer_bytes: return tuple(padded_buffer_shape) From 
a981068e75afdaac344683ed0f5730b59868bc69 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 17 Sep 2023 22:25:20 -0400 Subject: [PATCH 03/27] factor out backend config stuff to other PR --- .../nwb_helpers/_dataset_configuration.py | 16 -- .../test_get_default_backend_configuration.py | 205 ------------------ 2 files changed, 221 deletions(-) delete mode 100644 tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_backend_configuration.py diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index 011e16a99..0d8c74fde 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -195,19 +195,3 @@ def get_default_dataset_configurations( yield _get_dataset_metadata( neurodata_object=dynamic_table[column_name], field_name="data", backend=backend ) - - -def get_default_backend_configuration( - nwbfile: NWBFile, backend: Literal["hdf5", "zarr"] -) -> Union[HDF5BackendConfiguration, ZarrBackendConfiguration]: - """Fill a default backend configuration to serve as a starting point for further customization.""" - BackendConfigurationClass = BACKEND_TO_CONFIGURATION[backend] - - default_dataset_configurations = get_default_dataset_configurations(nwbfile=nwbfile, backend=backend) - dataset_configurations = { - default_dataset_configuration.dataset_info.location: default_dataset_configuration - for default_dataset_configuration in default_dataset_configurations - } - - backend_configuration = BackendConfigurationClass(dataset_configurations=dataset_configurations) - return backend_configuration \ No newline at end of file diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_backend_configuration.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_backend_configuration.py deleted file mode 100644 index 45878e472..000000000 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_backend_configuration.py +++ /dev/null @@ -1,205 +0,0 @@ -"""Integration tests for `get_default_backend_configuration`.""" -from io import StringIO -from pathlib import Path -from unittest.mock import patch - -import numpy as np -import pytest -from hdmf_zarr import NWBZarrIO -from pynwb import NWBHDF5IO, NWBFile -from pynwb.testing.mock.base import mock_TimeSeries -from pynwb.testing.mock.file import mock_NWBFile - -from neuroconv.tools.nwb_helpers import ( - HDF5BackendConfiguration, - ZarrBackendConfiguration, - get_default_backend_configuration, - get_module, -) - - -def generate_complex_nwbfile() -> NWBFile: - nwbfile = mock_NWBFile() - - raw_array = np.array([[1, 2, 3], [4, 5, 6]]) - raw_time_series = mock_TimeSeries(name="RawTimeSeries", data=raw_array) - nwbfile.add_acquisition(raw_time_series) - - number_of_trials = 10 - for start_time, stop_time in zip( - np.linspace(start=0.0, stop=10.0, num=number_of_trials), np.linspace(start=1.0, stop=11.0, num=number_of_trials) - ): - nwbfile.add_trial(start_time=start_time, stop_time=stop_time) - - ecephys_module = get_module(nwbfile=nwbfile, name="ecephys") - processed_array = np.array([[7.0, 8.0], [9.0, 10.0], [11.0, 12.0], [13.0, 14.0]]) - processed_time_series = mock_TimeSeries(name="ProcessedTimeSeries", data=processed_array) - ecephys_module.add(processed_time_series) - - return nwbfile - - 
-@pytest.fixture(scope="session") -def hdf5_nwbfile_path(tmpdir_factory): - nwbfile_path = tmpdir_factory.mktemp("data").join("test_default_backend_configuration_hdf5_nwbfile.nwb.h5") - if not Path(nwbfile_path).exists(): - nwbfile = generate_complex_nwbfile() - with NWBHDF5IO(path=str(nwbfile_path), mode="w") as io: - io.write(nwbfile) - return str(nwbfile_path) - - -@pytest.fixture(scope="session") -def zarr_nwbfile_path(tmpdir_factory): - nwbfile_path = tmpdir_factory.mktemp("data").join("test_default_backend_configuration_hdf5_nwbfile.nwb.zarr") - if not Path(nwbfile_path).exists(): - nwbfile = generate_complex_nwbfile() - with NWBZarrIO(path=str(nwbfile_path), mode="w") as io: - io.write(nwbfile) - return str(nwbfile_path) - - -def test_complex_hdf5(hdf5_nwbfile_path): - with NWBHDF5IO(path=hdf5_nwbfile_path, mode="a") as io: - nwbfile = io.read() - - raw_array = np.array([[11, 21, 31], [41, 51, 61]]) - raw_time_series = mock_TimeSeries(name="NewRawTimeSeries", data=raw_array) - nwbfile.add_acquisition(raw_time_series) - - number_of_epochs = 5 - for start_time, stop_time in zip( - np.linspace(start=0.0, stop=10.0, num=number_of_epochs), - np.linspace(start=1.0, stop=11.0, num=number_of_epochs), - ): - nwbfile.add_epoch(start_time=start_time, stop_time=stop_time) - - ecephys_module = get_module(nwbfile=nwbfile, name="ecephys") - processed_array = np.array([[7.1, 8.1], [9.1, 10.1], [11.1, 12.1], [13.1, 14.1]]) - processed_time_series = mock_TimeSeries(name="NewProcessedTimeSeries", data=processed_array) - ecephys_module.add(processed_time_series) - - backend_configuration = get_default_backend_configuration(nwbfile=nwbfile, backend="hdf5") - - assert isinstance(backend_configuration, HDF5BackendConfiguration) - - dataset_configurations = backend_configuration.dataset_configurations - assert len(dataset_configurations) == 4 - assert "acquisition/NewRawTimeSeries/data" in dataset_configurations - assert "epochs/start_time/data" in dataset_configurations - assert "epochs/stop_time/data" in dataset_configurations - assert "processing/ecephys/NewProcessedTimeSeries/data" in dataset_configurations - - # Best summary test of expected output is the printout - with patch("sys.stdout", new=StringIO()) as stdout: - print(backend_configuration) - - expected_print = """Configurable datasets identified using the hdf5 backend -------------------------------------------------------- -epochs/start_time/data - maxshape : (5,) - dtype : float64 - chunk shape : (5,) - buffer shape : (5,) - compression method : gzip - compression options : None -epochs/stop_time/data - maxshape : (5,) - dtype : float64 - chunk shape : (5,) - buffer shape : (5,) - compression method : gzip - compression options : None -acquisition/NewRawTimeSeries/data - maxshape : (2, 3) - dtype : int32 - chunk shape : (2, 3) - buffer shape : (2, 3) - compression method : gzip - compression options : None -processing/ecephys/NewProcessedTimeSeries/data - maxshape : (4, 2) - dtype : float64 - chunk shape : (4, 2) - buffer shape : (4, 2) - compression method : gzip - compression options : None -""" - assert stdout.getvalue() == expected_print - - -def test_complex_zarr(zarr_nwbfile_path): - with NWBZarrIO(path=zarr_nwbfile_path, mode="a") as io: - nwbfile = io.read() - - raw_array = np.array([[11, 21, 31], [41, 51, 61]]) - raw_time_series = mock_TimeSeries(name="NewRawTimeSeries", data=raw_array) - nwbfile.add_acquisition(raw_time_series) - - number_of_epochs = 5 - for start_time, stop_time in zip( - np.linspace(start=0.0, stop=10.0, 
num=number_of_epochs), - np.linspace(start=1.0, stop=11.0, num=number_of_epochs), - ): - nwbfile.add_epoch(start_time=start_time, stop_time=stop_time) - - ecephys_module = get_module(nwbfile=nwbfile, name="ecephys") - processed_array = np.array([[7.1, 8.1], [9.1, 10.1], [11.1, 12.1], [13.1, 14.1]]) - processed_time_series = mock_TimeSeries(name="NewProcessedTimeSeries", data=processed_array) - ecephys_module.add(processed_time_series) - - backend_configuration = get_default_backend_configuration(nwbfile=nwbfile, backend="zarr") - - assert isinstance(backend_configuration, ZarrBackendConfiguration) - - dataset_configurations = backend_configuration.dataset_configurations - assert len(dataset_configurations) == 4 - assert "acquisition/NewRawTimeSeries/data" in dataset_configurations - assert "epochs/start_time/data" in dataset_configurations - assert "epochs/stop_time/data" in dataset_configurations - assert "processing/ecephys/NewProcessedTimeSeries/data" in dataset_configurations - - # Best summary test of expected output is the printout - with patch("sys.stdout", new=StringIO()) as stdout: - print(backend_configuration) - - expected_print = """Configurable datasets identified using the zarr backend -------------------------------------------------------- -epochs/start_time/data - maxshape : (5,) - dtype : float64 - chunk shape : (5,) - buffer shape : (5,) - compression method : gzip - compression options : None - filter methods : None - filter options : None -epochs/stop_time/data - maxshape : (5,) - dtype : float64 - chunk shape : (5,) - buffer shape : (5,) - compression method : gzip - compression options : None - filter methods : None - filter options : None -acquisition/NewRawTimeSeries/data - maxshape : (2, 3) - dtype : int32 - chunk shape : (2, 3) - buffer shape : (2, 3) - compression method : gzip - compression options : None - filter methods : None - filter options : None -processing/ecephys/NewProcessedTimeSeries/data - maxshape : (4, 2) - dtype : float64 - chunk shape : (4, 2) - buffer shape : (4, 2) - compression method : gzip - compression options : None - filter methods : None - filter options : None -""" - assert stdout.getvalue() == expected_print From 966592c31e66e1aed27e988492430c8298ff3759 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Sep 2023 02:46:11 +0000 Subject: [PATCH 04/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/nwb_helpers/__init__.py | 12 ++++++++++-- .../tools/nwb_helpers/_models/_base_models.py | 2 +- .../tools/nwb_helpers/_models/_hdf5_models.py | 6 +++--- .../tools/nwb_helpers/_models/_zarr_models.py | 6 +++--- src/neuroconv/tools/testing/__init__.py | 8 ++++---- .../tools/testing/_mock/_mock_dataset_models.py | 6 +++--- .../test_get_default_dataset_configurations.py | 2 +- ..._default_dataset_configurations_appended_files.py | 2 +- .../test_hdf5_backend_configuration_model.py | 1 - .../test_hdf5_dataset_configuration_model.py | 5 ++++- .../test_zarr_backend_configuration_model.py | 1 - .../test_zarr_dataset_configuration_model.py | 5 ++++- 12 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/neuroconv/tools/nwb_helpers/__init__.py b/src/neuroconv/tools/nwb_helpers/__init__.py index 89c738d51..0982439bb 100644 --- a/src/neuroconv/tools/nwb_helpers/__init__.py +++ b/src/neuroconv/tools/nwb_helpers/__init__.py @@ -6,8 +6,16 @@ make_or_load_nwbfile, ) from ._models._base_models import 
DatasetConfiguration, DatasetInfo -from ._models._hdf5_models import HDF5BackendConfiguration, HDF5DatasetConfiguration, AVAILABLE_HDF5_COMPRESSION_METHODS -from ._models._zarr_models import ZarrBackendConfiguration, ZarrDatasetConfiguration, AVAILABLE_ZARR_COMPRESSION_METHODS +from ._models._hdf5_models import ( + AVAILABLE_HDF5_COMPRESSION_METHODS, + HDF5BackendConfiguration, + HDF5DatasetConfiguration, +) +from ._models._zarr_models import ( + AVAILABLE_ZARR_COMPRESSION_METHODS, + ZarrBackendConfiguration, + ZarrDatasetConfiguration, +) BACKEND_TO_DATASET_CONFIGURATION = dict(hdf5=HDF5DatasetConfiguration, zarr=ZarrDatasetConfiguration) BACKEND_TO_CONFIGURATION = dict(hdf5=HDF5BackendConfiguration, zarr=ZarrBackendConfiguration) diff --git a/src/neuroconv/tools/nwb_helpers/_models/_base_models.py b/src/neuroconv/tools/nwb_helpers/_models/_base_models.py index eb0408c2e..ad940fb6f 100644 --- a/src/neuroconv/tools/nwb_helpers/_models/_base_models.py +++ b/src/neuroconv/tools/nwb_helpers/_models/_base_models.py @@ -1,5 +1,5 @@ """Base Pydantic models for DatasetInfo and DatasetConfiguration.""" -from typing import Any, Dict, Tuple, Union, Literal, Type +from typing import Any, Dict, Literal, Tuple, Type, Union import h5py import numcodecs diff --git a/src/neuroconv/tools/nwb_helpers/_models/_hdf5_models.py b/src/neuroconv/tools/nwb_helpers/_models/_hdf5_models.py index e21015f89..6e1108432 100644 --- a/src/neuroconv/tools/nwb_helpers/_models/_hdf5_models.py +++ b/src/neuroconv/tools/nwb_helpers/_models/_hdf5_models.py @@ -1,12 +1,12 @@ """Base Pydantic models for the HDF5DatasetConfiguration.""" -from typing import Any, Dict, Literal, Union, Type +from typing import Any, Dict, Literal, Type, Union import h5py -from pynwb import H5DataIO from nwbinspector.utils import is_module_installed from pydantic import Field +from pynwb import H5DataIO -from ._base_models import DatasetConfiguration, BackendConfiguration +from ._base_models import BackendConfiguration, DatasetConfiguration _base_hdf5_filters = set(h5py.filters.decode) - set( ( diff --git a/src/neuroconv/tools/nwb_helpers/_models/_zarr_models.py b/src/neuroconv/tools/nwb_helpers/_models/_zarr_models.py index 0e83d58e5..7b7b2dcbc 100644 --- a/src/neuroconv/tools/nwb_helpers/_models/_zarr_models.py +++ b/src/neuroconv/tools/nwb_helpers/_models/_zarr_models.py @@ -1,13 +1,13 @@ """Base Pydantic models for the ZarrDatasetConfiguration.""" -from typing import Any, Dict, Literal, Union, List, Type +from typing import Any, Dict, List, Literal, Type, Union import numcodecs -import zarr import psutil +import zarr from hdmf_zarr import ZarrDataIO from pydantic import Field, root_validator -from ._base_models import DatasetConfiguration, BackendConfiguration +from ._base_models import BackendConfiguration, DatasetConfiguration _available_zarr_filters = ( set(zarr.codec_registry.keys()) diff --git a/src/neuroconv/tools/testing/__init__.py b/src/neuroconv/tools/testing/__init__.py index 3c987fdd0..502634466 100644 --- a/src/neuroconv/tools/testing/__init__.py +++ b/src/neuroconv/tools/testing/__init__.py @@ -1,10 +1,10 @@ -from .mock_files import generate_path_expander_demo_ibl -from .mock_interfaces import MockBehaviorEventInterface, MockSpikeGLXNIDQInterface -from .mock_ttl_signals import generate_mock_ttl_signal, regenerate_test_cases from ._mock._mock_dataset_models import ( mock_DatasetInfo, mock_HDF5BackendConfiguration, - mock_ZarrBackendConfiguration, mock_HDF5DatasetConfiguration, + mock_ZarrBackendConfiguration, 
mock_ZarrDatasetConfiguration, ) +from .mock_files import generate_path_expander_demo_ibl +from .mock_interfaces import MockBehaviorEventInterface, MockSpikeGLXNIDQInterface +from .mock_ttl_signals import generate_mock_ttl_signal, regenerate_test_cases diff --git a/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py b/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py index 024ad9d3c..67b82c1bb 100644 --- a/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py +++ b/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py @@ -1,17 +1,17 @@ -from typing import Any, Tuple, Dict, Union, Literal, Iterable +from typing import Any, Dict, Iterable, Literal, Tuple, Union import h5py import numcodecs import numpy as np from ...nwb_helpers import ( + AVAILABLE_HDF5_COMPRESSION_METHODS, + AVAILABLE_ZARR_COMPRESSION_METHODS, DatasetInfo, HDF5BackendConfiguration, HDF5DatasetConfiguration, ZarrBackendConfiguration, ZarrDatasetConfiguration, - AVAILABLE_HDF5_COMPRESSION_METHODS, - AVAILABLE_ZARR_COMPRESSION_METHODS, ) diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py index f08c85167..be0c6f0d8 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py @@ -346,4 +346,4 @@ def test_classic_iterator_wrapped_dynamic_table_zarr(): assert dataset_configuration.compression_method == "gzip" assert dataset_configuration.compression_options is None assert dataset_configuration.filter_methods is None - assert dataset_configuration.filter_options is None \ No newline at end of file + assert dataset_configuration.filter_options is None diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py index dcda5d8a0..48950e0b7 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py @@ -143,4 +143,4 @@ def test_unwrapped_dynamic_table_zarr(zarr_nwbfile_path): assert dataset_configuration.compression_method == "gzip" assert dataset_configuration.compression_options is None assert dataset_configuration.filter_methods is None - assert dataset_configuration.filter_options is None \ No newline at end of file + assert dataset_configuration.filter_options is None diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_backend_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_backend_configuration_model.py index e77694d08..290ca6a1b 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_backend_configuration_model.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_backend_configuration_model.py @@ -2,7 +2,6 @@ from io 
import StringIO from unittest.mock import patch - from neuroconv.tools.testing import mock_HDF5BackendConfiguration diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_configuration_model.py index 39e4a787d..83e0f71b8 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_configuration_model.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_configuration_model.py @@ -4,7 +4,10 @@ import pytest -from neuroconv.tools.nwb_helpers import HDF5DatasetConfiguration, AVAILABLE_HDF5_COMPRESSION_METHODS +from neuroconv.tools.nwb_helpers import ( + AVAILABLE_HDF5_COMPRESSION_METHODS, + HDF5DatasetConfiguration, +) from neuroconv.tools.testing import mock_DatasetInfo, mock_HDF5DatasetConfiguration diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_backend_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_backend_configuration_model.py index 66d7dbc03..9e235df77 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_backend_configuration_model.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_backend_configuration_model.py @@ -2,7 +2,6 @@ from io import StringIO from unittest.mock import patch - from neuroconv.tools.testing import mock_ZarrBackendConfiguration diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_configuration_model.py index 6350b0d4f..36e469259 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_configuration_model.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_configuration_model.py @@ -4,7 +4,10 @@ import pytest -from neuroconv.tools.nwb_helpers import ZarrDatasetConfiguration, AVAILABLE_ZARR_COMPRESSION_METHODS +from neuroconv.tools.nwb_helpers import ( + AVAILABLE_ZARR_COMPRESSION_METHODS, + ZarrDatasetConfiguration, +) from neuroconv.tools.testing import mock_DatasetInfo, mock_ZarrDatasetConfiguration From 2e7af8496e4883180aa640dae0d71803f6b785c1 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Sun, 17 Sep 2023 23:00:34 -0400 Subject: [PATCH 05/27] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ee7c7c08..ea285d351 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ * Added Pydantic data models of `DatasetInfo` (immutable summary of core dataset values such as maximum shape and dtype) and `DatasetConfiguration` for both HDF5 and Zarr datasets (the optional layer that specifies chunk/buffering/compression). [PR #567](https://github.com/catalystneuro/neuroconv/pull/567) * Added Pydantic data models of `BackendConfiguration` for both HDF5 and Zarr datasets (container/mapper of all the `DatasetConfiguration`s for a particular file). 
[PR #568](https://github.com/catalystneuro/neuroconv/pull/568) +* Added tool function `get_default_dataset_configurations` for identifying and collecting all fields of an in-memory `NWBFile` that could become datasets on disk; and return instances of the Pydantic dataset models filled with default values for chunking/buffering/compression. [PR #569](https://github.com/catalystneuro/neuroconv/pull/569) From 85bc9277f3b379d1dcea1c8f1f847d0072b1384a Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Sun, 17 Sep 2023 23:05:54 -0400 Subject: [PATCH 06/27] Update __init__.py --- src/neuroconv/tools/nwb_helpers/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/neuroconv/tools/nwb_helpers/__init__.py b/src/neuroconv/tools/nwb_helpers/__init__.py index 0982439bb..f961974b2 100644 --- a/src/neuroconv/tools/nwb_helpers/__init__.py +++ b/src/neuroconv/tools/nwb_helpers/__init__.py @@ -5,7 +5,7 @@ make_nwbfile_from_metadata, make_or_load_nwbfile, ) -from ._models._base_models import DatasetConfiguration, DatasetInfo +from ._models._base_models import DatasetInfo from ._models._hdf5_models import ( AVAILABLE_HDF5_COMPRESSION_METHODS, HDF5BackendConfiguration, @@ -16,6 +16,7 @@ ZarrBackendConfiguration, ZarrDatasetConfiguration, ) +from ._dataset_configuration import get_default_backend_configuration BACKEND_TO_DATASET_CONFIGURATION = dict(hdf5=HDF5DatasetConfiguration, zarr=ZarrDatasetConfiguration) BACKEND_TO_CONFIGURATION = dict(hdf5=HDF5BackendConfiguration, zarr=ZarrBackendConfiguration) From 8307156709a17f2c2b0561ea9591e93eded67694 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Sep 2023 03:06:04 +0000 Subject: [PATCH 07/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/nwb_helpers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/nwb_helpers/__init__.py b/src/neuroconv/tools/nwb_helpers/__init__.py index f961974b2..0501ea5be 100644 --- a/src/neuroconv/tools/nwb_helpers/__init__.py +++ b/src/neuroconv/tools/nwb_helpers/__init__.py @@ -1,3 +1,4 @@ +from ._dataset_configuration import get_default_backend_configuration from ._metadata_and_file_helpers import ( add_device_from_metadata, get_default_nwbfile_metadata, @@ -16,7 +17,6 @@ ZarrBackendConfiguration, ZarrDatasetConfiguration, ) -from ._dataset_configuration import get_default_backend_configuration BACKEND_TO_DATASET_CONFIGURATION = dict(hdf5=HDF5DatasetConfiguration, zarr=ZarrDatasetConfiguration) BACKEND_TO_CONFIGURATION = dict(hdf5=HDF5BackendConfiguration, zarr=ZarrBackendConfiguration) From 13c9b37c3f41c23bb515a4be2e52d62fa0b4a498 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Wed, 4 Oct 2023 04:33:36 -0400 Subject: [PATCH 08/27] use dataset_name in DatasetInfo; other debugs --- src/neuroconv/tools/nwb_helpers/__init__.py | 5 +- .../nwb_helpers/_dataset_configuration.py | 34 +- ...test_get_default_dataset_configurations.py | 413 +++++++++--------- ...t_dataset_configurations_appended_files.py | 8 +- 4 files changed, 232 insertions(+), 228 deletions(-) diff --git a/src/neuroconv/tools/nwb_helpers/__init__.py b/src/neuroconv/tools/nwb_helpers/__init__.py index 0501ea5be..e381a3294 100644 --- a/src/neuroconv/tools/nwb_helpers/__init__.py +++ b/src/neuroconv/tools/nwb_helpers/__init__.py @@ -1,4 +1,4 @@ -from ._dataset_configuration import 
get_default_backend_configuration +from ._dataset_configuration import get_default_dataset_configurations from ._metadata_and_file_helpers import ( add_device_from_metadata, get_default_nwbfile_metadata, @@ -17,6 +17,3 @@ ZarrBackendConfiguration, ZarrDatasetConfiguration, ) - -BACKEND_TO_DATASET_CONFIGURATION = dict(hdf5=HDF5DatasetConfiguration, zarr=ZarrDatasetConfiguration) -BACKEND_TO_CONFIGURATION = dict(hdf5=HDF5BackendConfiguration, zarr=ZarrBackendConfiguration) diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index 0d8c74fde..e18b3d17f 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -11,18 +11,14 @@ from pynwb import NWBHDF5IO, NWBFile, TimeSeries from pynwb.base import DynamicTable -from ._dataset_and_backend_models import ( - BACKEND_TO_CONFIGURATION, - BACKEND_TO_DATASET_CONFIGURATION, - DatasetConfiguration, - DatasetInfo, - HDF5BackendConfiguration, - HDF5DatasetConfiguration, - ZarrBackendConfiguration, - ZarrDatasetConfiguration, -) +from ._models._base_models import DatasetConfiguration, DatasetInfo +from ._models._hdf5_models import HDF5BackendConfiguration, HDF5DatasetConfiguration +from ._models._zarr_models import ZarrBackendConfiguration, ZarrDatasetConfiguration from ..hdmf import SliceableDataChunkIterator +BACKEND_TO_DATASET_CONFIGURATION = dict(hdf5=HDF5DatasetConfiguration, zarr=ZarrDatasetConfiguration) +BACKEND_TO_CONFIGURATION = dict(hdf5=HDF5BackendConfiguration, zarr=ZarrBackendConfiguration) + def _get_mode(io: Union[NWBHDF5IO, NWBZarrIO]) -> str: """NWBHDF5IO and NWBZarrIO have different ways of storing the mode they used on a path.""" @@ -81,14 +77,14 @@ def _get_dataset_metadata( # DataChunkIterator has best generic dtype inference, though logic is hard to peel out of it # And it can fail in rare cases but not essential to our default configuration try: - dtype = str(DataChunkIterator(candidate_dataset).dtype) # string cast to be JSON friendly + dtype = DataChunkIterator(candidate_dataset).dtype except Exception as exception: if str(exception) != "Data type could not be determined. Please specify dtype in DataChunkIterator init.": raise exception else: - dtype = "unknown" + dtype = np.dtype("object") - maxshape = get_data_shape(data=candidate_dataset) + full_shape = get_data_shape(data=candidate_dataset) if isinstance(candidate_dataset, GenericDataChunkIterator): chunk_shape = candidate_dataset.chunk_shape @@ -96,20 +92,22 @@ def _get_dataset_metadata( elif dtype != "unknown": # TODO: eventually replace this with staticmethods on hdmf.data_utils.GenericDataChunkIterator chunk_shape = SliceableDataChunkIterator.estimate_default_chunk_shape( - chunk_mb=10.0, maxshape=maxshape, dtype=np.dtype(dtype) + chunk_mb=10.0, maxshape=full_shape, dtype=np.dtype(dtype) ) buffer_shape = SliceableDataChunkIterator.estimate_default_buffer_shape( - buffer_gb=0.5, chunk_shape=chunk_shape, maxshape=maxshape, dtype=np.dtype(dtype) + buffer_gb=0.5, chunk_shape=chunk_shape, maxshape=full_shape, dtype=np.dtype(dtype) ) else: pass # TODO: think on this; perhaps zarr's standalone estimator? 
+ location = _parse_location_in_memory_nwbfile(current_location=field_name, neurodata_object=neurodata_object) + dataset_name = location.strip("/")[-1] dataset_info = DatasetInfo( object_id=neurodata_object.object_id, object_name=neurodata_object.name, - location=_parse_location_in_memory_nwbfile(current_location=field_name, neurodata_object=neurodata_object), - field=field_name, - maxshape=maxshape, + location=location, + dataset_name=dataset_name, + full_shape=full_shape, dtype=dtype, ) dataset_configuration = DatasetConfigurationClass( diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py index be0c6f0d8..00d030802 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py @@ -7,6 +7,7 @@ from pynwb.testing.mock.base import mock_TimeSeries from pynwb.testing.mock.file import mock_NWBFile +import pytest from neuroconv.tools.hdmf import SliceableDataChunkIterator from neuroconv.tools.nwb_helpers import ( HDF5DatasetConfiguration, @@ -15,11 +16,13 @@ ) -def test_unwrapped_time_series_hdf5(): +@pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) +def test_unwrapped_time_series_hdf5(iterator: callable): array = np.array([[1, 2, 3], [4, 5, 6]]) + data = iterator(array) nwbfile = mock_NWBFile() - time_series = mock_TimeSeries(name="TestTimeSeries", data=array) + time_series = mock_TimeSeries(name="TestTimeSeries", data=data) nwbfile.add_acquisition(time_series) dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) @@ -30,7 +33,7 @@ def test_unwrapped_time_series_hdf5(): assert isinstance(dataset_configuration, HDF5DatasetConfiguration) assert dataset_configuration.dataset_info.object_id == time_series.object_id assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" - assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.full_shape == array.shape assert dataset_configuration.dataset_info.dtype == array.dtype assert dataset_configuration.chunk_shape == array.shape assert dataset_configuration.buffer_shape == array.shape @@ -38,11 +41,13 @@ def test_unwrapped_time_series_hdf5(): assert dataset_configuration.compression_options is None -def test_unwrapped_time_series_zarr(): +@pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) +def test_unwrapped_time_series_zarr(iterator: callable): array = np.array([[1, 2, 3], [4, 5, 6]]) + data = iterator(array) nwbfile = mock_NWBFile() - time_series = mock_TimeSeries(name="TestTimeSeries", data=array) + time_series = mock_TimeSeries(name="TestTimeSeries", data=data) nwbfile.add_acquisition(time_series) dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) @@ -53,7 +58,7 @@ def test_unwrapped_time_series_zarr(): assert isinstance(dataset_configuration, ZarrDatasetConfiguration) assert dataset_configuration.dataset_info.object_id == time_series.object_id assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" - assert 
dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.full_shape == array.shape assert dataset_configuration.dataset_info.dtype == array.dtype assert dataset_configuration.chunk_shape == array.shape assert dataset_configuration.buffer_shape == array.shape @@ -63,100 +68,100 @@ def test_unwrapped_time_series_zarr(): assert dataset_configuration.filter_options is None -def test_generic_iterator_wrapped_time_series_hdf5(): - array = np.array([[1, 2, 3], [4, 5, 6]]) +# def test_generic_iterator_wrapped_time_series_hdf5(): +# array = np.array([[1, 2, 3], [4, 5, 6]]) - nwbfile = mock_NWBFile() - time_series = mock_TimeSeries(name="TestTimeSeries", data=SliceableDataChunkIterator(data=array)) - nwbfile.add_acquisition(time_series) +# nwbfile = mock_NWBFile() +# time_series = mock_TimeSeries(name="TestTimeSeries", data=SliceableDataChunkIterator(data=array)) +# nwbfile.add_acquisition(time_series) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) +# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) - assert len(dataset_configurations) == 1 +# assert len(dataset_configurations) == 1 - dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, HDF5DatasetConfiguration) - assert dataset_configuration.dataset_info.object_id == time_series.object_id - assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" - assert dataset_configuration.dataset_info.maxshape == array.shape - assert dataset_configuration.dataset_info.dtype == array.dtype - assert dataset_configuration.chunk_shape == array.shape - assert dataset_configuration.buffer_shape == array.shape - assert dataset_configuration.compression_method == "gzip" - assert dataset_configuration.compression_options is None +# dataset_configuration = dataset_configurations[0] +# assert isinstance(dataset_configuration, HDF5DatasetConfiguration) +# assert dataset_configuration.dataset_info.object_id == time_series.object_id +# assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" +# assert dataset_configuration.dataset_info.full_shape == array.shape +# assert dataset_configuration.dataset_info.dtype == array.dtype +# assert dataset_configuration.chunk_shape == array.shape +# assert dataset_configuration.buffer_shape == array.shape +# assert dataset_configuration.compression_method == "gzip" +# assert dataset_configuration.compression_options is None -def test_classic_iterator_wrapped_simple_time_series_zarr(): - array = np.array([[1, 2, 3], [4, 5, 6]]) +# def test_classic_iterator_wrapped_simple_time_series_zarr(): +# array = np.array([[1, 2, 3], [4, 5, 6]]) - nwbfile = mock_NWBFile() - time_series = mock_TimeSeries(name="TestTimeSeries", data=DataChunkIterator(data=array)) - nwbfile.add_acquisition(time_series) +# nwbfile = mock_NWBFile() +# time_series = mock_TimeSeries(name="TestTimeSeries", data=DataChunkIterator(data=array)) +# nwbfile.add_acquisition(time_series) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) +# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) - assert len(dataset_configurations) == 1 +# assert len(dataset_configurations) == 1 - dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, ZarrDatasetConfiguration) - assert 
dataset_configuration.dataset_info.object_id == time_series.object_id - assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" - assert dataset_configuration.dataset_info.maxshape == array.shape - assert dataset_configuration.dataset_info.dtype == array.dtype - assert dataset_configuration.chunk_shape == array.shape - assert dataset_configuration.buffer_shape == array.shape - assert dataset_configuration.compression_method == "gzip" - assert dataset_configuration.compression_options is None - assert dataset_configuration.filter_methods is None - assert dataset_configuration.filter_options is None +# dataset_configuration = dataset_configurations[0] +# assert isinstance(dataset_configuration, ZarrDatasetConfiguration) +# assert dataset_configuration.dataset_info.object_id == time_series.object_id +# assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" +# assert dataset_configuration.dataset_info.full_shape == array.shape +# assert dataset_configuration.dataset_info.dtype == array.dtype +# assert dataset_configuration.chunk_shape == array.shape +# assert dataset_configuration.buffer_shape == array.shape +# assert dataset_configuration.compression_method == "gzip" +# assert dataset_configuration.compression_options is None +# assert dataset_configuration.filter_methods is None +# assert dataset_configuration.filter_options is None -def test_classic_iterator_wrapped_time_series_hdf5(): - array = np.array([[1, 2, 3], [4, 5, 6]]) +# def test_classic_iterator_wrapped_time_series_hdf5(): +# array = np.array([[1, 2, 3], [4, 5, 6]]) - nwbfile = mock_NWBFile() - time_series = mock_TimeSeries(name="TestTimeSeries", data=DataChunkIterator(data=array)) - nwbfile.add_acquisition(time_series) +# nwbfile = mock_NWBFile() +# time_series = mock_TimeSeries(name="TestTimeSeries", data=DataChunkIterator(data=array)) +# nwbfile.add_acquisition(time_series) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) +# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) - assert len(dataset_configurations) == 1 +# assert len(dataset_configurations) == 1 - dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, HDF5DatasetConfiguration) - assert dataset_configuration.dataset_info.object_id == time_series.object_id - assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" - assert dataset_configuration.dataset_info.maxshape == array.shape - assert dataset_configuration.dataset_info.dtype == array.dtype - assert dataset_configuration.chunk_shape == array.shape - assert dataset_configuration.buffer_shape == array.shape - assert dataset_configuration.compression_method == "gzip" - assert dataset_configuration.compression_options is None +# dataset_configuration = dataset_configurations[0] +# assert isinstance(dataset_configuration, HDF5DatasetConfiguration) +# assert dataset_configuration.dataset_info.object_id == time_series.object_id +# assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" +# assert dataset_configuration.dataset_info.full_shape == array.shape +# assert dataset_configuration.dataset_info.dtype == array.dtype +# assert dataset_configuration.chunk_shape == array.shape +# assert dataset_configuration.buffer_shape == array.shape +# assert dataset_configuration.compression_method == "gzip" +# assert dataset_configuration.compression_options is None 
-def test_generic_iterator_wrapped_simple_time_series_zarr(): - array = np.array([[1, 2, 3], [4, 5, 6]]) +# def test_generic_iterator_wrapped_simple_time_series_zarr(): +# array = np.array([[1, 2, 3], [4, 5, 6]]) - nwbfile = mock_NWBFile() - time_series = mock_TimeSeries(name="TestTimeSeries", data=SliceableDataChunkIterator(data=array)) - nwbfile.add_acquisition(time_series) +# nwbfile = mock_NWBFile() +# time_series = mock_TimeSeries(name="TestTimeSeries", data=SliceableDataChunkIterator(data=array)) +# nwbfile.add_acquisition(time_series) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) +# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) - assert len(dataset_configurations) == 1 +# assert len(dataset_configurations) == 1 - dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, ZarrDatasetConfiguration) - assert dataset_configuration.dataset_info.object_id == time_series.object_id - assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" - assert dataset_configuration.dataset_info.maxshape == array.shape - assert dataset_configuration.dataset_info.dtype == array.dtype - assert dataset_configuration.chunk_shape == array.shape - assert dataset_configuration.buffer_shape == array.shape - assert dataset_configuration.compression_method == "gzip" - assert dataset_configuration.compression_options is None - assert dataset_configuration.filter_methods is None - assert dataset_configuration.filter_options is None +# dataset_configuration = dataset_configurations[0] +# assert isinstance(dataset_configuration, ZarrDatasetConfiguration) +# assert dataset_configuration.dataset_info.object_id == time_series.object_id +# assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" +# assert dataset_configuration.dataset_info.full_shape == array.shape +# assert dataset_configuration.dataset_info.dtype == array.dtype +# assert dataset_configuration.chunk_shape == array.shape +# assert dataset_configuration.buffer_shape == array.shape +# assert dataset_configuration.compression_method == "gzip" +# assert dataset_configuration.compression_options is None +# assert dataset_configuration.filter_methods is None +# assert dataset_configuration.filter_options is None def test_external_image_series_hdf5(): @@ -179,11 +184,13 @@ def test_external_image_series_zarr(): assert len(dataset_configurations) == 0 -def test_unwrapped_dynamic_table_hdf5(): +@pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) +def test_unwrapped_dynamic_table_hdf5(iterator: callable): array = np.array([0.1, 0.2, 0.3]) + data = iterator(array.squeeze()) nwbfile = mock_NWBFile() - column = VectorData(name="TestColumn", description="", data=array.squeeze()) + column = VectorData(name="TestColumn", description="", data=data) dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column]) nwbfile.add_acquisition(dynamic_table) @@ -195,7 +202,7 @@ def test_unwrapped_dynamic_table_hdf5(): assert isinstance(dataset_configuration, HDF5DatasetConfiguration) assert dataset_configuration.dataset_info.object_id == column.object_id assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" - assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.full_shape == array.shape assert 
dataset_configuration.dataset_info.dtype == array.dtype assert dataset_configuration.chunk_shape == array.shape assert dataset_configuration.buffer_shape == array.shape @@ -203,11 +210,13 @@ def test_unwrapped_dynamic_table_hdf5(): assert dataset_configuration.compression_options is None -def test_unwrapped_dynamic_table_zarr(): +@pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) +def test_unwrapped_dynamic_table_zarr(iterator: callable): array = np.array([0.1, 0.2, 0.3]) + data = iterator(array.squeeze()) nwbfile = mock_NWBFile() - column = VectorData(name="TestColumn", description="", data=array.squeeze()) + column = VectorData(name="TestColumn", description="", data=data) dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column]) nwbfile.add_acquisition(dynamic_table) @@ -219,67 +228,7 @@ def test_unwrapped_dynamic_table_zarr(): assert isinstance(dataset_configuration, ZarrDatasetConfiguration) assert dataset_configuration.dataset_info.object_id == column.object_id assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" - assert dataset_configuration.dataset_info.maxshape == array.shape - assert dataset_configuration.dataset_info.dtype == array.dtype - assert dataset_configuration.chunk_shape == array.shape - assert dataset_configuration.buffer_shape == array.shape - assert dataset_configuration.compression_method == "gzip" - assert dataset_configuration.compression_options is None - assert dataset_configuration.filter_methods is None - assert dataset_configuration.filter_options is None - - -def test_generic_iterator_wrapped_dynamic_table_hdf5(): - array = np.array([0.1, 0.2, 0.3]) - - nwbfile = mock_NWBFile() - column = VectorData(name="TestColumn", description="", data=SliceableDataChunkIterator(data=array.squeeze())) - dynamic_table = DynamicTable( - name="TestDynamicTable", - description="", - id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO - columns=[column], - ) - nwbfile.add_acquisition(dynamic_table) - - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) - - assert len(dataset_configurations) == 1 - - dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, HDF5DatasetConfiguration) - assert dataset_configuration.dataset_info.object_id == column.object_id - assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" - assert dataset_configuration.dataset_info.maxshape == (array.shape[0],) - assert dataset_configuration.dataset_info.dtype == array.dtype - assert dataset_configuration.chunk_shape == array.shape - assert dataset_configuration.buffer_shape == array.shape - assert dataset_configuration.compression_method == "gzip" - assert dataset_configuration.compression_options is None - - -def test_generic_iterator_wrapped_dynamic_table_zarr(): - array = np.array([0.1, 0.2, 0.3]) - - nwbfile = mock_NWBFile() - column = VectorData(name="TestColumn", description="", data=SliceableDataChunkIterator(data=array.squeeze())) - dynamic_table = DynamicTable( - name="TestDynamicTable", - description="", - id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO - columns=[column], - ) - nwbfile.add_acquisition(dynamic_table) - - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) - - assert 
len(dataset_configurations) == 1 - - dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, ZarrDatasetConfiguration) - assert dataset_configuration.dataset_info.object_id == column.object_id - assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" - assert dataset_configuration.dataset_info.maxshape == (array.shape[0],) + assert dataset_configuration.dataset_info.full_shape == array.shape assert dataset_configuration.dataset_info.dtype == array.dtype assert dataset_configuration.chunk_shape == array.shape assert dataset_configuration.buffer_shape == array.shape @@ -289,61 +238,121 @@ def test_generic_iterator_wrapped_dynamic_table_zarr(): assert dataset_configuration.filter_options is None -def test_classic_iterator_wrapped_dynamic_table_hdf5(): - array = np.array([0.1, 0.2, 0.3]) - - nwbfile = mock_NWBFile() - column = VectorData(name="TestColumn", description="", data=DataChunkIterator(data=array.squeeze())) - dynamic_table = DynamicTable( - name="TestDynamicTable", - description="", - id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO - columns=[column], - ) - nwbfile.add_acquisition(dynamic_table) - - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) - - assert len(dataset_configurations) == 1 - - dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, HDF5DatasetConfiguration) - assert dataset_configuration.dataset_info.object_id == column.object_id - assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" - assert dataset_configuration.dataset_info.maxshape == (array.shape[0],) - assert dataset_configuration.dataset_info.dtype == array.dtype - assert dataset_configuration.chunk_shape == array.shape - assert dataset_configuration.buffer_shape == array.shape - assert dataset_configuration.compression_method == "gzip" - assert dataset_configuration.compression_options is None - - -def test_classic_iterator_wrapped_dynamic_table_zarr(): - array = np.array([0.1, 0.2, 0.3]) - - nwbfile = mock_NWBFile() - column = VectorData(name="TestColumn", description="", data=DataChunkIterator(data=array.squeeze())) - dynamic_table = DynamicTable( - name="TestDynamicTable", - description="", - id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO - columns=[column], - ) - nwbfile.add_acquisition(dynamic_table) - - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) - - assert len(dataset_configurations) == 1 - - dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, ZarrDatasetConfiguration) - assert dataset_configuration.dataset_info.object_id == column.object_id - assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" - assert dataset_configuration.dataset_info.maxshape == (array.shape[0],) - assert dataset_configuration.dataset_info.dtype == array.dtype - assert dataset_configuration.chunk_shape == array.shape - assert dataset_configuration.buffer_shape == array.shape - assert dataset_configuration.compression_method == "gzip" - assert dataset_configuration.compression_options is None - assert dataset_configuration.filter_methods is None - assert dataset_configuration.filter_options is None +# def test_generic_iterator_wrapped_dynamic_table_hdf5(): +# 
array = np.array([0.1, 0.2, 0.3]) + +# nwbfile = mock_NWBFile() +# column = VectorData(name="TestColumn", description="", data=SliceableDataChunkIterator(data=array.squeeze())) +# dynamic_table = DynamicTable( +# name="TestDynamicTable", +# description="", +# id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO +# columns=[column], +# ) +# nwbfile.add_acquisition(dynamic_table) + +# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + +# assert len(dataset_configurations) == 1 + +# dataset_configuration = dataset_configurations[0] +# assert isinstance(dataset_configuration, HDF5DatasetConfiguration) +# assert dataset_configuration.dataset_info.object_id == column.object_id +# assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" +# assert dataset_configuration.dataset_info.full_shape == (array.shape[0],) +# assert dataset_configuration.dataset_info.dtype == array.dtype +# assert dataset_configuration.chunk_shape == array.shape +# assert dataset_configuration.buffer_shape == array.shape +# assert dataset_configuration.compression_method == "gzip" +# assert dataset_configuration.compression_options is None + + +# def test_generic_iterator_wrapped_dynamic_table_zarr(): +# array = np.array([0.1, 0.2, 0.3]) + +# nwbfile = mock_NWBFile() +# column = VectorData(name="TestColumn", description="", data=SliceableDataChunkIterator(data=array.squeeze())) +# dynamic_table = DynamicTable( +# name="TestDynamicTable", +# description="", +# id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO +# columns=[column], +# ) +# nwbfile.add_acquisition(dynamic_table) + +# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) + +# assert len(dataset_configurations) == 1 + +# dataset_configuration = dataset_configurations[0] +# assert isinstance(dataset_configuration, ZarrDatasetConfiguration) +# assert dataset_configuration.dataset_info.object_id == column.object_id +# assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" +# assert dataset_configuration.dataset_info.full_shape == (array.shape[0],) +# assert dataset_configuration.dataset_info.dtype == array.dtype +# assert dataset_configuration.chunk_shape == array.shape +# assert dataset_configuration.buffer_shape == array.shape +# assert dataset_configuration.compression_method == "gzip" +# assert dataset_configuration.compression_options is None +# assert dataset_configuration.filter_methods is None +# assert dataset_configuration.filter_options is None + + +# def test_classic_iterator_wrapped_dynamic_table_hdf5(): +# array = np.array([0.1, 0.2, 0.3]) + +# nwbfile = mock_NWBFile() +# column = VectorData(name="TestColumn", description="", data=DataChunkIterator(data=array.squeeze())) +# dynamic_table = DynamicTable( +# name="TestDynamicTable", +# description="", +# id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO +# columns=[column], +# ) +# nwbfile.add_acquisition(dynamic_table) + +# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + +# assert len(dataset_configurations) == 1 + +# dataset_configuration = dataset_configurations[0] +# assert isinstance(dataset_configuration, HDF5DatasetConfiguration) +# assert dataset_configuration.dataset_info.object_id == 
column.object_id +# assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" +# assert dataset_configuration.dataset_info.full_shape == (array.shape[0],) +# assert dataset_configuration.dataset_info.dtype == array.dtype +# assert dataset_configuration.chunk_shape == array.shape +# assert dataset_configuration.buffer_shape == array.shape +# assert dataset_configuration.compression_method == "gzip" +# assert dataset_configuration.compression_options is None + + +# def test_classic_iterator_wrapped_dynamic_table_zarr(): +# array = np.array([0.1, 0.2, 0.3]) + +# nwbfile = mock_NWBFile() +# column = VectorData(name="TestColumn", description="", data=DataChunkIterator(data=array.squeeze())) +# dynamic_table = DynamicTable( +# name="TestDynamicTable", +# description="", +# id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO +# columns=[column], +# ) +# nwbfile.add_acquisition(dynamic_table) + +# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) + +# assert len(dataset_configurations) == 1 + +# dataset_configuration = dataset_configurations[0] +# assert isinstance(dataset_configuration, ZarrDatasetConfiguration) +# assert dataset_configuration.dataset_info.object_id == column.object_id +# assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" +# assert dataset_configuration.dataset_info.full_shape == (array.shape[0],) +# assert dataset_configuration.dataset_info.dtype == array.dtype +# assert dataset_configuration.chunk_shape == array.shape +# assert dataset_configuration.buffer_shape == array.shape +# assert dataset_configuration.compression_method == "gzip" +# assert dataset_configuration.compression_options is None +# assert dataset_configuration.filter_methods is None +# assert dataset_configuration.filter_options is None diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py index 48950e0b7..f33334e93 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py @@ -63,7 +63,7 @@ def test_unwrapped_time_series_hdf5(hdf5_nwbfile_path): assert isinstance(dataset_configuration, HDF5DatasetConfiguration) assert dataset_configuration.dataset_info.object_id == new_time_series.object_id assert dataset_configuration.dataset_info.location == "acquisition/NewTimeSeries/data" - assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.full_shape == array.shape assert dataset_configuration.dataset_info.dtype == array.dtype assert dataset_configuration.chunk_shape == array.shape assert dataset_configuration.buffer_shape == array.shape @@ -86,7 +86,7 @@ def test_unwrapped_time_series_zarr(zarr_nwbfile_path): assert isinstance(dataset_configuration, ZarrDatasetConfiguration) assert dataset_configuration.dataset_info.object_id == new_time_series.object_id assert dataset_configuration.dataset_info.location == "acquisition/NewTimeSeries/data" - assert dataset_configuration.dataset_info.maxshape == 
array.shape + assert dataset_configuration.dataset_info.full_shape == array.shape assert dataset_configuration.dataset_info.dtype == array.dtype assert dataset_configuration.chunk_shape == array.shape assert dataset_configuration.buffer_shape == array.shape @@ -112,7 +112,7 @@ def test_unwrapped_dynamic_table_hdf5(hdf5_nwbfile_path): assert isinstance(dataset_configuration, HDF5DatasetConfiguration) assert dataset_configuration.dataset_info.object_id == column.object_id assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" - assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.full_shape == array.shape assert dataset_configuration.dataset_info.dtype == array.dtype assert dataset_configuration.chunk_shape == array.shape assert dataset_configuration.buffer_shape == array.shape @@ -136,7 +136,7 @@ def test_unwrapped_dynamic_table_zarr(zarr_nwbfile_path): assert isinstance(dataset_configuration, ZarrDatasetConfiguration) assert dataset_configuration.dataset_info.object_id == column.object_id assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" - assert dataset_configuration.dataset_info.maxshape == array.shape + assert dataset_configuration.dataset_info.full_shape == array.shape assert dataset_configuration.dataset_info.dtype == array.dtype assert dataset_configuration.chunk_shape == array.shape assert dataset_configuration.buffer_shape == array.shape From b63161ae76ff3e915d48f2e8b9cf1a395cd0c352 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Oct 2023 08:37:26 +0000 Subject: [PATCH 09/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../test_helpers/test_get_default_dataset_configurations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py index 00d030802..1c4a80adc 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py @@ -1,5 +1,6 @@ """Unit tests for `get_default_dataset_configurations`.""" import numpy as np +import pytest from hdmf.common import VectorData from hdmf.data_utils import DataChunkIterator from pynwb.base import DynamicTable @@ -7,7 +8,6 @@ from pynwb.testing.mock.base import mock_TimeSeries from pynwb.testing.mock.file import mock_NWBFile -import pytest from neuroconv.tools.hdmf import SliceableDataChunkIterator from neuroconv.tools.nwb_helpers import ( HDF5DatasetConfiguration, From d55e2a2b4037786dffd74643bc47ce43f4967b0f Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Thu, 5 Oct 2023 03:59:26 -0400 Subject: [PATCH 10/27] remove comments --- ...test_get_default_dataset_configurations.py | 216 ------------------ 1 file changed, 216 deletions(-) diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py index 
00d030802..8485c0dac 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py @@ -68,102 +68,6 @@ def test_unwrapped_time_series_zarr(iterator: callable): assert dataset_configuration.filter_options is None -# def test_generic_iterator_wrapped_time_series_hdf5(): -# array = np.array([[1, 2, 3], [4, 5, 6]]) - -# nwbfile = mock_NWBFile() -# time_series = mock_TimeSeries(name="TestTimeSeries", data=SliceableDataChunkIterator(data=array)) -# nwbfile.add_acquisition(time_series) - -# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) - -# assert len(dataset_configurations) == 1 - -# dataset_configuration = dataset_configurations[0] -# assert isinstance(dataset_configuration, HDF5DatasetConfiguration) -# assert dataset_configuration.dataset_info.object_id == time_series.object_id -# assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" -# assert dataset_configuration.dataset_info.full_shape == array.shape -# assert dataset_configuration.dataset_info.dtype == array.dtype -# assert dataset_configuration.chunk_shape == array.shape -# assert dataset_configuration.buffer_shape == array.shape -# assert dataset_configuration.compression_method == "gzip" -# assert dataset_configuration.compression_options is None - - -# def test_classic_iterator_wrapped_simple_time_series_zarr(): -# array = np.array([[1, 2, 3], [4, 5, 6]]) - -# nwbfile = mock_NWBFile() -# time_series = mock_TimeSeries(name="TestTimeSeries", data=DataChunkIterator(data=array)) -# nwbfile.add_acquisition(time_series) - -# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) - -# assert len(dataset_configurations) == 1 - -# dataset_configuration = dataset_configurations[0] -# assert isinstance(dataset_configuration, ZarrDatasetConfiguration) -# assert dataset_configuration.dataset_info.object_id == time_series.object_id -# assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" -# assert dataset_configuration.dataset_info.full_shape == array.shape -# assert dataset_configuration.dataset_info.dtype == array.dtype -# assert dataset_configuration.chunk_shape == array.shape -# assert dataset_configuration.buffer_shape == array.shape -# assert dataset_configuration.compression_method == "gzip" -# assert dataset_configuration.compression_options is None -# assert dataset_configuration.filter_methods is None -# assert dataset_configuration.filter_options is None - - -# def test_classic_iterator_wrapped_time_series_hdf5(): -# array = np.array([[1, 2, 3], [4, 5, 6]]) - -# nwbfile = mock_NWBFile() -# time_series = mock_TimeSeries(name="TestTimeSeries", data=DataChunkIterator(data=array)) -# nwbfile.add_acquisition(time_series) - -# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) - -# assert len(dataset_configurations) == 1 - -# dataset_configuration = dataset_configurations[0] -# assert isinstance(dataset_configuration, HDF5DatasetConfiguration) -# assert dataset_configuration.dataset_info.object_id == time_series.object_id -# assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" -# assert dataset_configuration.dataset_info.full_shape == array.shape -# assert dataset_configuration.dataset_info.dtype 
== array.dtype -# assert dataset_configuration.chunk_shape == array.shape -# assert dataset_configuration.buffer_shape == array.shape -# assert dataset_configuration.compression_method == "gzip" -# assert dataset_configuration.compression_options is None - - -# def test_generic_iterator_wrapped_simple_time_series_zarr(): -# array = np.array([[1, 2, 3], [4, 5, 6]]) - -# nwbfile = mock_NWBFile() -# time_series = mock_TimeSeries(name="TestTimeSeries", data=SliceableDataChunkIterator(data=array)) -# nwbfile.add_acquisition(time_series) - -# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) - -# assert len(dataset_configurations) == 1 - -# dataset_configuration = dataset_configurations[0] -# assert isinstance(dataset_configuration, ZarrDatasetConfiguration) -# assert dataset_configuration.dataset_info.object_id == time_series.object_id -# assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" -# assert dataset_configuration.dataset_info.full_shape == array.shape -# assert dataset_configuration.dataset_info.dtype == array.dtype -# assert dataset_configuration.chunk_shape == array.shape -# assert dataset_configuration.buffer_shape == array.shape -# assert dataset_configuration.compression_method == "gzip" -# assert dataset_configuration.compression_options is None -# assert dataset_configuration.filter_methods is None -# assert dataset_configuration.filter_options is None - - def test_external_image_series_hdf5(): nwbfile = mock_NWBFile() image_series = ImageSeries(name="TestImageSeries", external_file=[""], rate=1.0) @@ -236,123 +140,3 @@ def test_unwrapped_dynamic_table_zarr(iterator: callable): assert dataset_configuration.compression_options is None assert dataset_configuration.filter_methods is None assert dataset_configuration.filter_options is None - - -# def test_generic_iterator_wrapped_dynamic_table_hdf5(): -# array = np.array([0.1, 0.2, 0.3]) - -# nwbfile = mock_NWBFile() -# column = VectorData(name="TestColumn", description="", data=SliceableDataChunkIterator(data=array.squeeze())) -# dynamic_table = DynamicTable( -# name="TestDynamicTable", -# description="", -# id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO -# columns=[column], -# ) -# nwbfile.add_acquisition(dynamic_table) - -# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) - -# assert len(dataset_configurations) == 1 - -# dataset_configuration = dataset_configurations[0] -# assert isinstance(dataset_configuration, HDF5DatasetConfiguration) -# assert dataset_configuration.dataset_info.object_id == column.object_id -# assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" -# assert dataset_configuration.dataset_info.full_shape == (array.shape[0],) -# assert dataset_configuration.dataset_info.dtype == array.dtype -# assert dataset_configuration.chunk_shape == array.shape -# assert dataset_configuration.buffer_shape == array.shape -# assert dataset_configuration.compression_method == "gzip" -# assert dataset_configuration.compression_options is None - - -# def test_generic_iterator_wrapped_dynamic_table_zarr(): -# array = np.array([0.1, 0.2, 0.3]) - -# nwbfile = mock_NWBFile() -# column = VectorData(name="TestColumn", description="", data=SliceableDataChunkIterator(data=array.squeeze())) -# dynamic_table = DynamicTable( -# name="TestDynamicTable", -# description="", -# id=list(range(array.shape[0])), 
# Need to include ID since the data of the column is not wrapped in an IO -# columns=[column], -# ) -# nwbfile.add_acquisition(dynamic_table) - -# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) - -# assert len(dataset_configurations) == 1 - -# dataset_configuration = dataset_configurations[0] -# assert isinstance(dataset_configuration, ZarrDatasetConfiguration) -# assert dataset_configuration.dataset_info.object_id == column.object_id -# assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" -# assert dataset_configuration.dataset_info.full_shape == (array.shape[0],) -# assert dataset_configuration.dataset_info.dtype == array.dtype -# assert dataset_configuration.chunk_shape == array.shape -# assert dataset_configuration.buffer_shape == array.shape -# assert dataset_configuration.compression_method == "gzip" -# assert dataset_configuration.compression_options is None -# assert dataset_configuration.filter_methods is None -# assert dataset_configuration.filter_options is None - - -# def test_classic_iterator_wrapped_dynamic_table_hdf5(): -# array = np.array([0.1, 0.2, 0.3]) - -# nwbfile = mock_NWBFile() -# column = VectorData(name="TestColumn", description="", data=DataChunkIterator(data=array.squeeze())) -# dynamic_table = DynamicTable( -# name="TestDynamicTable", -# description="", -# id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO -# columns=[column], -# ) -# nwbfile.add_acquisition(dynamic_table) - -# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) - -# assert len(dataset_configurations) == 1 - -# dataset_configuration = dataset_configurations[0] -# assert isinstance(dataset_configuration, HDF5DatasetConfiguration) -# assert dataset_configuration.dataset_info.object_id == column.object_id -# assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" -# assert dataset_configuration.dataset_info.full_shape == (array.shape[0],) -# assert dataset_configuration.dataset_info.dtype == array.dtype -# assert dataset_configuration.chunk_shape == array.shape -# assert dataset_configuration.buffer_shape == array.shape -# assert dataset_configuration.compression_method == "gzip" -# assert dataset_configuration.compression_options is None - - -# def test_classic_iterator_wrapped_dynamic_table_zarr(): -# array = np.array([0.1, 0.2, 0.3]) - -# nwbfile = mock_NWBFile() -# column = VectorData(name="TestColumn", description="", data=DataChunkIterator(data=array.squeeze())) -# dynamic_table = DynamicTable( -# name="TestDynamicTable", -# description="", -# id=list(range(array.shape[0])), # Need to include ID since the data of the column is not wrapped in an IO -# columns=[column], -# ) -# nwbfile.add_acquisition(dynamic_table) - -# dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) - -# assert len(dataset_configurations) == 1 - -# dataset_configuration = dataset_configurations[0] -# assert isinstance(dataset_configuration, ZarrDatasetConfiguration) -# assert dataset_configuration.dataset_info.object_id == column.object_id -# assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" -# assert dataset_configuration.dataset_info.full_shape == (array.shape[0],) -# assert dataset_configuration.dataset_info.dtype == array.dtype -# assert dataset_configuration.chunk_shape == 
array.shape -# assert dataset_configuration.buffer_shape == array.shape -# assert dataset_configuration.compression_method == "gzip" -# assert dataset_configuration.compression_options is None -# assert dataset_configuration.filter_methods is None -# assert dataset_configuration.filter_options is None From 3c7cde84423d5e27e44d6b8bdbf58f73da1d5c59 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 7 Nov 2023 20:37:50 -0500 Subject: [PATCH 11/27] remove unused typing --- src/neuroconv/tools/hdmf.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/neuroconv/tools/hdmf.py b/src/neuroconv/tools/hdmf.py index b82b12e1f..843fda22b 100644 --- a/src/neuroconv/tools/hdmf.py +++ b/src/neuroconv/tools/hdmf.py @@ -4,8 +4,6 @@ import numpy as np from hdmf.data_utils import GenericDataChunkIterator as HDMFGenericDataChunkIterator -from pydantic import Field -from typing_extensions import Annotated class GenericDataChunkIterator(HDMFGenericDataChunkIterator): @@ -16,11 +14,7 @@ def _get_default_buffer_shape(self, buffer_gb: float = 1.0) -> Tuple[int]: # TODO: move this to the core iterator in HDMF so it can be easily swapped out as well as run on its own @staticmethod - def estimate_default_chunk_shape( - chunk_mb: Annotated[float, Field(gt=0.0)], - maxshape: Tuple[int, ...], - dtype: np.dtype, - ) -> Tuple[int, ...]: + def estimate_default_chunk_shape(chunk_mb: float, maxshape: Tuple[int, ...], dtype: np.dtype) -> Tuple[int, ...]: """ Select chunk shape with size in MB less than the threshold of chunk_mb. @@ -46,10 +40,7 @@ def estimate_default_chunk_shape( # TODO: move this to the core iterator in HDMF so it can be easily swapped out as well as run on its own @staticmethod def estimate_default_buffer_shape( - buffer_gb: Annotated[float, Field(gt=0.0)], - chunk_shape: Tuple[int, ...], - maxshape: Tuple[int, ...], - dtype: np.dtype, + buffer_gb: float, chunk_shape: Tuple[int, ...], maxshape: Tuple[int, ...], dtype: np.dtype ) -> Tuple[int]: num_axes = len(maxshape) chunk_bytes = math.prod(chunk_shape) * dtype.itemsize @@ -117,7 +108,7 @@ def estimate_default_buffer_shape( class SliceableDataChunkIterator(GenericDataChunkIterator): """ - Generic data chunk iterator that works for any memory mapped array, such as a np.memmap or an h5py.Dataset + Generic data chunk iterator that works for any memory mapped array, such as a np.memmap or h5py.Dataset object. 
""" def __init__(self, data, **kwargs): From b845ac6a2fec0fb6fe6f42fbcacd5cda85855a06 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 7 Nov 2023 20:58:06 -0500 Subject: [PATCH 12/27] improve error message and fix import test --- tests/imports.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/imports.py b/tests/imports.py index 781a4bb97..656ddfea9 100644 --- a/tests/imports.py +++ b/tests/imports.py @@ -4,7 +4,6 @@ Run them by using: pytest tests/import_structure.py::TestImportStructure::test_name """ - from unittest import TestCase @@ -44,7 +43,7 @@ def test_top_level(self): "BaseExtractorInterface", "run_conversion_from_yaml", ] - self.assertCountEqual(first=current_structure, second=expected_structure) + assert sorted(current_structure) == sorted(expected_structure) def test_tools(self): """Python dir() calls (and __dict__ as well) update dynamically based on global imports.""" @@ -64,8 +63,9 @@ def test_tools(self): "deploy_process", "LocalPathExpander", "get_module", + "hdmf", ] - self.assertCountEqual(first=current_structure, second=expected_structure) + assert sorted(current_structure) == sorted(expected_structure) def test_datainterfaces(self): from neuroconv import datainterfaces @@ -87,4 +87,4 @@ def test_datainterfaces(self): "interfaces_by_category", ] + interface_name_list - self.assertCountEqual(first=current_structure, second=expected_structure) + assert sorted(current_structure) == sorted(expected_structure) From 5c7fb6bdf326107d173cb53e0783e6c60689eb83 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 7 Nov 2023 21:18:30 -0500 Subject: [PATCH 13/27] add global static maps; further condense tests with parametrize --- src/neuroconv/tools/nwb_helpers/__init__.py | 3 + ...test_get_default_dataset_configurations.py | 100 ++++-------------- 2 files changed, 25 insertions(+), 78 deletions(-) diff --git a/src/neuroconv/tools/nwb_helpers/__init__.py b/src/neuroconv/tools/nwb_helpers/__init__.py index e381a3294..c17fa3ef0 100644 --- a/src/neuroconv/tools/nwb_helpers/__init__.py +++ b/src/neuroconv/tools/nwb_helpers/__init__.py @@ -17,3 +17,6 @@ ZarrBackendConfiguration, ZarrDatasetConfiguration, ) + +DATASET_CONFIGURATIONS = dict(hdf5=HDF5DatasetConfiguration, zarr=ZarrDatasetConfiguration) +BACKEND_CONFIGURATIONS = dict(hdf5=HDF5BackendConfiguration, zarr=ZarrBackendConfiguration) diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py index bf34abd2d..76469979e 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py @@ -1,4 +1,6 @@ """Unit tests for `get_default_dataset_configurations`.""" +from typing import Literal + import numpy as np import pytest from hdmf.common import VectorData @@ -9,15 +11,12 @@ from pynwb.testing.mock.file import mock_NWBFile from neuroconv.tools.hdmf import SliceableDataChunkIterator -from neuroconv.tools.nwb_helpers import ( - HDF5DatasetConfiguration, - ZarrDatasetConfiguration, - get_default_dataset_configurations, -) +from neuroconv.tools.nwb_helpers import DATASET_CONFIGURATIONS, get_default_dataset_configurations @pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, 
DataChunkIterator]) -def test_unwrapped_time_series_hdf5(iterator: callable): +@pytest.mark.parametrize("backend", ["hdf5", "zarr"]) +def test_configuration_on_unwrapped_time_series_hdf5(iterator: callable, backend: Literal["hdf5", "zarr"]): array = np.array([[1, 2, 3], [4, 5, 6]]) data = iterator(array) @@ -25,12 +24,12 @@ def test_unwrapped_time_series_hdf5(iterator: callable): time_series = mock_TimeSeries(name="TestTimeSeries", data=data) nwbfile.add_acquisition(time_series) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend=backend)) assert len(dataset_configurations) == 1 dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, HDF5DatasetConfiguration) + assert isinstance(dataset_configuration, DATASET_CONFIGURATIONS[backend]) assert dataset_configuration.dataset_info.object_id == time_series.object_id assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" assert dataset_configuration.dataset_info.full_shape == array.shape @@ -40,70 +39,39 @@ def test_unwrapped_time_series_hdf5(iterator: callable): assert dataset_configuration.compression_method == "gzip" assert dataset_configuration.compression_options is None - -@pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) -def test_unwrapped_time_series_zarr(iterator: callable): - array = np.array([[1, 2, 3], [4, 5, 6]]) - data = iterator(array) - - nwbfile = mock_NWBFile() - time_series = mock_TimeSeries(name="TestTimeSeries", data=data) - nwbfile.add_acquisition(time_series) - - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) - - assert len(dataset_configurations) == 1 - - dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, ZarrDatasetConfiguration) - assert dataset_configuration.dataset_info.object_id == time_series.object_id - assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" - assert dataset_configuration.dataset_info.full_shape == array.shape - assert dataset_configuration.dataset_info.dtype == array.dtype - assert dataset_configuration.chunk_shape == array.shape - assert dataset_configuration.buffer_shape == array.shape - assert dataset_configuration.compression_method == "gzip" - assert dataset_configuration.compression_options is None - assert dataset_configuration.filter_methods is None - assert dataset_configuration.filter_options is None + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None -def test_external_image_series_hdf5(): +@pytest.mark.parametrize("backend", ["hdf5", "zarr"]) +def test_configuration_on_external_image_series_hdf5(backend: Literal["hdf5", "zarr"]): nwbfile = mock_NWBFile() image_series = ImageSeries(name="TestImageSeries", external_file=[""], rate=1.0) nwbfile.add_acquisition(image_series) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) - - assert len(dataset_configurations) == 0 - - -def test_external_image_series_zarr(): - nwbfile = mock_NWBFile() - image_series = ImageSeries(name="TestImageSeries", external_file=[""], rate=1.0) - nwbfile.add_acquisition(image_series) - - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) + dataset_configurations 
= list(get_default_dataset_configurations(nwbfile=nwbfile, backend=backend)) assert len(dataset_configurations) == 0 @pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) -def test_unwrapped_dynamic_table_hdf5(iterator: callable): +@pytest.mark.parametrize("backend", ["hdf5", "zarr"]) +def test_configuration_on_dynamic_table(iterator: callable, backend: Literal["hdf5", "zarr"]): array = np.array([0.1, 0.2, 0.3]) - data = iterator(array.squeeze()) + data = iterator(array) nwbfile = mock_NWBFile() column = VectorData(name="TestColumn", description="", data=data) - dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column]) + dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column], id=list(range(len(array)))) nwbfile.add_acquisition(dynamic_table) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend=backend)) assert len(dataset_configurations) == 1 dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, HDF5DatasetConfiguration) + assert isinstance(dataset_configuration, DATASET_CONFIGURATIONS[backend]) assert dataset_configuration.dataset_info.object_id == column.object_id assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" assert dataset_configuration.dataset_info.full_shape == array.shape @@ -113,30 +81,6 @@ def test_unwrapped_dynamic_table_hdf5(iterator: callable): assert dataset_configuration.compression_method == "gzip" assert dataset_configuration.compression_options is None - -@pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) -def test_unwrapped_dynamic_table_zarr(iterator: callable): - array = np.array([0.1, 0.2, 0.3]) - data = iterator(array.squeeze()) - - nwbfile = mock_NWBFile() - column = VectorData(name="TestColumn", description="", data=data) - dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column]) - nwbfile.add_acquisition(dynamic_table) - - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) - - assert len(dataset_configurations) == 1 - - dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, ZarrDatasetConfiguration) - assert dataset_configuration.dataset_info.object_id == column.object_id - assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" - assert dataset_configuration.dataset_info.full_shape == array.shape - assert dataset_configuration.dataset_info.dtype == array.dtype - assert dataset_configuration.chunk_shape == array.shape - assert dataset_configuration.buffer_shape == array.shape - assert dataset_configuration.compression_method == "gzip" - assert dataset_configuration.compression_options is None - assert dataset_configuration.filter_methods is None - assert dataset_configuration.filter_options is None + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None From 91aab8c27579c140793ed2d196128f4aacc1a425 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 Nov 2023 02:18:53 +0000 Subject: [PATCH 14/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci --- .../test_helpers/test_get_default_dataset_configurations.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py index 76469979e..2ed3b81d5 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py @@ -11,7 +11,10 @@ from pynwb.testing.mock.file import mock_NWBFile from neuroconv.tools.hdmf import SliceableDataChunkIterator -from neuroconv.tools.nwb_helpers import DATASET_CONFIGURATIONS, get_default_dataset_configurations +from neuroconv.tools.nwb_helpers import ( + DATASET_CONFIGURATIONS, + get_default_dataset_configurations, +) @pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) From 65eee6bad4f93ce147ee68a02d1c14b053ace342 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 7 Nov 2023 22:25:22 -0500 Subject: [PATCH 15/27] fix name --- .../test_helpers/test_get_default_dataset_configurations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py index 76469979e..0b79fdedc 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py @@ -16,7 +16,7 @@ @pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) @pytest.mark.parametrize("backend", ["hdf5", "zarr"]) -def test_configuration_on_unwrapped_time_series_hdf5(iterator: callable, backend: Literal["hdf5", "zarr"]): +def test_configuration_on_time_series(iterator: callable, backend: Literal["hdf5", "zarr"]): array = np.array([[1, 2, 3], [4, 5, 6]]) data = iterator(array) @@ -45,7 +45,7 @@ def test_configuration_on_unwrapped_time_series_hdf5(iterator: callable, backend @pytest.mark.parametrize("backend", ["hdf5", "zarr"]) -def test_configuration_on_external_image_series_hdf5(backend: Literal["hdf5", "zarr"]): +def test_configuration_on_external_image_series(backend: Literal["hdf5", "zarr"]): nwbfile = mock_NWBFile() image_series = ImageSeries(name="TestImageSeries", external_file=[""], rate=1.0) nwbfile.add_acquisition(image_series) From 673e2f99e295fc5b47f9a29c6400d30adcc41687 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 20 Nov 2023 14:44:59 -0500 Subject: [PATCH 16/27] Apply suggestions from code review Co-authored-by: Heberto Mayorquin --- src/neuroconv/tools/nwb_helpers/_dataset_configuration.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index e18b3d17f..2d9023130 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ 
b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -21,7 +21,7 @@ def _get_mode(io: Union[NWBHDF5IO, NWBZarrIO]) -> str: - """NWBHDF5IO and NWBZarrIO have different ways of storing the mode they used on a path.""" + """NWBHDF5IO and NWBZarrIO have different ways of storing the io mode (e.g. "r", "a", "w") they used on a path.""" if isinstance(io, NWBHDF5IO): return io.mode elif isinstance(io, NWBZarrIO): @@ -65,7 +65,7 @@ def _parse_location_in_memory_nwbfile(current_location: str, neurodata_object: C def _get_dataset_metadata( neurodata_object: Union[TimeSeries, DynamicTable], field_name: str, backend: Literal["hdf5", "zarr"] -) -> Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration]: +) -> Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration, None]: """Fill in the Dataset model with as many values as can be automatically detected or inferred.""" DatasetConfigurationClass = BACKEND_TO_DATASET_CONFIGURATION[backend] @@ -176,7 +176,7 @@ def get_default_dataset_configurations( continue # skip # Edge case of in-memory ImageSeries with external mode; data is in fields and is empty array - if isinstance(candidate_dataset, np.ndarray) and not np.any(candidate_dataset): + if isinstance(candidate_dataset, np.ndarray) and candidate_dataset.size == 0: continue # skip yield _get_dataset_metadata(neurodata_object=time_series, field_name=field_name, backend=backend) From 8630316c05fae29e4666559350c8cbbdcdb40c4e Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 21 Nov 2023 10:27:49 -0500 Subject: [PATCH 17/27] PR suggestions --- .../nwb_helpers/_dataset_configuration.py | 113 ++++++++++-------- 1 file changed, 61 insertions(+), 52 deletions(-) diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index 2d9023130..67b0a14b6 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -1,5 +1,5 @@ """Collection of helper functions related to configuration of datasets dependent on backend.""" -from typing import Iterable, Literal, Union +from typing import Iterable, Literal, Union, Generator import h5py import numpy as np @@ -20,7 +20,7 @@ BACKEND_TO_CONFIGURATION = dict(hdf5=HDF5BackendConfiguration, zarr=ZarrBackendConfiguration) -def _get_mode(io: Union[NWBHDF5IO, NWBZarrIO]) -> str: +def _get_io_mode(io: Union[NWBHDF5IO, NWBZarrIO]) -> str: """NWBHDF5IO and NWBZarrIO have different ways of storing the io mode (e.g. 
"r", "a", "w") they used on a path.""" if isinstance(io, NWBHDF5IO): return io.mode @@ -28,7 +28,7 @@ def _get_mode(io: Union[NWBHDF5IO, NWBZarrIO]) -> str: return io._ZarrIO__mode -def _is_value_already_written_to_file( +def _is_dataset_written_to_file( candidate_dataset: Union[h5py.Dataset, zarr.Array], backend: Literal["hdf5", "zarr"], existing_file: Union[h5py.File, zarr.Group, None], @@ -40,11 +40,11 @@ def _is_value_already_written_to_file( """ return ( isinstance(candidate_dataset, h5py.Dataset) # If the source data is an HDF5 Dataset - and backend == "hdf5" # If working in append mode + and backend == "hdf5" and candidate_dataset.file == existing_file # If the source HDF5 Dataset is the appending NWBFile ) or ( isinstance(candidate_dataset, zarr.Array) # If the source data is an Zarr Array - and backend == "zarr" # If working in append mode + and backend == "zarr" and candidate_dataset.store == existing_file # If the source Zarr 'file' is the appending NWBFile ) @@ -54,15 +54,30 @@ def _parse_location_in_memory_nwbfile(current_location: str, neurodata_object: C if isinstance(parent, NWBFile): # Items in defined top-level places like acquisition, intervals, etc. do not act as 'containers' # in the .parent sense; ask if object is in their in-memory dictionaries instead - for outer_field_name, outer_field_value in parent.fields.items(): - if isinstance(outer_field_value, dict) and neurodata_object.name in outer_field_value: - return outer_field_name + "/" + neurodata_object.name + "/" + current_location + for parent_field_name, parent_field_value in parent.fields.items(): + if isinstance(parent_field_value, dict) and neurodata_object.name in parent_field_value: + return parent_field_name + "/" + neurodata_object.name + "/" + current_location return neurodata_object.name + "/" + current_location return _parse_location_in_memory_nwbfile( current_location=neurodata_object.name + "/" + current_location, neurodata_object=parent ) +def _infer_dtype_using_data_chunk_iterator(candidate_dataset: Union[h5py.Dataset, zarr.Array]): + """ + The DataChunkIterator has one of the best generic dtype inference, though logic is hard to peel out of it. + + It can fail in rare cases but not essential to our default configuration + """ + try: + return DataChunkIterator(candidate_dataset).dtype + except Exception as exception: + if str(exception) != "Data type could not be determined. Please specify dtype in DataChunkIterator init.": + raise exception + else: + return np.dtype("object") + + def _get_dataset_metadata( neurodata_object: Union[TimeSeries, DynamicTable], field_name: str, backend: Literal["hdf5", "zarr"] ) -> Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration, None]: @@ -70,56 +85,50 @@ def _get_dataset_metadata( DatasetConfigurationClass = BACKEND_TO_DATASET_CONFIGURATION[backend] candidate_dataset = getattr(neurodata_object, field_name) + # For now, skip over datasets already wrapped in DataIO # Could maybe eventually support modifying chunks in place # But setting buffer shape only possible if iterator was wrapped first - if not isinstance(candidate_dataset, DataIO): - # DataChunkIterator has best generic dtype inference, though logic is hard to peel out of it - # And it can fail in rare cases but not essential to our default configuration - try: - dtype = DataChunkIterator(candidate_dataset).dtype - except Exception as exception: - if str(exception) != "Data type could not be determined. 
Please specify dtype in DataChunkIterator init.": - raise exception - else: - dtype = np.dtype("object") - - full_shape = get_data_shape(data=candidate_dataset) - - if isinstance(candidate_dataset, GenericDataChunkIterator): - chunk_shape = candidate_dataset.chunk_shape - buffer_shape = candidate_dataset.buffer_shape - elif dtype != "unknown": - # TODO: eventually replace this with staticmethods on hdmf.data_utils.GenericDataChunkIterator - chunk_shape = SliceableDataChunkIterator.estimate_default_chunk_shape( - chunk_mb=10.0, maxshape=full_shape, dtype=np.dtype(dtype) - ) - buffer_shape = SliceableDataChunkIterator.estimate_default_buffer_shape( - buffer_gb=0.5, chunk_shape=chunk_shape, maxshape=full_shape, dtype=np.dtype(dtype) - ) - else: - pass # TODO: think on this; perhaps zarr's standalone estimator? - - location = _parse_location_in_memory_nwbfile(current_location=field_name, neurodata_object=neurodata_object) - dataset_name = location.strip("/")[-1] - dataset_info = DatasetInfo( - object_id=neurodata_object.object_id, - object_name=neurodata_object.name, - location=location, - dataset_name=dataset_name, - full_shape=full_shape, - dtype=dtype, + if isinstance(candidate_dataset, DataIO): + return None + + # DataChunkIterator has best generic dtype inference, though logic is hard to peel out of it + # And it can fail in rare cases but not essential to our default configuration + dtype = _infer_dtype_using_data_chunk_iterator(candidate_dataset=candidate_dataset) + full_shape = get_data_shape(data=candidate_dataset) + + if isinstance(candidate_dataset, GenericDataChunkIterator): + chunk_shape = candidate_dataset.chunk_shape + buffer_shape = candidate_dataset.buffer_shape + elif dtype != "unknown": + # TODO: eventually replace this with staticmethods on hdmf.data_utils.GenericDataChunkIterator + chunk_shape = SliceableDataChunkIterator.estimate_default_chunk_shape( + chunk_mb=10.0, maxshape=full_shape, dtype=np.dtype(dtype) ) - dataset_configuration = DatasetConfigurationClass( - dataset_info=dataset_info, chunk_shape=chunk_shape, buffer_shape=buffer_shape + buffer_shape = SliceableDataChunkIterator.estimate_default_buffer_shape( + buffer_gb=0.5, chunk_shape=chunk_shape, maxshape=full_shape, dtype=np.dtype(dtype) ) - return dataset_configuration + else: + pass # TODO: think on this; perhaps zarr's standalone estimator? + + location = _parse_location_in_memory_nwbfile(current_location=field_name, neurodata_object=neurodata_object) + dataset_info = DatasetInfo( + object_id=neurodata_object.object_id, + object_name=neurodata_object.name, + location=location, + full_shape=full_shape, + dtype=dtype, + ) + dataset_configuration = DatasetConfigurationClass( + dataset_info=dataset_info, chunk_shape=chunk_shape, buffer_shape=buffer_shape + ) + return dataset_configuration def get_default_dataset_configurations( nwbfile: NWBFile, backend: Union[None, Literal["hdf5", "zarr"]] = None, # None for auto-detect from append mode, otherwise required -) -> Iterable[DatasetConfiguration]: +) -> Generator[DatasetConfiguration, None, None]: """ Method for automatically detecting all objects in the file that could be wrapped in a DataIO. 
@@ -147,10 +156,10 @@ def get_default_dataset_configurations( detected_backend = None existing_file = None - if isinstance(nwbfile.read_io, NWBHDF5IO) and _get_mode(io=nwbfile.read_io) in ("r+", "a"): + if isinstance(nwbfile.read_io, NWBHDF5IO) and _get_io_mode(io=nwbfile.read_io) in ("r+", "a"): detected_backend = "hdf5" existing_file = nwbfile.read_io._file - elif isinstance(nwbfile.read_io, NWBZarrIO) and _get_mode(io=nwbfile.read_io) in ("r+", "a"): + elif isinstance(nwbfile.read_io, NWBZarrIO) and _get_io_mode(io=nwbfile.read_io) in ("r+", "a"): detected_backend = "zarr" existing_file = nwbfile.read_io.file.store backend = backend or detected_backend @@ -170,7 +179,7 @@ def get_default_dataset_configurations( continue candidate_dataset = getattr(time_series, field_name) - if _is_value_already_written_to_file( + if _is_dataset_written_to_file( candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file ): continue # skip @@ -185,7 +194,7 @@ def get_default_dataset_configurations( for column_name in dynamic_table.colnames: candidate_dataset = dynamic_table[column_name].data # VectorData object - if _is_value_already_written_to_file( + if _is_dataset_written_to_file( candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file ): continue # skip From f7e1be6dad73116a12a6e9dac96ebf5a15e3bf21 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:29:19 -0500 Subject: [PATCH 18/27] Update src/neuroconv/tools/nwb_helpers/_dataset_configuration.py --- src/neuroconv/tools/nwb_helpers/_dataset_configuration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index 67b0a14b6..6a98b23e9 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -53,7 +53,7 @@ def _parse_location_in_memory_nwbfile(current_location: str, neurodata_object: C parent = neurodata_object.parent if isinstance(parent, NWBFile): # Items in defined top-level places like acquisition, intervals, etc. 
do not act as 'containers' - # in the .parent sense; ask if object is in their in-memory dictionaries instead + # in that they do not set the `.parent` attribute; ask if object is in their in-memory dictionaries instead for parent_field_name, parent_field_value in parent.fields.items(): if isinstance(parent_field_value, dict) and neurodata_object.name in parent_field_value: return parent_field_name + "/" + neurodata_object.name + "/" + current_location From 6f0806aeedbe3061dbac0b92cd071e4227528e0e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:30:07 +0000 Subject: [PATCH 19/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/nwb_helpers/_dataset_configuration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index 6a98b23e9..f964ea472 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -1,5 +1,5 @@ """Collection of helper functions related to configuration of datasets dependent on backend.""" -from typing import Iterable, Literal, Union, Generator +from typing import Generator, Iterable, Literal, Union import h5py import numpy as np From b07a541a030eda6e5e36800a142854bb76b18276 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 21 Nov 2023 10:53:22 -0500 Subject: [PATCH 20/27] add IO to dataset config names --- src/neuroconv/tools/nwb_helpers/__init__.py | 8 ++-- .../nwb_helpers/_dataset_configuration.py | 22 +++++----- .../tools/nwb_helpers/_models/_base_models.py | 4 +- .../tools/nwb_helpers/_models/_hdf5_models.py | 6 +-- .../tools/nwb_helpers/_models/_zarr_models.py | 6 +-- src/neuroconv/tools/testing/__init__.py | 4 +- .../testing/_mock/_mock_dataset_models.py | 28 ++++++------- ..._common_dataset_io_configuration_model.py} | 40 +++++++++---------- ..._get_default_dataset_io_configurations.py} | 16 ++++---- ...taset_io_configurations_appended_files.py} | 24 +++++------ ...=> test_dataset_io_configuration_model.py} | 12 +++--- .../test_hdf5_backend_configuration_model.py | 2 +- ...st_hdf5_dataset_io_configuration_model.py} | 26 ++++++------ .../test_zarr_backend_configuration_model.py | 2 +- ...st_zarr_dataset_io_configuration_model.py} | 40 +++++++++---------- 15 files changed, 119 insertions(+), 121 deletions(-) rename tests/test_minimal/test_tools/test_backend_and_dataset_configuration/{test_common_dataset_configuration_model.py => test_common_dataset_io_configuration_model.py} (80%) rename tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/{test_get_default_dataset_configurations.py => test_get_default_dataset_io_configurations.py} (84%) rename tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/{test_get_default_dataset_configurations_appended_files.py => test_get_default_dataset_io_configurations_appended_files.py} (86%) rename tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/{test_dataset_configuration_model.py => test_dataset_io_configuration_model.py} (58%) rename tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/{test_hdf5_dataset_configuration_model.py => test_hdf5_dataset_io_configuration_model.py} (73%) rename 
tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/{test_zarr_dataset_configuration_model.py => test_zarr_dataset_io_configuration_model.py} (79%) diff --git a/src/neuroconv/tools/nwb_helpers/__init__.py b/src/neuroconv/tools/nwb_helpers/__init__.py index c17fa3ef0..cb78a67a5 100644 --- a/src/neuroconv/tools/nwb_helpers/__init__.py +++ b/src/neuroconv/tools/nwb_helpers/__init__.py @@ -1,4 +1,4 @@ -from ._dataset_configuration import get_default_dataset_configurations +from ._dataset_configuration import get_default_dataset_io_configurations from ._metadata_and_file_helpers import ( add_device_from_metadata, get_default_nwbfile_metadata, @@ -10,13 +10,13 @@ from ._models._hdf5_models import ( AVAILABLE_HDF5_COMPRESSION_METHODS, HDF5BackendConfiguration, - HDF5DatasetConfiguration, + HDF5DatasetIOConfiguration, ) from ._models._zarr_models import ( AVAILABLE_ZARR_COMPRESSION_METHODS, ZarrBackendConfiguration, - ZarrDatasetConfiguration, + ZarrDatasetIOConfiguration, ) -DATASET_CONFIGURATIONS = dict(hdf5=HDF5DatasetConfiguration, zarr=ZarrDatasetConfiguration) BACKEND_CONFIGURATIONS = dict(hdf5=HDF5BackendConfiguration, zarr=ZarrBackendConfiguration) +DATASET_IO_CONFIGURATIONS = dict(hdf5=HDF5DatasetIOConfiguration, zarr=ZarrDatasetIOConfiguration) diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index 6a98b23e9..775f7c923 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -1,5 +1,5 @@ """Collection of helper functions related to configuration of datasets dependent on backend.""" -from typing import Iterable, Literal, Union, Generator +from typing import Literal, Union, Generator import h5py import numpy as np @@ -11,12 +11,12 @@ from pynwb import NWBHDF5IO, NWBFile, TimeSeries from pynwb.base import DynamicTable -from ._models._base_models import DatasetConfiguration, DatasetInfo -from ._models._hdf5_models import HDF5BackendConfiguration, HDF5DatasetConfiguration -from ._models._zarr_models import ZarrBackendConfiguration, ZarrDatasetConfiguration +from ._models._base_models import DatasetIOConfiguration, DatasetInfo +from ._models._hdf5_models import HDF5BackendConfiguration, HDF5DatasetIOConfiguration +from ._models._zarr_models import ZarrBackendConfiguration, ZarrDatasetIOConfiguration from ..hdmf import SliceableDataChunkIterator -BACKEND_TO_DATASET_CONFIGURATION = dict(hdf5=HDF5DatasetConfiguration, zarr=ZarrDatasetConfiguration) +BACKEND_TO_DATASET_CONFIGURATION = dict(hdf5=HDF5DatasetIOConfiguration, zarr=ZarrDatasetIOConfiguration) BACKEND_TO_CONFIGURATION = dict(hdf5=HDF5BackendConfiguration, zarr=ZarrBackendConfiguration) @@ -80,9 +80,9 @@ def _infer_dtype_using_data_chunk_iterator(candidate_dataset: Union[h5py.Dataset def _get_dataset_metadata( neurodata_object: Union[TimeSeries, DynamicTable], field_name: str, backend: Literal["hdf5", "zarr"] -) -> Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration, None]: +) -> Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration, None]: """Fill in the Dataset model with as many values as can be automatically detected or inferred.""" - DatasetConfigurationClass = BACKEND_TO_DATASET_CONFIGURATION[backend] + DatasetIOConfigurationClass = BACKEND_TO_DATASET_CONFIGURATION[backend] candidate_dataset = getattr(neurodata_object, field_name) @@ -119,16 +119,16 @@ def _get_dataset_metadata( full_shape=full_shape, dtype=dtype, ) - 
dataset_configuration = DatasetConfigurationClass( + dataset_configuration = DatasetIOConfigurationClass( dataset_info=dataset_info, chunk_shape=chunk_shape, buffer_shape=buffer_shape ) return dataset_configuration -def get_default_dataset_configurations( +def get_default_dataset_io_configurations( nwbfile: NWBFile, backend: Union[None, Literal["hdf5", "zarr"]] = None, # None for auto-detect from append mode, otherwise required -) -> Generator[DatasetConfiguration, None, None]: +) -> Generator[DatasetIOConfiguration, None, None]: """ Method for automatically detecting all objects in the file that could be wrapped in a DataIO. @@ -141,7 +141,7 @@ def get_default_dataset_configurations( Yields ------ - DatasetConfiguration + DatasetIOConfiguration A summary of each detected object that can be wrapped in a DataIO. """ if backend is None and nwbfile.read_io is None: diff --git a/src/neuroconv/tools/nwb_helpers/_models/_base_models.py b/src/neuroconv/tools/nwb_helpers/_models/_base_models.py index 72b364dea..8a6486e74 100644 --- a/src/neuroconv/tools/nwb_helpers/_models/_base_models.py +++ b/src/neuroconv/tools/nwb_helpers/_models/_base_models.py @@ -62,7 +62,7 @@ def __init__(self, **values): super().__init__(**values) -class DatasetConfiguration(BaseModel, ABC): +class DatasetIOConfiguration(BaseModel, ABC): """A data model for configuring options about an object that will become a HDF5 or Zarr Dataset in the file.""" # TODO: When using Pydantic v2, remove @@ -188,7 +188,7 @@ class BackendConfiguration(BaseModel): backend: Literal["hdf5", "zarr"] = Field(description="The name of the backend used to configure the NWBFile.") data_io_class: Type[DataIO] = Field(description="The DataIO class that is specific to this backend.") - dataset_configurations: Dict[str, DatasetConfiguration] = Field( + dataset_configurations: Dict[str, DatasetIOConfiguration] = Field( description=( "A mapping from object locations (e.g. `acquisition/TestElectricalSeriesAP/data`) " "to their DatasetConfiguration specification that contains all information " diff --git a/src/neuroconv/tools/nwb_helpers/_models/_hdf5_models.py b/src/neuroconv/tools/nwb_helpers/_models/_hdf5_models.py index daf772688..b34671154 100644 --- a/src/neuroconv/tools/nwb_helpers/_models/_hdf5_models.py +++ b/src/neuroconv/tools/nwb_helpers/_models/_hdf5_models.py @@ -6,7 +6,7 @@ from pydantic import Field from pynwb import H5DataIO -from ._base_models import BackendConfiguration, DatasetConfiguration +from ._base_models import BackendConfiguration, DatasetIOConfiguration _base_hdf5_filters = set(h5py.filters.decode) _excluded_hdf5_filters = set( @@ -29,7 +29,7 @@ ) -class HDF5DatasetConfiguration(DatasetConfiguration): +class HDF5DatasetIOConfiguration(DatasetIOConfiguration): """A data model for configuring options about an object that will become a HDF5 Dataset in the file.""" # TODO: When using Pydantic v2, replace with `model_config = ConfigDict(...)` @@ -90,7 +90,7 @@ class HDF5BackendConfiguration(BackendConfiguration): data_io_class: Type[H5DataIO] = Field( # TODO: in pydantic v2 use property instead of class attribute default=H5DataIO, description="The DataIO class that is specific to HDF5." ) - dataset_configurations: Dict[str, HDF5DatasetConfiguration] = Field( + dataset_configurations: Dict[str, HDF5DatasetIOConfiguration] = Field( description=( "A mapping from object locations to their HDF5DatasetConfiguration specification that contains all " "information for writing the datasets to disk using the HDF5 backend." 
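For orientation, the renamed generator above is used the way the unit tests in this series exercise it. The snippet below is a minimal sketch, assuming the post-rename names exported from neuroconv.tools.nwb_helpers and the pynwb testing mocks imported in the test modules; the TimeSeries name is illustrative, and the exact chunk/buffer shapes come from the default estimators rather than being fixed values.

    import numpy as np
    from pynwb.testing.mock.base import mock_TimeSeries
    from pynwb.testing.mock.file import mock_NWBFile

    from neuroconv.tools.nwb_helpers import get_default_dataset_io_configurations

    # Build an in-memory NWBFile holding a small TimeSeries, as the tests in this series do.
    nwbfile = mock_NWBFile()
    time_series = mock_TimeSeries(name="ExampleTimeSeries", data=np.array([[1, 2, 3], [4, 5, 6]]))
    nwbfile.add_acquisition(time_series)

    # One DatasetIOConfiguration is yielded per dataset that could be wrapped in a DataIO;
    # `backend` selects between the HDF5 and Zarr configuration models.
    for dataset_configuration in get_default_dataset_io_configurations(nwbfile=nwbfile, backend="hdf5"):
        print(dataset_configuration.dataset_info.location)  # e.g. "acquisition/ExampleTimeSeries/data"
        print(dataset_configuration.chunk_shape, dataset_configuration.compression_method)
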
diff --git a/src/neuroconv/tools/nwb_helpers/_models/_zarr_models.py b/src/neuroconv/tools/nwb_helpers/_models/_zarr_models.py index 760c7c2a9..14214b513 100644 --- a/src/neuroconv/tools/nwb_helpers/_models/_zarr_models.py +++ b/src/neuroconv/tools/nwb_helpers/_models/_zarr_models.py @@ -7,7 +7,7 @@ from hdmf_zarr import ZarrDataIO from pydantic import Field, root_validator -from ._base_models import BackendConfiguration, DatasetConfiguration +from ._base_models import BackendConfiguration, DatasetIOConfiguration _base_zarr_codecs = set(zarr.codec_registry.keys()) _lossy_zarr_codecs = set(("astype", "bitround", "quantize")) @@ -43,7 +43,7 @@ } -class ZarrDatasetConfiguration(DatasetConfiguration): +class ZarrDatasetIOConfiguration(DatasetIOConfiguration): """A data model for configuring options about an object that will become a Zarr Dataset in the file.""" # TODO: When using Pydantic v2, replace with `model_config = ConfigDict(...)` @@ -147,7 +147,7 @@ class ZarrBackendConfiguration(BackendConfiguration): data_io_class: Type[ZarrDataIO] = Field( default=ZarrDataIO, description="The DataIO class that is specific to Zarr." ) - dataset_configurations: Dict[str, ZarrDatasetConfiguration] = Field( + dataset_configurations: Dict[str, ZarrDatasetIOConfiguration] = Field( description=( "A mapping from object locations to their ZarrDatasetConfiguration specification that contains all " "information for writing the datasets to disk using the Zarr backend." diff --git a/src/neuroconv/tools/testing/__init__.py b/src/neuroconv/tools/testing/__init__.py index 502634466..2d5b06497 100644 --- a/src/neuroconv/tools/testing/__init__.py +++ b/src/neuroconv/tools/testing/__init__.py @@ -1,9 +1,9 @@ from ._mock._mock_dataset_models import ( mock_DatasetInfo, mock_HDF5BackendConfiguration, - mock_HDF5DatasetConfiguration, + mock_HDF5DatasetIOConfiguration, mock_ZarrBackendConfiguration, - mock_ZarrDatasetConfiguration, + mock_ZarrDatasetIOConfiguration, ) from .mock_files import generate_path_expander_demo_ibl from .mock_interfaces import MockBehaviorEventInterface, MockSpikeGLXNIDQInterface diff --git a/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py b/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py index 6860f7078..e8ea80826 100644 --- a/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py +++ b/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py @@ -9,9 +9,9 @@ AVAILABLE_ZARR_COMPRESSION_METHODS, DatasetInfo, HDF5BackendConfiguration, - HDF5DatasetConfiguration, + HDF5DatasetIOConfiguration, ZarrBackendConfiguration, - ZarrDatasetConfiguration, + ZarrDatasetIOConfiguration, ) @@ -30,14 +30,14 @@ def mock_DatasetInfo( ) -def mock_HDF5DatasetConfiguration( +def mock_HDF5DatasetIOConfiguration( compression_method: Union[ Literal[tuple(AVAILABLE_HDF5_COMPRESSION_METHODS.keys())], h5py._hl.filters.FilterRefBase, None ] = "gzip", compression_options: Union[Dict[str, Any], None] = None, -) -> HDF5DatasetConfiguration: - """Mock instance of a HDF5DatasetConfiguration with NeuroPixel-like values to show chunk/buffer recommendations.""" - return HDF5DatasetConfiguration( +) -> HDF5DatasetIOConfiguration: + """Mock object of a HDF5DatasetIOConfiguration with NeuroPixel-like values to show chunk/buffer recommendations.""" + return HDF5DatasetIOConfiguration( dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 64), # ~10 MB buffer_shape=(1_250_000, 384), # ~1 GB @@ -46,7 +46,7 @@ def mock_HDF5DatasetConfiguration( ) -def mock_ZarrDatasetConfiguration( +def 
mock_ZarrDatasetIOConfiguration( compression_method: Union[ Literal[tuple(AVAILABLE_ZARR_COMPRESSION_METHODS.keys())], numcodecs.abc.Codec, None ] = "gzip", @@ -55,9 +55,9 @@ def mock_ZarrDatasetConfiguration( Union[Literal[tuple(AVAILABLE_ZARR_COMPRESSION_METHODS.keys())], numcodecs.abc.Codec, None] ] = None, filter_options: Union[Iterable[Dict[str, Any]], None] = None, -) -> ZarrDatasetConfiguration: - """Mock instance of a ZarrDatasetConfiguration with NeuroPixel-like values to show chunk/buffer recommendations.""" - return ZarrDatasetConfiguration( +) -> ZarrDatasetIOConfiguration: + """Mock object of a ZarrDatasetIOConfiguration with NeuroPixel-like values to show chunk/buffer recommendations.""" + return ZarrDatasetIOConfiguration( dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 64), # ~10 MB buffer_shape=(1_250_000, 384), # ~1 GB @@ -71,12 +71,12 @@ def mock_ZarrDatasetConfiguration( def mock_HDF5BackendConfiguration() -> HDF5BackendConfiguration: """Mock instance of a HDF5BackendConfiguration with two NeuroPixel-like datasets.""" dataset_configurations = { - "acquisition/TestElectricalSeriesAP/data": HDF5DatasetConfiguration( + "acquisition/TestElectricalSeriesAP/data": HDF5DatasetIOConfiguration( dataset_info=mock_DatasetInfo(location="acquisition/TestElectricalSeriesAP/data"), chunk_shape=(78_125, 64), # ~10 MB buffer_shape=(1_250_000, 384), # ~1 GB ), - "acquisition/TestElectricalSeriesLF/data": HDF5DatasetConfiguration( + "acquisition/TestElectricalSeriesLF/data": HDF5DatasetIOConfiguration( dataset_info=mock_DatasetInfo( object_id="bc37e164-519f-4b65-a976-206440f1d325", location="acquisition/TestElectricalSeriesLF/data", @@ -93,13 +93,13 @@ def mock_HDF5BackendConfiguration() -> HDF5BackendConfiguration: def mock_ZarrBackendConfiguration() -> ZarrBackendConfiguration: """Mock instance of a HDF5BackendConfiguration with several NeuroPixel-like datasets.""" dataset_configurations = { - "acquisition/TestElectricalSeriesAP/data": ZarrDatasetConfiguration( + "acquisition/TestElectricalSeriesAP/data": ZarrDatasetIOConfiguration( dataset_info=mock_DatasetInfo(location="acquisition/TestElectricalSeriesAP/data"), chunk_shape=(78_125, 64), buffer_shape=(1_250_000, 384), # ~1 GB filter_methods=["delta"], ), - "acquisition/TestElectricalSeriesLF/data": ZarrDatasetConfiguration( + "acquisition/TestElectricalSeriesLF/data": ZarrDatasetIOConfiguration( dataset_info=mock_DatasetInfo( object_id="bc37e164-519f-4b65-a976-206440f1d325", location="acquisition/TestElectricalSeriesLF/data", diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py similarity index 80% rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_configuration_model.py rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py index 892638a2c..3babb046e 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_configuration_model.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py @@ -4,21 +4,21 @@ import pytest from neuroconv.tools.nwb_helpers import ( - HDF5DatasetConfiguration, - ZarrDatasetConfiguration, + HDF5DatasetIOConfiguration, + ZarrDatasetIOConfiguration, ) from neuroconv.tools.testing import ( 
mock_DatasetInfo, - mock_HDF5DatasetConfiguration, - mock_ZarrDatasetConfiguration, + mock_HDF5DatasetIOConfiguration, + mock_ZarrDatasetIOConfiguration, ) @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def test_validator_chunk_length_consistency( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -35,10 +35,10 @@ def test_validator_chunk_length_consistency( @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def test_validator_chunk_and_buffer_length_consistency( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -55,10 +55,10 @@ def test_validator_chunk_and_buffer_length_consistency( @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def test_validator_chunk_shape_nonpositive_elements( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -75,10 +75,10 @@ def test_validator_chunk_shape_nonpositive_elements( @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def test_validator_buffer_shape_nonpositive_elements( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -95,10 +95,10 @@ def test_validator_buffer_shape_nonpositive_elements( @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def test_validator_chunk_shape_exceeds_buffer_shape( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -115,10 +115,10 @@ def test_validator_chunk_shape_exceeds_buffer_shape( @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def 
test_validator_buffer_shape_exceeds_full_shape( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -135,10 +135,10 @@ def test_validator_buffer_shape_exceeds_full_shape( @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def test_validator_chunk_dimensions_do_not_evenly_divide_buffer( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -155,10 +155,10 @@ def test_validator_chunk_dimensions_do_not_evenly_divide_buffer( @pytest.mark.parametrize( - argnames="mock_dataset_configuration", argvalues=[mock_HDF5DatasetConfiguration(), mock_ZarrDatasetConfiguration()] + argnames="mock_dataset_configuration", argvalues=[mock_HDF5DatasetIOConfiguration(), mock_ZarrDatasetIOConfiguration()] ) def test_mutation_validation( - mock_dataset_configuration: Union[mock_HDF5DatasetConfiguration, mock_ZarrDatasetConfiguration] + mock_dataset_configuration: Union[mock_HDF5DatasetIOConfiguration, mock_ZarrDatasetIOConfiguration] ): """ Only testing on one dummy case to verify the root validator is triggered. diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py similarity index 84% rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py index 9b88c0027..b91cc45fb 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py @@ -1,4 +1,4 @@ -"""Unit tests for `get_default_dataset_configurations`.""" +"""Unit tests for `get_default_dataset_io_configurations`.""" from typing import Literal import numpy as np @@ -12,8 +12,8 @@ from neuroconv.tools.hdmf import SliceableDataChunkIterator from neuroconv.tools.nwb_helpers import ( - DATASET_CONFIGURATIONS, - get_default_dataset_configurations, + DATASET_IO_CONFIGURATIONS, + get_default_dataset_io_configurations, ) @@ -27,12 +27,12 @@ def test_configuration_on_time_series(iterator: callable, backend: Literal["hdf5 time_series = mock_TimeSeries(name="TestTimeSeries", data=data) nwbfile.add_acquisition(time_series) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend=backend)) + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend=backend)) assert len(dataset_configurations) == 1 dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, DATASET_CONFIGURATIONS[backend]) + assert isinstance(dataset_configuration, 
DATASET_IO_CONFIGURATIONS[backend]) assert dataset_configuration.dataset_info.object_id == time_series.object_id assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" assert dataset_configuration.dataset_info.full_shape == array.shape @@ -53,7 +53,7 @@ def test_configuration_on_external_image_series(backend: Literal["hdf5", "zarr"] image_series = ImageSeries(name="TestImageSeries", external_file=[""], rate=1.0) nwbfile.add_acquisition(image_series) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend=backend)) + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend=backend)) assert len(dataset_configurations) == 0 @@ -69,12 +69,12 @@ def test_configuration_on_dynamic_table(iterator: callable, backend: Literal["hd dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column], id=list(range(len(array)))) nwbfile.add_acquisition(dynamic_table) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend=backend)) + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend=backend)) assert len(dataset_configurations) == 1 dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, DATASET_CONFIGURATIONS[backend]) + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) assert dataset_configuration.dataset_info.object_id == column.object_id assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" assert dataset_configuration.dataset_info.full_shape == array.shape diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations_appended_files.py similarity index 86% rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations_appended_files.py index f33334e93..3125bfc73 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_configurations_appended_files.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations_appended_files.py @@ -1,5 +1,5 @@ """ -Unit tests for `get_default_dataset_configurations` operating on already written files open in append mode. +Unit tests for `get_default_dataset_io_configurations` operating on already written files open in append mode. Mostly testing that the right objects are skipped from identification as candidates for configuration. 
""" from pathlib import Path @@ -14,9 +14,9 @@ from pynwb.testing.mock.file import mock_NWBFile from neuroconv.tools.nwb_helpers import ( - HDF5DatasetConfiguration, - ZarrDatasetConfiguration, - get_default_dataset_configurations, + HDF5DatasetIOConfiguration, + ZarrDatasetIOConfiguration, + get_default_dataset_io_configurations, ) @@ -55,12 +55,12 @@ def test_unwrapped_time_series_hdf5(hdf5_nwbfile_path): nwbfile = io.read() new_time_series = mock_TimeSeries(name="NewTimeSeries", data=array) nwbfile.add_acquisition(new_time_series) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend="hdf5")) assert len(dataset_configurations) == 1 dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, HDF5DatasetConfiguration) + assert isinstance(dataset_configuration, HDF5DatasetIOConfiguration) assert dataset_configuration.dataset_info.object_id == new_time_series.object_id assert dataset_configuration.dataset_info.location == "acquisition/NewTimeSeries/data" assert dataset_configuration.dataset_info.full_shape == array.shape @@ -78,12 +78,12 @@ def test_unwrapped_time_series_zarr(zarr_nwbfile_path): nwbfile = io.read() new_time_series = mock_TimeSeries(name="NewTimeSeries", data=array) nwbfile.add_acquisition(new_time_series) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend="zarr")) assert len(dataset_configurations) == 1 dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, ZarrDatasetConfiguration) + assert isinstance(dataset_configuration, ZarrDatasetIOConfiguration) assert dataset_configuration.dataset_info.object_id == new_time_series.object_id assert dataset_configuration.dataset_info.location == "acquisition/NewTimeSeries/data" assert dataset_configuration.dataset_info.full_shape == array.shape @@ -104,12 +104,12 @@ def test_unwrapped_dynamic_table_hdf5(hdf5_nwbfile_path): column = VectorData(name="TestColumn", description="", data=array.squeeze()) dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column]) nwbfile.add_acquisition(dynamic_table) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="hdf5")) + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend="hdf5")) assert len(dataset_configurations) == 1 dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, HDF5DatasetConfiguration) + assert isinstance(dataset_configuration, HDF5DatasetIOConfiguration) assert dataset_configuration.dataset_info.object_id == column.object_id assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" assert dataset_configuration.dataset_info.full_shape == array.shape @@ -128,12 +128,12 @@ def test_unwrapped_dynamic_table_zarr(zarr_nwbfile_path): column = VectorData(name="TestColumn", description="", data=array.squeeze()) dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column]) nwbfile.add_acquisition(dynamic_table) - dataset_configurations = list(get_default_dataset_configurations(nwbfile=nwbfile, backend="zarr")) + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend="zarr")) assert 
len(dataset_configurations) == 1 dataset_configuration = dataset_configurations[0] - assert isinstance(dataset_configuration, ZarrDatasetConfiguration) + assert isinstance(dataset_configuration, ZarrDatasetIOConfiguration) assert dataset_configuration.dataset_info.object_id == column.object_id assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" assert dataset_configuration.dataset_info.full_shape == array.shape diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py similarity index 58% rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_configuration_model.py rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py index fd9e624a3..33b32d10a 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_configuration_model.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py @@ -1,26 +1,26 @@ """Unit tests for the DatasetConfiguration Pydantic model.""" import pytest -from neuroconv.tools.nwb_helpers._models._base_models import DatasetConfiguration +from neuroconv.tools.nwb_helpers._models._base_models import DatasetIOConfiguration from neuroconv.tools.testing import mock_DatasetInfo def test_get_data_io_kwargs_abstract_error(): with pytest.raises(TypeError) as error_info: - DatasetConfiguration(dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 64), buffer_shape=(1_250_000, 384)) - assert "Can't instantiate abstract class DatasetConfiguration with abstract" in str(error_info.value) + DatasetIOConfiguration(dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 64), buffer_shape=(1_250_000, 384)) + assert "Can't instantiate abstract class DatasetIOConfiguration with abstract" in str(error_info.value) def test_get_data_io_kwargs_not_implemented(): - class TestDatasetConfiguration(DatasetConfiguration): + class TestDatasetIOConfiguration(DatasetIOConfiguration): def get_data_io_kwargs(self): super().get_data_io_kwargs() - dataset_configuration = TestDatasetConfiguration( + dataset_io_configuration = TestDatasetIOConfiguration( dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 64), buffer_shape=(1_250_000, 384), ) with pytest.raises(NotImplementedError): - dataset_configuration.get_data_io_kwargs() + dataset_io_configuration.get_data_io_kwargs() diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_backend_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_backend_configuration_model.py index 2d6242ad1..7377ff1b8 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_backend_configuration_model.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_backend_configuration_model.py @@ -6,7 +6,7 @@ def test_hdf5_backend_configuration_print(): - """Test the printout display of a HDF5DatasetConfiguration model looks nice.""" + """Test the printout display of a HDF5BackendConfiguration model looks nice.""" hdf5_backend_configuration = mock_HDF5BackendConfiguration() with patch("sys.stdout", 
new=StringIO()) as out: diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_io_configuration_model.py similarity index 73% rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_configuration_model.py rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_io_configuration_model.py index d6de7ab4c..b31387fbf 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_configuration_model.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_io_configuration_model.py @@ -1,16 +1,14 @@ -"""Unit tests for the HDF5DatasetConfiguration Pydantic model.""" +"""Unit tests for the HDF5DatasetIOConfiguration Pydantic model.""" from io import StringIO from unittest.mock import patch -import pytest - from neuroconv.tools.nwb_helpers import AVAILABLE_HDF5_COMPRESSION_METHODS -from neuroconv.tools.testing import mock_HDF5DatasetConfiguration +from neuroconv.tools.testing import mock_HDF5DatasetIOConfiguration def test_hdf5_dataset_configuration_print(): - """Test the printout display of a HDF5DatasetConfiguration model looks nice.""" - hdf5_dataset_configuration = mock_HDF5DatasetConfiguration() + """Test the printout display of a HDF5DatasetIOConfiguration model looks nice.""" + hdf5_dataset_configuration = mock_HDF5DatasetIOConfiguration() with patch("sys.stdout", new=StringIO()) as out: print(hdf5_dataset_configuration) @@ -35,8 +33,8 @@ def test_hdf5_dataset_configuration_print(): def test_hdf5_dataset_configuration_print_with_compression_options(): - """Test the printout display of a HDF5DatasetConfiguration model looks nice.""" - hdf5_dataset_configuration = mock_HDF5DatasetConfiguration(compression_options=dict(level=5)) + """Test the printout display of a HDF5DatasetIOConfiguration model looks nice.""" + hdf5_dataset_configuration = mock_HDF5DatasetIOConfiguration(compression_options=dict(level=5)) with patch("sys.stdout", new=StringIO()) as out: print(hdf5_dataset_configuration) @@ -62,8 +60,8 @@ def test_hdf5_dataset_configuration_print_with_compression_options(): def test_hdf5_dataset_configuration_print_with_compression_disabled(): - """Test the printout display of a HDF5DatasetConfiguration model looks nice.""" - hdf5_dataset_configuration = mock_HDF5DatasetConfiguration(compression_method=None) + """Test the printout display of a HDF5DatasetIOConfiguration model looks nice.""" + hdf5_dataset_configuration = mock_HDF5DatasetIOConfiguration(compression_method=None) with patch("sys.stdout", new=StringIO()) as out: print(hdf5_dataset_configuration) @@ -86,12 +84,12 @@ def test_hdf5_dataset_configuration_print_with_compression_disabled(): def test_hdf5_dataset_configuration_repr(): - """Test the programmatic repr of a HDF5DatasetConfiguration model is more dataclass-like.""" - hdf5_dataset_configuration = mock_HDF5DatasetConfiguration() + """Test the programmatic repr of a HDF5DatasetIOConfiguration model is more dataclass-like.""" + hdf5_dataset_configuration = mock_HDF5DatasetIOConfiguration() # Important to keep the `repr` unmodified for appearance inside iterables of DatasetInfo objects expected_repr = ( - 
"HDF5DatasetConfiguration(dataset_info=DatasetInfo(object_id='481a0860-3a0c-40ec-b931-df4a3e9b101f', " + "HDF5DatasetIOConfiguration(dataset_info=DatasetInfo(object_id='481a0860-3a0c-40ec-b931-df4a3e9b101f', " "location='acquisition/TestElectricalSeries/data', dataset_name='data', dtype=dtype('int16'), " "full_shape=(1800000, 384)), chunk_shape=(78125, 64), buffer_shape=(1250000, 384), compression_method='gzip', " "compression_options=None)" @@ -108,7 +106,7 @@ def test_default_compression_is_always_available(): def test_get_data_io_kwargs(): - hdf5_dataset_configuration = mock_HDF5DatasetConfiguration() + hdf5_dataset_configuration = mock_HDF5DatasetIOConfiguration() assert hdf5_dataset_configuration.get_data_io_kwargs() == dict( chunks=(78125, 64), compression="gzip", compression_opts=None diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_backend_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_backend_configuration_model.py index e8017c719..da417710c 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_backend_configuration_model.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_backend_configuration_model.py @@ -6,7 +6,7 @@ def test_zarr_backend_configuration_print(): - """Test the printout display of a HDF5DatasetConfiguration model looks nice.""" + """Test the printout display of a ZarrBackendConfiguration model looks nice.""" zarr_backend_configuration = mock_ZarrBackendConfiguration() with patch("sys.stdout", new=StringIO()) as out: diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_io_configuration_model.py similarity index 79% rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_configuration_model.py rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_io_configuration_model.py index 8ddc5bf7e..e99c1dbca 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_configuration_model.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_io_configuration_model.py @@ -1,4 +1,4 @@ -"""Unit tests for the ZarrDatasetConfiguration Pydantic model.""" +"""Unit tests for the ZarrDatasetIOConfiguration Pydantic model.""" from io import StringIO from unittest.mock import patch @@ -7,14 +7,14 @@ from neuroconv.tools.nwb_helpers import ( AVAILABLE_ZARR_COMPRESSION_METHODS, - ZarrDatasetConfiguration, + ZarrDatasetIOConfiguration, ) -from neuroconv.tools.testing import mock_DatasetInfo, mock_ZarrDatasetConfiguration +from neuroconv.tools.testing import mock_DatasetInfo, mock_ZarrDatasetIOConfiguration -def test_zarr_dataset_configuration_print(): - """Test the printout display of a ZarrDatasetConfiguration model looks nice.""" - zarr_dataset_configuration = mock_ZarrDatasetConfiguration() +def test_zarr_dataset_io_configuration_print(): + """Test the printout display of a ZarrDatasetIOConfiguration model looks nice.""" + zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration() with patch("sys.stdout", new=StringIO()) as out: print(zarr_dataset_configuration) @@ -39,8 +39,8 
@@ def test_zarr_dataset_configuration_print(): def test_zarr_dataset_configuration_print_with_compression_options(): - """Test the printout display of a ZarrDatasetConfiguration model looks nice.""" - zarr_dataset_configuration = mock_ZarrDatasetConfiguration(compression_options=dict(level=5)) + """Test the printout display of a ZarrDatasetIOConfiguration model looks nice.""" + zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration(compression_options=dict(level=5)) with patch("sys.stdout", new=StringIO()) as out: print(zarr_dataset_configuration) @@ -66,8 +66,8 @@ def test_zarr_dataset_configuration_print_with_compression_options(): def test_zarr_dataset_configuration_print_with_compression_disabled(): - """Test the printout display of a ZarrDatasetConfiguration model looks nice.""" - zarr_dataset_configuration = mock_ZarrDatasetConfiguration(compression_method=None) + """Test the printout display of a ZarrDatasetIOConfiguration model looks nice.""" + zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration(compression_method=None) with patch("sys.stdout", new=StringIO()) as out: print(zarr_dataset_configuration) @@ -90,8 +90,8 @@ def test_zarr_dataset_configuration_print_with_compression_disabled(): def test_zarr_dataset_configuration_print_with_filter_methods(): - """Test the printout display of a ZarrDatasetConfiguration model looks nice.""" - zarr_dataset_configuration = mock_ZarrDatasetConfiguration(filter_methods=["delta"]) + """Test the printout display of a ZarrDatasetIOConfiguration model looks nice.""" + zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration(filter_methods=["delta"]) with patch("sys.stdout", new=StringIO()) as out: print(zarr_dataset_configuration) @@ -118,8 +118,8 @@ def test_zarr_dataset_configuration_print_with_filter_methods(): def test_zarr_dataset_configuration_print_with_filter_options(): - """Test the printout display of a ZarrDatasetConfiguration model looks nice.""" - zarr_dataset_configuration = mock_ZarrDatasetConfiguration( + """Test the printout display of a ZarrDatasetIOConfiguration model looks nice.""" + zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration( filter_methods=["blosc"], filter_options=[dict(clevel=5)] ) @@ -149,12 +149,12 @@ def test_zarr_dataset_configuration_print_with_filter_options(): def test_zarr_dataset_configuration_repr(): - """Test the programmatic repr of a ZarrDatasetConfiguration model is more dataclass-like.""" - zarr_dataset_configuration = mock_ZarrDatasetConfiguration() + """Test the programmatic repr of a ZarrDatasetIOConfiguration model is more dataclass-like.""" + zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration() # Important to keep the `repr` unmodified for appearance inside iterables of DatasetInfo objects expected_repr = ( - "ZarrDatasetConfiguration(dataset_info=DatasetInfo(object_id='481a0860-3a0c-40ec-b931-df4a3e9b101f', " + "ZarrDatasetIOConfiguration(dataset_info=DatasetInfo(object_id='481a0860-3a0c-40ec-b931-df4a3e9b101f', " "location='acquisition/TestElectricalSeries/data', dataset_name='data', dtype=dtype('int16'), " "full_shape=(1800000, 384)), chunk_shape=(78125, 64), buffer_shape=(1250000, 384), compression_method='gzip', " "compression_options=None, filter_methods=None, filter_options=None)" @@ -164,7 +164,7 @@ def test_zarr_dataset_configuration_repr(): def test_validator_filter_options_has_methods(): with pytest.raises(ValueError) as error_info: - ZarrDatasetConfiguration( + ZarrDatasetIOConfiguration( dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 
64), buffer_shape=(1_250_000, 384), @@ -181,7 +181,7 @@ def test_validator_filter_options_has_methods(): def test_validator_filter_methods_length_match_options(): with pytest.raises(ValueError) as error_info: - ZarrDatasetConfiguration( + ZarrDatasetIOConfiguration( dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 64), buffer_shape=(1_250_000, 384), @@ -205,7 +205,7 @@ def test_default_compression_is_always_available(): def test_get_data_io_kwargs(): - zarr_dataset_configuration = mock_ZarrDatasetConfiguration() + zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration() assert zarr_dataset_configuration.get_data_io_kwargs() == dict( chunks=(78125, 64), compressor=GZip(level=1), filters=None From 89915ab2a5ad449ee49049e7fb6acde74dd97a2b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:54:17 +0000 Subject: [PATCH 21/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/nwb_helpers/_dataset_configuration.py | 4 ++-- .../test_common_dataset_io_configuration_model.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index 775f7c923..2417ad19e 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -1,5 +1,5 @@ """Collection of helper functions related to configuration of datasets dependent on backend.""" -from typing import Literal, Union, Generator +from typing import Generator, Literal, Union import h5py import numpy as np @@ -11,7 +11,7 @@ from pynwb import NWBHDF5IO, NWBFile, TimeSeries from pynwb.base import DynamicTable -from ._models._base_models import DatasetIOConfiguration, DatasetInfo +from ._models._base_models import DatasetInfo, DatasetIOConfiguration from ._models._hdf5_models import HDF5BackendConfiguration, HDF5DatasetIOConfiguration from ._models._zarr_models import ZarrBackendConfiguration, ZarrDatasetIOConfiguration from ..hdmf import SliceableDataChunkIterator diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py index 3babb046e..c8a6738b7 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py @@ -155,7 +155,8 @@ def test_validator_chunk_dimensions_do_not_evenly_divide_buffer( @pytest.mark.parametrize( - argnames="mock_dataset_configuration", argvalues=[mock_HDF5DatasetIOConfiguration(), mock_ZarrDatasetIOConfiguration()] + argnames="mock_dataset_configuration", + argvalues=[mock_HDF5DatasetIOConfiguration(), mock_ZarrDatasetIOConfiguration()], ) def test_mutation_validation( mock_dataset_configuration: Union[mock_HDF5DatasetIOConfiguration, mock_ZarrDatasetIOConfiguration] From bfe1049abd8e25773c2a37833acbddf8207dd5b8 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 21 Nov 2023 11:48:09 -0500 Subject: [PATCH 22/27] fix minimal test --- src/neuroconv/tools/hdmf.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/neuroconv/tools/hdmf.py b/src/neuroconv/tools/hdmf.py index 
ebfdf54c0..46f0fd865 100644 --- a/src/neuroconv/tools/hdmf.py +++ b/src/neuroconv/tools/hdmf.py @@ -58,14 +58,14 @@ def estimate_default_buffer_shape( return tuple(maxshape) buffer_bytes = chunk_bytes - axis_sizes_bytes = maxshape * self.dtype.itemsize + axis_sizes_bytes = maxshape * dtype.itemsize target_buffer_bytes = buffer_gb * 1e9 if min(axis_sizes_bytes) > target_buffer_bytes: if num_axes > 1: - smallest_chunk_axis, second_smallest_chunk_axis, *_ = np.argsort(self.chunk_shape) - # If the smallest full axis does not fit within the buffer size, form a square along the two smallest axes - sub_square_buffer_shape = np.array(self.chunk_shape) + smallest_chunk_axis, second_smallest_chunk_axis, *_ = np.argsort(chunk_shape) + # If the smallest full axis does not fit within the buffer size, form a square along the smallest axes + sub_square_buffer_shape = np.array(chunk_shape) if min(axis_sizes_bytes) > target_buffer_bytes: k1 = math.floor((target_buffer_bytes / chunk_bytes) ** 0.5) for axis in [smallest_chunk_axis, second_smallest_chunk_axis]: @@ -78,7 +78,7 @@ def estimate_default_buffer_shape( k1 = math.floor(target_buffer_bytes / chunk_bytes) return tuple( [ - k1 * self.chunk_shape[0], + k1 * chunk_shape[0], ] ) else: From f1683fa46c94efc676d7b7297123b5bf3d493902 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 21 Nov 2023 12:57:20 -0500 Subject: [PATCH 23/27] alter private method name --- .../tools/nwb_helpers/_dataset_configuration.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index 2417ad19e..f0528e84d 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -49,7 +49,12 @@ def _is_dataset_written_to_file( ) -def _parse_location_in_memory_nwbfile(current_location: str, neurodata_object: Container) -> str: +def _find_location_in_memory_nwbfile(current_location: str, neurodata_object: Container) -> str: + """ + Determine the location of a neurodata object within an in-memory NWBFile object. + + Distinct from similar methods in other packages, such as the NWB Inspector, which rely on the file being read from disk. + """ parent = neurodata_object.parent if isinstance(parent, NWBFile): # Items in defined top-level places like acquisition, intervals, etc. do not act as 'containers' @@ -58,7 +63,7 @@ def _parse_location_in_memory_nwbfile(current_location: str, neurodata_object: C if isinstance(parent_field_value, dict) and neurodata_object.name in parent_field_value: return parent_field_name + "/" + neurodata_object.name + "/" + current_location return neurodata_object.name + "/" + current_location - return _parse_location_in_memory_nwbfile( + return _find_location_in_memory_nwbfile( current_location=neurodata_object.name + "/" + current_location, neurodata_object=parent ) @@ -92,8 +97,6 @@ def _get_dataset_metadata( if isinstance(candidate_dataset, DataIO): return None - # DataChunkIterator has best generic dtype inference, though logic is hard to peel out of it - # And it can fail in rare cases but not essential to our default configuration dtype = _infer_dtype_using_data_chunk_iterator(candidate_dataset=candidate_dataset) full_shape = get_data_shape(data=candidate_dataset) @@ -111,7 +114,7 @@ else: pass # TODO: think on this; perhaps zarr's standalone estimator?
- location = _parse_location_in_memory_nwbfile(current_location=field_name, neurodata_object=neurodata_object) + location = _find_location_in_memory_nwbfile(current_location=field_name, neurodata_object=neurodata_object) dataset_info = DatasetInfo( object_id=neurodata_object.object_id, object_name=neurodata_object.name, From 185a69da8638dd16685f32a7aa39520a1b9220a4 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 21 Nov 2023 14:02:19 -0500 Subject: [PATCH 24/27] add extra tests --- .../nwb_helpers/_dataset_configuration.py | 30 ++--- ...t_get_default_dataset_io_configurations.py | 111 +++++++++++++++++- 2 files changed, 122 insertions(+), 19 deletions(-) diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index f0528e84d..14e5d1c79 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -174,7 +174,22 @@ def get_default_dataset_io_configurations( ) for neurodata_object in nwbfile.objects.values(): - if isinstance(neurodata_object, TimeSeries): + if isinstance(neurodata_object, DynamicTable): + dynamic_table = neurodata_object # for readability + + for column_name in dynamic_table.colnames: + candidate_dataset = dynamic_table[column_name].data # VectorData object + if _is_dataset_written_to_file( + candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file + ): + continue # skip + + yield _get_dataset_metadata( + neurodata_object=dynamic_table[column_name], field_name="data", backend=backend + ) + else: + # Primarily for TimeSeries, but also any extended class that has 'data' or 'timestamps' + # The most common example of this is ndx-events Events/LabeledEvents types time_series = neurodata_object # for readability for field_name in ("data", "timestamps"): @@ -192,16 +207,3 @@ def get_default_dataset_io_configurations( continue # skip yield _get_dataset_metadata(neurodata_object=time_series, field_name=field_name, backend=backend) - elif isinstance(neurodata_object, DynamicTable): - dynamic_table = neurodata_object # for readability - - for column_name in dynamic_table.colnames: - candidate_dataset = dynamic_table[column_name].data # VectorData object - if _is_dataset_written_to_file( - candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file - ): - continue # skip - - yield _get_dataset_metadata( - neurodata_object=dynamic_table[column_name], field_name="data", backend=backend - ) diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py index b91cc45fb..9d9943d6e 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py @@ -7,14 +7,14 @@ from hdmf.data_utils import DataChunkIterator from pynwb.base import DynamicTable from pynwb.image import ImageSeries -from pynwb.testing.mock.base import mock_TimeSeries +from pynwb.behavior import CompassDirection from pynwb.testing.mock.file import mock_NWBFile +from pynwb.testing.mock.base import mock_TimeSeries +from pynwb.testing.mock.behavior import mock_SpatialSeries +from nwbinspector.utils import 
is_module_installed from neuroconv.tools.hdmf import SliceableDataChunkIterator -from neuroconv.tools.nwb_helpers import ( - DATASET_IO_CONFIGURATIONS, - get_default_dataset_io_configurations, -) +from neuroconv.tools.nwb_helpers import DATASET_IO_CONFIGURATIONS, get_default_dataset_io_configurations, get_module @pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) @@ -87,3 +87,104 @@ def test_configuration_on_dynamic_table(iterator: callable, backend: Literal["hd if backend == "zarr": assert dataset_configuration.filter_methods is None assert dataset_configuration.filter_options is None + + +@pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) +@pytest.mark.parametrize("backend", ["hdf5", "zarr"]) +def test_configuration_on_compass_direction(iterator: callable, backend: Literal["hdf5", "zarr"]): + array = np.array([[1, 2, 3], [4, 5, 6]]) + data = iterator(array) + + nwbfile = mock_NWBFile() + spatial_series = mock_SpatialSeries(name="TestSpatialSeries", data=data) + compass_direction = CompassDirection(name="TestCompassDirection", spatial_series=spatial_series) + behavior_module = get_module(nwbfile=nwbfile, name="behavior") + behavior_module.add(compass_direction) + + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend=backend)) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.object_id == spatial_series.object_id + assert ( + dataset_configuration.dataset_info.location == "processing/behavior/TestCompassDirection/TestSpatialSeries/data" + ) + assert dataset_configuration.dataset_info.full_shape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + +@pytest.mark.skipif( + not is_module_installed(module_name="ndx_events"), reason="The extra testing package 'ndx-events' is not installed!" 
+) +@pytest.mark.parametrize("backend", ["hdf5", "zarr"]) +def test_configuration_on_ndx_events(backend: Literal["hdf5", "zarr"]): + from ndx_events import LabeledEvents + + # ndx_events data fields do not support wrapping in DataChunkIterators - data is nearly always small enough + # to fit entirely in memory + data = np.array([1, 2, 3], dtype="uint32") + timestamps = np.array([4.5, 6.7, 8.9]) + + nwbfile = mock_NWBFile() + labeled_events = LabeledEvents( + name="TestLabeledEvents", + description="", + timestamps=timestamps, + data=data, + labels=["response_left", "cue_onset", "cue_offset"], + ) + behavior_module = get_module(nwbfile=nwbfile, name="behavior") + behavior_module.add(labeled_events) + + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend=backend)) + + # Note that the labels dataset is not caught since we search only for 'data' and 'timestamps' fields + assert len(dataset_configurations) == 2 + + data_dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.dataset_name == "data" + ) + assert isinstance(data_dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert data_dataset_configuration.dataset_info.object_id == labeled_events.object_id + assert data_dataset_configuration.dataset_info.location == "processing/behavior/TestLabeledEvents/data" + assert data_dataset_configuration.dataset_info.full_shape == data.shape + assert data_dataset_configuration.dataset_info.dtype == data.dtype + assert data_dataset_configuration.chunk_shape == data.shape + assert data_dataset_configuration.buffer_shape == data.shape + assert data_dataset_configuration.compression_method == "gzip" + assert data_dataset_configuration.compression_options is None + + if backend == "zarr": + assert data_dataset_configuration.filter_methods is None + assert data_dataset_configuration.filter_options is None + + timestamps_dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.dataset_name == "timestamps" + ) + assert isinstance(timestamps_dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert timestamps_dataset_configuration.dataset_info.object_id == labeled_events.object_id + assert timestamps_dataset_configuration.dataset_info.location == "processing/behavior/TestLabeledEvents/timestamps" + assert timestamps_dataset_configuration.dataset_info.full_shape == timestamps.shape + assert timestamps_dataset_configuration.dataset_info.dtype == timestamps.dtype + assert timestamps_dataset_configuration.chunk_shape == timestamps.shape + assert timestamps_dataset_configuration.buffer_shape == timestamps.shape + assert timestamps_dataset_configuration.compression_method == "gzip" + assert timestamps_dataset_configuration.compression_options is None + + if backend == "zarr": + assert timestamps_dataset_configuration.filter_methods is None + assert timestamps_dataset_configuration.filter_options is None From 6fad0030fcd8369f385f98f0d48d76b72850c338 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 21 Nov 2023 19:03:39 +0000 Subject: [PATCH 25/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../test_get_default_dataset_io_configurations.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git 
a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py index 9d9943d6e..1d87821ab 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py @@ -5,16 +5,20 @@ import pytest from hdmf.common import VectorData from hdmf.data_utils import DataChunkIterator +from nwbinspector.utils import is_module_installed from pynwb.base import DynamicTable -from pynwb.image import ImageSeries from pynwb.behavior import CompassDirection -from pynwb.testing.mock.file import mock_NWBFile +from pynwb.image import ImageSeries from pynwb.testing.mock.base import mock_TimeSeries from pynwb.testing.mock.behavior import mock_SpatialSeries -from nwbinspector.utils import is_module_installed +from pynwb.testing.mock.file import mock_NWBFile from neuroconv.tools.hdmf import SliceableDataChunkIterator -from neuroconv.tools.nwb_helpers import DATASET_IO_CONFIGURATIONS, get_default_dataset_io_configurations, get_module +from neuroconv.tools.nwb_helpers import ( + DATASET_IO_CONFIGURATIONS, + get_default_dataset_io_configurations, + get_module, +) @pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) From 5ce5914660f23e50ec08ed720f5f7eadd99d6593 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Wed, 22 Nov 2023 11:15:24 -0500 Subject: [PATCH 26/27] add test for ragged tables; debug --- .../nwb_helpers/_dataset_configuration.py | 9 +- ...t_get_default_dataset_io_configurations.py | 106 ++++++++++++++++++ 2 files changed, 110 insertions(+), 5 deletions(-) diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index 14e5d1c79..4e7783aff 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -177,16 +177,15 @@ def get_default_dataset_io_configurations( if isinstance(neurodata_object, DynamicTable): dynamic_table = neurodata_object # for readability - for column_name in dynamic_table.colnames: - candidate_dataset = dynamic_table[column_name].data # VectorData object + for column in dynamic_table.columns: + column_name = column.name + candidate_dataset = column.data # VectorData object if _is_dataset_written_to_file( candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file ): continue # skip - yield _get_dataset_metadata( - neurodata_object=dynamic_table[column_name], field_name="data", backend=backend - ) + yield _get_dataset_metadata(neurodata_object=column, field_name="data", backend=backend) else: # Primarily for TimeSeries, but also any extended class that has 'data' or 'timestamps' # The most common example of this is ndx-events Events/LabeledEvents types diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py index 1d87821ab..7db52da3d 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py 
+++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py @@ -9,6 +9,7 @@ from pynwb.base import DynamicTable from pynwb.behavior import CompassDirection from pynwb.image import ImageSeries +from pynwb.misc import Units from pynwb.testing.mock.base import mock_TimeSeries from pynwb.testing.mock.behavior import mock_SpatialSeries from pynwb.testing.mock.file import mock_NWBFile @@ -93,6 +94,111 @@ def test_configuration_on_dynamic_table(iterator: callable, backend: Literal["hd assert dataset_configuration.filter_options is None +@pytest.mark.parametrize("backend", ["hdf5", "zarr"]) +def test_configuration_on_ragged_units_table(backend: Literal["hdf5", "zarr"]): + nwbfile = mock_NWBFile() + units = Units(name="units", description="") + + spike_times = np.array([0.0, 1.0, 2.0]) + waveforms = np.array([[[1, 2, 3], [1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3], [1, 2, 3]]]) + units.add_unit(spike_times=spike_times, waveforms=waveforms) + + spike_times = np.array([3.0, 4.0]) + waveforms = np.array([[[4, 5], [4, 5], [4, 5]], [[4, 5], [4, 5], [4, 5]]]) + units.add_unit(spike_times=spike_times, waveforms=waveforms) + + nwbfile.units = units + + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend=backend)) + + assert len(dataset_configurations) == 5 + + dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.location == "units/spike_times/data" + ) + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.full_shape == (5,) + assert dataset_configuration.dataset_info.dtype == np.dtype("float64") + assert dataset_configuration.chunk_shape == (5,) + assert dataset_configuration.buffer_shape == (5,) + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.location == "units/spike_times_index/data" + ) + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.full_shape == (2,) + assert dataset_configuration.dataset_info.dtype == np.dtype("uint8") + assert dataset_configuration.chunk_shape == (2,) + assert dataset_configuration.buffer_shape == (2,) + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.location == "units/waveforms/data" + ) + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.full_shape == (12, 3) + assert dataset_configuration.dataset_info.dtype == np.dtype("int32") + assert dataset_configuration.chunk_shape == (12, 3) + assert dataset_configuration.buffer_shape == (12, 3) + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if 
backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.location == "units/waveforms_index/data" + ) + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.full_shape == (4,) + assert dataset_configuration.dataset_info.dtype == np.dtype("uint8") + assert dataset_configuration.chunk_shape == (4,) + assert dataset_configuration.buffer_shape == (4,) + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.location == "units/waveforms_index_index/data" + ) + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.full_shape == (2,) + assert dataset_configuration.dataset_info.dtype == np.dtype("uint8") + assert dataset_configuration.chunk_shape == (2,) + assert dataset_configuration.buffer_shape == (2,) + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + @pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) @pytest.mark.parametrize("backend", ["hdf5", "zarr"]) def test_configuration_on_compass_direction(iterator: callable, backend: Literal["hdf5", "zarr"]): From 3032755fbe7e4004f0fc8cca70309738b850737e Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Wed, 22 Nov 2023 11:29:00 -0500 Subject: [PATCH 27/27] adjust for cross-platform --- .../test_get_default_dataset_io_configurations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py index 7db52da3d..69545adbf 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py @@ -100,11 +100,11 @@ def test_configuration_on_ragged_units_table(backend: Literal["hdf5", "zarr"]): units = Units(name="units", description="") spike_times = np.array([0.0, 1.0, 2.0]) - waveforms = np.array([[[1, 2, 3], [1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3], [1, 2, 3]]]) + waveforms = np.array([[[1, 2, 3], [1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3], [1, 2, 3]]], dtype="int32") units.add_unit(spike_times=spike_times, waveforms=waveforms) spike_times = np.array([3.0, 4.0]) - waveforms = np.array([[[4, 5], [4, 5], [4, 5]], [[4, 5], [4, 5], [4, 5]]]) + waveforms = np.array([[[4, 5], [4, 5], [4, 5]], [[4, 5], [4, 5], [4, 5]]], dtype="int32") units.add_unit(spike_times=spike_times, waveforms=waveforms) nwbfile.units = units