From 2a2d805d29ff5b4c1e3e2d42066f3a8574a52f09 Mon Sep 17 00:00:00 2001
From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com>
Date: Wed, 6 Mar 2024 12:48:50 -0500
Subject: [PATCH] Upgrade to Pydantic 2 (#767)

---
 CHANGELOG.md | 9 +
 requirements-minimal.txt | 5 +-
 src/neuroconv/tools/nwb_helpers/__init__.py | 15 +-
 .../_configuration_models/_base_backend.py | 16 +-
 .../_configuration_models/_base_dataset_io.py | 213 +++++++++---------
 .../_configuration_models/_hdf5_backend.py | 8 +-
 .../_configuration_models/_hdf5_dataset_io.py | 9 +-
 .../_configuration_models/_zarr_backend.py | 8 +-
 .../_configuration_models/_zarr_dataset_io.py | 37 ++-
 .../tools/nwb_helpers/_configure_backend.py | 8 +-
 .../nwb_helpers/_dataset_configuration.py | 10 +-
 src/neuroconv/tools/testing/__init__.py | 1 -
 .../testing/_mock/_mock_dataset_models.py | 85 +++----
 ...t_common_dataset_io_configuration_model.py | 96 ++------
 ...t_get_default_dataset_io_configurations.py | 80 +++----
 ...ataset_io_configurations_appended_files.py | 32 +--
 .../test_models/test_dataset_info_model.py | 43 ----
 .../test_dataset_io_configuration_model.py | 20 +-
 ...est_hdf5_dataset_io_configuration_model.py | 4 +-
 ...est_zarr_dataset_io_configuration_model.py | 21 +-
 20 files changed, 306 insertions(+), 414 deletions(-)
 delete mode 100644 tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_info_model.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8388f5ed7..fb1ae15df 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,14 @@
 # Upcoming
 
+### Improvements
+
+* Upgraded Pydantic support to `>v2.0.0`. [PR #767](https://github.com/catalystneuro/neuroconv/pull/767)
+* Absorbed the `DatasetInfo` model into the `DatasetIOConfiguration` model. [PR #767](https://github.com/catalystneuro/neuroconv/pull/767)
+* Keyword argument `field_name` of the `DatasetIOConfiguration.from_neurodata_object` method has been renamed to `dataset_name` to be more consistent with its usage. This only affects direct initialization of the model; usage via the `BackendConfiguration` constructor and its associated helper functions in `neuroconv.tools.nwb_helpers` is unaffected. [PR #767](https://github.com/catalystneuro/neuroconv/pull/767)
+* Manual construction of a `DatasetIOConfiguration` now requires the field `dataset_name`, and will be validated to match the final path of `location_in_file`. Usage via the automated constructors is unchanged.
[PR #767](https://github.com/catalystneuro/neuroconv/pull/767) + + + # v0.4.7 (February 21, 2024) ### Deprecation diff --git a/requirements-minimal.txt b/requirements-minimal.txt index dfd7e9619..a05b90ee1 100644 --- a/requirements-minimal.txt +++ b/requirements-minimal.txt @@ -4,10 +4,11 @@ jsonschema>=3.2.0 PyYAML>=5.4 scipy>=1.4.1 h5py>=3.9.0 -hdmf>=3.12.1 +hdmf>=3.12.2 hdmf_zarr>=0.4.0 pynwb>=2.3.2;python_version>='3.8' -pydantic>=1.10.13,<2.0.0 +pydantic>=2.0.0 +typing_extensions>=4.1.0 psutil>=5.8.0 tqdm>=4.60.0 pandas diff --git a/src/neuroconv/tools/nwb_helpers/__init__.py b/src/neuroconv/tools/nwb_helpers/__init__.py index 2c1951edf..1437d87d0 100644 --- a/src/neuroconv/tools/nwb_helpers/__init__.py +++ b/src/neuroconv/tools/nwb_helpers/__init__.py @@ -6,7 +6,7 @@ from ._backend_configuration import get_default_backend_configuration from ._configuration_models._base_backend import BackendConfiguration -from ._configuration_models._base_dataset_io import DatasetInfo, DatasetIOConfiguration +from ._configuration_models._base_dataset_io import DatasetIOConfiguration from ._configuration_models._hdf5_backend import HDF5BackendConfiguration from ._configuration_models._hdf5_dataset_io import ( AVAILABLE_HDF5_COMPRESSION_METHODS, @@ -37,11 +37,15 @@ "BACKEND_CONFIGURATIONS", "DATASET_IO_CONFIGURATIONS", "BACKEND_NWB_IO", + "BackendConfiguration", + "HDF5BackendConfiguration", + "ZarrBackendConfiguration", + "DatasetIOConfiguration", + "HDF5DatasetIOConfiguration", + "ZarrDatasetIOConfiguration", "get_default_backend_configuration", "get_default_dataset_io_configurations", "configure_backend", - "BackendConfiguration", - "DatasetIOConfiguration", "get_default_dataset_io_configurations", "get_default_backend_configuration", "add_device_from_metadata", @@ -49,9 +53,4 @@ "get_module", "make_nwbfile_from_metadata", "make_or_load_nwbfile", - "DatasetInfo", - "HDF5BackendConfiguration", - "HDF5DatasetIOConfiguration", - "ZarrBackendConfiguration", - "ZarrDatasetIOConfiguration", ] diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_backend.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_backend.py index b66d56975..7d6766e2b 100644 --- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_backend.py +++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_backend.py @@ -3,8 +3,9 @@ from typing import ClassVar, Dict, Literal, Type from hdmf.container import DataIO -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from pynwb import NWBFile +from typing_extensions import Self from ._base_dataset_io import DatasetIOConfiguration from .._dataset_configuration import get_default_dataset_io_configurations @@ -13,10 +14,11 @@ class BackendConfiguration(BaseModel): """A model for matching collections of DatasetConfigurations to a specific backend.""" - backend: ClassVar[Literal["hdf5", "zarr"]] = Field( - description="The name of the backend used to configure the NWBFile." - ) - data_io_class: Type[DataIO] = Field(description="The DataIO class that is specific to this backend.") + backend: ClassVar[Literal["hdf5", "zarr"]] + data_io_class: ClassVar[Type[DataIO]] + + model_config = ConfigDict(validate_assignment=True) # Re-validate model on mutation + dataset_configurations: Dict[str, DatasetIOConfiguration] = Field( description=( "A mapping from object locations (e.g. 
`acquisition/TestElectricalSeriesAP/data`) " @@ -38,10 +40,10 @@ def __str__(self) -> str: return string @classmethod - def from_nwbfile(cls, nwbfile: NWBFile) -> "BackendConfiguration": + def from_nwbfile(cls, nwbfile: NWBFile) -> Self: default_dataset_configurations = get_default_dataset_io_configurations(nwbfile=nwbfile, backend=cls.backend) dataset_configurations = { - default_dataset_configuration.dataset_info.location_in_file: default_dataset_configuration + default_dataset_configuration.location_in_file: default_dataset_configuration for default_dataset_configuration in default_dataset_configurations } diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py index 0d4b842ac..9562fa83e 100644 --- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py +++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py @@ -11,13 +11,21 @@ from hdmf import Container from hdmf.data_utils import GenericDataChunkIterator from hdmf.utils import get_data_shape -from pydantic import BaseModel, Field, root_validator +from pydantic import ( + BaseModel, + ConfigDict, + Field, + InstanceOf, + PositiveInt, + model_validator, +) from pynwb import NWBFile +from typing_extensions import Self from ...hdmf import SliceableDataChunkIterator -def _find_location_in_memory_nwbfile(current_location: str, neurodata_object: Container) -> str: +def _recursively_find_location_in_memory_nwbfile(current_location: str, neurodata_object: Container) -> str: """ Method for determining the location of a neurodata object within an in-memory NWBFile object. @@ -31,11 +39,20 @@ def _find_location_in_memory_nwbfile(current_location: str, neurodata_object: Co if isinstance(parent_field_value, dict) and neurodata_object.name in parent_field_value: return parent_field_name + "/" + neurodata_object.name + "/" + current_location return neurodata_object.name + "/" + current_location - return _find_location_in_memory_nwbfile( + return _recursively_find_location_in_memory_nwbfile( current_location=neurodata_object.name + "/" + current_location, neurodata_object=parent ) +def _find_location_in_memory_nwbfile(neurodata_object: Container, field_name: str) -> str: + """ + More readable call for the recursive location finder for a field of a neurodata object in an in-memory NWBFile. + + The recursive method forms from the buttom-up using the initial 'current_location' of the field itself. + """ + return _recursively_find_location_in_memory_nwbfile(current_location=field_name, neurodata_object=neurodata_object) + + def _infer_dtype_of_list(list_: List[Union[int, float, list]]) -> np.dtype: """ Attempt to infer the dtype of values in an arbitrarily sized and nested list. @@ -67,105 +84,55 @@ def _infer_dtype(dataset: Union[h5py.Dataset, zarr.Array]) -> np.dtype: return data_type -class DatasetInfo(BaseModel): - """A data model to represent immutable aspects of an object that will become a HDF5 or Zarr dataset on write.""" - - # TODO: When using Pydantic v2, replace with - # model_config = ConfigDict(allow_mutation=False) - class Config: # noqa: D106 - allow_mutation = False - arbitrary_types_allowed = True - - object_id: str = Field(description="The UUID of the neurodata object containing the dataset.") - location_in_file: str = Field( # TODO: in v2, use init_var=False or assign as a property - description="The relative location of the this dataset within the in-memory NWBFile. (e.g. 
'acquisition/TestElectricalSeries/data')" - ) - dataset_name: Literal["data", "timestamps"] = Field(description="The reference name of the dataset.") - dtype: np.dtype = Field( # TODO: When using Pydantic v2, replace np.dtype with InstanceOf[np.dtype] - description="The data type of elements of this dataset." - ) - full_shape: Tuple[int, ...] = Field(description="The maximum shape of the entire dataset.") - - def __hash__(self): - """To allow instances of this class to be used as keys in dictionaries.""" - return hash((type(self),) + tuple(self.__dict__.values())) - - def __str__(self) -> str: - """ - Not overriding __repr__ as this is intended to render only when wrapped in print(). - - Reason being two-fold; a standard `repr` is intended to be slightly more machine-readable / a more basic - representation of the true object state. But then also because an iterable of these objects, such as a - `List[DataSetInfo]`, would print out the nested representations, which only look good when using the basic - `repr` (that is, this fancy string print-out does not look good when nested in another container). - """ - source_size_in_gb = math.prod(self.full_shape) * self.dtype.itemsize / 1e9 - - string = ( - f"\n{self.location_in_file}" - f"\n{'-' * len(self.location_in_file)}" - f"\n dtype : {self.dtype}" - f"\n full shape of source array : {self.full_shape}" - f"\n full size of source array : {source_size_in_gb:0.2f} GB" - ) - return string - - def __init__(self, **values): - location = values["location_in_file"] - - # For more efficient / explicit reference downstream, instead of reparsing from location multiple times - dataset_name = location.split("/")[-1] - values.update(dataset_name=dataset_name) - super().__init__(**values) - - @classmethod - def from_neurodata_object(cls, neurodata_object: Container, field_name: str) -> "DatasetInfo": - location_in_file = _find_location_in_memory_nwbfile( - current_location=field_name, neurodata_object=neurodata_object - ) - candidate_dataset = getattr(neurodata_object, field_name) - - full_shape = get_data_shape(data=candidate_dataset) - dtype = _infer_dtype(dataset=candidate_dataset) - - return cls( - object_id=neurodata_object.object_id, - object_name=neurodata_object.name, - location_in_file=location_in_file, - full_shape=full_shape, - dtype=dtype, - ) - - class DatasetIOConfiguration(BaseModel, ABC): """A data model for configuring options about an object that will become a HDF5 or Zarr Dataset in the file.""" - # TODO: When using Pydantic v2, remove - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(validate_assignment=True) # Re-validate model on mutation + + # Immutable fields about the dataset + object_id: str = Field(description="The UUID of the neurodata object containing the dataset.", frozen=True) + location_in_file: str = Field( + description=( + "The location of the this dataset within the in-memory NWBFile relative to the top-level root, " + "e.g. 'acquisition/ElectricalSeries/data'." + ), + frozen=True, + ) + dataset_name: Literal["data", "timestamps"] = Field(description="The reference name of the dataset.", frozen=True) + dtype: InstanceOf[np.dtype] = Field(description="The data type of elements of this dataset.", frozen=True) + full_shape: Tuple[int, ...] = Field(description="The maximum shape of the entire dataset.", frozen=True) - dataset_info: DatasetInfo = Field(description="The immutable information about this dataset.") - chunk_shape: Tuple[int, ...] 
= Field( # When using Pydantic v2.0, specify PositiveInt + # User specifiable fields + chunk_shape: Union[Tuple[PositiveInt, ...], None] = Field( description=( "The specified shape to use when chunking the dataset. " "For optimized streaming speeds, a total size of around 10 MB is recommended." - ) + ), ) - buffer_shape: Tuple[int, ...] = Field( + buffer_shape: Union[Tuple[int, ...], None] = Field( description=( "The specified shape to use when iteratively loading data into memory while writing the dataset. " "For optimized writing speeds and minimal RAM usage, a total size of around 1 GB is recommended." - ) + ), ) - # TODO: When using Pydantic v2, wrap h5py._hl.filters.FilterRefBase and numcodecs.abc.Codec with InstanceOf - compression_method: Union[str, h5py._hl.filters.FilterRefBase, numcodecs.abc.Codec, None] = Field( - default="gzip", + compression_method: Union[ + str, InstanceOf[h5py._hl.filters.FilterRefBase], InstanceOf[numcodecs.abc.Codec], None + ] = Field( description="The specified compression method to apply to this dataset. Set to `None` to disable compression.", ) compression_options: Union[Dict[str, Any], None] = Field( default=None, description="The optional parameters to use for the specified compression method." ) + @abstractmethod + def get_data_io_kwargs(self) -> Dict[str, Any]: + """ + Fetch the properly structured dictionary of input arguments. + + Should be passed directly as dynamic keyword arguments (**kwargs) into a H5DataIO or ZarrDataIO. + """ + raise NotImplementedError + def __str__(self) -> str: """ Not overriding __repr__ as this is intended to render only when wrapped in print(). @@ -175,15 +142,15 @@ def __str__(self) -> str: `List[DatasetConfiguration]`, would print out the nested representations, which only look good when using the basic `repr` (that is, this fancy string print-out does not look good when nested in another container). """ - source_size_in_gb = math.prod(self.dataset_info.full_shape) * self.dataset_info.dtype.itemsize / 1e9 - maximum_ram_usage_per_iteration_in_gb = math.prod(self.buffer_shape) * self.dataset_info.dtype.itemsize / 1e9 - disk_space_usage_per_chunk_in_mb = math.prod(self.chunk_shape) * self.dataset_info.dtype.itemsize / 1e6 + source_size_in_gb = math.prod(self.full_shape) * self.dtype.itemsize / 1e9 + maximum_ram_usage_per_iteration_in_gb = math.prod(self.buffer_shape) * self.dtype.itemsize / 1e9 + disk_space_usage_per_chunk_in_mb = math.prod(self.chunk_shape) * self.dtype.itemsize / 1e6 string = ( - f"\n{self.dataset_info.location_in_file}" - f"\n{'-' * len(self.dataset_info.location_in_file)}" - f"\n dtype : {self.dataset_info.dtype}" - f"\n full shape of source array : {self.dataset_info.full_shape}" + f"\n{self.location_in_file}" + f"\n{'-' * len(self.location_in_file)}" + f"\n dtype : {self.dtype}" + f"\n full shape of source array : {self.full_shape}" f"\n full size of source array : {source_size_in_gb:0.2f} GB" # TODO: add nicer auto-selection/rendering of units and amount for source data size "\n" @@ -204,12 +171,18 @@ def __str__(self) -> str: return string - @root_validator + @model_validator(mode="before") def validate_all_shapes(cls, values: Dict[str, Any]) -> Dict[str, Any]: + location_in_file = values["location_in_file"] + dataset_name = values["dataset_name"] + + assert ( + dataset_name == location_in_file.split("/")[-1] + ), f"The `dataset_name` ({dataset_name}) does not match the end of the `location_in_file` ({location_in_file})!" 
+ chunk_shape = values["chunk_shape"] buffer_shape = values["buffer_shape"] - full_shape = values["dataset_info"].full_shape - location_in_file = values["dataset_info"].location_in_file # For more identifiable error messages. + full_shape = values["full_shape"] if len(chunk_shape) != len(buffer_shape): raise ValueError( @@ -254,35 +227,51 @@ def validate_all_shapes(cls, values: Dict[str, Any]) -> Dict[str, Any]: return values - @abstractmethod - def get_data_io_kwargs(self) -> Dict[str, Any]: + @classmethod + def from_neurodata_object(cls, neurodata_object: Container, dataset_name: Literal["data", "timestamps"]) -> Self: """ - Fetch the properly structured dictionary of input arguments. - - Should be passed directly as dynamic keyword arguments (**kwargs) into a H5DataIO or ZarrDataIO. + Construct an instance of a DatasetIOConfiguration for a dataset in a neurodata object in an NWBFile. + + Parameters + ---------- + neurodata_object : hdmf.Container + The neurodata object containing the field that will become a dataset when written to disk. + dataset_name : "data" or "timestamps" + The name of the field that will become a dataset when written to disk. + Some neurodata objects can have multiple such fields, such as `pynwb.TimeSeries` which can have both `data` + and `timestamps`, each of which can be configured separately. """ - raise NotImplementedError - - @classmethod - def from_neurodata_object(cls, neurodata_object: Container, field_name: str) -> "DatasetIOConfiguration": - candidate_dataset = getattr(neurodata_object, field_name) - - dataset_info = DatasetInfo.from_neurodata_object(neurodata_object=neurodata_object, field_name=field_name) + location_in_file = _find_location_in_memory_nwbfile(neurodata_object=neurodata_object, field_name=dataset_name) - dtype = dataset_info.dtype - full_shape = dataset_info.full_shape + candidate_dataset = getattr(neurodata_object, dataset_name) + full_shape = get_data_shape(data=candidate_dataset) + dtype = _infer_dtype(dataset=candidate_dataset) if isinstance(candidate_dataset, GenericDataChunkIterator): chunk_shape = candidate_dataset.chunk_shape buffer_shape = candidate_dataset.buffer_shape - elif dtype != "unknown": + compression_method = "gzip" + elif dtype != np.dtype("object"): chunk_shape = SliceableDataChunkIterator.estimate_default_chunk_shape( chunk_mb=10.0, maxshape=full_shape, dtype=np.dtype(dtype) ) buffer_shape = SliceableDataChunkIterator.estimate_default_buffer_shape( buffer_gb=0.5, chunk_shape=chunk_shape, maxshape=full_shape, dtype=np.dtype(dtype) ) - else: - pass + compression_method = "gzip" + elif dtype == np.dtype("object"): # Unclear what default chunking/compression should be for compound objects + chunk_shape = None + buffer_shape = None + compression_method = None - return cls(dataset_info=dataset_info, chunk_shape=chunk_shape, buffer_shape=buffer_shape) + return cls( + object_id=neurodata_object.object_id, + object_name=neurodata_object.name, + location_in_file=location_in_file, + dataset_name=dataset_name, + full_shape=full_shape, + dtype=dtype, + chunk_shape=chunk_shape, + buffer_shape=buffer_shape, + compression_method=compression_method, + ) diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_hdf5_backend.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_hdf5_backend.py index 6d199c237..2949e3bcb 100644 --- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_hdf5_backend.py +++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_hdf5_backend.py @@ -13,12 +13,8 @@ class 
HDF5BackendConfiguration(BackendConfiguration): """A model for matching collections of DatasetConfigurations specific to the HDF5 backend.""" backend: ClassVar[Literal["hdf5"]] = "hdf5" - # Field( # TODO: in pydantic v2 use property instead of class attribute - # default="hdf5", description="The name of the backend used to configure the NWBFile." - # ) - data_io_class: Type[H5DataIO] = Field( # TODO: in pydantic v2 use property instead of class attribute - default=H5DataIO, description="The DataIO class that is specific to HDF5." - ) + data_io_class: ClassVar[Type[H5DataIO]] = H5DataIO + dataset_configurations: Dict[str, HDF5DatasetIOConfiguration] = Field( description=( "A mapping from object locations to their HDF5DatasetConfiguration specification that contains all " diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_hdf5_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_hdf5_dataset_io.py index e256c8c3b..828a37998 100644 --- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_hdf5_dataset_io.py +++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_hdf5_dataset_io.py @@ -3,7 +3,7 @@ from typing import Any, Dict, Literal, Union import h5py -from pydantic import Field +from pydantic import Field, InstanceOf from ._base_dataset_io import DatasetIOConfiguration from ...importing import is_package_installed @@ -32,13 +32,8 @@ class HDF5DatasetIOConfiguration(DatasetIOConfiguration): """A data model for configuring options about an object that will become a HDF5 Dataset in the file.""" - # TODO: When using Pydantic v2, replace with `model_config = ConfigDict(...)` - class Config: - arbitrary_types_allowed = True - validate_assignment = True - compression_method: Union[ - Literal[tuple(AVAILABLE_HDF5_COMPRESSION_METHODS.keys())], h5py._hl.filters.FilterRefBase, None + Literal[tuple(AVAILABLE_HDF5_COMPRESSION_METHODS.keys())], InstanceOf[h5py._hl.filters.FilterRefBase], None ] = Field( default="gzip", description=( diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_zarr_backend.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_zarr_backend.py index 1c17a77a3..7f43a1299 100644 --- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_zarr_backend.py +++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_zarr_backend.py @@ -14,12 +14,8 @@ class ZarrBackendConfiguration(BackendConfiguration): """A model for matching collections of DatasetConfigurations specific to the Zarr backend.""" backend: ClassVar[Literal["zarr"]] = "zarr" - # Field( # TODO: in pydantic v2 use property instead of class attribute - # default="zarr", description="The name of the backend used to configure the NWBFile." - # ) - data_io_class: Type[ZarrDataIO] = Field( # TODO: in pydantic v2 use property instead of class attribute - default=ZarrDataIO, description="The DataIO class that is specific to Zarr." 
- ) + data_io_class: ClassVar[Type[ZarrDataIO]] = ZarrDataIO + dataset_configurations: Dict[str, ZarrDatasetIOConfiguration] = Field( description=( "A mapping from object locations to their ZarrDatasetConfiguration specification that contains all " diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_zarr_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_zarr_dataset_io.py index 10e3951ce..c070a20e9 100644 --- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_zarr_dataset_io.py +++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_zarr_dataset_io.py @@ -4,7 +4,7 @@ import numcodecs import zarr -from pydantic import Field, root_validator +from pydantic import Field, InstanceOf, model_validator from ._base_dataset_io import DatasetIOConfiguration @@ -45,21 +45,16 @@ class ZarrDatasetIOConfiguration(DatasetIOConfiguration): """A data model for configuring options about an object that will become a Zarr Dataset in the file.""" - # TODO: When using Pydantic v2, replace with `model_config = ConfigDict(...)` - class Config: - arbitrary_types_allowed = True - validate_assignment = True - - compression_method: Union[Literal[tuple(AVAILABLE_ZARR_COMPRESSION_METHODS.keys())], numcodecs.abc.Codec, None] = ( - Field( - default="gzip", # TODO: would like this to be 'auto' - description=( - "The specified compression method to apply to this dataset. " - "Can be either a string that matches an available method on your system, " - "or an instantiated numcodec.Codec object." - "Set to `None` to disable compression." - ), - ) + compression_method: Union[ + Literal[tuple(AVAILABLE_ZARR_COMPRESSION_METHODS.keys())], InstanceOf[numcodecs.abc.Codec], None + ] = Field( + default="gzip", # TODO: would like this to be 'auto' + description=( + "The specified compression method to apply to this dataset. " + "Can be either a string that matches an available method on your system, " + "or an instantiated numcodec.Codec object." + "Set to `None` to disable compression." + ), ) # TODO: actually provide better schematic rendering of options. Only support defaults in GUIDE for now. # Looks like they'll have to be hand-typed however... Can try parsing the numpy docstrings - no annotation typing. @@ -67,7 +62,7 @@ class Config: default=None, description="The optional parameters to use for the specified compression method." ) filter_methods: Union[ - List[Union[Literal[tuple(AVAILABLE_ZARR_COMPRESSION_METHODS.keys())], numcodecs.abc.Codec]], None + List[Union[Literal[tuple(AVAILABLE_ZARR_COMPRESSION_METHODS.keys())], InstanceOf[numcodecs.abc.Codec]]], None ] = Field( default=None, description=( @@ -81,7 +76,7 @@ class Config: default=None, description="The optional parameters to use for each specified filter method." ) - def __str__(self) -> str: + def __str__(self) -> str: # Inherited docstring from parent. 
noqa: D105 string = super().__str__() if self.filter_methods is not None: string += f"\n filter methods : {self.filter_methods}" @@ -92,10 +87,10 @@ def __str__(self) -> str: return string - @root_validator + @model_validator(mode="before") def validate_filter_methods_and_options_length_match(cls, values: Dict[str, Any]): - filter_methods = values["filter_methods"] - filter_options = values["filter_options"] + filter_methods = values.get("filter_methods", None) + filter_options = values.get("filter_options", None) if filter_methods is None and filter_options is not None: raise ValueError( diff --git a/src/neuroconv/tools/nwb_helpers/_configure_backend.py b/src/neuroconv/tools/nwb_helpers/_configure_backend.py index a2dcaec69..6bc055a04 100644 --- a/src/neuroconv/tools/nwb_helpers/_configure_backend.py +++ b/src/neuroconv/tools/nwb_helpers/_configure_backend.py @@ -17,8 +17,8 @@ def configure_backend( data_io_class = backend_configuration.data_io_class for dataset_configuration in backend_configuration.dataset_configurations.values(): - object_id = dataset_configuration.dataset_info.object_id - dataset_name = dataset_configuration.dataset_info.dataset_name + object_id = dataset_configuration.object_id + dataset_name = dataset_configuration.dataset_name data_io_kwargs = dataset_configuration.get_data_io_kwargs() # TODO: update buffer shape in iterator, if present @@ -30,7 +30,9 @@ def configure_backend( nwbfile_object.set_data_io(data_io_class=data_io_class, data_io_kwargs=data_io_kwargs) # TimeSeries data or timestamps elif isinstance(nwbfile_object, TimeSeries) and not is_dataset_linked: - nwbfile_object.set_data_io(dataset_name=dataset_name, data_io_class=data_io_class, **data_io_kwargs) + nwbfile_object.set_data_io( + dataset_name=dataset_name, data_io_class=data_io_class, data_io_kwargs=data_io_kwargs + ) # Skip the setting of a DataIO when target dataset is a link (assume it will be found in parent) elif isinstance(nwbfile_object, TimeSeries) and is_dataset_linked: continue diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index 60801b9c2..51f414244 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -114,7 +114,7 @@ def get_default_dataset_io_configurations( continue dataset_io_configuration = DatasetIOConfigurationClass.from_neurodata_object( - neurodata_object=column, field_name="data" + neurodata_object=column, dataset_name="data" ) yield dataset_io_configuration @@ -123,11 +123,11 @@ def get_default_dataset_io_configurations( # The most common example of this is ndx-events Events/LabeledEvents types time_series = neurodata_object # for readability - for field_name in ("data", "timestamps"): - if field_name not in time_series.fields: # timestamps is optional + for dataset_name in ("data", "timestamps"): + if dataset_name not in time_series.fields: # timestamps is optional continue - candidate_dataset = getattr(time_series, field_name) + candidate_dataset = getattr(time_series, dataset_name) if _is_dataset_written_to_file( candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file ): @@ -142,7 +142,7 @@ def get_default_dataset_io_configurations( continue # skip dataset_io_configuration = DatasetIOConfigurationClass.from_neurodata_object( - neurodata_object=time_series, field_name=field_name + neurodata_object=time_series, dataset_name=dataset_name ) yield dataset_io_configuration diff --git 
a/src/neuroconv/tools/testing/__init__.py b/src/neuroconv/tools/testing/__init__.py index 2d5b06497..7179a7544 100644 --- a/src/neuroconv/tools/testing/__init__.py +++ b/src/neuroconv/tools/testing/__init__.py @@ -1,5 +1,4 @@ from ._mock._mock_dataset_models import ( - mock_DatasetInfo, mock_HDF5BackendConfiguration, mock_HDF5DatasetIOConfiguration, mock_ZarrBackendConfiguration, diff --git a/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py b/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py index 6d23e0af3..4e16740d9 100644 --- a/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py +++ b/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py @@ -7,7 +7,6 @@ from ...nwb_helpers import ( AVAILABLE_HDF5_COMPRESSION_METHODS, AVAILABLE_ZARR_COMPRESSION_METHODS, - DatasetInfo, HDF5BackendConfiguration, HDF5DatasetIOConfiguration, ZarrBackendConfiguration, @@ -15,22 +14,14 @@ ) -def mock_DatasetInfo( +def mock_HDF5DatasetIOConfiguration( object_id: str = "481a0860-3a0c-40ec-b931-df4a3e9b101f", location_in_file: str = "acquisition/TestElectricalSeries/data", + dataset_name: Literal["data", "timestamps"] = "data", full_shape: Tuple[int, ...] = (60 * 30_000, 384), # ~1 minute of v1 NeuroPixels probe - dtype=np.dtype("int16"), -) -> DatasetInfo: - """Mock instance of a DatasetInfo with NeuroPixel-like values to showcase chunk/buffer recommendations.""" - return DatasetInfo( - object_id=object_id, - location_in_file=location_in_file, - full_shape=full_shape, - dtype=dtype, - ) - - -def mock_HDF5DatasetIOConfiguration( + dtype: np.dtype = np.dtype("int16"), + chunk_shape: Tuple[int, ...] = (78_125, 64), # ~10 MB + buffer_shape: Tuple[int, ...] = (1_250_000, 384), # ~1 GB compression_method: Union[ Literal[tuple(AVAILABLE_HDF5_COMPRESSION_METHODS.keys())], h5py._hl.filters.FilterRefBase, None ] = "gzip", @@ -38,15 +29,26 @@ def mock_HDF5DatasetIOConfiguration( ) -> HDF5DatasetIOConfiguration: """Mock object of a HDF5DatasetIOConfiguration with NeuroPixel-like values to show chunk/buffer recommendations.""" return HDF5DatasetIOConfiguration( - dataset_info=mock_DatasetInfo(), - chunk_shape=(78_125, 64), # ~10 MB - buffer_shape=(1_250_000, 384), # ~1 GB + object_id=object_id, + location_in_file=location_in_file, + dataset_name=dataset_name, + full_shape=full_shape, + dtype=dtype, + chunk_shape=chunk_shape, + buffer_shape=buffer_shape, compression_method=compression_method, compression_options=compression_options, ) def mock_ZarrDatasetIOConfiguration( + object_id: str = "481a0860-3a0c-40ec-b931-df4a3e9b101f", + location_in_file: str = "acquisition/TestElectricalSeries/data", + dataset_name: Literal["data", "timestamps"] = "data", + full_shape: Tuple[int, ...] = (60 * 30_000, 384), # ~1 minute of v1 NeuroPixels probe + dtype: np.dtype = np.dtype("int16"), + chunk_shape: Tuple[int, ...] = (78_125, 64), # ~10 MB + buffer_shape: Tuple[int, ...] 
= (1_250_000, 384), # ~1 GB compression_method: Union[ Literal[tuple(AVAILABLE_ZARR_COMPRESSION_METHODS.keys())], numcodecs.abc.Codec, None ] = "gzip", @@ -58,9 +60,13 @@ def mock_ZarrDatasetIOConfiguration( ) -> ZarrDatasetIOConfiguration: """Mock object of a ZarrDatasetIOConfiguration with NeuroPixel-like values to show chunk/buffer recommendations.""" return ZarrDatasetIOConfiguration( - dataset_info=mock_DatasetInfo(), - chunk_shape=(78_125, 64), # ~10 MB - buffer_shape=(1_250_000, 384), # ~1 GB + object_id=object_id, + location_in_file=location_in_file, + dataset_name=dataset_name, + full_shape=full_shape, + dtype=dtype, + chunk_shape=chunk_shape, + buffer_shape=buffer_shape, compression_method=compression_method, compression_options=compression_options, filter_methods=filter_methods, @@ -70,18 +76,15 @@ def mock_ZarrDatasetIOConfiguration( def mock_HDF5BackendConfiguration() -> HDF5BackendConfiguration: """Mock instance of a HDF5BackendConfiguration with two NeuroPixel-like datasets.""" - dataset_configurations = { - "acquisition/TestElectricalSeriesAP/data": HDF5DatasetIOConfiguration( - dataset_info=mock_DatasetInfo(location_in_file="acquisition/TestElectricalSeriesAP/data"), - chunk_shape=(78_125, 64), # ~10 MB - buffer_shape=(1_250_000, 384), # ~1 GB + dataset_configurations: Dict[str, HDF5DatasetIOConfiguration] = { + "acquisition/TestElectricalSeriesAP/data": mock_HDF5DatasetIOConfiguration( + location_in_file="acquisition/TestElectricalSeriesAP/data", dataset_name="data" ), - "acquisition/TestElectricalSeriesLF/data": HDF5DatasetIOConfiguration( - dataset_info=mock_DatasetInfo( - object_id="bc37e164-519f-4b65-a976-206440f1d325", - location_in_file="acquisition/TestElectricalSeriesLF/data", - full_shape=(75_000, 384), - ), + "acquisition/TestElectricalSeriesLF/data": mock_HDF5DatasetIOConfiguration( + object_id="bc37e164-519f-4b65-a976-206440f1d325", + location_in_file="acquisition/TestElectricalSeriesLF/data", + dataset_name="data", + full_shape=(75_000, 384), chunk_shape=(37_500, 128), # ~10 MB buffer_shape=(75_000, 384), ), @@ -92,19 +95,17 @@ def mock_HDF5BackendConfiguration() -> HDF5BackendConfiguration: def mock_ZarrBackendConfiguration() -> ZarrBackendConfiguration: """Mock instance of a HDF5BackendConfiguration with several NeuroPixel-like datasets.""" - dataset_configurations = { - "acquisition/TestElectricalSeriesAP/data": ZarrDatasetIOConfiguration( - dataset_info=mock_DatasetInfo(location_in_file="acquisition/TestElectricalSeriesAP/data"), - chunk_shape=(78_125, 64), - buffer_shape=(1_250_000, 384), # ~1 GB + dataset_configurations: Dict[str, ZarrDatasetIOConfiguration] = { + "acquisition/TestElectricalSeriesAP/data": mock_ZarrDatasetIOConfiguration( + location_in_file="acquisition/TestElectricalSeriesAP/data", + dataset_name="data", filter_methods=["delta"], ), - "acquisition/TestElectricalSeriesLF/data": ZarrDatasetIOConfiguration( - dataset_info=mock_DatasetInfo( - object_id="bc37e164-519f-4b65-a976-206440f1d325", - location_in_file="acquisition/TestElectricalSeriesLF/data", - full_shape=(75_000, 384), - ), + "acquisition/TestElectricalSeriesLF/data": mock_ZarrDatasetIOConfiguration( + object_id="bc37e164-519f-4b65-a976-206440f1d325", + location_in_file="acquisition/TestElectricalSeriesLF/data", + dataset_name="data", + full_shape=(75_000, 384), chunk_shape=(37_500, 128), # ~10 MB buffer_shape=(75_000, 384), filter_methods=["delta"], diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py 
b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py index a83076e98..0a6e56695 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py @@ -9,183 +9,131 @@ ZarrDatasetIOConfiguration, ) from neuroconv.tools.testing import ( - mock_DatasetInfo, mock_HDF5DatasetIOConfiguration, mock_ZarrDatasetIOConfiguration, ) @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] + argnames="dataset_configuration_class", argvalues=[mock_HDF5DatasetIOConfiguration, mock_ZarrDatasetIOConfiguration] ) def test_validator_chunk_length_consistency( dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: - dataset_configuration_class( - dataset_info=mock_DatasetInfo(), - chunk_shape=(78_125, 64, 1), - buffer_shape=(1_250_000, 384), - ) + dataset_configuration_class(chunk_shape=(78_125, 64, 1), buffer_shape=(1_250_000, 384)) expected_error = ( "len(chunk_shape)=3 does not match len(buffer_shape)=2 for dataset at location " - "'acquisition/TestElectricalSeries/data'! (type=value_error)" + "'acquisition/TestElectricalSeries/data'! [type=value_error, " ) assert expected_error in str(error_info.value) @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] + argnames="dataset_configuration_class", argvalues=[mock_HDF5DatasetIOConfiguration, mock_ZarrDatasetIOConfiguration] ) def test_validator_chunk_and_buffer_length_consistency( dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: - dataset_configuration_class( - dataset_info=mock_DatasetInfo(), - chunk_shape=(78_125, 64, 1), - buffer_shape=(1_250_000, 384, 1), - ) + dataset_configuration_class(chunk_shape=(78_125, 64, 1), buffer_shape=(1_250_000, 384, 1)) expected_error = ( "len(buffer_shape)=3 does not match len(full_shape)=2 for dataset at location " - "'acquisition/TestElectricalSeries/data'! (type=value_error)" + "'acquisition/TestElectricalSeries/data'! [type=value_error, " ) assert expected_error in str(error_info.value) @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] + argnames="dataset_configuration_class", argvalues=[mock_HDF5DatasetIOConfiguration, mock_ZarrDatasetIOConfiguration] ) def test_validator_chunk_shape_nonpositive_elements( dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: - dataset_configuration_class( - dataset_info=mock_DatasetInfo(), - chunk_shape=(1, -2), - buffer_shape=(1_250_000, 384), - ) + dataset_configuration_class(chunk_shape=(1, -2), buffer_shape=(1_250_000, 384)) expected_error = ( "Some dimensions of the chunk_shape=(1, -2) are less than or equal to zero for dataset at " - "location 'acquisition/TestElectricalSeries/data'! (type=value_error)" + "location 'acquisition/TestElectricalSeries/data'! 
[type=value_error, " ) assert expected_error in str(error_info.value) @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] + argnames="dataset_configuration_class", argvalues=[mock_HDF5DatasetIOConfiguration, mock_ZarrDatasetIOConfiguration] ) def test_validator_buffer_shape_nonpositive_elements( dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: - dataset_configuration_class( - dataset_info=mock_DatasetInfo(), - chunk_shape=(78_125, 64), - buffer_shape=(78_125, -2), - ) + dataset_configuration_class(chunk_shape=(78_125, 64), buffer_shape=(78_125, -2)) expected_error = ( "Some dimensions of the buffer_shape=(78125, -2) are less than or equal to zero for dataset at " - "location 'acquisition/TestElectricalSeries/data'! (type=value_error)" + "location 'acquisition/TestElectricalSeries/data'! [type=value_error, " ) assert expected_error in str(error_info.value) @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] + argnames="dataset_configuration_class", argvalues=[mock_HDF5DatasetIOConfiguration, mock_ZarrDatasetIOConfiguration] ) def test_validator_chunk_shape_exceeds_buffer_shape( dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: - dataset_configuration_class( - dataset_info=mock_DatasetInfo(), - chunk_shape=(78_126, 64), - buffer_shape=(78_125, 384), - ) + dataset_configuration_class(chunk_shape=(78_126, 64), buffer_shape=(78_125, 384)) expected_error = ( "Some dimensions of the chunk_shape=(78126, 64) exceed the buffer_shape=(78125, 384) for dataset at location " - "'acquisition/TestElectricalSeries/data'! (type=value_error)" + "'acquisition/TestElectricalSeries/data'! [type=value_error, " ) assert expected_error in str(error_info.value) @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] + argnames="dataset_configuration_class", argvalues=[mock_HDF5DatasetIOConfiguration, mock_ZarrDatasetIOConfiguration] ) def test_validator_buffer_shape_exceeds_full_shape( dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: - dataset_configuration_class( - dataset_info=mock_DatasetInfo(), - chunk_shape=(78_125, 64), - buffer_shape=(1_250_000, 385), - ) + dataset_configuration_class(chunk_shape=(78_125, 64), buffer_shape=(1_250_000, 385)) expected_error = ( "Some dimensions of the buffer_shape=(1250000, 385) exceed the full_shape=(1800000, 384) for " - "dataset at location 'acquisition/TestElectricalSeries/data'! (type=value_error)" + "dataset at location 'acquisition/TestElectricalSeries/data'! 
[type=value_error, " ) assert expected_error in str(error_info.value) @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] + argnames="dataset_configuration_class", argvalues=[mock_HDF5DatasetIOConfiguration, mock_ZarrDatasetIOConfiguration] ) def test_validator_chunk_dimensions_do_not_evenly_divide_buffer( dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( - dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 7), buffer_shape=(1_250_000, 383), # Different trigger condition when not the full shape for an axis ) expected_error = ( "Some dimensions of the chunk_shape=(78125, 7) do not evenly divide the buffer_shape=(1250000, 383) for " - "dataset at location 'acquisition/TestElectricalSeries/data'! (type=value_error)" + "dataset at location 'acquisition/TestElectricalSeries/data'! [type=value_error, " ) assert expected_error in str(error_info.value) @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] + argnames="dataset_configuration_class", argvalues=[mock_HDF5DatasetIOConfiguration, mock_ZarrDatasetIOConfiguration] ) def test_validator_chunk_dimensions_do_not_evenly_divide_buffer_skip_full_shape( dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): """Any divisibility is allowed when the buffer shape is capped at the full length of an axis.""" - dataset_configuration_class( - dataset_info=mock_DatasetInfo(), - chunk_shape=(78_125, 7), - buffer_shape=(1_250_000, 384), - ) - - -@pytest.mark.parametrize( - argnames="mock_dataset_configuration", - argvalues=[mock_HDF5DatasetIOConfiguration(), mock_ZarrDatasetIOConfiguration()], -) -def test_mutation_validation( - mock_dataset_configuration: Union[mock_HDF5DatasetIOConfiguration, mock_ZarrDatasetIOConfiguration] -): - """ - Only testing on one dummy case to verify the root validator is triggered. - - Trust the rest should follow. - """ - with pytest.raises(ValueError) as error_info: - mock_dataset_configuration.chunk_shape = (1, -2) - - expected_error = ( - "Some dimensions of the chunk_shape=(1, -2) are less than or equal to zero for dataset at " - "location 'acquisition/TestElectricalSeries/data'! 
(type=value_error)" - ) - assert expected_error in str(error_info.value) + dataset_configuration_class(chunk_shape=(78_125, 7), buffer_shape=(1_250_000, 384)) diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py index 768ceb634..28a4bccbd 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py @@ -39,10 +39,10 @@ def test_configuration_on_time_series(iterator: callable, backend: Literal["hdf5 dataset_configuration = dataset_configurations[0] assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) - assert dataset_configuration.dataset_info.object_id == time_series.object_id - assert dataset_configuration.dataset_info.location_in_file == "acquisition/TestTimeSeries/data" - assert dataset_configuration.dataset_info.full_shape == array.shape - assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.object_id == time_series.object_id + assert dataset_configuration.location_in_file == "acquisition/TestTimeSeries/data" + assert dataset_configuration.full_shape == array.shape + assert dataset_configuration.dtype == array.dtype assert dataset_configuration.chunk_shape == array.shape assert dataset_configuration.buffer_shape == array.shape assert dataset_configuration.compression_method == "gzip" @@ -81,10 +81,10 @@ def test_configuration_on_dynamic_table(iterator: callable, backend: Literal["hd dataset_configuration = dataset_configurations[0] assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) - assert dataset_configuration.dataset_info.object_id == column.object_id - assert dataset_configuration.dataset_info.location_in_file == "acquisition/TestDynamicTable/TestColumn/data" - assert dataset_configuration.dataset_info.full_shape == array.shape - assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.object_id == column.object_id + assert dataset_configuration.location_in_file == "acquisition/TestDynamicTable/TestColumn/data" + assert dataset_configuration.full_shape == array.shape + assert dataset_configuration.dtype == array.dtype assert dataset_configuration.chunk_shape == array.shape assert dataset_configuration.buffer_shape == array.shape assert dataset_configuration.compression_method == "gzip" @@ -117,11 +117,11 @@ def test_configuration_on_ragged_units_table(backend: Literal["hdf5", "zarr"]): dataset_configuration = next( dataset_configuration for dataset_configuration in dataset_configurations - if dataset_configuration.dataset_info.location_in_file == "units/spike_times/data" + if dataset_configuration.location_in_file == "units/spike_times/data" ) assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) - assert dataset_configuration.dataset_info.full_shape == (5,) - assert dataset_configuration.dataset_info.dtype == np.dtype("float64") + assert dataset_configuration.full_shape == (5,) + assert dataset_configuration.dtype == np.dtype("float64") assert dataset_configuration.chunk_shape == (5,) assert dataset_configuration.buffer_shape == (5,) assert dataset_configuration.compression_method == "gzip" @@ -134,11 +134,11 
@@ def test_configuration_on_ragged_units_table(backend: Literal["hdf5", "zarr"]): dataset_configuration = next( dataset_configuration for dataset_configuration in dataset_configurations - if dataset_configuration.dataset_info.location_in_file == "units/spike_times_index/data" + if dataset_configuration.location_in_file == "units/spike_times_index/data" ) assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) - assert dataset_configuration.dataset_info.full_shape == (2,) - assert dataset_configuration.dataset_info.dtype == np.dtype("uint8") + assert dataset_configuration.full_shape == (2,) + assert dataset_configuration.dtype == np.dtype("uint8") assert dataset_configuration.chunk_shape == (2,) assert dataset_configuration.buffer_shape == (2,) assert dataset_configuration.compression_method == "gzip" @@ -151,11 +151,11 @@ def test_configuration_on_ragged_units_table(backend: Literal["hdf5", "zarr"]): dataset_configuration = next( dataset_configuration for dataset_configuration in dataset_configurations - if dataset_configuration.dataset_info.location_in_file == "units/waveforms/data" + if dataset_configuration.location_in_file == "units/waveforms/data" ) assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) - assert dataset_configuration.dataset_info.full_shape == (12, 3) - assert dataset_configuration.dataset_info.dtype == np.dtype("int32") + assert dataset_configuration.full_shape == (12, 3) + assert dataset_configuration.dtype == np.dtype("int32") assert dataset_configuration.chunk_shape == (12, 3) assert dataset_configuration.buffer_shape == (12, 3) assert dataset_configuration.compression_method == "gzip" @@ -168,11 +168,11 @@ def test_configuration_on_ragged_units_table(backend: Literal["hdf5", "zarr"]): dataset_configuration = next( dataset_configuration for dataset_configuration in dataset_configurations - if dataset_configuration.dataset_info.location_in_file == "units/waveforms_index/data" + if dataset_configuration.location_in_file == "units/waveforms_index/data" ) assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) - assert dataset_configuration.dataset_info.full_shape == (4,) - assert dataset_configuration.dataset_info.dtype == np.dtype("uint8") + assert dataset_configuration.full_shape == (4,) + assert dataset_configuration.dtype == np.dtype("uint8") assert dataset_configuration.chunk_shape == (4,) assert dataset_configuration.buffer_shape == (4,) assert dataset_configuration.compression_method == "gzip" @@ -185,11 +185,11 @@ def test_configuration_on_ragged_units_table(backend: Literal["hdf5", "zarr"]): dataset_configuration = next( dataset_configuration for dataset_configuration in dataset_configurations - if dataset_configuration.dataset_info.location_in_file == "units/waveforms_index_index/data" + if dataset_configuration.location_in_file == "units/waveforms_index_index/data" ) assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) - assert dataset_configuration.dataset_info.full_shape == (2,) - assert dataset_configuration.dataset_info.dtype == np.dtype("uint8") + assert dataset_configuration.full_shape == (2,) + assert dataset_configuration.dtype == np.dtype("uint8") assert dataset_configuration.chunk_shape == (2,) assert dataset_configuration.buffer_shape == (2,) assert dataset_configuration.compression_method == "gzip" @@ -218,13 +218,10 @@ def test_configuration_on_compass_direction(iterator: callable, backend: Literal dataset_configuration = dataset_configurations[0] assert 
isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend])
-    assert dataset_configuration.dataset_info.object_id == spatial_series.object_id
-    assert (
-        dataset_configuration.dataset_info.location_in_file
-        == "processing/behavior/TestCompassDirection/TestSpatialSeries/data"
-    )
-    assert dataset_configuration.dataset_info.full_shape == array.shape
-    assert dataset_configuration.dataset_info.dtype == array.dtype
+    assert dataset_configuration.object_id == spatial_series.object_id
+    assert dataset_configuration.location_in_file == "processing/behavior/TestCompassDirection/TestSpatialSeries/data"
+    assert dataset_configuration.full_shape == array.shape
+    assert dataset_configuration.dtype == array.dtype
     assert dataset_configuration.chunk_shape == array.shape
     assert dataset_configuration.buffer_shape == array.shape
     assert dataset_configuration.compression_method == "gzip"
@@ -267,13 +264,13 @@ def test_configuration_on_ndx_events(backend: Literal["hdf5", "zarr"]):
     data_dataset_configuration = next(
         dataset_configuration
         for dataset_configuration in dataset_configurations
-        if dataset_configuration.dataset_info.dataset_name == "data"
+        if dataset_configuration.dataset_name == "data"
     )
     assert isinstance(data_dataset_configuration, DATASET_IO_CONFIGURATIONS[backend])
-    assert data_dataset_configuration.dataset_info.object_id == labeled_events.object_id
-    assert data_dataset_configuration.dataset_info.location_in_file == "processing/behavior/TestLabeledEvents/data"
-    assert data_dataset_configuration.dataset_info.full_shape == data.shape
-    assert data_dataset_configuration.dataset_info.dtype == data.dtype
+    assert data_dataset_configuration.object_id == labeled_events.object_id
+    assert data_dataset_configuration.location_in_file == "processing/behavior/TestLabeledEvents/data"
+    assert data_dataset_configuration.full_shape == data.shape
+    assert data_dataset_configuration.dtype == data.dtype
     assert data_dataset_configuration.chunk_shape == data.shape
     assert data_dataset_configuration.buffer_shape == data.shape
     assert data_dataset_configuration.compression_method == "gzip"
@@ -286,16 +283,13 @@ def test_configuration_on_ndx_events(backend: Literal["hdf5", "zarr"]):
     timestamps_dataset_configuration = next(
         dataset_configuration
        for dataset_configuration in dataset_configurations
-        if dataset_configuration.dataset_info.dataset_name == "timestamps"
+        if dataset_configuration.dataset_name == "timestamps"
     )
     assert isinstance(timestamps_dataset_configuration, DATASET_IO_CONFIGURATIONS[backend])
-    assert timestamps_dataset_configuration.dataset_info.object_id == labeled_events.object_id
-    assert (
-        timestamps_dataset_configuration.dataset_info.location_in_file
-        == "processing/behavior/TestLabeledEvents/timestamps"
-    )
-    assert timestamps_dataset_configuration.dataset_info.full_shape == timestamps.shape
-    assert timestamps_dataset_configuration.dataset_info.dtype == timestamps.dtype
+    assert timestamps_dataset_configuration.object_id == labeled_events.object_id
+    assert timestamps_dataset_configuration.location_in_file == "processing/behavior/TestLabeledEvents/timestamps"
+    assert timestamps_dataset_configuration.full_shape == timestamps.shape
+    assert timestamps_dataset_configuration.dtype == timestamps.dtype
     assert timestamps_dataset_configuration.chunk_shape == timestamps.shape
     assert timestamps_dataset_configuration.buffer_shape == timestamps.shape
     assert timestamps_dataset_configuration.compression_method == "gzip"
diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations_appended_files.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations_appended_files.py
index b78448399..dca727f03 100644
--- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations_appended_files.py
+++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations_appended_files.py
@@ -62,10 +62,10 @@ def test_unwrapped_time_series_hdf5(hdf5_nwbfile_path):
 
     dataset_configuration = dataset_configurations[0]
     assert isinstance(dataset_configuration, HDF5DatasetIOConfiguration)
-    assert dataset_configuration.dataset_info.object_id == new_time_series.object_id
-    assert dataset_configuration.dataset_info.location_in_file == "acquisition/NewTimeSeries/data"
-    assert dataset_configuration.dataset_info.full_shape == array.shape
-    assert dataset_configuration.dataset_info.dtype == array.dtype
+    assert dataset_configuration.object_id == new_time_series.object_id
+    assert dataset_configuration.location_in_file == "acquisition/NewTimeSeries/data"
+    assert dataset_configuration.full_shape == array.shape
+    assert dataset_configuration.dtype == array.dtype
     assert dataset_configuration.chunk_shape == array.shape
     assert dataset_configuration.buffer_shape == array.shape
     assert dataset_configuration.compression_method == "gzip"
@@ -85,10 +85,10 @@ def test_unwrapped_time_series_zarr(zarr_nwbfile_path):
 
     dataset_configuration = dataset_configurations[0]
     assert isinstance(dataset_configuration, ZarrDatasetIOConfiguration)
-    assert dataset_configuration.dataset_info.object_id == new_time_series.object_id
-    assert dataset_configuration.dataset_info.location_in_file == "acquisition/NewTimeSeries/data"
-    assert dataset_configuration.dataset_info.full_shape == array.shape
-    assert dataset_configuration.dataset_info.dtype == array.dtype
+    assert dataset_configuration.object_id == new_time_series.object_id
+    assert dataset_configuration.location_in_file == "acquisition/NewTimeSeries/data"
+    assert dataset_configuration.full_shape == array.shape
+    assert dataset_configuration.dtype == array.dtype
     assert dataset_configuration.chunk_shape == array.shape
     assert dataset_configuration.buffer_shape == array.shape
     assert dataset_configuration.compression_method == "gzip"
@@ -111,10 +111,10 @@ def test_unwrapped_dynamic_table_hdf5(hdf5_nwbfile_path):
 
     dataset_configuration = dataset_configurations[0]
     assert isinstance(dataset_configuration, HDF5DatasetIOConfiguration)
-    assert dataset_configuration.dataset_info.object_id == column.object_id
-    assert dataset_configuration.dataset_info.location_in_file == "acquisition/TestDynamicTable/TestColumn/data"
-    assert dataset_configuration.dataset_info.full_shape == array.shape
-    assert dataset_configuration.dataset_info.dtype == array.dtype
+    assert dataset_configuration.object_id == column.object_id
+    assert dataset_configuration.location_in_file == "acquisition/TestDynamicTable/TestColumn/data"
+    assert dataset_configuration.full_shape == array.shape
+    assert dataset_configuration.dtype == array.dtype
     assert dataset_configuration.chunk_shape == array.shape
     assert dataset_configuration.buffer_shape == array.shape
     assert dataset_configuration.compression_method == "gzip"
@@ -135,10 +135,10 @@ def test_unwrapped_dynamic_table_zarr(zarr_nwbfile_path):
 
     dataset_configuration = dataset_configurations[0]
     assert isinstance(dataset_configuration, ZarrDatasetIOConfiguration)
-    assert dataset_configuration.dataset_info.object_id == column.object_id
-    assert dataset_configuration.dataset_info.location_in_file == "acquisition/TestDynamicTable/TestColumn/data"
-    assert dataset_configuration.dataset_info.full_shape == array.shape
-    assert dataset_configuration.dataset_info.dtype == array.dtype
+    assert dataset_configuration.object_id == column.object_id
+    assert dataset_configuration.location_in_file == "acquisition/TestDynamicTable/TestColumn/data"
+    assert dataset_configuration.full_shape == array.shape
+    assert dataset_configuration.dtype == array.dtype
     assert dataset_configuration.chunk_shape == array.shape
     assert dataset_configuration.buffer_shape == array.shape
     assert dataset_configuration.compression_method == "gzip"
diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_info_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_info_model.py
deleted file mode 100644
index b9469ef54..000000000
--- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_info_model.py
+++ /dev/null
@@ -1,43 +0,0 @@
-"""Unit tests for the DatasetInfo Pydantic model."""
-
-from io import StringIO
-from unittest.mock import patch
-
-from neuroconv.tools.testing import mock_DatasetInfo
-
-
-def test_dataset_info_print():
-    """Test the printout display of a Dataset model looks nice."""
-    dataset_info = mock_DatasetInfo()
-
-    with patch("sys.stdout", new=StringIO()) as out:
-        print(dataset_info)
-
-    expected_print = """
-acquisition/TestElectricalSeries/data
--------------------------------------
-  dtype : int16
-  full shape of source array : (1800000, 384)
-  full size of source array : 1.38 GB
-"""
-    assert out.getvalue() == expected_print
-
-
-def test_dataset_info_repr():
-    """Test the programmatic repr of a Dataset model is more dataclass-like."""
-    dataset_info = mock_DatasetInfo()
-
-    # Important to keep the `repr` unmodified for appearance inside iterables of DatasetInfo objects
-    expected_repr = (
-        "DatasetInfo(object_id='481a0860-3a0c-40ec-b931-df4a3e9b101f', "
-        "location_in_file='acquisition/TestElectricalSeries/data', dataset_name='data', dtype=dtype('int16'), "
-        "full_shape=(1800000, 384))"
-    )
-    assert repr(dataset_info) == expected_repr
-
-
-def test_dataset_info_hashability():
-    dataset_info = mock_DatasetInfo()
-
-    test_dict = {dataset_info: True}  # Technically this alone would raise an error if it didn't work...
-    assert test_dict[dataset_info] is True  # ... but asserting this for good measure.
diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py
index b5fffa134..060811ebf 100644
--- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py
+++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py
@@ -1,14 +1,23 @@
 """Unit tests for the DatasetConfiguration Pydantic model."""
 
+import numpy as np
 import pytest
 
 from neuroconv.tools.nwb_helpers import DatasetIOConfiguration
-from neuroconv.tools.testing import mock_DatasetInfo
 
 
 def test_get_data_io_kwargs_abstract_error():
     with pytest.raises(TypeError) as error_info:
-        DatasetIOConfiguration(dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 64), buffer_shape=(1_250_000, 384))
+        DatasetIOConfiguration(
+            object_id="481a0860-3a0c-40ec-b931-df4a3e9b101f",
+            location_in_file="acquisition/TestElectricalSeries/data",
+            dataset_name="data",
+            full_shape=(60 * 30_000, 384),  # ~1 minute of v1 NeuroPixels probe
+            dtype=np.dtype("int16"),
+            chunk_shape=(78_125, 64),
+            buffer_shape=(1_250_000, 384),
+            compression_method="gzip",
+        )
 
     assert "Can't instantiate abstract class DatasetIOConfiguration" in str(error_info.value)
 
@@ -18,9 +27,14 @@ def get_data_io_kwargs(self):
             super().get_data_io_kwargs()
 
     dataset_io_configuration = TestDatasetIOConfiguration(
-        dataset_info=mock_DatasetInfo(),
+        object_id="481a0860-3a0c-40ec-b931-df4a3e9b101f",
+        location_in_file="acquisition/TestElectricalSeries/data",
+        dataset_name="data",
+        full_shape=(60 * 30_000, 384),  # ~1 minute of v1 NeuroPixels probe
+        dtype=np.dtype("int16"),
         chunk_shape=(78_125, 64),
         buffer_shape=(1_250_000, 384),
+        compression_method="gzip",
     )
 
     with pytest.raises(NotImplementedError):
diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_io_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_io_configuration_model.py
index 63579a72b..fd3434493 100644
--- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_io_configuration_model.py
+++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_io_configuration_model.py
@@ -90,9 +90,9 @@ def test_hdf5_dataset_configuration_repr():
 
     # Important to keep the `repr` unmodified for appearance inside iterables of DatasetInfo objects
     expected_repr = (
-        "HDF5DatasetIOConfiguration(dataset_info=DatasetInfo(object_id='481a0860-3a0c-40ec-b931-df4a3e9b101f', "
+        "HDF5DatasetIOConfiguration(object_id='481a0860-3a0c-40ec-b931-df4a3e9b101f', "
         "location_in_file='acquisition/TestElectricalSeries/data', dataset_name='data', dtype=dtype('int16'), "
-        "full_shape=(1800000, 384)), chunk_shape=(78125, 64), buffer_shape=(1250000, 384), compression_method='gzip', "
+        "full_shape=(1800000, 384), chunk_shape=(78125, 64), buffer_shape=(1250000, 384), compression_method='gzip', "
         "compression_options=None)"
     )
     assert repr(hdf5_dataset_configuration) == expected_repr
diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_io_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_io_configuration_model.py
index ce7ddb0c7..2e5735c44 100644
--- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_io_configuration_model.py
+++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_io_configuration_model.py
@@ -6,11 +6,8 @@
 import pytest
 from numcodecs import GZip
 
-from neuroconv.tools.nwb_helpers import (
-    AVAILABLE_ZARR_COMPRESSION_METHODS,
-    ZarrDatasetIOConfiguration,
-)
-from neuroconv.tools.testing import mock_DatasetInfo, mock_ZarrDatasetIOConfiguration
+from neuroconv.tools.nwb_helpers import AVAILABLE_ZARR_COMPRESSION_METHODS
+from neuroconv.tools.testing import mock_ZarrDatasetIOConfiguration
 
 
 def test_zarr_dataset_io_configuration_print():
@@ -155,9 +152,9 @@ def test_zarr_dataset_configuration_repr():
 
     # Important to keep the `repr` unmodified for appearance inside iterables of DatasetInfo objects
     expected_repr = (
-        "ZarrDatasetIOConfiguration(dataset_info=DatasetInfo(object_id='481a0860-3a0c-40ec-b931-df4a3e9b101f', "
+        "ZarrDatasetIOConfiguration(object_id='481a0860-3a0c-40ec-b931-df4a3e9b101f', "
         "location_in_file='acquisition/TestElectricalSeries/data', dataset_name='data', dtype=dtype('int16'), "
-        "full_shape=(1800000, 384)), chunk_shape=(78125, 64), buffer_shape=(1250000, 384), compression_method='gzip', "
+        "full_shape=(1800000, 384), chunk_shape=(78125, 64), buffer_shape=(1250000, 384), compression_method='gzip', "
         "compression_options=None, filter_methods=None, filter_options=None)"
     )
     assert repr(zarr_dataset_configuration) == expected_repr
@@ -165,8 +162,7 @@ def test_zarr_dataset_configuration_repr():
 
 def test_validator_filter_options_has_methods():
     with pytest.raises(ValueError) as error_info:
-        ZarrDatasetIOConfiguration(
-            dataset_info=mock_DatasetInfo(),
+        mock_ZarrDatasetIOConfiguration(
             chunk_shape=(78_125, 64),
             buffer_shape=(1_250_000, 384),
             filter_methods=None,
@@ -175,15 +171,14 @@ def test_validator_filter_options_has_methods():
 
     expected_error = (
         "`filter_methods` is `None` but `filter_options` is not `None` "
-        "(received `filter_options=[{'clevel': 5}]`)! (type=value_error)"
+        "(received `filter_options=[{'clevel': 5}]`)! [type=value_error, "
     )
     assert expected_error in str(error_info.value)
 
 
 def test_validator_filter_methods_length_match_options():
     with pytest.raises(ValueError) as error_info:
-        ZarrDatasetIOConfiguration(
-            dataset_info=mock_DatasetInfo(),
+        mock_ZarrDatasetIOConfiguration(
             chunk_shape=(78_125, 64),
             buffer_shape=(1_250_000, 384),
             filter_methods=["blosc", "delta"],
@@ -192,7 +187,7 @@
 
     expected_error = (
         "Length mismatch between `filter_methods` (2 methods specified) and `filter_options` (1 options found)! "
-        "`filter_methods` and `filter_options` should be the same length. (type=value_error)"
+        "`filter_methods` and `filter_options` should be the same length. [type=value_error, "
     )
     assert expected_error in str(error_info.value)