diff --git a/CHANGELOG.md b/CHANGELOG.md
index ffd672e43..49dd65081 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@
 ### Features
 * Added `backend` control to the `make_or_load_nwbfile` helper method in `neuroconv.tools.nwb_helpers`. [PR #800](https://github.com/catalystneuro/neuroconv/pull/800)
 * Added `get_default_backend_configuration` method to all `DataInterface` classes. Also added HDF5 `backend` control to all standalone `.run_conversion(...)` methods for those interfaces. [PR #801](https://github.com/catalystneuro/neuroconv/pull/801)
+* Added `get_default_backend_configuration` method to all `NWBConverter` classes. Also added HDF5 `backend` control to `.run_conversion(...)`. [PR #804](https://github.com/catalystneuro/neuroconv/pull/804)
 * Released the first official Docker images for the package on the GitHub Container Repository (GHCR). [PR #383](https://github.com/catalystneuro/neuroconv/pull/383)
 * Added `ScanImageMultiFileImagingInterface` for multi-file (buffered) ScanImage format and changed `ScanImageImagingInterface` to be routing classes for single and multi-plane imaging. [PR #809](https://github.com/catalystneuro/neuroconv/pull/809)
diff --git a/src/neuroconv/nwbconverter.py b/src/neuroconv/nwbconverter.py
index 8817c5c42..3faa5385d 100644
--- a/src/neuroconv/nwbconverter.py
+++ b/src/neuroconv/nwbconverter.py
@@ -1,13 +1,20 @@
 import json
 from collections import Counter
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Literal, Optional, Tuple, Union
 
 from jsonschema import validate
 from pynwb import NWBFile
 
 from .basedatainterface import BaseDataInterface
-from .tools.nwb_helpers import get_default_nwbfile_metadata, make_or_load_nwbfile
+from .tools.nwb_helpers import (
+    HDF5BackendConfiguration,
+    ZarrBackendConfiguration,
+    configure_backend,
+    get_default_backend_configuration,
+    get_default_nwbfile_metadata,
+    make_or_load_nwbfile,
+)
 from .utils import (
     dict_deep_update,
     fill_defaults,
@@ -126,6 +133,11 @@ def run_conversion(
         nwbfile: Optional[NWBFile] = None,
         metadata: Optional[dict] = None,
         overwrite: bool = False,
+        # TODO: when all H5DataIO prewraps are gone, introduce Zarr safely
+        # backend: Union[Literal["hdf5", "zarr"]],
+        # backend_configuration: Optional[Union[HDF5BackendConfiguration, ZarrBackendConfiguration]] = None,
+        backend: Literal["hdf5"] = "hdf5",
+        backend_configuration: Optional[HDF5BackendConfiguration] = None,
         conversion_options: Optional[dict] = None,
     ) -> None:
         """
@@ -142,6 +154,13 @@
         overwrite : bool, default: False
             Whether to overwrite the NWBFile if one exists at the nwbfile_path.
             The default is False (append mode).
+        backend : "hdf5", default: "hdf5"
+            The type of backend to use when writing the file.
+        backend_configuration : HDF5BackendConfiguration, optional
+            The configuration model to use when configuring the datasets for this backend.
+            To customize, call the `.get_default_backend_configuration(...)` method, modify the returned
+            BackendConfiguration object, and pass that instead.
+            Otherwise, all datasets will use default configuration settings.
         conversion_options : dict, optional
             Similar to source_data, a dictionary containing keywords for each interface for which non-default
             conversion specification is requested.
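To make the new converter-level workflow concrete, here is a minimal usage sketch. It assumes the `ConverterPipe` and `MockRecordingInterface` touched elsewhere in this diff; the output path and the compression tweak are purely illustrative.

```python
from datetime import datetime

from pynwb.testing.mock.file import mock_NWBFile

from neuroconv import ConverterPipe
from neuroconv.tools.testing.mock_interfaces import MockRecordingInterface

# Wrap any pre-initialized interface(s) in a converter
interface = MockRecordingInterface(num_channels=4, durations=(1.0,))
converter = ConverterPipe(data_interfaces=[interface])

metadata = converter.get_metadata()
if "session_start_time" not in metadata["NWBFile"]:
    metadata["NWBFile"].update(session_start_time=datetime.now().astimezone())

# Simplest case: default HDF5 dataset configurations are applied automatically
converter.run_conversion(nwbfile_path="demo.nwb", metadata=metadata, overwrite=True, backend="hdf5")

# To customize: add the data to an in-memory NWBFile, fetch the default configuration,
# modify it, then pass both back to `run_conversion`
nwbfile = mock_NWBFile()
converter.add_to_nwbfile(nwbfile, metadata=metadata)
backend_configuration = converter.get_default_backend_configuration(nwbfile=nwbfile, backend="hdf5")
for dataset_configuration in backend_configuration.dataset_configurations.values():
    dataset_configuration.compression_method = "gzip"  # Illustrative; "gzip" is already the HDF5 default
converter.run_conversion(
    nwbfile_path="demo.nwb",
    nwbfile=nwbfile,
    metadata=metadata,
    overwrite=True,
    backend="hdf5",
    backend_configuration=backend_configuration,
)
```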
@@ -150,7 +169,6 @@ def run_conversion(
             metadata = self.get_metadata()
 
         self.validate_metadata(metadata=metadata)
-        self.validate_conversion_options(conversion_options=conversion_options)
 
         self.temporally_align_data_interfaces()
 
@@ -160,14 +178,43 @@ def run_conversion(
             nwbfile=nwbfile,
             metadata=metadata,
             overwrite=overwrite,
-            verbose=self.verbose,
+            backend=backend,
+            verbose=getattr(self, "verbose", False),
         ) as nwbfile_out:
-            self.add_to_nwbfile(nwbfile_out, metadata, conversion_options)
+            if backend_configuration is None:
+                # No configuration was provided, so add the data here; otherwise, assume the data
+                # was already added to the NWBFile while the custom configuration was being built
+                self.add_to_nwbfile(nwbfile_out, metadata=metadata, conversion_options=conversion_options)
+
+                backend_configuration = self.get_default_backend_configuration(nwbfile=nwbfile_out, backend=backend)
+
+            configure_backend(nwbfile=nwbfile_out, backend_configuration=backend_configuration)
 
     def temporally_align_data_interfaces(self):
         """Override this method to implement custom alignment"""
         pass
 
+    @staticmethod
+    def get_default_backend_configuration(
+        nwbfile: NWBFile,
+        backend: Literal["hdf5", "zarr"] = "hdf5",
+    ) -> Union[HDF5BackendConfiguration, ZarrBackendConfiguration]:
+        """
+        Fill and return a default backend configuration to serve as a starting point for further customization.
+
+        Parameters
+        ----------
+        nwbfile : pynwb.NWBFile
+            The in-memory object with this converter's data already added to it.
+        backend : "hdf5" or "zarr", default: "hdf5"
+            The type of backend to use when creating the file.
+
+        Returns
+        -------
+        backend_configuration : HDF5BackendConfiguration or ZarrBackendConfiguration
+            The default configuration for the specified backend type.
+        """
+        return get_default_backend_configuration(nwbfile=nwbfile, backend=backend)
+
 
 class ConverterPipe(NWBConverter):
     """Takes a list or dict of pre-initialized interfaces as arguments to build an NWBConverter class"""
diff --git a/src/neuroconv/tools/nwb_helpers/_configure_backend.py b/src/neuroconv/tools/nwb_helpers/_configure_backend.py
index d4fe1d2c8..ab3326614 100644
--- a/src/neuroconv/tools/nwb_helpers/_configure_backend.py
+++ b/src/neuroconv/tools/nwb_helpers/_configure_backend.py
@@ -1,5 +1,6 @@
 """Collection of helper functions related to configuration of datasets dependent on backend."""
 
+import sys
 from typing import Union
 
 from hdmf.common import Data
@@ -7,6 +8,7 @@
 from ._configuration_models._hdf5_backend import HDF5BackendConfiguration
 from ._configuration_models._zarr_backend import ZarrBackendConfiguration
+from ..importing import is_package_installed
 
 
 def configure_backend(
@@ -22,6 +24,9 @@
     backend_configuration : HDF5BackendConfiguration, optional
         The configuration model to use when configuring the datasets for this backend.
""" + is_ndx_events_installed = is_package_installed(package_name="ndx_events") + ndx_events = sys.modules.get("ndx_events", None) + nwbfile_objects = nwbfile.objects # Set all DataIO based on the configuration @@ -44,10 +49,18 @@ def configure_backend( neurodata_object.set_data_io( dataset_name=dataset_name, data_io_class=data_io_class, data_io_kwargs=data_io_kwargs ) + # Special ndx-events v0.2.0 types + elif is_ndx_events_installed and isinstance(neurodata_object, ndx_events.Events): + neurodata_object.set_data_io( + dataset_name=dataset_name, data_io_class=data_io_class, data_io_kwargs=data_io_kwargs + ) + # But temporarily skipping LabeledEvents + elif is_ndx_events_installed and isinstance(neurodata_object, ndx_events.LabeledEvents): + continue # Skip the setting of a DataIO when target dataset is a link (assume it will be found in parent) elif isinstance(neurodata_object, TimeSeries) and is_dataset_linked: continue - # Strictly speaking, it would be odd if a backend_configuration led to this, but might as well be safe + # Strictly speaking, it would be odd if a `backend_configuration` got to this line, but might as well be safe else: raise NotImplementedError( f"Unsupported object type {type(neurodata_object)} for backend configuration " diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index 35eb44f23..b8a233519 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -10,6 +10,7 @@ from hdmf_zarr import NWBZarrIO from pynwb import NWBHDF5IO, NWBFile from pynwb.base import DynamicTable, TimeSeriesReferenceVectorData +from pynwb.file import NWBContainer from ._configuration_models._base_dataset_io import DatasetIOConfiguration @@ -101,6 +102,7 @@ def get_default_dataset_io_configurations( f"({backend}) does not match! Set `backend=None` or remove the keyword argument to allow it to auto-detect." 
         )
 
+    known_dataset_fields = ("data", "timestamps")
     for neurodata_object in nwbfile.objects.values():
         if isinstance(neurodata_object, DynamicTable):
             dynamic_table = neurodata_object  # For readability
@@ -129,16 +131,13 @@
                 )
 
                 yield dataset_io_configuration
-        else:
-            # Primarily for TimeSeries, but also any extended class that has 'data' or 'timestamps'
-            # The most common example of this is ndx-events Events/LabeledEvents types
-            time_series = neurodata_object  # For readability
-
-            for dataset_name in ("data", "timestamps"):
-                if dataset_name not in time_series.fields:  # The 'timestamps' field is optional
+        elif isinstance(neurodata_object, NWBContainer):
+            for known_dataset_field in known_dataset_fields:
+                # Skip optional fields that aren't present
+                if known_dataset_field not in neurodata_object.fields:
                     continue
-                candidate_dataset = getattr(time_series, dataset_name)
+                candidate_dataset = getattr(neurodata_object, known_dataset_field)
 
                 # Skip if already written to file
                 if _is_dataset_written_to_file(
@@ -155,7 +154,7 @@
                     continue
 
                 dataset_io_configuration = DatasetIOConfigurationClass.from_neurodata_object(
-                    neurodata_object=time_series, dataset_name=dataset_name
+                    neurodata_object=neurodata_object, dataset_name=known_dataset_field
                 )
 
                 yield dataset_io_configuration
diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py
index 4e36bb6a4..e635ab995 100644
--- a/src/neuroconv/tools/testing/data_interface_mixins.py
+++ b/src/neuroconv/tools/testing/data_interface_mixins.py
@@ -16,7 +16,7 @@
 from pynwb.testing.mock.file import mock_NWBFile
 from spikeinterface.core.testing import check_recordings_equal, check_sortings_equal
 
-from neuroconv.basedatainterface import BaseDataInterface
+from neuroconv import BaseDataInterface, NWBConverter
 from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import (
     BaseRecordingExtractorInterface,
 )
@@ -124,6 +124,29 @@ def check_run_conversion_custom_backend(self, nwbfile_path: str, backend: Litera
             **self.conversion_options,
         )
 
+    def check_run_conversion_default_backend_in_nwbconverter(
+        self, nwbfile_path: str, backend: Literal["hdf5", "zarr"] = "hdf5"
+    ):
+        class TestNWBConverter(NWBConverter):
+            data_interface_classes = dict(Test=type(self.interface))
+
+        test_kwargs = self.test_kwargs[0] if isinstance(self.test_kwargs, list) else self.test_kwargs
+        source_data = dict(Test=test_kwargs)
+        converter = TestNWBConverter(source_data=source_data)
+
+        metadata = converter.get_metadata()
+        if "session_start_time" not in metadata["NWBFile"]:
+            metadata["NWBFile"].update(session_start_time=datetime.now().astimezone())
+
+        conversion_options = dict(Test=self.conversion_options)
+        converter.run_conversion(
+            nwbfile_path=nwbfile_path,
+            overwrite=True,
+            metadata=metadata,
+            backend=backend,
+            conversion_options=conversion_options,
+        )
+
     @abstractmethod
     def check_read_nwb(self, nwbfile_path: str):
         """Read the produced NWB file and compare it to the interface."""
@@ -142,7 +165,7 @@
         """Override this in child classes to inject additional custom checks."""
         pass
 
-    def test_conversion_as_lone_interface(self):
+    def test_all_conversion_checks(self):
         interface_kwargs = self.interface_kwargs
         if isinstance(interface_kwargs, dict):
             interface_kwargs = [interface_kwargs]
@@ -159,6 +182,10 @@
 
                 self.check_no_metadata_mutation()
 
+                self.check_run_conversion_default_backend_in_nwbconverter(
+                    nwbfile_path=self.nwbfile_path, backend="hdf5"
+                )
+
                 self.check_run_conversion_default_backend(nwbfile_path=self.nwbfile_path, backend="hdf5")
                 self.check_run_conversion_custom_backend(nwbfile_path=self.nwbfile_path, backend="hdf5")
 
@@ -545,7 +572,7 @@
 
         self.check_nwbfile_temporal_alignment()
 
-    def test_conversion_as_lone_interface(self):
+    def test_all_conversion_checks(self):
         interface_kwargs = self.interface_kwargs
         if isinstance(interface_kwargs, dict):
             interface_kwargs = [interface_kwargs]
@@ -568,6 +595,10 @@
 
                 self.check_no_metadata_mutation()
 
+                self.check_run_conversion_default_backend_in_nwbconverter(
+                    nwbfile_path=self.nwbfile_path, backend="hdf5"
+                )
+
                 self.check_run_conversion_default_backend(nwbfile_path=self.nwbfile_path, backend="hdf5")
                 self.check_run_conversion_custom_backend(nwbfile_path=self.nwbfile_path, backend="hdf5")
 
@@ -708,11 +739,13 @@
 class AudioInterfaceTestMixin(DataInterfaceTestMixin, TemporalAlignmentMixin):
+    # Currently asserted in the downstream testing suite; could be refactored in a future PR
     def check_read_nwb(self, nwbfile_path: str):
-        pass  # asserted in the testing suite; could be refactored in future PR
+        pass
 
+    # Currently asserted in the downstream testing suite
     def test_interface_alignment(self):
-        pass  # Currently asserted in the testing suite
+        pass
 
 
 class DeepLabCutInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin):
diff --git a/src/neuroconv/tools/testing/mock_interfaces.py b/src/neuroconv/tools/testing/mock_interfaces.py
index b0e2b880b..19514f94c 100644
--- a/src/neuroconv/tools/testing/mock_interfaces.py
+++ b/src/neuroconv/tools/testing/mock_interfaces.py
@@ -128,7 +128,8 @@ def __init__(
         self,
         num_channels: int = 4,
         sampling_frequency: float = 30_000.0,
-        durations: List[float] = [1.0],
+        # durations: Tuple[float] = (1.0,),  # Uncomment when pydantic is integrated for schema validation
+        durations: tuple = (1.0,),
         seed: int = 0,
         verbose: bool = True,
         es_key: str = "ElectricalSeries",
diff --git a/tests/test_on_data/test_behavior_interfaces.py b/tests/test_on_data/test_behavior_interfaces.py
index 3986233db..082faf709 100644
--- a/tests/test_on_data/test_behavior_interfaces.py
+++ b/tests/test_on_data/test_behavior_interfaces.py
@@ -1,6 +1,7 @@
 import unittest
 from datetime import datetime, timezone
 from pathlib import Path
+from typing import Literal
 
 import numpy as np
 import pandas as pd
@@ -225,6 +226,12 @@
         # This is currently structured to assert spatial_series.timestamps[0] == 0.0
 
+    # TODO: undo this skip in a future PR
+    def check_run_conversion_default_backend_in_nwbconverter(
+        self, nwbfile_path: str, backend: Literal["hdf5", "zarr"] = "hdf5"
+    ):
+        pass
+
 
 class TestFicTracDataInterfaceWithRadius(DataInterfaceTestMixin, unittest.TestCase):
     data_interface_cls = FicTracDataInterface
diff --git a/tests/test_on_data/test_recording_interfaces.py b/tests/test_on_data/test_recording_interfaces.py
index 294eb03da..3b3d952f4 100644
--- a/tests/test_on_data/test_recording_interfaces.py
+++ b/tests/test_on_data/test_recording_interfaces.py
@@ -1,6 +1,7 @@
 from datetime import datetime
 from platform import python_version
 from sys import platform
+from typing import Literal
 from unittest import skip, skipIf
 
 import numpy as np
@@ -174,6 +175,12 @@ def test_interface_alignment(self):
 
         self.check_nwbfile_temporal_alignment()
 
+    # EDF has simultaneous-access issues; multiple interfaces cannot have the same file open at once
+    def check_run_conversion_default_backend_in_nwbconverter(
+        self, nwbfile_path: str, backend: Literal["hdf5", "zarr"] = "hdf5"
+    ):
+        pass
+
 
 class TestIntanRecordingInterface(RecordingExtractorInterfaceTestMixin, TestCase):
     data_interface_cls = IntanRecordingInterface
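The converter method above is a thin wrapper around the helper-level API in `neuroconv.tools.nwb_helpers`, which can also be driven directly. A minimal sketch using pynwb's mock objects (the output path is illustrative):

```python
import numpy as np
from pynwb import NWBHDF5IO
from pynwb.testing.mock.base import mock_TimeSeries
from pynwb.testing.mock.file import mock_NWBFile

from neuroconv.tools.nwb_helpers import (
    configure_backend,
    get_default_backend_configuration,
)

# Build a small in-memory NWBFile containing one TimeSeries dataset
nwbfile = mock_NWBFile()
nwbfile.add_acquisition(mock_TimeSeries(name="TestTimeSeries", data=np.ones(shape=(1000, 4))))

# Fetch per-dataset defaults (chunking and compression) for the HDF5 backend...
backend_configuration = get_default_backend_configuration(nwbfile=nwbfile, backend="hdf5")

# ...then apply them in place by wrapping each dataset in the matching DataIO class
configure_backend(nwbfile=nwbfile, backend_configuration=backend_configuration)

# Write with the pynwb IO class that matches the chosen backend
with NWBHDF5IO(path="configured_demo.nwb", mode="w") as io:
    io.write(nwbfile)
```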