[Backend Configuration IVd] Integrate backend methods with NWBConverter (#804)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: CodyCBakerPhD <[email protected]>
3 people authored Apr 26, 2024
1 parent 7a18fc6 commit 62802c4
Showing 8 changed files with 129 additions and 21 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -13,6 +13,7 @@
### Features
* Added `backend` control to the `make_or_load_nwbfile` helper method in `neuroconv.tools.nwb_helpers`. [PR #800](https://github.com/catalystneuro/neuroconv/pull/800)
* Added `get_default_backend_configuration` method to all `DataInterface` classes. Also added HDF5 `backend` control to all standalone `.run_conversion(...)` methods for those interfaces. [PR #801](https://github.com/catalystneuro/neuroconv/pull/801)
* Added `get_default_backend_configuration` method to all `NWBConverter` classes. Also added HDF5 `backend` control to `.run_conversion(...)`. [PR #804](https://github.com/catalystneuro/neuroconv/pull/804)
* Released the first official Docker images for the package on the GitHub Container Repository (GHCR). [PR #383](https://github.com/catalystneuro/neuroconv/pull/383)
* Added `ScanImageMultiFileImagingInterface` for multi-file (buffered) ScanImage format and changed `ScanImageImagingInterface` to be routing classes for single and multi-plane imaging. [PR #809](https://github.com/catalystneuro/neuroconv/pull/809)

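Taken together, these entries mean a backend can now be selected at conversion time through the converter-level API. A minimal sketch, assuming a hypothetical converter subclass and source path:

```python
from neuroconv import NWBConverter
from neuroconv.datainterfaces import SpikeGLXRecordingInterface


class ExampleNWBConverter(NWBConverter):
    # Keys here are reused in source_data and conversion_options
    data_interface_classes = dict(Recording=SpikeGLXRecordingInterface)


source_data = dict(Recording=dict(file_path="path/to/session.imec0.ap.bin"))  # hypothetical path
converter = ExampleNWBConverter(source_data=source_data)

converter.run_conversion(
    nwbfile_path="example.nwb",
    metadata=converter.get_metadata(),
    overwrite=True,
    backend="hdf5",  # only "hdf5" is exposed by this PR; "zarr" is planned per the TODO in the diff below
)
```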
57 changes: 52 additions & 5 deletions src/neuroconv/nwbconverter.py
@@ -1,13 +1,20 @@
import json
from collections import Counter
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, List, Literal, Optional, Tuple, Union

from jsonschema import validate
from pynwb import NWBFile

from .basedatainterface import BaseDataInterface
from .tools.nwb_helpers import get_default_nwbfile_metadata, make_or_load_nwbfile
from .tools.nwb_helpers import (
HDF5BackendConfiguration,
ZarrBackendConfiguration,
configure_backend,
get_default_backend_configuration,
get_default_nwbfile_metadata,
make_or_load_nwbfile,
)
from .utils import (
dict_deep_update,
fill_defaults,
@@ -126,6 +133,11 @@ def run_conversion(
nwbfile: Optional[NWBFile] = None,
metadata: Optional[dict] = None,
overwrite: bool = False,
# TODO: when all H5DataIO prewraps are gone, introduce Zarr safely
# backend: Union[Literal["hdf5", "zarr"]],
# backend_configuration: Optional[Union[HDF5BackendConfiguration, ZarrBackendConfiguration]] = None,
backend: Literal["hdf5"] = "hdf5",
backend_configuration: Optional[HDF5BackendConfiguration] = None,
conversion_options: Optional[dict] = None,
) -> None:
"""
@@ -142,6 +154,13 @@
overwrite : bool, default: False
Whether to overwrite the NWBFile if one exists at the nwbfile_path.
The default is False (append mode).
backend : "hdf5" or a HDF5BackendConfiguration, default: "hdf5"
The type of backend to use when writing the file.
backend_configuration : HDF5BackendConfiguration, optional
The configuration model to use when configuring the datasets for this backend.
To customize, call the `.get_default_backend_configuration(...)` method, modify the returned
BackendConfiguration object, and pass that instead.
Otherwise, all datasets will use default configuration settings.
conversion_options : dict, optional
Similar to source_data, a dictionary containing keywords for each interface for which non-default
conversion specification is requested.
@@ -150,7 +169,6 @@
metadata = self.get_metadata()

self.validate_metadata(metadata=metadata)

self.validate_conversion_options(conversion_options=conversion_options)

self.temporally_align_data_interfaces()
@@ -160,14 +178,43 @@
nwbfile=nwbfile,
metadata=metadata,
overwrite=overwrite,
verbose=self.verbose,
backend=backend,
verbose=getattr(self, "verbose", False),
) as nwbfile_out:
self.add_to_nwbfile(nwbfile_out, metadata, conversion_options)
if backend_configuration is None:
# Otherwise assume the data has already been added to the NWBFile
self.add_to_nwbfile(nwbfile_out, metadata=metadata, conversion_options=conversion_options)

backend_configuration = self.get_default_backend_configuration(nwbfile=nwbfile_out, backend=backend)

configure_backend(nwbfile=nwbfile_out, backend_configuration=backend_configuration)

def temporally_align_data_interfaces(self):
"""Override this method to implement custom alignment"""
pass

@staticmethod
def get_default_backend_configuration(
nwbfile: NWBFile,
backend: Literal["hdf5", "zarr"] = "hdf5",
) -> Union[HDF5BackendConfiguration, ZarrBackendConfiguration]:
"""
Fill and return a default backend configuration to serve as a starting point for further customization.
Parameters
----------
nwbfile : pynwb.NWBFile
The in-memory object with this interface's data already added to it.
backend : "hdf5" or "zarr", default: "hdf5"
The type of backend to use when creating the file.
Returns
-------
backend_configuration : HDF5BackendConfiguration or ZarrBackendConfiguration
The default configuration for the specified backend type.
"""
return get_default_backend_configuration(nwbfile=nwbfile, backend=backend)


class ConverterPipe(NWBConverter):
"""Takes a list or dict of pre-initialized interfaces as arguments to build an NWBConverter class"""
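As the docstring above describes, customization goes through `get_default_backend_configuration`: populate an in-memory file, fetch the defaults, modify them, and pass the result back in. A sketch of that flow under this PR's logic (the dataset key below is hypothetical and depends on the objects in your file; `converter` and `metadata` carry over from the sketch above):

```python
from pynwb.testing.mock.file import mock_NWBFile

nwbfile = mock_NWBFile()
converter.add_to_nwbfile(nwbfile, metadata=metadata)

backend_configuration = converter.get_default_backend_configuration(nwbfile=nwbfile, backend="hdf5")

# Tweak per-dataset settings; this key is hypothetical and depends on your objects
dataset_configuration = backend_configuration.dataset_configurations["acquisition/ElectricalSeries/data"]
dataset_configuration.chunk_shape = (1_000, 32)

converter.run_conversion(
    nwbfile_path="example.nwb",
    nwbfile=nwbfile,  # data already added, so run_conversion skips add_to_nwbfile
    metadata=metadata,
    overwrite=True,
    backend="hdf5",
    backend_configuration=backend_configuration,
)
```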
15 changes: 14 additions & 1 deletion src/neuroconv/tools/nwb_helpers/_configure_backend.py
@@ -1,12 +1,14 @@
"""Collection of helper functions related to configuration of datasets dependent on backend."""

import sys
from typing import Union

from hdmf.common import Data
from pynwb import NWBFile, TimeSeries

from ._configuration_models._hdf5_backend import HDF5BackendConfiguration
from ._configuration_models._zarr_backend import ZarrBackendConfiguration
from ..importing import is_package_installed


def configure_backend(
@@ -22,6 +24,9 @@ def configure_backend(
backend_configuration : HDF5BackendConfiguration, optional
The configuration model to use when configuring the datasets for this backend.
"""
is_ndx_events_installed = is_package_installed(package_name="ndx_events")
ndx_events = sys.modules.get("ndx_events", None)

nwbfile_objects = nwbfile.objects

# Set all DataIO based on the configuration
@@ -44,10 +49,18 @@
neurodata_object.set_data_io(
dataset_name=dataset_name, data_io_class=data_io_class, data_io_kwargs=data_io_kwargs
)
# Special ndx-events v0.2.0 types
elif is_ndx_events_installed and isinstance(neurodata_object, ndx_events.Events):
neurodata_object.set_data_io(
dataset_name=dataset_name, data_io_class=data_io_class, data_io_kwargs=data_io_kwargs
)
# But temporarily skipping LabeledEvents
elif is_ndx_events_installed and isinstance(neurodata_object, ndx_events.LabeledEvents):
continue
# Skip the setting of a DataIO when target dataset is a link (assume it will be found in parent)
elif isinstance(neurodata_object, TimeSeries) and is_dataset_linked:
continue
# Strictly speaking, it would be odd if a backend_configuration led to this, but might as well be safe
# Strictly speaking, it would be odd if a `backend_configuration` got to this line, but might as well be safe
else:
raise NotImplementedError(
f"Unsupported object type {type(neurodata_object)} for backend configuration "
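The guarded `ndx_events` branch above avoids a hard dependency on the extension: availability is checked with `is_package_installed`, and the module object is pulled from `sys.modules` rather than imported at the top of the file. The same pattern in isolation (a sketch; `is_package_installed` here is a stand-in, not the module's exact helper):

```python
import importlib.util
import sys


def is_package_installed(package_name: str) -> bool:
    # Check availability without importing the package
    return importlib.util.find_spec(package_name) is not None


# Returns the already-imported module, or None if nothing has imported ndx_events yet;
# extension objects can only appear in an NWBFile if the extension was imported upstream.
ndx_events = sys.modules.get("ndx_events", None)

if is_package_installed("ndx_events") and ndx_events is not None:
    print(ndx_events.Events)  # safe to reference extension types here
```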
17 changes: 8 additions & 9 deletions src/neuroconv/tools/nwb_helpers/_dataset_configuration.py
@@ -10,6 +10,7 @@
from hdmf_zarr import NWBZarrIO
from pynwb import NWBHDF5IO, NWBFile
from pynwb.base import DynamicTable, TimeSeriesReferenceVectorData
from pynwb.file import NWBContainer

from ._configuration_models._base_dataset_io import DatasetIOConfiguration

@@ -101,6 +102,7 @@ def get_default_dataset_io_configurations(
f"({backend}) does not match! Set `backend=None` or remove the keyword argument to allow it to auto-detect."
)

known_dataset_fields = ("data", "timestamps")
for neurodata_object in nwbfile.objects.values():
if isinstance(neurodata_object, DynamicTable):
dynamic_table = neurodata_object # For readability
@@ -129,16 +131,13 @@
)

yield dataset_io_configuration
else:
# Primarily for TimeSeries, but also any extended class that has 'data' or 'timestamps'
# The most common example of this is ndx-events Events/LabeledEvents types
time_series = neurodata_object # For readability

for dataset_name in ("data", "timestamps"):
if dataset_name not in time_series.fields: # The 'timestamps' field is optional
elif isinstance(neurodata_object, NWBContainer):
for known_dataset_field in known_dataset_fields:
# Skip optional fields that aren't present
if known_dataset_field not in neurodata_object.fields:
continue

candidate_dataset = getattr(time_series, dataset_name)
candidate_dataset = getattr(neurodata_object, known_dataset_field)

# Skip if already written to file
if _is_dataset_written_to_file(
@@ -155,7 +154,7 @@
continue

dataset_io_configuration = DatasetIOConfigurationClass.from_neurodata_object(
neurodata_object=time_series, dataset_name=dataset_name
neurodata_object=neurodata_object, dataset_name=known_dataset_field
)

yield dataset_io_configuration
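With the `TimeSeries`-specific branch generalized to any `NWBContainer` carrying the known dataset fields, the defaults can be inspected for arbitrary objects by consuming the generator directly. A sketch with mock objects, assuming the configuration model exposes `location_in_file` and `chunk_shape`:

```python
from pynwb.testing.mock.base import mock_TimeSeries
from pynwb.testing.mock.file import mock_NWBFile

from neuroconv.tools.nwb_helpers import get_default_dataset_io_configurations

nwbfile = mock_NWBFile()
nwbfile.add_acquisition(mock_TimeSeries(name="ExampleTimeSeries"))

# One configuration is yielded per eligible dataset ("data", and "timestamps" when present)
for dataset_io_configuration in get_default_dataset_io_configurations(nwbfile=nwbfile, backend="hdf5"):
    print(dataset_io_configuration.location_in_file, dataset_io_configuration.chunk_shape)
```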
43 changes: 38 additions & 5 deletions src/neuroconv/tools/testing/data_interface_mixins.py
@@ -16,7 +16,7 @@
from pynwb.testing.mock.file import mock_NWBFile
from spikeinterface.core.testing import check_recordings_equal, check_sortings_equal

from neuroconv.basedatainterface import BaseDataInterface
from neuroconv import BaseDataInterface, NWBConverter
from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import (
BaseRecordingExtractorInterface,
)
@@ -124,6 +129,29 @@ def check_run_conversion_custom_backend(self, nwbfile_path: str, backend: Litera
**self.conversion_options,
)

def check_run_conversion_default_backend_in_nwbconverter(
self, nwbfile_path: str, backend: Literal["hdf5", "zarr"] = "hdf5"
):
class TestNWBConverter(NWBConverter):
data_interface_classes = dict(Test=type(self.interface))

test_kwargs = self.test_kwargs[0] if isinstance(self.test_kwargs, list) else self.test_kwargs
source_data = dict(Test=test_kwargs)
converter = TestNWBConverter(source_data=source_data)

metadata = converter.get_metadata()
if "session_start_time" not in metadata["NWBFile"]:
metadata["NWBFile"].update(session_start_time=datetime.now().astimezone())

conversion_options = dict(Test=self.conversion_options)
converter.run_conversion(
nwbfile_path=nwbfile_path,
overwrite=True,
metadata=metadata,
backend=backend,
conversion_options=conversion_options,
)

@abstractmethod
def check_read_nwb(self, nwbfile_path: str):
"""Read the produced NWB file and compare it to the interface."""
@@ -142,7 +165,7 @@ def run_custom_checks(self):
"""Override this in child classes to inject additional custom checks."""
pass

def test_conversion_as_lone_interface(self):
def test_all_conversion_checks(self):
interface_kwargs = self.interface_kwargs
if isinstance(interface_kwargs, dict):
interface_kwargs = [interface_kwargs]
@@ -159,6 +182,10 @@

self.check_no_metadata_mutation()

self.check_run_conversion_default_backend_in_nwbconverter(
nwbfile_path=self.nwbfile_path, backend="hdf5"
)

self.check_run_conversion_default_backend(nwbfile_path=self.nwbfile_path, backend="hdf5")
self.check_run_conversion_custom_backend(nwbfile_path=self.nwbfile_path, backend="hdf5")

@@ -545,7 +572,7 @@ def test_interface_alignment(self):

self.check_nwbfile_temporal_alignment()

def test_conversion_as_lone_interface(self):
def test_all_conversion_checks(self):
interface_kwargs = self.interface_kwargs
if isinstance(interface_kwargs, dict):
interface_kwargs = [interface_kwargs]
@@ -568,6 +595,10 @@

self.check_no_metadata_mutation()

self.check_run_conversion_default_backend_in_nwbconverter(
nwbfile_path=self.nwbfile_path, backend="hdf5"
)

self.check_run_conversion_default_backend(nwbfile_path=self.nwbfile_path, backend="hdf5")
self.check_run_conversion_custom_backend(nwbfile_path=self.nwbfile_path, backend="hdf5")

@@ -708,11 +739,13 @@ def test_interface_alignment(self):


class AudioInterfaceTestMixin(DataInterfaceTestMixin, TemporalAlignmentMixin):
# Currently asserted in the downstream testing suite; could be refactored in future PR
def check_read_nwb(self, nwbfile_path: str):
pass # asserted in the testing suite; could be refactored in future PR
pass

# Currently asserted in the downstream testing suite
def test_interface_alignment(self):
pass # Currently asserted in the testing suite
pass


class DeepLabCutInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin):
3 changes: 2 additions & 1 deletion src/neuroconv/tools/testing/mock_interfaces.py
@@ -128,7 +128,8 @@ def __init__(
self,
num_channels: int = 4,
sampling_frequency: float = 30_000.0,
durations: List[float] = [1.0],
# durations: Tuple[float] = (1.0,), # Uncomment when pydantic is integrated for schema validation
durations: tuple = (1.0,),
seed: int = 0,
verbose: bool = True,
es_key: str = "ElectricalSeries",
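The `durations` change above replaces a mutable list default with a tuple, ahead of pydantic schema validation per the inline TODO; it also sidesteps the classic shared-mutable-default pitfall, illustrated below with a hypothetical function (not neuroconv code):

```python
def append_bad(value, accumulator=[]):
    # The single default list is created once, at function definition time
    accumulator.append(value)
    return accumulator


print(append_bad(1))  # [1]
print(append_bad(2))  # [1, 2] -- state leaked from the first call


def append_good(value, accumulator=None):
    # Immutable sentinel default; a fresh list is created per call
    accumulator = [] if accumulator is None else accumulator
    accumulator.append(value)
    return accumulator


print(append_good(1))  # [1]
print(append_good(2))  # [2]
```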
7 changes: 7 additions & 0 deletions tests/test_on_data/test_behavior_interfaces.py
@@ -1,6 +1,7 @@
import unittest
from datetime import datetime, timezone
from pathlib import Path
from typing import Literal

import numpy as np
import pandas as pd
@@ -225,6 +226,12 @@ def check_read_nwb(self, nwbfile_path: str): # This is currently structured to

assert spatial_series.timestamps[0] == 0.0

# TODO: undo this skip in future PR
def check_run_conversion_default_backend_in_nwbconverter(
self, nwbfile_path: str, backend: Literal["hdf5", "zarr"] = "hdf5"
):
pass


class TestFicTracDataInterfaceWithRadius(DataInterfaceTestMixin, unittest.TestCase):
data_interface_cls = FicTracDataInterface
7 changes: 7 additions & 0 deletions tests/test_on_data/test_recording_interfaces.py
@@ -1,6 +1,7 @@
from datetime import datetime
from platform import python_version
from sys import platform
from typing import Literal
from unittest import skip, skipIf

import numpy as np
@@ -174,6 +175,12 @@ def test_interface_alignment(self):

self.check_nwbfile_temporal_alignment()

# EDF has simultaneous access issues; can't have multiple interfaces open on the same file at once...
def check_run_conversion_default_backend_in_nwbconverter(
self, nwbfile_path: str, backend: Literal["hdf5", "zarr"] = "hdf5"
):
pass


class TestIntanRecordingInterface(RecordingExtractorInterfaceTestMixin, TestCase):
data_interface_cls = IntanRecordingInterface
