Skip to content

Commit

Permalink
implemented builder-based compound dtype check
Browse files Browse the repository at this point in the history
  • Loading branch information
pauladkisson committed Dec 5, 2024
1 parent 5298401 commit 6e05f7d
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,10 @@
import h5py
import numcodecs
import numpy as np
import pynwb
import zarr
from hdmf import Container
from hdmf.build.builders import (
DatasetBuilder,
GroupBuilder,
BaseBuilder,
)
from hdmf.utils import get_data_shape
from pydantic import (
Expand Down Expand Up @@ -249,7 +247,9 @@ def model_json_schema(cls, **kwargs) -> dict[str, Any]:
return super().model_json_schema(mode="validation", schema_generator=PureJSONSchemaGenerator, **kwargs)

@classmethod
def from_neurodata_object(cls, neurodata_object: Container, dataset_name: Literal["data", "timestamps"]) -> Self:
def from_neurodata_object(
cls, neurodata_object: Container, dataset_name: Literal["data", "timestamps"], builder: BaseBuilder
) -> Self:
"""
Construct an instance of a DatasetIOConfiguration for a dataset in a neurodata object in an NWBFile.
Expand All @@ -265,16 +265,7 @@ def from_neurodata_object(cls, neurodata_object: Container, dataset_name: Litera
location_in_file = _find_location_in_memory_nwbfile(neurodata_object=neurodata_object, field_name=dataset_name)
candidate_dataset = getattr(neurodata_object, dataset_name)

manager = pynwb.get_manager()
builder = manager.build(neurodata_object)
if isinstance(builder, GroupBuilder):
dtype = builder.datasets[dataset_name].dtype
elif isinstance(builder, DatasetBuilder):
dtype = builder.dtype
else:
raise NotImplementedError(f"Builder Type {type(builder)} not supported!")

if isinstance(dtype, list): # compound dtype
if has_compound_dtype(builder, location_in_file):
full_shape = (len(candidate_dataset),)
else:
full_shape = get_data_shape(data=candidate_dataset)
Expand Down Expand Up @@ -330,3 +321,17 @@ def from_neurodata_object(cls, neurodata_object: Container, dataset_name: Litera
buffer_shape=buffer_shape,
compression_method=compression_method,
)


def has_compound_dtype(builder, location_in_file):
    """
    Check whether the dataset at a given location in a builder tree has a compound dtype.

    The location is split on "/" and each segment is looked up in the current
    builder's ``groups``; the walk descends for as long as the segment names a
    sub-group. The first segment that is not a sub-group must name an entry in
    the current builder's ``datasets``.

    Parameters
    ----------
    builder
        Root of the builder tree to search. It, and every group reached during
        the walk, must expose ``groups`` and ``datasets`` mappings keyed by name.
    location_in_file : str
        Slash-separated path to the dataset, relative to ``builder``.

    Returns
    -------
    bool
        True if the located dataset builder's ``dtype`` is a list (the
        representation this check treats as a compound dtype), otherwise False.

    Raises
    ------
    ValueError
        If a path segment is neither a sub-group nor a dataset of the current
        builder, or if the whole path resolves to a group rather than a dataset.
    """
    current_builder = builder
    for segment in location_in_file.split("/"):
        if segment in current_builder.groups:
            # Still inside the group hierarchy; keep descending.
            current_builder = current_builder.groups[segment]
            continue
        if segment in current_builder.datasets:
            return isinstance(current_builder.datasets[segment].dtype, list)
        raise ValueError(f"Could not find location '{segment}' in builder.")

    # Every segment named a group, so the path never reached a dataset. Raising
    # here avoids the bare StopIteration an unguarded ``next()`` call would leak.
    raise ValueError(
        f"Location '{location_in_file}' in builder refers to a group, not a dataset."
    )
8 changes: 5 additions & 3 deletions src/neuroconv/tools/nwb_helpers/_dataset_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from hdmf.data_utils import DataIO
from hdmf.utils import get_data_shape
from hdmf_zarr import NWBZarrIO
from pynwb import NWBHDF5IO, NWBFile
from pynwb import NWBHDF5IO, NWBFile, get_manager
from pynwb.base import DynamicTable, TimeSeriesReferenceVectorData
from pynwb.file import NWBContainer

Expand Down Expand Up @@ -102,6 +102,8 @@ def get_default_dataset_io_configurations(
)

known_dataset_fields = ("data", "timestamps")
manager = get_manager()
builder = manager.build(nwbfile)
for neurodata_object in nwbfile.objects.values():
if isinstance(neurodata_object, DynamicTable):
dynamic_table = neurodata_object # For readability
Expand Down Expand Up @@ -134,7 +136,7 @@ def get_default_dataset_io_configurations(
continue

dataset_io_configuration = DatasetIOConfigurationClass.from_neurodata_object(
neurodata_object=column, dataset_name=dataset_name
neurodata_object=column, dataset_name=dataset_name, builder=builder
)

yield dataset_io_configuration
Expand Down Expand Up @@ -168,7 +170,7 @@ def get_default_dataset_io_configurations(
continue

dataset_io_configuration = DatasetIOConfigurationClass.from_neurodata_object(
neurodata_object=neurodata_object, dataset_name=known_dataset_field
neurodata_object=neurodata_object, dataset_name=known_dataset_field, builder=builder
)

yield dataset_io_configuration

0 comments on commit 6e05f7d

Please sign in to comment.