Skip to content

Commit

Permalink
Merge pull request #315 from NeurodataWithoutBorders/fix_data_shape
Browse files Browse the repository at this point in the history
fix get_data_shape for unbounded dataset
  • Loading branch information
CodyCBakerPhD authored Nov 30, 2022
2 parents d324736 + 3c81420 commit 733e711
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Fixes
* Fix `check_subject_proper_age_range` to parse years. [PR #314](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/314)
* Write a custom `get_data_shape` method that does not return `maxshape`, which fixes errors in parsing shape. [PR #315](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/315)


# v0.4.20
Expand Down
2 changes: 1 addition & 1 deletion src/nwbinspector/checks/ecephys.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pynwb.misc import Units
from pynwb.ecephys import ElectricalSeries

from hdmf.utils import get_data_shape
from ..utils import get_data_shape

from ..register_checks import register_check, Importance, InspectorMessage

Expand Down
2 changes: 1 addition & 1 deletion src/nwbinspector/checks/ophys.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
ImagingPlane,
)

from hdmf.utils import get_data_shape
from ..utils import get_data_shape

from ..register_checks import register_check, Importance, InspectorMessage

Expand Down
2 changes: 1 addition & 1 deletion src/nwbinspector/checks/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import numpy as np
from hdmf.common import DynamicTable, DynamicTableRegion, VectorIndex
from hdmf.utils import get_data_shape
from pynwb.file import TimeIntervals, Units

from ..register_checks import register_check, InspectorMessage, Importance
Expand All @@ -14,6 +13,7 @@
is_ascending_series,
is_dict_in_string,
is_string_json_loadable,
get_data_shape,
)


Expand Down
3 changes: 1 addition & 2 deletions src/nwbinspector/checks/time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
import numpy as np

from pynwb import TimeSeries
from hdmf.utils import get_data_shape

from ..register_checks import register_check, Importance, Severity, InspectorMessage
from ..utils import is_regular_series, is_ascending_series
from ..utils import is_regular_series, is_ascending_series, get_data_shape


@register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=TimeSeries)
Expand Down
39 changes: 39 additions & 0 deletions src/nwbinspector/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,42 @@ def calculate_number_of_cpu(requested_cpu: int = 1) -> int:
return requested_cpu
else:
return total_cpu + requested_cpu


def get_data_shape(data, strict_no_data_load=False):
    """
    Determine the shape of the given array-like object.

    Modified from ``hdmf.utils.get_data_shape`` to return ``shape`` instead of ``maxshape``.

    For objects that do not expose a usable ``.shape``, the shape is determined by recursively
    inspecting the first element along each dimension, assuming that the data has a regular,
    rectangular shape. In the case of out-of-core iterators, this means that the first item
    along each dimension would potentially be loaded into memory. Set strict_no_data_load=True
    to enforce that this does not happen, at the cost that we may not be able to determine
    the shape of the array.

    :param data: Array for which we should determine the shape.
    :type data: List, numpy.ndarray, DataChunkIterator, any object that support __len__ or .shape.
    :param strict_no_data_load: If True and data is an out-of-core iterator, None may be returned. If False (default),
                                the first element of data may be loaded into memory.
    :return: ``data.shape`` unchanged when that attribute exists and is not None; otherwise a tuple of ints
             built from the length of ``data`` and of the first element along each nested dimension.
             None is returned for dicts, str/bytes, objects without ``__len__``, and (when
             strict_no_data_load=True) sized objects that are not a list, tuple, or set.
    """

    def _get_shape_helper(local_data):
        # Objects without a length (plain scalars) end the recursion.
        if not hasattr(local_data, "__len__"):
            return ()
        try:
            length = len(local_data)
        except TypeError:
            # Some objects define __len__ yet are unsized (e.g. a zero-dimensional numpy array);
            # they contribute no further dimensions instead of aborting the whole computation.
            return ()
        shape = [length]
        if length:
            first_element = next(iter(local_data))
            # Strings and bytes are treated as scalar values, not as a dimension of characters.
            if not isinstance(first_element, (str, bytes)):
                shape.extend(_get_shape_helper(first_element))
        return tuple(shape)

    # Prefer the current shape reported by the object itself (never maxshape).
    if hasattr(data, "shape") and data.shape is not None:
        return data.shape
    if isinstance(data, dict):
        return None
    if hasattr(data, "__len__") and not isinstance(data, (str, bytes)):
        # Built-in containers are already in memory, so inspecting them never triggers a data load.
        if not strict_no_data_load or isinstance(data, (list, tuple, set)):
            return _get_shape_helper(data)
    return None
21 changes: 21 additions & 0 deletions tests/unit_tests/test_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import pytest
from packaging import version

import h5py

from nwbinspector import (
InspectorMessage,
Importance,
Expand Down Expand Up @@ -77,6 +79,25 @@ def test_check_data_orientation():
)


def test_check_data_orientation_unbounded_maxshape(tmp_path):
    """Check that a dataset with an unlimited first axis passes the data orientation check."""
    nwbfile_path = tmp_path / "test.nwb"
    with h5py.File(nwbfile_path, "w") as h5_file:
        # maxshape=(None, 3) leaves the first axis unbounded while the actual shape is (10, 3).
        unbounded_dataset = h5_file.create_dataset("data", data=np.ones((10, 3)), maxshape=(None, 3))
        series = pynwb.TimeSeries(
            name="test_time_series",
            unit="test_units",
            data=unbounded_dataset,
            rate=1.0,
        )
        # Run the check while the file is open: the dataset handle is only usable until it closes.
        outcome = check_data_orientation(series)

    assert outcome is None


def test_check_timestamps():
assert check_timestamps_match_first_dimension(
time_series=pynwb.TimeSeries(
Expand Down

0 comments on commit 733e711

Please sign in to comment.