wip
CodyCBakerPhD committed Aug 12, 2024
1 parent cc62ab4 commit 2ae24e7
Showing 4 changed files with 50 additions and 8 deletions.
10 changes: 5 additions & 5 deletions src/nwbinspector/checks/tables.py
@@ -9,7 +9,7 @@

from .._registration import register_check, InspectorMessage, Importance
from nwbinspector.utils._utils import (
_cache_data_selection,
cache_data_selection,
format_byte_size,
is_ascending_series,
is_dict_in_string,
@@ -87,8 +87,8 @@ def check_time_intervals_stop_after_start(time_intervals: TimeIntervals, nelems:
load the entire arrays.
"""
if np.any(
np.asarray(_cache_data_selection(data=time_intervals["stop_time"].data, selection=slice(nelems)))
- np.asarray(_cache_data_selection(data=time_intervals["start_time"].data, selection=slice(nelems)))
np.asarray(cache_data_selection(data=time_intervals["stop_time"].data, selection=slice(nelems)))
- np.asarray(cache_data_selection(data=time_intervals["start_time"].data, selection=slice(nelems)))
< 0
):
return InspectorMessage(
@@ -120,7 +120,7 @@ def check_column_binary_capability(table: DynamicTable, nelems: Optional[int] =
if np.asarray(column.data[0]).itemsize == 1:
continue # already boolean, int8, or uint8
try:
unique_values = np.unique(_cache_data_selection(data=column.data, selection=slice(nelems)))
unique_values = np.unique(cache_data_selection(data=column.data, selection=slice(nelems)))
except TypeError: # some contained objects are unhashable or have no comparison defined
continue
if unique_values.size != 2:
@@ -188,7 +188,7 @@ def check_table_values_for_dict(table: DynamicTable, nelems: Optional[int] = NEL
for column in table.columns:
if not hasattr(column, "data") or isinstance(column, VectorIndex) or not isinstance(column.data[0], str):
continue
for string in _cache_data_selection(data=column.data, selection=slice(nelems)):
for string in cache_data_selection(data=column.data, selection=slice(nelems)):
if is_dict_in_string(string=string):
message = (
f"The column '{column.name}' contains a string value that contains a dictionary! Please "
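
The hunks above rename the private `_cache_data_selection` helper to the public `cache_data_selection`; the call sites are otherwise unchanged. A minimal sketch of how the table checks invoke it, using a hypothetical in-memory column in place of a real `DynamicTable` column:

```python
# Sketch only: `column_data` is a stand-in for `column.data` on a DynamicTable;
# real checks may instead pass an h5py.Dataset when the file is read from disk or streamed.
import numpy as np

from nwbinspector.utils import cache_data_selection

column_data = [0, 1, 1, 0, 1]  # hypothetical values for a boolean-like column
unique_values = np.unique(cache_data_selection(data=column_data, selection=slice(200)))
print(unique_values.size)  # 2, so the binary-capability check would suggest a bool dtype
```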
13 changes: 12 additions & 1 deletion src/nwbinspector/testing/__init__.py
@@ -11,4 +11,15 @@
TESTING_CONFIG_FILE_PATH,
)

__all__ = []
__all__ = [
"check_streaming_tests_enabled",
"check_streaming_enabled",
"check_hdf5_io_open",
"check_zarr_io_open",
"load_testing_config",
"update_testing_config",
"generate_testing_files",
"generate_image_series_testing_files",
"make_minimal_nwbfilem",
"TESTING_CONFIG_FILE_PATH",
]
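
With `__all__` now populated, the public surface of `nwbinspector.testing` is explicit. A short hedged sketch of importing a couple of these helpers, assuming the names resolve as listed:

```python
# Hedged sketch: imports names the new __all__ advertises; make_minimal_nwbfile
# is expected to build a bare in-memory NWBFile for test setup.
from nwbinspector.testing import check_streaming_tests_enabled, make_minimal_nwbfile

nwbfile = make_minimal_nwbfile()
print(type(nwbfile).__name__)  # NWBFile
```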
31 changes: 31 additions & 0 deletions src/nwbinspector/utils/__init__.py
@@ -0,0 +1,31 @@
from ._utils import (
get_data_shape,
strtobool,
format_byte_size,
cache_data_selection,
is_regular_series,
is_ascending_series,
is_dict_in_string,
is_string_json_loadable,
is_module_installed,
get_package_version,
robust_s3_read,
calculate_number_of_cpu,
get_data_shape,
)

__all__ = [
"get_data_shape",
"strtobool",
"format_byte_size",
"cache_data_selection",
"is_regular_series",
"is_ascending_series",
"is_dict_in_string",
"is_string_json_loadable",
"is_module_installed",
"get_package_version",
"robust_s3_read",
"calculate_number_of_cpu",
"get_data_shape",
]
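
The new `src/nwbinspector/utils/__init__.py` re-exports the private helpers from `_utils`, so downstream code can import them from the package namespace. A minimal usage sketch, assuming the helpers behave as their names suggest:

```python
# Hedged sketch of the new public namespace; the exact return formatting of
# format_byte_size is an assumption and may differ slightly.
from nwbinspector.utils import format_byte_size, is_ascending_series

print(format_byte_size(1024))                 # e.g. "1.02KB"
print(is_ascending_series(series=[1, 2, 3]))  # True for a monotonically increasing series
```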
4 changes: 2 additions & 2 deletions src/nwbinspector/utils/_utils.py
@@ -33,7 +33,7 @@ def _cache_data_retrieval_command(
return data[selection]


def _cache_data_selection(data: Union[h5py.Dataset, ArrayLike], selection: Union[slice, Tuple[slice]]) -> np.ndarray:
def cache_data_selection(data: Union[h5py.Dataset, ArrayLike], selection: Union[slice, Tuple[slice]]) -> np.ndarray:
"""Extract the selection lazily from the data object for efficient caching (most beneficial during streaming)."""
if isinstance(data, np.memmap): # np.memmap objects are not hashable - simply return the selection lazily
return data[selection]
@@ -91,7 +91,7 @@ def is_regular_series(series: np.ndarray, tolerance_decimals: int = 9):
def is_ascending_series(series: Union[h5py.Dataset, ArrayLike], nelems: Optional[int] = None):
"""General purpose function for determining if a series is monotonic increasing."""
if isinstance(series, h5py.Dataset):
data = _cache_data_selection(data=series, selection=slice(nelems))
data = cache_data_selection(data=series, selection=slice(nelems))
else:
data = series[:nelems]

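
The docstring above describes the point of the helper: when the input is an `h5py.Dataset`, only the requested selection is read and the result is cached, which matters most when streaming over the network. A hedged sketch of that path, with an assumed local file standing in for a streamed NWB dataset:

```python
# Sketch only: builds a throwaway HDF5 file so the selection goes through the
# h5py.Dataset branch; real usage would stream an NWB file instead.
import h5py
import numpy as np

from nwbinspector.utils import cache_data_selection

with h5py.File("example_times.h5", "w") as file:
    file.create_dataset("start_time", data=np.arange(10.0))

with h5py.File("example_times.h5", "r") as file:
    first_five = cache_data_selection(data=file["start_time"], selection=slice(5))
    print(first_five)  # [0. 1. 2. 3. 4.]
```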
