Skip to content

Commit

Permalink
Use cached namespaces during validation (#531)
Browse files Browse the repository at this point in the history
* update pynwb validation to use cached namespaces

* add zarr check when searching for nwbfiles

* add dir test, fix parallel test indent

* update zarr dir detection

* update CHANGELOG.md

---------

Co-authored-by: Ryan Ly <[email protected]>
  • Loading branch information
stephprince and rly authored Nov 20, 2024
1 parent c26924b commit 3aa3dab
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 68 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

### Fixes
* Fixed incorrect error message for OptogeneticStimulusSite. [#524](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/524)
* Fixed detection of Zarr directories for inspection. [#531](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/531)


# v0.5.2
Expand Down
11 changes: 7 additions & 4 deletions src/nwbinspector/_nwb_inspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
from warnings import filterwarnings, warn

import pynwb
from hdmf_zarr import ZarrIO
from natsort import natsorted
from tqdm import tqdm

from ._configuration import configure_checks
from ._registration import Importance, InspectorMessage, available_checks
from .tools._read_nwbfile import read_nwbfile, read_nwbfile_and_io
from .tools._read_nwbfile import read_nwbfile
from .utils import (
OptionalListOfStrings,
PathType,
Expand Down Expand Up @@ -126,7 +127,9 @@ def inspect_all(
if progress_bar_options is None:
progress_bar_options = dict(position=0, leave=False)

if in_path.is_dir():
if in_path.is_dir() and (in_path.match("*.nwb*")) and ZarrIO.can_read(in_path):
nwbfiles = [in_path] # if it is a zarr directory
elif in_path.is_dir():
nwbfiles = list(in_path.rglob("*.nwb*"))

# Remove any macOS sidecar files
Expand Down Expand Up @@ -271,10 +274,10 @@ def inspect_nwbfile(
filterwarnings(action="ignore", message="Ignoring cached namespace .*")

try:
in_memory_nwbfile, io = read_nwbfile_and_io(nwbfile_path=nwbfile_path)
in_memory_nwbfile = read_nwbfile(nwbfile_path=nwbfile_path)

if not skip_validate:
validation_errors = pynwb.validate(io=io)
validation_errors, _ = pynwb.validate(paths=[nwbfile_path])
for validation_error in validation_errors:
yield InspectorMessage(
message=validation_error.reason,
Expand Down
140 changes: 76 additions & 64 deletions tests/test_inspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,74 +268,86 @@ def test_inspect_all(self):
]
self.assertCountEqual(first=test_results, second=true_results)

def test_inspect_all_parallel(self):
test_results = list(
inspect_all(path=Path(self.nwbfile_paths[0]).parent, select=[x.__name__ for x in self.checks], n_jobs=2)
def test_inspect_all_parallel(self):
test_results = list(
inspect_all(
path=Path(self.nwbfile_paths[0]).parent,
select=[x.__name__ for x in self.checks],
n_jobs=2,
skip_validate=self.skip_validate,
)
true_results = [
InspectorMessage(
message="data is not compressed. Consider enabling compression when writing a dataset.",
importance=Importance.BEST_PRACTICE_SUGGESTION,
severity=Severity.LOW,
check_function_name="check_small_dataset_compression",
object_type="TimeSeries",
object_name="test_time_series_1",
location="/acquisition/test_time_series_1",
file_path=self.nwbfile_paths[0],
),
InspectorMessage(
message=(
"TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 "
"and rate=0.5 instead of timestamps."
),
importance=Importance.BEST_PRACTICE_VIOLATION,
severity=Severity.LOW,
check_function_name="check_regular_timestamps",
object_type="TimeSeries",
object_name="test_time_series_2",
location="/acquisition/test_time_series_2",
file_path=self.nwbfile_paths[0],
),
InspectorMessage(
message=(
"Data may be in the wrong orientation. Time should be in the first dimension, and is usually "
"the longest dimension. Here, another dimension is longer."
),
importance=Importance.CRITICAL,
severity=Severity.LOW,
check_function_name="check_data_orientation",
object_type="SpatialSeries",
object_name="my_spatial_series",
location="/processing/behavior/Position/my_spatial_series",
file_path=self.nwbfile_paths[0],
)
true_results = [
InspectorMessage(
message="data is not compressed. Consider enabling compression when writing a dataset.",
importance=Importance.BEST_PRACTICE_SUGGESTION,
severity=Severity.LOW,
check_function_name="check_small_dataset_compression",
object_type="TimeSeries",
object_name="test_time_series_1",
location="/acquisition/test_time_series_1",
file_path=self.nwbfile_paths[0],
),
InspectorMessage(
message=(
"TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 "
"and rate=0.5 instead of timestamps."
),
InspectorMessage(
message=(
"The length of the first dimension of data (4) does not match the length of timestamps (3)."
),
importance=Importance.CRITICAL,
severity=Severity.LOW,
check_function_name="check_timestamps_match_first_dimension",
object_type="TimeSeries",
object_name="test_time_series_3",
location="/acquisition/test_time_series_3",
file_path=self.nwbfile_paths[0],
importance=Importance.BEST_PRACTICE_VIOLATION,
severity=Severity.LOW,
check_function_name="check_regular_timestamps",
object_type="TimeSeries",
object_name="test_time_series_2",
location="/acquisition/test_time_series_2",
file_path=self.nwbfile_paths[0],
),
InspectorMessage(
message=(
"Data may be in the wrong orientation. Time should be in the first dimension, and is usually "
"the longest dimension. Here, another dimension is longer."
),
InspectorMessage(
message=(
"TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 "
"and rate=0.5 instead of timestamps."
),
importance=Importance.BEST_PRACTICE_VIOLATION,
severity=Severity.LOW,
check_function_name="check_regular_timestamps",
object_type="TimeSeries",
object_name="test_time_series_2",
location="/acquisition/test_time_series_2",
file_path=self.nwbfile_paths[1],
importance=Importance.CRITICAL,
severity=Severity.LOW,
check_function_name="check_data_orientation",
object_type="SpatialSeries",
object_name="my_spatial_series",
location="/processing/behavior/Position/my_spatial_series",
file_path=self.nwbfile_paths[0],
),
InspectorMessage(
message=("The length of the first dimension of data (4) does not match the length of timestamps (3)."),
importance=Importance.CRITICAL,
severity=Severity.LOW,
check_function_name="check_timestamps_match_first_dimension",
object_type="TimeSeries",
object_name="test_time_series_3",
location="/acquisition/test_time_series_3",
file_path=self.nwbfile_paths[0],
),
InspectorMessage(
message=(
"TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 "
"and rate=0.5 instead of timestamps."
),
]
self.assertCountEqual(first=test_results, second=true_results)
importance=Importance.BEST_PRACTICE_VIOLATION,
severity=Severity.LOW,
check_function_name="check_regular_timestamps",
object_type="TimeSeries",
object_name="test_time_series_2",
location="/acquisition/test_time_series_2",
file_path=self.nwbfile_paths[1],
),
]
self.assertCountEqual(first=test_results, second=true_results)

def test_inspect_all_directory(self):
    """Check that `inspect_all` produces messages when `path` points directly at one NWB file.

    For the Zarr backend that single "file" path may itself be a directory, so this
    exercises the case where the given path is the NWB file rather than a folder of them.
    """
    target_path = self.nwbfile_paths[0]
    selected_check_names = [check.__name__ for check in self.checks]

    messages = list(
        inspect_all(
            path=target_path,
            select=selected_check_names,
            skip_validate=self.skip_validate,
        )
    )

    # Only require that the file was found and inspected; the exact messages are not pinned here.
    self.assertGreater(len(messages), 0)

def test_inspect_nwbfile(self):
test_results = list(
Expand Down

0 comments on commit 3aa3dab

Please sign in to comment.