diff --git a/CHANGELOG.md b/CHANGELOG.md index acf3d174e..f12991684 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Fixed pending deprecations and issues in CI. @rly [#1594](https://github.com/NeurodataWithoutBorders/pynwb/pull/1594) - Added ``NWBHDF5IO.nwb_version`` property to get the NWB version from an NWB HDF5 file @oruebel [#1612](https://github.com/NeurodataWithoutBorders/pynwb/pull/1612) - Updated ``NWBHDF5IO.read`` to check NWB version before read and raise more informative error if an unsupported version is found @oruebel [#1612](https://github.com/NeurodataWithoutBorders/pynwb/pull/1612) +- Added the `driver` keyword argument to the `pynwb.validate` function as well as the corresponding namespace caching. @CodyCBakerPhD [#1588](https://github.com/NeurodataWithoutBorders/pynwb/pull/1588) ### Documentation and tutorial enhancements: - Adjusted [ecephys tutorial](https://pynwb.readthedocs.io/en/stable/tutorials/domain/ecephys.html) to create fake data with proper dimensions @bendichter [#1581](https://github.com/NeurodataWithoutBorders/pynwb/pull/1581) diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index a5a313481..23b3aee6f 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -1,7 +1,7 @@ """Command line tool to Validate an NWB file against a namespace.""" import sys from argparse import ArgumentParser -from typing import Tuple, List, Dict +from typing import Tuple, List, Dict, Optional from hdmf.spec import NamespaceCatalog from hdmf.build import BuildManager @@ -29,7 +29,9 @@ def _validate_helper(io: HDMFIO, namespace: str = CORE_NAMESPACE) -> list: return validator.validate(builder) -def _get_cached_namespaces_to_validate(path: str) -> Tuple[List[str], BuildManager, Dict[str, str]]: +def _get_cached_namespaces_to_validate( + path: str, driver: Optional[str] = None +) -> Tuple[List[str], BuildManager, Dict[str, str]]: """ Determine the most specific namespace(s) that are cached in the given NWBFile that can be 
used for validation. @@ -56,7 +58,7 @@ def _get_cached_namespaces_to_validate(path: str) -> Tuple[List[str], BuildManag catalog = NamespaceCatalog( group_spec_cls=NWBGroupSpec, dataset_spec_cls=NWBDatasetSpec, spec_namespace_cls=NWBNamespace ) - namespace_dependencies = NWBHDF5IO.load_namespaces(namespace_catalog=catalog, path=path) + namespace_dependencies = NWBHDF5IO.load_namespaces(namespace_catalog=catalog, path=path, driver=driver) # Determine which namespaces are the most specific (i.e. extensions) and validate against those candidate_namespaces = set(namespace_dependencies.keys()) @@ -107,6 +109,12 @@ def _get_cached_namespaces_to_validate(path: str) -> Tuple[List[str], BuildManag "doc": "Whether or not to print messages to stdout.", "default": False, }, + { + "name": "driver", + "type": str, + "doc": "Driver for h5py to use when opening the HDF5 file.", + "default": None, + }, returns="Validation errors in the file.", rtype=(list, (list, bool)), is_method=False, @@ -115,8 +123,8 @@ def validate(**kwargs): """Validate NWB file(s) against a namespace or its cached namespaces.""" from . import NWBHDF5IO # TODO: modularize to avoid circular import - io, paths, use_cached_namespaces, namespace, verbose = getargs( - "io", "paths", "use_cached_namespaces", "namespace", "verbose", kwargs + io, paths, use_cached_namespaces, namespace, verbose, driver = getargs( + "io", "paths", "use_cached_namespaces", "namespace", "verbose", "driver", kwargs ) assert io != paths, "Both 'io' and 'paths' were specified! Please choose only one." 
@@ -129,10 +137,12 @@ def validate(**kwargs): for path in paths: namespaces_to_validate = [] namespace_message = "PyNWB namespace information" - io_kwargs = dict(path=path, mode="r") + io_kwargs = dict(path=path, mode="r", driver=driver) if use_cached_namespaces: - cached_namespaces, manager, namespace_dependencies = _get_cached_namespaces_to_validate(path=path) + cached_namespaces, manager, namespace_dependencies = _get_cached_namespaces_to_validate( + path=path, driver=driver + ) io_kwargs.update(manager=manager) if any(cached_namespaces): diff --git a/tests/integration/ros3/test_ros3.py b/tests/integration/ros3/test_ros3.py index 1289f19b7..c2f7b562d 100644 --- a/tests/integration/ros3/test_ros3.py +++ b/tests/integration/ros3/test_ros3.py @@ -1,4 +1,6 @@ from pynwb import NWBHDF5IO +from pynwb import validate +from pynwb.validate import _get_cached_namespaces_to_validate from pynwb.testing import TestCase import urllib.request import h5py @@ -10,6 +12,11 @@ class TestRos3Streaming(TestCase): This test module requires h5py to be built with the ROS3 driver: conda install -c conda-forge h5py """ + @classmethod + def setUpClass(cls): + # this is the NWB Test Data dandiset #000126 sub-1/sub-1.nwb + cls.s3_test_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" + def setUp(self): # Skip ROS3 tests if internet is not available or the ROS3 driver is not installed try: @@ -27,10 +34,53 @@ def test_read(self): self.assertEqual(len(test_data), 3) def test_dandi_read(self): - # this is the NWB Test Data dandiset #000126 sub-1/sub-1.nwb - s3_path = 'https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991' - - with NWBHDF5IO(s3_path, mode='r', driver='ros3') as io: + with NWBHDF5IO(path=self.s3_test_path, mode='r', driver='ros3') as io: nwbfile = io.read() test_data = nwbfile.acquisition['TestData'].data[:] self.assertEqual(len(test_data), 3) + + def test_dandi_get_cached_namespaces(self): + 
expected_namespaces = ["core"] + expected_namespace_dependencies = { + 'core': { + 'hdmf-common': ( + 'AlignedDynamicTable', + 'CSRMatrix', + 'Container', + 'Data', + 'DynamicTable', + 'DynamicTableRegion', + 'ElementIdentifiers', + 'SimpleMultiContainer', + 'VectorData', + 'VectorIndex' + ) + }, + 'hdmf-common': {}, + 'hdmf-experimental': { + 'hdmf-common': ( + 'AlignedDynamicTable', + 'CSRMatrix', + 'Container', + 'Data', + 'DynamicTable', + 'DynamicTableRegion', + 'ElementIdentifiers', + 'SimpleMultiContainer', + 'VectorData', + 'VectorIndex' + ) + } + } + found_namespaces, _, found_namespace_dependencies = _get_cached_namespaces_to_validate( + path=self.s3_test_path, driver="ros3" + ) + + self.assertCountEqual(first=found_namespaces, second=expected_namespaces) + self.assertDictEqual(d1=expected_namespace_dependencies, d2=found_namespace_dependencies) + + def test_dandi_validate(self): + result, status = validate(paths=[self.s3_test_path], driver="ros3") + + assert result == [] + assert status == 0