From f33f768c48a5bb8a36bdd34509b74031154ad40f Mon Sep 17 00:00:00 2001
From: CodyCBakerPhD
Date: Fri, 23 Aug 2024 15:12:35 -0400
Subject: [PATCH] initial sketch

---
 Reviewer note: the added src/nwbinspector/_dandi_inspection.py was revised by
 hand during review; re-export with `git format-patch` to refresh the commit
 and blob hashes recorded in this patch.

 setup.py                                      |   2 +-
 src/nwbinspector/__init__.py                  |   7 +-
 src/nwbinspector/_dandi_inspection.py         | 180 ++++++++++++++++++
 .../{_inspection.py => _nwb_inspection.py}    |   2 +-
 src/nwbinspector/nwbinspector/__init__.py     |   2 +-
 5 files changed, 188 insertions(+), 5 deletions(-)
 create mode 100644 src/nwbinspector/_dandi_inspection.py
 rename src/nwbinspector/{_inspection.py => _nwb_inspection.py} (99%)

diff --git a/setup.py b/setup.py
index b6b741704..e597b7e1f 100644
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@
     include_package_data=True,  # Includes files described in MANIFEST.in in the installation
     install_requires=install_requires,
     # zarr<2.18.0 because of https://github.com/NeurodataWithoutBorders/nwbinspector/pull/460
-    extras_require=dict(dandi=["dandi>=0.39.2", "zarr<2.18.0"], zarr=["hdmf_zarr>=0.3.0", "zarr<2.18.0"]),
+    extras_require=dict(dandi=["dandi>=0.39.2", "zarr<2.18.0", "remfile"], zarr=["hdmf_zarr>=0.3.0", "zarr<2.18.0"]),
     entry_points={"console_scripts": ["nwbinspector=nwbinspector._inspection_cli:_inspect_all_cli"]},
     license="BSD-3-Clause",
     classifiers=[
diff --git a/src/nwbinspector/__init__.py b/src/nwbinspector/__init__.py
index c8c2e181f..83f1aaf31 100644
--- a/src/nwbinspector/__init__.py
+++ b/src/nwbinspector/__init__.py
@@ -2,13 +2,13 @@
 from ._registration import available_checks, register_check
 from ._types import Importance, Severity, InspectorMessage
 from ._configuration import load_config, validate_config, configure_checks
-from ._inspection import (
+from ._nwb_inspection import (
     inspect_all,
     inspect_nwbfile,
     inspect_nwbfile_object,
     run_checks,
 )
-from ._inspection import inspect_nwb  # TODO: remove after 7/1/2023
+from ._nwb_inspection import inspect_nwb  # TODO: remove after 7/1/2023
 from ._formatting import (
     format_messages,
     print_to_console,
@@ -18,6 +18,7 @@
     InspectorOutputJSONEncoder,
 )
 from ._organization import organize_messages
+from ._dandi_inspection import inspect_dandiset, inspect_dandi_file_path
 from .checks import *  # These need to be imported to trigger registration with 'available_checks', but are not exposed
 
 default_check_registry = {check.__name__: check for check in available_checks}
@@ -32,6 +33,8 @@
     "load_config",
     "configure_checks",
     "InspectorOutputJSONEncoder",
+    "inspect_dandiset",
+    "inspect_dandi_file_path",
     "inspect_all",
     "inspect_nwbfile",
     "inspect_nwbfile_object",
diff --git a/src/nwbinspector/_dandi_inspection.py b/src/nwbinspector/_dandi_inspection.py
new file mode 100644
index 000000000..56a80ee13
--- /dev/null
+++ b/src/nwbinspector/_dandi_inspection.py
@@ -0,0 +1,180 @@
+import pathlib
+from typing import Iterable, List, Literal, Optional, Union
+
+import h5py
+import pynwb
+
+from ._configuration import load_config
+from ._types import InspectorMessage, Importance
+from ._nwb_inspection import inspect_nwbfile_object
+
+# Identifier of the NWB data standard in a Dandiset's `assetsSummary.dataStandard` metadata
+_NWB_STANDARD_IDENTIFIER = "RRID:SCR_015242"
+
+
+def inspect_dandiset(
+    *,
+    dandiset_id: str,
+    dandiset_version: Union[str, Literal["draft"], None] = None,
+    checks: Optional[list] = None,
+    ignore: Optional[List[str]] = None,
+    select: Optional[List[str]] = None,
+    importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
+    client: Union["dandi.dandiapi.DandiAPIClient", None] = None,
+) -> Iterable[InspectorMessage]:
+    """
+    Inspect every NWB file in a Dandiset for common issues.
+
+    Parameters
+    ----------
+    dandiset_id : six-digit string
+        The six-digit ID of the Dandiset.
+    dandiset_version : string, "draft", or None
+        The specific published version of the Dandiset to inspect.
+        If None, the latest version is used.
+        If there are no published versions, then 'draft' is used instead.
+    checks : list, optional
+        list of checks to run
+    ignore: list, optional
+        Names of functions to skip.
+    select: list, optional
+        Names of functions to pick out of available checks.
+    importance_threshold : string or Importance, optional
+        Ignores tests with an assigned importance below this threshold.
+        Importance has three levels:
+
+            CRITICAL
+                - potentially incorrect data
+            BEST_PRACTICE_VIOLATION
+                - very suboptimal data representation
+            BEST_PRACTICE_SUGGESTION
+                - improvable data representation
+
+        The default is the lowest level, BEST_PRACTICE_SUGGESTION.
+    client: dandi.dandiapi.DandiAPIClient
+        The client object can be passed to avoid re-instantiation over an iteration.
+
+    Yields
+    ------
+    InspectorMessage
+        One message per issue found, produced file by file.
+    """
+    if client is None:
+        import dandi.dandiapi
+
+        client = dandi.dandiapi.DandiAPIClient()
+
+    dandiset = client.get_dandiset(dandiset_id=dandiset_id, version_id=dandiset_version)
+
+    # Nothing to inspect unless the Dandiset metadata lists NWB among its data standards
+    data_standards = dandiset.get_raw_metadata().get("assetsSummary", {}).get("dataStandard", [])
+    if not any(standard.get("identifier", "") == _NWB_STANDARD_IDENTIFIER for standard in data_standards):
+        return
+
+    for asset in dandiset.get_assets():
+        if ".nwb" not in pathlib.Path(asset.path).suffixes:
+            continue
+
+        dandi_s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)
+        yield from _inspect_dandi_s3_nwb(
+            dandi_s3_url=dandi_s3_url,
+            checks=checks,
+            ignore=ignore,
+            select=select,
+            importance_threshold=importance_threshold,
+        )
+
+
+def inspect_dandi_file_path(
+    *,
+    dandi_file_path: str,
+    dandiset_id: str,
+    dandiset_version: Union[str, Literal["draft"], None] = None,
+    checks: Optional[list] = None,
+    ignore: Optional[List[str]] = None,
+    select: Optional[List[str]] = None,
+    importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
+    client: Union["dandi.dandiapi.DandiAPIClient", None] = None,
+) -> Iterable[InspectorMessage]:
+    """
+    Inspect a single NWB file from a Dandiset for common issues.
+
+    Parameters
+    ----------
+    dandi_file_path : string
+        The path to the Dandifile as seen on the archive; e.g., 'sub-123_ses-456+ecephys.nwb'.
+    dandiset_id : six-digit string
+        The six-digit ID of the Dandiset.
+    dandiset_version : string, "draft", or None
+        The specific published version of the Dandiset to inspect.
+        If None, the latest version is used.
+        If there are no published versions, then 'draft' is used instead.
+    checks : list, optional
+        list of checks to run
+    ignore: list, optional
+        Names of functions to skip.
+    select: list, optional
+        Names of functions to pick out of available checks.
+    importance_threshold : string or Importance, optional
+        Ignores tests with an assigned importance below this threshold.
+        Importance has three levels:
+
+            CRITICAL
+                - potentially incorrect data
+            BEST_PRACTICE_VIOLATION
+                - very suboptimal data representation
+            BEST_PRACTICE_SUGGESTION
+                - improvable data representation
+
+        The default is the lowest level, BEST_PRACTICE_SUGGESTION.
+    client: dandi.dandiapi.DandiAPIClient
+        The client object can be passed to avoid re-instantiation over an iteration.
+
+    Yields
+    ------
+    InspectorMessage
+        One message per issue found in the file.
+    """
+    if client is None:
+        import dandi.dandiapi
+
+        client = dandi.dandiapi.DandiAPIClient()
+
+    dandiset = client.get_dandiset(dandiset_id=dandiset_id, version_id=dandiset_version)
+    asset = dandiset.get_asset_by_path(path=dandi_file_path)
+    dandi_s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)
+
+    yield from _inspect_dandi_s3_nwb(
+        dandi_s3_url=dandi_s3_url,
+        checks=checks,
+        ignore=ignore,
+        select=select,
+        importance_threshold=importance_threshold,
+    )
+
+
+def _inspect_dandi_s3_nwb(
+    *,
+    dandi_s3_url: str,
+    checks: Union[list, None] = None,
+    ignore: Union[List[str], None] = None,
+    select: Union[List[str], None] = None,
+    importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
+) -> Iterable[InspectorMessage]:
+    """Stream the NWB file at `dandi_s3_url` and yield the messages found with the 'dandi' configuration."""
+    import remfile
+
+    byte_stream = remfile.File(url=dandi_s3_url)
+
+    # Keep both handles open while messages are being produced; the `with` closes them afterwards
+    with h5py.File(name=byte_stream, mode="r") as file, pynwb.NWBHDF5IO(file=file, mode="r") as io:
+        nwbfile = io.read()
+
+        yield from inspect_nwbfile_object(
+            nwbfile_object=nwbfile,
+            checks=checks,
+            config=load_config(filepath_or_keyword="dandi"),
+            ignore=ignore,
+            select=select,
+            importance_threshold=importance_threshold,
+        )
diff --git a/src/nwbinspector/_inspection.py b/src/nwbinspector/_nwb_inspection.py
similarity index 99%
rename from src/nwbinspector/_inspection.py
rename to src/nwbinspector/_nwb_inspection.py
index 6cf8b391d..ae4476c1b 100644
--- a/src/nwbinspector/_inspection.py
+++ b/src/nwbinspector/_nwb_inspection.py
@@ -372,7 +372,7 @@ def inspect_nwbfile_object(
     ignore: Optional[List[str]] = None,
     select: Optional[List[str]] = None,
     importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
-) -> List[InspectorMessage]:
+) -> Iterable[InspectorMessage]:
     """
     Inspect an in-memory NWBFile object and return suggestions for improvements according to best practices.
 
diff --git a/src/nwbinspector/nwbinspector/__init__.py b/src/nwbinspector/nwbinspector/__init__.py
index 9530be85a..fe2e1a9f1 100644
--- a/src/nwbinspector/nwbinspector/__init__.py
+++ b/src/nwbinspector/nwbinspector/__init__.py
@@ -16,7 +16,7 @@
     load_config,
     configure_checks,
 )
-from .._inspection import (
+from .._nwb_inspection import (
     inspect_all,
     inspect_nwb,  # TODO: remove
     inspect_nwbfile,