Skip to content

Commit

Permalink
initial sketch
Browse files Browse the repository at this point in the history
  • Loading branch information
CodyCBakerPhD committed Aug 23, 2024
1 parent 916ae92 commit f33f768
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 5 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
include_package_data=True, # Includes files described in MANIFEST.in in the installation
install_requires=install_requires,
# zarr<2.18.0 because of https://github.com/NeurodataWithoutBorders/nwbinspector/pull/460
extras_require=dict(dandi=["dandi>=0.39.2", "zarr<2.18.0"], zarr=["hdmf_zarr>=0.3.0", "zarr<2.18.0"]),
extras_require=dict(dandi=["dandi>=0.39.2", "zarr<2.18.0", "remfile"], zarr=["hdmf_zarr>=0.3.0", "zarr<2.18.0"]),
entry_points={"console_scripts": ["nwbinspector=nwbinspector._inspection_cli:_inspect_all_cli"]},
license="BSD-3-Clause",
classifiers=[
Expand Down
7 changes: 5 additions & 2 deletions src/nwbinspector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
from ._registration import available_checks, register_check
from ._types import Importance, Severity, InspectorMessage
from ._configuration import load_config, validate_config, configure_checks
from ._inspection import (
from ._nwb_inspection import (
inspect_all,
inspect_nwbfile,
inspect_nwbfile_object,
run_checks,
)
from ._inspection import inspect_nwb # TODO: remove after 7/1/2023
from ._nwb_inspection import inspect_nwb # TODO: remove after 7/1/2023
from ._formatting import (
format_messages,
print_to_console,
Expand All @@ -18,6 +18,7 @@
InspectorOutputJSONEncoder,
)
from ._organization import organize_messages
from ._dandi_inspection import inspect_dandiset, inspect_dandi_file_path
from .checks import * # These need to be imported to trigger registration with 'available_checks', but are not exposed

default_check_registry = {check.__name__: check for check in available_checks}
Expand All @@ -32,6 +33,8 @@
"load_config",
"configure_checks",
"InspectorOutputJSONEncoder",
"inspect_dandiset",
"inspect_dandi_file_path",
"inspect_all",
"inspect_nwbfile",
"inspect_nwbfile_object",
Expand Down
171 changes: 171 additions & 0 deletions src/nwbinspector/_dandi_inspection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
import pathlib
from typing import Iterable, List, Literal, Optional, Union

import h5py
import pynwb

from ._types import InspectorMessage, Importance
from ._nwb_inspection import inspect_nwbfile_object


def inspect_dandiset(
    *,
    dandiset_id: str,
    dandiset_version: Union[str, Literal["draft"], None] = None,
    checks: Optional[list] = None,
    ignore: Optional[List[str]] = None,
    select: Optional[List[str]] = None,
    importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
    client: Union["dandi.dandiapi.DandiAPIClient", None] = None,
) -> Iterable[InspectorMessage]:
    """
    Inspect every NWB asset of a Dandiset for common issues.

    Assets are streamed from their S3 URL one at a time and the resulting messages are yielded lazily.

    Parameters
    ----------
    dandiset_id : six-digit string
        The six-digit ID of the Dandiset.
    dandiset_version : string, "draft", or None
        The specific published version of the Dandiset to inspect.
        If None, the latest version is used.
        If there are no published versions, then 'draft' is used instead.
    checks : list, optional
        list of checks to run
    ignore: list, optional
        Names of functions to skip.
    select: list, optional
        Names of functions to pick out of available checks.
    importance_threshold : string or Importance, optional
        Ignores tests with an assigned importance below this threshold.
        Importance has three levels:
            CRITICAL
                - potentially incorrect data
            BEST_PRACTICE_VIOLATION
                - very suboptimal data representation
            BEST_PRACTICE_SUGGESTION
                - improvable data representation
        The default is the lowest level, BEST_PRACTICE_SUGGESTION.
    client: dandi.dandiapi.DandiAPIClient
        The client object can be passed to avoid re-instantiation over an iteration.

    Yields
    ------
    InspectorMessage
        The messages produced for each asset whose path has a '.nwb' suffix.
        Nothing is yielded when the Dandiset metadata does not list the NWB data standard.
    """
    if client is None:
        # Imported lazily so that `dandi` is only required when this function is actually used.
        import dandi.dandiapi

        client = dandi.dandiapi.DandiAPIClient()

    dandiset = client.get_dandiset(dandiset_id=dandiset_id, version_id=dandiset_version)

    # Stop early (yielding nothing) unless the asset summary declares the NWB data standard.
    data_standards = dandiset.get_raw_metadata().get("assetsSummary", {}).get("dataStandard", [])
    if not any(
        data_standard.get("identifier", "") == "RRID:SCR_015242"  # Identifier for NWB standard
        for data_standard in data_standards
    ):
        return

    for asset in dandiset.get_assets():
        # Use `suffixes` rather than `suffix` so that compound extensions such as '.nwb.zarr' still match.
        if ".nwb" not in pathlib.Path(asset.path).suffixes:
            continue

        dandi_s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)
        # Only forward the keywords the helper accepts; `yield from` flattens its messages into this stream.
        yield from _insect_dandi_s3_nwb(
            dandi_s3_url=dandi_s3_url,
            checks=checks,
            ignore=ignore,
            select=select,
            importance_threshold=importance_threshold,
        )


def inspect_dandi_file_path(
    *,
    dandi_file_path: str,
    dandiset_id: str,
    dandiset_version: Union[str, Literal["draft"], None] = None,
    checks: Optional[list] = None,
    ignore: Optional[List[str]] = None,
    select: Optional[List[str]] = None,
    importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
    client: Union["dandi.dandiapi.DandiAPIClient", None] = None,
) -> Iterable[InspectorMessage]:
    """
    Inspect a single Dandifile for common issues.

    The asset is looked up by its path within the Dandiset and streamed from its S3 URL.

    Parameters
    ----------
    dandi_file_path : string
        The path to the Dandifile as seen on the archive; e.g., 'sub-123_ses-456+ecephys.nwb'.
    dandiset_id : six-digit string
        The six-digit ID of the Dandiset.
    dandiset_version : string, "draft", or None
        The specific published version of the Dandiset to inspect.
        If None, the latest version is used.
        If there are no published versions, then 'draft' is used instead.
    checks : list, optional
        list of checks to run
    ignore: list, optional
        Names of functions to skip.
    select: list, optional
        Names of functions to pick out of available checks.
    importance_threshold : string or Importance, optional
        Ignores tests with an assigned importance below this threshold.
        Importance has three levels:
            CRITICAL
                - potentially incorrect data
            BEST_PRACTICE_VIOLATION
                - very suboptimal data representation
            BEST_PRACTICE_SUGGESTION
                - improvable data representation
        The default is the lowest level, BEST_PRACTICE_SUGGESTION.
    client: dandi.dandiapi.DandiAPIClient
        The client object can be passed to avoid re-instantiation over an iteration.

    Yields
    ------
    InspectorMessage
        The messages produced by inspecting the requested asset.
    """
    if client is None:
        # Imported lazily so that `dandi` is only required when this function is actually used.
        import dandi.dandiapi

        client = dandi.dandiapi.DandiAPIClient()

    dandiset = client.get_dandiset(dandiset_id=dandiset_id, version_id=dandiset_version)
    asset = dandiset.get_asset_by_path(path=dandi_file_path)
    dandi_s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)

    # The helper does not take a `client`; `yield from` flattens its messages into this stream.
    yield from _insect_dandi_s3_nwb(
        dandi_s3_url=dandi_s3_url,
        checks=checks,
        ignore=ignore,
        select=select,
        importance_threshold=importance_threshold,
    )


def _insect_dandi_s3_nwb(
    *,
    dandi_s3_url: str,
    checks: Union[list, None] = None,
    ignore: Union[List[str], None] = None,
    select: Union[List[str], None] = None,
    importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
) -> Iterable[InspectorMessage]:
    """
    Stream a remote NWB file and yield the messages from inspecting it with the 'dandi' configuration.

    Parameters
    ----------
    dandi_s3_url : string
        The URL of the HDF5-backed NWB file to open through `remfile`.
    checks : list, optional
        list of checks to run
    ignore: list, optional
        Names of functions to skip.
    select: list, optional
        Names of functions to pick out of available checks.
    importance_threshold : string or Importance, optional
        Ignores tests with an assigned importance below this threshold.

    Yields
    ------
    InspectorMessage
        Each message returned by `inspect_nwbfile_object` for the streamed file.
    """
    # Imported lazily so that `remfile` is only required when streaming is actually used.
    import remfile

    byte_stream = remfile.File(url=dandi_s3_url)

    # Keep the handles open for as long as messages are being produced (the checks may read lazily),
    # then release them once the caller has exhausted or discarded this generator.
    with h5py.File(name=byte_stream) as file:
        with pynwb.NWBHDF5IO(file=file) as io:
            nwbfile = io.read()

            yield from inspect_nwbfile_object(
                nwbfile_object=nwbfile,
                checks=checks,
                config="dandi",
                ignore=ignore,
                select=select,
                importance_threshold=importance_threshold,
            )
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ def inspect_nwbfile_object(
ignore: Optional[List[str]] = None,
select: Optional[List[str]] = None,
importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
) -> List[InspectorMessage]:
) -> Iterable[InspectorMessage]:
"""
Inspect an in-memory NWBFile object and return suggestions for improvements according to best practices.
Expand Down
2 changes: 1 addition & 1 deletion src/nwbinspector/nwbinspector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
load_config,
configure_checks,
)
from .._inspection import (
from .._nwb_inspection import (
inspect_all,
inspect_nwb, # TODO: remove
inspect_nwbfile,
Expand Down

0 comments on commit f33f768

Please sign in to comment.