wip

NeurodataWithoutBorders · Aug 23, 2024 · 233661e · 233661e
1 parent 1009eea
commit 233661e
Show file tree

Hide file tree

Showing 4 changed files with 154 additions and 326 deletions.
diff --git a/setup.py b/setup.py
@@ -37,8 +37,8 @@
     entry_points={
         "console_scripts": [
             "nwbinspector=nwbinspector._nwb_inspection_cli:_inspect_all_cli",
-            "inspect_dandiset=nwbinspector._inspect_dandiset_cli:_inspect_dandiset_cli",
-            "inspect_dandi_file_path=nwbinspector._inspect_dandi_file_path_cli:_inspect_dandi_file_path_cli",
+            "inspect_dandiset=nwbinspector._dandi_inspection_cli:_inspect_dandiset_cli",
+            "inspect_dandi_file_path=nwbinspector._dandi_inspection_cli:_inspect_dandi_file_path_cli",
         ],
     },
     license="BSD-3-Clause",

diff --git a/src/nwbinspector/_dandi_inspection.py b/src/nwbinspector/_dandi_inspection.py
@@ -16,6 +16,8 @@ def inspect_dandiset(
     ignore: Union[List[str], None] = None,
     select: Union[List[str], None] = None,
     importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
+    skip_validate: bool = False,
+    show_progress_bar: bool = True,
     client: Union["dandi.dandiapi.DandiAPIClient", None] = None,
 ) -> Iterable[InspectorMessage]:
     """
@@ -47,6 +49,11 @@ def inspect_dandiset(
                 - improvable data representation
 
         The default is the lowest level, BEST_PRACTICE_SUGGESTION.
+    skip_validate : bool, default: False
+        Skip the PyNWB validation step.
+        This may be desired for older NWBFiles (< schema version v2.10).
+    show_progress_bar : bool, default: True
+        Whether to display a progress bar while scanning the assets on the Dandiset.
     client: dandi.dandiapi.DandiAPIClient
         The client object can be passed to avoid re-instantiation over an iteration.
     """
@@ -63,11 +70,19 @@ def inspect_dandiset(
     ):
         yield iter([])
 
-    for asset in dandiset.get_assets():
-        if ".nwb" not in pathlib.Path(asset.path).suffixes:
-            continue
+    nwb_assets = [asset for asset in dandiset.get_assets() if ".nwb" in pathlib.Path(asset.path).suffixes]
 
+    nwb_assets_iterator = nwb_assets
+    if show_progress_bar is True:
+        import tqdm
+
+        nwb_assets_iterator = tqdm.tqdm(
+            iterable=nwb_assets, total=len(nwb_assets), desc="Inspecting NWB files", unit="file", position=0, leave=True
+        )
+
+    for asset in nwb_assets_iterator:
         dandi_s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)
+
         yield _insect_dandi_s3_nwb(
             dandi_s3_url=dandi_s3_url,
             dandiset_id=dandiset_id,
@@ -76,6 +91,7 @@ def inspect_dandiset(
             ignore=ignore,
             select=select,
             importance_threshold=importance_threshold,
+            skip_validate=skip_validate,
             client=client,
         )
 
@@ -153,12 +169,26 @@ def _insect_dandi_s3_nwb(
     ignore: Union[List[str], None] = None,
     select: Union[List[str], None] = None,
     importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
+    skip_validate: bool = False,
 ) -> Iterable[InspectorMessage]:
     import remfile
 
     byte_stream = remfile.File(url=dandi_s3_url)
     file = h5py.File(name=byte_stream)
     io = pynwb.NWBHDF5IO(file=file)
+
+    if skip_validate is False:
+        validation_errors = pynwb.validate(io=io)
+
+        for validation_error in validation_errors:
+            yield InspectorMessage(
+                message=validation_error.reason,
+                importance=Importance.PYNWB_VALIDATION,
+                check_function_name=validation_error.name,
+                location=validation_error.location,
+                file_path=nwbfile_path,
+            )
+
     nwbfile = io.read()
 
     yield inspect_nwbfile_object(

diff --git a/...inspector/_inspect_dandi_file_path_cli.py → src/nwbinspector/_dandi_inspection_cli.py b/...inspector/_inspect_dandi_file_path_cli.py → src/nwbinspector/_dandi_inspection_cli.py
@@ -4,131 +4,185 @@
 import re
 import json
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Union
 from warnings import warn
+import importlib
 
 import click
 
-from ._formatting import _get_report_header
-from . import Importance, inspect_all, format_messages, print_to_console, save_report
+from ._formatting import _get_report_header, format_messages, print_to_console, save_report
+from ._types import Importance
 from .utils import strtobool
 from ._dandi_inspection import inspect_dandiset, inspect_dandi_file_path
 
 
 @click.command("--inspect_dandiset")
 @click.option(
-    "--dandiset_id",
+    "--dandiset",
     help="The six-digit ID of the Dandiset to inspect.",
-    required=True,
     type=str,
+    required=True,
+    default=None,
 )
 @click.option(
-    "--report-file-path",
+    "--version",
+    help="The version of the Dandiset to inspect.",
+    type=str,
+    required=False,
     default=None,
-    help="Save path for the report file.",
+)
+@click.option(
+    "--path",
+    help="File path specifying where to save the report for future reference.",
     type=click.Path(writable=True),
+    required=False,
+    default=None,
 )
-@click.option("--levels", help="Comma-separated names of InspectorMessage attributes to organize by.")
 @click.option(
-    "--reverse", help="Comma-separated booleans corresponding to reversing the order for each value of 'levels'."
+    "--JSON", "JSON",  # explicit name: click would otherwise infer the lowercase `json`
+    help="Save the report as a JSON file at the specified path. Otherwise, the classic text structure is used.",
+    is_flag=True,
+    required=False,
+    default=None,
+)
+@click.option(
+    "--levels",
+    help="Comma-separated names of InspectorMessage attributes to organize by.",
+    type=str,
+    required=False,
+    default=None,
 )
-@click.option("--overwrite", help="Overwrite an existing report file at the location.", is_flag=True)
-@click.option("--ignore", help="Comma-separated names of checks to skip.")
-@click.option("--select", help="Comma-separated names of checks to run.")
 @click.option(
-    "--importance_threshold",
+    "--reverse",
+    help="Comma-separated booleans corresponding to reversing the order for each value of 'levels'.",
+    type=str,
+    required=False,
+    default=None,
+)
+@click.option(
+    "--ignore",
+    help="Comma-separated names of checks to skip.",
+    type=str,
+    required=False,
+    default=None,
+)
+@click.option(
+    "--select",
+    help="Comma-separated names of checks to run.",
+    type=str,
+    required=False,
+    default=None,
+)
+@click.option(
+    "--importance", "threshold",  # bind to the callback's `threshold` parameter
     help="Ignores tests with an assigned importance below this threshold.",
     type=click.Choice(["CRITICAL", "BEST_PRACTICE_VIOLATION", "BEST_PRACTICE_SUGGESTION"]),
+    required=False,
     default="BEST_PRACTICE_SUGGESTION",
 )
-@click.option("--config", help="Name of config or path of config .yaml file that overwrites importance of checks.")
-@click.option("--json-file-path", help="Write JSON output to this location.")
-@click.option("--n-jobs", help="Number of jobs to use in parallel.", default=1)
 @click.option(
-    "--skip-validate",
-    help="Skip the PyNWB validation step.",
+    "--validate/--skip-validate",
+    help="Run the PyNWB validation step (the default); pass `--skip-validate` to skip it.",
     is_flag=True,
     required=False,
-    default=False,
+    default=True,
 )
 @click.option(
     "--detailed",
     help=(
-        "If file_path is the last of 'levels' (the default), identical checks will be aggregated in the display. "
+        "If file_path is the last of 'levels' (the default), similar checks will be aggregated in the display. "
         "Use `--detailed` to see the complete report."
     ),
     is_flag=True,
     required=False,
     default=False,
 )
 @click.option(
-    "--progress-bar",
-    help="Set this flag to False to disable display of the progress bar.",
-    type=str,
+    "--progress/--no-progress",
+    help="Whether to display a progress bar while scanning the assets on the Dandiset.",
+    is_flag=True,
     required=False,
+    default=True,
 )
 @click.option(
     "--modules",
     help="Modules to import prior to reading the file(s). Necessary for registration of custom checks functions.",
     type=str,
     required=False,
+    default=None,
 )
 def _inspect_dandiset_cli(
-    dandiset_id: str,
-    report_file_path: str = None,
+    *,
+    dandiset: str,
+    version: Union[str, None] = None,
+    path: Union[str, None] = None,
+    JSON: Union[bool, None] = None,
     levels: str = None,
-    reverse: Optional[str] = None,
-    overwrite: bool = False,
-    ignore: Optional[str] = None,
-    select: Optional[str] = None,
+    reverse: Union[str, None] = None,
+    ignore: Union[str, None] = None,
+    select: Union[str, None] = None,
     threshold: str = "BEST_PRACTICE_SUGGESTION",
-    config: Optional[str] = None,
-    json_file_path: Optional[str] = None,
-    n_jobs: int = 1,
-    skip_validate: bool = False,
+    json_file_path: Union[str, None] = None,  # unused; no CLI option sets it (kept for compatibility)
+    validate: bool = True,
     detailed: bool = False,
-    progress_bar: Optional[str] = None,
-    modules: Optional[str] = None,
-):
-    """Run the NWBInspector via the command line."""
-    levels = ["importance", "file_path"] if levels is None else levels.split(",")
-    reverse = [False] * len(levels) if reverse is None else [strtobool(x) for x in reverse.split(",")]
-    progress_bar = strtobool(progress_bar) if progress_bar is not None else True
-    modules = [] if modules is None else modules.split(",")
+    progress: bool = True,
+    modules: Union[str, None] = None,
+) -> None:
+    """
+    Run the `inspect_dandiset` method via the command line.
 
-    if config is not None:
-        config = load_config(filepath_or_keyword=config)
+    Shortens a few argument names for the CLI; the report is always printed and is saved only when `--path` is given.
+
+    Always enforces the DANDI config.
+    """
+    # Match one-word CLI arguments to the API for readability (except for comma splits)
+    # Also parse items from string-based inputs into their correct Python types
+    dandiset_id = dandiset
+    dandiset_version = version
+    report_file_path = Path(path) if path is not None else None
+    as_json_format = JSON
+    split_levels = levels.split(",") if levels is not None else ["importance", "file_path"]
+    split_reverse = [strtobool(x) for x in reverse.split(",")] if reverse is not None else [False] * len(split_levels)
+    split_ignore = ignore.split(",") if ignore is not None else None
+    split_select = select.split(",") if select is not None else None
+    importance_threshold = Importance[threshold]
+    show_progress_bar = progress
+    split_modules = modules.split(",") if modules is not None else []
+
+    for module_name in split_modules:
+        importlib.import_module(name=module_name)
 
     messages = list(
         inspect_dandiset(
             dandiset_id=dandiset_id,
-            modules=modules,
-            ignore=ignore if ignore is None else ignore.split(","),
-            select=select if select is None else select.split(","),
-            importance_threshold=Importance[threshold],
-            config=config,
-            n_jobs=n_jobs,
-            skip_validate=skip_validate,
-            progress_bar=progress_bar,
-            stream=stream,
-            version_id=version_id,
+            dandiset_version=dandiset_version,
+            ignore=split_ignore,
+            select=split_select,
+            importance_threshold=importance_threshold,
+            skip_validate=not validate,
+            show_progress_bar=show_progress_bar,
         )
     )
 
-    if json_file_path is not None:
-        if Path(json_file_path).exists() and not overwrite:
-            raise FileExistsError(f"The file {json_file_path} already exists! Specify the '-o' flag to overwrite.")
-        with open(file=json_file_path, mode="w") as fp:
+    formatted_messages = format_messages(
+        messages=messages, levels=split_levels, reverse=split_reverse, detailed=detailed
+    )
+    print_to_console(formatted_messages=formatted_messages)
+
+    if path is None:
+        return None
+
+    if as_json_format is True:
+        with open(file=path, mode="w") as fp:
             json_report = dict(header=_get_report_header(), messages=messages)
             json.dump(obj=json_report, fp=fp, cls=InspectorOutputJSONEncoder)
-            print(f"{os.linesep*2}Report saved to {str(Path(json_file_path).absolute())}!{os.linesep}")
+            print(f"{os.linesep*2}Report saved to {str(Path(report_file_path).absolute())}!{os.linesep}")
-
-    formatted_messages = format_messages(messages=messages, levels=levels, reverse=reverse, detailed=detailed)
-    print_to_console(formatted_messages=formatted_messages)
-    if report_file_path is not None:
-        save_report(report_file_path=report_file_path, formatted_messages=formatted_messages, overwrite=overwrite)
+    else:
+        save_report(report_file_path=path, formatted_messages=formatted_messages, overwrite=True)
         print(f"{os.linesep*2}Report saved to {str(Path(report_file_path).absolute())}!{os.linesep}")
 
+    return None
+
 
 @click.command("--inspect_dandi_file_path")
 @click.option(
@@ -186,6 +240,7 @@ def _inspect_dandiset_cli(
     ),
 )
 def _inspect_dandi_file_path_cli(
+    *,
     path: str,
     modules: Optional[str] = None,
     report_file_path: str = None,
@@ -203,7 +258,7 @@ def _inspect_dandi_file_path_cli(
     progress_bar: Optional[str] = None,
     stream: bool = False,
     version_id: Optional[str] = None,
-):
+) -> None:
     """
     Run the NWBInspector via the command line.
 
@@ -257,3 +312,5 @@ def _inspect_dandi_file_path_cli(
     if report_file_path is not None:
         save_report(report_file_path=report_file_path, formatted_messages=formatted_messages, overwrite=overwrite)
         print(f"{os.linesep*2}Report saved to {str(Path(report_file_path).absolute())}!{os.linesep}")
+
+    return None