Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
CodyCBakerPhD committed Aug 23, 2024
1 parent 1009eea commit 233661e
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 326 deletions.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
entry_points={
"console_scripts": [
"nwbinspector=nwbinspector._nwb_inspection_cli:_inspect_all_cli",
"inspect_dandiset=nwbinspector._inspect_dandiset_cli:_inspect_dandiset_cli",
"inspect_dandi_file_path=nwbinspector._inspect_dandi_file_path_cli:_inspect_dandi_file_path_cli",
"inspect_dandiset=nwbinspector._dandi_inspection_cli:_inspect_dandiset_cli",
"inspect_dandi_file_path=nwbinspector._dandi_inspection_cli:_inspect_dandi_file_path_cli",
],
},
license="BSD-3-Clause",
Expand Down
36 changes: 33 additions & 3 deletions src/nwbinspector/_dandi_inspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def inspect_dandiset(
ignore: Union[List[str], None] = None,
select: Union[List[str], None] = None,
importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
skip_validate: bool = False,
show_progress_bar: bool = True,
client: Union["dandi.dandiapi.DandiAPIClient", None] = None,
) -> Iterable[InspectorMessage]:
"""
Expand Down Expand Up @@ -47,6 +49,11 @@ def inspect_dandiset(
- improvable data representation
The default is the lowest level, BEST_PRACTICE_SUGGESTION.
skip_validate : bool, default: False
Skip the PyNWB validation step.
This may be desired for older NWBFiles (< schema version v2.10).
show_progress_bar : bool, default: True
Whether to display a progress bar while scanning the assets on the Dandiset.
client : dandi.dandiapi.DandiAPIClient, optional
The client object can be passed to avoid re-instantiation over an iteration.
"""
Expand All @@ -63,11 +70,19 @@ def inspect_dandiset(
):
yield iter([])

for asset in dandiset.get_assets():
if ".nwb" not in pathlib.Path(asset.path).suffixes:
continue
nwb_assets = [asset for asset in dandiset.get_assets() if ".nwb" in pathlib.Path(asset.path).suffixes]

nwb_assets_iterator = nwb_assets
if show_progress_bar is True:
import tqdm

nwb_assets_iterator = tqdm.tqdm(
iterable=nwb_assets, total=len(nwb_assets), desc="Inspecting NWB files", unit="file", position=0, leave=True
)

for asset in nwb_assets_iterator:
dandi_s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)

yield _insect_dandi_s3_nwb(
dandi_s3_url=dandi_s3_url,
dandiset_id=dandiset_id,
Expand All @@ -76,6 +91,7 @@ def inspect_dandiset(
ignore=ignore,
select=select,
importance_threshold=importance_threshold,
skip_validate=skip_validate,
client=client,
)

Expand Down Expand Up @@ -153,12 +169,26 @@ def _insect_dandi_s3_nwb(
ignore: Union[List[str], None] = None,
select: Union[List[str], None] = None,
importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
skip_validate: bool = False,
) -> Iterable[InspectorMessage]:
import remfile

byte_stream = remfile.File(url=dandi_s3_url)
file = h5py.File(name=byte_stream)
io = pynwb.NWBHDF5IO(file=file)

if skip_validate is False:
validation_errors = pynwb.validate(io=io)

for validation_error in validation_errors:
yield InspectorMessage(
message=validation_error.reason,
importance=Importance.PYNWB_VALIDATION,
check_function_name=validation_error.name,
location=validation_error.location,
file_path=nwbfile_path,
)

nwbfile = io.read()

yield inspect_nwbfile_object(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,131 +4,185 @@
import re
import json
from pathlib import Path
from typing import Optional
from typing import Optional, Union
from warnings import warn
import importlib

import click

from ._formatting import _get_report_header
from . import Importance, inspect_all, format_messages, print_to_console, save_report
from ._formatting import _get_report_header, format_messages, print_to_console, save_report
from ._types import Importance
from .utils import strtobool
from ._dandi_inspection import inspect_dandiset, inspect_dandi_file_path


@click.command("--inspect_dandiset")
@click.option(
"--dandiset_id",
"--dandiset",
help="The six-digit ID of the Dandiset to inspect.",
required=True,
type=str,
required=True,
default=None,
)
@click.option(
"--report-file-path",
"---version",
help="The version of the Dandiset to inspect.",
type=str,
required=False,
default=None,
help="Save path for the report file.",
)
@click.option(
"--path",
help="File path specifying where to save the report for future reference.",
type=click.Path(writable=True),
required=False,
default=None,
)
@click.option("--levels", help="Comma-separated names of InspectorMessage attributes to organize by.")
@click.option(
"--reverse", help="Comma-separated booleans corresponding to reversing the order for each value of 'levels'."
"--JSON",
help="Save the report as a JSON file at the specified path. Otherwise, the classic text structure is used.",
is_flag=True,
required=False,
default=None,
)
@click.option(
"--levels",
help="Comma-separated names of InspectorMessage attributes to organize by.",
type=str,
required=False,
default=None,
)
@click.option("--overwrite", help="Overwrite an existing report file at the location.", is_flag=True)
@click.option("--ignore", help="Comma-separated names of checks to skip.")
@click.option("--select", help="Comma-separated names of checks to run.")
@click.option(
"--importance_threshold",
"--reverse",
help="Comma-separated booleans corresponding to reversing the order for each value of 'levels'.",
type=str,
required=False,
default=None,
)
@click.option(
"--ignore",
help="Comma-separated names of checks to skip.",
type=str,
required=False,
default=None,
)
@click.option(
"--select",
help="Comma-separated names of checks to run.",
type=str,
required=False,
default=None,
)
@click.option(
"--importance",
help="Ignores tests with an assigned importance below this threshold.",
type=click.Choice(["CRITICAL", "BEST_PRACTICE_VIOLATION", "BEST_PRACTICE_SUGGESTION"]),
required=False,
default="BEST_PRACTICE_SUGGESTION",
)
@click.option("--config", help="Name of config or path of config .yaml file that overwrites importance of checks.")
@click.option("--json-file-path", help="Write JSON output to this location.")
@click.option("--n-jobs", help="Number of jobs to use in parallel.", default=1)
@click.option(
"--skip-validate",
"--validate",
help="Skip the PyNWB validation step.",
is_flag=True,
required=False,
default=False,
default=True,
)
@click.option(
"--detailed",
help=(
"If file_path is the last of 'levels' (the default), identical checks will be aggregated in the display. "
"If file_path is the last of 'levels' (the default), similar checks will be aggregated in the display. "
"Use `--detailed` to see the complete report."
),
is_flag=True,
required=False,
default=False,
)
@click.option(
"--progress-bar",
help="Set this flag to False to disable display of the progress bar.",
type=str,
"--progress",
help="Whether to display a progress bar while scanning the assets on the Dandiset.",
is_flag=True,
required=False,
default=True,
)
@click.option(
"--modules",
help="Modules to import prior to reading the file(s). Necessary for registration of custom checks functions.",
type=str,
required=False,
default=None,
)
def _inspect_dandiset_cli(
dandiset_id: str,
report_file_path: str = None,
*,
dandiset: str,
version: str,
path: str = None,
JSON: bool = None,
levels: str = None,
reverse: Optional[str] = None,
overwrite: bool = False,
ignore: Optional[str] = None,
select: Optional[str] = None,
reverse: Union[str, None] = None,
ignore: Union[str, None] = None,
select: Union[str, None] = None,
threshold: str = "BEST_PRACTICE_SUGGESTION",
config: Optional[str] = None,
json_file_path: Optional[str] = None,
n_jobs: int = 1,
skip_validate: bool = False,
json_file_path: Union[str, None] = None,
validate: bool = True,
detailed: bool = False,
progress_bar: Optional[str] = None,
modules: Optional[str] = None,
):
"""Run the NWBInspector via the command line."""
levels = ["importance", "file_path"] if levels is None else levels.split(",")
reverse = [False] * len(levels) if reverse is None else [strtobool(x) for x in reverse.split(",")]
progress_bar = strtobool(progress_bar) if progress_bar is not None else True
modules = [] if modules is None else modules.split(",")
progress: bool = None,
modules: Union[str, None] = None,
) -> None:
"""
Run the `inspect_dandiset` method via the command line.
if config is not None:
config = load_config(filepath_or_keyword=config)
Shortens a few argument names for the CLI.
Always enforces the DANDI config.
"""
# Match one-word CLI arguments to the API for readability (except for comma splits)
# Also parse items from string-based inputs into their correct Python types
dandiset_id = dandiset
dandiset_version = version
report_file_path = pathlib.Path(path) if path is not None else None
as_json_format = JSON
split_levels = levels.split(",") if levels is not None else ["importance", "file_path"]
split_reverse = [strtobool(x) for x in reverse.split(",")] if reverse is not None else [False] * len(levels)
split_ignore = ignore.split(",") if ignore is not None else None
split_select = select.split(",") if select is not None else None
importance_threshold = Importance[threshold]
show_progress_bar = progress
split_modules = modules.split(",") if modules is not None else []

for module_name in split_modules:
importlib.import_module(name=module_name)

messages = list(
inspect_dandiset(
dandiset_id=dandiset_id,
modules=modules,
ignore=ignore if ignore is None else ignore.split(","),
select=select if select is None else select.split(","),
importance_threshold=Importance[threshold],
config=config,
n_jobs=n_jobs,
skip_validate=skip_validate,
progress_bar=progress_bar,
stream=stream,
version_id=version_id,
dandiset_version=dandiset_version,
ignore=split_ignore,
select=split_select,
importance_threshold=importance_threshold,
skip_validate=not validate,
show_progress_bar=show_progress_bar,
)
)

if json_file_path is not None:
if Path(json_file_path).exists() and not overwrite:
raise FileExistsError(f"The file {json_file_path} already exists! Specify the '-o' flag to overwrite.")
with open(file=json_file_path, mode="w") as fp:
formatted_messages = format_messages(
messages=messages, levels=split_levels, reverse=split_reverse, detailed=detailed
)
print_to_console(formatted_messages=formatted_messages)

if path is None:
return None

if as_json_format is True:
with open(file=path, mode="w") as fp:
json_report = dict(header=_get_report_header(), messages=messages)
json.dump(obj=json_report, fp=fp, cls=InspectorOutputJSONEncoder)
print(f"{os.linesep*2}Report saved to {str(Path(json_file_path).absolute())}!{os.linesep}")

formatted_messages = format_messages(messages=messages, levels=levels, reverse=reverse, detailed=detailed)
print_to_console(formatted_messages=formatted_messages)
if report_file_path is not None:
save_report(report_file_path=report_file_path, formatted_messages=formatted_messages, overwrite=overwrite)
else:
save_report(report_file_path=path, formatted_messages=formatted_messages, overwrite=True)
print(f"{os.linesep*2}Report saved to {str(Path(report_file_path).absolute())}!{os.linesep}")

return None


@click.command("--inspect_dandi_file_path")
@click.option(
Expand Down Expand Up @@ -186,6 +240,7 @@ def _inspect_dandiset_cli(
),
)
def _inspect_dandi_file_path_cli(
*,
path: str,
modules: Optional[str] = None,
report_file_path: str = None,
Expand All @@ -203,7 +258,7 @@ def _inspect_dandi_file_path_cli(
progress_bar: Optional[str] = None,
stream: bool = False,
version_id: Optional[str] = None,
):
) -> None:
"""
Run the NWBInspector via the command line.
Expand Down Expand Up @@ -257,3 +312,5 @@ def _inspect_dandi_file_path_cli(
if report_file_path is not None:
save_report(report_file_path=report_file_path, formatted_messages=formatted_messages, overwrite=overwrite)
print(f"{os.linesep*2}Report saved to {str(Path(report_file_path).absolute())}!{os.linesep}")

return None
Loading

0 comments on commit 233661e

Please sign in to comment.