diff --git a/CHANGELOG.md b/CHANGELOG.md index 24ed9130d..8d2f5b482 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,18 @@ # Upcoming +### Deprecation (API) +* Certain low-level functions have been marked as private (such as the former `nwbinspector.register_checks.auto_parse`) indicating they should not have been imported by downstream users. [#485](https://github.com/NeurodataWithoutBorders/nwbinspector/issues/485) +* Various inappropriate imports from certain submodules have been hard deprecated (e.g., `from nwbinspector.inspector_tools import natsorted`). [#485](https://github.com/NeurodataWithoutBorders/nwbinspector/issues/485) + +### Pending Deprecation (API) +* The `inspector_tools`, `register_check`, and `` submodules have been soft deprecated and will be removed in the next major release. [#485](https://github.com/NeurodataWithoutBorders/nwbinspector/issues/485) + ### Improvements * Update util function `is_ascending_series` to discard nan values and add `check_timestamps_without_nans` fun to check if timestamps contain NaN values [#476](https://github.com/NeurodataWithoutBorders/nwbinspector/issues/476) +* Updated the import structure to match modern Python packaging standards. [#485](https://github.com/NeurodataWithoutBorders/nwbinspector/issues/485) + + # v0.4.37 diff --git a/docs/api/nwbinspector.rst b/docs/api/nwbinspector.rst index 1fb49417c..2d4bdee0f 100644 --- a/docs/api/nwbinspector.rst +++ b/docs/api/nwbinspector.rst @@ -1,4 +1,4 @@ Core Library ============ -.. automodule:: nwbinspector.nwbinspector +.. automodule:: nwbinspector._inspection diff --git a/docs/api/register_check.rst b/docs/api/register_check.rst index ca3fa6271..213f503fa 100644 --- a/docs/api/register_check.rst +++ b/docs/api/register_check.rst @@ -3,4 +3,6 @@ Data Classes and Check Registration All common data class used across the package, as well as the decorator for adding a check function to the registry. -.. automodule:: nwbinspector.register_checks +.. automodule:: nwbinspector._types + +.. automodule:: nwbinspector._registration diff --git a/docs/api/tools.rst b/docs/api/tools.rst index 48028764e..60384deb3 100644 --- a/docs/api/tools.rst +++ b/docs/api/tools.rst @@ -1,5 +1,5 @@ -Tools for Organizating and Displaying Reports -============================================= +Tools for Organizing and Displaying Reports +=========================================== .. automodule:: nwbinspector :members: @@ -9,5 +9,8 @@ Tools for Organizating and Displaying Reports .. toctree:: :maxdepth: 4 -.. automodule:: nwbinspector.inspector_tools +.. automodule:: nwbinspector._organization + :noindex: + +.. automodule:: nwbinspector._formatting :noindex: diff --git a/docs/conf.py b/docs/conf.py index f1605ca2d..998c66e8d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,9 +1,12 @@ +import os import sys from pathlib import Path +from nwbinspector import available_checks +from collections import defaultdict + sys.path.append(str(Path(__file__).parent)) from conf_extlinks import extlinks, intersphinx_mapping -from gen_checks_by_importance import gen_checks_by_importance sys.path.insert(0, Path(__file__).resolve().parents[1]) @@ -73,4 +76,35 @@ def setup(app): app.connect("autodoc-process-docstring", add_refs_to_docstrings) -gen_checks_by_importance() +def _gen_checks_by_importance(): + dd = defaultdict(list) + + for x in available_checks: + dd[x.importance.name.replace("_", " ")].append(x) + + generate_checks_rst_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "checks_by_importance.rst") + with open(generate_checks_rst_file, "w") as f: + f.write( + """Checks by Importance +====================== + +This section lists the available checks organized by their importance level. + +""" + ) + + for importance_level, checks in dd.items(): + f.write( + f"""{importance_level} +{'-' * (len(f'{importance_level}') + 1)} + +""" + ) + + for check in checks: + f.write(f"* :py:func:`~{check.__module__}.{check.__name__}`\n") + + f.write("\n") + + +_gen_checks_by_importance() diff --git a/docs/gen_checks_by_importance.py b/docs/gen_checks_by_importance.py deleted file mode 100644 index d65614999..000000000 --- a/docs/gen_checks_by_importance.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from nwbinspector import available_checks - -from collections import defaultdict - - -def gen_checks_by_importance(): - dd = defaultdict(list) - - for x in available_checks: - dd[x.importance.name.replace("_", " ")].append(x) - - generate_checks_rst_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "checks_by_importance.rst") - with open(generate_checks_rst_file, "w") as f: - f.write( - """Checks by Importance -====================== - -This section lists the available checks organized by their importance level. - -""" - ) - - for importance_level, checks in dd.items(): - f.write( - f"""{importance_level} -{'-' * (len(f'{importance_level}') + 1)} - -""" - ) - - for check in checks: - f.write(f"* :py:func:`~{check.__module__}.{check.__name__}`\n") - - f.write("\n") diff --git a/setup.py b/setup.py index 8619d666a..b6b741704 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ long_description = f.read() with open(root / "requirements.txt") as f: install_requires = f.readlines() -with open(root / "src" / "nwbinspector" / "version.py") as f: +with open(root / "src" / "nwbinspector" / "_version.py") as f: version = f.read() # Instantiate the testing configuration file from the base file `base_test_config.json` @@ -34,7 +34,7 @@ install_requires=install_requires, # zarr<2.18.0 because of https://github.com/NeurodataWithoutBorders/nwbinspector/pull/460 extras_require=dict(dandi=["dandi>=0.39.2", "zarr<2.18.0"], zarr=["hdmf_zarr>=0.3.0", "zarr<2.18.0"]), - entry_points={"console_scripts": ["nwbinspector=nwbinspector.nwbinspector:inspect_all_cli"]}, + entry_points={"console_scripts": ["nwbinspector=nwbinspector._inspection_cli:_inspect_all_cli"]}, license="BSD-3-Clause", classifiers=[ "Development Status :: 4 - Beta", diff --git a/src/nwbinspector/__init__.py b/src/nwbinspector/__init__.py index c73d0f208..682725704 100644 --- a/src/nwbinspector/__init__.py +++ b/src/nwbinspector/__init__.py @@ -1,24 +1,47 @@ -from .version import __version__ -from .register_checks import available_checks, Importance -from .nwbinspector import ( - InspectorOutputJSONEncoder, +from ._version import __version__ +from ._registration import available_checks, register_check +from ._types import Importance, Severity, InspectorMessage +from ._configuration import load_config, validate_config, configure_checks +from ._inspection import ( inspect_all, inspect_nwbfile, inspect_nwbfile_object, run_checks, - load_config, ) -from .nwbinspector import inspect_nwb # TODO: remove after 7/1/2023 -from .checks.ecephys import * -from .checks.general import * -from .checks.image_series import * -from .checks.images import * -from .checks.nwb_containers import * -from .checks.nwbfile_metadata import * -from .checks.ogen import * -from .checks.ophys import * -from .checks.tables import * -from .checks.time_series import * -from .checks.icephys import * +from ._inspection import inspect_nwb # TODO: remove after 7/1/2023 +from ._formatting import ( + format_messages, + print_to_console, + save_report, + MessageFormatter, + FormatterOptions, + InspectorOutputJSONEncoder, +) +from ._organization import organize_messages +from .checks import * default_check_registry = {check.__name__: check for check in available_checks} + +__all__ = [ + "available_checks", + "default_check_registry", + "register_check", + "Importance", + "Severity", + "InspectorMessage", + "load_config", + "configure_checks", + "InspectorOutputJSONEncoder", + "inspect_all", + "inspect_nwbfile", + "inspect_nwbfile_object", + "inspect_nwb", # TODO: remove after 7/1/2023 + "run_checks", + "format_messages", + "print_to_console", + "save_report", + "MessageFormatter", + "FormatterOptions", + "organize_messages", + "__version__", +] diff --git a/src/nwbinspector/_configuration.py b/src/nwbinspector/_configuration.py new file mode 100644 index 000000000..17daeca2e --- /dev/null +++ b/src/nwbinspector/_configuration.py @@ -0,0 +1,137 @@ +"""Primary functions for inspecting NWBFiles.""" + +import json +import jsonschema +from pathlib import Path +from enum import Enum +from typing import Optional, List +from types import FunctionType +from packaging.version import Version + +import yaml + +from . import available_checks +from ._registration import InspectorMessage, Importance +from nwbinspector.utils._utils import ( + PathType, +) + +INTERNAL_CONFIGS = dict(dandi=Path(__file__).parent / "internal_configs" / "dandi.inspector_config.yaml") + + +def validate_config(config: dict): + """Validate an instance of configuration against the official schema.""" + with open(file=Path(__file__).parent / "config.schema.json", mode="r") as fp: + schema = json.load(fp=fp) + jsonschema.validate(instance=config, schema=schema) + + +def _copy_function(function): + """Copy the core parts of a given function, excluding wrappers, then return a new function.""" + copied_function = FunctionType( + function.__code__, function.__globals__, function.__name__, function.__defaults__, function.__closure__ + ) + + # in case f was given attrs (note this dict is a shallow copy) + copied_function.__dict__.update(function.__dict__) + return copied_function + + +def copy_check(check): + """ + Copy a check function so that internal attributes can be adjusted without changing the original function. + + Required to ensure our configuration of functions in the registry does not effect the registry itself. + + Also copies the wrapper for auto-parsing results, + see https://github.com/NeurodataWithoutBorders/nwbinspector/pull/218 for explanation. + """ + copied_check = _copy_function(function=check) + copied_check.__wrapped__ = _copy_function(function=check.__wrapped__) + return copied_check + + +def load_config(filepath_or_keyword: PathType) -> dict: + """ + Load a config dictionary either via keyword search of the internal configs, or an explicit filepath. + + Currently supported keywords are: + - 'dandi' + For all DANDI archive related practices, including validation and upload. + """ + file = INTERNAL_CONFIGS.get(filepath_or_keyword, filepath_or_keyword) + with open(file=file, mode="r") as stream: + config = yaml.load(stream=stream, Loader=yaml.Loader) + return config + + +def configure_checks( + checks: list = available_checks, + config: Optional[dict] = None, + ignore: Optional[List[str]] = None, + select: Optional[List[str]] = None, + importance_threshold: Importance = Importance.BEST_PRACTICE_SUGGESTION, +) -> list: + """ + Filter a list of check functions (the entire base registry by default) according to the configuration. + + Parameters + ---------- + config : dict + Dictionary valid against our JSON configuration schema. + Can specify a mapping of importance levels and list of check functions whose importance you wish to change. + Typically loaded via json.load from a valid .json file + checks : list of check functions + Defaults to all registered checks. + ignore: list, optional + Names of functions to skip. + select: list, optional + If loading all registered checks, this can be shorthand for selecting only a handful of them. + importance_threshold : string, optional + Ignores all tests with an post-configuration assigned importance below this threshold. + Importance has three levels: + + CRITICAL + - potentially incorrect data + BEST_PRACTICE_VIOLATION + - very suboptimal data representation + BEST_PRACTICE_SUGGESTION + - improvable data representation + + The default is the lowest level, BEST_PRACTICE_SUGGESTION. + """ + if ignore is not None and select is not None: + raise ValueError("Options 'ignore' and 'select' cannot both be used.") + if importance_threshold not in Importance: + raise ValueError( + f"Indicated importance_threshold ({importance_threshold}) is not a valid importance level! Please choose " + "from [CRITICAL_IMPORTANCE, BEST_PRACTICE_VIOLATION, BEST_PRACTICE_SUGGESTION]." + ) + if config is not None: + validate_config(config=config) + checks_out = [] + ignore = ignore or [] + for check in checks: + mapped_check = copy_check(check=check) + for importance_name, func_names in config.items(): + if check.__name__ in func_names: + if importance_name == "SKIP": + ignore.append(check.__name__) + continue + mapped_check.importance = Importance[importance_name] + # Output wrappers are apparently parsed at time of wrapping not of time of output return... + # Attempting to re-wrap the copied function if the importance level is being adjusted... + # From https://github.com/NeurodataWithoutBorders/nwbinspector/issues/302 + # new_check_wrapper = _copy_function(function=mapped_check.__wrapped__) + # new_check_wrapper.importance = Importance[importance_name] + # mapped_check.__wrapped__ = new_check_wrapper + checks_out.append(mapped_check) + else: + checks_out = checks + if select: + checks_out = [x for x in checks_out if x.__name__ in select] + elif ignore: + checks_out = [x for x in checks_out if x.__name__ not in ignore] + if importance_threshold: + checks_out = [x for x in checks_out if x.importance.value >= importance_threshold.value] + return checks_out diff --git a/src/nwbinspector/inspector_tools.py b/src/nwbinspector/_formatting.py similarity index 83% rename from src/nwbinspector/inspector_tools.py rename to src/nwbinspector/_formatting.py index ee4703806..26f026f29 100644 --- a/src/nwbinspector/inspector_tools.py +++ b/src/nwbinspector/_formatting.py @@ -1,6 +1,7 @@ """Internally used tools specifically for rendering more human-readable output from collected check results.""" import os +import json import sys from typing import Dict, List, Optional, Union from pathlib import Path @@ -9,15 +10,28 @@ from platform import platform from collections import defaultdict -from natsort import natsorted - import numpy as np -from .register_checks import InspectorMessage, Importance -from .utils import FilePathType, get_package_version +from ._types import InspectorMessage, Importance +from ._organization import organize_messages +from .utils import get_package_version, FilePathType + + +class InspectorOutputJSONEncoder(json.JSONEncoder): + """Custom JSONEncoder for the NWBInspector.""" + + def default(self, o): # noqa D102 + if isinstance(o, InspectorMessage): + return o.__dict__ + if isinstance(o, Enum): + return o.name + if isinstance(o, Version): + return str(o) + else: + return super().default(o) -def get_report_header(): +def _get_report_header(): """Grab basic information from system at time of report generation.""" return dict( Timestamp=str(datetime.now().astimezone()), @@ -26,54 +40,6 @@ def get_report_header(): ) -def _sort_unique_values(unique_values: list, reverse: bool = False): - """Technically, the 'set' method applies basic sorting to the unique contents, but natsort is more general.""" - if any(unique_values) and isinstance(unique_values[0], Enum): - return natsorted(unique_values, key=lambda x: -x.value, reverse=reverse) - else: - return natsorted(unique_values, reverse=reverse) - - -def organize_messages(messages: List[InspectorMessage], levels: List[str], reverse: Optional[List[bool]] = None): - """ - General function for organizing list of InspectorMessages. - - Returns a nested dictionary organized according to the order of the 'levels' argument. - - Parameters - ---------- - messages : list of InspectorMessages - levels: list of strings - Each string in this list must correspond onto an attribute of the InspectorMessage class, excluding the - 'message' text and 'object_name' (this will be coupled to the 'object_type'). - """ - assert all([x not in levels for x in ["message", "object_name", "severity"]]), ( - "You must specify levels to organize by that correspond to attributes of the InspectorMessage class, excluding " - "the text message, object_name, and severity." - ) - if reverse is None: - reverse = [False] * len(levels) - unique_values = list(set(getattr(message, levels[0]) for message in messages)) - sorted_values = _sort_unique_values(unique_values, reverse=reverse[0]) - if len(levels) > 1: - return { - value: organize_messages( - messages=[message for message in messages if getattr(message, levels[0]) == value], - levels=levels[1:], - reverse=reverse[1:], - ) - for value in sorted_values - } - else: - return { - value: sorted( - [message for message in messages if getattr(message, levels[0]) == value], - key=lambda x: -x.severity.value, - ) - for value in sorted_values - } - - class FormatterOptions: """Class structure for defining all free attributes for the design of a report format.""" @@ -227,7 +193,7 @@ def _add_subsection( def format_messages(self) -> List[str]: """Deploy recursive addition of sections, terminating with message display.""" - report_header = get_report_header() + report_header = _get_report_header() self.formatted_messages.extend( [ "*" * 50, diff --git a/src/nwbinspector/nwbinspector.py b/src/nwbinspector/_inspection.py similarity index 63% rename from src/nwbinspector/nwbinspector.py rename to src/nwbinspector/_inspection.py index 8f6f89e7c..6cf8b391d 100644 --- a/src/nwbinspector/nwbinspector.py +++ b/src/nwbinspector/_inspection.py @@ -1,34 +1,21 @@ """Primary functions for inspecting NWBFiles.""" -import os import re import importlib import traceback -import json -import jsonschema from pathlib import Path -from enum import Enum from typing import Union, Optional, List, Iterable from concurrent.futures import ProcessPoolExecutor, as_completed -from types import FunctionType from warnings import filterwarnings, warn from collections import defaultdict from packaging.version import Version -import click import pynwb -import yaml from tqdm import tqdm from natsort import natsorted -from . import available_checks -from .inspector_tools import ( - get_report_header, - format_messages, - print_to_console, - save_report, -) -from .register_checks import InspectorMessage, Importance +from . import available_checks, configure_checks +from ._registration import InspectorMessage, Importance from .tools import get_s3_urls_and_dandi_paths from .utils import ( FilePathType, @@ -37,268 +24,7 @@ robust_s3_read, calculate_number_of_cpu, get_package_version, - strtobool, -) -from nwbinspector import __version__ - -INTERNAL_CONFIGS = dict(dandi=Path(__file__).parent / "internal_configs" / "dandi.inspector_config.yaml") - - -class InspectorOutputJSONEncoder(json.JSONEncoder): - """Custom JSONEncoder for the NWBInspector.""" - - def default(self, o): # noqa D102 - if isinstance(o, InspectorMessage): - return o.__dict__ - if isinstance(o, Enum): - return o.name - if isinstance(o, Version): - return str(o) - else: - return super().default(o) - - -def validate_config(config: dict): - """Validate an instance of configuration against the official schema.""" - with open(file=Path(__file__).parent / "config.schema.json", mode="r") as fp: - schema = json.load(fp=fp) - jsonschema.validate(instance=config, schema=schema) - - -def _copy_function(function): - """Copy the core parts of a given function, excluding wrappers, then return a new function.""" - copied_function = FunctionType( - function.__code__, function.__globals__, function.__name__, function.__defaults__, function.__closure__ - ) - - # in case f was given attrs (note this dict is a shallow copy) - copied_function.__dict__.update(function.__dict__) - return copied_function - - -def copy_check(check): - """ - Copy a check function so that internal attributes can be adjusted without changing the original function. - - Required to ensure our configuration of functions in the registry does not effect the registry itself. - - Also copies the wrapper for auto-parsing results, - see https://github.com/NeurodataWithoutBorders/nwbinspector/pull/218 for explanation. - """ - copied_check = _copy_function(function=check) - copied_check.__wrapped__ = _copy_function(function=check.__wrapped__) - return copied_check - - -def load_config(filepath_or_keyword: PathType) -> dict: - """ - Load a config dictionary either via keyword search of the internal configs, or an explicit filepath. - - Currently supported keywords are: - - 'dandi' - For all DANDI archive related practices, including validation and upload. - """ - file = INTERNAL_CONFIGS.get(filepath_or_keyword, filepath_or_keyword) - with open(file=file, mode="r") as stream: - config = yaml.load(stream=stream, Loader=yaml.Loader) - return config - - -def configure_checks( - checks: list = available_checks, - config: Optional[dict] = None, - ignore: Optional[List[str]] = None, - select: Optional[List[str]] = None, - importance_threshold: Importance = Importance.BEST_PRACTICE_SUGGESTION, -) -> list: - """ - Filter a list of check functions (the entire base registry by default) according to the configuration. - - Parameters - ---------- - config : dict - Dictionary valid against our JSON configuration schema. - Can specify a mapping of importance levels and list of check functions whose importance you wish to change. - Typically loaded via json.load from a valid .json file - checks : list of check functions - Defaults to all registered checks. - ignore: list, optional - Names of functions to skip. - select: list, optional - If loading all registered checks, this can be shorthand for selecting only a handful of them. - importance_threshold : string, optional - Ignores all tests with an post-configuration assigned importance below this threshold. - Importance has three levels: - - CRITICAL - - potentially incorrect data - BEST_PRACTICE_VIOLATION - - very suboptimal data representation - BEST_PRACTICE_SUGGESTION - - improvable data representation - - The default is the lowest level, BEST_PRACTICE_SUGGESTION. - """ - if ignore is not None and select is not None: - raise ValueError("Options 'ignore' and 'select' cannot both be used.") - if importance_threshold not in Importance: - raise ValueError( - f"Indicated importance_threshold ({importance_threshold}) is not a valid importance level! Please choose " - "from [CRITICAL_IMPORTANCE, BEST_PRACTICE_VIOLATION, BEST_PRACTICE_SUGGESTION]." - ) - if config is not None: - validate_config(config=config) - checks_out = [] - ignore = ignore or [] - for check in checks: - mapped_check = copy_check(check=check) - for importance_name, func_names in config.items(): - if check.__name__ in func_names: - if importance_name == "SKIP": - ignore.append(check.__name__) - continue - mapped_check.importance = Importance[importance_name] - # Output wrappers are apparently parsed at time of wrapping not of time of output return... - # Attempting to re-wrap the copied function if the importance level is being adjusted... - # From https://github.com/NeurodataWithoutBorders/nwbinspector/issues/302 - # new_check_wrapper = _copy_function(function=mapped_check.__wrapped__) - # new_check_wrapper.importance = Importance[importance_name] - # mapped_check.__wrapped__ = new_check_wrapper - checks_out.append(mapped_check) - else: - checks_out = checks - if select: - checks_out = [x for x in checks_out if x.__name__ in select] - elif ignore: - checks_out = [x for x in checks_out if x.__name__ not in ignore] - if importance_threshold: - checks_out = [x for x in checks_out if x.importance.value >= importance_threshold.value] - return checks_out - - -@click.command() -@click.argument("path") -@click.option("--modules", help="Modules to import prior to reading the file(s).") -@click.option( - "--report-file-path", - default=None, - help="Save path for the report file.", - type=click.Path(writable=True), -) -@click.option("--levels", help="Comma-separated names of InspectorMessage attributes to organize by.") -@click.option( - "--reverse", help="Comma-separated booleans corresponding to reversing the order for each value of 'levels'." -) -@click.option("--overwrite", help="Overwrite an existing report file at the location.", is_flag=True) -@click.option("--ignore", help="Comma-separated names of checks to skip.") -@click.option("--select", help="Comma-separated names of checks to run.") -@click.option( - "--threshold", - default="BEST_PRACTICE_SUGGESTION", - type=click.Choice(["CRITICAL", "BEST_PRACTICE_VIOLATION", "BEST_PRACTICE_SUGGESTION"]), - help="Ignores tests with an assigned importance below this threshold.", -) -@click.option("--config", help="Name of config or path of config .yaml file that overwrites importance of checks.") -@click.option("--json-file-path", help="Write json output to this location.") -@click.option("--n-jobs", help="Number of jobs to use in parallel.", default=1) -@click.option("--skip-validate", help="Skip the PyNWB validation step.", is_flag=True) -@click.option( - "--detailed", - help=( - "If file_path is the last of 'levels' (the default), identical checks will be aggregated in the display. " - "Use '--detailed' to see the complete report." - ), - is_flag=True, -) -@click.option("--progress-bar", help="Set this flag to False to disable display of the progress bar.") -@click.option( - "--stream", - help=( - "Stream data from the DANDI archive. If the 'path' is a local copy of the target DANDISet, specifying this " - "flag will still force the data to be streamed instead of using the local copy. To use the local copy, simply " - "remove this flag. Requires the Read Only S3 (ros3) driver to be installed with h5py." - ), - is_flag=True, ) -@click.option( - "--version-id", - help=( - "When 'path' is a six-digit DANDISet ID, this further specifies which version of " "the DANDISet to inspect." - ), -) -@click.version_option(__version__) -def inspect_all_cli( - path: str, - modules: Optional[str] = None, - report_file_path: str = None, - levels: str = None, - reverse: Optional[str] = None, - overwrite: bool = False, - ignore: Optional[str] = None, - select: Optional[str] = None, - threshold: str = "BEST_PRACTICE_SUGGESTION", - config: Optional[str] = None, - json_file_path: Optional[str] = None, - n_jobs: int = 1, - skip_validate: bool = False, - detailed: bool = False, - progress_bar: Optional[str] = None, - stream: bool = False, - version_id: Optional[str] = None, -): - """ - Run the NWBInspector via the command line. - - path : - Path to either a local NWBFile, a local folder containing NWBFiles, a link to a dataset on - DANDI archive (i.e., https://dandiarchive.org/dandiset/{dandiset_id}/{version_id}), or a six-digit Dandiset ID. - """ - levels = ["importance", "file_path"] if levels is None else levels.split(",") - modules = [] if modules is None else modules.split(",") - reverse = [False] * len(levels) if reverse is None else [strtobool(x) for x in reverse.split(",")] - progress_bar = strtobool(progress_bar) if progress_bar is not None else True - if config is not None: - config = load_config(filepath_or_keyword=config) - if stream: - url_path = path if path.startswith("https://") else None - if url_path: - dandiset_id, version_id = url_path.split("/")[-2:] - path = dandiset_id - assert url_path or re.fullmatch( - pattern="^[0-9]{6}$", string=path - ), "'--stream' flag was enabled, but 'path' is neither a full link to the DANDI archive nor a DANDISet ID." - if Path(path).is_dir(): - warn( - f"The local DANDISet '{path}' exists, but the '--stream' flag was used. " - "NWBInspector will use S3 streaming from DANDI. To use local data, remove the '--stream' flag." - ) - messages = list( - inspect_all( - path=path, - modules=modules, - ignore=ignore if ignore is None else ignore.split(","), - select=select if select is None else select.split(","), - importance_threshold=Importance[threshold], - config=config, - n_jobs=n_jobs, - skip_validate=skip_validate, - progress_bar=progress_bar, - stream=stream, - version_id=version_id, - ) - ) - if json_file_path is not None: - if Path(json_file_path).exists() and not overwrite: - raise FileExistsError(f"The file {json_file_path} already exists! Specify the '-o' flag to overwrite.") - with open(file=json_file_path, mode="w") as fp: - json_report = dict(header=get_report_header(), messages=messages) - json.dump(obj=json_report, fp=fp, cls=InspectorOutputJSONEncoder) - print(f"{os.linesep*2}Report saved to {str(Path(json_file_path).absolute())}!{os.linesep}") - formatted_messages = format_messages(messages=messages, levels=levels, reverse=reverse, detailed=detailed) - print_to_console(formatted_messages=formatted_messages) - if report_file_path is not None: - save_report(report_file_path=report_file_path, formatted_messages=formatted_messages, overwrite=overwrite) - print(f"{os.linesep*2}Report saved to {str(Path(report_file_path).absolute())}!{os.linesep}") def inspect_all( diff --git a/src/nwbinspector/_inspection_cli.py b/src/nwbinspector/_inspection_cli.py new file mode 100644 index 000000000..2f1668137 --- /dev/null +++ b/src/nwbinspector/_inspection_cli.py @@ -0,0 +1,143 @@ +"""Primary functions for inspecting NWBFiles.""" + +import os +import re +import json +from pathlib import Path +from typing import Optional +from warnings import warn + +import click + +from ._formatting import _get_report_header +from . import Importance, inspect_all, format_messages, print_to_console, save_report, __version__ +from .utils import strtobool + + +@click.command() +@click.argument("path") +@click.option("--modules", help="Modules to import prior to reading the file(s).") +@click.option( + "--report-file-path", + default=None, + help="Save path for the report file.", + type=click.Path(writable=True), +) +@click.option("--levels", help="Comma-separated names of InspectorMessage attributes to organize by.") +@click.option( + "--reverse", help="Comma-separated booleans corresponding to reversing the order for each value of 'levels'." +) +@click.option("--overwrite", help="Overwrite an existing report file at the location.", is_flag=True) +@click.option("--ignore", help="Comma-separated names of checks to skip.") +@click.option("--select", help="Comma-separated names of checks to run.") +@click.option( + "--threshold", + default="BEST_PRACTICE_SUGGESTION", + type=click.Choice(["CRITICAL", "BEST_PRACTICE_VIOLATION", "BEST_PRACTICE_SUGGESTION"]), + help="Ignores tests with an assigned importance below this threshold.", +) +@click.option("--config", help="Name of config or path of config .yaml file that overwrites importance of checks.") +@click.option("--json-file-path", help="Write json output to this location.") +@click.option("--n-jobs", help="Number of jobs to use in parallel.", default=1) +@click.option("--skip-validate", help="Skip the PyNWB validation step.", is_flag=True) +@click.option( + "--detailed", + help=( + "If file_path is the last of 'levels' (the default), identical checks will be aggregated in the display. " + "Use '--detailed' to see the complete report." + ), + is_flag=True, +) +@click.option("--progress-bar", help="Set this flag to False to disable display of the progress bar.") +@click.option( + "--stream", + help=( + "Stream data from the DANDI archive. If the 'path' is a local copy of the target DANDISet, specifying this " + "flag will still force the data to be streamed instead of using the local copy. To use the local copy, simply " + "remove this flag. Requires the Read Only S3 (ros3) driver to be installed with h5py." + ), + is_flag=True, +) +@click.option( + "--version-id", + help=( + "When 'path' is a six-digit DANDISet ID, this further specifies which version of " "the DANDISet to inspect." + ), +) +@click.version_option(__version__) +def _inspect_all_cli( + path: str, + modules: Optional[str] = None, + report_file_path: str = None, + levels: str = None, + reverse: Optional[str] = None, + overwrite: bool = False, + ignore: Optional[str] = None, + select: Optional[str] = None, + threshold: str = "BEST_PRACTICE_SUGGESTION", + config: Optional[str] = None, + json_file_path: Optional[str] = None, + n_jobs: int = 1, + skip_validate: bool = False, + detailed: bool = False, + progress_bar: Optional[str] = None, + stream: bool = False, + version_id: Optional[str] = None, +): + """ + Run the NWBInspector via the command line. + + path : + Path to either a local NWBFile, a local folder containing NWBFiles, a link to a dataset on + DANDI archive (i.e., https://dandiarchive.org/dandiset/{dandiset_id}/{version_id}), or a six-digit Dandiset ID. + """ + levels = ["importance", "file_path"] if levels is None else levels.split(",") + modules = [] if modules is None else modules.split(",") + reverse = [False] * len(levels) if reverse is None else [strtobool(x) for x in reverse.split(",")] + progress_bar = strtobool(progress_bar) if progress_bar is not None else True + if config is not None: + config = load_config(filepath_or_keyword=config) + if stream: + url_path = path if path.startswith("https://") else None + if url_path: + dandiset_id, version_id = url_path.split("/")[-2:] + path = dandiset_id + assert url_path or re.fullmatch( + pattern="^[0-9]{6}$", string=path + ), "'--stream' flag was enabled, but 'path' is neither a full link to the DANDI archive nor a DANDISet ID." + if Path(path).is_dir(): + warn( + f"The local DANDISet '{path}' exists, but the '--stream' flag was used. " + "NWBInspector will use S3 streaming from DANDI. To use local data, remove the '--stream' flag." + ) + messages = list( + inspect_all( + path=path, + modules=modules, + ignore=ignore if ignore is None else ignore.split(","), + select=select if select is None else select.split(","), + importance_threshold=Importance[threshold], + config=config, + n_jobs=n_jobs, + skip_validate=skip_validate, + progress_bar=progress_bar, + stream=stream, + version_id=version_id, + ) + ) + if json_file_path is not None: + if Path(json_file_path).exists() and not overwrite: + raise FileExistsError(f"The file {json_file_path} already exists! Specify the '-o' flag to overwrite.") + with open(file=json_file_path, mode="w") as fp: + json_report = dict(header=_get_report_header(), messages=messages) + json.dump(obj=json_report, fp=fp, cls=InspectorOutputJSONEncoder) + print(f"{os.linesep*2}Report saved to {str(Path(json_file_path).absolute())}!{os.linesep}") + formatted_messages = format_messages(messages=messages, levels=levels, reverse=reverse, detailed=detailed) + print_to_console(formatted_messages=formatted_messages) + if report_file_path is not None: + save_report(report_file_path=report_file_path, formatted_messages=formatted_messages, overwrite=overwrite) + print(f"{os.linesep*2}Report saved to {str(Path(report_file_path).absolute())}!{os.linesep}") + + +if __name__ == "__main__": + inspect_all_cli() diff --git a/src/nwbinspector/_organization.py b/src/nwbinspector/_organization.py new file mode 100644 index 000000000..b096b56ed --- /dev/null +++ b/src/nwbinspector/_organization.py @@ -0,0 +1,56 @@ +"""Internally used tools specifically for rendering more human-readable output from collected check results.""" + +from typing import List, Optional +from enum import Enum + +from natsort import natsorted + +from ._registration import InspectorMessage + + +def _sort_unique_values(unique_values: list, reverse: bool = False): + """Technically, the 'set' method applies basic sorting to the unique contents, but natsort is more general.""" + if any(unique_values) and isinstance(unique_values[0], Enum): + return natsorted(unique_values, key=lambda x: -x.value, reverse=reverse) + else: + return natsorted(unique_values, reverse=reverse) + + +def organize_messages(messages: List[InspectorMessage], levels: List[str], reverse: Optional[List[bool]] = None): + """ + General function for organizing list of InspectorMessages. + + Returns a nested dictionary organized according to the order of the 'levels' argument. + + Parameters + ---------- + messages : list of InspectorMessages + levels: list of strings + Each string in this list must correspond onto an attribute of the InspectorMessage class, excluding the + 'message' text and 'object_name' (this will be coupled to the 'object_type'). + """ + assert all([x not in levels for x in ["message", "object_name", "severity"]]), ( + "You must specify levels to organize by that correspond to attributes of the InspectorMessage class, excluding " + "the text message, object_name, and severity." + ) + if reverse is None: + reverse = [False] * len(levels) + unique_values = list(set(getattr(message, levels[0]) for message in messages)) + sorted_values = _sort_unique_values(unique_values, reverse=reverse[0]) + if len(levels) > 1: + return { + value: organize_messages( + messages=[message for message in messages if getattr(message, levels[0]) == value], + levels=levels[1:], + reverse=reverse[1:], + ) + for value in sorted_values + } + else: + return { + value: sorted( + [message for message in messages if getattr(message, levels[0]) == value], + key=lambda x: -x.severity.value, + ) + for value in sorted_values + } diff --git a/src/nwbinspector/register_checks.py b/src/nwbinspector/_registration.py similarity index 60% rename from src/nwbinspector/register_checks.py rename to src/nwbinspector/_registration.py index d8e8ba277..40ad7e734 100644 --- a/src/nwbinspector/register_checks.py +++ b/src/nwbinspector/_registration.py @@ -11,78 +11,12 @@ from pynwb.file import Subject from pynwb.ecephys import Device, ElectrodeGroup - -class Importance(Enum): - """A definition of the valid importance levels for a given check function.""" - - ERROR = 4 - PYNWB_VALIDATION = 3 - CRITICAL = 2 - BEST_PRACTICE_VIOLATION = 1 - BEST_PRACTICE_SUGGESTION = 0 - - -class Severity(Enum): - """ - A definition of the valid severity levels for the output from a given check function. - - Strictly for internal development that improves report organization; users should never directly see these values. - """ - - HIGH = 2 - LOW = 1 +from ._types import Importance, Severity, InspectorMessage available_checks = list() -@dataclass -class InspectorMessage: - """ - The primary output to be returned by every check function. - - Parameters - ---------- - message : str - A message that informs the user of the violation. - severity : Severity, optional - If a check of non-CRITICAL importance has some basis of comparison, such as magnitude of affected data, then - the developer of the check may set the severity as Severity.HIGH or Severity.LOW by calling - `from nwbinspector.register_checks import Severity`. A good example is comparing if h5py.Dataset compression - has been enabled on smaller vs. larger objects (see nwbinspector/checks/nwb_containers.py for details). - - The user will never directly see this severity, but it will prioritize the order in which check results are - presented by the NWBInspector. - - importance : Importance - The Importance level specified by the decorator of the check function. - check_function_name : str - The name of the check function the decorator was applied to. - object_type : str - The specific class of the instantiated object being inspected. - object_name : str - The name of the instantiated object being inspected. - location : str - The location relative to the root of the NWBFile where the inspected object may be found. - file_path : str - The path of the NWBFile this message pertains to - Relative to the path called from inspect_nwb, inspect_all, or the path specified at the command line. - """ - - message: str - importance: Importance = Importance.BEST_PRACTICE_SUGGESTION - severity: Severity = Severity.LOW - check_function_name: str = None - object_type: str = None - object_name: str = None - location: Optional[str] = None - file_path: str = None - - def __repr__(self): - """Representation for InspectorMessage objects according to black format.""" - return "InspectorMessage(\n" + ",\n".join([f" {k}={v.__repr__()}" for k, v in self.__dict__.items()]) + "\n)" - - # TODO: neurodata_type could have annotation hdmf.utils.ExtenderMeta, which seems to apply to all currently checked # objects. We can wait and see how well that holds up before adding it in officially. def register_check(importance: Importance, neurodata_type): @@ -128,11 +62,11 @@ def auto_parse_some_output(*args, **kwargs) -> InspectorMessage: output = check_function(*args, **kwargs) auto_parsed_result = None if isinstance(output, InspectorMessage): - auto_parsed_result = auto_parse(check_function=check_function, obj=obj, result=output) + auto_parsed_result = _auto_parse(check_function=check_function, obj=obj, result=output) elif output is not None: auto_parsed_result = list() for result in output: - auto_parsed_result.append(auto_parse(check_function=check_function, obj=obj, result=result)) + auto_parsed_result.append(_auto_parse(check_function=check_function, obj=obj, result=result)) if not any(auto_parsed_result): auto_parsed_result = None return auto_parsed_result @@ -144,7 +78,7 @@ def auto_parse_some_output(*args, **kwargs) -> InspectorMessage: return register_check_and_auto_parse -def auto_parse(check_function, obj, result: Optional[InspectorMessage] = None): +def _auto_parse(check_function, obj, result: Optional[InspectorMessage] = None): """Automatically fill values in the InspectorMessage from the check function.""" if result is not None: auto_parsed_result = result @@ -158,11 +92,11 @@ def auto_parse(check_function, obj, result: Optional[InspectorMessage] = None): auto_parsed_result.check_function_name = check_function.__name__ auto_parsed_result.object_type = type(obj).__name__ auto_parsed_result.object_name = obj.name - auto_parsed_result.location = parse_location(neurodata_object=obj) + auto_parsed_result.location = _parse_location(neurodata_object=obj) return auto_parsed_result -def parse_location(neurodata_object) -> Optional[str]: +def _parse_location(neurodata_object) -> Optional[str]: """Grab the object location from a h5py.Dataset or a container content that is an h5py.Dataset object.""" known_locations = { NWBFile: "/", diff --git a/src/nwbinspector/_types.py b/src/nwbinspector/_types.py new file mode 100644 index 000000000..623400f1c --- /dev/null +++ b/src/nwbinspector/_types.py @@ -0,0 +1,80 @@ +"""Primary decorator used on a check function to add it to the registry and automatically parse its output.""" + +from collections.abc import Iterable +from functools import wraps +from enum import Enum +from dataclasses import dataclass +from typing import Optional + +import h5py +from pynwb import NWBFile +from pynwb.file import Subject +from pynwb.ecephys import Device, ElectrodeGroup + + +class Importance(Enum): + """A definition of the valid importance levels for a given check function.""" + + ERROR = 4 + PYNWB_VALIDATION = 3 + CRITICAL = 2 + BEST_PRACTICE_VIOLATION = 1 + BEST_PRACTICE_SUGGESTION = 0 + + +class Severity(Enum): + """ + A definition of the valid severity levels for the output from a given check function. + + Strictly for internal development that improves report organization; users should never directly see these values. + """ + + HIGH = 2 + LOW = 1 + + +@dataclass +class InspectorMessage: + """ + The primary output to be returned by every check function. + + Parameters + ---------- + message : str + A message that informs the user of the violation. + severity : Severity, optional + If a check of non-CRITICAL importance has some basis of comparison, such as magnitude of affected data, then + the developer of the check may set the severity as Severity.HIGH or Severity.LOW by calling + `from nwbinspector.register_checks import Severity`. A good example is comparing if h5py.Dataset compression + has been enabled on smaller vs. larger objects (see nwbinspector/checks/nwb_containers.py for details). + + The user will never directly see this severity, but it will prioritize the order in which check results are + presented by the NWBInspector. + + importance : Importance + The Importance level specified by the decorator of the check function. + check_function_name : str + The name of the check function the decorator was applied to. + object_type : str + The specific class of the instantiated object being inspected. + object_name : str + The name of the instantiated object being inspected. + location : str + The location relative to the root of the NWBFile where the inspected object may be found. + file_path : str + The path of the NWBFile this message pertains to + Relative to the path called from inspect_nwb, inspect_all, or the path specified at the command line. + """ + + message: str + importance: Importance = Importance.BEST_PRACTICE_SUGGESTION + severity: Severity = Severity.LOW + check_function_name: str = None + object_type: str = None + object_name: str = None + location: Optional[str] = None + file_path: str = None + + def __repr__(self): + """Representation for InspectorMessage objects according to black format.""" + return "InspectorMessage(\n" + ",\n".join([f" {k}={v.__repr__()}" for k, v in self.__dict__.items()]) + "\n)" diff --git a/src/nwbinspector/version.py b/src/nwbinspector/_version.py similarity index 100% rename from src/nwbinspector/version.py rename to src/nwbinspector/_version.py diff --git a/src/nwbinspector/checks/__init__.py b/src/nwbinspector/checks/__init__.py index e69de29bb..138c28eb9 100644 --- a/src/nwbinspector/checks/__init__.py +++ b/src/nwbinspector/checks/__init__.py @@ -0,0 +1,12 @@ +from .ecephys import * +from .general import * +from .image_series import * +from .images import * +from .nwb_containers import * +from .nwbfile_metadata import * +from .ogen import * +from .ophys import * +from .tables import * +from .time_series import * +from .icephys import * +from .behavior import * diff --git a/src/nwbinspector/checks/behavior.py b/src/nwbinspector/checks/behavior.py index 972e07ab3..8f9929c86 100644 --- a/src/nwbinspector/checks/behavior.py +++ b/src/nwbinspector/checks/behavior.py @@ -3,7 +3,7 @@ import numpy as np from pynwb.behavior import SpatialSeries, CompassDirection -from ..register_checks import register_check, Importance, InspectorMessage +from .._registration import register_check, Importance, InspectorMessage @register_check(importance=Importance.CRITICAL, neurodata_type=SpatialSeries) diff --git a/src/nwbinspector/checks/ecephys.py b/src/nwbinspector/checks/ecephys.py index ae8dea384..2f99df573 100644 --- a/src/nwbinspector/checks/ecephys.py +++ b/src/nwbinspector/checks/ecephys.py @@ -6,8 +6,7 @@ from pynwb.ecephys import ElectricalSeries from ..utils import get_data_shape - -from ..register_checks import register_check, Importance, InspectorMessage +from .._registration import register_check, Importance, InspectorMessage @register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=Units) diff --git a/src/nwbinspector/checks/general.py b/src/nwbinspector/checks/general.py index 08b72244c..e1ef01fdb 100644 --- a/src/nwbinspector/checks/general.py +++ b/src/nwbinspector/checks/general.py @@ -1,6 +1,6 @@ """Check functions that examine any general neurodata_type with the available attributes.""" -from ..register_checks import register_check, InspectorMessage, Importance +from .._registration import register_check, InspectorMessage, Importance COMMON_DESCRIPTION_PLACEHOLDERS = ["no description", "no desc", "none", "placeholder"] diff --git a/src/nwbinspector/checks/icephys.py b/src/nwbinspector/checks/icephys.py index e926b8f2c..57398a7de 100644 --- a/src/nwbinspector/checks/icephys.py +++ b/src/nwbinspector/checks/icephys.py @@ -2,7 +2,7 @@ from pynwb.icephys import IntracellularElectrode -from ..register_checks import register_check, Importance, InspectorMessage +from .._registration import register_check, Importance, InspectorMessage @register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=IntracellularElectrode) diff --git a/src/nwbinspector/checks/image_series.py b/src/nwbinspector/checks/image_series.py index 0d2d76108..3da04fbce 100644 --- a/src/nwbinspector/checks/image_series.py +++ b/src/nwbinspector/checks/image_series.py @@ -6,7 +6,7 @@ from pynwb.image import ImageSeries from pynwb.ophys import TwoPhotonSeries -from ..register_checks import register_check, Importance, InspectorMessage +from .._registration import register_check, Importance, InspectorMessage from ..tools import get_nwbfile_path_from_internal_object diff --git a/src/nwbinspector/checks/images.py b/src/nwbinspector/checks/images.py index e91db0546..a73d5705c 100644 --- a/src/nwbinspector/checks/images.py +++ b/src/nwbinspector/checks/images.py @@ -4,7 +4,7 @@ from pynwb.image import IndexSeries -from ..register_checks import register_check, Importance, InspectorMessage +from .._registration import register_check, Importance, InspectorMessage from ..utils import get_package_version # The Images neurodata type was unavailable prior to PyNWB v.2.1.0 diff --git a/src/nwbinspector/checks/nwb_containers.py b/src/nwbinspector/checks/nwb_containers.py index 1bee852e7..1ad4106a6 100644 --- a/src/nwbinspector/checks/nwb_containers.py +++ b/src/nwbinspector/checks/nwb_containers.py @@ -6,7 +6,7 @@ from pynwb import NWBContainer -from ..register_checks import register_check, Importance, InspectorMessage, Severity +from .._registration import register_check, Importance, InspectorMessage, Severity @register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=NWBContainer) diff --git a/src/nwbinspector/checks/nwbfile_metadata.py b/src/nwbinspector/checks/nwbfile_metadata.py index 5802beccf..f2824a32f 100644 --- a/src/nwbinspector/checks/nwbfile_metadata.py +++ b/src/nwbinspector/checks/nwbfile_metadata.py @@ -7,7 +7,7 @@ from pynwb import NWBFile, ProcessingModule from pynwb.file import Subject -from ..register_checks import register_check, InspectorMessage, Importance +from .._registration import register_check, InspectorMessage, Importance from ..utils import is_module_installed duration_regex = ( diff --git a/src/nwbinspector/checks/ogen.py b/src/nwbinspector/checks/ogen.py index 159af6a24..e23d52e0d 100644 --- a/src/nwbinspector/checks/ogen.py +++ b/src/nwbinspector/checks/ogen.py @@ -1,6 +1,6 @@ from pynwb.ogen import OptogeneticSeries, OptogeneticStimulusSite -from ..register_checks import register_check, Importance, InspectorMessage +from .._registration import register_check, Importance, InspectorMessage @register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=OptogeneticStimulusSite) diff --git a/src/nwbinspector/checks/ophys.py b/src/nwbinspector/checks/ophys.py index a455a38f0..237055055 100644 --- a/src/nwbinspector/checks/ophys.py +++ b/src/nwbinspector/checks/ophys.py @@ -8,8 +8,7 @@ ) from ..utils import get_data_shape - -from ..register_checks import register_check, Importance, InspectorMessage +from .._registration import register_check, Importance, InspectorMessage MIN_LAMBDA = 10.0 # trigger warnings for wavelength values less than this value diff --git a/src/nwbinspector/checks/tables.py b/src/nwbinspector/checks/tables.py index ef13a8b9f..16dc97166 100644 --- a/src/nwbinspector/checks/tables.py +++ b/src/nwbinspector/checks/tables.py @@ -7,9 +7,9 @@ from hdmf.common import DynamicTable, DynamicTableRegion, VectorIndex from pynwb.file import TimeIntervals, Units -from ..register_checks import register_check, InspectorMessage, Importance +from .._registration import register_check, InspectorMessage, Importance from ..utils import ( - _cache_data_selection, + cache_data_selection, format_byte_size, is_ascending_series, is_dict_in_string, @@ -87,8 +87,8 @@ def check_time_intervals_stop_after_start(time_intervals: TimeIntervals, nelems: load the entire arrays. """ if np.any( - np.asarray(_cache_data_selection(data=time_intervals["stop_time"].data, selection=slice(nelems))) - - np.asarray(_cache_data_selection(data=time_intervals["start_time"].data, selection=slice(nelems))) + np.asarray(cache_data_selection(data=time_intervals["stop_time"].data, selection=slice(nelems))) + - np.asarray(cache_data_selection(data=time_intervals["start_time"].data, selection=slice(nelems))) < 0 ): return InspectorMessage( @@ -120,7 +120,7 @@ def check_column_binary_capability(table: DynamicTable, nelems: Optional[int] = if np.asarray(column.data[0]).itemsize == 1: continue # already boolean, int8, or uint8 try: - unique_values = np.unique(_cache_data_selection(data=column.data, selection=slice(nelems))) + unique_values = np.unique(cache_data_selection(data=column.data, selection=slice(nelems))) except TypeError: # some contained objects are unhashable or have no comparison defined continue if unique_values.size != 2: @@ -188,7 +188,7 @@ def check_table_values_for_dict(table: DynamicTable, nelems: Optional[int] = NEL for column in table.columns: if not hasattr(column, "data") or isinstance(column, VectorIndex) or not isinstance(column.data[0], str): continue - for string in _cache_data_selection(data=column.data, selection=slice(nelems)): + for string in cache_data_selection(data=column.data, selection=slice(nelems)): if is_dict_in_string(string=string): message = ( f"The column '{column.name}' contains a string value that contains a dictionary! Please " diff --git a/src/nwbinspector/checks/time_series.py b/src/nwbinspector/checks/time_series.py index 18d5cab43..5757ef3c8 100644 --- a/src/nwbinspector/checks/time_series.py +++ b/src/nwbinspector/checks/time_series.py @@ -5,7 +5,7 @@ from pynwb import TimeSeries from pynwb.image import ImageSeries, IndexSeries -from ..register_checks import register_check, Importance, Severity, InspectorMessage +from .._registration import register_check, Importance, Severity, InspectorMessage from ..utils import is_regular_series, is_ascending_series, get_data_shape diff --git a/src/nwbinspector/inspector_tools/__init__.py b/src/nwbinspector/inspector_tools/__init__.py new file mode 100644 index 000000000..134b1f7b6 --- /dev/null +++ b/src/nwbinspector/inspector_tools/__init__.py @@ -0,0 +1,13 @@ +import warnings + +message = ( + "The 'inspector_tools' submodule has been deprecated. " + "Please import the helper functions from the top-level package." +) + +warnings.warn(message=message, category=DeprecationWarning, stacklevel=2) + +# Still keep imports functional with warning for soft deprecation cycle +# TODO: remove after 9/15/2024 +from .._organization import organize_messages, _get_report_header +from .._formatting import format_message, MessageFormatter, FormatterOptions, print_to_console, save_report diff --git a/src/nwbinspector/nwbinspector/__init__.py b/src/nwbinspector/nwbinspector/__init__.py new file mode 100644 index 000000000..f4df3ee28 --- /dev/null +++ b/src/nwbinspector/nwbinspector/__init__.py @@ -0,0 +1,26 @@ +import warnings + +message = ( + "The 'nwbinspector.nwbinspector' submodule has been deprecated. " + "Please import the helper functions from the top-level package." +) + +warnings.warn(message=message, category=DeprecationWarning, stacklevel=2) + +# Still keep imports functional with warning for soft deprecation cycle +# TODO: remove after 9/15/2024 +from .._configuration import ( + INTERNAL_CONFIGS, + InspectorOutputJSONEncoder, + validate_config, + copy_check, + load_config, + configure_checks, +) +from .._inspection import ( + inspect_all, + inspect_nwb, # TODO: remove + inspect_nwbfile, + inspect_nwbfile_object, + run_checks, +) diff --git a/src/nwbinspector/register_checks/__init__.py b/src/nwbinspector/register_checks/__init__.py new file mode 100644 index 000000000..82435d93d --- /dev/null +++ b/src/nwbinspector/register_checks/__init__.py @@ -0,0 +1,10 @@ +import warnings + +message = "The 'register_checks' submodule has been deprecated. Please import the helper functions from the top-level package." + +warnings.warn(message=message, category=DeprecationWarning, stacklevel=2) + +# Still keep imports functional with warning for soft deprecation cycle +# TODO: remove after 9/15/2024 +from .._types import InspectorMessage, Importance, Severity +from .._registration import register_check, available_checks diff --git a/src/nwbinspector/testing/__init__.py b/src/nwbinspector/testing/__init__.py new file mode 100644 index 000000000..4ff73e5f6 --- /dev/null +++ b/src/nwbinspector/testing/__init__.py @@ -0,0 +1,25 @@ +from ._testing import ( + check_streaming_tests_enabled, + check_streaming_enabled, + check_hdf5_io_open, + check_zarr_io_open, + load_testing_config, + update_testing_config, + generate_testing_files, + generate_image_series_testing_files, + make_minimal_nwbfile, + TESTING_CONFIG_FILE_PATH, +) + +__all__ = [ + "check_streaming_tests_enabled", + "check_streaming_enabled", + "check_hdf5_io_open", + "check_zarr_io_open", + "load_testing_config", + "update_testing_config", + "generate_testing_files", + "generate_image_series_testing_files", + "make_minimal_nwbfile", + "TESTING_CONFIG_FILE_PATH", +] diff --git a/src/nwbinspector/testing.py b/src/nwbinspector/testing/_testing.py similarity index 98% rename from src/nwbinspector/testing.py rename to src/nwbinspector/testing/_testing.py index 6e7985d8f..5d46eabef 100644 --- a/src/nwbinspector/testing.py +++ b/src/nwbinspector/testing/_testing.py @@ -15,7 +15,7 @@ from pynwb import NWBHDF5IO, NWBFile from pynwb.image import ImageSeries -from .utils import is_module_installed, get_package_version, strtobool +from ..utils import is_module_installed, get_package_version, strtobool # The tests must be invoked at the outer level of the repository TESTING_CONFIG_FILE_PATH = Path.cwd() / "tests" / "testing_config.json" diff --git a/src/nwbinspector/tools/__init__.py b/src/nwbinspector/tools/__init__.py index 3521f5a37..243cea2a9 100644 --- a/src/nwbinspector/tools/__init__.py +++ b/src/nwbinspector/tools/__init__.py @@ -1,4 +1,10 @@ from .dandi import get_s3_urls_and_dandi_paths from .nwb import all_of_type, get_nwbfile_path_from_internal_object from ._read_nwbfile import read_nwbfile -from ..testing import check_streaming_enabled, make_minimal_nwbfile # To maintain back-compatability + +__all__ = [ + "get_s3_urls_and_dandi_paths", + "all_of_type", + "get_nwbfile_path_from_internal_object", + "read_nwbfile", +] diff --git a/src/nwbinspector/utils/__init__.py b/src/nwbinspector/utils/__init__.py new file mode 100644 index 000000000..3828286be --- /dev/null +++ b/src/nwbinspector/utils/__init__.py @@ -0,0 +1,34 @@ +from ._utils import ( + get_data_shape, + strtobool, + format_byte_size, + cache_data_selection, + is_regular_series, + is_ascending_series, + is_dict_in_string, + is_string_json_loadable, + is_module_installed, + get_package_version, + robust_s3_read, + calculate_number_of_cpu, + get_data_shape, + PathType, # TODO: deprecate in favor of explicit typing + FilePathType, # TODO: deprecate in favor of explicit typing + OptionalListOfStrings, # TODO: deprecate in favor of explicit typing +) + +__all__ = [ + "get_data_shape", + "strtobool", + "format_byte_size", + "cache_data_selection", + "is_regular_series", + "is_ascending_series", + "is_dict_in_string", + "is_string_json_loadable", + "is_module_installed", + "get_package_version", + "robust_s3_read", + "calculate_number_of_cpu", + "get_data_shape", +] diff --git a/src/nwbinspector/utils.py b/src/nwbinspector/utils/_utils.py similarity index 96% rename from src/nwbinspector/utils.py rename to src/nwbinspector/utils/_utils.py index 055f4eb64..95a17f7e8 100644 --- a/src/nwbinspector/utils.py +++ b/src/nwbinspector/utils/_utils.py @@ -16,11 +16,12 @@ from hdmf.backends.hdf5.h5_utils import H5Dataset +# TODO: deprecated these in favor of explicit typing PathType = TypeVar("PathType", str, Path) # For types that can be either files or folders FilePathType = TypeVar("FilePathType", str, Path) OptionalListOfStrings = Optional[List[str]] -dict_regex = r"({.+:.+})" +dict_regex = r"({.+:.+})" # TODO: remove this from global scope MAX_CACHE_ITEMS = 1000 # lru_cache default is 128 calls of matching input/output, but might need more to get use here @@ -33,7 +34,7 @@ def _cache_data_retrieval_command( return data[selection] -def _cache_data_selection(data: Union[h5py.Dataset, ArrayLike], selection: Union[slice, Tuple[slice]]) -> np.ndarray: +def cache_data_selection(data: Union[h5py.Dataset, ArrayLike], selection: Union[slice, Tuple[slice]]) -> np.ndarray: """Extract the selection lazily from the data object for efficient caching (most beneficial during streaming).""" if isinstance(data, np.memmap): # np.memmap objects are not hashable - simply return the selection lazily return data[selection] @@ -91,7 +92,7 @@ def is_regular_series(series: np.ndarray, tolerance_decimals: int = 9): def is_ascending_series(series: Union[h5py.Dataset, ArrayLike], nelems: Optional[int] = None): """General purpose function for determining if a series is monotonic increasing.""" if isinstance(series, h5py.Dataset): - data = _cache_data_selection(data=series, selection=slice(nelems)) + data = cache_data_selection(data=series, selection=slice(nelems)) else: data = series[:nelems] diff --git a/src/nwbinspector/version/__init__.py b/src/nwbinspector/version/__init__.py new file mode 100644 index 000000000..3127748f2 --- /dev/null +++ b/src/nwbinspector/version/__init__.py @@ -0,0 +1,12 @@ +import warnings + +message = ( + "The 'version' submodule has been deprecated. " + "Please import the version using `importlib.metadata.version('nwbinspector')`." +) + +warnings.warn(message=message, category=DeprecationWarning, stacklevel=2) + +# Still keep imports functional with warning for soft deprecation cycle +# TODO: remove after 9/15/2024 +from .._version import __version__ diff --git a/tests/streaming_tests.py b/tests/streaming_tests.py index 78bec8324..e339ea91b 100644 --- a/tests/streaming_tests.py +++ b/tests/streaming_tests.py @@ -7,9 +7,7 @@ from pathlib import Path from unittest import TestCase -from nwbinspector import Importance -from nwbinspector import inspect_all -from nwbinspector.register_checks import InspectorMessage +from nwbinspector import Importance, inspect_all, InspectorMessage from nwbinspector.tools import read_nwbfile from nwbinspector.testing import check_streaming_tests_enabled, check_hdf5_io_open from nwbinspector.utils import FilePathType diff --git a/tests/test_check_configuration.py b/tests/test_check_configuration.py index bd3cdfa6d..92ff825bf 100644 --- a/tests/test_check_configuration.py +++ b/tests/test_check_configuration.py @@ -9,8 +9,11 @@ check_timestamps_match_first_dimension, available_checks, default_check_registry, + validate_config, + configure_checks, + load_config, ) -from nwbinspector.nwbinspector import validate_config, configure_checks, _copy_function, load_config +from nwbinspector._configuration import _copy_function class TestCheckConfiguration(TestCase): diff --git a/tests/test_inspector.py b/tests/test_inspector.py index 235a5dfcc..7bf3531e0 100644 --- a/tests/test_inspector.py +++ b/tests/test_inspector.py @@ -14,16 +14,23 @@ from nwbinspector import ( Importance, + Severity, + InspectorMessage, + register_check, + load_config, + inspect_all, + inspect_nwbfile, + inspect_nwbfile_object, + available_checks, +) +from nwbinspector.checks import ( check_small_dataset_compression, check_regular_timestamps, check_data_orientation, check_timestamps_match_first_dimension, check_subject_exists, - load_config, ) -from nwbinspector import inspect_all, inspect_nwbfile, inspect_nwbfile_object, available_checks -from nwbinspector.register_checks import Severity, InspectorMessage, register_check -from nwbinspector.tools import make_minimal_nwbfile +from nwbinspector.testing import make_minimal_nwbfile from nwbinspector.utils import FilePathType @@ -120,6 +127,7 @@ def assertFileExists(self, path: FilePathType): def assertLogFileContentsEqual( self, test_file_path: FilePathType, true_file_path: FilePathType, skip_first_newlines: bool = False ): + skip_first_n_lines = 0 with open(file=test_file_path, mode="r") as test_file: with open(file=true_file_path, mode="r") as true_file: test_file_lines = test_file.readlines() diff --git a/tests/test_register_check.py b/tests/test_register_check.py index 1547962c7..28d2062ef 100644 --- a/tests/test_register_check.py +++ b/tests/test_register_check.py @@ -4,7 +4,7 @@ from hdmf.testing import TestCase from pynwb import TimeSeries -from nwbinspector.register_checks import register_check, Importance, Severity, InspectorMessage +from nwbinspector import register_check, Importance, Severity, InspectorMessage class TestRegisterClass(TestCase): diff --git a/tests/test_tools.py b/tests/test_tools.py index a7ae739c6..5f156fe0a 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -5,9 +5,8 @@ import pynwb from hdmf.testing import TestCase +from nwbinspector import InspectorMessage, Importance, Severity, organize_messages from nwbinspector.tools import all_of_type -from nwbinspector.inspector_tools import organize_messages -from nwbinspector.register_checks import InspectorMessage, Importance, Severity def test_all_of_type(): diff --git a/tests/unit_tests/test_behavior.py b/tests/unit_tests/test_behavior.py index bd34161c9..788734beb 100644 --- a/tests/unit_tests/test_behavior.py +++ b/tests/unit_tests/test_behavior.py @@ -2,7 +2,7 @@ import numpy as np from nwbinspector import InspectorMessage, Importance -from nwbinspector.checks.behavior import ( +from nwbinspector.checks import ( check_compass_direction_unit, check_spatial_series_dims, check_spatial_series_degrees_magnitude, diff --git a/tests/unit_tests/test_ecephys.py b/tests/unit_tests/test_ecephys.py index ccd3ab636..4015e94b7 100644 --- a/tests/unit_tests/test_ecephys.py +++ b/tests/unit_tests/test_ecephys.py @@ -8,9 +8,8 @@ from pynwb.misc import Units from hdmf.common.table import DynamicTableRegion, DynamicTable -from nwbinspector import ( - InspectorMessage, - Importance, +from nwbinspector import InspectorMessage, Importance +from nwbinspector.checks import ( check_negative_spike_times, check_electrical_series_dims, check_electrical_series_reference_electrodes_table, diff --git a/tests/unit_tests/test_general.py b/tests/unit_tests/test_general.py index dc32552a3..ba6120030 100644 --- a/tests/unit_tests/test_general.py +++ b/tests/unit_tests/test_general.py @@ -1,6 +1,7 @@ from hdmf.common import DynamicTable -from nwbinspector import InspectorMessage, Importance, check_name_slashes, check_description +from nwbinspector import InspectorMessage, Importance +from nwbinspector.checks import check_name_slashes, check_description def test_check_name_slashes_pass(): diff --git a/tests/unit_tests/test_icephys.py b/tests/unit_tests/test_icephys.py index fe3fa02a7..bd5c6f32d 100644 --- a/tests/unit_tests/test_icephys.py +++ b/tests/unit_tests/test_icephys.py @@ -3,7 +3,8 @@ from pynwb.icephys import IntracellularElectrode from pynwb.device import Device -from nwbinspector import InspectorMessage, Importance, check_intracellular_electrode_cell_id_exists +from nwbinspector import InspectorMessage, Importance +from nwbinspector.checks import check_intracellular_electrode_cell_id_exists from nwbinspector.utils import get_package_version PYNWB_VERSION_LOWER_2_1_0 = get_package_version(name="pynwb") < Version("2.1.0") diff --git a/tests/unit_tests/test_image_series.py b/tests/unit_tests/test_image_series.py index d144172fa..a9b7f5d9c 100644 --- a/tests/unit_tests/test_image_series.py +++ b/tests/unit_tests/test_image_series.py @@ -7,16 +7,14 @@ from pynwb import NWBHDF5IO, H5DataIO from pynwb.image import ImageSeries -from nwbinspector import ( - InspectorMessage, - Importance, +from nwbinspector import InspectorMessage, Importance +from nwbinspector.checks import ( check_image_series_external_file_valid, check_image_series_external_file_relative, check_image_series_data_size, check_timestamps_match_first_dimension, ) -from nwbinspector.tools import make_minimal_nwbfile -from nwbinspector.testing import load_testing_config +from nwbinspector.testing import make_minimal_nwbfile, load_testing_config try: testing_config = load_testing_config() diff --git a/tests/unit_tests/test_images.py b/tests/unit_tests/test_images.py index 6229e4b74..952934273 100644 --- a/tests/unit_tests/test_images.py +++ b/tests/unit_tests/test_images.py @@ -6,7 +6,7 @@ from packaging.version import Version from nwbinspector import InspectorMessage, Importance -from nwbinspector.checks.images import ( +from nwbinspector.checks import ( check_order_of_images_unique, check_order_of_images_len, check_index_series_points_to_image, diff --git a/tests/unit_tests/test_nwb_containers.py b/tests/unit_tests/test_nwb_containers.py index 7a15b6efc..6c114fef7 100644 --- a/tests/unit_tests/test_nwb_containers.py +++ b/tests/unit_tests/test_nwb_containers.py @@ -9,14 +9,13 @@ from pynwb import NWBContainer, NWBFile from pynwb.image import ImageSeries -from nwbinspector import ( - InspectorMessage, - Importance, +from nwbinspector import InspectorMessage, Importance +from nwbinspector.checks import ( check_small_dataset_compression, check_large_dataset_compression, check_empty_string_for_optional_attribute, ) -from nwbinspector.register_checks import Severity +from nwbinspector._registration import Severity class TestNWBContainers(TestCase): diff --git a/tests/unit_tests/test_nwbfile_metadata.py b/tests/unit_tests/test_nwbfile_metadata.py index d86140965..33bbe5072 100644 --- a/tests/unit_tests/test_nwbfile_metadata.py +++ b/tests/unit_tests/test_nwbfile_metadata.py @@ -4,9 +4,8 @@ from pynwb import NWBFile, ProcessingModule from pynwb.file import Subject -from nwbinspector import ( - InspectorMessage, - Importance, +from nwbinspector import InspectorMessage, Importance +from nwbinspector.checks import ( check_experimenter_exists, check_experimenter_form, check_experiment_description, @@ -25,7 +24,7 @@ check_session_start_time_future_date, PROCESSING_MODULE_CONFIG, ) -from nwbinspector.tools import make_minimal_nwbfile +from nwbinspector.testing import make_minimal_nwbfile minimal_nwbfile = make_minimal_nwbfile() diff --git a/tests/unit_tests/test_ogen.py b/tests/unit_tests/test_ogen.py index 0f5ed9e86..158f10af8 100644 --- a/tests/unit_tests/test_ogen.py +++ b/tests/unit_tests/test_ogen.py @@ -5,7 +5,7 @@ from pynwb.device import Device from pynwb.file import NWBFile -from nwbinspector import check_optogenetic_stimulus_site_has_optogenetic_series +from nwbinspector.checks import check_optogenetic_stimulus_site_has_optogenetic_series class TestCheckOptogeneticStimulusSiteHasOptogeneticSeries(TestCase): diff --git a/tests/unit_tests/test_ophys.py b/tests/unit_tests/test_ophys.py index 432fc78fb..13b797087 100644 --- a/tests/unit_tests/test_ophys.py +++ b/tests/unit_tests/test_ophys.py @@ -15,9 +15,8 @@ ) from hdmf.common.table import DynamicTableRegion, DynamicTable -from nwbinspector import ( - InspectorMessage, - Importance, +from nwbinspector import InspectorMessage, Importance +from nwbinspector.checks import ( check_roi_response_series_dims, check_roi_response_series_link_to_plane_segmentation, check_excitation_lambda_in_nm, diff --git a/tests/unit_tests/test_tables.py b/tests/unit_tests/test_tables.py index 8ad90d694..e8a95a90f 100644 --- a/tests/unit_tests/test_tables.py +++ b/tests/unit_tests/test_tables.py @@ -7,9 +7,8 @@ from hdmf.common import DynamicTable, DynamicTableRegion from pynwb.file import TimeIntervals, Units, ElectrodeTable, ElectrodeGroup, Device -from nwbinspector import ( - InspectorMessage, - Importance, +from nwbinspector import InspectorMessage, Importance +from nwbinspector.checks import ( check_empty_table, check_time_interval_time_columns, check_time_intervals_stop_after_start, diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py index 2cea34e38..55d557034 100644 --- a/tests/unit_tests/test_time_series.py +++ b/tests/unit_tests/test_time_series.py @@ -5,9 +5,8 @@ from packaging import version -from nwbinspector import ( - InspectorMessage, - Importance, +from nwbinspector import InspectorMessage, Importance +from nwbinspector.checks import ( check_regular_timestamps, check_data_orientation, check_timestamps_match_first_dimension, @@ -18,8 +17,7 @@ check_timestamp_of_the_first_sample_is_not_negative, check_rate_is_not_zero, ) -from nwbinspector.tools import make_minimal_nwbfile -from nwbinspector.testing import check_streaming_tests_enabled +from nwbinspector.testing import make_minimal_nwbfile, check_streaming_tests_enabled from nwbinspector.utils import get_package_version, robust_s3_read STREAMING_TESTS_ENABLED, DISABLED_STREAMING_TESTS_REASON = check_streaming_tests_enabled()