Merge branch 'dev' into check_name_colon
bendichter authored Nov 27, 2024
2 parents 57abfc2 + cf99315 commit 4c64f2f
Showing 13 changed files with 173 additions and 81 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/auto-publish.yml
Original file line number Diff line number Diff line change
@@ -19,9 +19,9 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install wheel
python -m pip install --upgrade build
- name: Build package
run: python setup.py sdist bdist_wheel
run: python -m build
- name: pypi-publish
uses: pypa/[email protected]
with:
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -11,7 +11,7 @@ repos:
- id: black

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.9
rev: v0.8.0
hooks:
- id: ruff
args: [ --fix ]
@@ -24,7 +24,7 @@ repos:
- tomli

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.11.2
rev: v1.13.0
hooks:
- id: mypy
additional_dependencies: ["types-PyYAML", "types-setuptools"]
10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,14 @@
# Upcoming

### Improvements
* Added support for NumPy 2 and h5py 3.12, and temporarily pinned PyNWB to <3.0. [#536](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/536)

### Fixes
* Fixed issue where the description check failed if the description was a list. [#535](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/535)


# v0.6.0

### Deprecation
* Support for Python 3.8 has been removed. [#508](https://github.com/NeurodataWithoutBorders/nwbinspector/issues/508)

@@ -18,6 +27,7 @@

### Fixes
* Fixed incorrect error message for OptogeneticStimulusSite. [#524](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/524)
* Fixed detection of Zarr directories for inspection. [#531](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/531)


# v0.5.2
8 changes: 8 additions & 0 deletions docs/api/checks.rst
@@ -8,6 +8,10 @@ NWBFile Metadata
----------------
.. automodule:: nwbinspector.checks._nwbfile_metadata

General
-------
.. automodule:: nwbinspector.checks._general

NWB Containers
--------------
.. automodule:: nwbinspector.checks._nwb_containers
@@ -43,3 +47,7 @@ Optogenetics (ogen)
ImageSeries
-----------
.. automodule:: nwbinspector.checks._image_series

Images
------
.. automodule:: nwbinspector.checks._images
1 change: 1 addition & 0 deletions docs/best_practices/best_practices_index.rst
@@ -29,5 +29,6 @@ Authors: Oliver Ruebel, Andrew Tritt, Ryan Ly, Cody Baker and Ben Dichter
ophys
ogen
image_series
images
simulated_data
extensions
32 changes: 32 additions & 0 deletions docs/best_practices/images.rst
@@ -0,0 +1,32 @@
Images
======

Storage of Images
-----------------

.. _best_practice_order_of_images_unique:
.. _best_practice_order_of_images_len:

Storing the order of images in an Images object
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The ``order_of_images`` field of an :ref:`nwb-schema:sec-Images` object is designed to contain the order
of the images in the ``images`` field of the :ref:`nwb-schema:sec-Images` object. As such, all of the values
in the ``order_of_images`` field should be unique, and its length should be equal to the number of
:ref:`nwb-schema:sec-Image` objects in the :ref:`nwb-schema:sec-Images` object.

Check functions: :py:meth:`~nwbinspector.checks._images.check_order_of_images_unique` and
:py:meth:`~nwbinspector.checks._images.check_order_of_images_len`


.. _best_practice_index_series_points_to_image:

Use of IndexSeries
~~~~~~~~~~~~~~~~~~

The use of an :ref:`nwb-schema:sec-IndexSeries` object to point to a :ref:`nwb-schema:sec-TimeSeries` will
be deprecated in a future release of the NWB schema. The :ref:`nwb-schema:sec-IndexSeries` object should
point to an :ref:`nwb-schema:sec-Images` container, which holds a collection of :ref:`nwb-schema:sec-Image`
objects instead.

Check function: :py:meth:`~nwbinspector.checks._images.check_index_series_points_to_image`
4 changes: 1 addition & 3 deletions pyproject.toml
@@ -32,7 +32,7 @@ classifiers = [
]
requires-python = ">=3.9"
dependencies = [
"pynwb>=2.8", # NWB Inspector should always be used with most recent minor versions of PyNWB
"pynwb>=2.8,<3", # NWB Inspector should always be used with most recent minor versions of PyNWB
"hdmf-zarr",
"fsspec",
"s3fs",
@@ -45,8 +45,6 @@ dependencies = [
"click",
"tqdm",
"isodate",
"numpy>=1.22.0,<2.0.0", # TODO: add compatibility for 2.0 - HDMF Zarr and others also still have some troubles
"h5py<3.12.0", # TODO: remove pin when https://github.com/h5py/h5py/issues/2505 is fixed and released
]

[project.optional-dependencies]
11 changes: 7 additions & 4 deletions src/nwbinspector/_nwb_inspection.py
@@ -9,12 +9,13 @@
from warnings import filterwarnings, warn

import pynwb
from hdmf_zarr import ZarrIO
from natsort import natsorted
from tqdm import tqdm

from ._configuration import configure_checks
from ._registration import Importance, InspectorMessage, available_checks
from .tools._read_nwbfile import read_nwbfile, read_nwbfile_and_io
from .tools._read_nwbfile import read_nwbfile
from .utils import (
OptionalListOfStrings,
PathType,
@@ -126,7 +127,9 @@ def inspect_all(
if progress_bar_options is None:
progress_bar_options = dict(position=0, leave=False)

if in_path.is_dir():
if in_path.is_dir() and (in_path.match("*.nwb*")) and ZarrIO.can_read(in_path):
nwbfiles = [in_path] # if it is a zarr directory
elif in_path.is_dir():
nwbfiles = list(in_path.rglob("*.nwb*"))

# Remove any macOS sidecar files
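The new dispatch above can be sketched in isolation. This is a simplified stand-in, not the library code: ``is_zarr_readable`` is a hypothetical hook playing the role of ``ZarrIO.can_read``:

```python
from pathlib import Path

def collect_nwbfiles(in_path, is_zarr_readable):
    """Sketch: a *.nwb path that is itself a directory and readable as Zarr
    is treated as one NWB-Zarr file; any other directory is searched
    recursively for .nwb files."""
    in_path = Path(in_path)
    if in_path.is_dir() and in_path.match("*.nwb*") and is_zarr_readable(in_path):
        return [in_path]  # the directory itself is a single Zarr-backed NWB file
    if in_path.is_dir():
        return sorted(in_path.rglob("*.nwb*"))
    return [in_path]
```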
@@ -271,10 +274,10 @@ def inspect_nwbfile(
filterwarnings(action="ignore", message="Ignoring cached namespace .*")

try:
in_memory_nwbfile, io = read_nwbfile_and_io(nwbfile_path=nwbfile_path)
in_memory_nwbfile = read_nwbfile(nwbfile_path=nwbfile_path)

if not skip_validate:
validation_errors = pynwb.validate(io=io)
validation_errors, _ = pynwb.validate(paths=[nwbfile_path])
for validation_error in validation_errors:
yield InspectorMessage(
message=validation_error.reason,
10 changes: 7 additions & 3 deletions src/nwbinspector/checks/_general.py
@@ -34,9 +34,13 @@ def check_description(neurodata_object: object) -> Optional[InspectorMessage]:
"""
if not hasattr(neurodata_object, "description"):
return None
if neurodata_object.description is None or neurodata_object.description.strip(" ") == "":

description = neurodata_object.description
if description is not None and type(description) is not str:
return None
if description is None or description.strip(" ") == "":
return InspectorMessage(message="Description is missing.")
if neurodata_object.description.lower().strip(".") in COMMON_DESCRIPTION_PLACEHOLDERS:
return InspectorMessage(message=f"Description ('{neurodata_object.description}') is a placeholder.")
if description.lower().strip(".") in COMMON_DESCRIPTION_PLACEHOLDERS:
return InspectorMessage(message=f"Description ('{description}') is a placeholder.")

return None
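The control flow above can be exercised on its own. The sketch below mirrors the new logic; the ``PLACEHOLDERS`` set is a stand-in for ``COMMON_DESCRIPTION_PLACEHOLDERS``, and the return values are simplified strings rather than ``InspectorMessage`` objects:

```python
# Stand-in for COMMON_DESCRIPTION_PLACEHOLDERS
PLACEHOLDERS = {"no description", "description", "placeholder"}

def check_description_sketch(description):
    # Non-string descriptions (e.g. FeatureExtraction's list of feature
    # names) are skipped instead of raising AttributeError on .strip()
    if description is not None and type(description) is not str:
        return None
    if description is None or description.strip(" ") == "":
        return "Description is missing."
    if description.lower().strip(".") in PLACEHOLDERS:
        return f"Description ('{description}') is a placeholder."
    return None
```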
2 changes: 1 addition & 1 deletion src/nwbinspector/checks/_tables.py
@@ -155,7 +155,7 @@ def check_column_binary_capability(
["hit", "miss"],
]
if any([set(parsed_unique_values) == set(pair) for pair in pairs_to_check]): # type: ignore
saved_bytes = (unique_values.dtype.itemsize - 1) * np.product(
saved_bytes = (unique_values.dtype.itemsize - 1) * np.prod(
get_data_shape(data=column.data, strict_no_data_load=True)
)
if unique_values.dtype == "float":
Expand Down
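The one-line change above is a NumPy 2 compatibility fix: ``np.product`` was removed in NumPy 2.0, and ``np.prod`` computes the same thing. A quick illustration with hypothetical numbers (an itemsize-8 column of shape (3, 5) recoded as 1-byte values):

```python
import numpy as np

shape = (3, 5)                       # e.g. the shape returned by get_data_shape
saved_bytes = (8 - 1) * np.prod(shape)  # np.product is gone in NumPy >= 2.0
assert saved_bytes == 105
```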
140 changes: 76 additions & 64 deletions tests/test_inspector.py
@@ -268,74 +268,86 @@ def test_inspect_all(self):
]
self.assertCountEqual(first=test_results, second=true_results)

    def test_inspect_all_parallel(self):
        test_results = list(
            inspect_all(
                path=Path(self.nwbfile_paths[0]).parent,
                select=[x.__name__ for x in self.checks],
                n_jobs=2,
                skip_validate=self.skip_validate,
            )
        )
        true_results = [
            InspectorMessage(
                message="data is not compressed. Consider enabling compression when writing a dataset.",
                importance=Importance.BEST_PRACTICE_SUGGESTION,
                severity=Severity.LOW,
                check_function_name="check_small_dataset_compression",
                object_type="TimeSeries",
                object_name="test_time_series_1",
                location="/acquisition/test_time_series_1",
                file_path=self.nwbfile_paths[0],
            ),
            InspectorMessage(
                message=(
                    "TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 "
                    "and rate=0.5 instead of timestamps."
                ),
                importance=Importance.BEST_PRACTICE_VIOLATION,
                severity=Severity.LOW,
                check_function_name="check_regular_timestamps",
                object_type="TimeSeries",
                object_name="test_time_series_2",
                location="/acquisition/test_time_series_2",
                file_path=self.nwbfile_paths[0],
            ),
            InspectorMessage(
                message=(
                    "Data may be in the wrong orientation. Time should be in the first dimension, and is usually "
                    "the longest dimension. Here, another dimension is longer."
                ),
                importance=Importance.CRITICAL,
                severity=Severity.LOW,
                check_function_name="check_data_orientation",
                object_type="SpatialSeries",
                object_name="my_spatial_series",
                location="/processing/behavior/Position/my_spatial_series",
                file_path=self.nwbfile_paths[0],
            ),
            InspectorMessage(
                message=("The length of the first dimension of data (4) does not match the length of timestamps (3)."),
                importance=Importance.CRITICAL,
                severity=Severity.LOW,
                check_function_name="check_timestamps_match_first_dimension",
                object_type="TimeSeries",
                object_name="test_time_series_3",
                location="/acquisition/test_time_series_3",
                file_path=self.nwbfile_paths[0],
            ),
            InspectorMessage(
                message=(
                    "TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 "
                    "and rate=0.5 instead of timestamps."
                ),
                importance=Importance.BEST_PRACTICE_VIOLATION,
                severity=Severity.LOW,
                check_function_name="check_regular_timestamps",
                object_type="TimeSeries",
                object_name="test_time_series_2",
                location="/acquisition/test_time_series_2",
                file_path=self.nwbfile_paths[1],
            ),
        ]
        self.assertCountEqual(first=test_results, second=true_results)

def test_inspect_all_directory(self):
"""Test that inspect_all will find the file when given a valid path (in the case of Zarr, this path may be a directory)."""
test_results = list(
inspect_all(
path=self.nwbfile_paths[0], select=[x.__name__ for x in self.checks], skip_validate=self.skip_validate
)
)
self.assertGreater(len(test_results), 0)

def test_inspect_nwbfile(self):
test_results = list(
24 changes: 23 additions & 1 deletion tests/unit_tests/test_general.py
@@ -1,4 +1,4 @@
from hdmf.common import DynamicTable
from hdmf.common import DynamicTable, DynamicTableRegion

from nwbinspector import Importance, InspectorMessage
from nwbinspector.checks import check_description, check_name_slashes
@@ -81,3 +81,25 @@ def test_check_description_missing():
object_name="test",
location="/",
)


def test_check_description_feature_extraction():
import numpy as np
from pynwb.ecephys import FeatureExtraction
from pynwb.testing.mock.ecephys import mock_ElectrodeTable

electrodes = mock_ElectrodeTable()

dynamic_table_region = DynamicTableRegion(
name="electrodes", description="I am wrong", data=[0, 1, 2, 3, 4], table=electrodes
)

feature_extraction = FeatureExtraction(
name="PCA_features",
electrodes=dynamic_table_region,
description=["PC1", "PC2", "PC3", "PC4"],
times=[0.033, 0.066, 0.099],
features=np.random.rand(3, 5, 4), # time, channel, feature
)

assert check_description(neurodata_object=feature_extraction) is None
4 changes: 3 additions & 1 deletion tests/unit_tests/test_tables.py
@@ -4,6 +4,7 @@

import numpy as np
from hdmf.common import DynamicTable, DynamicTableRegion
from numpy.lib import NumpyVersion
from pynwb.file import Device, ElectrodeGroup, ElectrodeTable, TimeIntervals, Units

from nwbinspector import Importance, InspectorMessage
@@ -187,7 +188,8 @@ def test_binary_int_fail(self):
self.table.add_column(name="test_col", description="")
for x in [1, 0, 1, 0, 1]:
self.table.add_row(test_col=x)
if platform.system() == "Windows":
# the default NumPy integer on Windows with NumPy < 2 is int32; otherwise it is int64.
if platform.system() == "Windows" and NumpyVersion(np.__version__) < "2.0.0":
platform_saved_bytes = "15.00B"
else:
platform_saved_bytes = "35.00B"
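The ``NumpyVersion`` comparison used above is preferable to comparing version strings directly, because it orders dev builds and pre-releases semantically where plain string comparison would not. A small sketch:

```python
import numpy as np
from numpy.lib import NumpyVersion

# Pre-releases sort before the final release; a naive string comparison
# would put "2.0.0rc1" after "2.0.0"
assert NumpyVersion("2.0.0rc1") < "2.0.0"
assert NumpyVersion("1.26.4") < "2.0.0"

# The pattern used in the test above
is_legacy_default_int = NumpyVersion(np.__version__) < "2.0.0"
```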
