From bfd41830cf98a9aaf7f998990b27153612c77939 Mon Sep 17 00:00:00 2001
From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com>
Date: Thu, 24 Nov 2022 11:16:17 -0500
Subject: [PATCH] Include compression in ImageSeries size check (#311)

* include compression consideration with image series size check

* Update CHANGELOG.md

Co-authored-by: CodyCBakerPhD
---
 CHANGELOG.md                            |  6 +++
 src/nwbinspector/checks/image_series.py |  9 +++-
 tests/unit_tests/test_image_series.py   | 69 ++++++++++++++++++++++++-
 3 files changed, 80 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c18099e9a..f7a05f437 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Upcoming
 
+### Improvements
+
+* Added compression size consideration to `check_image_series_data_size`. [PR #311](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/311)
+
+
+
 # v0.4.19
 
 ### Fixes
diff --git a/src/nwbinspector/checks/image_series.py b/src/nwbinspector/checks/image_series.py
index 8eb20bf98..76dae4e98 100644
--- a/src/nwbinspector/checks/image_series.py
+++ b/src/nwbinspector/checks/image_series.py
@@ -57,6 +57,11 @@ def check_image_series_data_size(image_series: ImageSeries, gb_lower_bound: floa
     Best Practice: :ref:`best_practice_use_external_mode`
     """
     data = image_series.data
-    data_size_gb = data.size * data.dtype.itemsize / 1e9
+
+    if getattr(data, "compression", None) is not None:
+        data_size_gb = data.id.get_storage_size() / 1e9
+    else:
+        data_size_gb = data.size * data.dtype.itemsize / 1e9
+
     if data_size_gb > gb_lower_bound:
-        return InspectorMessage(message=f"ImageSeries {image_series.name} is too large. Use external mode for storage")
+        return InspectorMessage(message="ImageSeries is very large. Consider using external mode for better storage.")
diff --git a/tests/unit_tests/test_image_series.py b/tests/unit_tests/test_image_series.py
index e7d654fe1..a6c1d4302 100644
--- a/tests/unit_tests/test_image_series.py
+++ b/tests/unit_tests/test_image_series.py
@@ -1,8 +1,10 @@
 import unittest
 from pathlib import Path
+from tempfile import mkdtemp
+from shutil import rmtree
 
 import numpy as np
-from pynwb import NWBHDF5IO
+from pynwb import NWBHDF5IO, H5DataIO
 from pynwb.image import ImageSeries
 
 from nwbinspector import (
@@ -12,6 +14,7 @@
     check_image_series_external_file_relative,
     check_image_series_data_size,
 )
+from nwbinspector.tools import make_minimal_nwbfile
 from nwbinspector.testing import load_testing_config
 
 try:
@@ -131,7 +134,7 @@ def test_check_large_image_series_stored_internally():
 
     expected_message = InspectorMessage(
         importance=Importance.BEST_PRACTICE_VIOLATION,
-        message=f"ImageSeries {image_series.name} is too large. Use external mode for storage",
+        message="ImageSeries is very large. Consider using external mode for better storage.",
         check_function_name="check_image_series_data_size",
         object_type="ImageSeries",
         object_name="ImageSeriesLarge",
@@ -139,3 +142,65 @@
     )
 
     assert inspector_message == expected_message
+
+
+class TestCheckImageSeriesStoredInternally(unittest.TestCase):
+    maxDiff = None
+
+    @classmethod
+    def setUpClass(cls):
+        cls.tmpdir = Path(mkdtemp())
+        cls.nwbfile_path = cls.tmpdir / "test_compressed_image_series.nwb"
+        cls.gb_size = 0.01  # 10 MB
+
+        image_length = 10
+        total_frames = int(cls.gb_size * 1e9 / np.dtype("float").itemsize) // (image_length * image_length)
+
+        # Use random data in order to give a non-trivial compressed size
+        # Fix the seed to give a consistent result on every run
+        np.random.seed(123)
+        dtype = "uint8"
+        data = np.random.randint(
+            low=0, high=np.iinfo(dtype).max, size=(total_frames, image_length, image_length, 1), dtype=dtype
+        )
+        image_series = ImageSeries(name="ImageSeries", rate=1.0, data=H5DataIO(data=data, compression=True), unit="TestUnit")
+
+        nwbfile = make_minimal_nwbfile()
+        nwbfile.add_acquisition(image_series)
+
+        with NWBHDF5IO(path=cls.nwbfile_path, mode="w") as io:
+            io.write(nwbfile)
+
+    @classmethod
+    def tearDownClass(cls):
+        rmtree(cls.tmpdir)
+
+    def test_check_image_series_stored_internally_compressed_larger_threshold(self):
+        """With compression enabled, the size estimated by the check should be less than the full uncompressed size."""
+        with NWBHDF5IO(path=self.nwbfile_path, mode="r") as io:
+            nwbfile = io.read()
+            image_series = nwbfile.acquisition["ImageSeries"]
+
+            assert check_image_series_data_size(image_series=image_series, gb_lower_bound=self.gb_size) is None
+
+    def test_check_image_series_stored_internally_compressed_smaller_threshold(self):
+        with NWBHDF5IO(path=self.nwbfile_path, mode="r") as io:
+            nwbfile = io.read()
+            image_series = nwbfile.acquisition["ImageSeries"]
+
+            expected_message = InspectorMessage(
+                importance=Importance.BEST_PRACTICE_VIOLATION,
+                message="ImageSeries is very large. Consider using external mode for better storage.",
+                check_function_name="check_image_series_data_size",
+                object_type="ImageSeries",
+                object_name="ImageSeries",
+                location="/acquisition/ImageSeries",
+            )
+
+            assert (
+                check_image_series_data_size(
+                    image_series=image_series,
+                    gb_lower_bound=self.gb_size / 10,  # Compression of uint8 noise is unlikely to exceed a 10:1 ratio
+                )
+                == expected_message
+            )
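
Note on the approach: for HDF5-backed data, h5py reports the active compression filter via `Dataset.compression` (or `None` when uncompressed) and the bytes actually allocated on disk via the low-level `Dataset.id.get_storage_size()`. The `getattr(data, "compression", None)` guard in the patch also keeps the check safe for in-memory arrays, which have no `compression` attribute. The standalone sketch below is not part of the patch; the file name, dataset name, and array shape are arbitrary illustrations of how the nominal and allocated sizes can diverge once compression is applied.

    # Hypothetical illustration of the h5py calls the updated check relies on.
    import h5py
    import numpy as np

    with h5py.File("compression_demo.h5", "w") as file:
        # Zeros compress extremely well, so allocated storage falls far below the nominal size.
        dataset = file.create_dataset(
            name="data", data=np.zeros(shape=(1000, 64, 64), dtype="uint8"), compression="gzip"
        )

        nominal_gb = dataset.size * dataset.dtype.itemsize / 1e9
        if dataset.compression is not None:
            # Same branch the updated check takes: bytes actually allocated on disk.
            allocated_gb = dataset.id.get_storage_size() / 1e9
        else:
            allocated_gb = nominal_gb

        print(f"nominal: {nominal_gb:.6f} GB, allocated: {allocated_gb:.6f} GB")

Before this patch, the check flagged a dataset on its nominal size alone; with the change, a well-compressed ImageSeries whose on-disk footprint is under the threshold no longer triggers the message.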