Skip to content

Commit

Permalink
Include compression in ImageSeries size check (#311)
Browse files Browse the repository at this point in the history
* include compression consideration with image series size check

* Update CHANGELOG.md

Co-authored-by: CodyCBakerPhD <[email protected]>
  • Loading branch information
CodyCBakerPhD and CodyCBakerPhD authored Nov 24, 2022
1 parent 5ce4e6e commit bfd4183
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 4 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Upcoming

### Improvements

* Added compression size consideration to `check_image_series_data_size`. [PR #311](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/311)



# v0.4.19

### Fixes
Expand Down
9 changes: 7 additions & 2 deletions src/nwbinspector/checks/image_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ def check_image_series_data_size(image_series: ImageSeries, gb_lower_bound: floa
Best Practice: :ref:`best_practice_use_external_mode`
"""
data = image_series.data
data_size_gb = data.size * data.dtype.itemsize / 1e9

if getattr(data, "compression", None) is not None:
data_size_gb = data.id.get_storage_size() / 1e9
else:
data_size_gb = data.size * data.dtype.itemsize / 1e9

if data_size_gb > gb_lower_bound:
return InspectorMessage(message=f"ImageSeries {image_series.name} is too large. Use external mode for storage")
return InspectorMessage(message="ImageSeries is very large. Consider using external mode for better storage.")
69 changes: 67 additions & 2 deletions tests/unit_tests/test_image_series.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import unittest
from pathlib import Path
from tempfile import mkdtemp
from shutil import rmtree

import numpy as np
from pynwb import NWBHDF5IO
from pynwb import NWBHDF5IO, H5DataIO
from pynwb.image import ImageSeries

from nwbinspector import (
Expand All @@ -12,6 +14,7 @@
check_image_series_external_file_relative,
check_image_series_data_size,
)
from nwbinspector.tools import make_minimal_nwbfile
from nwbinspector.testing import load_testing_config

try:
Expand Down Expand Up @@ -131,11 +134,73 @@ def test_check_large_image_series_stored_internally():

expected_message = InspectorMessage(
importance=Importance.BEST_PRACTICE_VIOLATION,
message=f"ImageSeries {image_series.name} is too large. Use external mode for storage",
message="ImageSeries is very large. Consider using external mode for better storage.",
check_function_name="check_image_series_data_size",
object_type="ImageSeries",
object_name="ImageSeriesLarge",
location="/",
)

assert inspector_message == expected_message


class TestCheckImageSeriesStoredInternally(unittest.TestCase):
    """Run ``check_image_series_data_size`` against an ImageSeries read back from an NWB file on disk.

    A single file holding one internally stored ImageSeries is written once
    for the whole class and removed again when the class is torn down.
    """

    maxDiff = None

    @classmethod
    def setUpClass(cls):
        """Write an NWB file containing one ImageSeries of random uint8 frames."""
        cls.tmpdir = Path(mkdtemp())
        cls.nwbfile_path = cls.tmpdir / "test_compressed_image_series.nwb"
        cls.gb_size = 0.01  # 10 MB

        image_length = 10
        # NOTE(review): the frame count is derived from the itemsize of "float",
        # while the array below is uint8, so the stored array is smaller than
        # ``gb_size`` - confirm this is intended before changing it.
        total_frames = int(cls.gb_size * 1e9 / np.dtype("float").itemsize) // (image_length * image_length)

        # Use random data in order to give non-trivial compression size
        # Fix the seed to give consistent result every run.
        # ``np.random.seed`` must be *called*; assigning to it seeds nothing and
        # replaces the function with an int for the rest of the process.
        np.random.seed(123)
        dtype = "uint8"
        data = np.random.randint(
            low=0, high=np.iinfo(dtype).max, size=(total_frames, image_length, image_length, 1), dtype=dtype
        )
        image_series = ImageSeries(name="ImageSeries", rate=1.0, data=H5DataIO(data), unit="TestUnit")

        nwbfile = make_minimal_nwbfile()
        nwbfile.add_acquisition(image_series)

        with NWBHDF5IO(path=cls.nwbfile_path, mode="w") as io:
            io.write(nwbfile)

    @classmethod
    def tearDownClass(cls):
        """Delete the temporary directory together with the NWB file inside it."""
        rmtree(cls.tmpdir)

    def test_check_image_series_stored_internally_compressed_larger_threshold(self):
        """With a lower bound of ``gb_size``, the check should not report the series."""
        with NWBHDF5IO(path=self.nwbfile_path, mode="r") as io:
            nwbfile = io.read()
            image_series = nwbfile.acquisition["ImageSeries"]

            assert check_image_series_data_size(image_series=image_series, gb_lower_bound=self.gb_size) is None

    def test_check_image_series_stored_internally_compressed_smaller_threshold(self):
        """With a lower bound of ``gb_size / 10``, the check should report the series as too large."""
        with NWBHDF5IO(path=self.nwbfile_path, mode="r") as io:
            nwbfile = io.read()
            image_series = nwbfile.acquisition["ImageSeries"]

            expected_message = InspectorMessage(
                importance=Importance.BEST_PRACTICE_VIOLATION,
                message="ImageSeries is very large. Consider using external mode for better storage.",
                check_function_name="check_image_series_data_size",
                object_type="ImageSeries",
                object_name="ImageSeries",
                location="/acquisition/ImageSeries",
            )

            assert (
                check_image_series_data_size(
                    image_series=image_series,
                    gb_lower_bound=self.gb_size / 10,  # Compression of uint8 noise is unlikely to be more than 10:1 ratio
                )
                == expected_message
            )

0 comments on commit bfd4183

Please sign in to comment.