From 05a567550b444356d78955b6a3084c96759ed082 Mon Sep 17 00:00:00 2001 From: Paul Adkisson Date: Thu, 4 Apr 2024 11:59:53 -0400 Subject: [PATCH] Multifile scanimage (#297) * added singleplane multifile extractor * added multiplane multifile extractor * updated frames_per_slice to work better with single-plane data * added multifile extractors to the init * added multifile extractors to extractor list * added multifile tests * added multifile multiplane tests * improved tests to actually check file names * switched to natsorted * added option to only extract first files metadata and then propagate it to all other files * added some docstring info * added metadata tests * moved natsort import to __init__ * retrigger checks * propagated parsed_metadata along with metadata * updated docstrings * added coverage for extract_all_metadata options --- src/roiextractors/extractorlist.py | 4 + .../tiffimagingextractors/__init__.py | 6 + .../scanimagetiffimagingextractor.py | 180 ++++++++++++++++-- tests/test_scanimagetiffimagingextractor.py | 130 ++++++++++++- 4 files changed, 303 insertions(+), 17 deletions(-) diff --git a/src/roiextractors/extractorlist.py b/src/roiextractors/extractorlist.py index 71feb655..07ca8239 100644 --- a/src/roiextractors/extractorlist.py +++ b/src/roiextractors/extractorlist.py @@ -18,6 +18,8 @@ ScanImageTiffImagingExtractor, ScanImageTiffSinglePlaneImagingExtractor, ScanImageTiffMultiPlaneImagingExtractor, + ScanImageTiffSinglePlaneMultiFileImagingExtractor, + ScanImageTiffMultiPlaneMultiFileImagingExtractor, BrukerTiffMultiPlaneImagingExtractor, BrukerTiffSinglePlaneImagingExtractor, MicroManagerTiffImagingExtractor, @@ -37,6 +39,8 @@ ScanImageTiffImagingExtractor, ScanImageTiffSinglePlaneImagingExtractor, ScanImageTiffMultiPlaneImagingExtractor, + ScanImageTiffSinglePlaneMultiFileImagingExtractor, + ScanImageTiffMultiPlaneMultiFileImagingExtractor, BrukerTiffMultiPlaneImagingExtractor, BrukerTiffSinglePlaneImagingExtractor, 
MicroManagerTiffImagingExtractor, diff --git a/src/roiextractors/extractors/tiffimagingextractors/__init__.py b/src/roiextractors/extractors/tiffimagingextractors/__init__.py index 4de3eb70..5d37e2bc 100644 --- a/src/roiextractors/extractors/tiffimagingextractors/__init__.py +++ b/src/roiextractors/extractors/tiffimagingextractors/__init__.py @@ -21,6 +21,10 @@ Specialized extractor for reading single-plane TIFF files produced via ScanImage. ScanImageTiffMultiPlaneImagingExtractor Specialized extractor for reading multi-plane TIFF files produced via ScanImage. +ScanImageTiffSinglePlaneMultiFileImagingExtractor + Specialized extractor for reading single-plane multi-file TIFF files produced via ScanImage. +ScanImageTiffMultiPlaneMultiFileImagingExtractor + Specialized extractor for reading multi-plane multi-file TIFF files produced via ScanImage. BrukerTiffMultiPlaneImagingExtractor Specialized extractor for reading TIFF files produced via Bruker. BrukerTiffSinglePlaneImagingExtractor @@ -34,6 +38,8 @@ ScanImageTiffImagingExtractor, ScanImageTiffMultiPlaneImagingExtractor, ScanImageTiffSinglePlaneImagingExtractor, + ScanImageTiffSinglePlaneMultiFileImagingExtractor, + ScanImageTiffMultiPlaneMultiFileImagingExtractor, ) from .brukertiffimagingextractor import BrukerTiffMultiPlaneImagingExtractor, BrukerTiffSinglePlaneImagingExtractor from .micromanagertiffimagingextractor import MicroManagerTiffImagingExtractor diff --git a/src/roiextractors/extractors/tiffimagingextractors/scanimagetiffimagingextractor.py b/src/roiextractors/extractors/tiffimagingextractors/scanimagetiffimagingextractor.py index acad3904..8f873a42 100644 --- a/src/roiextractors/extractors/tiffimagingextractors/scanimagetiffimagingextractor.py +++ b/src/roiextractors/extractors/tiffimagingextractors/scanimagetiffimagingextractor.py @@ -14,6 +14,7 @@ from ...extraction_tools import PathType, FloatType, ArrayType, DtypeType, get_package from ...imagingextractor import ImagingExtractor from 
...volumetricimagingextractor import VolumetricImagingExtractor +from ...multiimagingextractor import MultiImagingExtractor from .scanimagetiff_utils import ( extract_extra_metadata, parse_metadata, @@ -22,6 +23,108 @@ ) +class ScanImageTiffMultiPlaneMultiFileImagingExtractor(MultiImagingExtractor): + """Specialized extractor for reading multi-file (buffered) TIFF files produced via ScanImage.""" + + extractor_name = "ScanImageTiffMultiPlaneMultiFileImaging" + is_writable = True + mode = "folder" + + def __init__( + self, folder_path: PathType, file_pattern: str, channel_name: str, extract_all_metadata: bool = True + ) -> None: + """Create a ScanImageTiffMultiPlaneMultiFileImagingExtractor instance from a folder of TIFF files produced by ScanImage. + + Parameters + ---------- + folder_path : PathType + Path to the folder containing the TIFF files. + file_pattern : str + Pattern for the TIFF files to read -- see pathlib.Path.glob for details. + channel_name : str + Channel name for this extractor. + extract_all_metadata : bool + If True, extract metadata from every file in the folder. If False, only extract metadata from the first + file in the folder. The default is True. 
+ """ + self.folder_path = Path(folder_path) + from natsort import natsorted + + file_paths = natsorted(self.folder_path.glob(file_pattern)) + if len(file_paths) == 0: + raise ValueError(f"No files found in folder with pattern: {file_pattern}") + if not extract_all_metadata: + metadata = extract_extra_metadata(file_paths[0]) + parsed_metadata = parse_metadata(metadata) + else: + metadata, parsed_metadata = None, None + imaging_extractors = [] + for file_path in file_paths: + imaging_extractor = ScanImageTiffMultiPlaneImagingExtractor( + file_path=file_path, + channel_name=channel_name, + metadata=metadata, + parsed_metadata=parsed_metadata, + ) + imaging_extractors.append(imaging_extractor) + super().__init__(imaging_extractors=imaging_extractors) + + +class ScanImageTiffSinglePlaneMultiFileImagingExtractor(MultiImagingExtractor): + """Specialized extractor for reading multi-file (buffered) TIFF files produced via ScanImage.""" + + extractor_name = "ScanImageTiffSinglePlaneMultiFileImaging" + is_writable = True + mode = "folder" + + def __init__( + self, + folder_path: PathType, + file_pattern: str, + channel_name: str, + plane_name: str, + extract_all_metadata: bool = True, + ) -> None: + """Create a ScanImageTiffSinglePlaneMultiFileImagingExtractor instance from a folder of TIFF files produced by ScanImage. + + Parameters + ---------- + folder_path : PathType + Path to the folder containing the TIFF files. + file_pattern : str + Pattern for the TIFF files to read -- see pathlib.Path.glob for details. + channel_name : str + Name of the channel for this extractor. + plane_name : str + Name of the plane for this extractor. + extract_all_metadata : bool + If True, extract metadata from every file in the folder. If False, only extract metadata from the first + file in the folder. The default is True. 
+ """ + self.folder_path = Path(folder_path) + from natsort import natsorted + + file_paths = natsorted(self.folder_path.glob(file_pattern)) + if len(file_paths) == 0: + raise ValueError(f"No files found in folder with pattern: {file_pattern}") + if not extract_all_metadata: + metadata = extract_extra_metadata(file_paths[0]) + parsed_metadata = parse_metadata(metadata) + else: + metadata, parsed_metadata = None, None + imaging_extractors = [] + for file_path in file_paths: + imaging_extractor = ScanImageTiffSinglePlaneImagingExtractor( + file_path=file_path, + channel_name=channel_name, + plane_name=plane_name, + metadata=metadata, + parsed_metadata=parsed_metadata, + ) + imaging_extractors.append(imaging_extractor) + super().__init__(imaging_extractors=imaging_extractors) + + class ScanImageTiffMultiPlaneImagingExtractor(VolumetricImagingExtractor): """Specialized extractor for reading multi-plane (volumetric) TIFF files produced via ScanImage.""" @@ -33,18 +136,47 @@ def __init__( self, file_path: PathType, channel_name: Optional[str] = None, + metadata: Optional[dict] = None, + parsed_metadata: Optional[dict] = None, ) -> None: + """Create a ScanImageTiffMultiPlaneImagingExtractor instance from a volumetric TIFF file produced by ScanImage. + + Parameters + ---------- + file_path : PathType + Path to the TIFF file. + channel_name : str, optional + Name of the channel for this extractor. If None, the first channel will be used. + metadata : dict, optional + Metadata dictionary. If None, metadata will be extracted from the TIFF file. + parsed_metadata : dict, optional + Parsed metadata dictionary. If None, metadata must also be None. + + Notes + ----- + If metadata is provided, it MUST be in the form outputted by extract_extra_metadata in order to be parsed + correctly. 
+ """ self.file_path = Path(file_path) - self.metadata = extract_extra_metadata(file_path) - parsed_metadata = parse_metadata(self.metadata) - num_planes = parsed_metadata["num_planes"] - channel_names = parsed_metadata["channel_names"] + if metadata is None: + self.metadata = extract_extra_metadata(file_path) + self.parsed_metadata = parse_metadata(self.metadata) + else: + self.metadata = metadata + assert parsed_metadata is not None, "If metadata is provided, parsed_metadata must also be provided." + self.parsed_metadata = parsed_metadata + num_planes = self.parsed_metadata["num_planes"] + channel_names = self.parsed_metadata["channel_names"] if channel_name is None: channel_name = channel_names[0] imaging_extractors = [] for plane in range(num_planes): imaging_extractor = ScanImageTiffSinglePlaneImagingExtractor( - file_path=file_path, channel_name=channel_name, plane_name=str(plane) + file_path=file_path, + channel_name=channel_name, + plane_name=str(plane), + metadata=self.metadata, + parsed_metadata=self.parsed_metadata, ) imaging_extractors.append(imaging_extractor) super().__init__(imaging_extractors=imaging_extractors) @@ -104,6 +236,8 @@ def __init__( file_path: PathType, channel_name: str, plane_name: str, + metadata: Optional[dict] = None, + parsed_metadata: Optional[dict] = None, ) -> None: """Create a ScanImageTiffImagingExtractor instance from a TIFF file produced by ScanImage. @@ -129,15 +263,29 @@ def __init__( Name of the channel for this extractor (default=None). plane_name : str Name of the plane for this extractor (default=None). + metadata : dict, optional + Metadata dictionary. If None, metadata will be extracted from the TIFF file. + parsed_metadata : dict, optional + Parsed metadata dictionary. If None, metadata must also be None. + + Notes + ----- + If metadata is provided, it MUST be in the form outputted by extract_extra_metadata in order to be parsed + correctly. 
""" self.file_path = Path(file_path) - self.metadata = extract_extra_metadata(file_path) - parsed_metadata = parse_metadata(self.metadata) - self._sampling_frequency = parsed_metadata["sampling_frequency"] - self._num_channels = parsed_metadata["num_channels"] - self._num_planes = parsed_metadata["num_planes"] - self._frames_per_slice = parsed_metadata["frames_per_slice"] - self._channel_names = parsed_metadata["channel_names"] + if metadata is None: + self.metadata = extract_extra_metadata(file_path) + self.parsed_metadata = parse_metadata(self.metadata) + else: + self.metadata = metadata + assert parsed_metadata is not None, "If metadata is provided, parsed_metadata must also be provided." + self.parsed_metadata = parsed_metadata + self._sampling_frequency = self.parsed_metadata["sampling_frequency"] + self._num_channels = self.parsed_metadata["num_channels"] + self._num_planes = self.parsed_metadata["num_planes"] + self._frames_per_slice = self.parsed_metadata["frames_per_slice"] + self._channel_names = self.parsed_metadata["channel_names"] self._plane_names = [f"{i}" for i in range(self._num_planes)] self.channel_name = channel_name self.plane_name = plane_name @@ -153,10 +301,10 @@ def __init__( shape = io.shape() # [frames, rows, columns] if len(shape) == 3: self._total_num_frames, self._num_rows, self._num_columns = shape - if self._frames_per_slice >= self._total_num_frames: - self._frames_per_slice = ( - 1 # For single plane data, framesPerSlice sometimes is set to total number of frames - ) + if ( + self._num_planes == 1 + ): # For single plane data, framesPerSlice sometimes is set to total number of frames + self._frames_per_slice = 1 self._num_raw_per_plane = self._frames_per_slice * self._num_channels self._num_raw_per_cycle = self._num_raw_per_plane * self._num_planes self._num_frames = self._total_num_frames // (self._num_planes * self._num_channels) diff --git a/tests/test_scanimagetiffimagingextractor.py 
b/tests/test_scanimagetiffimagingextractor.py index 4c0889a9..92e943e1 100644 --- a/tests/test_scanimagetiffimagingextractor.py +++ b/tests/test_scanimagetiffimagingextractor.py @@ -1,7 +1,13 @@ import pytest from numpy.testing import assert_array_equal from ScanImageTiffReader import ScanImageTiffReader -from roiextractors import ScanImageTiffSinglePlaneImagingExtractor, ScanImageTiffMultiPlaneImagingExtractor +from roiextractors import ( + ScanImageTiffSinglePlaneImagingExtractor, + ScanImageTiffMultiPlaneImagingExtractor, + ScanImageTiffSinglePlaneMultiFileImagingExtractor, + ScanImageTiffMultiPlaneMultiFileImagingExtractor, +) +from roiextractors.extractors.tiffimagingextractors.scanimagetiff_utils import extract_extra_metadata, parse_metadata from .setup_paths import OPHYS_DATA_PATH @@ -45,6 +51,44 @@ def test_ScanImageTiffSinglePlaneImagingExtractor__init__invalid(file_path, chan ScanImageTiffSinglePlaneImagingExtractor(file_path=file_path, channel_name=channel_name, plane_name=plane_name) +def test_ScanImageTiffSinglePlaneImagingExtractor__init__metadata_provided(file_path): + metadata = extract_extra_metadata(file_path) + parsed_metadata = parse_metadata(metadata) + extractor = ScanImageTiffSinglePlaneImagingExtractor( + file_path=file_path, + channel_name="Channel 1", + plane_name="0", + metadata=metadata, + parsed_metadata=parsed_metadata, + ) + assert extractor.metadata == metadata + assert extractor.parsed_metadata == parsed_metadata + + +def test_ScanImageTiffSinglePlaneImagingExtractor__init__invalid_metadata_provided(file_path): + metadata = {"invalid_key": "invalid_value"} + parsed_metadata = {"invalid_key": "invalid_value"} + with pytest.raises(KeyError): + ScanImageTiffSinglePlaneImagingExtractor( + file_path=file_path, + channel_name="Channel 1", + plane_name="0", + metadata=metadata, + parsed_metadata=parsed_metadata, + ) + + +def test_ScanImageTiffSinglePlaneImagingExtractor__init__parsed_metadata_not_provided(file_path): + metadata = 
extract_extra_metadata(file_path) + with pytest.raises(AssertionError): + ScanImageTiffSinglePlaneImagingExtractor( + file_path=file_path, + channel_name="Channel 1", + plane_name="0", + metadata=metadata, + ) + + @pytest.mark.parametrize("frame_idxs", (0, [0, 1, 2], [0, 2, 5])) def test_get_frames(scan_image_tiff_single_plane_imaging_extractor, frame_idxs, expected_properties): frames = scan_image_tiff_single_plane_imaging_extractor.get_frames(frame_idxs=frame_idxs) @@ -230,3 +274,87 @@ def test_ScanImageTiffMultiPlaneImagingExtractor__init__(file_path): def test_ScanImageTiffMultiPlaneImagingExtractor__init__invalid(file_path): with pytest.raises(ValueError): ScanImageTiffMultiPlaneImagingExtractor(file_path=file_path, channel_name="Invalid Channel") + + +def test_ScanImageTiffMultiPlaneImagingExtractor__init__metadata_provided(file_path): + metadata = extract_extra_metadata(file_path) + parsed_metadata = parse_metadata(metadata) + extractor = ScanImageTiffMultiPlaneImagingExtractor( + file_path=file_path, metadata=metadata, parsed_metadata=parsed_metadata + ) + assert extractor.metadata == metadata + assert extractor.parsed_metadata == parsed_metadata + + +def test_ScanImageTiffMultiPlaneImagingExtractor__init__invalid_metadata_provided(file_path): + metadata = {"invalid_key": "invalid_value"} + parsed_metadata = {"invalid_key": "invalid_value"} + with pytest.raises(KeyError): + ScanImageTiffMultiPlaneImagingExtractor(file_path=file_path, metadata=metadata, parsed_metadata=parsed_metadata) + + +@pytest.fixture(scope="module") +def scanimage_folder_path(): + return OPHYS_DATA_PATH / "imaging_datasets" / "ScanImage" + + +@pytest.fixture(scope="module") +def multifile_file_pattern(): + return "scanimage_20240320_multifile_*.tif" + + +@pytest.fixture(scope="module") +def expected_file_names(): + return [ + "scanimage_20240320_multifile_00001.tif", + "scanimage_20240320_multifile_00002.tif", + "scanimage_20240320_multifile_00003.tif", + ] + + 
+@pytest.mark.parametrize("extract_all_metadata", [True, False]) +def test_ScanImageTiffSinglePlaneMultiFileImagingExtractor__init__( + scanimage_folder_path, multifile_file_pattern, expected_file_names, extract_all_metadata +): + extractor = ScanImageTiffSinglePlaneMultiFileImagingExtractor( + folder_path=scanimage_folder_path, + file_pattern=multifile_file_pattern, + channel_name="Channel 1", + plane_name="0", + extract_all_metadata=extract_all_metadata, + ) + file_names = [imaging_extractor.file_path.name for imaging_extractor in extractor._imaging_extractors] + assert file_names == expected_file_names + + +def test_ScanImageTiffSinglePlaneMultiFileImagingExtractor__init__invalid(scanimage_folder_path): + with pytest.raises(ValueError): + ScanImageTiffSinglePlaneMultiFileImagingExtractor( + folder_path=scanimage_folder_path, + file_pattern="invalid_pattern", + channel_name="Channel 1", + plane_name="0", + ) + + +@pytest.mark.parametrize("extract_all_metadata", [True, False]) +def test_ScanImageTiffMultiPlaneMultiFileImagingExtractor__init__( + scanimage_folder_path, multifile_file_pattern, expected_file_names, extract_all_metadata +): + extractor = ScanImageTiffMultiPlaneMultiFileImagingExtractor( + folder_path=scanimage_folder_path, + file_pattern=multifile_file_pattern, + channel_name="Channel 1", + extract_all_metadata=extract_all_metadata, + ) + file_names = [imaging_extractor.file_path.name for imaging_extractor in extractor._imaging_extractors] + assert file_names == expected_file_names + + +def test_ScanImageTiffMultiPlaneMultiFileImagingExtractor__init__invalid(scanimage_folder_path): + with pytest.raises(ValueError): + ScanImageTiffMultiPlaneMultiFileImagingExtractor( + folder_path=scanimage_folder_path, + file_pattern="invalid_pattern", + channel_name="Channel 1", + )