diff --git a/src/pinto_lab_to_nwb/widefield/extractors/widefield_processed_imagingextractor.py b/src/pinto_lab_to_nwb/widefield/extractors/widefield_processed_imagingextractor.py
index e1ea33b..a8b69ba 100644
--- a/src/pinto_lab_to_nwb/widefield/extractors/widefield_processed_imagingextractor.py
+++ b/src/pinto_lab_to_nwb/widefield/extractors/widefield_processed_imagingextractor.py
@@ -18,6 +18,7 @@ def __init__(
         info_file_path: FilePathType,
         strobe_sequence_file_path: FilePathType,
         channel_name: Optional[str] = "blue",
+        convert_video_dtype_to: Optional[DtypeType] = None,
     ):
         """
         The ImagingExtractor for loading the downsampled imaging data for the Widefield session.
@@ -32,10 +33,13 @@ def __init__(
             The path that points to the strobe sequence file. This file should contain the 'strobe_session_key' key.
         channel_name: str, optional
            The name of the channel to load the frames for. The default is 'blue'.
+        convert_video_dtype_to: DtypeType, optional
+            The dtype to convert the video to. The default is np.uint16.
         """
         import h5py

         super().__init__(file_path=file_path)
+        self.convert_video_dtype_to = convert_video_dtype_to or np.uint16

         file = h5py.File(file_path, "r")
         expected_struct_name = "rawf"
@@ -97,7 +101,7 @@ def get_video(
     ) -> np.ndarray:
         if start_frame is not None and end_frame is not None and start_frame == end_frame:
             video_start_frame = int(self.frame_indices[start_frame])
-            return self._video[video_start_frame].transpose((1, 0))
+            return self._video[video_start_frame].transpose((1, 0)).astype(dtype=self.convert_video_dtype_to)

         start_frame = start_frame or 0
         end_frame = end_frame or self.get_num_frames()
@@ -113,6 +117,7 @@ def get_video(
             self._video.lazy_slice[original_video_start_frame:original_video_end_frame, ...]
             .lazy_transpose(axis_order=(0, 2, 1))
             .dsetread()
+            .astype(dtype=self.convert_video_dtype_to)
         )

         filtered_indices = self.frame_indices[start_frame:end_frame] - self.frame_indices[start_frame]
diff --git a/src/pinto_lab_to_nwb/widefield/extractors/widefield_processed_segmentationextractor.py b/src/pinto_lab_to_nwb/widefield/extractors/widefield_processed_segmentationextractor.py
new file mode 100644
index 0000000..3b74312
--- /dev/null
+++ b/src/pinto_lab_to_nwb/widefield/extractors/widefield_processed_segmentationextractor.py
@@ -0,0 +1,133 @@
+from pathlib import Path
+from typing import Tuple
+
+import numpy as np
+from neuroconv.utils import FolderPathType
+from pymatreader import read_mat
+from roiextractors import SegmentationExtractor
+
+
+class WidefieldProcessedSegmentationExtractor(SegmentationExtractor):
+    """Custom extractor for reading the downsampled (binned) segmentation data for the Widefield experiment."""
+
+    extractor_name = "WidefieldProcessedSegmentation"
+    mode = "file"
+
+    def __init__(
+        self,
+        folder_path: FolderPathType,
+    ):
+        """
+        The SegmentationExtractor for the downsampled (binned) Widefield imaging data.
+
+        The segmentation data is stored in .mat files:
+        - info.mat : contains the general metadata of the imaging session such as frame rate etc.
+        - ROIfromRef.mat : contains the Allen area label of each pixel mapped onto the reference image of the mouse and registered to the session.
+        - vasculature_mask_2.mat : contains the vasculature mask on the downsampled (binned) session image.
+        - blue_pca_vasculature_mask_2.mat : contains the PCA mask for the blue channel.
+        - violet_pca_vasculature_mask_2.mat : contains the PCA mask for the violet channel.
+
+        These files provide the following variables:
+        - The Allen area label for each binned pixel (from 'ROIfromRef.mat')
+        - The contrast based vasculature mask (from 'vasculature_mask_2.mat')
+        - The PCA masks for the blue and violet channels (from 'blue_pca_vasculature_mask_2.mat' and 'violet_pca_vasculature_mask_2.mat')
+
+        Parameters
+        ----------
+        folder_path: FolderPathType
+            The path that points to the folder that contains the .mat files.
+        """
+        super().__init__()
+
+        self.folder_path = Path(folder_path)
+
+        expected_files = [
+            "info.mat",
+            "ROIfromRef.mat",
+            "vasculature_mask_2.mat",
+            "blue_pca_vasculature_mask_2.mat",
+        ]
+        mat_file_paths = list(self.folder_path.glob("*.mat"))
+        assert mat_file_paths, f"The .mat files are missing from {folder_path}."
+        # assert all expected files are in the folder_path
+        for expected_file in expected_files:
+            assert (
+                self.folder_path / expected_file
+            ).exists(), f"The file {expected_file} is missing from {folder_path}."
+
+        info_mat = read_mat(self.folder_path / "info.mat")
+        assert "info" in info_mat, f"Could not find 'info' struct in 'info.mat'."
+        self._num_frames = info_mat["info"]["numFrames"]
+        self._sampling_frequency = info_mat["info"]["frameRate"]
+
+        roi_mat = read_mat(self.folder_path / "ROIfromRef.mat")
+        assert "ROIcentroids" in roi_mat, f"Could not find 'ROIcentroids' in 'ROIfromRef.mat'."
+        self._roi_locations = roi_mat[
+            "ROIcentroids"
+        ].T  # transpose to (height, width); they are stored as (width, height)
+        # Allen area labels
+        assert "ROIlbl" in roi_mat, f"Could not find 'ROIlbl' in 'ROIfromRef.mat'."
+        self._roi_labels = roi_mat["ROIlbl"]
+
+        binned_height = int(info_mat["info"]["height"] / roi_mat["dsFactor"])
+        binned_width = int(info_mat["info"]["width"] / roi_mat["dsFactor"])
+        self._image_size = (binned_height, binned_width)
+
+        assert "ROI" in roi_mat, f"Could not find 'ROI' in 'ROIfromRef.mat'."
+        self._image_masks = self._compute_image_masks(pixel_mask=roi_mat["ROI"])
+        self._dtype = self._image_masks.dtype
+
+        # Contrast based vasculature mask
+        vasculature_mask = read_mat(self.folder_path / "vasculature_mask_2.mat")
+        assert "mask_binned" in vasculature_mask, f"Could not find 'mask_binned' in 'vasculature_mask_2.mat'."
+        self._image_vasculature = vasculature_mask["mask_binned"]
+
+        # PCA mask (separate for blue and violet)
+        pca_mask_blue = read_mat(self.folder_path / f"blue_pca_vasculature_mask_2.mat")
+        assert "mask" in pca_mask_blue, f"Could not find 'mask' in 'blue_pca_vasculature_mask_2.mat'."
+ self._image_pca_blue = pca_mask_blue["mask"] + + def _compute_image_masks(self, pixel_mask): + """Compute the image masks from the ROI's pixel locations.""" + num_rois = self.get_num_rois() + image_mask = np.zeros(shape=(*self._image_size, num_rois), dtype=np.uint8) + for roi_ind, pixel_mask_roi in enumerate(pixel_mask): + pixel_mask_roi = pixel_mask_roi[0] + if len(pixel_mask_roi) == 0: + # there are rois with no pixels + continue + x = pixel_mask_roi[:, 0] - 1 + y = pixel_mask_roi[:, 1] - 1 + image_mask[x, y, roi_ind] = 1 + + return image_mask + + def get_channel_names(self): + return ["OpticalChannelBlue"] + + def get_roi_locations(self, roi_ids=None) -> np.ndarray: + return self._roi_locations + + def get_images_dict(self): + """Return the images dict that contain the contrast based vasculature mask and the PCA mask for the blue channel.""" + images_dict = super().get_images_dict() + images_dict.update( + vasculature=self._image_vasculature, + pca_blue=self._image_pca_blue, + ) + return images_dict + + def get_accepted_list(self) -> list: + return self.get_roi_ids() + + def get_rejected_list(self) -> list: + return list() + + def get_num_frames(self) -> int: + return self._num_frames + + def get_image_size(self) -> Tuple[int, int]: + return self._image_size + + def get_num_rois(self) -> int: + return self._roi_locations.shape[1] diff --git a/src/pinto_lab_to_nwb/widefield/interfaces/__init__.py b/src/pinto_lab_to_nwb/widefield/interfaces/__init__.py index 01cc4d4..b775160 100644 --- a/src/pinto_lab_to_nwb/widefield/interfaces/__init__.py +++ b/src/pinto_lab_to_nwb/widefield/interfaces/__init__.py @@ -1,2 +1,5 @@ from .widefield_imaginginterface import WidefieldImagingInterface from .widefield_processed_imaginginterface import WidefieldProcessedImagingInterface +from .widefield_processed_segmentationinterface import WidefieldProcessedSegmentationinterface +from .widefield_segmentation_images_blue_datainterface import WidefieldSegmentationImagesBlueInterface +from .widefield_segmentation_images_violet_datainterface import WidefieldSegmentationImagesVioletInterface diff --git a/src/pinto_lab_to_nwb/widefield/interfaces/widefield_processed_imaginginterface.py b/src/pinto_lab_to_nwb/widefield/interfaces/widefield_processed_imaginginterface.py index ca0b2b2..5a1cd1f 100644 --- a/src/pinto_lab_to_nwb/widefield/interfaces/widefield_processed_imaginginterface.py +++ b/src/pinto_lab_to_nwb/widefield/interfaces/widefield_processed_imaginginterface.py @@ -3,6 +3,7 @@ from neuroconv.datainterfaces.ophys.baseimagingextractorinterface import BaseImagingExtractorInterface from neuroconv.tools.roiextractors import get_nwb_imaging_metadata from neuroconv.utils import FilePathType, dict_deep_update +from roiextractors.extraction_tools import DtypeType from pinto_lab_to_nwb.widefield.extractors.widefield_processed_imagingextractor import ( WidefieldProcessedImagingExtractor, @@ -20,6 +21,7 @@ def __init__( info_file_path: FilePathType, strobe_sequence_file_path: FilePathType, channel_name: Optional[str] = "blue", + convert_video_dtype_to: Optional[DtypeType] = None, verbose: bool = True, ): """ @@ -35,6 +37,8 @@ def __init__( The path that points to the strobe sequence file. This file should contain the 'strobe_session_key' key. channel_name: str, optional The name of the channel to load the frames for. The default is 'blue'. + convert_video_dtype_to: DtypeType, optional + The dtype to convert the video to. 
""" super().__init__( @@ -42,6 +46,7 @@ def __init__( info_file_path=info_file_path, strobe_sequence_file_path=strobe_sequence_file_path, channel_name=channel_name, + convert_video_dtype_to=convert_video_dtype_to, ) self.channel_name = channel_name self.verbose = verbose diff --git a/src/pinto_lab_to_nwb/widefield/interfaces/widefield_processed_segmentationinterface.py b/src/pinto_lab_to_nwb/widefield/interfaces/widefield_processed_segmentationinterface.py new file mode 100644 index 0000000..692e627 --- /dev/null +++ b/src/pinto_lab_to_nwb/widefield/interfaces/widefield_processed_segmentationinterface.py @@ -0,0 +1,91 @@ +from typing import Optional + +from neuroconv.datainterfaces.ophys.basesegmentationextractorinterface import BaseSegmentationExtractorInterface +from neuroconv.tools import get_module +from neuroconv.utils import FolderPathType +from pynwb import NWBFile + +from pinto_lab_to_nwb.widefield.extractors.widefield_processed_segmentationextractor import ( + WidefieldProcessedSegmentationExtractor, +) + + +class WidefieldProcessedSegmentationinterface(BaseSegmentationExtractorInterface): + """Data interface for WidefieldProcessedSegmentationExtractor.""" + + Extractor = WidefieldProcessedSegmentationExtractor + + def __init__(self, folder_path: FolderPathType, verbose: bool = True): + """ + + Parameters + ---------- + folder_path : FolderPathType + verbose : bool, default: True + """ + super().__init__(folder_path=folder_path) + self.verbose = verbose + + def get_metadata(self) -> dict: + metadata = super().get_metadata() + + imaging_plane_name = "ImagingPlaneBlue" + metadata["Ophys"]["ImagingPlane"][0].update(name=imaging_plane_name) + plane_segmentation_metadata = metadata["Ophys"]["ImageSegmentation"]["plane_segmentations"][0] + default_plane_segmentation_name = plane_segmentation_metadata["name"] + plane_segmentation_name = "PlaneSegmentationProcessedBlue" + plane_segmentation_metadata.update( + name=plane_segmentation_name, + imaging_plane=imaging_plane_name, + ) + summary_images_metadata = metadata["Ophys"]["SegmentationImages"] + _ = summary_images_metadata.pop(default_plane_segmentation_name) + images_metadata = dict( + vasculature=dict( + name="vasculature", description="The contrast based vasculature mask for the blue channel." 
+            ),
+            pca_blue=dict(name="pca_blue", description="The PCA based mask for the blue channel."),
+        )
+
+        metadata["Ophys"]["SegmentationImages"].update({"PlaneSegmentationProcessedBlue": images_metadata})
+
+        return metadata
+
+    def add_to_nwbfile(
+        self,
+        nwbfile: NWBFile,
+        metadata: Optional[dict] = None,
+        stub_test: bool = False,
+        stub_frames: int = 100,
+        include_roi_centroids: bool = True,
+        include_roi_acceptance: bool = True,
+        mask_type: Optional[str] = "image",
+        plane_segmentation_name: Optional[str] = "PlaneSegmentationProcessedBlue",
+        iterator_options: Optional[dict] = None,
+        compression_options: Optional[dict] = None,
+    ):
+        super().add_to_nwbfile(
+            nwbfile=nwbfile,
+            metadata=metadata,
+            stub_test=stub_test,
+            stub_frames=stub_frames,
+            include_roi_centroids=include_roi_centroids,
+            include_roi_acceptance=include_roi_acceptance,
+            mask_type=mask_type,
+            plane_segmentation_name=plane_segmentation_name,
+            iterator_options=iterator_options,
+            compression_options=compression_options,
+        )
+
+        # Add Allen area labels as a column to the plane segmentation table
+        ophys = get_module(nwbfile, "ophys")
+        image_segmentation = ophys.get("ImageSegmentation")
+        plane_segmentation = image_segmentation.plane_segmentations[plane_segmentation_name]
+        locations = self.segmentation_extractor._roi_labels
+        assert len(plane_segmentation.id) == len(
+            locations
+        ), "The number of ROIs does not match the number of Allen area labels."
+        plane_segmentation.add_column(
+            name="location",
+            description="The Allen area labels for each ROI.",
+            data=locations,
+        )
diff --git a/src/pinto_lab_to_nwb/widefield/interfaces/widefield_segmentation_images_blue_datainterface.py b/src/pinto_lab_to_nwb/widefield/interfaces/widefield_segmentation_images_blue_datainterface.py
new file mode 100644
index 0000000..7742acc
--- /dev/null
+++ b/src/pinto_lab_to_nwb/widefield/interfaces/widefield_segmentation_images_blue_datainterface.py
@@ -0,0 +1,77 @@
+from pathlib import Path
+
+import numpy as np
+from neuroconv import BaseDataInterface
+from neuroconv.tools import get_module
+from neuroconv.utils import FolderPathType
+from pymatreader import read_mat
+from pynwb import NWBFile
+from pynwb.base import Images
+from pynwb.image import GrayscaleImage
+
+
+class WidefieldSegmentationImagesBlueInterface(BaseDataInterface):
+    """The custom interface to add the blue channel manual and vasculature mask to the NWBFile."""
+
+    def __init__(self, folder_path: FolderPathType, verbose: bool = True):
+        """
+        The interface to add the manual mask and the contrast based vasculature mask for the blue channel to the NWBFile.
+
+        Parameters
+        ----------
+        folder_path : FolderPathType
+            The path that points to the folder that contains the .mat files.
+        verbose : bool, default: True
+        """
+        super().__init__(folder_path=folder_path)
+        self.folder_path = Path(folder_path)
+        self.verbose = verbose
+
+        self._image_vasculature = self._load_vasculature_mask()
+        self._image_manual = self._load_manual_mask()
+
+    def _load_vasculature_mask(self) -> np.ndarray:
+        vasculature_mask_file_path = self.folder_path / "vasculature_mask_2.mat"
+        assert vasculature_mask_file_path.exists(), f"The vasculature mask file is missing from {self.folder_path}."
+        vasculature_mask_mat = read_mat(str(vasculature_mask_file_path))
+        assert "mask" in vasculature_mask_mat, f"The vasculature mask is missing from {vasculature_mask_file_path}."
+        vasculature_mask = vasculature_mask_mat["mask"]
+
+        return vasculature_mask
+
+    def _load_manual_mask(self) -> np.ndarray:
+        manual_mask_file_path = self.folder_path / "regManualMask.mat"
+        assert manual_mask_file_path.exists(), f"The manual mask file is missing from {self.folder_path}."
+ manual_mask_mat = read_mat(str(manual_mask_file_path)) + assert "regMask" in manual_mask_mat, f"The manual mask is missing from {manual_mask_file_path}." + manual_mask = manual_mask_mat["regMask"] + + return manual_mask + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + ophys = get_module(nwbfile=nwbfile, name="ophys") + + images_container_name = "SegmentationImagesBlue" + if images_container_name in ophys.data_interfaces: + raise ValueError(f"Images container {images_container_name} already exists in the NWBFile.") + + description = "Contains the manual mask and the contrast based vasculature mask for the blue channel in the full size session image." + images_container = Images( + name=images_container_name, + description=description, + ) + ophys.add(images_container) + + vasculature_image_data = self._image_vasculature.T + images_container.add_image( + GrayscaleImage( + name="vasculature", + description="The contrast based vasculature mask for the blue channel.", + data=vasculature_image_data, + ) + ) + + manual_mask_data = self._image_manual.T + images_container.add_image( + GrayscaleImage(name="manual", description="The manual mask for the blue channel.", data=manual_mask_data) + ) diff --git a/src/pinto_lab_to_nwb/widefield/interfaces/widefield_segmentation_images_violet_datainterface.py b/src/pinto_lab_to_nwb/widefield/interfaces/widefield_segmentation_images_violet_datainterface.py new file mode 100644 index 0000000..e025a7c --- /dev/null +++ b/src/pinto_lab_to_nwb/widefield/interfaces/widefield_segmentation_images_violet_datainterface.py @@ -0,0 +1,61 @@ +from pathlib import Path + +import numpy as np +from neuroconv import BaseDataInterface +from neuroconv.tools import get_module +from neuroconv.utils import FolderPathType +from pymatreader import read_mat +from pynwb import NWBFile +from pynwb.base import Images +from pynwb.image import GrayscaleImage + + +class WidefieldSegmentationImagesVioletInterface(BaseDataInterface): + """The custom interface to add the violet channel PCA mask to the NWBFile.""" + + def __init__(self, folder_path: FolderPathType, verbose: bool = True): + """ + The interface to add the summary images to the NWBFile. + + Parameters + ---------- + folder_path : FolderPathType + verbose : bool, default: True + """ + super().__init__(folder_path=folder_path) + self.folder_path = Path(folder_path) + self.verbose = verbose + + self._image_pca_violet = self._load_pca_mask() + + def _load_pca_mask(self) -> np.ndarray: + pca_mask_file_path = self.folder_path / "violet_pca_vasculature_mask_2.mat" + assert ( + pca_mask_file_path.exists() + ), f"The PCA mask file for the violet channel is missing from {self.folder_path}." + pca_mask_violet = read_mat(str(pca_mask_file_path)) + assert "vasc_mask" in pca_mask_violet, f"Could not find 'vasc_mask' in 'violet_pca_vasculature_mask_2.mat'." + pca_mask = pca_mask_violet["vasc_mask"] + + return pca_mask + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + ophys = get_module(nwbfile=nwbfile, name="ophys") + + images_container_name = "SegmentationImagesProcessedViolet" + if images_container_name in ophys.data_interfaces: + raise ValueError(f"Images container {images_container_name} already exists in the NWBFile.") + + description = "Contains the PCA mask for the violet channel on the binned session image." 
+ images_container = Images( + name=images_container_name, + description=description, + ) + ophys.add(images_container) + + pca_image_data = self._image_pca_violet.T + images_container.add_image( + GrayscaleImage( + name="pca_violet", description="The PCA based mask for the violet channel.", data=pca_image_data + ) + ) diff --git a/src/pinto_lab_to_nwb/widefield/widefield_convert_session.py b/src/pinto_lab_to_nwb/widefield/widefield_convert_session.py index 7376d24..8e05f7f 100644 --- a/src/pinto_lab_to_nwb/widefield/widefield_convert_session.py +++ b/src/pinto_lab_to_nwb/widefield/widefield_convert_session.py @@ -3,7 +3,6 @@ from pathlib import Path from typing import Optional -import numpy as np from dateutil import tz from neuroconv.utils import ( load_dict_from_file, @@ -11,7 +10,6 @@ FolderPathType, FilePathType, ) -from pynwb import NWBHDF5IO from pinto_lab_to_nwb.general import make_subject_metadata from pinto_lab_to_nwb.widefield import WideFieldNWBConverter @@ -37,6 +35,8 @@ def session_to_nwb( The folder path that contains the Micro-Manager OME-TIF imaging output (.ome.tif files). strobe_sequence_file_path: FilePathType The file path to the strobe sequence file. This file should contain the 'strobe_session_key' key. + info_file_path: FilePathType + The file path to the Matlab file with information about the imaging session (e.g. 'frameRate'). subject_metadata_file_path: FilePathType, optional The file path to the subject metadata file. This file should contain the 'metadata' key. stub_test: bool, optional @@ -102,6 +102,21 @@ def session_to_nwb( ), ) + # Add segmentation and summary images for the blue and violet channels + source_data.update( + dict( + SegmentationProcessedBlue=dict( + folder_path=str(widefield_imaging_folder_path), + ), + SummaryImagesBlue=dict( + folder_path=str(widefield_imaging_folder_path), + ), + SummaryImagesViolet=dict( + folder_path=str(widefield_imaging_folder_path), + ), + ) + ) + converter = WideFieldNWBConverter(source_data=source_data) # Add datetime to conversion @@ -139,12 +154,18 @@ def session_to_nwb( if __name__ == "__main__": # Parameters for conversion + + # The folder path that contains the raw imaging data in Micro-Manager OME-TIF format (.ome.tif files). imaging_folder_path = Path("/Users/weian/data/DrChicken_20230419_20hz") + # The file path to the strobe sequence file. strobe_sequence_file_path = imaging_folder_path / "strobe_seq_1_2.mat" + # The file path to the downsampled imaging data in Matlab format (.mat file). processed_imaging_path = imaging_folder_path / "rawf_full.mat" + # The file path to the Matlab file with information about the imaging session (e.g. 'frameRate'). info_file_path = imaging_folder_path / "info.mat" subject_metadata_file_path = "/Volumes/t7-ssd/Pinto/Behavior/subject_metadata.mat" - nwbfile_path = Path("/Volumes/t7-ssd/Pinto/nwbfiles/widefield/stub_DrChicken_20230419_20hz.nwb") + # The file path to the NWB file that will be created. 
+ nwbfile_path = Path("/Volumes/t7-ssd/Pinto/nwbfiles/widefield/DrChicken_20230419_20hz.nwb") stub_test = False diff --git a/src/pinto_lab_to_nwb/widefield/widefield_requirements.txt b/src/pinto_lab_to_nwb/widefield/widefield_requirements.txt index 686587a..ed5cf77 100644 --- a/src/pinto_lab_to_nwb/widefield/widefield_requirements.txt +++ b/src/pinto_lab_to_nwb/widefield/widefield_requirements.txt @@ -1 +1,2 @@ neuroconv[micromanagertiff] +pymatreader==0.0.32 diff --git a/src/pinto_lab_to_nwb/widefield/widefieldnwbconverter.py b/src/pinto_lab_to_nwb/widefield/widefieldnwbconverter.py index f97c922..414f7eb 100644 --- a/src/pinto_lab_to_nwb/widefield/widefieldnwbconverter.py +++ b/src/pinto_lab_to_nwb/widefield/widefieldnwbconverter.py @@ -1,7 +1,13 @@ """Primary NWBConverter class for this dataset.""" from neuroconv import NWBConverter -from pinto_lab_to_nwb.widefield.interfaces import WidefieldImagingInterface, WidefieldProcessedImagingInterface +from pinto_lab_to_nwb.widefield.interfaces import ( + WidefieldImagingInterface, + WidefieldProcessedImagingInterface, + WidefieldProcessedSegmentationinterface, + WidefieldSegmentationImagesBlueInterface, + WidefieldSegmentationImagesVioletInterface, +) class WideFieldNWBConverter(NWBConverter): @@ -12,4 +18,7 @@ class WideFieldNWBConverter(NWBConverter): ImagingViolet=WidefieldImagingInterface, ProcessedImagingBlue=WidefieldProcessedImagingInterface, ProcessedImagingViolet=WidefieldProcessedImagingInterface, + SegmentationProcessedBlue=WidefieldProcessedSegmentationinterface, + SummaryImagesBlue=WidefieldSegmentationImagesBlueInterface, + SummaryImagesViolet=WidefieldSegmentationImagesVioletInterface, )
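
For reference, a minimal usage sketch (not part of the patch) of driving the new segmentation interfaces through the converter, mirroring the source_data wiring that widefield_convert_session.py sets up above. The folder path, output path, session start time, and timezone below are hypothetical placeholders.

    from datetime import datetime

    from dateutil import tz

    from pinto_lab_to_nwb.widefield import WideFieldNWBConverter

    # Hypothetical folder containing info.mat, ROIfromRef.mat and the mask .mat files.
    folder_path = "/data/DrChicken_20230419_20hz"

    # Only the new interfaces are instantiated here; the raw and processed imaging
    # interfaces can be added to source_data in the same way (see session_to_nwb).
    source_data = dict(
        SegmentationProcessedBlue=dict(folder_path=folder_path),
        SummaryImagesBlue=dict(folder_path=folder_path),
        SummaryImagesViolet=dict(folder_path=folder_path),
    )
    converter = WideFieldNWBConverter(source_data=source_data)

    metadata = converter.get_metadata()
    # Hypothetical session start time and timezone.
    metadata["NWBFile"].update(session_start_time=datetime(2023, 4, 19, tzinfo=tz.gettz("US/Central")))

    # Hypothetical output path.
    converter.run_conversion(nwbfile_path="/tmp/widefield_segmentation_example.nwb", metadata=metadata, overwrite=True)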