diff --git a/src/cai_lab_to_nwb/zaki_2024/__init__.py b/src/cai_lab_to_nwb/zaki_2024/__init__.py index 6d8ace2..b9ff89f 100644 --- a/src/cai_lab_to_nwb/zaki_2024/__init__.py +++ b/src/cai_lab_to_nwb/zaki_2024/__init__.py @@ -1,3 +1,4 @@ from .implant_interface import ImplantInterface from .behaviorinterface import FreezingBehaviorInterface -from .sleepinterface import SleepBehaviorInterface \ No newline at end of file +from .sleepinterface import SleepBehaviorInterface +from .minian_segmentation_interface import MinianSegmentationInterface \ No newline at end of file diff --git a/src/cai_lab_to_nwb/zaki_2024/assets/spatial_temporal_comp_segmentation.png b/src/cai_lab_to_nwb/zaki_2024/assets/spatial_temporal_comp_segmentation.png new file mode 100644 index 0000000..ef145d2 Binary files /dev/null and b/src/cai_lab_to_nwb/zaki_2024/assets/spatial_temporal_comp_segmentation.png differ diff --git a/src/cai_lab_to_nwb/zaki_2024/behaviorinterface.py b/src/cai_lab_to_nwb/zaki_2024/behaviorinterface.py index bfe0e19..6fa5619 100644 --- a/src/cai_lab_to_nwb/zaki_2024/behaviorinterface.py +++ b/src/cai_lab_to_nwb/zaki_2024/behaviorinterface.py @@ -10,9 +10,6 @@ from pydantic import FilePath from typing import Optional -from explore_segmentation_data import unit_ids - - class FreezingBehaviorInterface(BaseDataInterface): """Adds intervals of freezing behavior interface.""" diff --git a/src/cai_lab_to_nwb/zaki_2024/minian_segmentation_interface.py b/src/cai_lab_to_nwb/zaki_2024/minian_segmentation_interface.py new file mode 100644 index 0000000..1670c60 --- /dev/null +++ b/src/cai_lab_to_nwb/zaki_2024/minian_segmentation_interface.py @@ -0,0 +1,264 @@ +"""A SegmentationExtractor for Minian. + +Classes +------- +MinianSegmentationExtractor + A class for extracting segmentation from Minian output. 
+""" + +from pathlib import Path + +import zarr +import warnings +import numpy as np +import pandas as pd + +from roiextractors.extraction_tools import PathType +from neuroconv.datainterfaces.ophys.basesegmentationextractorinterface import BaseSegmentationExtractorInterface +from roiextractors.segmentationextractor import SegmentationExtractor + +from typing import Optional + +from pynwb import NWBFile + +class MinianSegmentationExtractor(SegmentationExtractor): + """A SegmentationExtractor for Minian. + + This class inherits from the SegmentationExtractor class, having all + its functionality specifically applied to the dataset output from + the 'Minian' ROI segmentation method. + + Users can extract key information such as ROI traces, image masks, + and timestamps from the output of the Minian pipeline. + + Key features: + - Extracts fluorescence traces (denoised, baseline, neuropil, deconvolved) for each ROI. + - Retrieves ROI masks and background components. + - Provides access to timestamps corresponding to calcium traces. + - Retrieves maximum projection image. + + Parameters + ---------- + folder_path: str + Path to the folder containing Minian .zarr output files. + + """ + + extractor_name = "MinianSegmentation" + is_writable = True + mode = "file" + + def __init__(self, folder_path: PathType): + """Initialize a MinianSegmentationExtractor instance. + + Parameters + ---------- + folder_path: str + The location of the folder containing minian .zarr output. 
+ """ + SegmentationExtractor.__init__(self) + self.folder_path = folder_path + self._roi_response_denoised = self._read_trace_from_zarr_filed(field="C") + self._roi_response_baseline = self._read_trace_from_zarr_filed(field="b0") + self._roi_response_neuropil = self._read_trace_from_zarr_filed(field="f") + self._roi_response_deconvolved = self._read_trace_from_zarr_filed(field="S") + self._image_maximum_projection = np.array(self._read_zarr_group("/max_proj.zarr/max_proj")) + self._image_masks = self._read_roi_image_mask_from_zarr_filed() + self._background_image_masks = self._read_background_image_mask_from_zarr_filed() + self._times = self._read_timestamps_from_csv() + + def _read_zarr_group(self, zarr_group=""): + """Read the zarr. + + Returns + ------- + zarr.open + The zarr object specified by self.folder_path. + """ + if zarr_group not in zarr.open(self.folder_path, mode="r"): + warnings.warn(f"Group '{zarr_group}' not found in the Zarr store.", UserWarning) + return None + else: + return zarr.open(str(self.folder_path) + f"/{zarr_group}", "r") + + def _read_roi_image_mask_from_zarr_filed(self): + """Read the image masks from the zarr output. + + Returns + ------- + image_masks: numpy.ndarray + The image masks for each ROI. + """ + dataset = self._read_zarr_group("/A.zarr") + if dataset is None or "A" not in dataset: + return None + else: + return np.transpose(dataset["A"], (1, 2, 0)) + + def _read_background_image_mask_from_zarr_filed(self): + """Read the image masks from the zarr output. + + Returns + ------- + image_masks: numpy.ndarray + The image masks for each background components. + """ + dataset = self._read_zarr_group("/b.zarr") + if dataset is None or "b" not in dataset: + return None + else: + return np.expand_dims(dataset["b"], axis=2) + + def _read_trace_from_zarr_filed(self, field): + """Read the traces specified by the field from the zarr object. + + Parameters + ---------- + field: str + The field to read from the zarr object. 
+ + Returns + ------- + trace: numpy.ndarray + The traces specified by the field. + """ + dataset = self._read_zarr_group(f"/{field}.zarr") + + if dataset is None or field not in dataset: + return None + elif dataset[field].ndim == 2: + return np.transpose(dataset[field]) + elif dataset[field].ndim == 1: + return np.expand_dims(dataset[field], axis=1) + + def _read_timestamps_from_csv(self): + """Extract timestamps corresponding to frame numbers of the stored denoised trace + + Returns + ------- + np.ndarray + The timestamps of the denoised trace. + """ + csv_file = self.folder_path / "timeStamps.csv" + df = pd.read_csv(csv_file) + frame_numbers = self._read_zarr_group("/C.zarr/frame") + filtered_df = df[df["Frame Number"].isin(frame_numbers)] * 1e-3 + + return filtered_df["Time Stamp (ms)"].to_numpy() + + def get_image_size(self): + dataset = self._read_zarr_group("/A.zarr") + height = dataset["height"].shape[0] + width = dataset["width"].shape[0] + return (height, width) + + def get_accepted_list(self) -> list: + """Get a list of accepted ROI ids. + + Returns + ------- + accepted_list: list + List of accepted ROI ids. + """ + return list(range(self.get_num_rois())) + + def get_rejected_list(self) -> list: + """Get a list of rejected ROI ids. + + Returns + ------- + rejected_list: list + List of rejected ROI ids. + """ + return list() + + def get_roi_ids(self) -> list: + dataset = self._read_zarr_group("/A.zarr") + return list(dataset["unit_id"]) + + def get_traces_dict(self) -> dict: + """Get traces as a dictionary with key as the name of the ROiResponseSeries. + + Returns + ------- + _roi_response_dict: dict + dictionary with key, values representing different types of RoiResponseSeries: + Raw Fluorescence, DeltaFOverF, Denoised, Neuropil, Deconvolved, Background, etc. 
+ """ + return dict( + denoised=self._roi_response_denoised, + baseline=self._roi_response_baseline, + neuropil=self._roi_response_neuropil, + deconvolved=self._roi_response_deconvolved, + ) + + def get_images_dict(self) -> dict: + """Get images as a dictionary with key as the name of the ROIResponseSeries. + + Returns + ------- + _roi_image_dict: dict + dictionary with key, values representing different types of Images used in segmentation: + Mean, Correlation image + """ + return dict( + mean=self._image_mean, + correlation=self._image_correlation, + maximum_projection=self._image_maximum_projection, + ) + +class MinianSegmentationInterface(BaseSegmentationExtractorInterface): + """Data interface for MinianSegmentationExtractor.""" + + Extractor = MinianSegmentationExtractor + display_name = "Minian Segmentation" + associated_suffixes = (".zarr",) + info = "Interface for Minian segmentation data." + + @classmethod + def get_source_schema(cls) -> dict: + source_metadata = super().get_source_schema() + source_metadata["properties"]["folder_path"]["description"] = "Path to .zarr output." + return source_metadata + + def __init__(self, folder_path: PathType, verbose: bool = True): + """ + + Parameters + ---------- + folder_path : PathType + Path to .zarr path. 
+ verbose : bool, default True + Whether to print progress + """ + super().__init__(folder_path=folder_path) + self.verbose = verbose + + def add_to_nwbfile( + self, + nwbfile: NWBFile, + metadata: Optional[dict] = None, + stub_test: bool = False, + stub_frames: int = 100, + include_background_segmentation: bool = True, + include_roi_centroids: bool = True, + include_roi_acceptance: bool = False, + mask_type: Optional[str] = "image", # Literal["image", "pixel", "voxel"] + plane_segmentation_name: Optional[str] = None, + iterator_options: Optional[dict] = None, + ): + super().add_to_nwbfile( + nwbfile=nwbfile, + metadata=metadata, + stub_test=stub_test, + stub_frames=stub_frames, + include_background_segmentation=include_background_segmentation, + include_roi_centroids=include_roi_centroids, + include_roi_acceptance=include_roi_acceptance, + mask_type=mask_type, + plane_segmentation_name=plane_segmentation_name, + iterator_options=iterator_options, + ) + + + diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_session.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_session.py index 5d010ad..42d88f0 100644 --- a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_session.py +++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_session.py @@ -6,10 +6,9 @@ from neuroconv.utils import load_dict_from_file, dict_deep_update -from cai_lab_to_nwb.zaki_2024 import Embargo2024NWBConverter +from zaki_2024_nwbconverter import Zaki2024NWBConverter - -def session_to_nwb(data_dir_path: Union[str, Path], output_dir_path: Union[str, Path], stub_test: bool = False): +def session_to_nwb(data_dir_path: Union[str, Path], output_dir_path: Union[str, Path], subject_id: str, session_id: str, stub_test: bool = False): data_dir_path = Path(data_dir_path) output_dir_path = Path(output_dir_path) @@ -17,25 +16,17 @@ def session_to_nwb(data_dir_path: Union[str, Path], output_dir_path: Union[str, output_dir_path = output_dir_path / "nwb_stub" output_dir_path.mkdir(parents=True, 
exist_ok=True) - session_id = "subject_identifier_usually" nwbfile_path = output_dir_path / f"{session_id}.nwb" source_data = dict() conversion_options = dict() - # Add Recording - source_data.update(dict(Recording=dict())) - conversion_options.update(dict(Recording=dict(stub_test=stub_test))) - - # Add Sorting - source_data.update(dict(Sorting=dict())) - conversion_options.update(dict(Sorting=dict())) - - # Add Behavior - source_data.update(dict(Behavior=dict())) - conversion_options.update(dict(Behavior=dict())) + # Add Segmentation + minian_folder_path = data_dir_path / "Ca_EEG_Calcium" / subject_id / session_id / "minian" + source_data.update(dict(MinianSegmentation=dict(folder_path=minian_folder_path))) + conversion_options.update(dict(MinianSegmentation=dict(stub_test=stub_test))) - converter = Embargo2024NWBConverter(source_data=source_data) + converter = Zaki2024NWBConverter(source_data=source_data) # Add datetime to conversion metadata = converter.get_metadata() @@ -46,24 +37,29 @@ def session_to_nwb(data_dir_path: Union[str, Path], output_dir_path: Union[str, metadata["NWBFile"]["session_start_time"] = date # Update default metadata with the editable in the corresponding yaml file - editable_metadata_path = Path(__file__).parent / "embargo_2024_metadata.yaml" + editable_metadata_path = Path(__file__).parent / "zaki_2024_metadata.yaml" editable_metadata = load_dict_from_file(editable_metadata_path) metadata = dict_deep_update(metadata, editable_metadata) - metadata["Subject"]["subject_id"] = "a_subject_id" # Modify here or in the yaml file + metadata["Subject"]["subject_id"] = subject_id # Run conversion - converter.run_conversion(metadata=metadata, nwbfile_path=nwbfile_path, conversion_options=conversion_options) + converter.run_conversion(metadata=metadata, nwbfile_path=nwbfile_path, conversion_options=conversion_options, overwrite=True) if __name__ == "__main__": # Parameters for conversion - data_dir_path = Path("/Directory/With/Raw/Formats/") - 
output_dir_path = Path("~/conversion_nwb/") + data_dir_path = Path("D:/") + subject_id = "Ca_EEG3-4" + task = "NeutralExposure" + session_id = subject_id + "_" + task + output_dir_path = Path("D:/cai_lab_conversion_nwb/") stub_test = False session_to_nwb(data_dir_path=data_dir_path, output_dir_path=output_dir_path, stub_test=stub_test, + subject_id=subject_id, + session_id=session_id ) diff --git a/src/cai_lab_to_nwb/zaki_2024/metadata.yaml b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_metadata.yaml similarity index 89% rename from src/cai_lab_to_nwb/zaki_2024/metadata.yaml rename to src/cai_lab_to_nwb/zaki_2024/zaki_2024_metadata.yaml index 09519c8..dc300ec 100644 --- a/src/cai_lab_to_nwb/zaki_2024/metadata.yaml +++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_metadata.yaml @@ -1,5 +1,5 @@ NWBFile: - keywords: + keywords: - hippocampus - memory integration - memory-linking @@ -11,7 +11,7 @@ NWBFile: - stress - PTSD related_publications: - https://doi.org/### or link to APA or MLA citation of the publication + - https://www.biorxiv.org/content/10.1101/2023.03.13.532469v2 session_description: A rich text description of the experiment. Can also just be the abstract of the publication. institution: Icahn School of Medicine at Mount Sinai @@ -36,6 +36,6 @@ NWBFile: - Shuman, Tristan - Zaki, Yosif Subject: - species: Mus musculus + species: Mus musculus age: TBD # in ISO 8601, such as "P1W2D" sex: TBD # One of M, F, U, or O diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_notes.md b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_notes.md index 40a9d5d..754da74 100644 --- a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_notes.md +++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_notes.md @@ -817,6 +817,75 @@ https://github.com/denisecailab/minian Read the docs: https://minian.readthedocs.io/en/stable/ +Supported output format: Zarr (for now) +Similarly to CaImAn, Minian uses CNMF to perform cell identification. 
+ +Output structure (example): + + minian/ + ├── A.zarr + │ ├── A (851, 608, 608) float64 + │ ├── animal () + ... + +- A: Spatial footprints of cells. Should have dimensions (“unit_id”, “height”, “width”). --> `image_masks` +- C: Temporal components of cells. Should have dimensions “frame” and “unit_id”. --> `roi_response_denoised` +- b: Spatial footprint of background. Should have dimensions (“height”, “width”). --> `background_image_masks` +- f: Temporal dynamic of background. Should have dimension “frame”. --> `roi_response_neuropil` +- b0: Baseline fluorescence for each cell. Should have dimensions (“frame”, “unit_id”) and same shape as C --> `roi_response_baseline` +- c0: Initial calcium decay, in theory triggered by calcium events that happened before the recording started. Should have dimensions (“frame”, “unit_id”) and same shape as C +- S: Deconvolved spikes for each cell. Should have dimensions (“frame”, “unit_id”) and same shape as C --> `roi_response_deconvolved` +- max_proj: the maximum projection --> `summary_image` + + ## Freezing Behavior and Video This data was extracted from the the ezTrack package: @@ -888,4 +957,5 @@ https://youtu.be/BKgh-XcZhIM?t=1338 ## Figures to reproduce in the example notebook: -1) ![img.png](assets/motion_freezing_across_session.png) \ No newline at end of file +1) ![img.png](assets/motion_freezing_across_session.png) +2) ![img.png](assets/spatial_temporal_comp_segmentation.png) \ No newline at end of file diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py index 81aef10..5ef7d04 100644 --- a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py +++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py @@ -1,18 +1,11 @@ """Primary NWBConverter class for this dataset.""" from neuroconv import NWBConverter -from neuroconv.datainterfaces import ( - SpikeGLXRecordingInterface, - PhySortingInterface, -) +# from neuroconv.datainterfaces import MinianSegmentationInterface +from minian_segmentation_interface import MinianSegmentationInterface -from 
cai_lab_to_nwb.zaki_2024 import Embargo2024BehaviorInterface - - -class Embargo2024NWBConverter(NWBConverter): +class Zaki2024NWBConverter(NWBConverter): """Primary conversion class for my extracellular electrophysiology dataset.""" data_interface_classes = dict( - Recording=SpikeGLXRecordingInterface, - Sorting=PhySortingInterface, - Behavior=Embargo2024BehaviorInterface, + MinianSegmentation = MinianSegmentationInterface, )