diff --git a/README.md b/README.md index 27489e6..9f948c6 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ pip install -r src/pinto_lab_to_nwb/into_the_void/into_the_void_requirements.txt You can run a specific conversion with the following command: ``` -python src/pinto_lab_to_nwb/into_the_void/into_the_void_conversion_script.py +python src/pinto_lab_to_nwb/into_the_void/into_the_void_convert_session.py ``` ## Repository structure Each conversion is organized in a directory of its own in the `src` directory: ├── setup.py └── src ├── pinto_lab_to_nwb - │ ├── conversion_directory_1 │ └── into_the_void - │ ├── into_the_voidbehaviorinterface.py + │ ├── general_metadata.yaml │ ├── into_the_void_convert_session.py - │ ├── into_the_void_metadata.yml │ ├── into_the_voidnwbconverter.py │ ├── into_the_void_requirements.txt │ ├── into_the_void_notes.md │ └── __init__.py - │ ├── conversion_directory_b └── __init__.py @@ -87,8 +84,7 @@ Each conversion is organized in a directory of its own in the `src` directory: * `into_the_void_convert_sesion.py`: this script defines the function to convert one full session of the conversion. * `into_the_void_requirements.txt`: dependencies specific to this conversion. -* `into_the_void_metadata.yml`: metadata in yaml format for this specific conversion. -* `into_the_voidbehaviorinterface.py`: the behavior interface. Usually ad-hoc for each conversion. +* `general_metadata.yaml`: general metadata in yaml format (e.g. session description, experimenter, subject metadata). * `into_the_voidnwbconverter.py`: the place where the `NWBConverter` class is defined. * `into_the_void_notes.md`: notes and comments concerning this specific conversion. 
diff --git a/src/pinto_lab_to_nwb/into_the_void/__init__.py b/src/pinto_lab_to_nwb/into_the_void/__init__.py index 194b092..b22c5c8 100644 --- a/src/pinto_lab_to_nwb/into_the_void/__init__.py +++ b/src/pinto_lab_to_nwb/into_the_void/__init__.py @@ -1,2 +1 @@ -from .into_the_voidbehaviorinterface import IntoTheVoidBehaviorInterface from .into_the_voidnwbconverter import IntoTheVoidNWBConverter diff --git a/src/pinto_lab_to_nwb/into_the_void/general_metadata.yaml b/src/pinto_lab_to_nwb/into_the_void/general_metadata.yaml new file mode 100644 index 0000000..6d66760 --- /dev/null +++ b/src/pinto_lab_to_nwb/into_the_void/general_metadata.yaml @@ -0,0 +1,10 @@ +NWBFile: + session_description: + A rich text description of the experiment. Can also just be the abstract of the publication. + institution: Northwestern University + lab: Pinto + experimenter: + - Canton, Neto +Subject: + species: Mus musculus + sex: U diff --git a/src/pinto_lab_to_nwb/into_the_void/into_the_void_convert_session.py b/src/pinto_lab_to_nwb/into_the_void/into_the_void_convert_session.py index c5baa81..9b51628 100644 --- a/src/pinto_lab_to_nwb/into_the_void/into_the_void_convert_session.py +++ b/src/pinto_lab_to_nwb/into_the_void/into_the_void_convert_session.py @@ -1,68 +1,105 @@ """Primary script to run to convert an entire session for of data using the NWBConverter.""" +import re from pathlib import Path -from typing import Union -import datetime -from zoneinfo import ZoneInfo - -from neuroconv.utils import load_dict_from_file, dict_deep_update +from dateutil import tz +from neuroconv.utils import ( + load_dict_from_file, + dict_deep_update, + FolderPathType, + FilePathType, +) from pinto_lab_to_nwb.into_the_void import IntoTheVoidNWBConverter +from pinto_lab_to_nwb.into_the_void.into_the_voidnwbconverter import get_default_segmentation_to_imaging_name_mapping + + +def session_to_nwb( + nwbfile_path: FilePathType, + two_photon_imaging_folder_path: FolderPathType, + segmentation_folder_path: 
FolderPathType, + segmentation_to_imaging_plane_map: dict = None, + stub_test: bool = False, +): + """ + Converts a single session to NWB. + + Parameters + ---------- + nwbfile_path : FilePathType + The file path to the NWB file that will be created. + two_photon_imaging_folder_path: FolderPathType + The folder path that contains the Bruker TIF imaging output (.ome.tif files). + segmentation_folder_path: FolderPathType + The folder that contains the Suite2P segmentation output. + segmentation_to_imaging_plane_map: dict, optional + The optional mapping between the imaging and segmentation planes. + stub_test: bool, optional + For testing purposes, when stub_test=True only writes a subset of imaging and segmentation data. + """ + two_photon_imaging_folder_path = Path(two_photon_imaging_folder_path) + + converter = IntoTheVoidNWBConverter( + imaging_folder_path=two_photon_imaging_folder_path, + segmentation_folder_path=segmentation_folder_path, + segmentation_to_imaging_map=segmentation_to_imaging_plane_map, + verbose=False, + ) - -def session_to_nwb(data_dir_path: Union[str, Path], output_dir_path: Union[str, Path], stub_test: bool = False): - data_dir_path = Path(data_dir_path) - output_dir_path = Path(output_dir_path) - if stub_test: - output_dir_path = output_dir_path / "nwb_stub" - output_dir_path.mkdir(parents=True, exist_ok=True) - - session_id = "subject_identifier_usually" - nwbfile_path = output_dir_path / f"{session_id}.nwb" - - source_data = dict() - conversion_options = dict() - - # Add Recording - source_data.update(dict(Recording=dict())) - conversion_options.update(dict(Recording=dict())) - - # Add LFP - source_data.update(dict(LFP=dict())) - conversion_options.update(dict(LFP=dict())) - - # Add Sorting - source_data.update(dict(Sorting=dict())) - conversion_options.update(dict(Sorting=dict())) - - # Add Behavior - source_data.update(dict(Behavior=dict())) - conversion_options.update(dict(Behavior=dict())) - - converter = 
IntoTheVoidNWBConverter(source_data=source_data) + conversion_options = { + interface_name: dict(stub_test=stub_test) for interface_name in converter.data_interface_objects.keys() + } # Add datetime to conversion metadata = converter.get_metadata() - datetime.datetime(year=2020, month=1, day=1, tzinfo=ZoneInfo("US/Eastern")) - date = datetime.datetime.today() # TO-DO: Get this from author - metadata["NWBFile"]["session_start_time"] = date + # For data provenance we can add the time zone information to the conversion if missing + session_start_time = metadata["NWBFile"]["session_start_time"] + tzinfo = tz.gettz("US/Pacific") + metadata["NWBFile"].update(session_start_time=session_start_time.replace(tzinfo=tzinfo)) # Update default metadata with the editable in the corresponding yaml file - editable_metadata_path = Path(__file__).parent / "into_the_void_metadata.yaml" + editable_metadata_path = Path(__file__).parent / "general_metadata.yaml" editable_metadata = load_dict_from_file(editable_metadata_path) metadata = dict_deep_update(metadata, editable_metadata) + # Update metadata with subject_id and session_id from folder_path + # NCCR51_2023_04_07_no_task_dual_color_jrgeco_t_series-001 + file_naming_pattern = r"^(?P<subject_id>[^_]+)_(?:\d{4}_\d{2}_\d{2}_)(?P<session_id>.+)" + match = re.match(file_naming_pattern, str(two_photon_imaging_folder_path.name)) + if match: + groups_dict = match.groupdict() + metadata["NWBFile"].update(session_id=groups_dict["session_id"].replace("_", "-")) + metadata["Subject"].update(subject_id=groups_dict["subject_id"]) + # Run conversion - converter.run_conversion(metadata=metadata, nwbfile_path=nwbfile_path, conversion_options=conversion_options) + converter.run_conversion( + nwbfile_path=nwbfile_path, metadata=metadata, overwrite=True, conversion_options=conversion_options + ) if __name__ == "__main__": # Parameters for conversion - data_dir_path = Path("/Directory/With/Raw/Formats/") - output_dir_path = Path("~/conversion_nwb/") + + # The folder path that 
contains the Bruker TIF imaging output (.ome.tif files). + imaging_folder_path = Path("/Volumes/t7-ssd/Pinto/NCCR32_2022_11_03_IntoTheVoid_t_series-005") + # The folder that contains the Suite2P segmentation output. + segmentation_folder_path = imaging_folder_path / "suite2p" + # The folder path that will contain the NWB files. + nwbfile_folder_path = Path("/Volumes/t7-ssd/Pinto/nwbfiles") + # For testing purposes, when stub_test=True only writes a subset of imaging and segmentation data. stub_test = False + # The file path to the NWB file that will be created. + nwbfile_name = imaging_folder_path.name + ".nwb" if not stub_test else "stub_" + imaging_folder_path.name + ".nwb" + nwbfile_path = nwbfile_folder_path / nwbfile_name + + # Provide a mapping between the imaging and segmentation planes + # The default mapping is to rely on the order of the planes in the imaging and segmentation folders + plane_map = get_default_segmentation_to_imaging_name_mapping(imaging_folder_path, segmentation_folder_path) + session_to_nwb( - data_dir_path=data_dir_path, - output_dir_path=output_dir_path, + nwbfile_path=nwbfile_path, + two_photon_imaging_folder_path=imaging_folder_path, + segmentation_folder_path=segmentation_folder_path, + segmentation_to_imaging_plane_map=plane_map, stub_test=stub_test, ) diff --git a/src/pinto_lab_to_nwb/into_the_void/into_the_void_metadata.yaml b/src/pinto_lab_to_nwb/into_the_void/into_the_void_metadata.yaml deleted file mode 100644 index e5d68ac..0000000 --- a/src/pinto_lab_to_nwb/into_the_void/into_the_void_metadata.yaml +++ /dev/null @@ -1,12 +0,0 @@ -NWBFile: - related_publications: - https://doi.org/### or link to APA or MLA citation of the publication - session_description: - A rich text description of the experiment. Can also just be the abstract of the publication. 
- institution: My Institution - lab: Pinto - experimenter: - - Last, First Middle - - Last, First Middle -Subject: - species: Rattus norvegicus diff --git a/src/pinto_lab_to_nwb/into_the_void/into_the_void_notes.md b/src/pinto_lab_to_nwb/into_the_void/into_the_void_notes.md index 474ed13..c0f72ed 100644 --- a/src/pinto_lab_to_nwb/into_the_void/into_the_void_notes.md +++ b/src/pinto_lab_to_nwb/into_the_void/into_the_void_notes.md @@ -1 +1,83 @@ # Notes concerning the into_the_void conversion + +## Imaging folder structure + +See the example folder structure [here](https://gin.g-node.org/CatalystNeuro/ophys_testing_data/src/main/imaging_datasets/BrukerTif) for the Bruker TIF format. + +## Segmentation folder structure + +See the example folder structure [here](https://gin.g-node.org/CatalystNeuro/ophys_testing_data/src/main/segmentation_datasets/suite2p) for the Suite2p format. + +## Run conversion for a single session + +`into_the_void_convert_session.py`: this script defines the function to convert one full session of the conversion. +Parameters: +- "`two_photon_imaging_folder_path`" : The folder path that contains the Bruker TIF imaging output (.ome.tif files). +- "`segmentation_folder_path`": The folder path that contains the Suite2p segmentation output. +- "`segmentation_to_imaging_plane_map`": A dictionary that maps each segmentation plane name to the imaging naming convention. Optional parameter. + +### Imaging and Segmentation modalities naming convention + +The `segmentation_to_imaging_plane_map` is a dictionary that maps each segmentation plane name to the imaging naming convention. +This is necessary when there are multiple channels or planes in the imaging data and the segmentation data, as the name +provided in the segmentation interface might not be the same as the name provided in the imaging interface. 
+ +#### Single plane, dual channel example +For example if the imaging data has a single plane with two channels, the default `segmentation_to_imaging_plane_map` will be defined as follows: + +```python +from pinto_lab_to_nwb.into_the_void.into_the_voidnwbconverter import get_default_segmentation_to_imaging_name_mapping + +# The folder path that contains the Bruker TIF imaging output (.ome.tif files). +imaging_folder_path = "NCCR62_2023_07_06_IntoTheVoid_t_series_Dual_color-000" +# The folder that contains the Suite2P segmentation output. +segmentation_folder_path = "NCCR62_2023_07_06_IntoTheVoid_t_series_Dual_color-000/suite2p" + +# Provide a mapping between the imaging and segmentation planes +# The default mapping is to rely on the order of the planes in the imaging and segmentation folders +plane_map = get_default_segmentation_to_imaging_name_mapping(imaging_folder_path, segmentation_folder_path) +``` +which will output: +``` +{'Chan1Plane0': 'Ch1', 'Chan2Plane0': 'Ch2'} +``` +where the keys are the segmentation plane names and the values are the imaging plane names. + +This way the converter will automatically set the metadata for the segmentation interfaces to use the same naming convention as the imaging interfaces. +The default mapping can be adjusted by the user as follows: + +```python +imaging_to_segmentation_plane_map = {'Chan1Plane0': 'Ch2', 'Chan2Plane0': 'Ch1'} +``` +where the segmentation interface metadata for "Chan1Plane0" (the first channel from 'plane0' from the segmentation output from Suite2p) will be mapped +to use the same naming convention as the imaging interface metadata for "Ch2". 
+ +#### Dual plane, single channel example + +For example if the imaging data has two planes with a single channel, the default `segmentation_to_imaging_plane_map` will be defined as follows: + +```python +{'Chan1Plane0': 'Ch2_000001', 'Chan1Plane1': 'Ch2_000002'} +``` +where the segmentation interface metadata for "Chan1Plane0" will be mapped to use the same naming convention as the imaging interface metadata +for "Ch2_000001" (the first plane for the channel named 'Ch2' in the Bruker XML file). + +If the default mapping has to be adjusted, the user can provide a custom mapping as follows: + +```python +imaging_to_segmentation_plane_map = {'Chan2Plane0': 'Ch1', 'Chan1Plane0': 'Ch2'} +``` +where the segmentation interface metadata for "Chan2Plane0" (the second channel from 'plane0' from the segmentation output from Suite2p) +will be mapped to use the same naming convention as the imaging interface metadata for "Ch1" (the channel named 'Ch1' in the Bruker XML file). + +### Example usage + +To run a specific conversion, you might need to install first some conversion specific dependencies that are located in each conversion directory: +``` +cd src/pinto_lab_to_nwb/into_the_void +pip install -r into_the_void_requirements.txt +``` +Then you can run a specific conversion with the following command: +``` +python into_the_void_convert_session.py +``` diff --git a/src/pinto_lab_to_nwb/into_the_void/into_the_void_requirements.txt b/src/pinto_lab_to_nwb/into_the_void/into_the_void_requirements.txt index e69de29..cf24458 100644 --- a/src/pinto_lab_to_nwb/into_the_void/into_the_void_requirements.txt +++ b/src/pinto_lab_to_nwb/into_the_void/into_the_void_requirements.txt @@ -0,0 +1 @@ +neuroconv[suite2p, brukertiff] diff --git a/src/pinto_lab_to_nwb/into_the_void/into_the_voidbehaviorinterface.py b/src/pinto_lab_to_nwb/into_the_void/into_the_voidbehaviorinterface.py deleted file mode 100644 index 054f6fc..0000000 --- 
a/src/pinto_lab_to_nwb/into_the_void/into_the_voidbehaviorinterface.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Primary class for converting experiment-specific behavior.""" -from pynwb.file import NWBFile - -from neuroconv.basedatainterface import BaseDataInterface - - -class IntoTheVoidBehaviorInterface(BaseDataInterface): - """Behavior interface for into_the_void conversion""" - - def __init__(self): - # This should load the data lazily and prepare variables you need - pass - - def get_metadata(self): - # Automatically retrieve as much metadata as possible - metadata = super().get_metadata() - - return metadata - - def run_conversion(self, nwbfile: NWBFile, metadata: dict): - # All the custom code to write to PyNWB - - return nwbfile diff --git a/src/pinto_lab_to_nwb/into_the_void/into_the_voidnwbconverter.py b/src/pinto_lab_to_nwb/into_the_void/into_the_voidnwbconverter.py index 9e7a720..a06cfad 100644 --- a/src/pinto_lab_to_nwb/into_the_void/into_the_voidnwbconverter.py +++ b/src/pinto_lab_to_nwb/into_the_void/into_the_voidnwbconverter.py @@ -1,20 +1,152 @@ """Primary NWBConverter class for this dataset.""" +from typing import Optional + from neuroconv import NWBConverter -from neuroconv.datainterfaces import ( - SpikeGLXRecordingInterface, - SpikeGLXLFPInterface, - PhySortingInterface, -) +from neuroconv.datainterfaces import Suite2pSegmentationInterface, BrukerTiffMultiPlaneImagingInterface +from neuroconv.converters import BrukerTiffSinglePlaneConverter, BrukerTiffMultiPlaneConverter +from neuroconv.utils import FolderPathType, DeepDict, dict_deep_update + + +def get_default_segmentation_to_imaging_name_mapping( + imaging_folder_path: FolderPathType, segmentation_folder_path: FolderPathType +) -> dict or None: + """ + Get the default mapping between imaging and segmentation planes. + + Parameters + ---------- + imaging_folder_path: FolderPathType + The folder path that contains the Bruker TIF imaging output (.ome.tif files). 
+ segmentation_folder_path: FolderPathType + The folder that contains the Suite2P segmentation output. (usually named "suite2p") + """ + streams = BrukerTiffMultiPlaneImagingInterface.get_streams( + folder_path=imaging_folder_path, + plane_separation_type="disjoint", + ) + + available_channels = Suite2pSegmentationInterface.get_available_channels(folder_path=segmentation_folder_path) + available_planes = Suite2pSegmentationInterface.get_available_planes(folder_path=segmentation_folder_path) -from pinto_lab_to_nwb.into_the_void import IntoTheVoidBehaviorInterface + if len(available_planes) == 1 and len(available_channels) == 1: + return None + + segmentation_channel_plane_names = [ + f"{channel_name.capitalize()}{plane_name.capitalize()}" + for plane_name in available_planes + for channel_name in available_channels + ] + + if len(available_planes) > 1: + imaging_channel_plane_names = [ + plane_name + for channel_name in streams["plane_streams"] + for plane_name in streams["plane_streams"][channel_name] + ] + else: + imaging_channel_plane_names = streams["channel_streams"] + + segmentation_to_imaging_name_mapping = dict(zip(segmentation_channel_plane_names, imaging_channel_plane_names)) + + return segmentation_to_imaging_name_mapping class IntoTheVoidNWBConverter(NWBConverter): - """Primary conversion class for my extracellular electrophysiology dataset.""" + """Primary conversion class for the Two Photon Imaging (Bruker experiment).""" - data_interface_classes = dict( - Recording=SpikeGLXRecordingInterface, - LFP=SpikeGLXLFPInterface, - Sorting=PhySortingInterface, - Behavior=IntoTheVoidBehaviorInterface, - ) + def __init__( + self, + imaging_folder_path: FolderPathType, + verbose: bool = False, + segmentation_folder_path: Optional[FolderPathType] = None, + segmentation_to_imaging_map: dict = None, + ): + self.verbose = verbose + self.data_interface_objects = dict() + + self.plane_map = segmentation_to_imaging_map + + streams = 
BrukerTiffMultiPlaneImagingInterface.get_streams( + folder_path=imaging_folder_path, + plane_separation_type="disjoint", + ) + # Choose converter for Bruker depending on the number of planes + # For multiple planes use BrukerTiffMultiPlaneConverter + if streams["plane_streams"]: + self.data_interface_objects.update( + Imaging=BrukerTiffMultiPlaneConverter( + folder_path=imaging_folder_path, + plane_separation_type="disjoint", + verbose=verbose, + ), + ) + + else: + self.data_interface_objects.update( + Imaging=BrukerTiffSinglePlaneConverter(folder_path=imaging_folder_path, verbose=verbose), + ) + + if segmentation_folder_path: + available_planes = Suite2pSegmentationInterface.get_available_planes(folder_path=segmentation_folder_path) + available_channels = Suite2pSegmentationInterface.get_available_channels( + folder_path=segmentation_folder_path + ) + # Add first channel + for plane_name in available_planes: + for channel_name in available_channels: + # check additional channel + if channel_name == "chan2": + # check we have non-empty traces + interface = Suite2pSegmentationInterface( + folder_path=segmentation_folder_path, + channel_name=channel_name, + verbose=verbose, + ) + traces_to_add = interface.segmentation_extractor.get_traces_dict() + any_has_traces = any( + [bool(trace.size) for trace_name, trace in traces_to_add.items() if trace is not None] + ) + if not any_has_traces: + continue + + plane_name_suffix = f"{channel_name.capitalize()}{plane_name.capitalize()}" + segmentation_interface_name = f"Segmentation{plane_name_suffix}" + segmentation_source_data = dict( + folder_path=segmentation_folder_path, + channel_name=channel_name, + plane_name=plane_name, + verbose=verbose, + ) + if self.plane_map: + plane_segmentation_name = "PlaneSegmentation" + self.plane_map.get( + plane_name_suffix, None + ).replace("_", "") + segmentation_source_data.update( + plane_segmentation_name=plane_segmentation_name, + ) + 
Suite2pSegmentationInterface(**segmentation_source_data) + self.data_interface_objects.update( + {segmentation_interface_name: Suite2pSegmentationInterface(**segmentation_source_data)} + ) + + def get_metadata(self) -> DeepDict: + if not self.plane_map: + return super().get_metadata() + + imaging_metadata = self.data_interface_objects["Imaging"].get_metadata() + metadata = super().get_metadata() + + # override device metadata + device_metadata = imaging_metadata["Ophys"]["Device"] + device_name = device_metadata[0]["name"] + metadata["Ophys"]["Device"] = device_metadata + + for metadata_ind in range(len(imaging_metadata["Ophys"]["ImagingPlane"])): + optical_channel_metadata = imaging_metadata["Ophys"]["ImagingPlane"][metadata_ind]["optical_channel"] + # override optical channel metadata + metadata["Ophys"]["ImagingPlane"][metadata_ind]["optical_channel"] = optical_channel_metadata + + # override device link + metadata["Ophys"]["ImagingPlane"][metadata_ind]["device"] = device_name + + return metadata