From d310e9af0737c0392a80eb9c3d91d5f732f75a49 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 29 Sep 2024 12:25:49 -0400 Subject: [PATCH 1/3] make raw-only video interface; split from processed --- .../convert_brainwide_map_processed_only.py | 6 +- .../convert_brainwide_map_raw_only.py | 78 +++++++++++++++++ src/ibl_to_nwb/datainterfaces/__init__.py | 2 + .../datainterfaces/_pose_estimation.py | 86 ++++++++----------- src/ibl_to_nwb/datainterfaces/_raw_video.py | 75 ++++++++++++++++ 5 files changed, 193 insertions(+), 54 deletions(-) create mode 100644 src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py create mode 100644 src/ibl_to_nwb/datainterfaces/_raw_video.py diff --git a/src/ibl_to_nwb/_scripts/convert_brainwide_map_processed_only.py b/src/ibl_to_nwb/_scripts/convert_brainwide_map_processed_only.py index ecb5258..723e28f 100644 --- a/src/ibl_to_nwb/_scripts/convert_brainwide_map_processed_only.py +++ b/src/ibl_to_nwb/_scripts/convert_brainwide_map_processed_only.py @@ -82,10 +82,6 @@ subject_folder_path.mkdir(exist_ok=True) nwbfile_path = subject_folder_path / f"sub-{subject_id}_ses-{session_id}_desc-processed_behavior+ecephys.nwb" -session_converter.run_conversion( - nwbfile_path=nwbfile_path, - metadata=metadata, - overwrite=True, -) +session_converter.run_conversion(nwbfile_path=nwbfile_path, metadata=metadata, overwrite=True) check_written_nwbfile_for_consistency(one=ibl_client, nwbfile_path=nwbfile_path) diff --git a/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py b/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py new file mode 100644 index 0000000..2e7982f --- /dev/null +++ b/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py @@ -0,0 +1,78 @@ +from pathlib import Path + +from neuroconv.datainterfaces import SpikeGLXRecordingInterface +from one.api import ONE + +from ibl_to_nwb.converters import BrainwideMapConverter +from ibl_to_nwb.datainterfaces import RawVideoInterface + +session_id = 
"d32876dd-8303-4720-8e7e-20678dc2fd71" + +# Specify the revision of the pose estimation data +# Setting to 'None' will use whatever the latest released revision is +revision = None + +base_path = Path("E:/IBL") +base_path.mkdir(exist_ok=True) +nwbfiles_folder_path = base_path / "nwbfiles" +nwbfiles_folder_path.mkdir(exist_ok=True) + +# Initialize IBL (ONE) client to download processed data for this session +one_cache_folder_path = base_path / "cache" +ibl_client = ONE( + base_url="https://openalyx.internationalbrainlab.org", + password="international", + silent=True, + cache_dir=one_cache_folder_path, +) + +# Specify the path to the SpikeGLX files on the server +probe_1_source_folder_path = Path("D:/example_data/ephy_testing_data/spikeglx/Noise4Sam_g0") +probe_2_source_folder_path = Path( + "D:/example_data/ephy_testing_data/spikeglx/multi_trigger_multi_gate/SpikeGLX/5-19-2022-CI0/5-19-2022-CI0_g0/" +) + +ap_1_file_path = probe_1_source_folder_path / "Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.ap.bin" +ap_2_file_path = probe_2_source_folder_path / "5-19-2022-CI0_g0_imec0/5-19-2022-CI0_g0_t0.imec0.ap.bin" + +lf_1_file_path = probe_1_source_folder_path / "Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin" +lf_2_file_path = probe_2_source_folder_path / "5-19-2022-CI0_g0_imec0/5-19-2022-CI0_g0_t0.imec0.lf.bin" + +# Initialize as many of each interface as we need across the streams +data_interfaces = list() + +# These interfaces should always be present in source data +data_interfaces.append(SpikeGLXRecordingInterface(file_path=ap_1_file_path)) +data_interfaces.append(SpikeGLXRecordingInterface(file_path=ap_2_file_path)) +data_interfaces.append(SpikeGLXRecordingInterface(file_path=lf_1_file_path)) +data_interfaces.append(SpikeGLXRecordingInterface(file_path=lf_2_file_path)) + +pose_estimation_files = ibl_client.list_datasets(eid=session_id, filename="*.dlc*") +for pose_estimation_file in pose_estimation_files: + camera_name = pose_estimation_file.replace("alf/_ibl_", 
"").replace(".dlc.pqt", "") + data_interfaces.append( + RawVideoInterface( + nwbfiles_folder_path=nwbfiles_folder_path, one=ibl_client, session=session_id, camera_name=camera_name + ) + ) + +# Run conversion +session_converter = BrainwideMapConverter( + one=ibl_client, session=session_id, data_interfaces=data_interfaces, verbose=False +) + +metadata = session_converter.get_metadata() +subject_id = metadata["Subject"]["subject_id"] + +subject_folder_path = nwbfiles_folder_path / f"sub-{subject_id}" +subject_folder_path.mkdir(exist_ok=True) +nwbfile_path = subject_folder_path / f"sub-{subject_id}_ses-{session_id}_desc-raw_ecephys+image.nwb" + +session_converter.run_conversion( + nwbfile_path=nwbfile_path, + metadata=metadata, + overwrite=True, +) + +# TODO: add some kind of raw-specific check +# check_written_nwbfile_for_consistency(one=ibl_client, nwbfile_path=nwbfile_path) diff --git a/src/ibl_to_nwb/datainterfaces/__init__.py b/src/ibl_to_nwb/datainterfaces/__init__.py index efd752a..ff3826f 100644 --- a/src/ibl_to_nwb/datainterfaces/__init__.py +++ b/src/ibl_to_nwb/datainterfaces/__init__.py @@ -7,6 +7,7 @@ from ._roi_motion_energy import RoiMotionEnergyInterface from ._wheel_movement import WheelInterface from ._brainwide_map_trials import BrainwideMapTrialsInterface +from ._raw_video import RawVideoInterface __all__ = [ "BrainwideMapTrialsInterface", @@ -19,4 +20,5 @@ "PupilTrackingInterface", "RoiMotionEnergyInterface", "WheelInterface", + "RawVideoInterface", ] diff --git a/src/ibl_to_nwb/datainterfaces/_pose_estimation.py b/src/ibl_to_nwb/datainterfaces/_pose_estimation.py index 11dbd25..bbc9996 100644 --- a/src/ibl_to_nwb/datainterfaces/_pose_estimation.py +++ b/src/ibl_to_nwb/datainterfaces/_pose_estimation.py @@ -7,8 +7,6 @@ from neuroconv.tools.nwb_helpers import get_module from one.api import ONE from pynwb import NWBFile -from pynwb.image import ImageSeries -from typing_extensions import Self class IblPoseEstimationInterface(BaseDataInterface): @@ 
-17,15 +15,25 @@ def __init__( one: ONE, session: str, camera_name: str, - include_video: bool, - include_pose: bool, revision: Optional[str] = None, - ) -> Self: + ) -> None: + """ + Interface for the pose estimation (DLC) data from the IBL Brainwide Map release. + + Parameters + ---------- + one : one.ONE + The ONE API client. + session : str + The session ID (EID in ONE). + camera_name : "left", "right", or "body" + The name of the camera to load the pose estimation data for. + revision : str, optional + The revision of the pose estimation data to use. If not provided, the latest revision will be used. + """ self.one = one self.session = session self.camera_name = camera_name - self.include_video = include_video - self.include_pose = include_pose self.revision = revision if self.revision is None: @@ -55,52 +63,32 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: left_right_or_body = self.camera_name[:5].rstrip("C") reused_timestamps = None all_pose_estimation_series = list() - if self.include_pose: - for body_part in body_parts: - body_part_data = np.empty(shape=(number_of_frames, 2)) - body_part_data[:, 0] = dlc_data[f"{body_part}_x"] - body_part_data[:, 1] = dlc_data[f"{body_part}_y"] - - pose_estimation_series = PoseEstimationSeries( - name=body_part, - description=f"Marker placed on or around, labeled '{body_part}'.", - data=body_part_data, - unit="px", - reference_frame="(0,0) corresponds to the upper left corner when using width by height convention.", - timestamps=reused_timestamps or timestamps, - confidence=np.array(dlc_data[f"{body_part}_likelihood"]), - ) - all_pose_estimation_series.append(pose_estimation_series) - reused_timestamps = all_pose_estimation_series[0] # A trick for linking timestamps across series + for body_part in body_parts: + body_part_data = np.empty(shape=(number_of_frames, 2)) + body_part_data[:, 0] = dlc_data[f"{body_part}_x"] + body_part_data[:, 1] = dlc_data[f"{body_part}_y"] - pose_estimation_kwargs = dict(
name=f"PoseEstimation{left_right_or_body.capitalize()}Camera", - pose_estimation_series=all_pose_estimation_series, - description="Estimated positions of body parts using DeepLabCut.", - source_software="DeepLabCut", - nodes=body_parts, + pose_estimation_series = PoseEstimationSeries( + name=body_part, + description=f"Marker placed on or around, labeled '{body_part}'.", + data=body_part_data, + unit="px", + reference_frame="(0,0) corresponds to the upper left corner when using width by height convention.", + timestamps=reused_timestamps or timestamps, + confidence=np.array(dlc_data[f"{body_part}_likelihood"]), ) - pose_estimation_container = PoseEstimation(**pose_estimation_kwargs) - behavior_module = get_module(nwbfile=nwbfile, name="behavior", description="Processed behavioral data.") - behavior_module.add(pose_estimation_container) - - if self.include_video and self.one.list_datasets( - eid=self.session, filename=f"raw_video_data/*{self.camera_name}*" - ): all_pose_estimation_series.append(pose_estimation_series) reused_timestamps = all_pose_estimation_series[0] # A trick for linking timestamps across series - original_video_file = self.one.load_dataset( - id=self.session, dataset=f"raw_video_data/*{self.camera_name}*", download_only=True - ) - image_series = ImageSeries( - name=f"OriginalVideo{left_right_or_body.capitalize()}Camera", - description="The original video each pose was estimated from.", - unit="n.a.", - external_file=[str(original_video_file)], - format="external", - timestamps=reused_timestamps or timestamps, - ) - nwbfile.add_acquisition(image_series) + pose_estimation_kwargs = dict( + name=f"PoseEstimation{left_right_or_body.capitalize()}Camera", + pose_estimation_series=all_pose_estimation_series, + description="Estimated positions of body parts using DeepLabCut.", + source_software="DeepLabCut", + nodes=body_parts, + ) + pose_estimation_container = PoseEstimation(**pose_estimation_kwargs) + behavior_module = get_module(nwbfile=nwbfile, 
name="behavior", description="Processed behavioral data.") + behavior_module.add(pose_estimation_container) diff --git a/src/ibl_to_nwb/datainterfaces/_raw_video.py b/src/ibl_to_nwb/datainterfaces/_raw_video.py new file mode 100644 index 0000000..e925d10 --- /dev/null +++ b/src/ibl_to_nwb/datainterfaces/_raw_video.py @@ -0,0 +1,75 @@ +from typing import Literal + +from neuroconv.basedatainterface import BaseDataInterface +from one.api import ONE +from pydantic import DirectoryPath +from pynwb import NWBFile +from pynwb.image import ImageSeries + + +class RawVideoInterface(BaseDataInterface): + def __init__( + self, + nwbfiles_folder_path: DirectoryPath, + one: ONE, + session: str, + camera_name: Literal["left", "right", "body"], + ) -> None: + """ + Interface for the raw video data from the IBL Brainwide Map release. + + Parameters + ---------- + nwbfiles_folder_path : DirectoryPath + The folder path where the NWB file will be written in DANDI organization structure. + This is an unusual value to pass to __init__, but in this case it is necessary to simplify the DANDI + organization of the externally stored raw video data. + one : one.ONE + The ONE API client. + session : str + The session ID (EID in ONE). + camera_name : "left", "right", or "body" + The name of the camera to load the raw video data for. + revision : str, optional + The revision of the pose estimation data to use. If not provided, the latest revision will be used. 
+ """ + self.nwbfiles_folder_path = nwbfiles_folder_path + self.one = one + self.session = session + self.camera_name = camera_name + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: + camera_data = self.one.load_object( + id=self.session, obj=self.camera_name, collection="alf", revision=self.revision + ) + timestamps = camera_data["times"] + + left_right_or_body = self.camera_name[:5].removesuffix("C") + if self.one.list_datasets(eid=self.session, filename=f"raw_video_data/*{self.camera_name}*"): + original_video_file_path = self.one.load_dataset( + id=self.session, dataset=f"raw_video_data/*{self.camera_name}*", download_only=True + ) + + nwb_video_name = f"OriginalVideo{left_right_or_body.capitalize()}Camera" + + # Rename to DANDI format and relative organization + subject_id = metadata.get("Subject", dict()).get("subject_id", None) + assert subject_id is not None, "Subject ID not found in metadata." + + dandi_sub_ses_stem = f"sub-{subject_id}_ses-{self.session}" + dandi_video_folder_path = self.nwbfiles_folder_path / f"{dandi_sub_ses_stem}_ecephys+image" + dandi_video_folder_path.mkdir(exist_ok=True) + dandi_video_file_path = dandi_video_folder_path / f"{dandi_sub_ses_stem}_{nwb_video_name}.mp4" + + # Move the file into the new DANDI folder and rename to the DANDI pattern + original_video_file_path.rename(dandi_video_file_path) + + image_series = ImageSeries( + name=nwb_video_name, + description="The original video each pose was estimated from.", + unit="n.a.", + external_file=[str(original_video_file_path)], + format="external", + timestamps=timestamps, + ) + nwbfile.add_acquisition(image_series) From 9a2b1e3ff92cb3ec552aecd6a8d728be9caba239 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 29 Sep 2024 12:49:38 -0400 Subject: [PATCH 2/3] final touches --- .../convert_brainwide_map_raw_only.py | 20 ++++++++++++------- src/ibl_to_nwb/converters/_iblconverter.py | 6 ------ src/ibl_to_nwb/datainterfaces/_raw_video.py | 17 ++++++++-------- 3 
files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py b/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py index 2e7982f..496d930 100644 --- a/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py +++ b/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py @@ -38,23 +38,29 @@ lf_1_file_path = probe_1_source_folder_path / "Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin" lf_2_file_path = probe_2_source_folder_path / "5-19-2022-CI0_g0_imec0/5-19-2022-CI0_g0_t0.imec0.lf.bin" -# Initialize as many of each interface as we need across the streams +# Initialize interfaces data_interfaces = list() - -# These interfaces should always be present in source data data_interfaces.append(SpikeGLXRecordingInterface(file_path=ap_1_file_path)) data_interfaces.append(SpikeGLXRecordingInterface(file_path=ap_2_file_path)) data_interfaces.append(SpikeGLXRecordingInterface(file_path=lf_1_file_path)) data_interfaces.append(SpikeGLXRecordingInterface(file_path=lf_2_file_path)) +# Raw video takes some special handling +metadata_retrieval = BrainwideMapConverter(one=ibl_client, session=session_id, data_interfaces=[], verbose=False) +subject_id = metadata_retrieval.get_metadata()["Subject"]["subject_id"] + pose_estimation_files = ibl_client.list_datasets(eid=session_id, filename="*.dlc*") for pose_estimation_file in pose_estimation_files: camera_name = pose_estimation_file.replace("alf/_ibl_", "").replace(".dlc.pqt", "") - data_interfaces.append( - RawVideoInterface( - nwbfiles_folder_path=nwbfiles_folder_path, one=ibl_client, session=session_id, camera_name=camera_name - ) + + video_interface = RawVideoInterface( + nwbfiles_folder_path=nwbfiles_folder_path, + subject_id=subject_id, + one=ibl_client, + session=session_id, + camera_name=camera_name, ) + data_interfaces.append(video_interface) # Run conversion session_converter = BrainwideMapConverter( diff --git a/src/ibl_to_nwb/converters/_iblconverter.py
b/src/ibl_to_nwb/converters/_iblconverter.py index 93c0848..ab1b819 100644 --- a/src/ibl_to_nwb/converters/_iblconverter.py +++ b/src/ibl_to_nwb/converters/_iblconverter.py @@ -52,9 +52,6 @@ def get_metadata(self) -> dict: assert len(subject_metadata_list) == 1, "More than one subject metadata returned by query." subject_metadata = subject_metadata_list[0] - if "Subject" not in metadata: - metadata.update(Subject=dict()) - subject_extra_metadata_name_mapping = dict( last_water_restriction="last_water_restriction", # ISO remaining_water="remaining_water_ml", @@ -84,9 +81,6 @@ def run_conversion( nwbfile: Optional[NWBFile] = None, metadata: Optional[dict] = None, overwrite: bool = False, - # TODO: when all H5DataIO prewraps are gone, introduce Zarr safely - # backend: Union[Literal["hdf5", "zarr"]], - # backend_configuration: Optional[Union[HDF5BackendConfiguration, ZarrBackendConfiguration]] = None, backend: Optional[Literal["hdf5"]] = None, backend_configuration: Optional[HDF5BackendConfiguration] = None, conversion_options: Optional[dict] = None, diff --git a/src/ibl_to_nwb/datainterfaces/_raw_video.py b/src/ibl_to_nwb/datainterfaces/_raw_video.py index e925d10..fc2ce18 100644 --- a/src/ibl_to_nwb/datainterfaces/_raw_video.py +++ b/src/ibl_to_nwb/datainterfaces/_raw_video.py @@ -11,6 +11,7 @@ class RawVideoInterface(BaseDataInterface): def __init__( self, nwbfiles_folder_path: DirectoryPath, + subject_id: str, one: ONE, session: str, camera_name: Literal["left", "right", "body"], @@ -24,6 +25,9 @@ def __init__( The folder path where the NWB file will be written in DANDI organization structure. This is an unusual value to pass to __init__, but in this case it is necessary to simplify the DANDI organization of the externally stored raw video data. + subject_id : str + The subject ID to use for the DANDI organization. 
This is also an unusual value to pass to __init__, but + the custom handling of Subject extensions requires removing it from the main metadata at runtime. one : one.ONE The ONE API client. session : str @@ -34,14 +38,13 @@ def __init__( The revision of the pose estimation data to use. If not provided, the latest revision will be used. """ self.nwbfiles_folder_path = nwbfiles_folder_path + self.subject_id = subject_id self.one = one self.session = session self.camera_name = camera_name def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: - camera_data = self.one.load_object( - id=self.session, obj=self.camera_name, collection="alf", revision=self.revision - ) + camera_data = self.one.load_object(id=self.session, obj=self.camera_name, collection="alf") timestamps = camera_data["times"] left_right_or_body = self.camera_name[:5].removesuffix("C") @@ -53,11 +56,9 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: nwb_video_name = f"OriginalVideo{left_right_or_body.capitalize()}Camera" # Rename to DANDI format and relative organization - subject_id = metadata.get("Subject", dict()).get("subject_id", None) - assert subject_id is not None, "Subject ID not found in metadata." 
- - dandi_sub_ses_stem = f"sub-{subject_id}_ses-{self.session}" - dandi_video_folder_path = self.nwbfiles_folder_path / f"{dandi_sub_ses_stem}_ecephys+image" + dandi_sub_stem = f"sub-{self.subject_id}" + dandi_sub_ses_stem = f"{dandi_sub_stem}_ses-{self.session}" + dandi_video_folder_path = self.nwbfiles_folder_path / dandi_sub_stem / f"{dandi_sub_ses_stem}_ecephys+image" dandi_video_folder_path.mkdir(exist_ok=True) dandi_video_file_path = dandi_video_folder_path / f"{dandi_sub_ses_stem}_{nwb_video_name}.mp4" From bfedac40b60f99cac9c5040c59a584f07893e117 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 29 Sep 2024 13:00:00 -0400 Subject: [PATCH 3/3] final touches --- src/ibl_to_nwb/datainterfaces/_raw_video.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/ibl_to_nwb/datainterfaces/_raw_video.py b/src/ibl_to_nwb/datainterfaces/_raw_video.py index fc2ce18..066a1c6 100644 --- a/src/ibl_to_nwb/datainterfaces/_raw_video.py +++ b/src/ibl_to_nwb/datainterfaces/_raw_video.py @@ -1,3 +1,4 @@ +from shutil import copyfile from typing import Literal from neuroconv.basedatainterface import BaseDataInterface @@ -53,23 +54,25 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: id=self.session, dataset=f"raw_video_data/*{self.camera_name}*", download_only=True ) - nwb_video_name = f"OriginalVideo{left_right_or_body.capitalize()}Camera" - # Rename to DANDI format and relative organization dandi_sub_stem = f"sub-{self.subject_id}" + dandi_subject_folder = self.nwbfiles_folder_path / dandi_sub_stem + dandi_sub_ses_stem = f"{dandi_sub_stem}_ses-{self.session}" - dandi_video_folder_path = self.nwbfiles_folder_path / dandi_sub_stem / f"{dandi_sub_ses_stem}_ecephys+image" + dandi_video_folder_path = dandi_subject_folder / f"{dandi_sub_ses_stem}_ecephys+image" dandi_video_folder_path.mkdir(exist_ok=True) + + nwb_video_name = f"OriginalVideo{left_right_or_body.capitalize()}Camera" dandi_video_file_path = dandi_video_folder_path 
/ f"{dandi_sub_ses_stem}_{nwb_video_name}.mp4" - # Move the file into the new DANDI folder and rename to the DANDI pattern - original_video_file_path.rename(dandi_video_file_path) + # A little bit of data duplication to copy, but easier for re-running since original file stays in cache + copyfile(src=original_video_file_path, dst=dandi_video_file_path) image_series = ImageSeries( name=nwb_video_name, description="The original video each pose was estimated from.", unit="n.a.", - external_file=[str(original_video_file_path)], + external_file=["./" + str(dandi_video_file_path.relative_to(dandi_subject_folder))], format="external", timestamps=timestamps, )