Merge pull request #92 from catalystneuro/adjust_raw_only
Adjust raw file structure
CodyCBakerPhD authored Sep 29, 2024
2 parents 9e9b152 + bfedac4 commit b886fba
Showing 6 changed files with 203 additions and 60 deletions.
@@ -82,10 +82,6 @@
 subject_folder_path.mkdir(exist_ok=True)
 nwbfile_path = subject_folder_path / f"sub-{subject_id}_ses-{session_id}_desc-processed_behavior+ecephys.nwb"

-session_converter.run_conversion(
-    nwbfile_path=nwbfile_path,
-    metadata=metadata,
-    overwrite=True,
-)
+session_converter.run_conversion(nwbfile_path=nwbfile_path, metadata=metadata, overwrite=True)

 check_written_nwbfile_for_consistency(one=ibl_client, nwbfile_path=nwbfile_path)
84 changes: 84 additions & 0 deletions src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py
@@ -0,0 +1,84 @@
from pathlib import Path

from neuroconv.datainterfaces import SpikeGLXRecordingInterface
from one.api import ONE

from ibl_to_nwb.converters import BrainwideMapConverter
from ibl_to_nwb.datainterfaces import RawVideoInterface

session_id = "d32876dd-8303-4720-8e7e-20678dc2fd71"

# Specify the revision of the pose estimation data
# Setting to 'None' will use whatever the latest released revision is
revision = None

base_path = Path("E:/IBL")
base_path.mkdir(exist_ok=True)
nwbfiles_folder_path = base_path / "nwbfiles"
nwbfiles_folder_path.mkdir(exist_ok=True)

# Initialize IBL (ONE) client to download processed data for this session
one_cache_folder_path = base_path / "cache"
ibl_client = ONE(
    base_url="https://openalyx.internationalbrainlab.org",
    password="international",
    silent=True,
    cache_dir=one_cache_folder_path,
)

# Specify the path to the SpikeGLX files on the server
probe_1_source_folder_path = Path("D:/example_data/ephy_testing_data/spikeglx/Noise4Sam_g0")
probe_2_source_folder_path = Path(
    "D:/example_data/ephy_testing_data/spikeglx/multi_trigger_multi_gate/SpikeGLX/5-19-2022-CI0/5-19-2022-CI0_g0/"
)

ap_1_file_path = probe_1_source_folder_path / "Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.ap.bin"
ap_2_file_path = probe_2_source_folder_path / "5-19-2022-CI0_g0_imec0/5-19-2022-CI0_g0_t0.imec0.ap.bin"

lf_1_file_path = probe_1_source_folder_path / "Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin"
lf_2_file_path = probe_2_source_folder_path / "5-19-2022-CI0_g0_imec0/5-19-2022-CI0_g0_t0.imec0.lf.bin"

# Initialize interfaces
data_interfaces = list()
data_interfaces.append(SpikeGLXRecordingInterface(file_path=ap_1_file_path))
data_interfaces.append(SpikeGLXRecordingInterface(file_path=ap_2_file_path))
data_interfaces.append(SpikeGLXRecordingInterface(file_path=lf_1_file_path))
data_interfaces.append(SpikeGLXRecordingInterface(file_path=lf_2_file_path))

# Raw videos take some special handling
metadata_retrieval = BrainwideMapConverter(one=ibl_client, session=session_id, data_interfaces=[], verbose=False)
subject_id = metadata_retrieval.get_metadata()["Subject"]["subject_id"]

pose_estimation_files = ibl_client.list_datasets(eid=session_id, filename="*.dlc*")
for pose_estimation_file in pose_estimation_files:
    camera_name = pose_estimation_file.replace("alf/_ibl_", "").replace(".dlc.pqt", "")

    video_interface = RawVideoInterface(
        nwbfiles_folder_path=nwbfiles_folder_path,
        subject_id=subject_id,
        one=ibl_client,
        session=session_id,
        camera_name=camera_name,
    )
    data_interfaces.append(video_interface)

# Run conversion
session_converter = BrainwideMapConverter(
    one=ibl_client, session=session_id, data_interfaces=data_interfaces, verbose=False
)

metadata = session_converter.get_metadata()
subject_id = metadata["Subject"]["subject_id"]

subject_folder_path = nwbfiles_folder_path / f"sub-{subject_id}"
subject_folder_path.mkdir(exist_ok=True)
nwbfile_path = subject_folder_path / f"sub-{subject_id}_ses-{session_id}_desc-raw_ecephys+image.nwb"

session_converter.run_conversion(
    nwbfile_path=nwbfile_path,
    metadata=metadata,
    overwrite=True,
)

# TODO: add some kind of raw-specific check
# check_written_nwbfile_for_consistency(one=ibl_client, nwbfile_path=nwbfile_path)
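
The TODO above could eventually take a shape like the following sketch (a hypothetical check_written_raw_nwbfile helper, not part of this PR; assumes pynwb and the external-file layout produced by RawVideoInterface below): read the written file back, then verify that each raw ElectricalSeries is non-empty and that every externally referenced video file exists on disk.

from pathlib import Path

from pynwb import NWBHDF5IO
from pynwb.ecephys import ElectricalSeries
from pynwb.image import ImageSeries


def check_written_raw_nwbfile(nwbfile_path: Path) -> None:
    """Hypothetical raw-specific consistency check; a sketch, not the project's API."""
    with NWBHDF5IO(path=str(nwbfile_path), mode="r") as io:
        nwbfile = io.read()
        for name, data_object in nwbfile.acquisition.items():
            # Raw ephys: the written series should actually contain samples
            if isinstance(data_object, ElectricalSeries):
                assert data_object.data.shape[0] > 0, f"'{name}' contains no samples!"
            # Raw video: every externally referenced file should exist on disk
            if isinstance(data_object, ImageSeries) and data_object.external_file is not None:
                for external_file in data_object.external_file:
                    video_file_path = (nwbfile_path.parent / str(external_file)).resolve()
                    assert video_file_path.exists(), f"Missing external video: {video_file_path}"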
6 changes: 0 additions & 6 deletions src/ibl_to_nwb/converters/_iblconverter.py
@@ -52,9 +52,6 @@ def get_metadata(self) -> dict:
         assert len(subject_metadata_list) == 1, "More than one subject metadata returned by query."
         subject_metadata = subject_metadata_list[0]

-        if "Subject" not in metadata:
-            metadata.update(Subject=dict())
-
         subject_extra_metadata_name_mapping = dict(
             last_water_restriction="last_water_restriction",  # ISO
             remaining_water="remaining_water_ml",
@@ -84,9 +81,6 @@ def run_conversion(
         nwbfile: Optional[NWBFile] = None,
         metadata: Optional[dict] = None,
         overwrite: bool = False,
-        # TODO: when all H5DataIO prewraps are gone, introduce Zarr safely
-        # backend: Union[Literal["hdf5", "zarr"]],
-        # backend_configuration: Optional[Union[HDF5BackendConfiguration, ZarrBackendConfiguration]] = None,
         backend: Optional[Literal["hdf5"]] = None,
         backend_configuration: Optional[HDF5BackendConfiguration] = None,
         conversion_options: Optional[dict] = None,
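
For reference, the run_conversion signature that remains still takes an explicit backend, but only HDF5 is currently accepted. A minimal call under that restriction might look like this (a sketch reusing names from the scripts above, not a prescribed pattern):

session_converter.run_conversion(
    nwbfile_path=nwbfile_path,
    metadata=metadata,
    overwrite=True,
    backend="hdf5",  # only "hdf5" is allowed until Zarr can be introduced safely
)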
2 changes: 2 additions & 0 deletions src/ibl_to_nwb/datainterfaces/__init__.py
@@ -7,6 +7,7 @@
 from ._roi_motion_energy import RoiMotionEnergyInterface
 from ._wheel_movement import WheelInterface
 from ._brainwide_map_trials import BrainwideMapTrialsInterface
+from ._raw_video import RawVideoInterface

 __all__ = [
     "BrainwideMapTrialsInterface",
@@ -19,4 +20,5 @@
     "PupilTrackingInterface",
     "RoiMotionEnergyInterface",
     "WheelInterface",
+    "RawVideoInterface",
 ]
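
With the export in place, the new interface is importable alongside the others (a minimal sketch; the constructor arguments are placeholders matching the raw-only script above, and the camera_name value follows the Literal annotation in _raw_video.py):

from ibl_to_nwb.datainterfaces import RawVideoInterface

video_interface = RawVideoInterface(
    nwbfiles_folder_path=nwbfiles_folder_path,
    subject_id=subject_id,
    one=ibl_client,
    session=session_id,
    camera_name="left",
)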
86 changes: 37 additions & 49 deletions src/ibl_to_nwb/datainterfaces/_pose_estimation.py
@@ -7,8 +7,6 @@
 from neuroconv.tools.nwb_helpers import get_module
 from one.api import ONE
 from pynwb import NWBFile
-from pynwb.image import ImageSeries
-from typing_extensions import Self


 class IblPoseEstimationInterface(BaseDataInterface):
@@ -17,15 +15,25 @@ def __init__(
         one: ONE,
         session: str,
         camera_name: str,
-        include_video: bool,
-        include_pose: bool,
         revision: Optional[str] = None,
-    ) -> Self:
+    ) -> None:
+        """
+        Interface for the pose estimation (DLC) data from the IBL Brainwide Map release.
+
+        Parameters
+        ----------
+        one : one.ONE
+            The ONE API client.
+        session : str
+            The session ID (EID in ONE).
+        camera_name : "left", "right", or "body"
+            The name of the camera to load the raw video data for.
+        revision : str, optional
+            The revision of the pose estimation data to use. If not provided, the latest revision will be used.
+        """
         self.one = one
         self.session = session
         self.camera_name = camera_name
-        self.include_video = include_video
-        self.include_pose = include_pose

         self.revision = revision
         if self.revision is None:
@@ -55,52 +63,32 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
         left_right_or_body = self.camera_name[:5].rstrip("C")
         reused_timestamps = None
         all_pose_estimation_series = list()
-        if self.include_pose:
-            for body_part in body_parts:
-                body_part_data = np.empty(shape=(number_of_frames, 2))
-                body_part_data[:, 0] = dlc_data[f"{body_part}_x"]
-                body_part_data[:, 1] = dlc_data[f"{body_part}_y"]
-
-                pose_estimation_series = PoseEstimationSeries(
-                    name=body_part,
-                    description=f"Marker placed on or around, labeled '{body_part}'.",
-                    data=body_part_data,
-                    unit="px",
-                    reference_frame="(0,0) corresponds to the upper left corner when using width by height convention.",
-                    timestamps=reused_timestamps or timestamps,
-                    confidence=np.array(dlc_data[f"{body_part}_likelihood"]),
-                )
-                all_pose_estimation_series.append(pose_estimation_series)
-
-                reused_timestamps = all_pose_estimation_series[0]  # A trick for linking timestamps across series
-
-            pose_estimation_kwargs = dict(
-                name=f"PoseEstimation{left_right_or_body.capitalize()}Camera",
-                pose_estimation_series=all_pose_estimation_series,
-                description="Estimated positions of body parts using DeepLabCut.",
-                source_software="DeepLabCut",
-                nodes=body_parts,
-            )
-            pose_estimation_container = PoseEstimation(**pose_estimation_kwargs)
-            behavior_module = get_module(nwbfile=nwbfile, name="behavior", description="Processed behavioral data.")
-            behavior_module.add(pose_estimation_container)
-
-        if self.include_video and self.one.list_datasets(
-            eid=self.session, filename=f"raw_video_data/*{self.camera_name}*"
-        ):
-            original_video_file = self.one.load_dataset(
-                id=self.session, dataset=f"raw_video_data/*{self.camera_name}*", download_only=True
-            )
-            image_series = ImageSeries(
-                name=f"OriginalVideo{left_right_or_body.capitalize()}Camera",
-                description="The original video each pose was estimated from.",
-                unit="n.a.",
-                external_file=[str(original_video_file)],
-                format="external",
-                timestamps=reused_timestamps or timestamps,
-            )
-            nwbfile.add_acquisition(image_series)
+        for body_part in body_parts:
+            body_part_data = np.empty(shape=(number_of_frames, 2))
+            body_part_data[:, 0] = dlc_data[f"{body_part}_x"]
+            body_part_data[:, 1] = dlc_data[f"{body_part}_y"]
+
+            pose_estimation_series = PoseEstimationSeries(
+                name=body_part,
+                description=f"Marker placed on or around, labeled '{body_part}'.",
+                data=body_part_data,
+                unit="px",
+                reference_frame="(0,0) corresponds to the upper left corner when using width by height convention.",
+                timestamps=reused_timestamps or timestamps,
+                confidence=np.array(dlc_data[f"{body_part}_likelihood"]),
+            )
+            all_pose_estimation_series.append(pose_estimation_series)
+
+            reused_timestamps = all_pose_estimation_series[0]  # A trick for linking timestamps across series
+
+        pose_estimation_kwargs = dict(
+            name=f"PoseEstimation{left_right_or_body.capitalize()}Camera",
+            pose_estimation_series=all_pose_estimation_series,
+            description="Estimated positions of body parts using DeepLabCut.",
+            source_software="DeepLabCut",
+            nodes=body_parts,
+        )
+        pose_estimation_container = PoseEstimation(**pose_estimation_kwargs)
+        behavior_module = get_module(nwbfile=nwbfile, name="behavior", description="Processed behavioral data.")
+        behavior_module.add(pose_estimation_container)
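
The reused_timestamps trick retained above leans on a pynwb behavior worth spelling out: passing an already-created series as timestamps stores a link to that series' timestamps rather than writing a duplicate array. A minimal standalone sketch (assumes only numpy and pynwb; behavior per pynwb's timestamp-linking support):

import numpy as np
from pynwb import TimeSeries

timestamps = np.linspace(0.0, 10.0, num=100)
first_series = TimeSeries(name="first", data=np.zeros(100), unit="a.u.", timestamps=timestamps)
# Passing the first series as `timestamps` links to its timestamps instead of copying them
second_series = TimeSeries(name="second", data=np.ones(100), unit="a.u.", timestamps=first_series)
assert second_series.timestamps is first_series.timestamps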
79 changes: 79 additions & 0 deletions src/ibl_to_nwb/datainterfaces/_raw_video.py
@@ -0,0 +1,79 @@
from shutil import copyfile
from typing import Literal

from neuroconv.basedatainterface import BaseDataInterface
from one.api import ONE
from pydantic import DirectoryPath
from pynwb import NWBFile
from pynwb.image import ImageSeries


class RawVideoInterface(BaseDataInterface):
    def __init__(
        self,
        nwbfiles_folder_path: DirectoryPath,
        subject_id: str,
        one: ONE,
        session: str,
        camera_name: Literal["left", "right", "body"],
    ) -> None:
        """
        Interface for the raw video data from the IBL Brainwide Map release.

        Parameters
        ----------
        nwbfiles_folder_path : DirectoryPath
            The folder path where the NWB file will be written in DANDI organization structure.
            This is an unusual value to pass to __init__, but in this case it is necessary to simplify the DANDI
            organization of the externally stored raw video data.
        subject_id : str
            The subject ID to use for the DANDI organization. This is also an unusual value to pass to __init__, but
            the custom handling of Subject extensions requires removing it from the main metadata at runtime.
        one : one.ONE
            The ONE API client.
        session : str
            The session ID (EID in ONE).
        camera_name : "left", "right", or "body"
            The name of the camera to load the raw video data for.
        """
        self.nwbfiles_folder_path = nwbfiles_folder_path
        self.subject_id = subject_id
        self.one = one
        self.session = session
        self.camera_name = camera_name

    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
        camera_data = self.one.load_object(id=self.session, obj=self.camera_name, collection="alf")
        timestamps = camera_data["times"]

        left_right_or_body = self.camera_name[:5].removesuffix("C")
        if self.one.list_datasets(eid=self.session, filename=f"raw_video_data/*{self.camera_name}*"):
            original_video_file_path = self.one.load_dataset(
                id=self.session, dataset=f"raw_video_data/*{self.camera_name}*", download_only=True
            )

            # Rename to DANDI format and relative organization
            dandi_sub_stem = f"sub-{self.subject_id}"
            dandi_subject_folder = self.nwbfiles_folder_path / dandi_sub_stem

            dandi_sub_ses_stem = f"{dandi_sub_stem}_ses-{self.session}"
            dandi_video_folder_path = dandi_subject_folder / f"{dandi_sub_ses_stem}_ecephys+image"
            dandi_video_folder_path.mkdir(exist_ok=True)

            nwb_video_name = f"OriginalVideo{left_right_or_body.capitalize()}Camera"
            dandi_video_file_path = dandi_video_folder_path / f"{dandi_sub_ses_stem}_{nwb_video_name}.mp4"

            # Copying duplicates a little data, but makes re-running easier since the original file stays in the cache
            copyfile(src=original_video_file_path, dst=dandi_video_file_path)

            image_series = ImageSeries(
                name=nwb_video_name,
                description="The original video each pose was estimated from.",
                unit="n.a.",
                external_file=["./" + str(dandi_video_file_path.relative_to(dandi_subject_folder))],
                format="external",
                timestamps=timestamps,
            )
            nwbfile.add_acquisition(image_series)
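
Putting the path logic above together, the layout this interface produces next to the NWB file written by the raw-only script looks like the following (subject and session IDs are placeholders). Since the ImageSeries external_file entry is stored relative to the subject folder, the NWB file and its video folder travel together under DANDI organization:

nwbfiles/
└── sub-<subject_id>/
    ├── sub-<subject_id>_ses-<session_id>_desc-raw_ecephys+image.nwb
    └── sub-<subject_id>_ses-<session_id>_ecephys+image/
        ├── sub-<subject_id>_ses-<session_id>_OriginalVideoLeftCamera.mp4
        ├── sub-<subject_id>_ses-<session_id>_OriginalVideoRightCamera.mp4
        └── sub-<subject_id>_ses-<session_id>_OriginalVideoBodyCamera.mp4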
