Merge pull request #92 from catalystneuro/adjust_raw_only
Adjust raw file structure
CodyCBakerPhD authored Sep 29, 2024
2 parents 9e9b152 + bfedac4 commit b886fba
Showing 6 changed files with 203 additions and 60 deletions.
@@ -82,10 +82,6 @@
 subject_folder_path.mkdir(exist_ok=True)
 nwbfile_path = subject_folder_path / f"sub-{subject_id}_ses-{session_id}_desc-processed_behavior+ecephys.nwb"

-session_converter.run_conversion(
-    nwbfile_path=nwbfile_path,
-    metadata=metadata,
-    overwrite=True,
-)
+session_converter.run_conversion(nwbfile_path=nwbfile_path, metadata=metadata, overwrite=True)

 check_written_nwbfile_for_consistency(one=ibl_client, nwbfile_path=nwbfile_path)
84 changes: 84 additions & 0 deletions src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py
@@ -0,0 +1,84 @@
from pathlib import Path

from neuroconv.datainterfaces import SpikeGLXRecordingInterface
from one.api import ONE

from ibl_to_nwb.converters import BrainwideMapConverter
from ibl_to_nwb.datainterfaces import RawVideoInterface

session_id = "d32876dd-8303-4720-8e7e-20678dc2fd71"

# Specify the revision of the pose estimation data
# Setting to 'None' will use whatever the latest released revision is
revision = None

base_path = Path("E:/IBL")
base_path.mkdir(exist_ok=True)
nwbfiles_folder_path = base_path / "nwbfiles"
nwbfiles_folder_path.mkdir(exist_ok=True)

# Initialize IBL (ONE) client to download processed data for this session
one_cache_folder_path = base_path / "cache"
ibl_client = ONE(
    base_url="https://openalyx.internationalbrainlab.org",
    password="international",
    silent=True,
    cache_dir=one_cache_folder_path,
)

# Specify the path to the SpikeGLX files on the server
probe_1_source_folder_path = Path("D:/example_data/ephy_testing_data/spikeglx/Noise4Sam_g0")
probe_2_source_folder_path = Path(
    "D:/example_data/ephy_testing_data/spikeglx/multi_trigger_multi_gate/SpikeGLX/5-19-2022-CI0/5-19-2022-CI0_g0/"
)

ap_1_file_path = probe_1_source_folder_path / "Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.ap.bin"
ap_2_file_path = probe_2_source_folder_path / "5-19-2022-CI0_g0_imec0/5-19-2022-CI0_g0_t0.imec0.ap.bin"

lf_1_file_path = probe_1_source_folder_path / "Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin"
lf_2_file_path = probe_2_source_folder_path / "5-19-2022-CI0_g0_imec0/5-19-2022-CI0_g0_t0.imec0.lf.bin"

# Initialize interfaces
data_interfaces = list()
data_interfaces.append(SpikeGLXRecordingInterface(file_path=ap_1_file_path))
data_interfaces.append(SpikeGLXRecordingInterface(file_path=ap_2_file_path))
data_interfaces.append(SpikeGLXRecordingInterface(file_path=lf_1_file_path))
data_interfaces.append(SpikeGLXRecordingInterface(file_path=lf_2_file_path))

# Raw videos take some special handling
metadata_retrieval = BrainwideMapConverter(one=ibl_client, session=session_id, data_interfaces=[], verbose=False)
subject_id = metadata_retrieval.get_metadata()["Subject"]["subject_id"]

pose_estimation_files = ibl_client.list_datasets(eid=session_id, filename="*.dlc*")
for pose_estimation_file in pose_estimation_files:
    camera_name = pose_estimation_file.replace("alf/_ibl_", "").replace(".dlc.pqt", "")

    video_interface = RawVideoInterface(
        nwbfiles_folder_path=nwbfiles_folder_path,
        subject_id=subject_id,
        one=ibl_client,
        session=session_id,
        camera_name=camera_name,
    )
    data_interfaces.append(video_interface)

# Run conversion
session_converter = BrainwideMapConverter(
    one=ibl_client, session=session_id, data_interfaces=data_interfaces, verbose=False
)

metadata = session_converter.get_metadata()
subject_id = metadata["Subject"]["subject_id"]

subject_folder_path = nwbfiles_folder_path / f"sub-{subject_id}"
subject_folder_path.mkdir(exist_ok=True)
nwbfile_path = subject_folder_path / f"sub-{subject_id}_ses-{session_id}_desc-raw_ecephys+image.nwb"

session_converter.run_conversion(
    nwbfile_path=nwbfile_path,
    metadata=metadata,
    overwrite=True,
)

# TODO: add some kind of raw-specific check
# check_written_nwbfile_for_consistency(one=ibl_client, nwbfile_path=nwbfile_path)
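
The TODO above could eventually take a shape like the following sketch (a hypothetical check_written_raw_nwbfile helper, not part of this PR; assumes pynwb and the external-file layout produced by RawVideoInterface below): read the written file back, then verify that each raw ElectricalSeries is non-empty and that every externally referenced video file exists on disk.

from pathlib import Path

from pynwb import NWBHDF5IO
from pynwb.ecephys import ElectricalSeries
from pynwb.image import ImageSeries


def check_written_raw_nwbfile(nwbfile_path: Path) -> None:
    """Hypothetical raw-specific consistency check; a sketch, not the project's API."""
    with NWBHDF5IO(path=str(nwbfile_path), mode="r") as io:
        nwbfile = io.read()
        for name, data_object in nwbfile.acquisition.items():
            # Raw ephys: the written series should actually contain samples
            if isinstance(data_object, ElectricalSeries):
                assert data_object.data.shape[0] > 0, f"'{name}' contains no samples!"
            # Raw video: every externally referenced file should exist on disk
            if isinstance(data_object, ImageSeries) and data_object.external_file is not None:
                for external_file in data_object.external_file:
                    video_file_path = (nwbfile_path.parent / str(external_file)).resolve()
                    assert video_file_path.exists(), f"Missing external video: {video_file_path}"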
6 changes: 0 additions & 6 deletions src/ibl_to_nwb/converters/_iblconverter.py
@@ -52,9 +52,6 @@ def get_metadata(self) -> dict:
         assert len(subject_metadata_list) == 1, "More than one subject metadata returned by query."
         subject_metadata = subject_metadata_list[0]

-        if "Subject" not in metadata:
-            metadata.update(Subject=dict())
-
         subject_extra_metadata_name_mapping = dict(
             last_water_restriction="last_water_restriction",  # ISO
             remaining_water="remaining_water_ml",
@@ -84,9 +81,6 @@ def run_conversion(
         nwbfile: Optional[NWBFile] = None,
         metadata: Optional[dict] = None,
         overwrite: bool = False,
-        # TODO: when all H5DataIO prewraps are gone, introduce Zarr safely
-        # backend: Union[Literal["hdf5", "zarr"]],
-        # backend_configuration: Optional[Union[HDF5BackendConfiguration, ZarrBackendConfiguration]] = None,
         backend: Optional[Literal["hdf5"]] = None,
         backend_configuration: Optional[HDF5BackendConfiguration] = None,
         conversion_options: Optional[dict] = None,
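
For reference, the run_conversion signature that remains still takes an explicit backend, but only HDF5 is currently accepted. A minimal call under that restriction might look like this (a sketch reusing names from the scripts above, not a prescribed pattern):

session_converter.run_conversion(
    nwbfile_path=nwbfile_path,
    metadata=metadata,
    overwrite=True,
    backend="hdf5",  # only "hdf5" is allowed until Zarr can be introduced safely
)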
2 changes: 2 additions & 0 deletions src/ibl_to_nwb/datainterfaces/__init__.py
@@ -7,6 +7,7 @@
 from ._roi_motion_energy import RoiMotionEnergyInterface
 from ._wheel_movement import WheelInterface
 from ._brainwide_map_trials import BrainwideMapTrialsInterface
+from ._raw_video import RawVideoInterface

 __all__ = [
     "BrainwideMapTrialsInterface",
@@ -19,4 +20,5 @@
     "PupilTrackingInterface",
     "RoiMotionEnergyInterface",
     "WheelInterface",
+    "RawVideoInterface",
 ]
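
With the export in place, the new interface is importable alongside the others (a minimal sketch; the constructor arguments are placeholders matching the raw-only script above, and the camera_name value follows the Literal annotation in _raw_video.py):

from ibl_to_nwb.datainterfaces import RawVideoInterface

video_interface = RawVideoInterface(
    nwbfiles_folder_path=nwbfiles_folder_path,
    subject_id=subject_id,
    one=ibl_client,
    session=session_id,
    camera_name="left",
)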
86 changes: 37 additions & 49 deletions src/ibl_to_nwb/datainterfaces/_pose_estimation.py
@@ -7,8 +7,6 @@
 from neuroconv.tools.nwb_helpers import get_module
 from one.api import ONE
 from pynwb import NWBFile
-from pynwb.image import ImageSeries
-from typing_extensions import Self


 class IblPoseEstimationInterface(BaseDataInterface):
@@ -17,15 +15,25 @@ def __init__(
         one: ONE,
         session: str,
         camera_name: str,
-        include_video: bool,
-        include_pose: bool,
         revision: Optional[str] = None,
-    ) -> Self:
+    ) -> None:
+        """
+        Interface for the pose estimation (DLC) data from the IBL Brainwide Map release.
+
+        Parameters
+        ----------
+        one : one.ONE
+            The ONE API client.
+        session : str
+            The session ID (EID in ONE).
+        camera_name : "left", "right", or "body"
+            The name of the camera to load the raw video data for.
+        revision : str, optional
+            The revision of the pose estimation data to use. If not provided, the latest revision will be used.
+        """
         self.one = one
         self.session = session
         self.camera_name = camera_name
-        self.include_video = include_video
-        self.include_pose = include_pose

         self.revision = revision
         if self.revision is None:
@@ -55,52 +63,32 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
         left_right_or_body = self.camera_name[:5].rstrip("C")
         reused_timestamps = None
         all_pose_estimation_series = list()
-        if self.include_pose:
-            for body_part in body_parts:
-                body_part_data = np.empty(shape=(number_of_frames, 2))
-                body_part_data[:, 0] = dlc_data[f"{body_part}_x"]
-                body_part_data[:, 1] = dlc_data[f"{body_part}_y"]
-
-                pose_estimation_series = PoseEstimationSeries(
-                    name=body_part,
-                    description=f"Marker placed on or around, labeled '{body_part}'.",
-                    data=body_part_data,
-                    unit="px",
-                    reference_frame="(0,0) corresponds to the upper left corner when using width by height convention.",
-                    timestamps=reused_timestamps or timestamps,
-                    confidence=np.array(dlc_data[f"{body_part}_likelihood"]),
-                )
-                all_pose_estimation_series.append(pose_estimation_series)
-
-                reused_timestamps = all_pose_estimation_series[0]  # A trick for linking timestamps across series
-
-            pose_estimation_kwargs = dict(
-                name=f"PoseEstimation{left_right_or_body.capitalize()}Camera",
-                pose_estimation_series=all_pose_estimation_series,
-                description="Estimated positions of body parts using DeepLabCut.",
-                source_software="DeepLabCut",
-                nodes=body_parts,
-            )
-            pose_estimation_container = PoseEstimation(**pose_estimation_kwargs)
-            behavior_module = get_module(nwbfile=nwbfile, name="behavior", description="Processed behavioral data.")
-            behavior_module.add(pose_estimation_container)
-
-        if self.include_video and self.one.list_datasets(
-            eid=self.session, filename=f"raw_video_data/*{self.camera_name}*"
-        ):
-            original_video_file = self.one.load_dataset(
-                id=self.session, dataset=f"raw_video_data/*{self.camera_name}*", download_only=True
-            )
-            image_series = ImageSeries(
-                name=f"OriginalVideo{left_right_or_body.capitalize()}Camera",
-                description="The original video each pose was estimated from.",
-                unit="n.a.",
-                external_file=[str(original_video_file)],
-                format="external",
-                timestamps=reused_timestamps or timestamps,
-            )
-            nwbfile.add_acquisition(image_series)
+        for body_part in body_parts:
+            body_part_data = np.empty(shape=(number_of_frames, 2))
+            body_part_data[:, 0] = dlc_data[f"{body_part}_x"]
+            body_part_data[:, 1] = dlc_data[f"{body_part}_y"]
+
+            pose_estimation_series = PoseEstimationSeries(
+                name=body_part,
+                description=f"Marker placed on or around, labeled '{body_part}'.",
+                data=body_part_data,
+                unit="px",
+                reference_frame="(0,0) corresponds to the upper left corner when using width by height convention.",
+                timestamps=reused_timestamps or timestamps,
+                confidence=np.array(dlc_data[f"{body_part}_likelihood"]),
+            )
+            all_pose_estimation_series.append(pose_estimation_series)
+
+            reused_timestamps = all_pose_estimation_series[0]  # A trick for linking timestamps across series
+
+        pose_estimation_kwargs = dict(
+            name=f"PoseEstimation{left_right_or_body.capitalize()}Camera",
+            pose_estimation_series=all_pose_estimation_series,
+            description="Estimated positions of body parts using DeepLabCut.",
+            source_software="DeepLabCut",
+            nodes=body_parts,
+        )
+        pose_estimation_container = PoseEstimation(**pose_estimation_kwargs)
+        behavior_module = get_module(nwbfile=nwbfile, name="behavior", description="Processed behavioral data.")
+        behavior_module.add(pose_estimation_container)
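
The reused_timestamps trick retained above leans on a pynwb behavior worth spelling out: passing an already-created series as timestamps stores a link to that series' timestamps rather than writing a duplicate array. A minimal standalone sketch (assumes only numpy and pynwb; behavior per pynwb's timestamp-linking support):

import numpy as np
from pynwb import TimeSeries

timestamps = np.linspace(0.0, 10.0, num=100)
first_series = TimeSeries(name="first", data=np.zeros(100), unit="a.u.", timestamps=timestamps)
# Passing the first series as `timestamps` links to its timestamps instead of copying them
second_series = TimeSeries(name="second", data=np.ones(100), unit="a.u.", timestamps=first_series)
assert second_series.timestamps is first_series.timestamps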
79 changes: 79 additions & 0 deletions src/ibl_to_nwb/datainterfaces/_raw_video.py
@@ -0,0 +1,79 @@
from shutil import copyfile
from typing import Literal

from neuroconv.basedatainterface import BaseDataInterface
from one.api import ONE
from pydantic import DirectoryPath
from pynwb import NWBFile
from pynwb.image import ImageSeries


class RawVideoInterface(BaseDataInterface):
    def __init__(
        self,
        nwbfiles_folder_path: DirectoryPath,
        subject_id: str,
        one: ONE,
        session: str,
        camera_name: Literal["left", "right", "body"],
    ) -> None:
        """
        Interface for the raw video data from the IBL Brainwide Map release.

        Parameters
        ----------
        nwbfiles_folder_path : DirectoryPath
            The folder path where the NWB file will be written in DANDI organization structure.
            This is an unusual value to pass to __init__, but in this case it is necessary to simplify the DANDI
            organization of the externally stored raw video data.
        subject_id : str
            The subject ID to use for the DANDI organization. This is also an unusual value to pass to __init__, but
            the custom handling of Subject extensions requires removing it from the main metadata at runtime.
        one : one.ONE
            The ONE API client.
        session : str
            The session ID (EID in ONE).
        camera_name : "left", "right", or "body"
            The name of the camera to load the raw video data for.
        """
        self.nwbfiles_folder_path = nwbfiles_folder_path
        self.subject_id = subject_id
        self.one = one
        self.session = session
        self.camera_name = camera_name

    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
        camera_data = self.one.load_object(id=self.session, obj=self.camera_name, collection="alf")
        timestamps = camera_data["times"]

        left_right_or_body = self.camera_name[:5].removesuffix("C")
        if self.one.list_datasets(eid=self.session, filename=f"raw_video_data/*{self.camera_name}*"):
            original_video_file_path = self.one.load_dataset(
                id=self.session, dataset=f"raw_video_data/*{self.camera_name}*", download_only=True
            )

            # Rename to DANDI format and relative organization
            dandi_sub_stem = f"sub-{self.subject_id}"
            dandi_subject_folder = self.nwbfiles_folder_path / dandi_sub_stem

            dandi_sub_ses_stem = f"{dandi_sub_stem}_ses-{self.session}"
            dandi_video_folder_path = dandi_subject_folder / f"{dandi_sub_ses_stem}_ecephys+image"
            dandi_video_folder_path.mkdir(exist_ok=True)

            nwb_video_name = f"OriginalVideo{left_right_or_body.capitalize()}Camera"
            dandi_video_file_path = dandi_video_folder_path / f"{dandi_sub_ses_stem}_{nwb_video_name}.mp4"

            # Copying duplicates a little data, but makes re-running easier since the original file stays in the cache
            copyfile(src=original_video_file_path, dst=dandi_video_file_path)

            image_series = ImageSeries(
                name=nwb_video_name,
                description="The original video each pose was estimated from.",
                unit="n.a.",
                external_file=["./" + str(dandi_video_file_path.relative_to(dandi_subject_folder))],
                format="external",
                timestamps=timestamps,
            )
            nwbfile.add_acquisition(image_series)
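
Putting the path logic above together, the layout this interface produces next to the NWB file written by the raw-only script looks like the following (subject and session IDs are placeholders). Since the ImageSeries external_file entry is stored relative to the subject folder, the NWB file and its video folder travel together under DANDI organization:

nwbfiles/
└── sub-<subject_id>/
    ├── sub-<subject_id>_ses-<session_id>_desc-raw_ecephys+image.nwb
    └── sub-<subject_id>_ses-<session_id>_ecephys+image/
        ├── sub-<subject_id>_ses-<session_id>_OriginalVideoLeftCamera.mp4
        ├── sub-<subject_id>_ses-<session_id>_OriginalVideoRightCamera.mp4
        └── sub-<subject_id>_ses-<session_id>_OriginalVideoBodyCamera.mp4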
