Merge pull request #87 from catalystneuro/alignment

Temporal Alignment

CodyCBakerPhD authored Oct 31, 2023
2 parents abee15f + 72e2909 commit cdfc01a

Showing 10 changed files with 302 additions and 74 deletions.
@@ -1,9 +1,11 @@
"""Primary class for handling metadata non-specific to any other DataInterfaces."""
from neuroconv.basedatainterface import BaseDataInterface
from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface
from neuroconv.utils import load_dict_from_file
import pandas as pd
import numpy as np


class BaseDattaInterface(BaseDataInterface):
class BaseDattaInterface(BaseTemporalAlignmentInterface):
"""Base interface for markowitz_gillis_nature_2023 conversion w/ non-specific metadata"""

def get_metadata(self) -> dict:
@@ -22,4 +24,45 @@ def get_metadata(self) -> dict:
metadata["Subject"]["subject_id"] = session_metadata["subject_id"]
metadata["Subject"]["sex"] = subject_metadata["sex"]

if self.source_data["alignment_path"] is not None:
alignment_df = pd.read_parquet(
"/Volumes/T7/CatalystNeuro/NWB/Datta/xtra_raw/session_20210215162554-455929/alignment_df.parquet"
)
metadata["Alignment"]["slope"] = alignment_df["slope"].iloc[0]
metadata["Alignment"]["bias"] = alignment_df["bias"].iloc[0]

return metadata

    def get_metadata_schema(self) -> dict:
        metadata_schema = super().get_metadata_schema()
        if self.source_data["alignment_path"] is None:
            return metadata_schema
        metadata_schema["Alignment"] = {
            "type": "object",
            "description": "Metadata for temporal alignment with photometry data.",
            "required": True,
            "properties": {
                "slope": {
                    "description": "Slope of the linear regression mapping from behavioral video indices to demodulated photometry indices.",
                    "required": True,
                    "type": "number",
                },
                "bias": {
                    "description": "Bias of the linear regression mapping from behavioral video indices to demodulated photometry indices.",
                    "required": True,
                    "type": "number",
                },
                "start_time": {
                    "description": "Start time offset of raw fiber photometry data relative to behavioral video.",
                    "required": True,
                    "type": "number",
                },
            },
        }
        return metadata_schema

    def set_aligned_timestamps(self, aligned_timestamps: np.ndarray) -> None:
        self.aligned_timestamps = aligned_timestamps

    def get_timestamps(self) -> np.ndarray:
        return self.aligned_timestamps
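The hunks above give BaseDattaInterface its temporal-alignment machinery: the alignment parquet stores the slope and bias of a per-session linear regression mapping behavioral video indices to demodulated photometry indices, and the interfaces below convert the bias into a starting-time offset in seconds. A minimal sketch of that arithmetic, with an assumed sampling-rate value standing in for the constant the converter actually reads from metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"]:

import numpy as np

# Assumed value for illustration only; the real constant comes from
# metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"].
DEMODULATED_PHOTOMETRY_SAMPLING_RATE = 500.0  # Hz


def video_indices_to_photometry_seconds(video_indices: np.ndarray, slope: float, bias: float) -> np.ndarray:
    # The alignment parquet stores the linear fit:
    #     photometry_index ~= slope * video_index + bias
    photometry_indices = slope * video_indices + bias
    # Dividing sample indices by the sampling rate converts them to seconds.
    return photometry_indices / DEMODULATED_PHOTOMETRY_SAMPLING_RATE


# The interfaces in this commit apply only the constant term of the fit:
# aligned_starting_time = bias / DEMODULATED_PHOTOMETRY_SAMPLING_RATE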
@@ -7,6 +7,7 @@
import pandas as pd
from neuroconv.datainterfaces import VideoInterface
from .basedattainterface import BaseDattaInterface
from .utils import convert_timestamps_to_seconds


class BaseVideoInterface(BaseDattaInterface):
@@ -20,6 +21,7 @@ def __init__(
        session_id: str,
        session_metadata_path: str,
        subject_metadata_path: str,
        alignment_path: str = None,
    ):
        super().__init__(
            data_path=data_path,
@@ -28,13 +30,26 @@ def __init__(
            session_id=session_id,
            session_metadata_path=session_metadata_path,
            subject_metadata_path=subject_metadata_path,
            alignment_path=alignment_path,
        )

    def get_original_timestamps(self) -> np.ndarray:
        return pd.read_csv(self.source_data["timestamp_path"], header=None).to_numpy().squeeze()

    def align_timestamps(self, metadata: dict) -> np.ndarray:
        timestamps = self.get_original_timestamps()
        timestamps = convert_timestamps_to_seconds(timestamps=timestamps, metadata=metadata)

        self.set_aligned_timestamps(aligned_timestamps=timestamps)
        if self.source_data["alignment_path"] is not None:
            aligned_starting_time = (
                metadata["Alignment"]["bias"] / metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"]
            )
            self.set_aligned_starting_time(aligned_starting_time=aligned_starting_time)
        return self.aligned_timestamps

    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
        timestamps = pd.read_csv(self.source_data["timestamp_path"]).to_numpy().squeeze()
        TIMESTAMPS_TO_SECONDS = metadata["Constants"]["TIMESTAMPS_TO_SECONDS"]
        timestamps -= timestamps[0]
        timestamps = timestamps * TIMESTAMPS_TO_SECONDS
        timestamps = self.align_timestamps(metadata=metadata)

        video_interface = VideoInterface(file_paths=[self.source_data["data_path"]], verbose=True)
        video_interface.set_aligned_timestamps(aligned_timestamps=[timestamps])
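The inline timestamp math removed from add_to_nwbfile above (zero the clock at the first sample, scale by TIMESTAMPS_TO_SECONDS) now lives in the convert_timestamps_to_seconds helper imported from .utils. That helper's body is not part of this diff, so treat the following as a plausible sketch consistent with the deleted lines:

import numpy as np


def convert_timestamps_to_seconds(timestamps: np.ndarray, metadata: dict) -> np.ndarray:
    # Zero the clock at the first sample, then rescale raw ticks to seconds,
    # mirroring the lines this commit removed from add_to_nwbfile.
    timestamps = timestamps - timestamps[0]
    return timestamps * metadata["Constants"]["TIMESTAMPS_TO_SECONDS"]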
@@ -13,14 +13,19 @@ class BehavioralSyllableInterface(BaseDattaInterface):
"""Behavioral Syllable Interface for markowitz_gillis_nature_2023 conversion"""

def __init__(
        self, file_path: str, session_uuid: str, session_id: str, session_metadata_path: str, subject_metadata_path: str
        self,
        file_path: str,
        session_uuid: str,
        session_id: str,
        session_metadata_path: str,
        subject_metadata_path: str,
        alignment_path: str = None,
    ):
        # This should load the data lazily and prepare variables you need
        columns = (
            "uuid",
            "predicted_syllable (offline)",
            "predicted_syllable",
            "timestamp",
        )
        super().__init__(
            file_path=file_path,
@@ -29,6 +34,7 @@ def __init__(
            columns=columns,
            session_metadata_path=session_metadata_path,
            subject_metadata_path=subject_metadata_path,
            alignment_path=alignment_path,
        )

    def get_metadata_schema(self) -> dict:
@@ -43,16 +49,35 @@ def get_metadata_schema(self) -> dict:
        }
        return metadata_schema

    def get_original_timestamps(self) -> np.ndarray:
        session_df = pd.read_parquet(
            self.source_data["file_path"],
            columns=["timestamp", "uuid"],
            filters=[("uuid", "==", self.source_data["session_uuid"])],
        )
        return session_df["timestamp"].to_numpy()

    def align_timestamps(self, metadata: dict) -> np.ndarray:
        timestamps = self.get_original_timestamps()
        self.set_aligned_timestamps(aligned_timestamps=timestamps)
        if self.source_data["alignment_path"] is not None:
            aligned_starting_time = (
                metadata["Alignment"]["bias"] / metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"]
            )
            self.set_aligned_starting_time(aligned_starting_time=aligned_starting_time)
        return self.aligned_timestamps

    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, velocity_modulation: bool = False) -> None:
        if velocity_modulation:
            columns = ["uuid", "predicted_syllable", "timestamp"]
            columns = ["uuid", "predicted_syllable"]
        else:
            columns = self.source_data["columns"]
        session_df = pd.read_parquet(
            self.source_data["file_path"],
            columns=columns,
            filters=[("uuid", "==", self.source_data["session_uuid"])],
        )
        timestamps = self.align_timestamps(metadata=metadata)
        # Add Syllable Data
        sorted_pseudoindex2name = metadata["BehavioralSyllable"]["sorted_pseudoindex2name"]
        id2sorted_index = metadata["BehavioralSyllable"]["id2sorted_index"]
@@ -66,7 +91,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, velocity_modulation:
        online_syllables = LabeledEvents(
            name="BehavioralSyllableOnline",
            description="Behavioral Syllable identified by online Motion Sequencing (MoSeq).",
            timestamps=H5DataIO(session_df["timestamp"].to_numpy(), compression=True),
            timestamps=H5DataIO(timestamps, compression=True),
            data=H5DataIO(online_syllable_indices, compression=True),
            labels=H5DataIO(index2name, compression=True),
        )
@@ -82,7 +107,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, velocity_modulation:
        offline_syllables = LabeledEvents(
            name="BehavioralSyllableOffline",
            description="Behavioral Syllable identified by offline Motion Sequencing (MoSeq).",
            timestamps=H5DataIO(session_df["timestamp"].to_numpy(), compression=True),
            timestamps=H5DataIO(timestamps, compression=True),
            data=H5DataIO(offline_syllable_indices, compression=True),
            labels=H5DataIO(index2name, compression=True),
        )
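Both LabeledEvents containers above now share the timestamps returned by align_timestamps, so the online and offline syllable streams ride the same bias-shifted clock as the other data streams. For reference, a self-contained example of the LabeledEvents structure being built here (toy values; the real index maps and label names come from metadata["BehavioralSyllable"]):

import numpy as np
from ndx_events import LabeledEvents

# Toy values: three frames of syllable indices into a label vocabulary.
timestamps = np.array([0.0, 1 / 30, 2 / 30])  # seconds, one entry per video frame
syllable_indices = np.array([2, 0, 1], dtype=np.uint8)
labels = ["Pause", "Walk Forward", "Rear"]  # hypothetical syllable names

online_syllables = LabeledEvents(
    name="BehavioralSyllableOnline",
    description="Behavioral Syllable identified by online Motion Sequencing (MoSeq).",
    timestamps=timestamps,
    data=syllable_indices,  # integer indices into `labels`
    labels=labels,
)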
@@ -42,8 +42,41 @@ def session_to_nwb(
    depth_path = raw_path / "depth.avi"
    depth_ts_path = raw_path / "depth_ts.txt"
    moseq_path = raw_path / "proc/results_00.h5"
    alignment_path = raw_path / "alignment_df.parquet"

    source_data, conversion_options = {}, {}
    source_data.update(
        dict(
            MoseqExtract=dict(
                file_path=str(moseq_path),
                session_metadata_path=str(session_metadata_path),
                subject_metadata_path=str(subject_metadata_path),
                session_uuid=session_uuid,
                session_id=session_id,
            ),
            BehavioralSyllable=dict(
                session_metadata_path=str(session_metadata_path),
                subject_metadata_path=str(subject_metadata_path),
                session_uuid=session_uuid,
                session_id=session_id,
            ),
            DepthVideo=dict(
                data_path=str(depth_path),
                timestamp_path=str(depth_ts_path),
                session_metadata_path=str(session_metadata_path),
                subject_metadata_path=str(subject_metadata_path),
                session_uuid=session_uuid,
                session_id=session_id,
            ),
        )
    )
    conversion_options.update(
        dict(
            MoseqExtract={},
            BehavioralSyllable={},
            DepthVideo={},
        )
    )
if "reinforcement" in session_metadata.keys():
source_data["Optogenetic"] = dict(
file_path=str(optoda_path),
@@ -62,10 +95,12 @@
            file_path=str(photometry_path),
            tdt_path=str(tdt_path),
            tdt_metadata_path=str(tdt_metadata_path),
            depth_timestamp_path=str(depth_ts_path),
            session_metadata_path=str(session_metadata_path),
            subject_metadata_path=str(subject_metadata_path),
            session_uuid=session_uuid,
            session_id=session_id,
            alignment_path=str(alignment_path),
        )
        conversion_options["FiberPhotometry"] = {}
        behavioral_syllable_path = photometry_path  # Note: if photometry and optogenetics are both present, photometry is used for syllable data because it is quicker to load
@@ -76,43 +111,17 @@
            subject_metadata_path=str(subject_metadata_path),
            session_uuid=session_uuid,
            session_id=session_id,
            alignment_path=str(alignment_path),
        )
        conversion_options["IRVideo"] = {}
    source_data.update(
        dict(
            MoseqExtract=dict(
                file_path=str(moseq_path),
                session_metadata_path=str(session_metadata_path),
                subject_metadata_path=str(subject_metadata_path),
                session_uuid=session_uuid,
                session_id=session_id,
            ),
            BehavioralSyllable=dict(
                file_path=str(behavioral_syllable_path),
                session_metadata_path=str(session_metadata_path),
                subject_metadata_path=str(subject_metadata_path),
                session_uuid=session_uuid,
                session_id=session_id,
            ),
            DepthVideo=dict(
                data_path=str(depth_path),
                timestamp_path=str(depth_ts_path),
                session_metadata_path=str(session_metadata_path),
                subject_metadata_path=str(subject_metadata_path),
                session_uuid=session_uuid,
                session_id=session_id,
            ),
        )
    )
    conversion_options.update(
        dict(
            MoseqExtract={},
            BehavioralSyllable={},
            DepthVideo={},
        )
    )
        source_data["MoseqExtract"]["alignment_path"] = str(alignment_path)
        source_data["BehavioralSyllable"]["alignment_path"] = str(alignment_path)
        source_data["DepthVideo"]["alignment_path"] = str(alignment_path)
        source_data["Optogenetic"]["alignment_path"] = str(alignment_path)
        source_data["BehavioralSyllable"]["file_path"] = str(behavioral_syllable_path)
    if experiment_type == "velocity-modulation":
        conversion_options["BehavioralSyllable"] = dict(velocity_modulation=True)
        conversion_options["Optogenetic"] = dict(velocity_modulation=True)

    converter = DattaNWBConverter(source_data=source_data)
    metadata = converter.get_metadata()
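The net effect of the reshuffling above: source_data is defined once near the top of session_to_nwb, and alignment_path is attached afterwards, only in the photometry branch, since alignment_df.parquet exists only for photometry sessions. The pattern in miniature, with hypothetical keys for illustration:

from pathlib import Path

raw_path = Path("session_dir")  # assumed example directory
alignment_path = raw_path / "alignment_df.parquet"
has_photometry = alignment_path.exists()  # stands in for checking session_metadata

# Define every interface's source data once...
source_data = dict(
    DepthVideo=dict(data_path=str(raw_path / "depth.avi")),
    MoseqExtract=dict(file_path=str(raw_path / "proc/results_00.h5")),
)
# ...then augment conditionally, so every alignable stream shares the same
# parquet and therefore the same bias-derived starting-time shift.
if has_photometry:
    for interface_name in source_data:
        source_data[interface_name]["alignment_path"] = str(alignment_path)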
@@ -17,10 +17,12 @@ def __init__(
        file_path: str,
        tdt_path: str,
        tdt_metadata_path: str,
        depth_timestamp_path: str,
        session_uuid: str,
        session_id: str,
        session_metadata_path: str,
        subject_metadata_path: str,
        alignment_path: str = None,
    ):
        # This should load the data lazily and prepare variables you need
        columns = (
@@ -29,38 +31,59 @@
"reference_dff",
"uv_reference_fit",
"reference_dff_fit",
"timestamp",
)
        super().__init__(
            file_path=file_path,
            tdt_path=tdt_path,
            tdt_metadata_path=tdt_metadata_path,
            depth_timestamp_path=depth_timestamp_path,
            session_uuid=session_uuid,
            session_id=session_id,
            columns=columns,
            session_metadata_path=session_metadata_path,
            subject_metadata_path=subject_metadata_path,
            alignment_path=alignment_path,
        )

    def get_original_timestamps(self) -> np.ndarray:
        session_df = pd.read_parquet(
            self.source_data["file_path"],
            columns=["timestamp", "uuid"],
            filters=[("uuid", "==", self.source_data["session_uuid"])],
        )
        return session_df["timestamp"].to_numpy()

    def align_processed_timestamps(self, metadata: dict) -> np.ndarray:
        timestamps = self.get_original_timestamps()
        self.set_aligned_timestamps(aligned_timestamps=timestamps)
        if self.source_data["alignment_path"] is not None:
            aligned_starting_time = (
                metadata["Alignment"]["bias"] / metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"]
            )
            self.set_aligned_starting_time(aligned_starting_time=aligned_starting_time)
        return self.aligned_timestamps

    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
        super().add_to_nwbfile(nwbfile, metadata)
        timestamps = self.align_processed_timestamps(metadata)
        session_df = pd.read_parquet(
            self.source_data["file_path"],
            columns=self.source_data["columns"],
            filters=[("uuid", "==", self.source_data["session_uuid"])],
        )
        notnan = pd.notnull(session_df.signal_dff)
        signal_series = RoiResponseSeries(
            name="SignalDfOverF",
            description="The ΔF/F from the blue light excitation (470nm) corresponding to the dopamine signal.",
            data=H5DataIO(session_df.signal_dff.to_numpy(), compression=True),
            data=H5DataIO(session_df.signal_dff.to_numpy()[notnan], compression=True),
            unit="a.u.",
            timestamps=H5DataIO(session_df.timestamp.to_numpy(), compression=True),
            timestamps=H5DataIO(timestamps[notnan], compression=True),
            rois=self.fibers_ref,
        )
        reference_series = RoiResponseSeries(
            name="ReferenceDfOverF",
            description="The ∆F/F from the isosbestic UV excitation (405nm) corresponding to the reference signal.",
            data=H5DataIO(session_df.reference_dff.to_numpy(), compression=True),
            data=H5DataIO(session_df.reference_dff.to_numpy()[notnan], compression=True),
            unit="a.u.",
            timestamps=signal_series.timestamps,
            rois=self.fibers_ref,
@@ -71,7 +94,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
"The ∆F/F from the isosbestic UV excitation (405nm) that has been smoothed "
"(See Methods: Photometry Active Referencing)."
),
data=H5DataIO(session_df.reference_dff_fit.to_numpy(), compression=True),
data=H5DataIO(session_df.reference_dff_fit.to_numpy()[notnan], compression=True),
unit="a.u.",
timestamps=signal_series.timestamps,
rois=self.fibers_ref,
@@ -82,7 +105,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
"Raw fluorescence (F) from the isosbestic UV excitation (405nm) that has been smoothed "
"(See Methods: Photometry Active Referencing)."
),
data=H5DataIO(session_df.uv_reference_fit.to_numpy(), compression=True),
data=H5DataIO(session_df.uv_reference_fit.to_numpy()[notnan], compression=True),
unit="n.a.",
timestamps=signal_series.timestamps,
rois=self.fibers_ref,
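The notnan masking above keeps each RoiResponseSeries and its timestamps the same length once NaN-padded rows are dropped: one boolean mask, computed from the signal channel, indexes every data array and the aligned timestamps so the rows stay paired. A compact illustration with toy arrays:

import numpy as np
import pandas as pd

# Toy stand-in for session_df: NaNs mark rows without demodulated signal.
session_df = pd.DataFrame(
    {
        "signal_dff": [np.nan, 0.10, 0.30, np.nan, 0.20],
        "reference_dff": [np.nan, 0.00, 0.10, np.nan, 0.05],
    }
)
timestamps = np.arange(len(session_df)) / 30.0  # aligned timestamps, one per row

notnan = pd.notnull(session_df.signal_dff)
signal = session_df.signal_dff.to_numpy()[notnan]
reference = session_df.reference_dff.to_numpy()[notnan]
aligned = timestamps[notnan.to_numpy()]

# All series stay the same length and row-paired after masking.
assert len(signal) == len(reference) == len(aligned) == 3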
