Merge pull request #87 from catalystneuro/alignment

Temporal Alignment

CodyCBakerPhD authored Oct 31, 2023
2 parents abee15f + 72e2909 commit cdfc01a

Showing 10 changed files with 302 additions and 74 deletions.
@@ -1,9 +1,11 @@
"""Primary class for handling metadata non-specific to any other DataInterfaces."""
from neuroconv.basedatainterface import BaseDataInterface
from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface
from neuroconv.utils import load_dict_from_file
import pandas as pd
import numpy as np


class BaseDattaInterface(BaseDataInterface):
class BaseDattaInterface(BaseTemporalAlignmentInterface):
"""Base interface for markowitz_gillis_nature_2023 conversion w/ non-specific metadata"""

def get_metadata(self) -> dict:
@@ -22,4 +24,45 @@ def get_metadata(self) -> dict:
metadata["Subject"]["subject_id"] = session_metadata["subject_id"]
metadata["Subject"]["sex"] = subject_metadata["sex"]

if self.source_data["alignment_path"] is not None:
alignment_df = pd.read_parquet(
"/Volumes/T7/CatalystNeuro/NWB/Datta/xtra_raw/session_20210215162554-455929/alignment_df.parquet"
)
metadata["Alignment"]["slope"] = alignment_df["slope"].iloc[0]
metadata["Alignment"]["bias"] = alignment_df["bias"].iloc[0]

return metadata

    def get_metadata_schema(self) -> dict:
        metadata_schema = super().get_metadata_schema()
        if self.source_data["alignment_path"] is None:
            return metadata_schema
        metadata_schema["Alignment"] = {
            "type": "object",
            "description": "Metadata for temporal alignment with photometry data.",
            "required": True,
            "properties": {
                "slope": {
                    "description": "Slope of the linear regression mapping from behavioral video indices to demodulated photometry indices.",
                    "required": True,
                    "type": "number",
                },
                "bias": {
                    "description": "Bias of the linear regression mapping from behavioral video indices to demodulated photometry indices.",
                    "required": True,
                    "type": "number",
                },
                "start_time": {
                    "description": "Start time offset of raw fiber photometry data relative to behavioral video.",
                    "required": True,
                    "type": "number",
                },
            },
        }
        return metadata_schema

    def set_aligned_timestamps(self, aligned_timestamps: np.ndarray) -> None:
        self.aligned_timestamps = aligned_timestamps

    def get_timestamps(self) -> np.ndarray:
        return self.aligned_timestamps
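The hunks above give BaseDattaInterface its temporal-alignment machinery: the alignment parquet stores the slope and bias of a per-session linear regression mapping behavioral video indices to demodulated photometry indices, and the interfaces below convert the bias into a starting-time offset in seconds. A minimal sketch of that arithmetic, with an assumed sampling-rate value standing in for the constant the converter actually reads from metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"]:

import numpy as np

# Assumed value for illustration only; the real constant comes from
# metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"].
DEMODULATED_PHOTOMETRY_SAMPLING_RATE = 500.0  # Hz


def video_indices_to_photometry_seconds(video_indices: np.ndarray, slope: float, bias: float) -> np.ndarray:
    # The alignment parquet stores the linear fit:
    #     photometry_index ~= slope * video_index + bias
    photometry_indices = slope * video_indices + bias
    # Dividing sample indices by the sampling rate converts them to seconds.
    return photometry_indices / DEMODULATED_PHOTOMETRY_SAMPLING_RATE


# The interfaces in this commit apply only the constant term of the fit:
# aligned_starting_time = bias / DEMODULATED_PHOTOMETRY_SAMPLING_RATE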
@@ -7,6 +7,7 @@
import pandas as pd
from neuroconv.datainterfaces import VideoInterface
from .basedattainterface import BaseDattaInterface
from .utils import convert_timestamps_to_seconds


class BaseVideoInterface(BaseDattaInterface):
@@ -20,6 +21,7 @@ def __init__(
        session_id: str,
        session_metadata_path: str,
        subject_metadata_path: str,
        alignment_path: str = None,
    ):
        super().__init__(
            data_path=data_path,
@@ -28,13 +30,26 @@ def __init__(
            session_id=session_id,
            session_metadata_path=session_metadata_path,
            subject_metadata_path=subject_metadata_path,
            alignment_path=alignment_path,
        )

    def get_original_timestamps(self) -> np.ndarray:
        return pd.read_csv(self.source_data["timestamp_path"], header=None).to_numpy().squeeze()

    def align_timestamps(self, metadata: dict) -> np.ndarray:
        timestamps = self.get_original_timestamps()
        timestamps = convert_timestamps_to_seconds(timestamps=timestamps, metadata=metadata)

        self.set_aligned_timestamps(aligned_timestamps=timestamps)
        if self.source_data["alignment_path"] is not None:
            aligned_starting_time = (
                metadata["Alignment"]["bias"] / metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"]
            )
            self.set_aligned_starting_time(aligned_starting_time=aligned_starting_time)
        return self.aligned_timestamps

    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
        timestamps = pd.read_csv(self.source_data["timestamp_path"]).to_numpy().squeeze()
        TIMESTAMPS_TO_SECONDS = metadata["Constants"]["TIMESTAMPS_TO_SECONDS"]
        timestamps -= timestamps[0]
        timestamps = timestamps * TIMESTAMPS_TO_SECONDS
        timestamps = self.align_timestamps(metadata=metadata)

        video_interface = VideoInterface(file_paths=[self.source_data["data_path"]], verbose=True)
        video_interface.set_aligned_timestamps(aligned_timestamps=[timestamps])
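The inline timestamp math removed from add_to_nwbfile above (zero the clock at the first sample, scale by TIMESTAMPS_TO_SECONDS) now lives in the convert_timestamps_to_seconds helper imported from .utils. That helper's body is not part of this diff, so treat the following as a plausible sketch consistent with the deleted lines:

import numpy as np


def convert_timestamps_to_seconds(timestamps: np.ndarray, metadata: dict) -> np.ndarray:
    # Zero the clock at the first sample, then rescale raw ticks to seconds,
    # mirroring the lines this commit removed from add_to_nwbfile.
    timestamps = timestamps - timestamps[0]
    return timestamps * metadata["Constants"]["TIMESTAMPS_TO_SECONDS"]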
@@ -13,14 +13,19 @@ class BehavioralSyllableInterface(BaseDattaInterface):
"""Behavioral Syllable Interface for markowitz_gillis_nature_2023 conversion"""

def __init__(
        self, file_path: str, session_uuid: str, session_id: str, session_metadata_path: str, subject_metadata_path: str
        self,
        file_path: str,
        session_uuid: str,
        session_id: str,
        session_metadata_path: str,
        subject_metadata_path: str,
        alignment_path: str = None,
    ):
        # This should load the data lazily and prepare variables you need
        columns = (
            "uuid",
            "predicted_syllable (offline)",
            "predicted_syllable",
            "timestamp",
        )
        super().__init__(
            file_path=file_path,
@@ -29,6 +34,7 @@ def __init__(
            columns=columns,
            session_metadata_path=session_metadata_path,
            subject_metadata_path=subject_metadata_path,
            alignment_path=alignment_path,
        )

    def get_metadata_schema(self) -> dict:
@@ -43,16 +49,35 @@ def get_metadata_schema(self) -> dict:
        }
        return metadata_schema

    def get_original_timestamps(self) -> np.ndarray:
        session_df = pd.read_parquet(
            self.source_data["file_path"],
            columns=["timestamp", "uuid"],
            filters=[("uuid", "==", self.source_data["session_uuid"])],
        )
        return session_df["timestamp"].to_numpy()

    def align_timestamps(self, metadata: dict) -> np.ndarray:
        timestamps = self.get_original_timestamps()
        self.set_aligned_timestamps(aligned_timestamps=timestamps)
        if self.source_data["alignment_path"] is not None:
            aligned_starting_time = (
                metadata["Alignment"]["bias"] / metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"]
            )
            self.set_aligned_starting_time(aligned_starting_time=aligned_starting_time)
        return self.aligned_timestamps

    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, velocity_modulation: bool = False) -> None:
        if velocity_modulation:
            columns = ["uuid", "predicted_syllable", "timestamp"]
            columns = ["uuid", "predicted_syllable"]
        else:
            columns = self.source_data["columns"]
        session_df = pd.read_parquet(
            self.source_data["file_path"],
            columns=columns,
            filters=[("uuid", "==", self.source_data["session_uuid"])],
        )
        timestamps = self.align_timestamps(metadata=metadata)
        # Add Syllable Data
        sorted_pseudoindex2name = metadata["BehavioralSyllable"]["sorted_pseudoindex2name"]
        id2sorted_index = metadata["BehavioralSyllable"]["id2sorted_index"]
@@ -66,7 +91,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, velocity_modulation:
        online_syllables = LabeledEvents(
            name="BehavioralSyllableOnline",
            description="Behavioral Syllable identified by online Motion Sequencing (MoSeq).",
            timestamps=H5DataIO(session_df["timestamp"].to_numpy(), compression=True),
            timestamps=H5DataIO(timestamps, compression=True),
            data=H5DataIO(online_syllable_indices, compression=True),
            labels=H5DataIO(index2name, compression=True),
        )
@@ -82,7 +107,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, velocity_modulation:
        offline_syllables = LabeledEvents(
            name="BehavioralSyllableOffline",
            description="Behavioral Syllable identified by offline Motion Sequencing (MoSeq).",
            timestamps=H5DataIO(session_df["timestamp"].to_numpy(), compression=True),
            timestamps=H5DataIO(timestamps, compression=True),
            data=H5DataIO(offline_syllable_indices, compression=True),
            labels=H5DataIO(index2name, compression=True),
        )
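Both LabeledEvents containers above now share the timestamps returned by align_timestamps, so the online and offline syllable streams ride the same bias-shifted clock as the other data streams. For reference, a self-contained example of the LabeledEvents structure being built here (toy values; the real index maps and label names come from metadata["BehavioralSyllable"]):

import numpy as np
from ndx_events import LabeledEvents

# Toy values: three frames of syllable indices into a label vocabulary.
timestamps = np.array([0.0, 1 / 30, 2 / 30])  # seconds, one entry per video frame
syllable_indices = np.array([2, 0, 1], dtype=np.uint8)
labels = ["Pause", "Walk Forward", "Rear"]  # hypothetical syllable names

online_syllables = LabeledEvents(
    name="BehavioralSyllableOnline",
    description="Behavioral Syllable identified by online Motion Sequencing (MoSeq).",
    timestamps=timestamps,
    data=syllable_indices,  # integer indices into `labels`
    labels=labels,
)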
@@ -42,8 +42,41 @@ def session_to_nwb(
    depth_path = raw_path / "depth.avi"
    depth_ts_path = raw_path / "depth_ts.txt"
    moseq_path = raw_path / "proc/results_00.h5"
    alignment_path = raw_path / "alignment_df.parquet"

    source_data, conversion_options = {}, {}
    source_data.update(
        dict(
            MoseqExtract=dict(
                file_path=str(moseq_path),
                session_metadata_path=str(session_metadata_path),
                subject_metadata_path=str(subject_metadata_path),
                session_uuid=session_uuid,
                session_id=session_id,
            ),
            BehavioralSyllable=dict(
                session_metadata_path=str(session_metadata_path),
                subject_metadata_path=str(subject_metadata_path),
                session_uuid=session_uuid,
                session_id=session_id,
            ),
            DepthVideo=dict(
                data_path=str(depth_path),
                timestamp_path=str(depth_ts_path),
                session_metadata_path=str(session_metadata_path),
                subject_metadata_path=str(subject_metadata_path),
                session_uuid=session_uuid,
                session_id=session_id,
            ),
        )
    )
    conversion_options.update(
        dict(
            MoseqExtract={},
            BehavioralSyllable={},
            DepthVideo={},
        )
    )
if "reinforcement" in session_metadata.keys():
source_data["Optogenetic"] = dict(
file_path=str(optoda_path),
@@ -62,10 +95,12 @@
            file_path=str(photometry_path),
            tdt_path=str(tdt_path),
            tdt_metadata_path=str(tdt_metadata_path),
            depth_timestamp_path=str(depth_ts_path),
            session_metadata_path=str(session_metadata_path),
            subject_metadata_path=str(subject_metadata_path),
            session_uuid=session_uuid,
            session_id=session_id,
            alignment_path=str(alignment_path),
        )
        conversion_options["FiberPhotometry"] = {}
        behavioral_syllable_path = photometry_path  # Note: if photometry and optogenetics are both present, photometry is used for syllable data because it is quicker to load
@@ -76,43 +111,17 @@
            subject_metadata_path=str(subject_metadata_path),
            session_uuid=session_uuid,
            session_id=session_id,
            alignment_path=str(alignment_path),
        )
        conversion_options["IRVideo"] = {}
    source_data.update(
        dict(
            MoseqExtract=dict(
                file_path=str(moseq_path),
                session_metadata_path=str(session_metadata_path),
                subject_metadata_path=str(subject_metadata_path),
                session_uuid=session_uuid,
                session_id=session_id,
            ),
            BehavioralSyllable=dict(
                file_path=str(behavioral_syllable_path),
                session_metadata_path=str(session_metadata_path),
                subject_metadata_path=str(subject_metadata_path),
                session_uuid=session_uuid,
                session_id=session_id,
            ),
            DepthVideo=dict(
                data_path=str(depth_path),
                timestamp_path=str(depth_ts_path),
                session_metadata_path=str(session_metadata_path),
                subject_metadata_path=str(subject_metadata_path),
                session_uuid=session_uuid,
                session_id=session_id,
            ),
        )
    )
    conversion_options.update(
        dict(
            MoseqExtract={},
            BehavioralSyllable={},
            DepthVideo={},
        )
    )
        source_data["MoseqExtract"]["alignment_path"] = str(alignment_path)
        source_data["BehavioralSyllable"]["alignment_path"] = str(alignment_path)
        source_data["DepthVideo"]["alignment_path"] = str(alignment_path)
        source_data["Optogenetic"]["alignment_path"] = str(alignment_path)
        source_data["BehavioralSyllable"]["file_path"] = str(behavioral_syllable_path)
    if experiment_type == "velocity-modulation":
        conversion_options["BehavioralSyllable"] = dict(velocity_modulation=True)
        conversion_options["Optogenetic"] = dict(velocity_modulation=True)

    converter = DattaNWBConverter(source_data=source_data)
    metadata = converter.get_metadata()
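The net effect of the reshuffling above: source_data is defined once near the top of session_to_nwb, and alignment_path is attached afterwards, only in the photometry branch, since alignment_df.parquet exists only for photometry sessions. The pattern in miniature, with hypothetical keys for illustration:

from pathlib import Path

raw_path = Path("session_dir")  # assumed example directory
alignment_path = raw_path / "alignment_df.parquet"
has_photometry = alignment_path.exists()  # stands in for checking session_metadata

# Define every interface's source data once...
source_data = dict(
    DepthVideo=dict(data_path=str(raw_path / "depth.avi")),
    MoseqExtract=dict(file_path=str(raw_path / "proc/results_00.h5")),
)
# ...then augment conditionally, so every alignable stream shares the same
# parquet and therefore the same bias-derived starting-time shift.
if has_photometry:
    for interface_name in source_data:
        source_data[interface_name]["alignment_path"] = str(alignment_path)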
@@ -17,10 +17,12 @@ def __init__(
        file_path: str,
        tdt_path: str,
        tdt_metadata_path: str,
        depth_timestamp_path: str,
        session_uuid: str,
        session_id: str,
        session_metadata_path: str,
        subject_metadata_path: str,
        alignment_path: str = None,
    ):
        # This should load the data lazily and prepare variables you need
        columns = (
@@ -29,38 +31,59 @@
"reference_dff",
"uv_reference_fit",
"reference_dff_fit",
"timestamp",
)
        super().__init__(
            file_path=file_path,
            tdt_path=tdt_path,
            tdt_metadata_path=tdt_metadata_path,
            depth_timestamp_path=depth_timestamp_path,
            session_uuid=session_uuid,
            session_id=session_id,
            columns=columns,
            session_metadata_path=session_metadata_path,
            subject_metadata_path=subject_metadata_path,
            alignment_path=alignment_path,
        )

    def get_original_timestamps(self) -> np.ndarray:
        session_df = pd.read_parquet(
            self.source_data["file_path"],
            columns=["timestamp", "uuid"],
            filters=[("uuid", "==", self.source_data["session_uuid"])],
        )
        return session_df["timestamp"].to_numpy()

    def align_processed_timestamps(self, metadata: dict) -> np.ndarray:
        timestamps = self.get_original_timestamps()
        self.set_aligned_timestamps(aligned_timestamps=timestamps)
        if self.source_data["alignment_path"] is not None:
            aligned_starting_time = (
                metadata["Alignment"]["bias"] / metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"]
            )
            self.set_aligned_starting_time(aligned_starting_time=aligned_starting_time)
        return self.aligned_timestamps

    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
        super().add_to_nwbfile(nwbfile, metadata)
        timestamps = self.align_processed_timestamps(metadata)
        session_df = pd.read_parquet(
            self.source_data["file_path"],
            columns=self.source_data["columns"],
            filters=[("uuid", "==", self.source_data["session_uuid"])],
        )
        notnan = pd.notnull(session_df.signal_dff)
        signal_series = RoiResponseSeries(
            name="SignalDfOverF",
            description="The ΔF/F from the blue light excitation (470nm) corresponding to the dopamine signal.",
            data=H5DataIO(session_df.signal_dff.to_numpy(), compression=True),
            data=H5DataIO(session_df.signal_dff.to_numpy()[notnan], compression=True),
            unit="a.u.",
            timestamps=H5DataIO(session_df.timestamp.to_numpy(), compression=True),
            timestamps=H5DataIO(timestamps[notnan], compression=True),
            rois=self.fibers_ref,
        )
        reference_series = RoiResponseSeries(
            name="ReferenceDfOverF",
            description="The ∆F/F from the isosbestic UV excitation (405nm) corresponding to the reference signal.",
            data=H5DataIO(session_df.reference_dff.to_numpy(), compression=True),
            data=H5DataIO(session_df.reference_dff.to_numpy()[notnan], compression=True),
            unit="a.u.",
            timestamps=signal_series.timestamps,
            rois=self.fibers_ref,
@@ -71,7 +94,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
"The ∆F/F from the isosbestic UV excitation (405nm) that has been smoothed "
"(See Methods: Photometry Active Referencing)."
),
data=H5DataIO(session_df.reference_dff_fit.to_numpy(), compression=True),
data=H5DataIO(session_df.reference_dff_fit.to_numpy()[notnan], compression=True),
unit="a.u.",
timestamps=signal_series.timestamps,
rois=self.fibers_ref,
@@ -82,7 +105,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
"Raw fluorescence (F) from the isosbestic UV excitation (405nm) that has been smoothed "
"(See Methods: Photometry Active Referencing)."
),
data=H5DataIO(session_df.uv_reference_fit.to_numpy(), compression=True),
data=H5DataIO(session_df.uv_reference_fit.to_numpy()[notnan], compression=True),
unit="n.a.",
timestamps=signal_series.timestamps,
rois=self.fibers_ref,
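The notnan masking above keeps each RoiResponseSeries and its timestamps the same length once NaN-padded rows are dropped: one boolean mask, computed from the signal channel, indexes every data array and the aligned timestamps so the rows stay paired. A compact illustration with toy arrays:

import numpy as np
import pandas as pd

# Toy stand-in for session_df: NaNs mark rows without demodulated signal.
session_df = pd.DataFrame(
    {
        "signal_dff": [np.nan, 0.10, 0.30, np.nan, 0.20],
        "reference_dff": [np.nan, 0.00, 0.10, np.nan, 0.05],
    }
)
timestamps = np.arange(len(session_df)) / 30.0  # aligned timestamps, one per row

notnan = pd.notnull(session_df.signal_dff)
signal = session_df.signal_dff.to_numpy()[notnan]
reference = session_df.reference_dff.to_numpy()[notnan]
aligned = timestamps[notnan.to_numpy()]

# All series stay the same length and row-paired after masking.
assert len(signal) == len(reference) == len(aligned) == 3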
