Merge pull request #23 from pauladkisson/labeled_events
Labeled events
CodyCBakerPhD authored Jun 19, 2023
2 parents 7f21bc5 + e2e64ee commit e85baaa
Showing 7 changed files with 257 additions and 71 deletions.
@@ -1,3 +1,4 @@
from .behaviorinterface import BehaviorInterface
from .behavioralsyllableinterface import BehavioralSyllableInterface
from .fiberphotometryinterface import FiberPhotometryInterface
from .nwbconverter import NWBConverter
@@ -0,0 +1,79 @@
"""Primary class for converting experiment-specific behavior."""
import numpy as np
import pandas as pd
from pynwb import NWBFile
from neuroconv.basedatainterface import BaseDataInterface
from neuroconv.utils import load_dict_from_file
from hdmf.backends.hdf5.h5_utils import H5DataIO
from ndx_events import LabeledEvents


class BehavioralSyllableInterface(BaseDataInterface):
"""Behavioral Syllable Interface for markowitz_gillis_nature_2023 conversion"""

def __init__(self, file_path: str, session_uuid: str, metadata_path: str):
        # Store the source file, session UUID, and required columns; the parquet data itself is read in run_conversion
columns = (
"uuid",
"predicted_syllable (offline)",
"timestamp",
)
super().__init__(
file_path=file_path,
session_uuid=session_uuid,
columns=columns,
metadata_path=metadata_path,
)

def get_metadata(self) -> dict:
metadata = super().get_metadata()
session_metadata = load_dict_from_file(self.source_data["metadata_path"])
session_metadata = session_metadata[self.source_data["session_uuid"]]
metadata["NWBFile"]["session_description"] = session_metadata["session_description"]
metadata["NWBFile"]["session_start_time"] = session_metadata["session_start_time"]
metadata["Subject"] = {}
metadata["Subject"]["subject_id"] = session_metadata["subject_id"]
metadata["NWBFile"]["identifier"] = self.source_data["session_uuid"]
metadata["NWBFile"]["session_id"] = self.source_data["session_uuid"]

return metadata

def get_metadata_schema(self) -> dict:
metadata_schema = super().get_metadata_schema()
metadata_schema["properties"]["BehavioralSyllable"] = {
"type": "object",
"properties": {
"sorted_pseudoindex2name": {"type": "object"},
"id2sorted_index": {"type": "object"},
"sorted_index2id": {"type": "object"},
},
}
return metadata_schema

def run_conversion(self, nwbfile: NWBFile, metadata: dict) -> NWBFile:
"""Run conversion of data from the source file into the nwbfile."""
session_df = pd.read_parquet(
self.source_data["file_path"],
columns=self.source_data["columns"],
filters=[("uuid", "==", self.source_data["session_uuid"])],
)
# Add Syllable Data
sorted_pseudoindex2name = metadata["BehavioralSyllable"]["sorted_pseudoindex2name"]
id2sorted_index = metadata["BehavioralSyllable"]["id2sorted_index"]
syllable_names = np.fromiter(sorted_pseudoindex2name.values(), dtype="O")
syllable_pseudoindices = np.fromiter(sorted_pseudoindex2name.keys(), dtype=np.int64)
index2name = syllable_names[np.argsort(syllable_pseudoindices)].tolist()
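        # Pad the label list so every sorted index in id2sorted_index has an entry;
        # syllables without an explicit name are grouped under a generic "uncommon" label.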
for _ in range(len(id2sorted_index) - len(index2name)):
index2name.append("Uncommon Syllable (frequency < 1%)")
syllable_ids = session_df["predicted_syllable (offline)"]
syllable_indices = syllable_ids.map(id2sorted_index).to_numpy(dtype=np.uint8)
events = LabeledEvents(
name="BehavioralSyllable",
description="Behavioral Syllable identified by Motion Sequencing (MoSeq).",
timestamps=H5DataIO(session_df["timestamp"].to_numpy(), compression=True),
data=H5DataIO(syllable_indices, compression=True),
labels=H5DataIO(index2name, compression=True),
)
nwbfile.add_acquisition(events)

return nwbfile
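
As a quick sanity check of what this new interface writes, here is a minimal read-back sketch. The output path "session.nwb" is a placeholder, and ndx-events is assumed to be installed so the LabeledEvents type resolves on read:

import ndx_events  # noqa: F401 -- ensures the LabeledEvents extension type is registered
from pynwb import NWBHDF5IO

with NWBHDF5IO("session.nwb", mode="r") as io:  # placeholder path for a converted session
    nwbfile = io.read()
    events = nwbfile.acquisition["BehavioralSyllable"]
    # events.data holds sorted syllable indices; events.labels maps each index to a name.
    labels = events.labels[:]
    names = [labels[idx] for idx in events.data[:10]]
    print(list(zip(events.timestamps[:10], names)))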
@@ -1,9 +1,8 @@
"""Primary class for converting experiment-specific behavior."""
import numpy as np
import pandas as pd
from pynwb import NWBFile, TimeSeries
from pynwb import NWBFile
from pynwb.behavior import (
BehavioralTimeSeries,
CompassDirection,
Position,
SpatialSeries,
@@ -21,7 +20,6 @@ def __init__(self, file_path: str, session_uuid: str, metadata_path: str):
# This should load the data lazily and prepare variables you need
columns = (
"uuid",
"predicted_syllable (offline)",
"centroid_x_mm",
"centroid_y_mm",
"height_ave_mm",
@@ -65,12 +63,6 @@ def get_metadata_schema(self) -> dict:
"reference_frame": {"type": "string"},
},
},
"Syllable": {
"type": "object",
"properties": {
"syllable_id2name": {"type": "object"},
},
},
},
}
return metadata_schema
@@ -110,23 +102,9 @@ def run_conversion(self, nwbfile: NWBFile, metadata: dict) -> NWBFile:
)
direction = CompassDirection(spatial_series=direction_spatial_series, name="CompassDirection")

# Add Syllable Data
syllable_time_series = TimeSeries(
name="BehavioralSyllable",
data=H5DataIO(session_df["predicted_syllable (offline)"].to_numpy(), compression=True),
timestamps=position_spatial_series.timestamps,
description="Behavioral Syllable identified by Motion Sequencing (MoSeq).",
unit="n.a.",
)
behavioral_time_series = BehavioralTimeSeries(
time_series=syllable_time_series,
name="SyllableTimeSeries",
)

# Combine all data into a behavioral processing module
behavior_module = nwb_helpers.get_module(nwbfile, name="behavior", description="Processed behavioral data")
behavior_module.add(position)
behavior_module.add(direction)
behavior_module.add(behavioral_time_series)

return nwbfile
@@ -48,6 +48,15 @@ def session_to_nwb(
)
)
)
source_data.update(
dict(
BehavioralSyllable=dict(
file_path=str(data_path),
metadata_path=str(metadata_path),
session_uuid=session_id,
)
)
)
conversion_options.update(dict(FiberPhotometry=dict()))
conversion_options.update(dict(Behavior=dict()))

@@ -83,4 +92,5 @@ def session_to_nwb(
stub_test=stub_test,
)
nwbfile_path = output_dir_path / f"{example_session}.nwb"
reproduce_figures.reproduce_fig1d(nwbfile_path)
editable_metadata_path = Path(__file__).parent / "markowitz_gillis_nature_2023_metadata.yaml"
reproduce_figures.reproduce_fig1d(nwbfile_path, editable_metadata_path)
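
For orientation, the source_data this script assembles now carries a BehavioralSyllable entry alongside the existing interfaces. A sketch with placeholder paths and session UUID (the FiberPhotometry entry is omitted because its keyword arguments are not visible in this diff):

from pathlib import Path

data_path = Path("path/to/dataset.parquet")             # placeholder
metadata_path = Path("path/to/session_metadata.yaml")   # placeholder
session_id = "example-session-uuid"                     # placeholder

source_data = dict(
    Behavior=dict(file_path=str(data_path), metadata_path=str(metadata_path), session_uuid=session_id),
    BehavioralSyllable=dict(file_path=str(data_path), metadata_path=str(metadata_path), session_uuid=session_id),
)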
@@ -28,42 +28,160 @@ Behavior:
reference_frame: TBD
CompassDirection:
reference_frame: TBD
Syllable:
syllable_id2name: # Mapping from syllable ID (unique) --> syllable name (not unique) [See Extended Data Fig. 2c]
4: Pause
10: Reared sniff
33: Pause, turn left
24: Turn left
20: Orient left
8: Turn left
22: High sniff
7: High sniff
13: Reared sniff
15: Short run
12: Rear/jump
17: Short dart
16: Run
19: Forward run
11: High run
26: Walk Forward
21: Run
31: Leftward rear down
18: Wall rear
25: Rear up, turn left
23: Rear
37: Rear
30: Pause
2: Pause
34: Groom/paw lick
5: Scrunch
29: Dive down
28: Rightward rear down
1: Paw lick/scrunch
32: Groom
3: Pause, low rear
36: Scrunch right
35: Orient right
6: Paused low rear
9: Reared sniff
27: Pause
14: Body lick
BehavioralSyllable:
sorted_pseudoindex2name: # Mapping from syllable pseudo-index (sorted by usage, starting at 1) --> syllable name (not unique) [See Extended Data Fig. 2c]
4: Pause
10: Reared sniff
33: Pause, turn left
24: Turn left
20: Orient left
8: Turn left
22: High sniff
7: High sniff
13: Reared sniff
15: Short run
12: Rear/jump
17: Short dart
16: Run
19: Forward run
11: High run
26: Walk Forward
21: Run
31: Leftward rear down
18: Wall rear
25: Rear up, turn left
23: Rear
37: Rear
30: Pause
2: Pause
34: Groom/paw lick
5: Scrunch
29: Dive down
28: Rightward rear down
1: Paw lick/scrunch
32: Groom
3: Pause, low rear
36: Scrunch right
35: Orient right
6: Paused low rear
9: Reared sniff
27: Pause
14: Body lick
id2sorted_index: # Mapping from syllable id --> syllable index (sorted by usage) [See syllable_stats_offline.toml]
74: 0
20: 1
22: 2
17: 3
86: 4
8: 5
12: 6
91: 7
27: 8
64: 9
95: 10
71: 11
19: 12
44: 13
39: 14
25: 15
65: 16
58: 17
41: 18
47: 19
53: 20
57: 21
94: 22
76: 23
35: 24
92: 25
72: 26
7: 27
5: 28
77: 29
67: 30
33: 31
66: 32
42: 33
15: 34
30: 35
56: 36
11: 37
45: 38
13: 39
73: 40
62: 41
31: 42
59: 43
10: 44
75: 45
4: 46
54: 47
43: 48
26: 49
60: 50
85: 51
29: 52
80: 53
68: 54
38: 55
63: 56
-5: 57
sorted_index2id:
0: 74
1: 20
2: 22
3: 17
4: 86
5: 8
6: 12
7: 91
8: 27
9: 64
10: 95
11: 71
12: 19
13: 44
14: 39
15: 25
16: 65
17: 58
18: 41
19: 47
20: 53
21: 57
22: 94
23: 76
24: 35
25: 92
26: 72
27: 7
28: 5
29: 77
30: 67
31: 33
32: 66
33: 42
34: 15
35: 30
36: 56
37: 11
38: 45
39: 13
40: 73
41: 62
42: 31
43: 59
44: 10
45: 75
46: 4
47: 54
48: 43
49: 26
50: 60
51: 85
52: 29
53: 80
54: 68
55: 38
56: 63
57: -5
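
To make the relationship between the two mappings concrete, here is a small worked example (values truncated from the YAML above) of how run_conversion turns raw syllable ids into labels:

# Truncated copies of the YAML mappings above.
sorted_pseudoindex2name = {1: "Paw lick/scrunch", 2: "Pause", 3: "Pause, low rear"}
id2sorted_index = {74: 0, 20: 1, 22: 2}

# Pseudo-indices are 1-based, so sorting them yields labels ordered by usage rank
# (equivalent to the numpy argsort in run_conversion).
index2name = [name for _, name in sorted(sorted_pseudoindex2name.items())]

raw_ids = [74, 20, 22]  # values as they appear in the "predicted_syllable (offline)" column
labels = [index2name[id2sorted_index[syllable_id]] for syllable_id in raw_ids]
# labels == ["Paw lick/scrunch", "Pause", "Pause, low rear"]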
@@ -2,12 +2,14 @@
from neuroconv import NWBConverter
from .behaviorinterface import BehaviorInterface
from .fiberphotometryinterface import FiberPhotometryInterface
from .behavioralsyllableinterface import BehavioralSyllableInterface


class NWBConverter(NWBConverter):
"""Primary conversion class."""

data_interface_classes = dict(
Behavior=BehaviorInterface,
BehavioralSyllable=BehavioralSyllableInterface,
FiberPhotometry=FiberPhotometryInterface,
)
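
A minimal end-to-end sketch of driving this converter. The file paths, session UUID, and editable metadata filename are placeholders, FiberPhotometry is omitted for brevity, and the NWBConverter used here is the subclass defined above (it shadows the neuroconv base class name):

from neuroconv.utils import load_dict_from_file, dict_deep_update

session_uuid = "example-session-uuid"  # placeholder
source_data = {
    name: dict(file_path="dataset.parquet", metadata_path="session_metadata.yaml", session_uuid=session_uuid)
    for name in ("Behavior", "BehavioralSyllable")
}

converter = NWBConverter(source_data=source_data)
metadata = converter.get_metadata()
# Overlay the editable YAML, including the BehavioralSyllable mappings shown earlier.
editable_metadata = load_dict_from_file("markowitz_gillis_nature_2023_metadata.yaml")
metadata = dict_deep_update(metadata, editable_metadata)
converter.run_conversion(nwbfile_path=f"{session_uuid}.nwb", metadata=metadata)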