From 3f4e72ebe48ff2dba3dc5d87a0d53284b26b4185 Mon Sep 17 00:00:00 2001
From: Nicholas Watters
Date: Thu, 21 Dec 2023 12:53:33 -0500
Subject: [PATCH] Add probe locations, README updates, and small cleanups.

---
 src/jazayeri_lab_to_nwb/watters/README.md     | 105 ++++++++----
 .../watters/main_convert_session.py           | 158 +++++++++---------
 src/jazayeri_lab_to_nwb/watters/metadata.yaml |   1 +
 .../watters/nwb_converter.py                  |  19 ++-
 .../watters/recording_interface.py            |   4 +-
 .../watters/requirements.txt                  |   6 +
 .../watters/trials_interface.py               |   7 +-
 7 files changed, 172 insertions(+), 128 deletions(-)

diff --git a/src/jazayeri_lab_to_nwb/watters/README.md b/src/jazayeri_lab_to_nwb/watters/README.md
index e718409..810b392 100644
--- a/src/jazayeri_lab_to_nwb/watters/README.md
+++ b/src/jazayeri_lab_to_nwb/watters/README.md
@@ -1,9 +1,10 @@
 # Watters data conversion pipeline
-NWB conversion scripts for Watters data to the [Neurodata Without Borders](https://nwb-overview.readthedocs.io/) data format.
-
+NWB conversion scripts for Nick Watters' working memory data to the
+[Neurodata Without Borders](https://nwb-overview.readthedocs.io/) data format.
 ## Usage
-To run a specific conversion, you might need to install first some conversion specific dependencies that are located in each conversion directory:
+To run a specific conversion, you might first need to install some
+conversion-specific dependencies that are located in each conversion directory:
 ```
 pip install -r src/jazayeri_lab_to_nwb/watters/watters_requirements.txt
 ```
@@ -13,44 +14,78 @@
 You can run a specific conversion with the following command:
 ```
 python src/jazayeri_lab_to_nwb/watters/main_convert_session.py $SUBJECT $SESSION
 ```
 
-### Watters working memory task data
-The conversion function for this experiment, `session_to_nwb`, is found in `src/watters/main_convert_session.py`. The function takes arguments:
+where `$SUBJECT` is in `['Perle', 'Elgar']` and `$SESSION` is a session date in
+format `'YYYY-MM-DD'`. For example:
+```
+python src/jazayeri_lab_to_nwb/watters/main_convert_session.py Perle 2022-06-01
+```
+
+The conversion function for this experiment, `session_to_nwb`, is found in
+`src/watters/main_convert_session.py`. The function takes arguments:
 * `subject` subject name, either `'Perle'` or `'Elgar'`.
 * `session` session date in format `'YYYY-MM-DD'`.
-* `stub_test` indicates whether only a small portion of the data should be saved (mainly used by us for testing purposes).
+* `stub_test` indicates whether only a small portion of the data should be
+saved (used for testing purposes).
 * `overwrite` indicates whether to overwrite nwb output files.
-* `dandiset_id` optional dandiset ID.
-
-The function can be imported in a separate script with and run, or you can run the file directly and specify the arguments in the `if name == "__main__"` block at the bottom.
-
-The function expects the raw data in `data_dir_path` to follow this structure:
-
-    data_dir_path/
-    ├── data_open_source
-    │   ├── behavior
-    │   │   └── eye.h.times.npy, etc.
-    │   ├── task
-    │       └── trials.start_times.json, etc.
-    │   └── probes.metadata.json
-    ├── raw_data
-    │   ├── spikeglx
-    │       └── */*/*.ap.bin, */*/*.lf.bin, etc.
-    │   ├── v_probe_0
-    │       └── raw_data.dat
-    │   └── v_probe_{n}
-    │       └── raw_data.dat
-    ├── spike_sorting_raw
-    │   ├── np
-    │   ├── vp_0
-    │   └── vp_{n}
-    ├── sync_pulses
-        ├── mworks
-        ├── open_ephys
-        └── spikeglx
-    ...
+
+The function can be imported into a separate script and run, or you can run
+the file directly and specify the arguments in the `if __name__ == "__main__"`
+block at the bottom.
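+
+For example, a minimal sketch of calling it from Python (assuming
+`main_convert_session.py` is on your Python path; the argument values here
+are illustrative):
+```
+from main_convert_session import session_to_nwb
+
+session_to_nwb(
+    subject="Perle",
+    session="2022-06-01",
+    stub_test=True,  # Convert only a small portion of the data.
+    overwrite=True,  # Replace any existing output files.
+)
+```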
+
+## Data format
+
+The function expects the data paths to have this structure:
+```
+    trials
+    ├── eye_h_calibrated.json
+    ├── eye_v_calibrated.json
+    ├── pupil_size_r.json
+    ├── reward_line.json
+    ├── sound.json
+    └── trials.json
+    data_open_source
+    └── probes.metadata.json
+    raw_data
+    ├── spikeglx
+        └── */*/*.ap.bin, */*/*.lf.bin, etc.
+    ├── v_probe_0
+        └── raw_data.dat
+    └── v_probe_{n}
+        └── raw_data.dat
+    spike_sorting
+    ├── np
+    ├── v_probe_0
+    └── v_probe_{n}
+    sync_pulses
+    ├── mworks
+    ├── open_ephys
+    └── spikeglx
+```
+Each of the top-level directories may lie in a different filesystem. The script
+`get_session_paths.py` contains a function to fetch them given subject and session.
+
+The converted data will be saved in two files in
+`/om/user/nwatters/nwb_data_multi_prediction/staging/sub-$SUBJECT/`:
+* `sub-$SUBJECT_ses-$SESSION_ecephys.nwb` --- Raw physiology
+* `sub-$SUBJECT_ses-$SESSION_behavior+ecephys.nwb` --- Task, behavior, and
+sorted physiology
+
+If you run into memory issues when writing the raw ecephys NWB files, you may
+want to set `buffer_gb` to a value smaller than 1 (its default) in the
+`conversion_options` dicts for the recording interfaces, i.e.
+[here](https://github.com/catalystneuro/jazayeri-lab-to-nwb/blob/vprobe_dev/src/jazayeri_lab_to_nwb/watters/main_convert_session.py#L189).
 
-The conversion will try to automatically fetch metadata from the provided data directory. However, some information, such as the subject's name and age, must be specified by the user in the file `src/jazayeri_lab_to_nwb/watters/metadata.yaml`. If any of the automatically fetched metadata is incorrect, it can also be overriden from this file.
+## Uploading to DANDI
 
-The converted data will be saved in two files, one called `{session_id}_raw.nwb`, which contains the raw electrophysiology data from the Neuropixels and V-Probes, and one called `{session_id}_processed.nwb` with behavioral data, trial info, and sorted unit spiking.
+To upload from openmind to DANDI, first log into the openmind data transfer
+node, e.g. `ssh nwatters@openmind-dtn.mit.edu`. Then navigate to the directory
+with the NWB files, e.g.
+`/om/user/nwatters/nwb_data_multi_prediction/staging/`. Finally, run the steps
+in the
+[DANDI uploading pipeline](https://www.dandiarchive.org/handbook/13_upload/#data-uploadmanagement-workflow).
 
-If you run into memory issues when writing the `{session_id}_raw.nwb` files, you may want to set `buffer_gb` to a value smaller than 1 (its default) in the `conversion_options` dicts for the recording interfaces, i.e. [here](https://github.com/catalystneuro/jazayeri-lab-to-nwb/blob/vprobe_dev/src/jazayeri_lab_to_nwb/watters/main_convert_session.py#L189).
+Note that you must `pip install dandi` to run the uploading steps, and in order
+to activate a conda environment in openmind-dtn you may have to run
+`$ source ~/.bashrc` in the openmind-dtn terminal. Also note that uploading
+entire sessions of raw data to DANDI can take a while, so it is convenient to
+run it in a tmux terminal on openmind-dtn.
diff --git a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py
index aab00be..abddb05 100644
--- a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py
+++ b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py
@@ -1,11 +1,11 @@
 """Entrypoint to convert an entire session of data to NWB.
 
 This converts a session to NWB format and writes the nwb files to
-    /om/user/nwatters/nwb_data_multi_prediction/{$SUBJECT}/{$SESSION}
+    /om/user/nwatters/nwb_data_multi_prediction/staging/sub-$SUBJECT/
 Two NWB files are created:
-    $SUBJECT_$SESSION_raw.nwb --- Raw physiology
-    $SUBJECT_$SESSION_processed.nwb --- Task, behavior, and sorted physiology
-These files can be automatically uploaded to a DANDI dataset.
+    sub-$SUBJECT_ses-$SESSION_ecephys.nwb --- Raw physiology
+    sub-$SUBJECT_ses-$SESSION_behavior+ecephys.nwb --- Task, behavior, and
+    sorted physiology
 
 Usage:
     $ python main_convert_session.py $SUBJECT $SESSION
@@ -17,23 +17,18 @@
     _REPO
     _STUB_TEST
     _OVERWRITE
-    _DANDISET_ID
 See comments below for descriptions of these variables.
 """
 
 import glob
 import json
 import logging
-import os
 import sys
 from pathlib import Path
-from typing import Union
 from uuid import uuid4
-from zoneinfo import ZoneInfo
 
 import get_session_paths
 import nwb_converter
-from neuroconv.tools.data_transfers import automatic_dandi_upload
 from neuroconv.utils import dict_deep_update, load_dict_from_file
 
 # Data repository. Either 'globus' or 'openmind'
@@ -42,8 +37,6 @@
 _STUB_TEST = True
 # Whether to overwrite output nwb files
 _OVERWRITE = True
-# ID of the dandiset to upload to, or None to not upload
-_DANDISET_ID = None  # '000620'
 
 # Set logger level for info is displayed in console
 logging.getLogger().setLevel(logging.INFO)
@@ -54,7 +47,7 @@
 }
 _SUBJECT_TO_AGE = {
     "Perle": "P10Y",  # Born 6/11/2012
-    "Elgar": "P10Y",  # Born 5/2/2012
+    "Elgar": "P11Y",  # Born 5/2/2012
 }
@@ -141,6 +134,7 @@ def _add_spikeglx_data(
     ]
     if len(spikeglx_dir) == 0:
         logging.info("Found no SpikeGLX data")
+        return
     elif len(spikeglx_dir) == 1:
         spikeglx_dir = spikeglx_dir[0]
     else:
@@ -167,12 +161,75 @@ def _add_spikeglx_data(
     )
 
 
+def _update_metadata(metadata, subject, session_id, session_paths):
+    """Update metadata."""
+
+    # Add subject_id, session_id, sex, and age
+    metadata["NWBFile"]["session_id"] = session_id
+    metadata["Subject"]["subject_id"] = subject
+    metadata["Subject"]["sex"] = _SUBJECT_TO_SEX[subject]
+    metadata["Subject"]["age"] = _SUBJECT_TO_AGE[subject]
+
+    # Add probe locations
+    probe_metadata_file = (
+        session_paths.data_open_source / "probes.metadata.json"
+    )
+    probe_metadata = json.load(open(probe_metadata_file, "r"))
+    for entry in metadata["Ecephys"]["ElectrodeGroup"]:
+        if entry["device"] == "Neuropixel-Imec":
+            neuropixel_metadata = [
+                x for x in probe_metadata if x["probe_type"] == "Neuropixels"
+            ][0]
+            coordinate_system = neuropixel_metadata["coordinate_system"]
+            coordinates = neuropixel_metadata["coordinates"]
+            depth_from_surface = neuropixel_metadata["depth_from_surface"]
+            entry["description"] = (
+                f"{entry['description']}\n"
+                f"{coordinate_system}\n"
+                f"coordinates = {coordinates}\n"
+                f"depth_from_surface = {depth_from_surface}"
+            )
+            entry["position"] = [
+                coordinates[0],
+                coordinates[1],
+                depth_from_surface,
+            ]
+        elif "vprobe" in entry["device"]:
+            probe_index = int(entry["device"].split("vprobe")[1])
+            v_probe_metadata = [
+                x for x in probe_metadata if x["probe_type"] == "V-Probe 64"
+            ][probe_index]
+            first_channel = v_probe_metadata["coordinates"]["first_channel"]
+            last_channel = v_probe_metadata["coordinates"]["last_channel"]
+            coordinate_system = v_probe_metadata["coordinate_system"]
+            entry["description"] = (
+                f"{entry['description']}\n"
+                f"{coordinate_system}\n"
+                f"first_channel = {first_channel}\n"
+                f"last_channel = {last_channel}"
+            )
+            entry["position"] = first_channel
+
+    # Update default metadata with the editable metadata in metadata.yaml
+    editable_metadata_path = Path(__file__).parent / "metadata.yaml"
+    editable_metadata = load_dict_from_file(editable_metadata_path)
+    metadata = dict_deep_update(metadata, editable_metadata)
+
+    # Ensure session_start_time exists in metadata
+    if "session_start_time" not in metadata["NWBFile"]:
+        raise ValueError(
+            "Session start time was not auto-detected. Please provide it "
+            "in `metadata.yaml`"
+        )
+
+    return metadata
+
+
 def session_to_nwb(
     subject: str,
     session: str,
     stub_test: bool = False,
     overwrite: bool = True,
-    dandiset_id: Union[str, None] = None,
 ):
     """
     Convert a single session to an NWB file.
@@ -189,27 +246,10 @@ def session_to_nwb(
     overwrite : boolean
         If the file exists already, True will delete and replace with a new
         file, False will append the contents. Default is True.
-    dandiset_id : string, optional
-        If you want to upload the file to the DANDI archive, specify the six-digit ID here.
-        Requires the DANDI_API_KEY environment variable to be set.
-        To set this in your bash terminal in Linux or macOS, run
-            export DANDI_API_KEY=...
-        or in Windows
-            set DANDI_API_KEY=...
-        Default is None.
     """
-    if dandiset_id is not None:
-        import dandi  # check importability
-
-        assert os.getenv("DANDI_API_KEY"), (
-            "Unable to find environment variable 'DANDI_API_KEY'. "
-            "Please retrieve your token from DANDI and set this environment "
-            "variable."
-        )
 
     logging.info(f"stub_test = {stub_test}")
     logging.info(f"overwrite = {overwrite}")
-    logging.info(f"dandiset_id = {dandiset_id}")
 
     # Get paths
     session_paths = get_session_paths.get_session_paths(
@@ -288,50 +328,19 @@ def session_to_nwb(
     )
     processed_conversion_options["Display"] = dict()
 
-    # Create processed data converter
+    # Create data converters
     processed_converter = nwb_converter.NWBConverter(
         source_data=processed_source_data,
         sync_dir=session_paths.sync_pulses,
     )
-
-    # Add datetime and subject name to processed converter
-    metadata = processed_converter.get_metadata()
-    metadata["NWBFile"]["session_id"] = session_id
-    metadata["Subject"]["subject_id"] = subject
-    metadata["Subject"]["sex"] = _SUBJECT_TO_SEX[subject]
-    metadata["Subject"]["age"] = _SUBJECT_TO_AGE[subject]
-
-    # EcePhys
-    probe_metadata_file = (
-        session_paths.data_open_source / "probes.metadata.json"
+    raw_converter = nwb_converter.NWBConverter(
+        source_data=raw_source_data,
+        sync_dir=str(session_paths.sync_pulses),
     )
-    with open(probe_metadata_file, "r") as f:
-        probe_metadata = json.load(f)
-    neuropixel_metadata = [
-        x for x in probe_metadata if x["probe_type"] == "Neuropixels"
-    ][0]
-    for entry in metadata["Ecephys"]["ElectrodeGroup"]:
-        if entry["device"] == "Neuropixel-Imec":
-            # TODO: uncomment when fixed in pynwb
-            # entry.update(dict(position=[(
-            #     neuropixel_metadata['coordinates'][0],
-            #     neuropixel_metadata['coordinates'][1],
-            #     neuropixel_metadata['depth_from_surface'],
-            # )]
-            logging.info("\n\n")
-            logging.warning("    PROBE COORDINATES NOT IMPLEMENTED\n\n")
 
-    # Update default metadata with the editable in the corresponding yaml file
-    editable_metadata_path = Path(__file__).parent / "metadata.yaml"
-    editable_metadata = load_dict_from_file(editable_metadata_path)
-    metadata = dict_deep_update(metadata, editable_metadata)
-
-    # Check if session_start_time was found/set
-    if "session_start_time" not in metadata["NWBFile"]:
-        raise ValueError(
-            "Session start time was not auto-detected. Please provide it "
-            "in `metadata.yaml`"
-        )
+    # Update metadata
+    metadata = processed_converter.get_metadata()
+    metadata = _update_metadata(metadata, subject, session_id, session_paths)
 
     # Run conversion
     logging.info("Running processed conversion")
@@ -344,10 +353,6 @@ def session_to_nwb(
     logging.info("Running raw data conversion")
     metadata["NWBFile"]["identifier"] = str(uuid4())
 
-    raw_converter = nwb_converter.NWBConverter(
-        source_data=raw_source_data,
-        sync_dir=str(session_paths.sync_pulses),
-    )
     raw_converter.run_conversion(
         metadata=metadata,
         nwbfile_path=raw_nwb_path,
@@ -355,14 +360,6 @@ def session_to_nwb(
         overwrite=overwrite,
     )
 
-    # Upload to DANDI
-    if dandiset_id is not None:
-        logging.info(f"Uploading to dandiset id {dandiset_id}")
-        automatic_dandi_upload(
-            dandiset_id=dandiset_id,
-            nwb_folder_path=session_paths.output,
-        )
-
 
 if __name__ == "__main__":
     """Run session conversion."""
@@ -374,6 +371,5 @@
         session=session,
         stub_test=_STUB_TEST,
         overwrite=_OVERWRITE,
-        dandiset_id=_DANDISET_ID,
     )
     logging.info(f"\nFinished conversion for {subject}/{session}\n")
diff --git a/src/jazayeri_lab_to_nwb/watters/metadata.yaml b/src/jazayeri_lab_to_nwb/watters/metadata.yaml
index 0ed943b..fc5e56b 100644
--- a/src/jazayeri_lab_to_nwb/watters/metadata.yaml
+++ b/src/jazayeri_lab_to_nwb/watters/metadata.yaml
@@ -11,5 +11,6 @@ NWBFile:
   lab: Jazayeri
   experimenter:
     - Watters, Nicholas
+    - Gabel, John
 Subject:
   species: Macaca mulatta
diff --git a/src/jazayeri_lab_to_nwb/watters/nwb_converter.py b/src/jazayeri_lab_to_nwb/watters/nwb_converter.py
index 488786d..b4e2d7e 100644
--- a/src/jazayeri_lab_to_nwb/watters/nwb_converter.py
+++ b/src/jazayeri_lab_to_nwb/watters/nwb_converter.py
@@ -6,10 +6,10 @@ from typing import Optional
 
 import display_interface
+import neuroconv
 import numpy as np
 import timeseries_interface
 import trials_interface
-from neuroconv import NWBConverter, datainterfaces
 from neuroconv.datainterfaces.ecephys.basesortingextractorinterface import (
     BaseSortingExtractorInterface,
 )
@@ -18,17 +18,17 @@
 from spikeinterface.core import waveform_tools
 
 
-class NWBConverter(NWBConverter):
+class NWBConverter(neuroconv.NWBConverter):
     """Primary conversion class for extracellular electrophysiology dataset."""
 
     data_interface_classes = dict(
         RecordingVP0=DatRecordingInterface,
-        SortingVP0=datainterfaces.KiloSortSortingInterface,
+        SortingVP0=neuroconv.datainterfaces.KiloSortSortingInterface,
         RecordingVP1=DatRecordingInterface,
-        SortingVP1=datainterfaces.KiloSortSortingInterface,
-        RecordingNP=datainterfaces.SpikeGLXRecordingInterface,
-        LF=datainterfaces.SpikeGLXRecordingInterface,
-        SortingNP=datainterfaces.KiloSortSortingInterface,
+        SortingVP1=neuroconv.datainterfaces.KiloSortSortingInterface,
+        RecordingNP=neuroconv.datainterfaces.SpikeGLXRecordingInterface,
+        LF=neuroconv.datainterfaces.SpikeGLXRecordingInterface,
+        SortingNP=neuroconv.datainterfaces.KiloSortSortingInterface,
         EyePosition=timeseries_interface.EyePositionInterface,
         PupilSize=timeseries_interface.PupilSizeInterface,
         RewardLine=timeseries_interface.RewardLineInterface,
@@ -82,6 +82,8 @@ def temporally_align_data_interfaces(self):
             transform_path = sync_dir / "spikeglx" / "transform"
             transform = json.load(open(transform_path, "r"))
             lf_interface = self.data_interface_objects["LF"]
+        else:
+            raise ValueError(f"Invalid probe_name {probe_name}")
 
         intercept = transform["intercept"]
         coef = transform["coef"]
@@ -111,7 +113,8 @@ def temporally_align_data_interfaces(self):
             )
             if exceeded_spikes:
                 raise ValueError(
-                    f"Spikes exceeding recording found in Sorting{probe_name}!"
+                    "Spikes exceeding recording found in "
+                    f"Sorting{probe_name}"
                 )
 
             # Register recording
diff --git a/src/jazayeri_lab_to_nwb/watters/recording_interface.py b/src/jazayeri_lab_to_nwb/watters/recording_interface.py
index 5ee3619..87e5008 100644
--- a/src/jazayeri_lab_to_nwb/watters/recording_interface.py
+++ b/src/jazayeri_lab_to_nwb/watters/recording_interface.py
@@ -1,4 +1,4 @@
-"""Primary class for recording data."""
+"""Primary class for recording V-Probe data from .dat files."""
 
 from typing import Optional
 
@@ -72,7 +72,7 @@ def get_metadata(self) -> dict:
                 manufacturer="Plexon",
             )
         ]
-        description = f"a group representing electrodes on {self.probe_name}"
+        description = f"A group representing electrodes on {self.probe_name}"
         electrode_groups = [
             dict(
                 name=self.probe_name,
diff --git a/src/jazayeri_lab_to_nwb/watters/requirements.txt b/src/jazayeri_lab_to_nwb/watters/requirements.txt
index e69de29..ee88472 100644
--- a/src/jazayeri_lab_to_nwb/watters/requirements.txt
+++ b/src/jazayeri_lab_to_nwb/watters/requirements.txt
@@ -0,0 +1,6 @@
+neuroconv==0.4.6
+spikeinterface==0.99.1
+nwbwidgets==0.11.3
+nwbinspector==0.4.31
+pre-commit==3.6.0
+ndx-events==0.2.0
diff --git a/src/jazayeri_lab_to_nwb/watters/trials_interface.py b/src/jazayeri_lab_to_nwb/watters/trials_interface.py
index d32b07d..a4edb9b 100644
--- a/src/jazayeri_lab_to_nwb/watters/trials_interface.py
+++ b/src/jazayeri_lab_to_nwb/watters/trials_interface.py
@@ -180,13 +180,16 @@ def column_descriptions(self):
                 "Response position for each trial. This differs from "
                 "closed_loop_response_position in that this is calculated "
                 "post-hoc from high-resolution eye tracking data, hence is "
-                "more accurate."
+                "more accurate. Note that unlike "
+                "closed_loop_response_position, this may be inconsistent with "
+                "reward delivery."
             ),
             "response_time": (
                 "Response time for each trial. This differs from "
                 "closed_loop_response_time in that this is calculated post-hoc "
                 "from high-resolution eye tracking data, hence is more "
-                "accurate."
+                "accurate. Note that unlike closed_loop_response_time, this "
+                "may be inconsistent with reward delivery."
             ),
         }