catalystneuro · pauladkisson · Dec 18, 2023 · Dec 7, 2023 · Dec 12, 2023 · Dec 12, 2023
diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basedattainterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basedattainterface.py
@@ -1,8 +1,10 @@
 """Primary class for handling metadata non-specific to any other DataInterfaces."""
-from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface
-from neuroconv.utils import load_dict_from_file
+from pathlib import Path
+
 import pandas as pd
 import numpy as np
+from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface
+from neuroconv.utils import load_dict_from_file
 
 
 class BaseDattaInterface(BaseTemporalAlignmentInterface):
@@ -24,10 +26,13 @@ def get_metadata(self) -> dict:
         metadata["Subject"]["subject_id"] = session_metadata["subject_id"]
         metadata["Subject"]["sex"] = subject_metadata["sex"]
 
-        if self.source_data["alignment_path"] is not None:
+        if self.source_data["alignment_path"] is not None and Path(self.source_data["alignment_path"]).exists():
             alignment_df = pd.read_parquet(self.source_data["alignment_path"])
             metadata["Alignment"]["slope"] = alignment_df["slope"].iloc[0]
             metadata["Alignment"]["bias"] = alignment_df["bias"].iloc[0]
+        else:
+            metadata["Alignment"]["slope"] = 1.0
+            metadata["Alignment"]["bias"] = 0.0
 
         return metadata
 

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py
@@ -1,17 +1,17 @@
 """Base class for converting raw video data."""
-from pynwb import NWBFile
-from datetime import datetime
-from pytz import timezone
-import h5py
+from pathlib import Path
+
 import numpy as np
 import pandas as pd
+from pynwb import NWBFile
 from neuroconv.datainterfaces import VideoInterface
+
 from .basedattainterface import BaseDattaInterface
 from .utils import convert_timestamps_to_seconds
 
 
 class BaseVideoInterface(BaseDattaInterface):
-    """Base video interface for markowitz_gillis_nature_2023 conversion"""
+    """Base video interface for markowitz_gillis_nature_2023 conversion."""
 
     def __init__(
         self,
@@ -51,6 +51,9 @@ def align_timestamps(self, metadata: dict) -> np.ndarray:
     def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
         timestamps = self.align_timestamps(metadata=metadata)
 
+        if not Path(self.source_data["data_path"]).exists():
+            return
+
         video_interface = VideoInterface(file_paths=[self.source_data["data_path"]], verbose=True)
         video_interface.set_aligned_timestamps(aligned_timestamps=[timestamps])
         video_interface.add_to_nwbfile(

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -1,4 +1,4 @@
-import traceback
+"""Convert the entire dataset."""
 import json
 from pathlib import Path
 from typing import Union
@@ -71,7 +71,7 @@ def get_all_processed_uuids(
     all_processed_uuids = unique_photometry_uuids | unique_reinforcement_uuids | unique_velocity_uuids
 
     with open(file=uuid_file_path, mode="w") as io:
-        json.dump(obj=list(all_processed_uuids), fp=io)
+        json.dump(obj=list(all_processed_uuids), fp=io, indent=4)
     return all_processed_uuids
 
 
@@ -80,13 +80,14 @@ def dataset_to_nwb(
     processed_path: Union[str, Path],
     raw_dir_path: Union[str, Path],
     output_dir_path: Union[str, Path],
-    skip_sessions: set,
-    number_of_jobs: int,
+    skip_sessions: Union[set, None] = None,
+    number_of_jobs: int = 1,
     num_sessions_per_experiment: int = None,
 ):
     processed_path = Path(processed_path)
     raw_dir_path = Path(raw_dir_path)
     output_dir_path = Path(output_dir_path)
+    skip_sessions = skip_sessions or set()
 
     log_folder_path = output_dir_path.parent / "logs"
     log_folder_path.mkdir(exist_ok=True)
@@ -101,7 +102,9 @@ def dataset_to_nwb(
         for folder in raw_dir_path.iterdir()
         if folder.is_dir() and folder.name not in skip_experiments and folder.name.startswith("_")
     ]
-    for experimental_folder in tqdm(iterable=experimental_folders, position=0, description="Converting experiments..."):
+    for experimental_folder in tqdm(
+        iterable=experimental_folders, position=0, desc="Converting experiments...", leave=False
+    ):
         experiment_type = folder_name_to_experiment_type[experimental_folder.name]
         session_folders = [
             folder for folder in experimental_folder.iterdir() if folder.is_dir() and folder.name not in skip_sessions
@@ -143,52 +146,30 @@ def dataset_to_nwb(
                     break
 
             parallel_iterable = tqdm(
-                iterable=as_completed(futures), position=1, description="Converting sessionsin parallel..."
+                iterable=as_completed(futures),
+                total=len(futures),
+                position=1,
+                desc="Converting sessions in parallel...",
+                leave=False,
             )
             for _ in parallel_iterable:
                 pass
 
 
 if __name__ == "__main__":
-    number_of_jobs = 4
+    number_of_jobs = 1
 
     processed_path = Path("E:/Datta/dopamine-reinforces-spontaneous-behavior")
     raw_dir_path = Path("E:/Datta")
-    output_dir_path = Path("E:/datta_output/files")
+    output_dir_path = Path("F:/Datta/nwbfiles")
 
     skip_experiments = {
         "keypoint",  # no proc folder for keypoints
     }
-    temporary_skip_sessions = {
-        "session_20210420113646-974717",  # _aggregate_results_arhmm_photometry_excitation_pulsed_01: missing everything except depth video
-        "session_20210309134748-687283",  # _aggregate_results_arhmm_excitation_03: missing everything except depth video
-        "session_20210224083612-947426",  # _aggregate_results_arhmm_excitation_03: missing proc folder
-        "session_20210224094428-535503",  # _aggregate_results_arhmm_excitation_03: missing proc folder
-        "session_20210309120607-939403",  # _aggregate_results_arhmm_excitation_03: proc folder empty
-        "session_20201109130417-162983",  # _aggregate_results_arhmm_excitation_01: proc folder empty
-        "session_20220308114215-760303",  # _aggregate_results_arhmm_scalar_03: missing proc folder
-        "session_20211217102637-612299",  # _aggregate_results_arhmm_photometry_06: missing everything except ir video
-        "session_20211202155132-245700",  # _aggregate_results_arhmm_photometry_06: missing everything except ir video
-        "session_20210128093041-475933",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
-        "session_20210215185110-281693",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
-        "session_20210208173229-833584",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
-        "session_20210201115439-569392",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
-        "session_20200729112540-313279",  # _aggregate_results_arhmm_07: missing everything except depth video
-        "session_20200810085750-497237",  # _aggregate_results_arhmm_07: missing everything except depth video
-        "session_20200730090228-985303",  # _aggregate_results_arhmm_07: missing everything except depth video
-        "session_20201207093653-476370",  # _aggregate_results_arhmm_excitation_02: missing everything except depth video
-        "session_20210426143230-310843",  # _aggregate_results_arhmm_09: missing everything except depth video
-        "session_20210429135801-758690",  # _aggregate_results_arhmm_09: missing everything except depth video
-        "session_20191111130454-333065",  # _aggregate_results_arhmm_05: missing proc folder
-        "session_20191111130847-263894",  # _aggregate_results_arhmm_05: missing proc folder
-        "session_20200720110309-817092",
-        "session_20210115130943-880998",
-    }
     dataset_to_nwb(
         processed_path=processed_path,
         raw_dir_path=raw_dir_path,
         output_dir_path=output_dir_path,
-        skip_sessions=temporary_skip_sessions,
         number_of_jobs=number_of_jobs,
-        num_sessions_per_experiment=1,
+        # num_sessions_per_experiment=1,
     )
diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py
@@ -7,7 +7,6 @@
 from neuroconv.utils import dict_deep_update, load_dict_from_file
 from pynwb import NWBHDF5IO
 
-from datta_lab_to_nwb.markowitz_gillis_nature_2023.postconversion import reproduce_fig1d
 from datta_lab_to_nwb.markowitz_gillis_nature_2023.nwbconverter import DattaNWBConverter
 
 
@@ -52,7 +51,11 @@ def session_to_nwb(
         output_dir_path = output_dir_path / "nwb_stub"
     output_dir_path.mkdir(parents=True, exist_ok=True)
     session_id = f"{experiment_type}-{session_uuid}"
+
     nwbfile_path = output_dir_path / f"{session_id}.nwb"
+    if nwbfile_path.exists():
+        return
+
     photometry_path = processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet"
     if experiment_type == "velocity-modulation":
         optoda_path = processed_path / "optoda_raw_data/closed_loop_behavior_velocity_conditioned.parquet"
@@ -113,22 +116,28 @@ def session_to_nwb(
         conversion_options["BehavioralSyllable"] = dict(reinforcement=True)
         behavioral_syllable_path = optoda_path
     if "photometry" in session_metadata.keys():
-        tdt_path = list(raw_path.glob("tdt_data*.dat"))[0]
-        tdt_metadata_path = list(raw_path.glob("tdt_data*.json"))[0]
         ir_path = raw_path / "ir.avi"
         source_data["FiberPhotometry"] = dict(
             file_path=str(photometry_path),
-            tdt_path=str(tdt_path),
-            tdt_metadata_path=str(tdt_metadata_path),
             depth_timestamp_path=str(depth_ts_path),
             session_metadata_path=str(session_metadata_path),
             subject_metadata_path=str(subject_metadata_path),
             session_uuid=session_uuid,
             session_id=session_id,
             alignment_path=str(alignment_path),
         )
+
+        tdt_paths = list(raw_path.glob("tdt_data*.dat"))
+        if any(tdt_paths):
+            source_data["FiberPhotometry"].update(tdt_path=str(tdt_paths[0]))
+        tdt_metadata_paths = list(raw_path.glob("tdt_data*.json"))
+        if any(tdt_metadata_paths):
+            source_data["FiberPhotometry"].update(tdt_metadata_path=str(tdt_metadata_paths[0]))
+
         conversion_options["FiberPhotometry"] = {}
-        behavioral_syllable_path = photometry_path  # Note: if photometry and optogenetics are both present, photometry is used for syllable data bc it is quicker to load
+        behavioral_syllable_path = photometry_path
+        # Note: if photometry and optogenetics are both present,
+        # photometry is used for syllable data because it is quicker to load.
         source_data["IRVideo"] = dict(
             data_path=str(ir_path),
             timestamp_path=str(depth_ts_path),
@@ -212,7 +221,7 @@ def session_to_nwb(
             processed_only = False
         for example_session in example_sessions:
             session_to_nwb(
-                session_uuid=example_session,
+                session_uuid="0fc7bbac-adee-46d8-897a-213a56983ebe",
                 processed_path=processed_path,
                 raw_path=experiment_type2raw_path[experiment_type],
                 output_dir_path=output_dir_path,
@@ -222,7 +231,7 @@ def session_to_nwb(
             )
     with NWBHDF5IO(output_dir_path / f"reinforcement-photometry-{raw_rp_example}.nwb", "r") as io:
         nwbfile = io.read()
-        print(nwbfile)
+
     # nwbfile_path = output_dir_path / f"{figure1d_example}.nwb"
     # paper_metadata_path = Path(__file__).parent / "markowitz_gillis_nature_2023_metadata.yaml"
     # reproduce_figures.reproduce_fig1d(nwbfile_path, paper_metadata_path)
diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py
@@ -1,27 +1,29 @@
 """Primary class for converting fiber photometry data (dLight fluorescence)."""
-# Standard Scientific Python
+from typing import Union
+
 import pandas as pd
 import numpy as np
 
-# NWB Ecosystem
 from pynwb.file import NWBFile
 from pynwb.ophys import RoiResponseSeries
-from .rawfiberphotometryinterface import RawFiberPhotometryInterface
 from neuroconv.tools import nwb_helpers
+from neuroconv.utils import FilePathType
 from hdmf.backends.hdf5.h5_utils import H5DataIO
 
+from .rawfiberphotometryinterface import RawFiberPhotometryInterface
+
 
 class FiberPhotometryInterface(RawFiberPhotometryInterface):
     def __init__(
         self,
         file_path: str,
-        tdt_path: str,
-        tdt_metadata_path: str,
         depth_timestamp_path: str,
         session_uuid: str,
         session_id: str,
         session_metadata_path: str,
         subject_metadata_path: str,
+        tdt_path: Union[FilePathType, None] = None,
+        tdt_metadata_path: Union[FilePathType, None] = None,
         alignment_path: str = None,
     ):
         # This should load the data lazily and prepare variables you need
@@ -72,6 +74,10 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
             filters=[("uuid", "==", self.source_data["session_uuid"])],
         )
         notnan = pd.notnull(session_df.signal_dff)
+
+        if not any(notnan):
+            return
+
         signal_series = RoiResponseSeries(
             name="SignalDfOverF",
             description="The ΔF/F from the blue light excitation (470nm) corresponding to the dopamine signal.",

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py
@@ -1,7 +1,5 @@
 """Primary class for converting MoSeq Extraction data."""
 from pynwb import NWBFile
-from datetime import datetime
-from pytz import timezone
 import h5py
 import numpy as np
 from hdmf.backends.hdf5.h5_utils import H5DataIO
@@ -19,7 +17,7 @@
 
 
 class MoseqExtractInterface(BaseDattaInterface):
-    """Moseq interface for markowitz_gillis_nature_2023 conversion"""
+    """Moseq interface for markowitz_gillis_nature_2023 conversion."""
 
     def __init__(
         self,