Merge pull request #101 from catalystneuro/more_mods
Code used to run conversion
pauladkisson authored Dec 18, 2023
2 parents a13c9bd + 99b9fe5 commit 886a8be
Showing 7 changed files with 172 additions and 123 deletions.
@@ -1,8 +1,10 @@
"""Primary class for handling metadata non-specific to any other DataInterfaces."""
from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface
from neuroconv.utils import load_dict_from_file
from pathlib import Path

import pandas as pd
import numpy as np
from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface
from neuroconv.utils import load_dict_from_file


class BaseDattaInterface(BaseTemporalAlignmentInterface):
@@ -24,10 +26,13 @@ def get_metadata(self) -> dict:
metadata["Subject"]["subject_id"] = session_metadata["subject_id"]
metadata["Subject"]["sex"] = subject_metadata["sex"]

if self.source_data["alignment_path"] is not None:
if self.source_data["alignment_path"] is not None and Path(self.source_data["alignment_path"]).exists():
alignment_df = pd.read_parquet(self.source_data["alignment_path"])
metadata["Alignment"]["slope"] = alignment_df["slope"].iloc[0]
metadata["Alignment"]["bias"] = alignment_df["bias"].iloc[0]
else:
metadata["Alignment"]["slope"] = 1.0
metadata["Alignment"]["bias"] = 0.0

return metadata

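The fallback above means that when no alignment parquet file is available, downstream consumers see an identity alignment rather than a crash. A minimal sketch of how a linear slope/bias correction of this kind is typically applied to timestamps; the helper below is illustrative, not code from this repo, and the direction of the transform is an assumption:

import numpy as np

def apply_alignment(timestamps: np.ndarray, metadata: dict) -> np.ndarray:
    """Illustrative linear alignment: with the new defaults
    (slope=1.0, bias=0.0) this reduces to the identity transform."""
    slope = metadata["Alignment"]["slope"]
    bias = metadata["Alignment"]["bias"]
    return slope * np.asarray(timestamps) + bias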
@@ -1,17 +1,17 @@
"""Base class for converting raw video data."""
from pynwb import NWBFile
from datetime import datetime
from pytz import timezone
import h5py
from pathlib import Path

import numpy as np
import pandas as pd
from pynwb import NWBFile
from neuroconv.datainterfaces import VideoInterface

from .basedattainterface import BaseDattaInterface
from .utils import convert_timestamps_to_seconds


class BaseVideoInterface(BaseDattaInterface):
"""Base video interface for markowitz_gillis_nature_2023 conversion"""
"""Base video interface for markowitz_gillis_nature_2023 conversion."""

def __init__(
self,
@@ -51,6 +51,9 @@ def align_timestamps(self, metadata: dict) -> np.ndarray:
def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
timestamps = self.align_timestamps(metadata=metadata)

if not Path(self.source_data["data_path"]).exists():
return

video_interface = VideoInterface(file_paths=[self.source_data["data_path"]], verbose=True)
video_interface.set_aligned_timestamps(aligned_timestamps=[timestamps])
video_interface.add_to_nwbfile(
@@ -1,4 +1,4 @@
import traceback
"""Convert the entire dataset."""
import json
from pathlib import Path
from typing import Union
@@ -71,7 +71,7 @@ def get_all_processed_uuids(
all_processed_uuids = unique_photometry_uuids | unique_reinforcement_uuids | unique_velocity_uuids

with open(file=uuid_file_path, mode="w") as io:
json.dump(obj=list(all_processed_uuids), fp=io)
json.dump(obj=list(all_processed_uuids), fp=io, indent=4)
return all_processed_uuids


@@ -80,13 +80,14 @@ def dataset_to_nwb(
processed_path: Union[str, Path],
raw_dir_path: Union[str, Path],
output_dir_path: Union[str, Path],
skip_sessions: set,
number_of_jobs: int,
skip_sessions: Union[set, None] = None,
number_of_jobs: int = 1,
num_sessions_per_experiment: int = None,
):
processed_path = Path(processed_path)
raw_dir_path = Path(raw_dir_path)
output_dir_path = Path(output_dir_path)
skip_sessions = skip_sessions or set()

log_folder_path = output_dir_path.parent / "logs"
log_folder_path.mkdir(exist_ok=True)
@@ -101,7 +102,9 @@ def dataset_to_nwb(
for folder in raw_dir_path.iterdir()
if folder.is_dir() and folder.name not in skip_experiments and folder.name.startswith("_")
]
for experimental_folder in tqdm(iterable=experimental_folders, position=0, description="Converting experiments..."):
for experimental_folder in tqdm(
iterable=experimental_folders, position=0, desc="Converting experiments...", leave=False
):
experiment_type = folder_name_to_experiment_type[experimental_folder.name]
session_folders = [
folder for folder in experimental_folder.iterdir() if folder.is_dir() and folder.name not in skip_sessions
@@ -143,52 +146,30 @@ def dataset_to_nwb(
break

parallel_iterable = tqdm(
iterable=as_completed(futures), position=1, description="Converting sessionsin parallel..."
iterable=as_completed(futures),
total=len(futures),
position=1,
desc="Converting sessions in parallel...",
leave=False,
)
for _ in parallel_iterable:
pass


if __name__ == "__main__":
number_of_jobs = 4
number_of_jobs = 1

processed_path = Path("E:/Datta/dopamine-reinforces-spontaneous-behavior")
raw_dir_path = Path("E:/Datta")
output_dir_path = Path("E:/datta_output/files")
output_dir_path = Path("F:/Datta/nwbfiles")

skip_experiments = {
"keypoint", # no proc folder for keypoints
}
temporary_skip_sessions = {
"session_20210420113646-974717", # _aggregate_results_arhmm_photometry_excitation_pulsed_01: missing everything except depth video
"session_20210309134748-687283", # _aggregate_results_arhmm_excitation_03: missing everything except depth video
"session_20210224083612-947426", # _aggregate_results_arhmm_excitation_03: missing proc folder
"session_20210224094428-535503", # _aggregate_results_arhmm_excitation_03: missing proc folder
"session_20210309120607-939403", # _aggregate_results_arhmm_excitation_03: proc folder empty
"session_20201109130417-162983", # _aggregate_results_arhmm_excitation_01: proc folder empty
"session_20220308114215-760303", # _aggregate_results_arhmm_scalar_03: missing proc folder
"session_20211217102637-612299", # _aggregate_results_arhmm_photometry_06: missing everything except ir video
"session_20211202155132-245700", # _aggregate_results_arhmm_photometry_06: missing everything except ir video
"session_20210128093041-475933", # _aggregate_results_arhmm_photometry_02: missing everything except ir video
"session_20210215185110-281693", # _aggregate_results_arhmm_photometry_02: missing everything except ir video
"session_20210208173229-833584", # _aggregate_results_arhmm_photometry_02: missing everything except ir video
"session_20210201115439-569392", # _aggregate_results_arhmm_photometry_02: missing everything except ir video
"session_20200729112540-313279", # _aggregate_results_arhmm_07: missing everything except depth video
"session_20200810085750-497237", # _aggregate_results_arhmm_07: missing everything except depth video
"session_20200730090228-985303", # _aggregate_results_arhmm_07: missing everything except depth video
"session_20201207093653-476370", # _aggregate_results_arhmm_excitation_02: missing everything except depth video
"session_20210426143230-310843", # _aggregate_results_arhmm_09: missing everything except depth video
"session_20210429135801-758690", # _aggregate_results_arhmm_09: missing everything except depth video
"session_20191111130454-333065", # _aggregate_results_arhmm_05: missing proc folder
"session_20191111130847-263894", # _aggregate_results_arhmm_05: missing proc folder
"session_20200720110309-817092",
"session_20210115130943-880998",
}
dataset_to_nwb(
processed_path=processed_path,
raw_dir_path=raw_dir_path,
output_dir_path=output_dir_path,
skip_sessions=temporary_skip_sessions,
number_of_jobs=number_of_jobs,
num_sessions_per_experiment=1,
# num_sessions_per_experiment=1,
)
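Two of the fixes in this file are easy to miss: tqdm takes desc, not description (an unknown keyword raises an error), and as_completed yields an iterator with no length, so total=len(futures) is needed for a meaningful progress bar. A self-contained sketch of the pattern, with a placeholder task standing in for the real per-session conversion:

from concurrent.futures import ProcessPoolExecutor, as_completed

from tqdm import tqdm


def convert_one(session_name: str) -> str:
    # Placeholder for the real per-session conversion work.
    return session_name


if __name__ == "__main__":
    sessions = ["session_a", "session_b", "session_c"]
    with ProcessPoolExecutor(max_workers=2) as executor:
        futures = [executor.submit(convert_one, session) for session in sessions]
        # total=len(futures) is required because as_completed() has no __len__.
        for _ in tqdm(as_completed(futures), total=len(futures), desc="Converting sessions in parallel..."):
            pass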
@@ -7,7 +7,6 @@
from neuroconv.utils import dict_deep_update, load_dict_from_file
from pynwb import NWBHDF5IO

from datta_lab_to_nwb.markowitz_gillis_nature_2023.postconversion import reproduce_fig1d
from datta_lab_to_nwb.markowitz_gillis_nature_2023.nwbconverter import DattaNWBConverter


@@ -52,7 +51,11 @@ def session_to_nwb(
output_dir_path = output_dir_path / "nwb_stub"
output_dir_path.mkdir(parents=True, exist_ok=True)
session_id = f"{experiment_type}-{session_uuid}"

nwbfile_path = output_dir_path / f"{session_id}.nwb"
if nwbfile_path.exists():
return

photometry_path = processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet"
if experiment_type == "velocity-modulation":
optoda_path = processed_path / "optoda_raw_data/closed_loop_behavior_velocity_conditioned.parquet"
@@ -113,22 +116,28 @@ def session_to_nwb(
conversion_options["BehavioralSyllable"] = dict(reinforcement=True)
behavioral_syllable_path = optoda_path
if "photometry" in session_metadata.keys():
tdt_path = list(raw_path.glob("tdt_data*.dat"))[0]
tdt_metadata_path = list(raw_path.glob("tdt_data*.json"))[0]
ir_path = raw_path / "ir.avi"
source_data["FiberPhotometry"] = dict(
file_path=str(photometry_path),
tdt_path=str(tdt_path),
tdt_metadata_path=str(tdt_metadata_path),
depth_timestamp_path=str(depth_ts_path),
session_metadata_path=str(session_metadata_path),
subject_metadata_path=str(subject_metadata_path),
session_uuid=session_uuid,
session_id=session_id,
alignment_path=str(alignment_path),
)

tdt_paths = list(raw_path.glob("tdt_data*.dat"))
if any(tdt_paths):
source_data["FiberPhotometry"].update(tdt_path=str(tdt_paths[0]))
tdt_metadata_paths = list(raw_path.glob("tdt_data*.json"))
if any(tdt_metadata_paths):
source_data["FiberPhotometry"].update(tdt_metadata_path=str(tdt_metadata_paths[0]))

conversion_options["FiberPhotometry"] = {}
behavioral_syllable_path = photometry_path # Note: if photometry and optogenetics are both present, photometry is used for syllable data bc it is quicker to load
behavioral_syllable_path = photometry_path
# Note: if photometry and optogenetics are both present
# photometry is used for syllable data bc it is quicker to load
source_data["IRVideo"] = dict(
data_path=str(ir_path),
timestamp_path=str(depth_ts_path),
@@ -212,7 +221,7 @@ def session_to_nwb(
processed_only = False
for example_session in example_sessions:
session_to_nwb(
session_uuid=example_session,
session_uuid="0fc7bbac-adee-46d8-897a-213a56983ebe",
processed_path=processed_path,
raw_path=experiment_type2raw_path[experiment_type],
output_dir_path=output_dir_path,
@@ -222,7 +231,7 @@ def session_to_nwb(
)
with NWBHDF5IO(output_dir_path / f"reinforcement-photometry-{raw_rp_example}.nwb", "r") as io:
nwbfile = io.read()
print(nwbfile)

# nwbfile_path = output_dir_path / f"{figure1d_example}.nwb"
# paper_metadata_path = Path(__file__).parent / "markowitz_gillis_nature_2023_metadata.yaml"
# reproduce_figures.reproduce_fig1d(nwbfile_path, paper_metadata_path)
@@ -1,27 +1,29 @@
"""Primary class for converting fiber photometry data (dLight fluorescence)."""
# Standard Scientific Python
from typing import Union

import pandas as pd
import numpy as np

# NWB Ecosystem
from pynwb.file import NWBFile
from pynwb.ophys import RoiResponseSeries
from .rawfiberphotometryinterface import RawFiberPhotometryInterface
from neuroconv.tools import nwb_helpers
from neuroconv.utils import FilePathType
from hdmf.backends.hdf5.h5_utils import H5DataIO

from .rawfiberphotometryinterface import RawFiberPhotometryInterface


class FiberPhotometryInterface(RawFiberPhotometryInterface):
def __init__(
self,
file_path: str,
tdt_path: str,
tdt_metadata_path: str,
depth_timestamp_path: str,
session_uuid: str,
session_id: str,
session_metadata_path: str,
subject_metadata_path: str,
tdt_path: Union[FilePathType, None] = None,
tdt_metadata_path: Union[FilePathType, None] = None,
alignment_path: str = None,
):
# This should load the data lazily and prepare variables you need
@@ -72,6 +74,10 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
filters=[("uuid", "==", self.source_data["session_uuid"])],
)
notnan = pd.notnull(session_df.signal_dff)

if not any(notnan):
return

signal_series = RoiResponseSeries(
name="SignalDfOverF",
description="The ΔF/F from the blue light excitation (470nm) corresponding to the dopamine signal.",
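With tdt_path and tdt_metadata_path now optional (defaulting to None), sessions missing raw TDT files can still construct the interface, matching the guarded glob logic in the session conversion script above. A sketch of such a call; every path below is a placeholder, not real data:

# All paths here are placeholders for illustration only.
interface = FiberPhotometryInterface(
    file_path="processed/dlight_raw_data/dlight_photometry_processed_full.parquet",
    depth_timestamp_path="raw/depth_ts.txt",
    session_uuid="0fc7bbac-adee-46d8-897a-213a56983ebe",
    session_id="reinforcement-photometry-0fc7bbac-adee-46d8-897a-213a56983ebe",
    session_metadata_path="metadata/session_metadata.yaml",
    subject_metadata_path="metadata/subject_metadata.yaml",
    # tdt_path and tdt_metadata_path omitted: they now default to None;
    # alignment_path may also be omitted, triggering the slope=1.0/bias=0.0
    # fallback in BaseDattaInterface.get_metadata.
)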
@@ -1,7 +1,5 @@
"""Primary class for converting MoSeq Extraction data."""
from pynwb import NWBFile
from datetime import datetime
from pytz import timezone
import h5py
import numpy as np
from hdmf.backends.hdf5.h5_utils import H5DataIO
@@ -19,7 +17,7 @@


class MoseqExtractInterface(BaseDattaInterface):
"""Moseq interface for markowitz_gillis_nature_2023 conversion"""
"""Moseq interface for markowitz_gillis_nature_2023 conversion."""

def __init__(
self,
