Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code used to run conversion #101

Merged
merged 5 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Primary class for handling metadata non-specific to any other DataInterfaces."""
from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface
from neuroconv.utils import load_dict_from_file
from pathlib import Path

import pandas as pd
import numpy as np
from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface
from neuroconv.utils import load_dict_from_file


class BaseDattaInterface(BaseTemporalAlignmentInterface):
Expand All @@ -24,10 +26,13 @@ def get_metadata(self) -> dict:
metadata["Subject"]["subject_id"] = session_metadata["subject_id"]
metadata["Subject"]["sex"] = subject_metadata["sex"]

if self.source_data["alignment_path"] is not None:
if self.source_data["alignment_path"] is not None and Path(self.source_data["alignment_path"]).exists():
alignment_df = pd.read_parquet(self.source_data["alignment_path"])
metadata["Alignment"]["slope"] = alignment_df["slope"].iloc[0]
metadata["Alignment"]["bias"] = alignment_df["bias"].iloc[0]
else:
metadata["Alignment"]["slope"] = 1.0
metadata["Alignment"]["bias"] = 0.0

return metadata

Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
"""Base class for converting raw video data."""
from pynwb import NWBFile
from datetime import datetime
from pytz import timezone
import h5py
from pathlib import Path

import numpy as np
import pandas as pd
from pynwb import NWBFile
from neuroconv.datainterfaces import VideoInterface

from .basedattainterface import BaseDattaInterface
from .utils import convert_timestamps_to_seconds


class BaseVideoInterface(BaseDattaInterface):
"""Base video interface for markowitz_gillis_nature_2023 conversion"""
"""Base video interface for markowitz_gillis_nature_2023 conversion."""

def __init__(
self,
Expand Down Expand Up @@ -51,6 +51,9 @@ def align_timestamps(self, metadata: dict) -> np.ndarray:
def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
timestamps = self.align_timestamps(metadata=metadata)

if not Path(self.source_data["data_path"]).exists():
return

video_interface = VideoInterface(file_paths=[self.source_data["data_path"]], verbose=True)
video_interface.set_aligned_timestamps(aligned_timestamps=[timestamps])
video_interface.add_to_nwbfile(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import traceback
"""Convert the entire dataset."""
import json
from pathlib import Path
from typing import Union
Expand Down Expand Up @@ -71,7 +71,7 @@ def get_all_processed_uuids(
all_processed_uuids = unique_photometry_uuids | unique_reinforcement_uuids | unique_velocity_uuids

with open(file=uuid_file_path, mode="w") as io:
json.dump(obj=list(all_processed_uuids), fp=io)
json.dump(obj=list(all_processed_uuids), fp=io, indent=4)
return all_processed_uuids


Expand All @@ -80,13 +80,14 @@ def dataset_to_nwb(
processed_path: Union[str, Path],
raw_dir_path: Union[str, Path],
output_dir_path: Union[str, Path],
skip_sessions: set,
number_of_jobs: int,
skip_sessions: Union[set, None] = None,
number_of_jobs: int = 1,
num_sessions_per_experiment: int = None,
):
processed_path = Path(processed_path)
raw_dir_path = Path(raw_dir_path)
output_dir_path = Path(output_dir_path)
skip_sessions = skip_sessions or set()

log_folder_path = output_dir_path.parent / "logs"
log_folder_path.mkdir(exist_ok=True)
Expand All @@ -101,7 +102,9 @@ def dataset_to_nwb(
for folder in raw_dir_path.iterdir()
if folder.is_dir() and folder.name not in skip_experiments and folder.name.startswith("_")
]
for experimental_folder in tqdm(iterable=experimental_folders, position=0, description="Converting experiments..."):
for experimental_folder in tqdm(
iterable=experimental_folders, position=0, desc="Converting experiments...", leave=False
):
experiment_type = folder_name_to_experiment_type[experimental_folder.name]
session_folders = [
folder for folder in experimental_folder.iterdir() if folder.is_dir() and folder.name not in skip_sessions
Expand Down Expand Up @@ -143,52 +146,30 @@ def dataset_to_nwb(
break

parallel_iterable = tqdm(
iterable=as_completed(futures), position=1, description="Converting sessionsin parallel..."
iterable=as_completed(futures),
total=len(futures),
position=1,
desc="Converting sessions in parallel...",
leave=False,
)
for _ in parallel_iterable:
pass


if __name__ == "__main__":
number_of_jobs = 4
number_of_jobs = 1

processed_path = Path("E:/Datta/dopamine-reinforces-spontaneous-behavior")
raw_dir_path = Path("E:/Datta")
output_dir_path = Path("E:/datta_output/files")
output_dir_path = Path("F:/Datta/nwbfiles")

skip_experiments = {
"keypoint", # no proc folder for keypoints
}
temporary_skip_sessions = {
"session_20210420113646-974717", # _aggregate_results_arhmm_photometry_excitation_pulsed_01: missing everything except depth video
"session_20210309134748-687283", # _aggregate_results_arhmm_excitation_03: missing everything except depth video
"session_20210224083612-947426", # _aggregate_results_arhmm_excitation_03: missing proc folder
"session_20210224094428-535503", # _aggregate_results_arhmm_excitation_03: missing proc folder
"session_20210309120607-939403", # _aggregate_results_arhmm_excitation_03: proc folder empty
"session_20201109130417-162983", # _aggregate_results_arhmm_excitation_01: proc folder empty
"session_20220308114215-760303", # _aggregate_results_arhmm_scalar_03: missing proc folder
"session_20211217102637-612299", # _aggregate_results_arhmm_photometry_06: missing everything except ir video
"session_20211202155132-245700", # _aggregate_results_arhmm_photometry_06: missing everything except ir video
"session_20210128093041-475933", # _aggregate_results_arhmm_photometry_02: missing everything except ir video
"session_20210215185110-281693", # _aggregate_results_arhmm_photometry_02: missing everything except ir video
"session_20210208173229-833584", # _aggregate_results_arhmm_photometry_02: missing everything except ir video
"session_20210201115439-569392", # _aggregate_results_arhmm_photometry_02: missing everything except ir video
"session_20200729112540-313279", # _aggregate_results_arhmm_07: missing everything except depth video
"session_20200810085750-497237", # _aggregate_results_arhmm_07: missing everything except depth video
"session_20200730090228-985303", # _aggregate_results_arhmm_07: missing everything except depth video
"session_20201207093653-476370", # _aggregate_results_arhmm_excitation_02: missing everything except depth video
"session_20210426143230-310843", # _aggregate_results_arhmm_09: missing everything except depth video
"session_20210429135801-758690", # _aggregate_results_arhmm_09: missing everything except depth video
"session_20191111130454-333065", # _aggregate_results_arhmm_05: missing proc folder
"session_20191111130847-263894", # _aggregate_results_arhmm_05: missing proc folder
"session_20200720110309-817092",
"session_20210115130943-880998",
}
dataset_to_nwb(
processed_path=processed_path,
raw_dir_path=raw_dir_path,
output_dir_path=output_dir_path,
skip_sessions=temporary_skip_sessions,
number_of_jobs=number_of_jobs,
num_sessions_per_experiment=1,
# num_sessions_per_experiment=1,
)
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from neuroconv.utils import dict_deep_update, load_dict_from_file
from pynwb import NWBHDF5IO

from datta_lab_to_nwb.markowitz_gillis_nature_2023.postconversion import reproduce_fig1d
from datta_lab_to_nwb.markowitz_gillis_nature_2023.nwbconverter import DattaNWBConverter


Expand Down Expand Up @@ -52,7 +51,11 @@ def session_to_nwb(
output_dir_path = output_dir_path / "nwb_stub"
output_dir_path.mkdir(parents=True, exist_ok=True)
session_id = f"{experiment_type}-{session_uuid}"

nwbfile_path = output_dir_path / f"{session_id}.nwb"
if nwbfile_path.exists():
return

photometry_path = processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet"
if experiment_type == "velocity-modulation":
optoda_path = processed_path / "optoda_raw_data/closed_loop_behavior_velocity_conditioned.parquet"
Expand Down Expand Up @@ -113,22 +116,28 @@ def session_to_nwb(
conversion_options["BehavioralSyllable"] = dict(reinforcement=True)
behavioral_syllable_path = optoda_path
if "photometry" in session_metadata.keys():
tdt_path = list(raw_path.glob("tdt_data*.dat"))[0]
tdt_metadata_path = list(raw_path.glob("tdt_data*.json"))[0]
ir_path = raw_path / "ir.avi"
source_data["FiberPhotometry"] = dict(
file_path=str(photometry_path),
tdt_path=str(tdt_path),
tdt_metadata_path=str(tdt_metadata_path),
depth_timestamp_path=str(depth_ts_path),
session_metadata_path=str(session_metadata_path),
subject_metadata_path=str(subject_metadata_path),
session_uuid=session_uuid,
session_id=session_id,
alignment_path=str(alignment_path),
)

tdt_paths = list(raw_path.glob("tdt_data*.dat"))
if any(tdt_paths):
source_data["FiberPhotometry"].update(tdt_path=str(tdt_paths[0]))
tdt_metadata_paths = list(raw_path.glob("tdt_data*.json"))
if any(tdt_metadata_paths):
source_data["FiberPhotometry"].update(tdt_metadata_path=str(tdt_metadata_paths[0]))

conversion_options["FiberPhotometry"] = {}
behavioral_syllable_path = photometry_path # Note: if photometry and optogenetics are both present, photometry is used for syllable data bc it is quicker to load
behavioral_syllable_path = photometry_path
# Note: if photometry and optogenetics are both present,
# photometry is used for syllable data because it is quicker to load
source_data["IRVideo"] = dict(
data_path=str(ir_path),
timestamp_path=str(depth_ts_path),
Expand Down Expand Up @@ -212,7 +221,7 @@ def session_to_nwb(
processed_only = False
for example_session in example_sessions:
session_to_nwb(
session_uuid=example_session,
session_uuid="0fc7bbac-adee-46d8-897a-213a56983ebe",
processed_path=processed_path,
raw_path=experiment_type2raw_path[experiment_type],
output_dir_path=output_dir_path,
Expand All @@ -222,7 +231,7 @@ def session_to_nwb(
)
with NWBHDF5IO(output_dir_path / f"reinforcement-photometry-{raw_rp_example}.nwb", "r") as io:
nwbfile = io.read()
print(nwbfile)

# nwbfile_path = output_dir_path / f"{figure1d_example}.nwb"
# paper_metadata_path = Path(__file__).parent / "markowitz_gillis_nature_2023_metadata.yaml"
# reproduce_figures.reproduce_fig1d(nwbfile_path, paper_metadata_path)
Original file line number Diff line number Diff line change
@@ -1,27 +1,29 @@
"""Primary class for converting fiber photometry data (dLight fluorescence)."""
# Standard Scientific Python
from typing import Union

import pandas as pd
import numpy as np

# NWB Ecosystem
from pynwb.file import NWBFile
from pynwb.ophys import RoiResponseSeries
from .rawfiberphotometryinterface import RawFiberPhotometryInterface
from neuroconv.tools import nwb_helpers
from neuroconv.utils import FilePathType
from hdmf.backends.hdf5.h5_utils import H5DataIO

from .rawfiberphotometryinterface import RawFiberPhotometryInterface


class FiberPhotometryInterface(RawFiberPhotometryInterface):
def __init__(
self,
file_path: str,
tdt_path: str,
tdt_metadata_path: str,
depth_timestamp_path: str,
session_uuid: str,
session_id: str,
session_metadata_path: str,
subject_metadata_path: str,
tdt_path: Union[FilePathType, None] = None,
tdt_metadata_path: Union[FilePathType, None] = None,
alignment_path: str = None,
):
# This should load the data lazily and prepare variables you need
Expand Down Expand Up @@ -72,6 +74,10 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
filters=[("uuid", "==", self.source_data["session_uuid"])],
)
notnan = pd.notnull(session_df.signal_dff)

if not any(notnan):
return

signal_series = RoiResponseSeries(
name="SignalDfOverF",
description="The ΔF/F from the blue light excitation (470nm) corresponding to the dopamine signal.",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""Primary class for converting MoSeq Extraction data."""
from pynwb import NWBFile
from datetime import datetime
from pytz import timezone
import h5py
import numpy as np
from hdmf.backends.hdf5.h5_utils import H5DataIO
Expand All @@ -19,7 +17,7 @@


class MoseqExtractInterface(BaseDattaInterface):
"""Moseq interface for markowitz_gillis_nature_2023 conversion"""
"""Moseq interface for markowitz_gillis_nature_2023 conversion."""

def __init__(
self,
Expand Down
Loading