From 0ff86c2c2c70b6bf86786151c6e4e03d372c09fa Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Thu, 7 Dec 2023 17:42:35 -0500 Subject: [PATCH 1/4] cleanup and tqdm fix --- .../convert_dataset.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py index fece989..0c7fcd8 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py @@ -1,4 +1,3 @@ -import traceback import json from pathlib import Path from typing import Union @@ -101,7 +100,9 @@ def dataset_to_nwb( for folder in raw_dir_path.iterdir() if folder.is_dir() and folder.name not in skip_experiments and folder.name.startswith("_") ] - for experimental_folder in tqdm(iterable=experimental_folders, position=0, description="Converting experiments..."): + for experimental_folder in tqdm( + iterable=experimental_folders, position=0, desc="Converting experiments...", leave=False + ): experiment_type = folder_name_to_experiment_type[experimental_folder.name] session_folders = [ folder for folder in experimental_folder.iterdir() if folder.is_dir() and folder.name not in skip_sessions @@ -143,7 +144,11 @@ def dataset_to_nwb( break parallel_iterable = tqdm( - iterable=as_completed(futures), position=1, description="Converting sessionsin parallel..." + iterable=as_completed(futures), + total=len(futures), + position=1, + desc="Converting sessions in parallel...", + leave=False, ) for _ in parallel_iterable: pass @@ -154,7 +159,7 @@ def dataset_to_nwb( processed_path = Path("E:/Datta/dopamine-reinforces-spontaneous-behavior") raw_dir_path = Path("E:/Datta") - output_dir_path = Path("E:/datta_output/files") + output_dir_path = Path("F:/Datta/nwbfiles") skip_experiments = { "keypoint", # no proc folder for keypoints @@ -190,5 +195,5 @@ def dataset_to_nwb( output_dir_path=output_dir_path, skip_sessions=temporary_skip_sessions, number_of_jobs=number_of_jobs, - num_sessions_per_experiment=1, + # num_sessions_per_experiment=1, ) From 3c059f36cb77b4a457c26da2709a1e452766bc51 Mon Sep 17 00:00:00 2001 From: codycbakerphd Date: Tue, 12 Dec 2023 15:54:43 -0500 Subject: [PATCH 2/4] final details --- .../convert_dataset.py | 36 ++++--------------- .../convert_session.py | 11 ++++-- .../moseqextractinterface.py | 4 +-- .../rawfiberphotometryinterface.py | 10 +++--- 4 files changed, 21 insertions(+), 40 deletions(-) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py index 0c7fcd8..57c89e0 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py @@ -1,3 +1,4 @@ +"""Convert the entire dataset.""" import json from pathlib import Path from typing import Union @@ -70,7 +71,7 @@ def get_all_processed_uuids( all_processed_uuids = unique_photometry_uuids | unique_reinforcement_uuids | unique_velocity_uuids with open(file=uuid_file_path, mode="w") as io: - json.dump(obj=list(all_processed_uuids), fp=io) + json.dump(obj=list(all_processed_uuids), fp=io, indent=4) return all_processed_uuids @@ -79,13 +80,14 @@ def dataset_to_nwb( processed_path: Union[str, Path], raw_dir_path: Union[str, Path], output_dir_path: Union[str, Path], - skip_sessions: set, - number_of_jobs: int, + skip_sessions: Union[set, None] = 
None, + number_of_jobs: int = 1, num_sessions_per_experiment: int = None, ): processed_path = Path(processed_path) raw_dir_path = Path(raw_dir_path) output_dir_path = Path(output_dir_path) + skip_sessions = skip_sessions or set() log_folder_path = output_dir_path.parent / "logs" log_folder_path.mkdir(exist_ok=True) @@ -155,7 +157,7 @@ def dataset_to_nwb( if __name__ == "__main__": - number_of_jobs = 4 + number_of_jobs = 1 processed_path = Path("E:/Datta/dopamine-reinforces-spontaneous-behavior") raw_dir_path = Path("E:/Datta") @@ -164,36 +166,10 @@ def dataset_to_nwb( skip_experiments = { "keypoint", # no proc folder for keypoints } - temporary_skip_sessions = { - "session_20210420113646-974717", # _aggregate_results_arhmm_photometry_excitation_pulsed_01: missing everything except depth video - "session_20210309134748-687283", # _aggregate_results_arhmm_excitation_03: missing everything except depth video - "session_20210224083612-947426", # _aggregate_results_arhmm_excitation_03: missing proc folder - "session_20210224094428-535503", # _aggregate_results_arhmm_excitation_03: missing proc folder - "session_20210309120607-939403", # _aggregate_results_arhmm_excitation_03: proc folder empty - "session_20201109130417-162983", # _aggregate_results_arhmm_excitation_01: proc folder empty - "session_20220308114215-760303", # _aggregate_results_arhmm_scalar_03: missing proc folder - "session_20211217102637-612299", # _aggregate_results_arhmm_photometry_06: missing everything except ir video - "session_20211202155132-245700", # _aggregate_results_arhmm_photometry_06: missing everything except ir video - "session_20210128093041-475933", # _aggregate_results_arhmm_photometry_02: missing everything except ir video - "session_20210215185110-281693", # _aggregate_results_arhmm_photometry_02: missing everything except ir video - "session_20210208173229-833584", # _aggregate_results_arhmm_photometry_02: missing everything except ir video - "session_20210201115439-569392", # _aggregate_results_arhmm_photometry_02: missing everything except ir video - "session_20200729112540-313279", # _aggregate_results_arhmm_07: missing everything except depth video - "session_20200810085750-497237", # _aggregate_results_arhmm_07: missing everything except depth video - "session_20200730090228-985303", # _aggregate_results_arhmm_07: missing everything except depth video - "session_20201207093653-476370", # _aggregate_results_arhmm_excitation_02: missing everything except depth video - "session_20210426143230-310843", # _aggregate_results_arhmm_09: missing everything except depth video - "session_20210429135801-758690", # _aggregate_results_arhmm_09: missing everything except depth video - "session_20191111130454-333065", # _aggregate_results_arhmm_05: missing proc folder - "session_20191111130847-263894", # _aggregate_results_arhmm_05: missing proc folder - "session_20200720110309-817092", - "session_20210115130943-880998", - } dataset_to_nwb( processed_path=processed_path, raw_dir_path=raw_dir_path, output_dir_path=output_dir_path, - skip_sessions=temporary_skip_sessions, number_of_jobs=number_of_jobs, # num_sessions_per_experiment=1, ) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py index e7e177f..04958e9 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py @@ -7,7 +7,6 @@ from neuroconv.utils import 
dict_deep_update, load_dict_from_file from pynwb import NWBHDF5IO -from datta_lab_to_nwb.markowitz_gillis_nature_2023.postconversion import reproduce_fig1d from datta_lab_to_nwb.markowitz_gillis_nature_2023.nwbconverter import DattaNWBConverter @@ -52,7 +51,11 @@ def session_to_nwb( output_dir_path = output_dir_path / "nwb_stub" output_dir_path.mkdir(parents=True, exist_ok=True) session_id = f"{experiment_type}-{session_uuid}" + nwbfile_path = output_dir_path / f"{session_id}.nwb" + if nwbfile_path.exists(): + return + photometry_path = processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet" if experiment_type == "velocity-modulation": optoda_path = processed_path / "optoda_raw_data/closed_loop_behavior_velocity_conditioned.parquet" @@ -128,7 +131,9 @@ def session_to_nwb( alignment_path=str(alignment_path), ) conversion_options["FiberPhotometry"] = {} - behavioral_syllable_path = photometry_path # Note: if photometry and optogenetics are both present, photometry is used for syllable data bc it is quicker to load + behavioral_syllable_path = photometry_path + # Note: if photometry and optogenetics are both present + # photometry is used for syllable data bc it is quicker to load source_data["IRVideo"] = dict( data_path=str(ir_path), timestamp_path=str(depth_ts_path), @@ -222,7 +227,7 @@ def session_to_nwb( ) with NWBHDF5IO(output_dir_path / f"reinforcement-photometry-{raw_rp_example}.nwb", "r") as io: nwbfile = io.read() - print(nwbfile) + # print(nwbfile) # nwbfile_path = output_dir_path / f"{figure1d_example}.nwb" # paper_metadata_path = Path(__file__).parent / "markowitz_gillis_nature_2023_metadata.yaml" # reproduce_figures.reproduce_fig1d(nwbfile_path, paper_metadata_path) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py index 53556b5..0004478 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py @@ -1,7 +1,5 @@ """Primary class for converting MoSeq Extraction data.""" from pynwb import NWBFile -from datetime import datetime -from pytz import timezone import h5py import numpy as np from hdmf.backends.hdf5.h5_utils import H5DataIO @@ -19,7 +17,7 @@ class MoseqExtractInterface(BaseDattaInterface): - """Moseq interface for markowitz_gillis_nature_2023 conversion""" + """Moseq interface for markowitz_gillis_nature_2023 conversion.""" def __init__( self, diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py index 1fbb2fb..772e769 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py @@ -118,6 +118,8 @@ def align_raw_timestamps(self, metadata: dict) -> np.ndarray: def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: photometry_dict = load_tdt_data(self.source_data["tdt_path"], fs=metadata["FiberPhotometry"]["raw_rate"]) timestamps = self.align_raw_timestamps(metadata=metadata) + ascending_timestamps_indices = np.argsort(timestamps) + raw_photometry = photometry_dict["pmt00"] commanded_signal = photometry_dict["pmt00_x"] commanded_reference = photometry_dict["pmt01_x"] @@ -130,10 +132,10 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: "A 470nm (blue) LED and a 
405nM (UV) LED (Mightex) were sinusoidally modulated at 161Hz and 381Hz, " "respectively (these frequencies were chosen to avoid harmonic cross-talk)." ), - data=H5DataIO(commanded_signal, compression=True), + data=H5DataIO(commanded_signal[ascending_timestamps_indices], compression=True), frequency=metadata["FiberPhotometry"]["signal_freq"], power=float(metadata["FiberPhotometry"]["signal_amp"]), # TODO: Fix this in ndx-photometry - timestamps=H5DataIO(timestamps, compression=True), + timestamps=H5DataIO(timestamps[ascending_timestamps_indices], compression=True), unit="volts", ) commanded_reference_series = multi_commanded_voltage.create_commanded_voltage_series( @@ -142,7 +144,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: "A 470nm (blue) LED and a 405nM (UV) LED (Mightex) were sinusoidally modulated at 161Hz and 381Hz, " "respectively (these frequencies were chosen to avoid harmonic cross-talk)." ), - data=H5DataIO(commanded_reference, compression=True), + data=H5DataIO(commanded_reference[ascending_timestamps_indices], compression=True), frequency=metadata["FiberPhotometry"]["reference_freq"], power=float(metadata["FiberPhotometry"]["reference_amp"]), # TODO: Fix this in ndx-photometry timestamps=commanded_signal_series.timestamps, @@ -239,7 +241,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: raw_photometry = RoiResponseSeries( name="RawPhotometry", description="The raw acquisition with mixed signal from both the blue light excitation (470nm) and UV excitation (405nm).", - data=H5DataIO(raw_photometry, compression=True), + data=H5DataIO(raw_photometry[ascending_timestamps_indices], compression=True), unit="F", timestamps=commanded_signal_series.timestamps, rois=self.fibers_ref, From 986c3b8a788c5345b9e1175c4fe83811ce26de98 Mon Sep 17 00:00:00 2001 From: codycbakerphd Date: Tue, 12 Dec 2023 17:40:13 -0500 Subject: [PATCH 3/4] debugs --- .../basedattainterface.py | 11 +- .../convert_dataset.py | 4 +- .../convert_session.py | 20 +- .../fiberphotometryinterface.py | 16 +- .../rawfiberphotometryinterface.py | 175 +++++++++++------- 5 files changed, 144 insertions(+), 82 deletions(-) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basedattainterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basedattainterface.py index bced6ff..40a5035 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basedattainterface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basedattainterface.py @@ -1,8 +1,10 @@ """Primary class for handling metadata non-specific to any other DataInterfaces.""" -from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface -from neuroconv.utils import load_dict_from_file +from pathlib import Path + import pandas as pd import numpy as np +from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface +from neuroconv.utils import load_dict_from_file class BaseDattaInterface(BaseTemporalAlignmentInterface): @@ -24,10 +26,13 @@ def get_metadata(self) -> dict: metadata["Subject"]["subject_id"] = session_metadata["subject_id"] metadata["Subject"]["sex"] = subject_metadata["sex"] - if self.source_data["alignment_path"] is not None: + if self.source_data["alignment_path"] is not None and Path(self.source_data["alignment_path"]).exists(): alignment_df = pd.read_parquet(self.source_data["alignment_path"]) metadata["Alignment"]["slope"] = alignment_df["slope"].iloc[0] metadata["Alignment"]["bias"] = alignment_df["bias"].iloc[0] + else: + 
metadata["Alignment"]["slope"] = 1.0 + metadata["Alignment"]["bias"] = 0.0 return metadata diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py index 57c89e0..72ed2b4 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py @@ -101,7 +101,7 @@ def dataset_to_nwb( folder for folder in raw_dir_path.iterdir() if folder.is_dir() and folder.name not in skip_experiments and folder.name.startswith("_") - ] + ][11:] for experimental_folder in tqdm( iterable=experimental_folders, position=0, desc="Converting experiments...", leave=False ): @@ -157,7 +157,7 @@ def dataset_to_nwb( if __name__ == "__main__": - number_of_jobs = 1 + number_of_jobs = 2 processed_path = Path("E:/Datta/dopamine-reinforces-spontaneous-behavior") raw_dir_path = Path("E:/Datta") diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py index 04958e9..be495f4 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py @@ -53,7 +53,9 @@ def session_to_nwb( session_id = f"{experiment_type}-{session_uuid}" nwbfile_path = output_dir_path / f"{session_id}.nwb" - if nwbfile_path.exists(): + if ( + nwbfile_path.parent.parent / "initial_nwbfiles" / nwbfile_path.name + ).exists() or nwbfile_path.exists(): # temporary return photometry_path = processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet" @@ -116,13 +118,9 @@ def session_to_nwb( conversion_options["BehavioralSyllable"] = dict(reinforcement=True) behavioral_syllable_path = optoda_path if "photometry" in session_metadata.keys(): - tdt_path = list(raw_path.glob("tdt_data*.dat"))[0] - tdt_metadata_path = list(raw_path.glob("tdt_data*.json"))[0] ir_path = raw_path / "ir.avi" source_data["FiberPhotometry"] = dict( file_path=str(photometry_path), - tdt_path=str(tdt_path), - tdt_metadata_path=str(tdt_metadata_path), depth_timestamp_path=str(depth_ts_path), session_metadata_path=str(session_metadata_path), subject_metadata_path=str(subject_metadata_path), @@ -130,6 +128,14 @@ def session_to_nwb( session_id=session_id, alignment_path=str(alignment_path), ) + + tdt_paths = list(raw_path.glob("tdt_data*.dat")) + if any(tdt_paths): + source_data["FiberPhotometry"].update(tdt_path=str(tdt_paths[0])) + tdt_metadata_paths = list(raw_path.glob("tdt_data*.json")) + if any(tdt_metadata_paths): + source_data["FiberPhotometry"].update(tdt_metadata_path=str(tdt_metadata_paths[0])) + conversion_options["FiberPhotometry"] = {} behavioral_syllable_path = photometry_path # Note: if photometry and optogenetics are both present @@ -217,7 +223,7 @@ def session_to_nwb( processed_only = False for example_session in example_sessions: session_to_nwb( - session_uuid=example_session, + session_uuid="0fc7bbac-adee-46d8-897a-213a56983ebe", processed_path=processed_path, raw_path=experiment_type2raw_path[experiment_type], output_dir_path=output_dir_path, @@ -227,7 +233,7 @@ def session_to_nwb( ) with NWBHDF5IO(output_dir_path / f"reinforcement-photometry-{raw_rp_example}.nwb", "r") as io: nwbfile = io.read() - # print(nwbfile) + # nwbfile_path = output_dir_path / f"{figure1d_example}.nwb" # paper_metadata_path = Path(__file__).parent / "markowitz_gillis_nature_2023_metadata.yaml" # 
reproduce_figures.reproduce_fig1d(nwbfile_path, paper_metadata_path) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py index f1557d0..b1c73ae 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py @@ -1,27 +1,29 @@ """Primary class for converting fiber photometry data (dLight fluorescence).""" -# Standard Scientific Python +from typing import Union + import pandas as pd import numpy as np -# NWB Ecosystem from pynwb.file import NWBFile from pynwb.ophys import RoiResponseSeries -from .rawfiberphotometryinterface import RawFiberPhotometryInterface from neuroconv.tools import nwb_helpers +from neuroconv.utils import FilePathType from hdmf.backends.hdf5.h5_utils import H5DataIO +from .rawfiberphotometryinterface import RawFiberPhotometryInterface + class FiberPhotometryInterface(RawFiberPhotometryInterface): def __init__( self, file_path: str, - tdt_path: str, - tdt_metadata_path: str, depth_timestamp_path: str, session_uuid: str, session_id: str, session_metadata_path: str, subject_metadata_path: str, + tdt_path: Union[FilePathType, None] = None, + tdt_metadata_path: Union[FilePathType, None] = None, alignment_path: str = None, ): # This should load the data lazily and prepare variables you need @@ -72,6 +74,10 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: filters=[("uuid", "==", self.source_data["session_uuid"])], ) notnan = pd.notnull(session_df.signal_dff) + + if not any(notnan): + return + signal_series = RoiResponseSeries( name="SignalDfOverF", description="The ΔF/F from the blue light excitation (470nm) corresponding to the dopamine signal.", diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py index 772e769..d21ec47 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py @@ -1,10 +1,10 @@ """Primary class for converting Raw fiber photometry data (dLight fluorescence) from the TDT system.""" -# Standard Scientific Python +from pathlib import Path +from typing import Union + import pandas as pd import numpy as np from scipy.interpolate import interp1d - -# NWB Ecosystem from pynwb.file import NWBFile from pynwb.core import DynamicTableRegion from pynwb.ophys import RoiResponseSeries @@ -16,25 +16,26 @@ FiberPhotometry, FluorophoresTable, ) -from .basedattainterface import BaseDattaInterface -from .utils import convert_timestamps_to_seconds -from neuroconv.utils import load_dict_from_file +from neuroconv.utils import load_dict_from_file, FilePathType from neuroconv.tools import nwb_helpers from hdmf.backends.hdf5.h5_utils import H5DataIO +from .basedattainterface import BaseDattaInterface +from .utils import convert_timestamps_to_seconds + class RawFiberPhotometryInterface(BaseDattaInterface): """Raw Fiber Photometry interface for markowitz_gillis_nature_2023 conversion.""" def __init__( self, - tdt_path: str, - tdt_metadata_path: str, depth_timestamp_path: str, session_uuid: str, session_id: str, session_metadata_path: str, subject_metadata_path: str, + tdt_path: Union[FilePathType, None] = None, + tdt_metadata_path: Union[FilePathType, None] = None, alignment_path: str = None, 
**kwargs, ): @@ -54,7 +55,6 @@ def get_metadata(self) -> dict: metadata = super().get_metadata() session_metadata = load_dict_from_file(self.source_data["session_metadata_path"]) subject_metadata = load_dict_from_file(self.source_data["subject_metadata_path"]) - tdt_metadata = load_dict_from_file(self.source_data["tdt_metadata_path"]) session_metadata = session_metadata[self.source_data["session_uuid"]] subject_metadata = subject_metadata[session_metadata["subject_id"]] @@ -63,12 +63,15 @@ def get_metadata(self) -> dict: metadata["FiberPhotometry"]["signal_reference_corr"] = session_metadata["signal_reference_corr"] metadata["FiberPhotometry"]["snr"] = session_metadata["snr"] metadata["FiberPhotometry"]["area"] = subject_metadata["photometry_area"] - metadata["FiberPhotometry"]["gain"] = float(tdt_metadata["tags"]["OutputGain"]) - metadata["FiberPhotometry"]["signal_amp"] = tdt_metadata["tags"]["LED1Amp"] - metadata["FiberPhotometry"]["reference_amp"] = tdt_metadata["tags"]["LED2Amp"] - metadata["FiberPhotometry"]["signal_freq"] = float(tdt_metadata["tags"]["LED1Freq"]) - metadata["FiberPhotometry"]["reference_freq"] = float(tdt_metadata["tags"]["LED2Freq"]) - metadata["FiberPhotometry"]["raw_rate"] = tdt_metadata["status"]["sampling_rate"] + + if self.source_data["tdt_metadata_path"] is not None: + tdt_metadata = load_dict_from_file(self.source_data["tdt_metadata_path"]) + metadata["FiberPhotometry"]["gain"] = float(tdt_metadata["tags"]["OutputGain"]) + metadata["FiberPhotometry"]["signal_amp"] = tdt_metadata["tags"]["LED1Amp"] + metadata["FiberPhotometry"]["reference_amp"] = tdt_metadata["tags"]["LED2Amp"] + metadata["FiberPhotometry"]["signal_freq"] = float(tdt_metadata["tags"]["LED1Freq"]) + metadata["FiberPhotometry"]["reference_freq"] = float(tdt_metadata["tags"]["LED2Freq"]) + metadata["FiberPhotometry"]["raw_rate"] = tdt_metadata["status"]["sampling_rate"] return metadata @@ -116,40 +119,14 @@ def align_raw_timestamps(self, metadata: dict) -> np.ndarray: return self.aligned_timestamps def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: - photometry_dict = load_tdt_data(self.source_data["tdt_path"], fs=metadata["FiberPhotometry"]["raw_rate"]) - timestamps = self.align_raw_timestamps(metadata=metadata) - ascending_timestamps_indices = np.argsort(timestamps) - - raw_photometry = photometry_dict["pmt00"] - commanded_signal = photometry_dict["pmt00_x"] - commanded_reference = photometry_dict["pmt01_x"] - - # Commanded Voltage - multi_commanded_voltage = MultiCommandedVoltage() - commanded_signal_series = multi_commanded_voltage.create_commanded_voltage_series( - name="commanded_signal", - description=( - "A 470nm (blue) LED and a 405nM (UV) LED (Mightex) were sinusoidally modulated at 161Hz and 381Hz, " - "respectively (these frequencies were chosen to avoid harmonic cross-talk)." - ), - data=H5DataIO(commanded_signal[ascending_timestamps_indices], compression=True), - frequency=metadata["FiberPhotometry"]["signal_freq"], - power=float(metadata["FiberPhotometry"]["signal_amp"]), # TODO: Fix this in ndx-photometry - timestamps=H5DataIO(timestamps[ascending_timestamps_indices], compression=True), - unit="volts", - ) - commanded_reference_series = multi_commanded_voltage.create_commanded_voltage_series( - name="commanded_reference", + # Fibers Table + fibers_table = FibersTable( description=( - "A 470nm (blue) LED and a 405nM (UV) LED (Mightex) were sinusoidally modulated at 161Hz and 381Hz, " - "respectively (these frequencies were chosen to avoid harmonic cross-talk)." 
- ), - data=H5DataIO(commanded_reference[ascending_timestamps_indices], compression=True), - frequency=metadata["FiberPhotometry"]["reference_freq"], - power=float(metadata["FiberPhotometry"]["reference_amp"]), # TODO: Fix this in ndx-photometry - timestamps=commanded_signal_series.timestamps, - unit="volts", + "Fiber photometry data with 2 excitation sources (470nm and 405nm), 1 PMT photodetector with " + "a peak wavelength of 527nm, and 1 fluorophore (dLight1.1)." + ) ) + # Excitation Sources Table excitation_sources_table = ExcitationSourcesTable( description=( @@ -164,16 +141,6 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: "connected to the optical implant in the freely moving mouse." ) ) - excitation_sources_table.add_row( - peak_wavelength=470.0, - source_type="LED", - commanded_voltage=commanded_signal_series, - ) - excitation_sources_table.add_row( - peak_wavelength=405.0, - source_type="LED", - commanded_voltage=commanded_reference_series, - ) # Photodetectors Table photodetectors_table = PhotodetectorsTable( @@ -186,7 +153,10 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: "offline analysis." ), ) - photodetectors_table.add_row(peak_wavelength=527.0, type="PMT", gain=metadata["FiberPhotometry"]["gain"]) + if "FiberPhotometry" in metadata and "gain" in metadata["FiberPhotometry"]: + photodetectors_table.add_row(peak_wavelength=527.0, type="PMT", gain=metadata["FiberPhotometry"]["gain"]) + else: + photodetectors_table.add_row(peak_wavelength=527.0, type="PMT") # Fluorophores Table fluorophores_table = FluorophoresTable( @@ -204,13 +174,88 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: coordinates=(0.260, 2.550, -2.40), # (AP, ML, DV) ) - # Fibers Table - fibers_table = FibersTable( - description=( - "Fiber photometry data with 2 excitation sources (470nm and 405nm), 1 PMT photodetector with " - "a peak wavelength of 527nm, and 1 fluorophore (dLight1.1)." 
+ skip = ( + self.source_data["tdt_path"] is None + or not Path(self.source_data["tdt_path"]).exists() + or not (Path(self.source_data["tdt_path"]).parent / "alignment_df.parquet").exists() + ) + if skip: + excitation_sources_table.add_row(peak_wavelength=470.0, source_type="LED") + excitation_sources_table.add_row(peak_wavelength=405.0, source_type="LED") + + nwbfile.add_lab_meta_data( + FiberPhotometry( + fibers=fibers_table, + excitation_sources=excitation_sources_table, + photodetectors=photodetectors_table, + fluorophores=fluorophores_table, + ) + ) + # Important: we add the fibers to the fibers table _after_ adding the metadata + # This ensures that we can find this data in their tables of origin + fibers_table.add_fiber( + excitation_source=0, # integers indicated rows of excitation sources table + photodetector=0, + fluorophores=[0], # potentially multiple fluorophores, so list of indices + location=metadata["FiberPhotometry"]["area"], + ) + fibers_table.add_fiber( + excitation_source=1, # integers indicated rows of excitation sources table + photodetector=0, + fluorophores=[0], # potentially multiple fluorophores, so list of indices + location=metadata["FiberPhotometry"]["area"], + ) + self.fibers_ref = DynamicTableRegion( + name="rois", data=[0, 1], description="source fibers", table=fibers_table ) + return + + photometry_dict = load_tdt_data(self.source_data["tdt_path"], fs=metadata["FiberPhotometry"]["raw_rate"]) + timestamps = self.align_raw_timestamps(metadata=metadata) + ascending_timestamps_indices = np.argsort(timestamps) + + raw_photometry = photometry_dict["pmt00"] + commanded_signal = photometry_dict["pmt00_x"] + commanded_reference = photometry_dict["pmt01_x"] + + # Commanded Voltage + multi_commanded_voltage = MultiCommandedVoltage() + commanded_signal_series = multi_commanded_voltage.create_commanded_voltage_series( + name="commanded_signal", + description=( + "A 470nm (blue) LED and a 405nM (UV) LED (Mightex) were sinusoidally modulated at 161Hz and 381Hz, " + "respectively (these frequencies were chosen to avoid harmonic cross-talk)." + ), + data=H5DataIO(commanded_signal[ascending_timestamps_indices], compression=True), + frequency=metadata["FiberPhotometry"]["signal_freq"], + power=float(metadata["FiberPhotometry"]["signal_amp"]), # TODO: Fix this in ndx-photometry + timestamps=H5DataIO(timestamps[ascending_timestamps_indices], compression=True), + unit="volts", ) + commanded_reference_series = multi_commanded_voltage.create_commanded_voltage_series( + name="commanded_reference", + description=( + "A 470nm (blue) LED and a 405nM (UV) LED (Mightex) were sinusoidally modulated at 161Hz and 381Hz, " + "respectively (these frequencies were chosen to avoid harmonic cross-talk)." 
+ ), + data=H5DataIO(commanded_reference[ascending_timestamps_indices], compression=True), + frequency=metadata["FiberPhotometry"]["reference_freq"], + power=float(metadata["FiberPhotometry"]["reference_amp"]), # TODO: Fix this in ndx-photometry + timestamps=commanded_signal_series.timestamps, + unit="volts", + ) + + excitation_sources_table.add_row( + peak_wavelength=470.0, + source_type="LED", + commanded_voltage=commanded_signal_series, + ) + excitation_sources_table.add_row( + peak_wavelength=405.0, + source_type="LED", + commanded_voltage=commanded_reference_series, + ) + nwbfile.add_lab_meta_data( FiberPhotometry( fibers=fibers_table, @@ -219,7 +264,6 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: fluorophores=fluorophores_table, ) ) - # Important: we add the fibers to the fibers table _after_ adding the metadata # This ensures that we can find this data in their tables of origin fibers_table.add_fiber( @@ -235,9 +279,10 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: location=metadata["FiberPhotometry"]["area"], ) + self.fibers_ref = DynamicTableRegion(name="rois", data=[0, 1], description="source fibers", table=fibers_table) + # ROI Response Series # Here we set up a list of fibers that our recording came from - self.fibers_ref = DynamicTableRegion(name="rois", data=[0, 1], description="source fibers", table=fibers_table) raw_photometry = RoiResponseSeries( name="RawPhotometry", description="The raw acquisition with mixed signal from both the blue light excitation (470nm) and UV excitation (405nm).", From 7725defc8642575439e456bb56e9bbe6fa4fb438 Mon Sep 17 00:00:00 2001 From: codycbakerphd Date: Thu, 14 Dec 2023 11:19:33 -0500 Subject: [PATCH 4/4] skip video if missing --- .../basevideointerface.py | 13 ++++++++----- .../markowitz_gillis_nature_2023/convert_dataset.py | 4 ++-- .../markowitz_gillis_nature_2023/convert_session.py | 4 +--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py index c8ad15e..41066ea 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py @@ -1,17 +1,17 @@ """Base class for converting raw video data.""" -from pynwb import NWBFile -from datetime import datetime -from pytz import timezone -import h5py +from pathlib import Path + import numpy as np import pandas as pd +from pynwb import NWBFile from neuroconv.datainterfaces import VideoInterface + from .basedattainterface import BaseDattaInterface from .utils import convert_timestamps_to_seconds class BaseVideoInterface(BaseDattaInterface): - """Base video interface for markowitz_gillis_nature_2023 conversion""" + """Base video interface for markowitz_gillis_nature_2023 conversion.""" def __init__( self, @@ -51,6 +51,9 @@ def align_timestamps(self, metadata: dict) -> np.ndarray: def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: timestamps = self.align_timestamps(metadata=metadata) + if not Path(self.source_data["data_path"]).exists(): + return + video_interface = VideoInterface(file_paths=[self.source_data["data_path"]], verbose=True) video_interface.set_aligned_timestamps(aligned_timestamps=[timestamps]) video_interface.add_to_nwbfile( diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py 
b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py index 72ed2b4..57c89e0 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py @@ -101,7 +101,7 @@ def dataset_to_nwb( folder for folder in raw_dir_path.iterdir() if folder.is_dir() and folder.name not in skip_experiments and folder.name.startswith("_") - ][11:] + ] for experimental_folder in tqdm( iterable=experimental_folders, position=0, desc="Converting experiments...", leave=False ): @@ -157,7 +157,7 @@ def dataset_to_nwb( if __name__ == "__main__": - number_of_jobs = 2 + number_of_jobs = 1 processed_path = Path("E:/Datta/dopamine-reinforces-spontaneous-behavior") raw_dir_path = Path("E:/Datta") diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py index be495f4..893e376 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py @@ -53,9 +53,7 @@ def session_to_nwb( session_id = f"{experiment_type}-{session_uuid}" nwbfile_path = output_dir_path / f"{session_id}.nwb" - if ( - nwbfile_path.parent.parent / "initial_nwbfiles" / nwbfile_path.name - ).exists() or nwbfile_path.exists(): # temporary + if nwbfile_path.exists(): return photometry_path = processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet"
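
A condensed sketch of the conversion-loop pattern these four patches converge on: nested tqdm bars with `desc`/`position`/`leave`, `total=len(futures)` when iterating `as_completed`, and an early-return guard that skips work whose output (or input) already exists or is missing. The folder layout and the `convert_session_stub` helper below are hypothetical placeholders; only the tqdm keywords and the skip guard mirror the diffs above.

from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path

from tqdm import tqdm


def convert_session_stub(session_folder: Path, output_dir_path: Path) -> None:
    """Hypothetical stand-in for session_to_nwb: skip sessions that were already converted."""
    nwbfile_path = output_dir_path / f"{session_folder.name}.nwb"
    if nwbfile_path.exists():  # early-return guard, as in convert_session.py (patch 2)
        return
    nwbfile_path.touch()  # placeholder for the real conversion


def convert_experiments(experimental_folders, output_dir_path: Path, number_of_jobs: int = 1) -> None:
    # Outer bar over experiments (position=0); inner bar over parallel session futures (position=1).
    for experimental_folder in tqdm(
        iterable=experimental_folders, position=0, desc="Converting experiments...", leave=False
    ):
        session_folders = [folder for folder in experimental_folder.iterdir() if folder.is_dir()]
        with ProcessPoolExecutor(max_workers=number_of_jobs) as executor:
            futures = [
                executor.submit(convert_session_stub, session_folder, output_dir_path)
                for session_folder in session_folders
            ]
            # as_completed yields futures as they finish; pass total=len(futures) for a bounded bar.
            parallel_iterable = tqdm(
                iterable=as_completed(futures),
                total=len(futures),
                position=1,
                desc="Converting sessions in parallel...",
                leave=False,
            )
            for _ in parallel_iterable:
                pass


if __name__ == "__main__":  # guard required for ProcessPoolExecutor on Windows
    raw_dir_path = Path("raw_data")  # hypothetical layout: raw_data/<experiment>/<session>/
    output_dir_path = Path("nwbfiles")
    output_dir_path.mkdir(exist_ok=True)
    experimental_folders = [folder for folder in raw_dir_path.iterdir() if folder.is_dir()]
    convert_experiments(experimental_folders, output_dir_path, number_of_jobs=2)

Without `total`, tqdm cannot size the inner bar because `as_completed` returns a plain iterator with no length; `leave=False` clears each finished bar so the nested bars do not pile up in the console between experiments.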