diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basedattainterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basedattainterface.py index a00b3b1..46acd32 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basedattainterface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basedattainterface.py @@ -1,9 +1,11 @@ """Primary class for handling metadata non-specific to any other DataInterfaces.""" -from neuroconv.basedatainterface import BaseDataInterface +from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface from neuroconv.utils import load_dict_from_file +import pandas as pd +import numpy as np -class BaseDattaInterface(BaseDataInterface): +class BaseDattaInterface(BaseTemporalAlignmentInterface): """Base interface for markowitz_gillis_nature_2023 conversion w/ non-specific metadata""" def get_metadata(self) -> dict: @@ -22,4 +24,45 @@ def get_metadata(self) -> dict: metadata["Subject"]["subject_id"] = session_metadata["subject_id"] metadata["Subject"]["sex"] = subject_metadata["sex"] + if self.source_data["alignment_path"] is not None: + alignment_df = pd.read_parquet( + self.source_data["alignment_path"] + ) + metadata["Alignment"]["slope"] = alignment_df["slope"].iloc[0] + metadata["Alignment"]["bias"] = alignment_df["bias"].iloc[0] + return metadata + + def get_metadata_schema(self) -> dict: + metadata_schema = super().get_metadata_schema() + if self.source_data["alignment_path"] is None: + return metadata_schema + metadata_schema["Alignment"] = { + "type": "object", + "description": "Metadata for temporal alignment with photometry data.", + "required": True, + "properties": { + "slope": { + "description": "Slope of the linear regression mapping from behavioral video indices to demodulated photometry indices.", + "required": True, + "type": "float", + }, + "bias": { + "description": "Bias of the linear regression mapping 
from behavioral video indices to demodulated photometry indices.", + "required": True, + "type": "float", + }, + "start_time": { + "description": "Start time offset of raw fiber photometry data relative to behavioral video.", + "required": True, + "type": "float", + }, + }, + } + return metadata_schema + + def set_aligned_timestamps(self, aligned_timestamps: np.ndarray) -> None: + self.aligned_timestamps = aligned_timestamps + + def get_timestamps(self) -> np.ndarray: + return self.aligned_timestamps diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py index 77ebf28..c8ad15e 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py @@ -7,6 +7,7 @@ import pandas as pd from neuroconv.datainterfaces import VideoInterface from .basedattainterface import BaseDattaInterface +from .utils import convert_timestamps_to_seconds class BaseVideoInterface(BaseDattaInterface): @@ -20,6 +21,7 @@ def __init__( session_id: str, session_metadata_path: str, subject_metadata_path: str, + alignment_path: str = None, ): super().__init__( data_path=data_path, @@ -28,13 +30,26 @@ def __init__( session_id=session_id, session_metadata_path=session_metadata_path, subject_metadata_path=subject_metadata_path, + alignment_path=alignment_path, ) + def get_original_timestamps(self) -> np.ndarray: + return pd.read_csv(self.source_data["timestamp_path"], header=None).to_numpy().squeeze() + + def align_timestamps(self, metadata: dict) -> np.ndarray: + timestamps = self.get_original_timestamps() + timestamps = convert_timestamps_to_seconds(timestamps=timestamps, metadata=metadata) + + self.set_aligned_timestamps(aligned_timestamps=timestamps) + if self.source_data["alignment_path"] is not None: + aligned_starting_time = ( + metadata["Alignment"]["bias"] / 
metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"] + ) + self.set_aligned_starting_time(aligned_starting_time=aligned_starting_time) + return self.aligned_timestamps + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: - timestamps = pd.read_csv(self.source_data["timestamp_path"]).to_numpy().squeeze() - TIMESTAMPS_TO_SECONDS = metadata["Constants"]["TIMESTAMPS_TO_SECONDS"] - timestamps -= timestamps[0] - timestamps = timestamps * TIMESTAMPS_TO_SECONDS + timestamps = self.align_timestamps(metadata=metadata) video_interface = VideoInterface(file_paths=[self.source_data["data_path"]], verbose=True) video_interface.set_aligned_timestamps(aligned_timestamps=[timestamps]) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/behavioralsyllableinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/behavioralsyllableinterface.py index 7425f64..655b35b 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/behavioralsyllableinterface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/behavioralsyllableinterface.py @@ -13,14 +13,19 @@ class BehavioralSyllableInterface(BaseDattaInterface): """Behavioral Syllable Interface for markowitz_gillis_nature_2023 conversion""" def __init__( - self, file_path: str, session_uuid: str, session_id: str, session_metadata_path: str, subject_metadata_path: str + self, + file_path: str, + session_uuid: str, + session_id: str, + session_metadata_path: str, + subject_metadata_path: str, + alignment_path: str = None, ): # This should load the data lazily and prepare variables you need columns = ( "uuid", "predicted_syllable (offline)", "predicted_syllable", - "timestamp", ) super().__init__( file_path=file_path, @@ -29,6 +34,7 @@ def __init__( columns=columns, session_metadata_path=session_metadata_path, subject_metadata_path=subject_metadata_path, + alignment_path=alignment_path, ) def get_metadata_schema(self) -> dict: @@ -43,9 +49,27 @@ def get_metadata_schema(self) -> dict: } 
return metadata_schema + def get_original_timestamps(self) -> np.ndarray: + session_df = pd.read_parquet( + self.source_data["file_path"], + columns=["timestamp", "uuid"], + filters=[("uuid", "==", self.source_data["session_uuid"])], + ) + return session_df["timestamp"].to_numpy() + + def align_timestamps(self, metadata: dict) -> np.ndarray: + timestamps = self.get_original_timestamps() + self.set_aligned_timestamps(aligned_timestamps=timestamps) + if self.source_data["alignment_path"] is not None: + aligned_starting_time = ( + metadata["Alignment"]["bias"] / metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"] + ) + self.set_aligned_starting_time(aligned_starting_time=aligned_starting_time) + return self.aligned_timestamps + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, velocity_modulation: bool = False) -> None: if velocity_modulation: - columns = ["uuid", "predicted_syllable", "timestamp"] + columns = ["uuid", "predicted_syllable"] else: columns = self.source_data["columns"] session_df = pd.read_parquet( @@ -53,6 +77,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, velocity_modulation: columns=columns, filters=[("uuid", "==", self.source_data["session_uuid"])], ) + timestamps = self.align_timestamps(metadata=metadata) # Add Syllable Data sorted_pseudoindex2name = metadata["BehavioralSyllable"]["sorted_pseudoindex2name"] id2sorted_index = metadata["BehavioralSyllable"]["id2sorted_index"] @@ -66,7 +91,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, velocity_modulation: online_syllables = LabeledEvents( name="BehavioralSyllableOnline", description="Behavioral Syllable identified by online Motion Sequencing (MoSeq).", - timestamps=H5DataIO(session_df["timestamp"].to_numpy(), compression=True), + timestamps=H5DataIO(timestamps, compression=True), data=H5DataIO(online_syllable_indices, compression=True), labels=H5DataIO(index2name, compression=True), ) @@ -82,7 +107,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, 
metadata: dict, velocity_modulation: offline_syllables = LabeledEvents( name="BehavioralSyllableOffline", description="Behavioral Syllable identified by offline Motion Sequencing (MoSeq).", - timestamps=H5DataIO(session_df["timestamp"].to_numpy(), compression=True), + timestamps=H5DataIO(timestamps, compression=True), data=H5DataIO(offline_syllable_indices, compression=True), labels=H5DataIO(index2name, compression=True), ) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py index b2710ac..7bf1309 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py @@ -42,8 +42,41 @@ def session_to_nwb( depth_path = raw_path / "depth.avi" depth_ts_path = raw_path / "depth_ts.txt" moseq_path = raw_path / "proc/results_00.h5" + alignment_path = raw_path / "alignment_df.parquet" source_data, conversion_options = {}, {} + source_data.update( + dict( + MoseqExtract=dict( + file_path=str(moseq_path), + session_metadata_path=str(session_metadata_path), + subject_metadata_path=str(subject_metadata_path), + session_uuid=session_uuid, + session_id=session_id, + ), + BehavioralSyllable=dict( + session_metadata_path=str(session_metadata_path), + subject_metadata_path=str(subject_metadata_path), + session_uuid=session_uuid, + session_id=session_id, + ), + DepthVideo=dict( + data_path=str(depth_path), + timestamp_path=str(depth_ts_path), + session_metadata_path=str(session_metadata_path), + subject_metadata_path=str(subject_metadata_path), + session_uuid=session_uuid, + session_id=session_id, + ), + ) + ) + conversion_options.update( + dict( + MoseqExtract={}, + BehavioralSyllable={}, + DepthVideo={}, + ) + ) if "reinforcement" in session_metadata.keys(): source_data["Optogenetic"] = dict( file_path=str(optoda_path), @@ -62,10 +95,12 @@ def session_to_nwb( file_path=str(photometry_path), 
tdt_path=str(tdt_path), tdt_metadata_path=str(tdt_metadata_path), + depth_timestamp_path=str(depth_ts_path), session_metadata_path=str(session_metadata_path), subject_metadata_path=str(subject_metadata_path), session_uuid=session_uuid, session_id=session_id, + alignment_path=str(alignment_path), ) conversion_options["FiberPhotometry"] = {} behavioral_syllable_path = photometry_path # Note: if photometry and optogenetics are both present, photometry is used for syllable data bc it is quicker to load @@ -76,43 +111,17 @@ subject_metadata_path=str(subject_metadata_path), session_uuid=session_uuid, session_id=session_id, + alignment_path=str(alignment_path), ) conversion_options["IRVideo"] = {} - source_data.update( - dict( - MoseqExtract=dict( - file_path=str(moseq_path), - session_metadata_path=str(session_metadata_path), - subject_metadata_path=str(subject_metadata_path), - session_uuid=session_uuid, - session_id=session_id, - ), - BehavioralSyllable=dict( - file_path=str(behavioral_syllable_path), - session_metadata_path=str(session_metadata_path), - subject_metadata_path=str(subject_metadata_path), - session_uuid=session_uuid, - session_id=session_id, - ), - DepthVideo=dict( - data_path=str(depth_path), - timestamp_path=str(depth_ts_path), - session_metadata_path=str(session_metadata_path), - subject_metadata_path=str(subject_metadata_path), - session_uuid=session_uuid, - session_id=session_id, - ), - ) - ) - conversion_options.update( - dict( - MoseqExtract={}, - BehavioralSyllable={}, - DepthVideo={}, - ) - ) + source_data["MoseqExtract"]["alignment_path"] = str(alignment_path) + source_data["BehavioralSyllable"]["alignment_path"] = str(alignment_path) + source_data["DepthVideo"]["alignment_path"] = str(alignment_path) + if "Optogenetic" in source_data: source_data["Optogenetic"]["alignment_path"] = str(alignment_path) + source_data["BehavioralSyllable"]["file_path"] = str(behavioral_syllable_path) if experiment_type == "velocity-modulation": 
conversion_options["BehavioralSyllable"] = dict(velocity_modulation=True) + conversion_options["Optogenetic"] = dict(velocity_modulation=True) converter = DattaNWBConverter(source_data=source_data) metadata = converter.get_metadata() diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py index d25cf61..f1557d0 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py @@ -17,10 +17,12 @@ def __init__( file_path: str, tdt_path: str, tdt_metadata_path: str, + depth_timestamp_path: str, session_uuid: str, session_id: str, session_metadata_path: str, subject_metadata_path: str, + alignment_path: str = None, ): # This should load the data lazily and prepare variables you need columns = ( @@ -29,38 +31,59 @@ def __init__( "reference_dff", "uv_reference_fit", "reference_dff_fit", - "timestamp", ) super().__init__( file_path=file_path, tdt_path=tdt_path, tdt_metadata_path=tdt_metadata_path, + depth_timestamp_path=depth_timestamp_path, session_uuid=session_uuid, session_id=session_id, columns=columns, session_metadata_path=session_metadata_path, subject_metadata_path=subject_metadata_path, + alignment_path=alignment_path, ) + def get_original_timestamps(self) -> np.ndarray: + session_df = pd.read_parquet( + self.source_data["file_path"], + columns=["timestamp", "uuid"], + filters=[("uuid", "==", self.source_data["session_uuid"])], + ) + return session_df["timestamp"].to_numpy() + + def align_processed_timestamps(self, metadata: dict) -> np.ndarray: + timestamps = self.get_original_timestamps() + self.set_aligned_timestamps(aligned_timestamps=timestamps) + if self.source_data["alignment_path"] is not None: + aligned_starting_time = ( + metadata["Alignment"]["bias"] / metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"] + ) + 
self.set_aligned_starting_time(aligned_starting_time=aligned_starting_time) + return self.aligned_timestamps + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: super().add_to_nwbfile(nwbfile, metadata) + timestamps = self.align_processed_timestamps(metadata) session_df = pd.read_parquet( self.source_data["file_path"], columns=self.source_data["columns"], filters=[("uuid", "==", self.source_data["session_uuid"])], ) + notnan = pd.notnull(session_df.signal_dff) signal_series = RoiResponseSeries( name="SignalDfOverF", description="The ΔF/F from the blue light excitation (470nm) corresponding to the dopamine signal.", - data=H5DataIO(session_df.signal_dff.to_numpy(), compression=True), + data=H5DataIO(session_df.signal_dff.to_numpy()[notnan], compression=True), unit="a.u.", - timestamps=H5DataIO(session_df.timestamp.to_numpy(), compression=True), + timestamps=H5DataIO(timestamps[notnan], compression=True), rois=self.fibers_ref, ) reference_series = RoiResponseSeries( name="ReferenceDfOverF", description="The ∆F/F from the isosbestic UV excitation (405nm) corresponding to the reference signal.", - data=H5DataIO(session_df.reference_dff.to_numpy(), compression=True), + data=H5DataIO(session_df.reference_dff.to_numpy()[notnan], compression=True), unit="a.u.", timestamps=signal_series.timestamps, rois=self.fibers_ref, @@ -71,7 +94,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: "The ∆F/F from the isosbestic UV excitation (405nm) that has been smoothed " "(See Methods: Photometry Active Referencing)." 
), - data=H5DataIO(session_df.reference_dff_fit.to_numpy(), compression=True), + data=H5DataIO(session_df.reference_dff_fit.to_numpy()[notnan], compression=True), unit="a.u.", timestamps=signal_series.timestamps, rois=self.fibers_ref, @@ -82,7 +105,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: "Raw fluorescence (F) from the isosbestic UV excitation (405nm) that has been smoothed " "(See Methods: Photometry Active Referencing)." ), - data=H5DataIO(session_df.uv_reference_fit.to_numpy(), compression=True), + data=H5DataIO(session_df.uv_reference_fit.to_numpy()[notnan], compression=True), unit="n.a.", timestamps=signal_series.timestamps, rois=self.fibers_ref, diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/markowitz_gillis_nature_2023_metadata.yaml b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/markowitz_gillis_nature_2023_metadata.yaml index 98a853a..0607f42 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/markowitz_gillis_nature_2023_metadata.yaml +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/markowitz_gillis_nature_2023_metadata.yaml @@ -32,7 +32,9 @@ Subject: description: This study utilized wild-type DAT-IRES-Cre (The Jackson Laboratory 006660) and Ai32 (The Jackson Laboratory 012569) mice, both male and female, between 6-15 weeks of age. 
Constants: VIDEO_SAMPLING_RATE: 30.0 + DEMODULATED_PHOTOMETRY_SAMPLING_RATE: 500.0 TIMESTAMPS_TO_SECONDS: 1.25e-4 + MAXIMUM_TIMESTAMP: 4294967295 # 32-bit maximum Behavior: Position: reference_frame: TBD diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py index d31ef50..53556b5 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py @@ -6,6 +6,7 @@ import numpy as np from hdmf.backends.hdf5.h5_utils import H5DataIO from .basedattainterface import BaseDattaInterface +from .utils import convert_timestamps_to_seconds from pynwb.image import GrayscaleImage, ImageMaskSeries from pynwb import TimeSeries from pynwb.behavior import ( @@ -21,7 +22,13 @@ class MoseqExtractInterface(BaseDattaInterface): """Moseq interface for markowitz_gillis_nature_2023 conversion""" def __init__( - self, file_path: str, session_uuid: str, session_id: str, session_metadata_path: str, subject_metadata_path: str + self, + file_path: str, + session_uuid: str, + session_id: str, + session_metadata_path: str, + subject_metadata_path: str, + alignment_path: str = None, ): # This should load the data lazily and prepare variables you need super().__init__( @@ -30,9 +37,27 @@ def __init__( session_id=session_id, session_metadata_path=session_metadata_path, subject_metadata_path=subject_metadata_path, + alignment_path=alignment_path, ) + def get_original_timestamps(self) -> np.ndarray: + with h5py.File(self.source_data["file_path"]) as file: + return np.array(file["timestamps"]) + + def align_timestamps(self, metadata: dict) -> np.ndarray: + timestamps = self.get_original_timestamps() + timestamps = convert_timestamps_to_seconds(timestamps=timestamps, metadata=metadata) + + self.set_aligned_timestamps(aligned_timestamps=timestamps) + if self.source_data["alignment_path"] is not 
None: + aligned_starting_time = ( + metadata["Alignment"]["bias"] / metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"] + ) + self.set_aligned_starting_time(aligned_starting_time=aligned_starting_time) + return self.aligned_timestamps + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: + timestamps = self.align_timestamps(metadata) with h5py.File(self.source_data["file_path"]) as file: # Version version = np.array(file["metadata"]["extraction"]["extract_version"]).item().decode("ASCII") @@ -41,11 +66,6 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: processed_depth_video = np.array(file["frames"]) loglikelihood_video = np.array(file["frames_mask"]) - # Timestamps - TIMESTAMPS_TO_SECONDS = metadata["Constants"]["TIMESTAMPS_TO_SECONDS"] - timestamps = np.array(file["timestamps"]) * TIMESTAMPS_TO_SECONDS - timestamps -= timestamps[0] - # Extraction background = np.array(file["metadata"]["extraction"]["background"]) is_flipped = np.array(file["metadata"]["extraction"]["flips"]) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/optogeneticinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/optogeneticinterface.py index 286b4bd..8e4f027 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/optogeneticinterface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/optogeneticinterface.py @@ -20,7 +20,13 @@ class OptogeneticInterface(BaseDattaInterface): """Optogenetic interface for markowitz_gillis_nature_2023 conversion""" def __init__( - self, file_path: str, session_uuid: str, session_id: str, session_metadata_path: str, subject_metadata_path: str + self, + file_path: str, + session_uuid: str, + session_id: str, + session_metadata_path: str, + subject_metadata_path: str, + alignment_path: str = None, ): # This should load the data lazily and prepare variables you need columns = ( @@ -35,6 +41,7 @@ def __init__( columns=columns, session_metadata_path=session_metadata_path, 
subject_metadata_path=subject_metadata_path, + alignment_path=alignment_path, ) def get_metadata(self) -> dict: @@ -73,7 +80,29 @@ def get_metadata_schema(self) -> dict: } return metadata_schema - def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: + def get_original_timestamps(self) -> np.ndarray: + session_df = pd.read_parquet( + self.source_data["file_path"], + columns=["timestamp", "uuid"], + filters=[("uuid", "==", self.source_data["session_uuid"])], + ) + return session_df["timestamp"].to_numpy() + + def align_timestamps(self, metadata: dict, velocity_modulation: bool) -> np.ndarray: + timestamps = self.get_original_timestamps() + self.set_aligned_timestamps(aligned_timestamps=timestamps) + if self.source_data["alignment_path"] is not None: + aligned_starting_time = ( + metadata["Alignment"]["bias"] / metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"] + ) + self.set_aligned_starting_time(aligned_starting_time=aligned_starting_time) + elif velocity_modulation: + aligned_starting_time = 2700 # See Methods: Closed-loop velocity modulation experiments + self.set_aligned_starting_time(aligned_starting_time=aligned_starting_time) + return self.aligned_timestamps + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, velocity_modulation: bool = False) -> None: + session_timestamps = self.align_timestamps(metadata=metadata, velocity_modulation=velocity_modulation) session_df = pd.read_parquet( self.source_data["file_path"], columns=self.source_data["columns"], @@ -94,9 +123,9 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: ) # Reconstruct optogenetic series from feedback status if pd.isnull(metadata["Optogenetics"]["stim_frequency_Hz"]): # cts stim - data, timestamps = self.reconstruct_cts_stim(metadata, session_df) + data, timestamps = self.reconstruct_cts_stim(metadata, session_df, session_timestamps) else: # pulsed stim - data, timestamps = self.reconstruct_pulsed_stim(metadata, session_df) + data, 
timestamps = self.reconstruct_pulsed_stim(metadata, session_df, session_timestamps) id2sorted_index = metadata["BehavioralSyllable"]["id2sorted_index"] target_syllable = id2sorted_index[metadata["Optogenetics"]["target_syllable"]] ogen_series = OptogeneticSeries( @@ -111,22 +140,22 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: return nwbfile - def reconstruct_cts_stim(self, metadata, session_df): + def reconstruct_cts_stim(self, metadata, session_df, session_timestamps): stim_duration_s = metadata["Optogenetics"]["stim_duration_s"] power_watts = metadata["Optogenetics"]["power_watts"] feedback_is_on_index = np.where(session_df.feedback_status == 1)[0] data_len = len(feedback_is_on_index) * 2 + 2 data, timestamps = np.zeros(data_len), np.zeros(data_len) - timestamps[0], timestamps[-1] = session_df.timestamp.iloc[0], session_df.timestamp.iloc[-1] + timestamps[0], timestamps[-1] = session_timestamps[0], session_timestamps[-1] for i, index in enumerate(feedback_is_on_index): - t = session_df.timestamp.iloc[index] + t = session_timestamps[index] data[i * 2 + 1 : i * 2 + 3] = [power_watts, 0] timestamps[i * 2 + 1 : i * 2 + 3] = [t, t + stim_duration_s] sorting_index = np.argsort(timestamps) data, timestamps = data[sorting_index], timestamps[sorting_index] return data, timestamps - def reconstruct_pulsed_stim(self, metadata, session_df): + def reconstruct_pulsed_stim(self, metadata, session_df, session_timestamps): stim_duration_s = metadata["Optogenetics"]["stim_duration_s"] power_watts = metadata["Optogenetics"]["power_watts"] stim_frequency_Hz = metadata["Optogenetics"]["stim_frequency_Hz"] @@ -135,9 +164,9 @@ def reconstruct_pulsed_stim(self, metadata, session_df): pulses_per_stim = int(stim_duration_s * stim_frequency_Hz) data_len = len(feedback_is_on_index) * 2 * pulses_per_stim + 2 data, timestamps = np.zeros(data_len), np.zeros(data_len) - timestamps[0], timestamps[-1] = session_df.timestamp.iloc[0], session_df.timestamp.iloc[-1] + 
timestamps[0], timestamps[-1] = session_timestamps[0], session_timestamps[-1] for i, index in enumerate(feedback_is_on_index): - t0 = session_df.timestamp.iloc[index] + t0 = session_timestamps[index] for pulse in range(pulses_per_stim): t_on = t0 + pulse * 1 / stim_frequency_Hz t_off = t_on + pulse_width_s diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py index a4e5a1e..fd76cd8 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py @@ -2,6 +2,7 @@ # Standard Scientific Python import pandas as pd import numpy as np +from scipy.interpolate import interp1d # NWB Ecosystem from pynwb.file import NWBFile @@ -16,6 +17,7 @@ FluorophoresTable, ) from .basedattainterface import BaseDattaInterface +from .utils import convert_timestamps_to_seconds from neuroconv.utils import load_dict_from_file from neuroconv.tools import nwb_helpers from hdmf.backends.hdf5.h5_utils import H5DataIO @@ -28,10 +30,12 @@ def __init__( self, tdt_path: str, tdt_metadata_path: str, + depth_timestamp_path: str, session_uuid: str, session_id: str, session_metadata_path: str, subject_metadata_path: str, + alignment_path: str = None, **kwargs, ): super().__init__( @@ -41,6 +45,8 @@ def __init__( session_id=session_id, session_metadata_path=session_metadata_path, subject_metadata_path=subject_metadata_path, + alignment_path=alignment_path, + depth_timestamp_path=depth_timestamp_path, **kwargs, ) @@ -80,12 +86,41 @@ def get_metadata_schema(self) -> dict: } return metadata_schema + def align_raw_timestamps(self, metadata: dict) -> np.ndarray: + photometry_dict = load_tdt_data(self.source_data["tdt_path"], fs=metadata["FiberPhotometry"]["raw_rate"]) + timestamps = photometry_dict["tstep"] + depth_timestamps = 
pd.read_csv(self.source_data["depth_timestamp_path"], header=None).to_numpy().squeeze() + depth_timestamps = convert_timestamps_to_seconds(depth_timestamps, metadata=metadata) + + # Calculate sparse timestamps from linear alignment + DOWN_FS = metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"] + raw_fs = metadata["FiberPhotometry"]["raw_rate"] + raw_indices = [] + for i, _ in enumerate(depth_timestamps): + tdt_down_index = i * metadata["Alignment"]["slope"] + metadata["Alignment"]["bias"] + tdt_raw_index = tdt_down_index / DOWN_FS * raw_fs + raw_indices.append(int(tdt_raw_index)) + raw_indices = np.array(raw_indices) + raw_indices = raw_indices[raw_indices < len(timestamps)] + sparse_timestamps = depth_timestamps[np.arange(len(raw_indices))] + start_time = metadata["Alignment"]["bias"] / metadata["Constants"]["DEMODULATED_PHOTOMETRY_SAMPLING_RATE"] + sparse_timestamps += start_time + sparse_timestamps = np.concatenate((np.array([0]), sparse_timestamps)) + raw_indices = np.concatenate((np.array([0]), raw_indices)) + + # Interpolate aligned timestamps to raw photometry data + temporal_interpolator = interp1d(raw_indices, sparse_timestamps, kind="linear", fill_value="extrapolate") + aligned_timestamps = temporal_interpolator(np.arange(len(timestamps))) + self.set_aligned_timestamps(aligned_timestamps=aligned_timestamps) + + return self.aligned_timestamps + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: photometry_dict = load_tdt_data(self.source_data["tdt_path"], fs=metadata["FiberPhotometry"]["raw_rate"]) - session_start_index = np.flatnonzero(photometry_dict["sync"])[0] - raw_photometry = photometry_dict["pmt00"][session_start_index:] - commanded_signal = photometry_dict["pmt00_x"][session_start_index:] - commanded_reference = photometry_dict["pmt01_x"][session_start_index:] + timestamps = self.align_raw_timestamps(metadata=metadata) + raw_photometry = photometry_dict["pmt00"] + commanded_signal = photometry_dict["pmt00_x"] + 
commanded_reference = photometry_dict["pmt01_x"] # Commanded Voltage multi_commanded_voltage = MultiCommandedVoltage() @@ -98,7 +133,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: data=H5DataIO(commanded_signal, compression=True), frequency=metadata["FiberPhotometry"]["signal_freq"], power=metadata["FiberPhotometry"]["signal_amp"], # TODO: Fix this in ndx-photometry - rate=metadata["FiberPhotometry"]["raw_rate"], + timestamps=H5DataIO(timestamps, compression=True), unit="volts", ) commanded_reference_series = multi_commanded_voltage.create_commanded_voltage_series( @@ -110,7 +145,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: data=H5DataIO(commanded_reference, compression=True), frequency=metadata["FiberPhotometry"]["reference_freq"], power=metadata["FiberPhotometry"]["reference_amp"], # TODO: Fix this in ndx-photometry - rate=metadata["FiberPhotometry"]["raw_rate"], + timestamps=commanded_signal_series.timestamps, unit="volts", ) # Excitation Sources Table @@ -206,8 +241,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: description="The raw acquisition with mixed signal from both the blue light excitation (470nm) and UV excitation (405nm).", data=H5DataIO(raw_photometry, compression=True), unit="F", - starting_time=0.0, - rate=metadata["FiberPhotometry"]["raw_rate"], + timestamps=commanded_signal_series.timestamps, rois=self.fibers_ref, ) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/utils.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/utils.py new file mode 100644 index 0000000..b620462 --- /dev/null +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/utils.py @@ -0,0 +1,28 @@ +"""Convenient utility functions for datta lab conversion common to multiple interfaces.""" + +import pandas as pd +import numpy as np + + +def convert_timestamps_to_seconds(timestamps: np.ndarray[int], metadata: dict) -> np.ndarray: + """Converts integer timestamps to seconds using the 
metadata constants. + + Parameters + ---------- + timestamps : np.ndarray[int] + The timestamps to convert. + metadata : dict + The metadata dictionary containing the conversion constants. + + Returns + ------- + np.ndarray + The converted timestamps. + """ + TIMESTAMPS_TO_SECONDS = metadata["Constants"]["TIMESTAMPS_TO_SECONDS"] + timestamps[timestamps < timestamps[0]] = ( + metadata["Constants"]["MAXIMUM_TIMESTAMP"] + timestamps[timestamps < timestamps[0]] + ) + timestamps -= timestamps[0] + timestamps = timestamps * TIMESTAMPS_TO_SECONDS + return timestamps