diff --git a/frozen_dependencies.txt b/frozen_dependencies.txt index 5f9a031..7d9660b 100644 --- a/frozen_dependencies.txt +++ b/frozen_dependencies.txt @@ -121,6 +121,7 @@ ndx-events==0.2.0 ndx-grayscalevolume==0.0.2 ndx-icephys-meta==0.1.0 ndx-photometry @ git+https://github.com/catalystneuro/ndx-photometry.git@7ea9d755ceac9524125f50ab528b403b135c4530 +-e git+https://github.com/rly/ndx-pose.git@f9dd18a8290897e48bdd6ebeedcc0a7095d86265#egg=ndx_pose ndx-spectrum==0.2.2 nest-asyncio==1.5.7 networkx==3.1 diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py index f7e2fbc..77ebf28 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/basevideointerface.py @@ -32,7 +32,7 @@ def __init__( def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: timestamps = pd.read_csv(self.source_data["timestamp_path"]).to_numpy().squeeze() - TIMESTAMPS_TO_SECONDS = 1.25e-4 + TIMESTAMPS_TO_SECONDS = metadata["Constants"]["TIMESTAMPS_TO_SECONDS"] timestamps -= timestamps[0] timestamps = timestamps * TIMESTAMPS_TO_SECONDS diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py index a7fb987..d25cf61 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/fiberphotometryinterface.py @@ -5,28 +5,13 @@ # NWB Ecosystem from pynwb.file import NWBFile -from pynwb.core import DynamicTableRegion from pynwb.ophys import RoiResponseSeries -from ndx_photometry import ( - FibersTable, - PhotodetectorsTable, - ExcitationSourcesTable, - DeconvolvedRoiResponseSeries, - MultiCommandedVoltage, - FiberPhotometry, - FluorophoresTable, -) -from .basedattainterface import BaseDattaInterface -from neuroconv.utils import load_dict_from_file +from .rawfiberphotometryinterface import RawFiberPhotometryInterface from neuroconv.tools import nwb_helpers from hdmf.backends.hdf5.h5_utils import H5DataIO -# Local - - -class FiberPhotometryInterface(BaseDattaInterface): - """Fiber Photometry interface for markowitz_gillis_nature_2023 conversion""" +class FiberPhotometryInterface(RawFiberPhotometryInterface): def __init__( self, file_path: str, @@ -57,184 +42,20 @@ def __init__( subject_metadata_path=subject_metadata_path, ) - def get_metadata(self) -> dict: - metadata = super().get_metadata() - session_metadata = load_dict_from_file(self.source_data["session_metadata_path"]) - subject_metadata = load_dict_from_file(self.source_data["subject_metadata_path"]) - tdt_metadata = load_dict_from_file(self.source_data["tdt_metadata_path"]) - session_metadata = session_metadata[self.source_data["session_uuid"]] - subject_metadata = subject_metadata[session_metadata["subject_id"]] - - metadata["FiberPhotometry"]["reference_max"] = session_metadata["reference_max"] - metadata["FiberPhotometry"]["signal_max"] = session_metadata["signal_max"] - metadata["FiberPhotometry"]["signal_reference_corr"] = session_metadata["signal_reference_corr"] - metadata["FiberPhotometry"]["snr"] = session_metadata["snr"] - metadata["FiberPhotometry"]["area"] = subject_metadata["photometry_area"] - metadata["FiberPhotometry"]["gain"] = float(tdt_metadata["tags"]["OutputGain"]) - metadata["FiberPhotometry"]["signal_amp"] = tdt_metadata["tags"]["LED1Amp"] - 
metadata["FiberPhotometry"]["reference_amp"] = tdt_metadata["tags"]["LED2Amp"] - metadata["FiberPhotometry"]["signal_freq"] = float(tdt_metadata["tags"]["LED1Freq"]) - metadata["FiberPhotometry"]["reference_freq"] = float(tdt_metadata["tags"]["LED2Freq"]) - metadata["FiberPhotometry"]["raw_rate"] = tdt_metadata["status"]["sampling_rate"] - - return metadata - - def get_metadata_schema(self) -> dict: - metadata_schema = super().get_metadata_schema() - metadata_schema["properties"]["FiberPhotometry"] = { - "type": "object", - "properties": { - "area": {"type": "string"}, - "reference_max": {"type": "number"}, - "signal_max": {"type": "number"}, - "signal_reference_corr": {"type": "number"}, - "snr": {"type": "number"}, - }, - } - return metadata_schema - def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: + super().add_to_nwbfile(nwbfile, metadata) session_df = pd.read_parquet( self.source_data["file_path"], columns=self.source_data["columns"], filters=[("uuid", "==", self.source_data["session_uuid"])], ) - photometry_dict = load_tdt_data(self.source_data["tdt_path"], fs=metadata["FiberPhotometry"]["raw_rate"]) - session_start_index = np.flatnonzero(photometry_dict["sync"])[0] - raw_photometry = photometry_dict["pmt00"][session_start_index:] - commanded_signal = photometry_dict["pmt00_x"][session_start_index:] - commanded_reference = photometry_dict["pmt01_x"][session_start_index:] - - # Commanded Voltage - multi_commanded_voltage = MultiCommandedVoltage() - commanded_signal_series = multi_commanded_voltage.create_commanded_voltage_series( - name="commanded_signal", - description=( - "A 470nm (blue) LED and a 405nM (UV) LED (Mightex) were sinusoidally modulated at 161Hz and 381Hz, " - "respectively (these frequencies were chosen to avoid harmonic cross-talk)." - ), - data=H5DataIO(commanded_signal, compression=True), - frequency=metadata["FiberPhotometry"]["signal_freq"], - power=metadata["FiberPhotometry"]["signal_amp"], # TODO: Fix this in ndx-photometry - rate=metadata["FiberPhotometry"]["raw_rate"], - unit="volts", - ) - commanded_reference_series = multi_commanded_voltage.create_commanded_voltage_series( - name="commanded_reference", - description=( - "A 470nm (blue) LED and a 405nM (UV) LED (Mightex) were sinusoidally modulated at 161Hz and 381Hz, " - "respectively (these frequencies were chosen to avoid harmonic cross-talk)." - ), - data=H5DataIO(commanded_reference, compression=True), - frequency=metadata["FiberPhotometry"]["reference_freq"], - power=metadata["FiberPhotometry"]["reference_amp"], # TODO: Fix this in ndx-photometry - rate=metadata["FiberPhotometry"]["raw_rate"], - unit="volts", - ) - # Excitation Sources Table - excitation_sources_table = ExcitationSourcesTable( - description=( - "A 470nm (blue) LED and a 405nM (UV) LED (Mightex) were sinusoidally modulated at " - "161Hz and 381Hz, respectively (these frequencies were chosen to avoid harmonic cross-talk). " - "Modulated excitation light was passed through a three-colour fluorescence mini-cube " - "(Doric Lenses FMC7_E1(400-410)_F1(420-450)_E2(460-490)_F2(500-540)_E3(550-575)_F3(600-680)_S), " - "then through a pigtailed rotary joint " - "(Doric Lenses B300-0089, FRJ_1x1_PT_200/220/LWMJ-0.37_1.0m_FCM_0.08m_FCM) and finally into a " - "low-autofluorescence fibre-optic patch cord " - "(Doric Lenses MFP_200/230/900-0.37_0.75m_FCM-MF1.25_LAF or MFP_200/230/900-0.57_0.75m_FCM-MF1.25_LAF) " - "connected to the optical implant in the freely moving mouse." 
- ) - ) - excitation_sources_table.add_row( - peak_wavelength=470.0, - source_type="LED", - commanded_voltage=commanded_signal_series, - ) - excitation_sources_table.add_row( - peak_wavelength=405.0, - source_type="LED", - commanded_voltage=commanded_reference_series, - ) - - # Photodetectors Table - photodetectors_table = PhotodetectorsTable( - description=( - "Emission light was collected through the same patch cord, then passed back through the mini-cube. " - "Light on the F2 port was bandpass filtered for green emission (500–540nm) and sent to a silicon " - "photomultiplier with an integrated transimpedance amplifier (SensL MiniSM-30035-X08). Voltages from " - "the SensL unit were collected through the TDT Active X interface using 24-bit analogue-to-digital " - "convertors at >6kHz, and voltage signals driving the UV and blue LEDs were also stored for " - "offline analysis." - ), - ) - photodetectors_table.add_row(peak_wavelength=527.0, type="PMT", gain=metadata["FiberPhotometry"]["gain"]) - - # Fluorophores Table - fluorophores_table = FluorophoresTable( - description=( - "dLight1.1 was selected to visualize dopamine release dynamics in the DLS owing to its rapid rise and " - "decay times, comparatively lower dopamine affinity (so as to not saturate binding), as well as its " - "responsiveness over much of the physiological range of known DA concentrations in freely moving " - "rodents." - ), - ) - - fluorophores_table.add_row( - label="dlight1.1", - location=metadata["FiberPhotometry"]["area"], - coordinates=(0.260, 2.550, -2.40), # (AP, ML, DV) - ) - - # Fibers Table - fibers_table = FibersTable( - description=( - "Fiber photometry data with 2 excitation sources (470nm and 405nm), 1 PMT photodetector with " - "a peak wavelength of 527nm, and 1 fluorophore (dLight1.1)." 
- ) - ) - nwbfile.add_lab_meta_data( - FiberPhotometry( - fibers=fibers_table, - excitation_sources=excitation_sources_table, - photodetectors=photodetectors_table, - fluorophores=fluorophores_table, - ) - ) - - # Important: we add the fibers to the fibers table _after_ adding the metadata - # This ensures that we can find this data in their tables of origin - fibers_table.add_fiber( - excitation_source=0, # integers indicated rows of excitation sources table - photodetector=0, - fluorophores=[0], # potentially multiple fluorophores, so list of indices - location=metadata["FiberPhotometry"]["area"], - ) - fibers_table.add_fiber( - excitation_source=1, # integers indicated rows of excitation sources table - photodetector=0, - fluorophores=[0], # potentially multiple fluorophores, so list of indices - location=metadata["FiberPhotometry"]["area"], - ) - - # ROI Response Series - # Here we set up a list of fibers that our recording came from - fibers_ref = DynamicTableRegion(name="rois", data=[0, 1], description="source fibers", table=fibers_table) - raw_photometry = RoiResponseSeries( - name="RawPhotometry", - description="The raw acquisition with mixed signal from both the blue light excitation (470nm) and UV excitation (405nm).", - data=H5DataIO(raw_photometry, compression=True), - unit="F", - starting_time=0.0, - rate=metadata["FiberPhotometry"]["raw_rate"], - rois=fibers_ref, - ) signal_series = RoiResponseSeries( name="SignalDfOverF", description="The ΔF/F from the blue light excitation (470nm) corresponding to the dopamine signal.", data=H5DataIO(session_df.signal_dff.to_numpy(), compression=True), unit="a.u.", timestamps=H5DataIO(session_df.timestamp.to_numpy(), compression=True), - rois=fibers_ref, + rois=self.fibers_ref, ) reference_series = RoiResponseSeries( name="ReferenceDfOverF", @@ -242,7 +63,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: data=H5DataIO(session_df.reference_dff.to_numpy(), compression=True), unit="a.u.", timestamps=signal_series.timestamps, - rois=fibers_ref, + rois=self.fibers_ref, ) reference_fit_series = RoiResponseSeries( name="ReferenceDfOverFSmoothed", @@ -253,7 +74,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: data=H5DataIO(session_df.reference_dff_fit.to_numpy(), compression=True), unit="a.u.", timestamps=signal_series.timestamps, - rois=fibers_ref, + rois=self.fibers_ref, ) uv_reference_fit_series = RoiResponseSeries( name="UVReferenceFSmoothed", @@ -264,38 +85,10 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: data=H5DataIO(session_df.uv_reference_fit.to_numpy(), compression=True), unit="n.a.", timestamps=signal_series.timestamps, - rois=fibers_ref, + rois=self.fibers_ref, ) - - # Aggregate into OPhys Module and NWBFile - nwbfile.add_acquisition(raw_photometry) ophys_module = nwb_helpers.get_module(nwbfile, name="ophys", description="Fiber photometry data") - ophys_module.add(multi_commanded_voltage) ophys_module.add(signal_series) ophys_module.add(reference_series) ophys_module.add(reference_fit_series) ophys_module.add(uv_reference_fit_series) - - -def load_tdt_data(filename, pmt_channels=[0, 3], sync_channel=6, clock_channel=7, nch=8, fs=6103.515625): - float_data = np.fromfile(filename, dtype=">f4") - int_data = np.fromfile(filename, dtype=">i4") - - photometry_dict = {} - - for i, pmt in enumerate(pmt_channels): - photometry_dict["pmt{:02d}".format(i)] = float_data[pmt::nch] - photometry_dict["pmt{:02d}_x".format(i)] = float_data[pmt + 1 :: nch] - 
photometry_dict["pmt{:02d}_y".format(i)] = float_data[pmt + 2 :: nch] - - photometry_dict["sync"] = int_data[sync_channel::8] - photometry_dict["clock"] = int_data[clock_channel::8] - - if any(np.diff(photometry_dict["clock"]) != 1): - raise IOError("Timebase not uniform in TDT file.") - - clock_df = np.diff(photometry_dict["clock"].astype("float64")) - clock_df = np.insert(clock_df, 0, 0, axis=0) - photometry_dict["tstep"] = np.cumsum(clock_df * 1 / fs) - - return photometry_dict diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/markowitz_gillis_nature_2023_metadata.yaml b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/markowitz_gillis_nature_2023_metadata.yaml index 0e9df74..98a853a 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/markowitz_gillis_nature_2023_metadata.yaml +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/markowitz_gillis_nature_2023_metadata.yaml @@ -30,6 +30,9 @@ Subject: species: Mus musculus age: P6W/P15W description: This study utilized wild-type DAT-IRES-Cre (The Jackson Laboratory 006660) and Ai32 (The Jackson Laboratory 012569) mice, both male and female, between 6-15 weeks of age. +Constants: + VIDEO_SAMPLING_RATE: 30.0 + TIMESTAMPS_TO_SECONDS: 1.25e-4 Behavior: Position: reference_frame: TBD diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py index a358d10..d31ef50 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/moseqextractinterface.py @@ -42,7 +42,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: loglikelihood_video = np.array(file["frames_mask"]) # Timestamps - TIMESTAMPS_TO_SECONDS = 1.25e-4 + TIMESTAMPS_TO_SECONDS = metadata["Constants"]["TIMESTAMPS_TO_SECONDS"] timestamps = np.array(file["timestamps"]) * TIMESTAMPS_TO_SECONDS timestamps -= timestamps[0] diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/preconversion/extract_metadata.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/preconversion/extract_metadata.py index f8444fe..bc4d780 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/preconversion/extract_metadata.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/preconversion/extract_metadata.py @@ -289,6 +289,30 @@ def extract_reinforcement_photometry_metadata( return session_metadata, subject_metadata +def extract_keypoint_metadata(data_path: str): + keypoint_subjects = ["dls-dlight-9", "dls-dlight-10", "dls-dlight-11", "dls-dlight-12", "dls-dlight-13"] + session_metadata, subject_metadata = {}, {} + for subject in keypoint_subjects: + session_metadata[subject] = dict( + keypoint=True, + photometry=True, + session_description="keypoint session", + session_start_time="1901-01-01T00:00:00-05:00", # TODO: replace with real session start time + reference_max=np.NaN, + signal_max=np.NaN, + signal_reference_corr=np.NaN, + snr=np.NaN, + subject_id=subject, + ) + subject_metadata[subject] = dict( + genotype="dls-dlight", + opsin="n/a", + photometry_area="dls", + sex="U", + ) + return session_metadata, subject_metadata + + def resolve_duplicates(photometry_metadata, photometry_ids, reinforcement_metadata, reinforcement_ids): resolved_metadata = {} _resolve_duplicates( @@ -416,6 +440,8 @@ def get_session_name(session_df): reinforcement_photometry_subject_metadata_path = metadata_path / "reinforcement-photometry-subject-metadata.yaml" 
velocity_session_metadata_path = metadata_path / "velocity-modulation-session-metadata.yaml" velocity_subject_metadata_path = metadata_path / "velocity-modulation-subject-metadata.yaml" + keypoint_session_metadata_path = metadata_path / "keypoint-session-metadata.yaml" + keypoint_subject_metadata_path = metadata_path / "keypoint-subject-metadata.yaml" # Example UUIDs dls_dlight_1_example = "18dc5ad5-13f0-4297-8b21-75d434770e57" @@ -451,6 +477,7 @@ def get_session_name(session_df): velocity_session_metadata, velocity_subject_metadata = extract_velocity_modulation_metadata( data_path, example_uuids=velocity_modulation_examples ) + keypoint_session_metadata, keypoint_subject_metadata = extract_keypoint_metadata(data_path) path2metadata = { photometry_session_metadata_path: photometry_session_metadata, @@ -461,6 +488,8 @@ def get_session_name(session_df): reinforcement_photometry_subject_metadata_path: reinforcement_photometry_subject_metadata, velocity_session_metadata_path: velocity_session_metadata, velocity_subject_metadata_path: velocity_subject_metadata, + keypoint_session_metadata_path: keypoint_session_metadata, + keypoint_subject_metadata_path: keypoint_subject_metadata, } for path, resolved_dict in path2metadata.items(): with open(path, "w") as f: diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py new file mode 100644 index 0000000..a4e5a1e --- /dev/null +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py @@ -0,0 +1,241 @@ +"""Primary class for converting Raw fiber photometry data (dLight fluorescence) from the TDT system.""" +# Standard Scientific Python +import pandas as pd +import numpy as np + +# NWB Ecosystem +from pynwb.file import NWBFile +from pynwb.core import DynamicTableRegion +from pynwb.ophys import RoiResponseSeries +from ndx_photometry import ( + FibersTable, + PhotodetectorsTable, + ExcitationSourcesTable, + MultiCommandedVoltage, + FiberPhotometry, + FluorophoresTable, +) +from .basedattainterface import BaseDattaInterface +from neuroconv.utils import load_dict_from_file +from neuroconv.tools import nwb_helpers +from hdmf.backends.hdf5.h5_utils import H5DataIO + + +class RawFiberPhotometryInterface(BaseDattaInterface): + """Raw Fiber Photometry interface for markowitz_gillis_nature_2023 conversion.""" + + def __init__( + self, + tdt_path: str, + tdt_metadata_path: str, + session_uuid: str, + session_id: str, + session_metadata_path: str, + subject_metadata_path: str, + **kwargs, + ): + super().__init__( + tdt_path=tdt_path, + tdt_metadata_path=tdt_metadata_path, + session_uuid=session_uuid, + session_id=session_id, + session_metadata_path=session_metadata_path, + subject_metadata_path=subject_metadata_path, + **kwargs, + ) + + def get_metadata(self) -> dict: + metadata = super().get_metadata() + session_metadata = load_dict_from_file(self.source_data["session_metadata_path"]) + subject_metadata = load_dict_from_file(self.source_data["subject_metadata_path"]) + tdt_metadata = load_dict_from_file(self.source_data["tdt_metadata_path"]) + session_metadata = session_metadata[self.source_data["session_uuid"]] + subject_metadata = subject_metadata[session_metadata["subject_id"]] + + metadata["FiberPhotometry"]["reference_max"] = session_metadata["reference_max"] + metadata["FiberPhotometry"]["signal_max"] = session_metadata["signal_max"] + metadata["FiberPhotometry"]["signal_reference_corr"] = 
session_metadata["signal_reference_corr"] + metadata["FiberPhotometry"]["snr"] = session_metadata["snr"] + metadata["FiberPhotometry"]["area"] = subject_metadata["photometry_area"] + metadata["FiberPhotometry"]["gain"] = float(tdt_metadata["tags"]["OutputGain"]) + metadata["FiberPhotometry"]["signal_amp"] = tdt_metadata["tags"]["LED1Amp"] + metadata["FiberPhotometry"]["reference_amp"] = tdt_metadata["tags"]["LED2Amp"] + metadata["FiberPhotometry"]["signal_freq"] = float(tdt_metadata["tags"]["LED1Freq"]) + metadata["FiberPhotometry"]["reference_freq"] = float(tdt_metadata["tags"]["LED2Freq"]) + metadata["FiberPhotometry"]["raw_rate"] = tdt_metadata["status"]["sampling_rate"] + + return metadata + + def get_metadata_schema(self) -> dict: + metadata_schema = super().get_metadata_schema() + metadata_schema["properties"]["FiberPhotometry"] = { + "type": "object", + "properties": { + "area": {"type": "string"}, + "reference_max": {"type": "number"}, + "signal_max": {"type": "number"}, + "signal_reference_corr": {"type": "number"}, + "snr": {"type": "number"}, + }, + } + return metadata_schema + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: + photometry_dict = load_tdt_data(self.source_data["tdt_path"], fs=metadata["FiberPhotometry"]["raw_rate"]) + session_start_index = np.flatnonzero(photometry_dict["sync"])[0] + raw_photometry = photometry_dict["pmt00"][session_start_index:] + commanded_signal = photometry_dict["pmt00_x"][session_start_index:] + commanded_reference = photometry_dict["pmt01_x"][session_start_index:] + + # Commanded Voltage + multi_commanded_voltage = MultiCommandedVoltage() + commanded_signal_series = multi_commanded_voltage.create_commanded_voltage_series( + name="commanded_signal", + description=( + "A 470nm (blue) LED and a 405nM (UV) LED (Mightex) were sinusoidally modulated at 161Hz and 381Hz, " + "respectively (these frequencies were chosen to avoid harmonic cross-talk)." + ), + data=H5DataIO(commanded_signal, compression=True), + frequency=metadata["FiberPhotometry"]["signal_freq"], + power=metadata["FiberPhotometry"]["signal_amp"], # TODO: Fix this in ndx-photometry + rate=metadata["FiberPhotometry"]["raw_rate"], + unit="volts", + ) + commanded_reference_series = multi_commanded_voltage.create_commanded_voltage_series( + name="commanded_reference", + description=( + "A 470nm (blue) LED and a 405nM (UV) LED (Mightex) were sinusoidally modulated at 161Hz and 381Hz, " + "respectively (these frequencies were chosen to avoid harmonic cross-talk)." + ), + data=H5DataIO(commanded_reference, compression=True), + frequency=metadata["FiberPhotometry"]["reference_freq"], + power=metadata["FiberPhotometry"]["reference_amp"], # TODO: Fix this in ndx-photometry + rate=metadata["FiberPhotometry"]["raw_rate"], + unit="volts", + ) + # Excitation Sources Table + excitation_sources_table = ExcitationSourcesTable( + description=( + "A 470nm (blue) LED and a 405nM (UV) LED (Mightex) were sinusoidally modulated at " + "161Hz and 381Hz, respectively (these frequencies were chosen to avoid harmonic cross-talk). 
" + "Modulated excitation light was passed through a three-colour fluorescence mini-cube " + "(Doric Lenses FMC7_E1(400-410)_F1(420-450)_E2(460-490)_F2(500-540)_E3(550-575)_F3(600-680)_S), " + "then through a pigtailed rotary joint " + "(Doric Lenses B300-0089, FRJ_1x1_PT_200/220/LWMJ-0.37_1.0m_FCM_0.08m_FCM) and finally into a " + "low-autofluorescence fibre-optic patch cord " + "(Doric Lenses MFP_200/230/900-0.37_0.75m_FCM-MF1.25_LAF or MFP_200/230/900-0.57_0.75m_FCM-MF1.25_LAF) " + "connected to the optical implant in the freely moving mouse." + ) + ) + excitation_sources_table.add_row( + peak_wavelength=470.0, + source_type="LED", + commanded_voltage=commanded_signal_series, + ) + excitation_sources_table.add_row( + peak_wavelength=405.0, + source_type="LED", + commanded_voltage=commanded_reference_series, + ) + + # Photodetectors Table + photodetectors_table = PhotodetectorsTable( + description=( + "Emission light was collected through the same patch cord, then passed back through the mini-cube. " + "Light on the F2 port was bandpass filtered for green emission (500–540nm) and sent to a silicon " + "photomultiplier with an integrated transimpedance amplifier (SensL MiniSM-30035-X08). Voltages from " + "the SensL unit were collected through the TDT Active X interface using 24-bit analogue-to-digital " + "convertors at >6kHz, and voltage signals driving the UV and blue LEDs were also stored for " + "offline analysis." + ), + ) + photodetectors_table.add_row(peak_wavelength=527.0, type="PMT", gain=metadata["FiberPhotometry"]["gain"]) + + # Fluorophores Table + fluorophores_table = FluorophoresTable( + description=( + "dLight1.1 was selected to visualize dopamine release dynamics in the DLS owing to its rapid rise and " + "decay times, comparatively lower dopamine affinity (so as to not saturate binding), as well as its " + "responsiveness over much of the physiological range of known DA concentrations in freely moving " + "rodents." + ), + ) + + fluorophores_table.add_row( + label="dlight1.1", + location=metadata["FiberPhotometry"]["area"], + coordinates=(0.260, 2.550, -2.40), # (AP, ML, DV) + ) + + # Fibers Table + fibers_table = FibersTable( + description=( + "Fiber photometry data with 2 excitation sources (470nm and 405nm), 1 PMT photodetector with " + "a peak wavelength of 527nm, and 1 fluorophore (dLight1.1)." 
+ ) + ) + nwbfile.add_lab_meta_data( + FiberPhotometry( + fibers=fibers_table, + excitation_sources=excitation_sources_table, + photodetectors=photodetectors_table, + fluorophores=fluorophores_table, + ) + ) + + # Important: we add the fibers to the fibers table _after_ adding the metadata + # This ensures that we can find this data in their tables of origin + fibers_table.add_fiber( + excitation_source=0, # integers indicated rows of excitation sources table + photodetector=0, + fluorophores=[0], # potentially multiple fluorophores, so list of indices + location=metadata["FiberPhotometry"]["area"], + ) + fibers_table.add_fiber( + excitation_source=1, # integers indicated rows of excitation sources table + photodetector=0, + fluorophores=[0], # potentially multiple fluorophores, so list of indices + location=metadata["FiberPhotometry"]["area"], + ) + + # ROI Response Series + # Here we set up a list of fibers that our recording came from + self.fibers_ref = DynamicTableRegion(name="rois", data=[0, 1], description="source fibers", table=fibers_table) + raw_photometry = RoiResponseSeries( + name="RawPhotometry", + description="The raw acquisition with mixed signal from both the blue light excitation (470nm) and UV excitation (405nm).", + data=H5DataIO(raw_photometry, compression=True), + unit="F", + starting_time=0.0, + rate=metadata["FiberPhotometry"]["raw_rate"], + rois=self.fibers_ref, + ) + + # Aggregate into OPhys Module and NWBFile + nwbfile.add_acquisition(raw_photometry) + ophys_module = nwb_helpers.get_module(nwbfile, name="ophys", description="Fiber photometry data") + ophys_module.add(multi_commanded_voltage) + + +def load_tdt_data(filename, pmt_channels=[0, 3], sync_channel=6, clock_channel=7, nch=8, fs=6103.515625): + float_data = np.fromfile(filename, dtype=">f4") + int_data = np.fromfile(filename, dtype=">i4") + + photometry_dict = {} + + for i, pmt in enumerate(pmt_channels): + photometry_dict["pmt{:02d}".format(i)] = float_data[pmt::nch] + photometry_dict["pmt{:02d}_x".format(i)] = float_data[pmt + 1 :: nch] + photometry_dict["pmt{:02d}_y".format(i)] = float_data[pmt + 2 :: nch] + + photometry_dict["sync"] = int_data[sync_channel::8] + photometry_dict["clock"] = int_data[clock_channel::8] + + if any(np.diff(photometry_dict["clock"]) != 1): + raise IOError("Timebase not uniform in TDT file.") + + clock_df = np.diff(photometry_dict["clock"].astype("float64")) + clock_df = np.insert(clock_df, 0, 0, axis=0) + photometry_dict["tstep"] = np.cumsum(clock_df * 1 / fs) + + return photometry_dict diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/__init__.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/__init__.py new file mode 100644 index 0000000..6892949 --- /dev/null +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/__init__.py @@ -0,0 +1 @@ +from .nwbconverter import NWBConverter diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/convert_session.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/convert_session.py new file mode 100644 index 0000000..bd11a22 --- /dev/null +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/convert_session.py @@ -0,0 +1,114 @@ +"""Primary script to run to convert an entire session for of data using the NWBConverter.""" +# Standard Library +from pathlib import Path +import shutil +from typing import Union, Literal + +# Third Party +from neuroconv.utils import dict_deep_update, load_dict_from_file +from pynwb import NWBHDF5IO + +# Local +from 
datta_lab_to_nwb import markowitz_gillis_nature_2023_keypoint + + +def session_to_nwb( + subject_id: str, + raw_path: Union[str, Path], + processed_path: Union[str, Path], + summary_image_path: Union[str, Path], + metadata_path: Union[str, Path], + output_dir_path: Union[str, Path], + stub_test: bool = False, +): + output_dir_path = Path(output_dir_path) + if stub_test: + output_dir_path = output_dir_path / "nwb_stub" + output_dir_path.mkdir(parents=True, exist_ok=True) + session_id = f"keypoint-{subject_id}" + nwbfile_path = output_dir_path / f"{session_id}.nwb" + session_metadata_path = metadata_path / f"keypoint-session-metadata.yaml" + subject_metadata_path = metadata_path / f"keypoint-subject-metadata.yaml" + session_metadata = load_dict_from_file(session_metadata_path) + session_metadata = session_metadata[subject_id] + raw_path = Path(raw_path) + keypoint_path = raw_path / "keypoints.p" + + source_data, conversion_options = {}, {} + # Photometry + tdt_path = list(raw_path.glob("tdt_data*.dat"))[0] + tdt_metadata_path = list(raw_path.glob("tdt_data*.json"))[0] + source_data["FiberPhotometry"] = dict( + file_path=str(processed_path), + tdt_path=str(tdt_path), + tdt_metadata_path=str(tdt_metadata_path), + session_metadata_path=str(session_metadata_path), + subject_metadata_path=str(subject_metadata_path), + session_uuid=subject_id, + session_id=session_id, + ) + conversion_options["FiberPhotometry"] = {} + + # IR Video + source_data["IRVideo"] = dict( + data_path=str(raw_path), + session_metadata_path=str(session_metadata_path), + subject_metadata_path=str(subject_metadata_path), + session_uuid=subject_id, + session_id=session_id, + ) + conversion_options["IRVideo"] = {} + + # Keypoints + source_data["Keypoint"] = dict( + file_path=str(keypoint_path), + summary_image_path=str(summary_image_path), + session_metadata_path=str(session_metadata_path), + subject_metadata_path=str(subject_metadata_path), + session_uuid=subject_id, + session_id=session_id, + ) + + converter = markowitz_gillis_nature_2023_keypoint.NWBConverter(source_data=source_data) + metadata = converter.get_metadata() + + # Update metadata + paper_metadata_path = Path(__file__).parent / "markowitz_gillis_nature_2023_keypoint_metadata.yaml" + paper_metadata = load_dict_from_file(paper_metadata_path) + metadata = dict_deep_update(metadata, paper_metadata) + + # Run conversion + converter.run_conversion(metadata=metadata, nwbfile_path=nwbfile_path, conversion_options=conversion_options) + + +if __name__ == "__main__": + # Parameters for conversion + base_raw_path = Path("/Volumes/T7/CatalystNeuro/NWB/Datta/xtra_raw") + output_dir_path = Path("/Volumes/T7/CatalystNeuro/NWB/Datta/conversion_nwb/") + base_processed_path = Path( + "/Volumes/T7/CatalystNeuro/NWB/Datta/dopamine-reinforces-spontaneous-behavior/keypoints_raw_data/photometry-dls-dlight-keypoints" + ) + summary_image_path = Path( + "/Volumes/T7/CatalystNeuro/NWB/Datta/dopamine-reinforces-spontaneous-behavior/keypoints_raw_data/video-image.p" + ) + metadata_path = Path("/Volumes/T7/CatalystNeuro/NWB/Datta/dopamine-reinforces-spontaneous-behavior/metadata") + if output_dir_path.exists(): + shutil.rmtree( + output_dir_path, ignore_errors=True + ) # ignore errors due to MacOS race condition (https://github.com/python/cpython/issues/81441) + stub_test = False + + # Example subjects + example_subjects = ["dls-dlight-9", "dls-dlight-10", "dls-dlight-11", "dls-dlight-12", "dls-dlight-13"] + for subject_id in example_subjects: + raw_path = base_raw_path / ("photometry-" + 
subject_id) + processed_path = base_processed_path / ("photometry-" + subject_id + ".p") + session_to_nwb( + subject_id=subject_id, + raw_path=raw_path, + processed_path=processed_path, + summary_image_path=summary_image_path, + metadata_path=metadata_path, + output_dir_path=output_dir_path, + stub_test=stub_test, + ) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/fiberphotometryinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/fiberphotometryinterface.py new file mode 100644 index 0000000..1e0878c --- /dev/null +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/fiberphotometryinterface.py @@ -0,0 +1,64 @@ +"""Primary class for converting fiber photometry data (dLight fluorescence).""" +# Standard Scientific Python +import numpy as np +import joblib + +# NWB Ecosystem +from pynwb.file import NWBFile +from pynwb.ophys import RoiResponseSeries +from ..markowitz_gillis_nature_2023.rawfiberphotometryinterface import RawFiberPhotometryInterface +from neuroconv.tools import nwb_helpers +from hdmf.backends.hdf5.h5_utils import H5DataIO + + +class FiberPhotometryInterface(RawFiberPhotometryInterface): + def __init__( + self, + file_path: str, + tdt_path: str, + tdt_metadata_path: str, + session_uuid: str, + session_id: str, + session_metadata_path: str, + subject_metadata_path: str, + ): + super().__init__( + file_path=file_path, + tdt_path=tdt_path, + tdt_metadata_path=tdt_metadata_path, + session_uuid=session_uuid, + session_id=session_id, + session_metadata_path=session_metadata_path, + subject_metadata_path=subject_metadata_path, + ) + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: + SAMPLING_RATE = 30 + super().add_to_nwbfile(nwbfile, metadata) + processed_photometry = joblib.load(self.source_data["file_path"]) + timestamps = np.arange(processed_photometry["dlight"].shape[0]) / SAMPLING_RATE + signal_series = RoiResponseSeries( + name="SignalF", + description=( + "Demodulated raw fluorescence (F) from the blue light excitation (470nm) " + "(See Methods: Photometry Active Referencing)." + ), + data=H5DataIO(processed_photometry["dlight"], compression=True), + unit="n.a.", + timestamps=H5DataIO(timestamps, compression=True), + rois=self.fibers_ref, + ) + reference_series = RoiResponseSeries( + name="UVReferenceF", + description=( + "Demodulated raw fluorescence (F) from the isosbestic UV excitation (405nm) " + "(See Methods: Photometry Active Referencing)." 
+ ), + data=H5DataIO(processed_photometry["uv"], compression=True), + unit="n.a.", + timestamps=signal_series.timestamps, + rois=self.fibers_ref, + ) + ophys_module = nwb_helpers.get_module(nwbfile, name="ophys", description="Fiber photometry data") + ophys_module.add(signal_series) + ophys_module.add(reference_series) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/irvideointerface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/irvideointerface.py new file mode 100644 index 0000000..c0fb761 --- /dev/null +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/irvideointerface.py @@ -0,0 +1,78 @@ +"""Base class for converting raw video data.""" +from pynwb import NWBFile +import numpy as np +from pathlib import Path +from neuroconv.datainterfaces import VideoInterface +from ..markowitz_gillis_nature_2023.basedattainterface import BaseDattaInterface + + +class IRVideoInterface(BaseDattaInterface): + """IR video interface for markowitz_gillis_nature_2023 conversion of keypoint data.""" + + def __init__( + self, + data_path: str, + session_uuid: str, + session_id: str, + session_metadata_path: str, + subject_metadata_path: str, + ): + super().__init__( + data_path=Path(data_path), + session_uuid=session_uuid, + session_id=session_id, + session_metadata_path=session_metadata_path, + subject_metadata_path=subject_metadata_path, + ) + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: + SAMPLING_RATE = metadata["Constants"]["VIDEO_SAMPLING_RATE"] + matched_timestamp_path = ( + self.source_data["data_path"] / f"photometry-{self.source_data['session_uuid']}.matched_timestamps.npy" + ) + matched_timestamps = np.load(matched_timestamp_path) + assert np.all( + np.diff(matched_timestamps, axis=0) == 1 + ), "Matched timestamp indices are not monotonically increasing" + timestamp_offsets = matched_timestamps[0, :] + camera_names = ["bottom", "side1", "side2", "side3", "side4", "top"] + camera_paths, camera_timestamps = [], [] + for i, camera in enumerate(camera_names): + timestamp_path = ( + self.source_data["data_path"] + / f"photometry-{self.source_data['session_uuid']}.{camera}.system_timestamps.npy" + ) + timestamps = np.load(timestamp_path) + timestamps = timestamps + (np.max(timestamp_offsets) - timestamp_offsets[i]) * SAMPLING_RATE + camera_path = ( + self.source_data["data_path"] / f"photometry-{self.source_data['session_uuid']}.{camera}.ir.avi" + ) + camera_paths.append(camera_path) + camera_timestamps.append(timestamps) + + video_interface = VideoInterface(file_paths=camera_paths, verbose=True) + video_interface.set_aligned_timestamps(aligned_timestamps=camera_timestamps) + video_interface.add_to_nwbfile( + nwbfile=nwbfile, + metadata=metadata, + stub_test=False, + external_mode=True, + starting_frames=timestamp_offsets, + ) + video_metadata = dict( + Behavior=dict( + Videos=[ + dict( + name="ir_video", + description=( + "To capture 3D keypoints, mice were recorded in a multi-camera open field arena with " + "transparent floor and walls. Near-infrared video recordings at 30 Hz were obtained from " + "six cameras (Microsoft Azure Kinect; cameras were placed above, below and at four " + "cardinal directions)." 
+ ), + unit="n.a.", + ) + ] + ) + ) + metadata.update(video_metadata) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/keypointinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/keypointinterface.py new file mode 100644 index 0000000..dfececb --- /dev/null +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/keypointinterface.py @@ -0,0 +1,103 @@ +"""Primary class for converting experiment-specific behavior.""" +import numpy as np +from pynwb import NWBFile +from pynwb.image import RGBImage, Images +import joblib +from ..markowitz_gillis_nature_2023.basedattainterface import BaseDattaInterface +from neuroconv.tools import nwb_helpers +from hdmf.backends.hdf5.h5_utils import H5DataIO +from ndx_pose import PoseEstimationSeries, PoseEstimation + + +class KeypointInterface(BaseDattaInterface): + """Keypoint Interface for markowitz_gillis_nature_2023 conversion""" + + def __init__( + self, + file_path: str, + session_uuid: str, + session_id: str, + session_metadata_path: str, + subject_metadata_path: str, + summary_image_path: str, + ): + super().__init__( + file_path=file_path, + summary_image_path=summary_image_path, + session_uuid=session_uuid, + session_id=session_id, + session_metadata_path=session_metadata_path, + subject_metadata_path=subject_metadata_path, + ) + + def get_metadata_schema(self) -> dict: + metadata_schema = super().get_metadata_schema() + metadata_schema["properties"]["Keypoint"] = { + "type": "object", + "properties": { + "index_to_name": {"type": "object"}, + }, + } + return metadata_schema + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: + SAMPLING_RATE = metadata["Constants"]["VIDEO_SAMPLING_RATE"] + keypoint_dict = joblib.load(self.source_data["file_path"]) + raw_keypoints = keypoint_dict["positions_median"] + timestamps = H5DataIO(np.arange(raw_keypoints.shape[0]) / SAMPLING_RATE, compression=True) + + index_to_name = metadata["Keypoint"]["index_to_name"] + camera_names = ["bottom", "side1", "side2", "side3", "side4", "top"] + keypoints = [] + for camera in camera_names: + nwbfile.create_device( + name=f"{camera}_camera", description=f"{camera} IR camera", manufacturer="Microsoft Azure Kinect" + ) + for keypoint_index, keypoint_name in index_to_name.items(): + keypoint = PoseEstimationSeries( + name=keypoint_name, + description=f"Keypoint corresponding to {keypoint_name}", + data=H5DataIO(raw_keypoints[:, keypoint_index, :], compression=True), + timestamps=timestamps, + unit="mm", + reference_frame=metadata["Keypoint"]["reference_frame"], + ) + keypoints.append(keypoint) + keypoints = PoseEstimation( + pose_estimation_series=keypoints, + name="keypoints", + description=( + "To capture 3D keypoints, mice were recorded in a multi-camera open field arena with " + "transparent floor and walls. Near-infrared video recordings at 30 Hz were obtained from six " + "cameras (Microsoft Azure Kinect; cameras were placed above, below and at four cardinal " + "directions). Separate deep neural networks with an HRNet architecture were trained to detect " + "keypoints in each view (top, bottom and side) using ~1,000 hand-labelled frames70. " + "Frame-labelling was crowdsourced through a commercial service (Scale AI), and included the " + "tail tip, tail base, three points along the spine, the ankle and toe of each hind limb, the " + "forepaws, ears, nose and implant. 
After detection of 2D keypoints from each camera, " + "3D keypoint coordinates were triangulated and then refined using GIMBAL—a model-based " + "approach that leverages anatomical constraints and motion continuity71. GIMBAL requires " + "learning an anatomical model and then applying the model to multi-camera behaviour " + "recordings. For model fitting, we followed the approach described in ref. 71, using 50 pose " + "states and excluding outlier poses using the EllipticEnvelope method from sklearn. " + "For applying GIMBAL to behaviour recordings, we again followed71, setting the parameters " + "obs_outlier_variance, obs_inlier_variance, and pos_dt_variance to 1e6, 10 and 10, " + "respectively for all keypoints." + ), + nodes=list(index_to_name.values()), + edges=metadata["Keypoint"]["edges"], + ) + behavior_module = nwb_helpers.get_module( + nwbfile, + name="behavior", + description="3D Keypoints", + ) + behavior_module.add(keypoints) + summary_image = joblib.load(self.source_data["summary_image_path"]) + summary_image = RGBImage( + name="summary_image", + description="Summary image of 3D keypoints", + data=summary_image, + ) + summary_images = Images(name="summary_images", images=[summary_image]) + behavior_module.add(summary_images) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/markowitz_gillis_nature_2023_keypoint_metadata.yaml b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/markowitz_gillis_nature_2023_keypoint_metadata.yaml new file mode 100644 index 0000000..4ac9d68 --- /dev/null +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/markowitz_gillis_nature_2023_keypoint_metadata.yaml @@ -0,0 +1,68 @@ +NWBFile: + related_publications: + - https://doi.org/10.1038/s41586-022-05611-2 + experiment_description: Abstract -- Spontaneous animal behaviour is built from action modules that are concatenated by the brain into sequences1,2. However, the neural mechanisms that guide the composition of naturalistic, self-motivated behaviour remain unknown. Here we show that dopamine systematically fluctuates in the dorsolateral striatum (DLS) as mice spontaneously express sub-second behavioural modules, despite the absence of task structure, sensory cues or exogenous reward. Photometric recordings and calibrated closed-loop optogenetic manipulations during open field behaviour demonstrate that DLS dopamine fluctuations increase sequence variation over seconds, reinforce the use of associated behavioural modules over minutes, and modulate the vigour with which modules are expressed, without directly influencing movement initiation or moment-to-moment kinematics. Although the reinforcing effects of optogenetic DLS dopamine manipulations vary across behavioural modules and individual mice, these differences are well predicted by observed variation in the relationships between endogenous dopamine and module use. Consistent with the possibility that DLS dopamine fluctuations act as a teaching signal, mice build sequences during exploration as if to maximize dopamine. Together, these findings suggest a model in which the same circuits and computations that govern action choices in structured tasks have a key role in sculpting the content of unconstrained, high-dimensional, spontaneous behaviour. + keywords: + - Basal Ganglia + - Neural circuits + - Reward + institution: Harvard Medical School + lab: Datta + experimenter: + - Markowitz, Jeffrey E. + - Gillis, Winthrop F. + - Jay, Maya + - Wood, Jeffrey + - Harris, Ryley W. 
+ - Cieszkowski, Robert + - Scott, Rebecca + - Brann, David + - Koveal, Dorothy + - Kula, Tomasz + - Weinreb, Caleb + - Osman, Mohammed Abdal Monium + - Pinto, Sandra Romero + - Uchida, Naoshige + - Linderman, Scott W. + - Sabatini, Bernardo L. + - Datta, Sandeep Robert +Subject: + species: Mus musculus + age: P6W/P15W + description: This study utilized wild-type DAT-IRES-Cre (The Jackson Laboratory 006660) and Ai32 (The Jackson Laboratory 012569) mice, both male and female, between 6-15 weeks of age. +Constants: + VIDEO_SAMPLING_RATE: 30.0 + TIMESTAMPS_TO_SECONDS: 1.25e-4 +Keypoint: + reference_frame: TBD + index_to_name: # Mapping from keypoint index --> keypoint name (unique) [See fig_s03 notebook] + 0: 'rostral_spine' + 1: 'medial_spine' + 2: 'caudal_spine' + 3: 'tail_base' + 4: 'tail_tip' + 5: 'head_center' + 6: 'left_ear' + 7: 'right_ear' + 8: 'nose' + 9: 'left_hind_ankle' + 10: 'left_hindpaw' + 11: 'right_hind_ankle' + 12: 'right_hindpaw' + 13: 'left_forepaw' + 14: 'right_forepaw' + edges: + - [4, 3] + - [3, 2] + - [2, 1] + - [1, 0] + - [0, 5] + - [5, 6] + - [5, 7] + - [5, 8] + - [2, 9] + - [9, 10] + - [2, 11] + - [11, 12] + - [0, 13] + - [0, 14] diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/nwbconverter.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/nwbconverter.py new file mode 100644 index 0000000..e71a71a --- /dev/null +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/nwbconverter.py @@ -0,0 +1,15 @@ +"""Primary NWBConverter class for this dataset.""" +from neuroconv import NWBConverter +from .fiberphotometryinterface import FiberPhotometryInterface +from .irvideointerface import IRVideoInterface +from .keypointinterface import KeypointInterface + + +class NWBConverter(NWBConverter): + """Primary conversion class.""" + + data_interface_classes = dict( + FiberPhotometry=FiberPhotometryInterface, + IRVideo=IRVideoInterface, + Keypoint=KeypointInterface, + ) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/post_conversion/reproduce_figS3.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/post_conversion/reproduce_figS3.py new file mode 100644 index 0000000..16b964e --- /dev/null +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/post_conversion/reproduce_figS3.py @@ -0,0 +1,428 @@ +import os +import toml +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + +from scipy import signal +from toolz import partial +from tqdm.auto import tqdm + +from rl_analysis.util import zscore +from sklearn.linear_model import LinearRegression +from rl_analysis.photometry.signal import rolling_fluor_normalization, rereference + +from pynwb import NWBHDF5IO +from pathlib import Path +import yaml + + +def _partial(df, x, y, window=range(-30, 30)): + # new approach - remove transfer function here + X = [df[x].shift(i).rename(index=str(i)) for i in window] + X = pd.DataFrame(X).T + + X[y] = df[y] + X = X.dropna() + + mdl = LinearRegression() + mdl.fit(X.drop(columns=[y]), X[y]) + p = mdl.predict(X.drop(columns=[y])) + df.loc[X.index, f"{y} partial"] = X[y] - p + df[f"{y} partial (z)"] = zscore(df[f"{y} partial"]) + return df + + +# rereference the dLight signal to remove motion artifact +def filter_signal(v): + sos = signal.butter(2, 3, output="sos", fs=30.0) + f = signal.sosfiltfilt(sos, v) + return f + + +def get_z(x, y, bw_adjust=1, subsample_factor=100): + from scipy.stats import gaussian_kde + + xy = np.vstack([x[::subsample_factor], 
y[::subsample_factor]]) + kernel = gaussian_kde(xy) + kernel.set_bandwidth(kernel.scotts_factor() * bw_adjust) + z = kernel(np.vstack([x, y])) + return z + + +def partial_out(df, xs, y): + assert isinstance(xs, list) + X = df[xs] + y = df[y] + mdl = LinearRegression() + mdl.fit(X, y) + p = mdl.predict(X) + return zscore(y - p) + + +def compute_keypoint_velocity(kp, n=2): + return (kp[n:] - kp[:-n]) / n + + +def compute_global_velocity(kp, n=2): + # nframes X nkeypoints X 3 + kp = np.nanmean(kp[:, :3], axis=1) + v = (kp[n:] - kp[:-n]) / n + # only consider the first 3 points as they are the spine + return np.linalg.norm(v, axis=1) + + +def bin_data(data, timescale, n_offsets=0, neural_agg="mean", data_agg="mean", window=range(-15, 15)): + step = timescale / (n_offsets + 1) + offsets = np.arange(0, timescale, step) + + time = np.arange(0, 1500, timescale) # step through 0-1500 seconds at this timescale + + aggs = [] + for offset in offsets: + cuts = pd.cut(data["timestamp"], time + offset, labels=False).astype("Int16").rename("time bin") + data["time bin"] = cuts + agg = data.groupby(["mouse_id", "time bin"]).agg(neural_agg) + agg["offset"] = offset + aggs.append(agg) + agg_matrix = pd.concat(aggs).reset_index() + agg_matrix["timescale"] = timescale + agg_matrix = agg_matrix.sort_values(by=["mouse_id", "timestamp"]) + + # partial out velocity + for dim in ("2d", "3d"): + agg_matrix = agg_matrix.groupby("mouse_id", group_keys=False).apply( + _partial, x=f"body speed {dim} (z)", y=f"forelimb speed {dim} (z)", window=window + ) + # partial out 2d velocity from 3d forelimb speed + agg_matrix = agg_matrix.groupby("mouse_id", group_keys=False).apply( + _partial, x="body speed 2d (z)", y="forelimb speed 3d (z)", window=window + ) + # partial velocity from dlight + agg_matrix = agg_matrix.groupby("mouse_id", group_keys=False).apply( + _partial, x="body speed 2d (z)", y="dlight_reref_dff (z)", window=window + ) + + if isinstance(neural_agg, str): + agg_matrix["neural_agg"] = neural_agg + elif hasattr(neural_agg, "__name__"): + agg_matrix["neural_agg"] = neural_agg.__name__ + + return agg_matrix + + +def reproduce_figS3(nwbfile_paths, config_path, metadata): + print("reproducing figure S3") + with open(config_path, "r") as f: + config = toml.load(f) + dlight_config = config["dlight_basic_analysis"] + index_to_name = metadata["Keypoint"]["index_to_name"] + data = {} + for nwbfile_path in nwbfile_paths: + with NWBHDF5IO(nwbfile_path, "r", load_namespaces=True) as io: + nwbfile = io.read() + timestamps = ( + nwbfile.processing["behavior"] + .data_interfaces["keypoints"] + .pose_estimation_series["rostral spine"] + .timestamps[:] + ) + positions_median = np.zeros((len(timestamps), 15, 3)) + for i, name in index_to_name.items(): + positions_median[:, i, :] = ( + nwbfile.processing["behavior"].data_interfaces["keypoints"].pose_estimation_series[name].data[:] + ) + data[os.path.basename(nwbfile_path)] = { + "dlight": nwbfile.processing["ophys"].data_interfaces["SignalF"].data[:], + "uv": nwbfile.processing["ophys"].data_interfaces["UVReferenceF"].data[:], + "positions_median": positions_median, + } + summary_image = ( + nwbfile.processing["behavior"].data_interfaces["summary_images"].images["summary_image"].data[:] + ) + + plot_3a(summary_image=summary_image) + + window = range(-10, 10) + dfs = {} + + for k, v in data.items(): + speed = compute_global_velocity(v["positions_median"]) + kp_vels = compute_keypoint_velocity(v["positions_median"]) + kp_speed = np.linalg.norm(kp_vels, axis=2) + + speed_2d = 
compute_global_velocity(v["positions_median"][..., :2]) + kp_vels_2d = compute_keypoint_velocity(v["positions_median"][..., :2]) + kp_speed_2d = np.linalg.norm(kp_vels_2d, axis=2) + + dlight_df = pd.DataFrame( + { + "dLight": rolling_fluor_normalization(v["dlight"][2:], normalizer="zscore"), + "body speed 3d": speed, + "right forepaw 3d": kp_speed[:, 14], + "left forepaw 3d": kp_speed[:, 13], + "right hindpaw 3d": kp_speed[:, 12], + "left hindpaw 3d": kp_speed[:, 10], + "forelimb speed 3d": np.nanmean(kp_speed[:, [13, 14]], axis=1), + "hindlimb speed 3d": np.nanmean(kp_speed[:, [10, 12]], axis=1), + "body speed 2d": speed_2d, + "right forepaw 2d": kp_speed_2d[:, 14], + "left forepaw 2d": kp_speed_2d[:, 13], + "right hindpaw 2d": kp_speed_2d[:, 12], + "left hindpaw 2d": kp_speed_2d[:, 10], + "forelimb speed 2d": np.nanmean(kp_speed_2d[:, [13, 14]], axis=1), + "hindlimb speed 2d": np.nanmean(kp_speed_2d[:, [10, 12]], axis=1), + "mouse": k, + } + ) + y = [] + + for dimension in ("2d", "3d"): + X = [dlight_df[f"body speed {dimension}"].shift(i) for i in window] + X = pd.DataFrame(np.array(X).T, columns=map(str, window), index=dlight_df.index) + + X[f"forelimb speed {dimension}"] = dlight_df[f"forelimb speed {dimension}"] + X = X.dropna() + + mdl = LinearRegression() + + mdl.fit( + X.drop(columns=[f"forelimb speed {dimension}"]), + X[[f"forelimb speed {dimension}"]], + ) + + X[f"forelimb partial {dimension}"] = ( + X[f"forelimb speed {dimension}"] + - mdl.predict(X.drop(columns=[f"forelimb speed {dimension}"])).squeeze() + ) + X[f"forelimb partial {dimension} (z)"] = zscore(X[f"forelimb partial {dimension}"]) + X[f"forelimb speed {dimension} (z)"] = zscore(X[f"forelimb speed {dimension}"]) + if dimension == "2d": + X["dLight"] = dlight_df["dLight"] + X["dLight (z)"] = zscore(X["dLight"]) + X["timestamp"] = np.arange(len(X)) / 30 + X["mouse_id"] = k + X[f"speed {dimension} (z)"] = zscore(X["0"]) + + y.append(X.drop(columns=map(str, window))) + dfs[k] = pd.concat(y, axis=1) + + df = pd.concat(dfs.values()) + + secs = 30 * 10 + threshold = 1.96 + window = range(-secs, secs) + dff_func = partial(rolling_fluor_normalization, window_size=5, quantile=0.1, normalizer="dff") + dfs = {} + + for k, v in tqdm(data.items()): + speed = compute_global_velocity(v["positions_median"]) + kp_vels = compute_keypoint_velocity(v["positions_median"]) + kp_speed = np.linalg.norm(kp_vels, axis=2) + + speed_2d = compute_global_velocity(v["positions_median"][..., :2]) + kp_vels_2d = compute_keypoint_velocity(v["positions_median"][..., :2]) + kp_speed_2d = np.linalg.norm(kp_vels_2d, axis=2) + + signal_dff = dff_func(v["dlight"][2:]) + reference_dff = dff_func(v["uv"][2:]) + + dlight_df = pd.DataFrame( + { + "dlight_dff": signal_dff, + "reference_dff": reference_dff, + "body speed 3d": speed, + "right forepaw 3d": kp_speed[:, 14], + "left forepaw 3d": kp_speed[:, 13], + "right hindpaw 3d": kp_speed[:, 12], + "left hindpaw 3d": kp_speed[:, 10], + "forelimb speed 3d": np.nanmean(kp_speed[:, [13, 14]], axis=1), + "hindlimb speed 3d": np.nanmean(kp_speed[:, [10, 12]], axis=1), + "body speed 2d": speed_2d, + "right forepaw 2d": kp_speed_2d[:, 14], + "left forepaw 2d": kp_speed_2d[:, 13], + "right hindpaw 2d": kp_speed_2d[:, 12], + "left hindpaw 2d": kp_speed_2d[:, 10], + "forelimb speed 2d": np.nanmean(kp_speed_2d[:, [13, 14]], axis=1), + "hindlimb speed 2d": np.nanmean(kp_speed_2d[:, [10, 12]], axis=1), + "height": v["positions_median"][2:, 5, 2], + "mouse_id": k, + "timestamp": np.arange(len(speed)) / 30, + } + ) + nans = 
dlight_df["dlight_dff"].isna() + x = dlight_df.loc[~nans] + dlight_df["dlight_reref_dff"] = rereference( + pd.Series(filter_signal(x["reference_dff"]), index=x.index), + x["dlight_dff"], + center=False, + clip=False, + )["rereference"] + dlight_df["dlight_reref_dff"] = rolling_fluor_normalization( + dlight_df["dlight_reref_dff"], normalizer="zscore", window_size=20 + ) + z_cols = [x for x in dlight_df.columns if x not in ("mouse_id", "timestamp")] + + dlight_df[[f"{c} (z)" for c in z_cols]] = zscore(dlight_df[z_cols]) + dlight_df["peak_rate_cross"] = 0 + z = dlight_df["dlight_reref_dff (z)"].to_numpy() + dlight_df.loc[dlight_df.index[1:], "peak_rate_cross"] = (z[:-1] < threshold) & (z[1:] > threshold) + y = [] + + # partial body speed from dLight + dlight_df = _partial(dlight_df, x="body speed 2d (z)", y="dlight_reref_dff (z)", window=window) + + dfs[k] = dlight_df + df = pd.concat(dfs.values()) + df["peak_rate_cross"] = df["peak_rate_cross"].astype(float) + + results = bin_data(data=df, timescale=0.4, n_offsets=0, window=range(1)) + + forelimb_partial = results.groupby("mouse_id", sort=False).apply( + partial_out, + xs=[ + "height", + "body speed 2d", + "body speed 3d", + ], + y="forelimb speed 3d", + ) + results["forelimb_partial"] = forelimb_partial.droplevel(0) + + forelimb_partial = results.groupby("mouse_id", sort=False).apply( + partial_out, + xs=[ + "height", + "body speed 2d", + "body speed 3d", + ], + y="dlight_reref_dff (z)", + ) + results["dlight_partial"] = forelimb_partial.droplevel(0) + + corrs = results.groupby(["timescale", "mouse_id", "neural_agg"]).corr(method="pearson") + corrs.index = corrs.index.rename("feature", level=-1) + plot_3c(results=results) + + timescales = np.arange(*dlight_config["timescale_correlation"]["bins"]) + + func = partial(bin_data, data=df, n_offsets=4, window=range(1)) + results_list = [func(timescale=t) for t in tqdm(timescales)] + + results = pd.concat(results_list) + + corrs = results.groupby(["timescale", "mouse_id", "neural_agg"]).corr(method="pearson") + corrs.index = corrs.index.rename("feature", level=-1) + plot_3b(corrs=corrs) + + +def plot_3a(summary_image): + fig = plt.figure() + plt.imshow(summary_image) + plt.show() + + +def plot_3c(results): + tmp = results.dropna() + x = tmp["dlight_reref_dff (z)"] + y = tmp["body speed 2d (z)"] + z = get_z(x, y, 1, subsample_factor=100) + + fig = plt.figure(figsize=(2, 2), dpi=300) + plt.scatter(x, y, c=z, s=2, alpha=0.3, cmap="inferno", linewidths=0) + ax = sns.regplot( + data=results, + x="dlight_reref_dff (z)", + y="body speed 2d (z)", + n_boot=100, + line_kws=dict(color="darkorange"), + ci=95, + scatter=False, + ) + ax.set(ylabel="2D velocity (z)", xlabel="dF/F0 (z)") + sns.despine() + plt.xlabel("dF/F0 (z)") + plt.ylabel("2D velocity (z)") + plt.show() + + tmp = results.dropna() + x = tmp["dlight_partial"] + y = tmp["forelimb speed 3d (z)"] + z = get_z(x, y, 1, subsample_factor=100) + + fig = plt.figure(figsize=(2, 2), dpi=300) + fig.dpi = 300 + plt.scatter(x, y, c=z, s=2, alpha=0.3, cmap="inferno", linewidths=0) + ax = sns.regplot( + data=results, + x="dlight_partial", + y="forelimb speed 3d (z)", + n_boot=100, + line_kws=dict(color="darkorange"), + ci=95, + scatter=False, + ) + ax.set(ylabel="3D forelimb velocity (z)", xlabel="dF/F0 (partialed; z)") + sns.despine() + plt.xlabel("dF/F0 (partialed; z)") + plt.ylabel("3D forelimb velocity (z)") + plt.show() + + +def plot_3b(corrs): + dlight_to_vel_corrs = corrs.xs("dlight_reref_dff (z)", level="feature") + # dlight_to_vel_corrs = 
corrs.xs("dlight_dff", level="feature") + + c_ave = dlight_to_vel_corrs.groupby(["mouse_id", "timescale", "neural_agg"]).mean() + c_ave = c_ave.melt(ignore_index=False) + + fig = plt.figure(figsize=(2, 2), dpi=200) + ax = sns.lineplot( + data=c_ave.reset_index(), + x="timescale", + y="value", + hue="variable", + n_boot=100, + hue_order=["body speed 2d (z)"], + errorbar=("ci", 68), + err_kws=dict(lw=0), + ) + ax.axhline(0, ls="--", c="k") + ax.set(ylabel="Correlation (Pearson R)", xlabel="Bin duration (s)", ylim=(-0.25, 0.2)) + plt.legend(frameon=False, loc="upper left", bbox_to_anchor=(1, 1)) + sns.despine() + plt.show() + + dlight_to_vel_corrs = corrs.xs("peak_rate_cross", level="feature") + c_ave = dlight_to_vel_corrs.groupby(["mouse_id", "timescale", "neural_agg"]).mean() + c_ave = c_ave.melt(ignore_index=False) + + fig = plt.figure(figsize=(2, 2), dpi=200) + ax = sns.lineplot( + data=c_ave.reset_index(), + x="timescale", + y="value", + hue="variable", + n_boot=100, + hue_order=["body speed 2d (z)"], + errorbar=("ci", 68), + err_kws=dict(lw=0), + ) + ax.axhline(0, ls="--", c="k") + ax.set(ylabel="Correlation (Pearson r)", xlabel="Bin duration (s)", ylim=(-0.25, 0.2)) + plt.legend(frameon=False, loc="upper left", bbox_to_anchor=(1, 1)) + sns.despine() + plt.show() + + +if __name__ == "__main__": + config_path = "/Users/pauladkisson/Documents/CatalystNeuro/NWB/DattaConv/dopamine-reinforces-spontaneous-behavior/analysis_configuration.toml" + output_dir_path = Path("/Volumes/T7/CatalystNeuro/NWB/Datta/conversion_nwb/") + nwbfile_paths = [output_dir_path / f"keypoint-dls-dlight-{i}.nwb" for i in range(9, 14)] + with open( + "/Users/pauladkisson/Documents/CatalystNeuro/NWB/DattaConv/catalystneuro/datta-lab-to-nwb/src/datta_lab_to_nwb/markowitz_gillis_nature_2023_keypoint/markowitz_gillis_nature_2023_keypoint_metadata.yaml", + "r", + ) as f: + metadata = yaml.safe_load(f) + reproduce_figS3(nwbfile_paths=nwbfile_paths, config_path=config_path, metadata=metadata)