From c0309436eec7925398042fdfe63a199255754082 Mon Sep 17 00:00:00 2001
From: Nicholas Watters
Date: Mon, 18 Dec 2023 18:02:48 -0500
Subject: [PATCH 1/8] Nick's changes.

---
 README.md                                     |  40 +-
 requirements.txt                              |  11 +-
 src/jazayeri_lab_to_nwb/watters/README.md     |  56 +++
 src/jazayeri_lab_to_nwb/watters/__init__.py   |   8 +-
 .../watters/display_interface.py              |  98 +++++
 .../watters/get_session_paths.py              | 131 +++++++
 .../watters/main_convert_session.py           | 359 ++++++++++++++++++
 src/jazayeri_lab_to_nwb/watters/metadata.yaml |  15 +
 ...attersnwbconverter.py => nwb_converter.py} |  96 +++--
 ...inginterface.py => recording_interface.py} |   5 +-
 ...ters_requirements.txt => requirements.txt} |   0
 .../watters/timeseries_interface.py           | 212 +++++++++++
 .../watters/trials_interface.py               | 190 +++++++++
 .../watters/watters_convert_session.py        | 253 ------------
 .../watters/watters_metadata.yaml             |  16 -
 .../watters/watters_notes.md                  |   1 -
 .../watters/wattersbehaviorinterface.py       | 127 -------
 .../watters/watterstrialsinterface.py         | 186 ---------
 18 files changed, 1135 insertions(+), 669 deletions(-)
 create mode 100644 src/jazayeri_lab_to_nwb/watters/README.md
 create mode 100644 src/jazayeri_lab_to_nwb/watters/display_interface.py
 create mode 100644 src/jazayeri_lab_to_nwb/watters/get_session_paths.py
 create mode 100644 src/jazayeri_lab_to_nwb/watters/main_convert_session.py
 create mode 100644 src/jazayeri_lab_to_nwb/watters/metadata.yaml
 rename src/jazayeri_lab_to_nwb/watters/{wattersnwbconverter.py => nwb_converter.py} (68%)
 rename src/jazayeri_lab_to_nwb/watters/{wattersrecordinginterface.py => recording_interface.py} (97%)
 rename src/jazayeri_lab_to_nwb/watters/{watters_requirements.txt => requirements.txt} (100%)
 create mode 100644 src/jazayeri_lab_to_nwb/watters/timeseries_interface.py
 create mode 100644 src/jazayeri_lab_to_nwb/watters/trials_interface.py
 delete mode 100644 src/jazayeri_lab_to_nwb/watters/watters_convert_session.py
 delete mode 100644 src/jazayeri_lab_to_nwb/watters/watters_metadata.yaml
 delete mode 100644 src/jazayeri_lab_to_nwb/watters/watters_notes.md
 delete mode 100644 src/jazayeri_lab_to_nwb/watters/wattersbehaviorinterface.py
 delete mode 100644 src/jazayeri_lab_to_nwb/watters/watterstrialsinterface.py

diff --git a/README.md b/README.md
index 7ee8a68..0769fcf 100644
--- a/README.md
+++ b/README.md
@@ -40,27 +40,22 @@ Each conversion is organized in a directory of its own in the `src` directory:
     └── src
         ├── jazayeri_lab_to_nwb
         │   ├── watters
-        │       ├── wattersbehaviorinterface.py
-        │       ├── watters_convert_session.py
-        │       ├── watters_metadata.yml
-        │       ├── wattersnwbconverter.py
-        │       ├── watters_requirements.txt
-        │       ├── watters_notes.md
-
+        │       ├── timeseries_interface.py
+        │       ├── main_convert_session.py
+        │       ├── metadata.yaml
+        │       ├── nwb_converter.py
+        │       ├── requirements.txt
         │       └── __init__.py
-        │   └── another_conversion
-
     └── __init__.py

For example, for the conversion `watters` you can find a directory located in `src/jazayeri-lab-to-nwb/watters`. Inside each conversion directory you can find the following files:

-* `watters_convert_sesion.py`: this script defines the function to convert one full session of the conversion.
-* `watters_requirements.txt`: dependencies specific to this conversion.
-* `watters_metadata.yml`: metadata in yaml format for this specific conversion.
-* `wattersbehaviorinterface.py`: the behavior interface. Usually ad-hoc for each conversion.
-* `wattersnwbconverter.py`: the place where the `NWBConverter` class is defined.
-* `watters_notes.md`: notes and comments concerning this specific conversion.
+* `main_convert_session.py`: this script defines the function to convert one full session of the conversion.
+* `requirements.txt`: dependencies specific to this conversion.
+* `metadata.yaml`: metadata in YAML format for this specific conversion.
+* `timeseries_interface.py`: the interfaces for timeseries data (eye position, pupil size, reward line, audio). Usually ad-hoc for each conversion.
+* `nwb_converter.py`: the place where the `NWBConverter` class is defined.

 The directory might contain other files that are necessary for the conversion but those are the central ones.

@@ -73,15 +68,16 @@ pip install -r src/jazayeri_lab_to_nwb/watters/watters_requirements.txt

 You can run a specific conversion with the following command:
 ```
-python src/jazayeri_lab_to_nwb/watters/watters_convert_session.py
+python src/jazayeri_lab_to_nwb/watters/main_convert_session.py $SUBJECT $SESSION
 ```

 ### Watters working memory task data
-The conversion function for this experiment, `session_to_nwb`, is found in `src/watters/watters_convert_session.py`. The function takes three arguments:
-* `data_dir_path` points to the root directory for the data for a given session.
-* `output_dir_path` points to where the converted data should be saved.
+The conversion function for this experiment, `session_to_nwb`, is found in `src/jazayeri_lab_to_nwb/watters/main_convert_session.py`. The function takes the following arguments:
+* `subject` subject name, either `'Perle'` or `'Elgar'`.
+* `session` session date in format `'YYYY-MM-DD'`.
 * `stub_test` indicates whether only a small portion of the data should be saved (mainly used by us for testing purposes).
-* `overwrite` indicates whether existing NWB files at the auto-generated output file paths should be overwritten.
+* `overwrite` indicates whether to overwrite existing NWB output files.
+* `dandiset_id` optional ID of a DANDI dataset to upload the converted files to.

 The function can be imported into a separate script and run, or you can run the file directly and specify the arguments in the `if __name__ == "__main__"` block at the bottom.

@@ -111,8 +107,8 @@ The function expects the raw data in `data_dir_path` to follow this structure:
 └── spikeglx
 ...

-The conversion will try to automatically fetch metadata from the provided data directory. However, some information, such as the subject's name and age, must be specified by the user in the file `src/jazayeri_lab_to_nwb/watters/watters_metadata.yaml`. If any of the automatically fetched metadata is incorrect, it can also be overriden from this file.
+The conversion will try to automatically fetch metadata from the provided data directory. However, some information, such as the subject's name and age, must be specified by the user in the file `src/jazayeri_lab_to_nwb/watters/metadata.yaml`. If any of the automatically fetched metadata is incorrect, it can also be overridden from this file.

 The converted data will be saved in two files, one called `{session_id}_raw.nwb`, which contains the raw electrophysiology data from the Neuropixels and V-Probes, and one called `{session_id}_processed.nwb` with behavioral data, trial info, and sorted unit spiking.

-If you run into memory issues when writing the `{session_id}_raw.nwb` files, you may want to set `buffer_gb` to a value smaller than 1 (its default) in the `conversion_options` dicts for the recording interfaces, i.e. 
[here](https://github.com/catalystneuro/jazayeri-lab-to-nwb/blob/vprobe_dev/src/jazayeri_lab_to_nwb/watters/watters_convert_session.py#L49) and [here](https://github.com/catalystneuro/jazayeri-lab-to-nwb/blob/vprobe_dev/src/jazayeri_lab_to_nwb/watters/watters_convert_session.py#L71).
+If you run into memory issues when writing the `{session_id}_raw.nwb` files, you may want to set `buffer_gb` to a value smaller than 1 (its default) in the `conversion_options` dicts for the recording interfaces, i.e. [here](https://github.com/catalystneuro/jazayeri-lab-to-nwb/blob/vprobe_dev/src/jazayeri_lab_to_nwb/watters/main_convert_session.py#L189).
diff --git a/requirements.txt b/requirements.txt
index e411ef3..ee88472 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
-neuroconv==0.4.4
-spikeinterface==0.98.2
-nwbwidgets
-nwbinspector
-pre-commit
+neuroconv==0.4.6
+spikeinterface==0.99.1
+nwbwidgets==0.11.3
+nwbinspector==0.4.31
+pre-commit==3.6.0
+ndx-events==0.2.0
diff --git a/src/jazayeri_lab_to_nwb/watters/README.md b/src/jazayeri_lab_to_nwb/watters/README.md
new file mode 100644
index 0000000..e718409
--- /dev/null
+++ b/src/jazayeri_lab_to_nwb/watters/README.md
@@ -0,0 +1,56 @@
+# Watters data conversion pipeline
+NWB conversion scripts for Watters data to the [Neurodata Without Borders](https://nwb-overview.readthedocs.io/) data format.
+
+
+## Usage
+To run a specific conversion, you might first need to install some conversion-specific dependencies that are located in each conversion directory:
+```
+pip install -r src/jazayeri_lab_to_nwb/watters/requirements.txt
+```
+
+You can run a specific conversion with the following command:
+```
+python src/jazayeri_lab_to_nwb/watters/main_convert_session.py $SUBJECT $SESSION
+```
+
+### Watters working memory task data
+The conversion function for this experiment, `session_to_nwb`, is found in `src/jazayeri_lab_to_nwb/watters/main_convert_session.py`. The function takes the following arguments:
+* `subject` subject name, either `'Perle'` or `'Elgar'`.
+* `session` session date in format `'YYYY-MM-DD'`.
+* `stub_test` indicates whether only a small portion of the data should be saved (mainly used by us for testing purposes).
+* `overwrite` indicates whether to overwrite existing NWB output files.
+* `dandiset_id` optional ID of a DANDI dataset to upload the converted files to.
+
+The function can be imported into a separate script and run, or you can run the file directly and specify the arguments in the `if __name__ == "__main__"` block at the bottom.
+
+The function expects the session's raw data directory (resolved via `get_session_paths.py`) to follow this structure:
+
+    data_dir_path/
+    ├── data_open_source
+    │   ├── behavior
+    │   │   └── eye.h.times.npy, etc.
+    │   ├── task
+    │   │   └── trials.start_times.json, etc.
+    │   └── probes.metadata.json
+    ├── raw_data
+    │   ├── spikeglx
+    │   │   └── */*/*.ap.bin, */*/*.lf.bin, etc.
+    │   ├── v_probe_0
+    │   │   └── raw_data.dat
+    │   └── v_probe_{n}
+    │       └── raw_data.dat
+    ├── spike_sorting_raw
+    │   ├── np
+    │   ├── vp_0
+    │   └── vp_{n}
+    ├── sync_pulses
+    │   ├── mworks
+    │   ├── open_ephys
+    │   └── spikeglx
+    ...
+
+The conversion will try to automatically fetch metadata from the provided data directory. However, some information, such as the subject's name and age, must be specified by the user in the file `src/jazayeri_lab_to_nwb/watters/metadata.yaml`. If any of the automatically fetched metadata is incorrect, it can also be overridden from this file.
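+
+As noted above, `session_to_nwb` can also be imported and called from Python. A minimal sketch (it assumes this directory is on your import path and that the session paths in `get_session_paths.py` resolve on your machine):
+
+```python
+from main_convert_session import session_to_nwb
+
+# Convert a small stub of one session to test the pipeline end-to-end.
+session_to_nwb(subject='Perle', session='2022-06-01', stub_test=True)
+```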
+
+The converted data will be saved in two files, one called `{session_id}_raw.nwb`, which contains the raw electrophysiology data from the Neuropixels and V-Probes, and one called `{session_id}_processed.nwb` with behavioral data, trial info, and sorted unit spiking.
+
+If you run into memory issues when writing the `{session_id}_raw.nwb` files, you may want to set `buffer_gb` to a value smaller than 1 (its default) in the `conversion_options` dicts for the recording interfaces, i.e. [here](https://github.com/catalystneuro/jazayeri-lab-to-nwb/blob/vprobe_dev/src/jazayeri_lab_to_nwb/watters/main_convert_session.py#L189).
diff --git a/src/jazayeri_lab_to_nwb/watters/__init__.py b/src/jazayeri_lab_to_nwb/watters/__init__.py
index 880f32a..06f0206 100644
--- a/src/jazayeri_lab_to_nwb/watters/__init__.py
+++ b/src/jazayeri_lab_to_nwb/watters/__init__.py
@@ -1,4 +1,4 @@
-from .wattersbehaviorinterface import WattersEyePositionInterface, WattersPupilSizeInterface
-from .watterstrialsinterface import WattersTrialsInterface
-from .wattersrecordinginterface import WattersDatRecordingInterface
-from .wattersnwbconverter import WattersNWBConverter
+from .timeseries_interface import EyePositionInterface, PupilSizeInterface
+from .trials_interface import TrialsInterface
+from .recording_interface import DatRecordingInterface
+from .nwb_converter import NWBConverter
diff --git a/src/jazayeri_lab_to_nwb/watters/display_interface.py b/src/jazayeri_lab_to_nwb/watters/display_interface.py
new file mode 100644
index 0000000..096f362
--- /dev/null
+++ b/src/jazayeri_lab_to_nwb/watters/display_interface.py
@@ -0,0 +1,98 @@
+"""Class for converting data about display frames."""
+
+import itertools
+import json
+from pathlib import Path
+from typing import Optional
+
+import numpy as np
+import pandas as pd
+from neuroconv.datainterfaces.text.timeintervalsinterface import TimeIntervalsInterface
+from neuroconv.utils import DeepDict, FilePathType, FolderPathType
+from pynwb import NWBFile
+
+
+class DisplayInterface(TimeIntervalsInterface):
+    """Class for converting data about display frames.
+
+    All events that occur exactly once per display update are contained in this
+    interface.
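+
+    In the conversion pipeline this interface is constructed by the
+    `NWBConverter` in nwb_converter.py, but a standalone usage sketch looks
+    like this (the folder path is hypothetical; it must contain the
+    trials.json file this interface reads):
+
+        interface = DisplayInterface(folder_path='/path/to/task_behavior_data')
+        interface.add_to_nwbfile(nwbfile=nwbfile, metadata=metadata)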
+    """
+
+    KEY_MAP = {
+        'frame_object_positions': 'object_positions',
+        'frame_fixation_cross_scale': 'fixation_cross_scale',
+        'frame_closed_loop_gaze_position': 'closed_loop_eye_position',
+        'frame_task_phase': 'task_phase',
+        'frame_display_times': 'start_time',
+    }
+
+    def __init__(self, folder_path: FolderPathType, verbose: bool = True):
+        super().__init__(file_path=folder_path, verbose=verbose)
+
+    def get_metadata(self) -> dict:
+        metadata = super().get_metadata()
+        metadata['TimeIntervals'] = dict(
+            display=dict(
+                table_name='display',
+                table_description='data about each displayed frame',
+            )
+        )
+        return metadata
+
+    def get_timestamps(self) -> np.ndarray:
+        return super(DisplayInterface, self).get_timestamps(column='start_time')
+
+    def set_aligned_starting_time(self, aligned_starting_time: float) -> None:
+        self.dataframe.start_time += aligned_starting_time
+
+    def _read_file(self, file_path: FolderPathType):
+        # Create dataframe with data for each frame
+        trials = json.load(open(Path(file_path) / 'trials.json', 'r'))
+        frames = {
+            k_mapped: list(itertools.chain(*[d[k] for d in trials]))
+            for k, k_mapped in DisplayInterface.KEY_MAP.items()
+        }
+
+        # Serialize object_positions data for hdf5 conversion to work
+        frames['object_positions'] = [
+            json.dumps(x) for x in frames['object_positions']
+        ]
+
+        return pd.DataFrame(frames)
+
+    def add_to_nwbfile(self,
+                       nwbfile: NWBFile,
+                       metadata: Optional[dict] = None,
+                       tag: str = 'display'):
+        return super(DisplayInterface, self).add_to_nwbfile(
+            nwbfile=nwbfile,
+            metadata=metadata,
+            tag=tag,
+            column_descriptions=self.column_descriptions,
+        )
+
+    @property
+    def column_descriptions(self):
+        column_descriptions = {
+            'object_positions': (
+                'For each frame, a serialized list with one element for each '
+                'object. Each element is an (x, y) position of the '
+                'corresponding object, in coordinates of arena width.'
+            ),
+            'fixation_cross_scale': (
+                'For each frame, the scale of the central fixation cross. '
+                'Fixation cross scale grows as the eye position deviates from '
+                'the center of the fixation cross, to provide a cue to '
+                'maintain good fixation.'
+            ),
+            'closed_loop_eye_position': (
+                'For each frame, the eye position in the closed-loop task '
+                'engine. This was used for real-time eye position '
+                'computations, such as saccade detection and reward delivery.'
+            ),
+            'task_phase': 'The phase of the task for each frame.',
+            'start_time': 'Time of display update for each frame.',
+        }
+
+        return column_descriptions
diff --git a/src/jazayeri_lab_to_nwb/watters/get_session_paths.py b/src/jazayeri_lab_to_nwb/watters/get_session_paths.py
new file mode 100644
index 0000000..0cb29e3
--- /dev/null
+++ b/src/jazayeri_lab_to_nwb/watters/get_session_paths.py
@@ -0,0 +1,131 @@
+"""Functions for getting paths to data on openmind and globus."""
+
+import collections
+import pathlib
+
+SUBJECT_NAME_TO_ID = {
+    'Perle': 'monkey0',
+    'Elgar': 'monkey1',
+}
+
+SessionPaths = collections.namedtuple(
+    'SessionPaths',
+    [
+        'output',
+        'raw_data',
+        'data_open_source',
+        'task_behavior_data',
+        'sync_pulses',
+        'spike_sorting_raw',
+    ],
+)
+
+
+def _get_session_paths_openmind(subject, session, stub_test=False):
+    """Get paths to all components of the data on openmind."""
+    subject_id = SUBJECT_NAME_TO_ID[subject]
+
+    # Path to write output nwb files to
+    output_path = (
+        f'/om/user/nwatters/nwb_data_multi_prediction/{subject}/{session}'
+    )
+    if stub_test:
+        output_path = f'{output_path}/stub'
+
+    # Path to the raw data.
This is used for reading raw physiology data. + raw_data_path = ( + f'/om4/group/jazlab/nwatters/multi_prediction/phys_data/{subject}/' + f'{session}/raw_data' + ) + + # Path to task and behavior data. + task_behavior_data_path = ( + '/om4/group/jazlab/nwatters/multi_prediction/datasets/data_nwb_trials/' + f'{subject}/{session}' + ) + + # Path to open-source data. This is used for reading behavior and task data. + data_open_source_path = ( + '/om4/group/jazlab/nwatters/multi_prediction/datasets/data_open_source/' + f'Subjects/{subject_id}/{session}/001' + ) + + # Path to sync pulses. This is used for reading timescale transformations + # between physiology and mworks data streams. + sync_pulses_path = ( + '/om4/group/jazlab/nwatters/multi_prediction/data_processed/' + f'{subject}/{session}/sync_pulses' + ) + + # Path to spike sorting. This is used for reading spike sorted data. + spike_sorting_raw_path = ( + f'/om4/group/jazlab/nwatters/multi_prediction/phys_data/{subject}/' + f'{session}/spike_sorting' + ) + + session_paths = SessionPaths( + output=pathlib.Path(output_path), + raw_data=pathlib.Path(raw_data_path), + data_open_source=pathlib.Path(data_open_source_path), + task_behavior_data=pathlib.Path(task_behavior_data_path), + sync_pulses=pathlib.Path(sync_pulses_path), + spike_sorting_raw=pathlib.Path(spike_sorting_raw_path), + ) + + return session_paths + + +def _get_session_paths_globus(subject, session, stub_test=False): + """Get paths to all components of the data in the globus repo.""" + subject_id = SUBJECT_NAME_TO_ID[subject] + base_data_dir = f'/shared/catalystneuro/JazLab/{subject_id}/{session}/' + + # Path to write output nwb files to + output_path = ( + f'~/conversion_nwb/jazayeri-lab-to-nwb/{subject}/{session}' + ) + if stub_test: + output_path = f'{output_path}/stub' + + # Path to the raw data. This is used for reading raw physiology data. + raw_data_path = f'{base_data_dir}/raw_data' + + # Path to task and behavior data. + task_behavior_data_path = f'{base_data_dir}/processed_task_data' + + # Path to open-source data. This is used for reading behavior and task data. + data_open_source_path = f'{base_data_dir}/data_open_source' + + # Path to sync pulses. This is used for reading timescale transformations + # between physiology and mworks data streams. + sync_pulses_path = f'{base_data_dir}/sync_pulses' + + # Path to spike sorting. This is used for reading spike sorted data. + spike_sorting_raw_path = f'{base_data_dir}/spike_sorting' + + session_paths = SessionPaths( + output=pathlib.Path(output_path), + raw_data=pathlib.Path(raw_data_path), + data_open_source=pathlib.Path(data_open_source_path), + task_behavior_data=pathlib.Path(task_behavior_data_path), + sync_pulses=pathlib.Path(sync_pulses_path), + spike_sorting_raw=pathlib.Path(spike_sorting_raw_path), + ) + + return session_paths + + +def get_session_paths(subject, session, stub_test=False, repo='openmind'): + """Get paths to all components of the data. + + Returns: + SessionPaths namedtuple. 
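+
+    Raises:
+        ValueError: If `repo` is not one of 'openmind' or 'globus'.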
+    """
+    if repo == 'openmind':
+        return _get_session_paths_openmind(
+            subject=subject, session=session, stub_test=stub_test)
+    elif repo == 'globus':
+        return _get_session_paths_globus(
+            subject=subject, session=session, stub_test=stub_test)
+    else:
+        raise ValueError(f'Invalid repo {repo}')
\ No newline at end of file
diff --git a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py
new file mode 100644
index 0000000..ea40aa7
--- /dev/null
+++ b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py
@@ -0,0 +1,359 @@
+"""Entrypoint to convert an entire session of data to NWB.
+
+This converts a session to NWB format and writes the NWB files to
+    /om/user/nwatters/nwb_data_multi_prediction/{$SUBJECT}/{$SESSION}
+Two NWB files are created:
+    $SUBJECT_$SESSION_raw.nwb --- Raw physiology
+    $SUBJECT_$SESSION_processed.nwb --- Task, behavior, and sorted physiology
+These files can be automatically uploaded to a DANDI dataset.
+
+Usage:
+    $ python main_convert_session.py $SUBJECT $SESSION
+    where $SUBJECT is the subject name and $SESSION is the session date
+    YYYY-MM-DD. For example:
+    $ python main_convert_session.py Perle 2022-06-01
+
+    Please read and consider changing the following variables:
+        _REPO
+        _STUB_TEST
+        _OVERWRITE
+        _DANDISET_ID
+    See comments below for descriptions of these variables.
+"""
+
+import datetime
+import glob
+import json
+import logging
+import os
+import sys
+from pathlib import Path
+from typing import Union
+from uuid import uuid4
+from zoneinfo import ZoneInfo
+
+import get_session_paths
+import nwb_converter
+from neuroconv.tools.data_transfers import automatic_dandi_upload
+from neuroconv.utils import dict_deep_update, load_dict_from_file
+
+# Data repository. Either 'globus' or 'openmind'.
+_REPO = 'globus'
+# Whether to run all the physiology data or only a stub.
+_STUB_TEST = True
+# Whether to overwrite output nwb files.
+_OVERWRITE = True
+# ID of the dandiset to upload to, or None to not upload.
+_DANDISET_ID = None  # '000620'
+
+# Set logger level so info is displayed in the console.
+logging.getLogger().setLevel(logging.INFO)
+
+_SUBJECT_TO_SEX = {
+    'Perle': 'F',
+    'Elgar': 'M',
+}
+_SUBJECT_TO_AGE = {
+    'Perle': 'P10Y',  # Born 6/11/2012
+    'Elgar': 'P10Y',  # Born 5/2/2012
+}
+
+
+def _get_single_file(directory, suffix=''):
+    """Get path to a file in given directory with given suffix.
+
+    Raises an error if there is not exactly one satisfying file.
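+
+    For example, `_get_single_file(probe_data_dir, suffix='.dat')` returns the
+    path of the unique `.dat` file in `probe_data_dir`.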
+    """
+    files = list(glob.glob(str(directory / f'*{suffix}')))
+    if len(files) == 0:
+        raise ValueError(f'No {suffix} files found in {directory}')
+    if len(files) > 1:
+        raise ValueError(f'Multiple {suffix} files found in {directory}')
+    return files[0]
+
+
+def _add_v_probe_data(raw_source_data,
+                      raw_conversion_options,
+                      processed_source_data,
+                      processed_conversion_options,
+                      session_paths,
+                      probe_num,
+                      stub_test):
+    """Add V-Probe session data."""
+    probe_data_dir = session_paths.raw_data / f'v_probe_{probe_num}'
+    if not probe_data_dir.exists():
+        return
+    logging.info(f'Adding V-probe {probe_num} session data')
+
+    # Raw data
+    recording_file = _get_single_file(probe_data_dir, suffix='.dat')
+    metadata_path = str(session_paths.data_open_source / 'probes.metadata.json')
+    raw_source_data[f'RecordingVP{probe_num}'] = dict(
+        file_path=recording_file,
+        probe_metadata_file=metadata_path,
+        probe_key=f'probe{(probe_num + 1):02d}',
+        probe_name=f'vprobe{probe_num}',
+        es_key=f'ElectricalSeriesVP{probe_num}',
+    )
+    raw_conversion_options[f'RecordingVP{probe_num}'] = dict(
+        stub_test=stub_test)
+
+    # Processed data
+    sorting_path = (
+        session_paths.spike_sorting_raw /
+        f'v_probe_{probe_num}' /
+        'ks_3_output_pre_v6_curated'
+    )
+    processed_source_data[f'RecordingVP{probe_num}'] = raw_source_data[
+        f'RecordingVP{probe_num}']
+    processed_source_data[f'SortingVP{probe_num}'] = dict(
+        folder_path=str(sorting_path),
+        keep_good_only=False,
+    )
+    processed_conversion_options[f'RecordingVP{probe_num}'] = dict(
+        stub_test=stub_test, write_electrical_series=False)
+    processed_conversion_options[f'SortingVP{probe_num}'] = dict(
+        stub_test=stub_test, write_as='processing')
+
+
+def _add_spikeglx_data(raw_source_data,
+                       raw_conversion_options,
+                       processed_source_data,
+                       processed_conversion_options,
+                       session_paths,
+                       stub_test):
+    """Add SpikeGLX recording data."""
+    logging.info('Adding SpikeGLX data')
+
+    # Raw data
+    spikeglx_dir = [
+        x for x in (session_paths.raw_data / 'spikeglx').iterdir()
+        if 'settling' not in str(x)
+    ]
+    if len(spikeglx_dir) == 0:
+        logging.info('Found no SpikeGLX data')
+        return
+    elif len(spikeglx_dir) == 1:
+        spikeglx_dir = spikeglx_dir[0]
+    else:
+        raise ValueError(f'Found multiple spikeglx directories {spikeglx_dir}')
+    ap_file = _get_single_file(spikeglx_dir, suffix='/*.ap.bin')
+    lfp_file = _get_single_file(spikeglx_dir, suffix='/*.lf.bin')
+    raw_source_data['RecordingNP'] = dict(file_path=ap_file)
+    raw_source_data['LF'] = dict(file_path=lfp_file)
+    processed_source_data['RecordingNP'] = dict(file_path=ap_file)
+    processed_source_data['LF'] = dict(file_path=lfp_file)
+    raw_conversion_options['RecordingNP'] = dict(stub_test=stub_test)
+    raw_conversion_options['LF'] = dict(stub_test=stub_test)
+    # Do not duplicate the raw electrical series in the processed file.
+    processed_conversion_options['RecordingNP'] = dict(
+        stub_test=stub_test, write_electrical_series=False)
+    processed_conversion_options['LF'] = dict(
+        stub_test=stub_test, write_electrical_series=False)
+
+    # Processed data
+    sorting_path = session_paths.spike_sorting_raw / 'np_0' / 'ks_3_output_v2'
+    processed_source_data['SortingNP'] = dict(
+        folder_path=str(sorting_path),
+        keep_good_only=False,
+    )
+    processed_conversion_options['SortingNP'] = dict(
+        stub_test=stub_test, write_as='processing')
+
+
+def session_to_nwb(subject: str,
+                   session: str,
+                   stub_test: bool = False,
+                   overwrite: bool = True,
+                   dandiset_id: Union[str, None] = None):
+    """
+    Convert a single session to an NWB file.
+
+    Parameters
+    ----------
+    subject : string
+        Subject, either 'Perle' or 'Elgar'.
+    session : string
+        Session date in format 'YYYY-MM-DD'.
+ stub_test : boolean + Whether or not to generate a preview file by limiting data write to a few MB. + Default is False. + overwrite : boolean + If the file exists already, True will delete and replace with a new file, False will append the contents. + Default is True. + dandiset_id : string, optional + If you want to upload the file to the DANDI archive, specify the six-digit ID here. + Requires the DANDI_API_KEY environment variable to be set. + To set this in your bash terminal in Linux or macOS, run + export DANDI_API_KEY=... + or in Windows + set DANDI_API_KEY=... + Default is None. + """ + if dandiset_id is not None: + import dandi # check importability + assert os.getenv('DANDI_API_KEY'), ( + "Unable to find environment variable 'DANDI_API_KEY'. " + "Please retrieve your token from DANDI and set this environment " + "variable." + ) + + logging.info(f'stub_test = {stub_test}') + logging.info(f'overwrite = {overwrite}') + logging.info(f'dandiset_id = {dandiset_id}') + + # Get paths + session_paths = get_session_paths.get_session_paths( + subject, session, stub_test=stub_test, repo=_REPO) + logging.info(f'session_paths: {session_paths}') + + # Get paths for nwb files to write + session_paths.output.mkdir(parents=True, exist_ok=True) + session_id = f'{subject}_{session}' + raw_nwb_path = session_paths.output / f'{session_id}_raw.nwb' + processed_nwb_path = session_paths.output / f'{session_id}_processed.nwb' + logging.info(f'raw_nwb_path = {raw_nwb_path}') + logging.info(f'processed_nwb_path = {processed_nwb_path}') + logging.info('') + + # Initialize empty data dictionaries + raw_source_data = {} + raw_conversion_options = {} + processed_source_data = {} + processed_conversion_options = {} + + # Add V-Probe data + for probe_num in range(2): + _add_v_probe_data( + raw_source_data=raw_source_data, + raw_conversion_options=raw_conversion_options, + processed_source_data=processed_source_data, + processed_conversion_options=processed_conversion_options, + session_paths=session_paths, + probe_num=probe_num, + stub_test=stub_test, + ) + + # Add SpikeGLX data + _add_spikeglx_data( + raw_source_data=raw_source_data, + raw_conversion_options=raw_conversion_options, + processed_source_data=processed_source_data, + processed_conversion_options=processed_conversion_options, + session_paths=session_paths, + stub_test=stub_test, + ) + + # Add behavior data + logging.info('Adding behavior data') + behavior_path = str(session_paths.task_behavior_data) + processed_source_data['EyePosition'] = dict(folder_path=behavior_path) + processed_conversion_options['EyePosition'] = dict() + processed_source_data['PupilSize'] = dict(folder_path=behavior_path) + processed_conversion_options['PupilSize'] = dict() + processed_source_data['RewardLine'] = dict(folder_path=behavior_path) + processed_conversion_options['RewardLine'] = dict() + processed_source_data['Audio'] = dict(folder_path=behavior_path) + processed_conversion_options['Audio'] = dict() + + # Add trials data + logging.info('Adding trials data') + processed_source_data['Trials'] = dict( + folder_path=str(session_paths.task_behavior_data)) + processed_conversion_options['Trials'] = dict() + + # Add display data + logging.info('Adding display data') + processed_source_data['Display'] = dict( + folder_path=str(session_paths.task_behavior_data)) + processed_conversion_options['Display'] = dict() + + # Create processed data converter + processed_converter = nwb_converter.NWBConverter( + source_data=processed_source_data, + 
sync_dir=session_paths.sync_pulses,
+    )
+
+    # Add datetime and subject name to processed converter
+    metadata = processed_converter.get_metadata()
+    metadata['NWBFile']['session_id'] = session_id
+    metadata['Subject']['subject_id'] = subject
+    metadata['Subject']['sex'] = _SUBJECT_TO_SEX[subject]
+    metadata['Subject']['age'] = _SUBJECT_TO_AGE[subject]
+
+    # Ecephys
+    probe_metadata_file = (
+        session_paths.data_open_source / 'probes.metadata.json')
+    with open(probe_metadata_file, 'r') as f:
+        probe_metadata = json.load(f)
+    neuropixel_metadata = [
+        x for x in probe_metadata if x['probe_type'] == 'Neuropixels'
+    ][0]
+    for entry in metadata['Ecephys']['ElectrodeGroup']:
+        if entry['device'] == 'Neuropixel-Imec':
+            # TODO: uncomment when fixed in pynwb
+            # entry.update(dict(position=[(
+            #     neuropixel_metadata['coordinates'][0],
+            #     neuropixel_metadata['coordinates'][1],
+            #     neuropixel_metadata['depth_from_surface'],
+            # )]
+            logging.info('\n\n')
+            logging.warning('    PROBE COORDINATES NOT IMPLEMENTED\n\n')
+
+    # Update default metadata with the editable metadata in the corresponding yaml file
+    editable_metadata_path = Path(__file__).parent / 'metadata.yaml'
+    editable_metadata = load_dict_from_file(editable_metadata_path)
+    metadata = dict_deep_update(metadata, editable_metadata)
+
+    # Check if session_start_time was found/set
+    if 'session_start_time' not in metadata['NWBFile']:
+        try:
+            date = datetime.datetime.strptime(session, '%Y-%m-%d')
+            date = date.replace(tzinfo=ZoneInfo('US/Eastern'))
+        except ValueError:
+            raise ValueError(
+                'Session start time was not auto-detected. Please provide it '
+                'in `metadata.yaml`'
+            )
+        metadata['NWBFile']['session_start_time'] = date
+
+    # Run conversion
+    logging.info('Running processed conversion')
+    processed_converter.run_conversion(
+        metadata=metadata,
+        nwbfile_path=processed_nwb_path,
+        conversion_options=processed_conversion_options,
+        overwrite=overwrite,
+    )
+
+    logging.info('Running raw data conversion')
+    metadata['NWBFile']['identifier'] = str(uuid4())
+    raw_converter = nwb_converter.NWBConverter(
+        source_data=raw_source_data,
+        sync_dir=str(session_paths.sync_pulses),
+    )
+    raw_converter.run_conversion(
+        metadata=metadata,
+        nwbfile_path=raw_nwb_path,
+        conversion_options=raw_conversion_options,
+        overwrite=overwrite,
+    )
+
+    # Upload to DANDI
+    if dandiset_id is not None:
+        logging.info(f'Uploading to dandiset id {dandiset_id}')
+        automatic_dandi_upload(
+            dandiset_id=dandiset_id,
+            nwb_folder_path=session_paths.output,
+        )
+
+
+if __name__ == '__main__':
+    # Run session conversion.
+    subject = sys.argv[1]
+    session = sys.argv[2]
+    logging.info(f'\nStarting conversion for {subject}/{session}\n')
+    session_to_nwb(
+        subject=subject,
+        session=session,
+        stub_test=_STUB_TEST,
+        overwrite=_OVERWRITE,
+        dandiset_id=_DANDISET_ID,
+    )
+    logging.info(f'\nFinished conversion for {subject}/{session}\n')
diff --git a/src/jazayeri_lab_to_nwb/watters/metadata.yaml b/src/jazayeri_lab_to_nwb/watters/metadata.yaml
new file mode 100644
index 0000000..216dcf7
--- /dev/null
+++ b/src/jazayeri_lab_to_nwb/watters/metadata.yaml
@@ -0,0 +1,15 @@
+NWBFile:
+  # related_publications:  # no pubs yet
+  #   - https://doi.org/12345
+  session_description:
+    Data from macaque performing multi-object working memory task. Subject is
+    presented with multiple objects at different locations on a screen. After a
+    delay, the subject is then cued with one of the objects, now displayed at
+    the center of the screen. 
Subject should respond by saccading to the
+    location of the cued object at its initial presentation.
+  institution: MIT
+  lab: Jazayeri
+  experimenter:
+    - Watters, Nicholas
+Subject:
+  species: Macaca mulatta
diff --git a/src/jazayeri_lab_to_nwb/watters/wattersnwbconverter.py b/src/jazayeri_lab_to_nwb/watters/nwb_converter.py
similarity index 68%
rename from src/jazayeri_lab_to_nwb/watters/wattersnwbconverter.py
rename to src/jazayeri_lab_to_nwb/watters/nwb_converter.py
index 96267b7..eb58876 100644
--- a/src/jazayeri_lab_to_nwb/watters/wattersnwbconverter.py
+++ b/src/jazayeri_lab_to_nwb/watters/nwb_converter.py
@@ -1,55 +1,53 @@
 """Primary NWBConverter class for this dataset."""
+
 import json
 import logging
 import numpy as np
-from typing import Optional
 from pathlib import Path
+from typing import Optional

+import display_interface
+import timeseries_interface
+import trials_interface
 from neuroconv import NWBConverter
+from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface
 from neuroconv.datainterfaces import (
-    SpikeGLXRecordingInterface,
     KiloSortSortingInterface,
+    SpikeGLXRecordingInterface,
 )
 from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import BaseRecordingExtractorInterface
 from neuroconv.datainterfaces.ecephys.basesortingextractorinterface import BaseSortingExtractorInterface
-from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface
 from neuroconv.datainterfaces.text.timeintervalsinterface import TimeIntervalsInterface
-
+from neuroconv.utils import FolderPathType
+from recording_interface import DatRecordingInterface
 from spikeinterface.core.waveform_tools import has_exceeding_spikes
 from spikeinterface.curation import remove_excess_spikes

-from . 
import (
-    WattersDatRecordingInterface,
-    WattersEyePositionInterface,
-    WattersPupilSizeInterface,
-    WattersTrialsInterface,
-)
-
-class WattersNWBConverter(NWBConverter):
-    """Primary conversion class for my extracellular electrophysiology dataset."""
+class NWBConverter(NWBConverter):
+    """Primary conversion class for the extracellular electrophysiology dataset."""

     data_interface_classes = dict(
-        RecordingVP0=WattersDatRecordingInterface,
+        RecordingVP0=DatRecordingInterface,
         SortingVP0=KiloSortSortingInterface,
-        RecordingVP1=WattersDatRecordingInterface,
+        RecordingVP1=DatRecordingInterface,
         SortingVP1=KiloSortSortingInterface,
         RecordingNP=SpikeGLXRecordingInterface,
         LF=SpikeGLXRecordingInterface,
         SortingNP=KiloSortSortingInterface,
-        EyePosition=WattersEyePositionInterface,
-        PupilSize=WattersPupilSizeInterface,
-        Trials=WattersTrialsInterface,
+        EyePosition=timeseries_interface.EyePositionInterface,
+        PupilSize=timeseries_interface.PupilSizeInterface,
+        RewardLine=timeseries_interface.RewardLineInterface,
+        Audio=timeseries_interface.AudioInterface,
+        Trials=trials_interface.TrialsInterface,
+        Display=display_interface.DisplayInterface,
     )

-    def __init__(
-        self,
-        source_data: dict[str, dict],
-        sync_dir: Optional[FolderPathType] = None,
-        verbose: bool = True,
-    ):
-        """Validate source_data against source_schema and initialize all data interfaces."""
+    def __init__(self,
+                 source_data: dict[str, dict],
+                 sync_dir: Optional[FolderPathType] = None,
+                 verbose: bool = True):
+        """Validate source_data and initialize all data interfaces."""
         super().__init__(source_data=source_data, verbose=verbose)
         self.sync_dir = sync_dir

@@ -58,30 +56,27 @@ def __init__(
         if isinstance(data_interface, BaseSortingExtractorInterface):
             unit_ids = np.array(data_interface.sorting_extractor.unit_ids)
             data_interface.sorting_extractor.set_property(
-                key="unit_name", values=(unit_ids + unit_name_start).astype(str)
+                key='unit_name',
+                values=(unit_ids + unit_name_start).astype(str),
             )
             unit_name_start += np.max(unit_ids) + 1

     def temporally_align_data_interfaces(self):
-        logging.info("Temporally aligning data interfaces")
-
+        logging.info('Temporally aligning data interfaces')
+
         if self.sync_dir is None:
             return
         sync_dir = Path(self.sync_dir)

-        # constant bias
-        with open(sync_dir / "mworks" / "open_source_minus_processed", "r") as f:
-            bias = float(f.read().strip())
-
         # openephys alignment
         with open(sync_dir / "open_ephys" / "recording_start_time") as f:
-            start_time = float(f.read().strip())
+            open_ephys_start_time = float(f.read().strip())
         with open(sync_dir / "open_ephys" / "transform", "r") as f:
-            transform = json.load(f)
+            open_ephys_transform = json.load(f)
         for i in [0, 1]:
             if f"RecordingVP{i}" in self.data_interface_objects:
-                orig_timestamps = self.data_interface_objects[f"RecordingVP{i}"].get_timestamps()
-                aligned_timestamps = bias + transform["intercept"] + transform["coef"] * (start_time + orig_timestamps)
+                orig_timestamps = self.data_interface_objects[f"RecordingVP{i}"].get_original_timestamps()
+                aligned_timestamps = open_ephys_transform["intercept"] + open_ephys_transform["coef"] * (open_ephys_start_time + orig_timestamps)
                 self.data_interface_objects[f"RecordingVP{i}"].set_aligned_timestamps(aligned_timestamps)
                 # openephys sorting alignment
                 if f"SortingVP{i}" in self.data_interface_objects:
@@ -101,14 +96,14 @@ def temporally_align_data_interfaces(self):
             )

         # neuropixel alignment
-        orig_timestamps = self.data_interface_objects["RecordingNP"].get_timestamps()
+        orig_timestamps = 
self.data_interface_objects["RecordingNP"].get_original_timestamps()
         with open(sync_dir / "spikeglx" / "transform", "r") as f:
-            transform = json.load(f)
-        aligned_timestamps = bias + transform["intercept"] + transform["coef"] * orig_timestamps
+            spikeglx_transform = json.load(f)
+        aligned_timestamps = spikeglx_transform["intercept"] + spikeglx_transform["coef"] * orig_timestamps
         self.data_interface_objects["RecordingNP"].set_aligned_timestamps(aligned_timestamps)
         # neuropixel LFP alignment
-        orig_timestamps = self.data_interface_objects["LF"].get_timestamps()
-        aligned_timestamps = bias + transform["intercept"] + transform["coef"] * orig_timestamps
+        orig_timestamps = self.data_interface_objects["LF"].get_original_timestamps()
+        aligned_timestamps = spikeglx_transform["intercept"] + spikeglx_transform["coef"] * orig_timestamps
         self.data_interface_objects["LF"].set_aligned_timestamps(aligned_timestamps)
         # neuropixel sorting alignment
         if "SortingNP" in self.data_interface_objects:
@@ -124,21 +119,16 @@ def temporally_align_data_interfaces(self):
                 sorting=self.data_interface_objects[f"SortingNP"].sorting_extractor,
             )
             self.data_interface_objects[f"SortingNP"].register_recording(self.data_interface_objects[f"RecordingNP"])
-
+
+        # align recording start to 0
         aligned_start_times = []
         for name, data_interface in self.data_interface_objects.items():
-            if isinstance(data_interface, BaseTemporalAlignmentInterface):
-                start_time = data_interface.get_timestamps()[0]
-                aligned_start_times.append(start_time)
-            elif isinstance(data_interface, TimeIntervalsInterface):
-                start_time = data_interface.get_timestamps(column="start_time")[0]
-                aligned_start_times.append(start_time)
+            start_time = data_interface.get_timestamps()[0]
+            aligned_start_times.append(start_time)
         zero_time = -1.0 * min(aligned_start_times)
         for name, data_interface in self.data_interface_objects.items():
             if isinstance(data_interface, BaseSortingExtractorInterface):
-                # don't need to align b/c recording will be aligned separately
+                # Do not need to align because recording will be aligned
                 continue
-            elif hasattr(data_interface, "set_aligned_starting_time"):
-                start_time = data_interface.set_aligned_starting_time(aligned_starting_time=zero_time)
-                aligned_start_times.append(start_time)
+            data_interface.set_aligned_starting_time(
+                aligned_starting_time=zero_time)
diff --git a/src/jazayeri_lab_to_nwb/watters/wattersrecordinginterface.py b/src/jazayeri_lab_to_nwb/watters/recording_interface.py
similarity index 97%
rename from src/jazayeri_lab_to_nwb/watters/wattersrecordinginterface.py
rename to src/jazayeri_lab_to_nwb/watters/recording_interface.py
index cae0b91..be80415 100644
--- a/src/jazayeri_lab_to_nwb/watters/wattersrecordinginterface.py
+++ b/src/jazayeri_lab_to_nwb/watters/recording_interface.py
@@ -1,4 +1,5 @@
-"""Primary class for Watters Plexon probe data."""
+"""Primary class for recording data."""
+
 import os
 import json
 import numpy as np
@@ -74,7 +75,7 @@ def add_electrode_locations(
     return electrode_metadata


-class WattersDatRecordingInterface(BaseRecordingExtractorInterface):
+class DatRecordingInterface(BaseRecordingExtractorInterface):

     ExtractorName = "NumpyRecording"

diff --git a/src/jazayeri_lab_to_nwb/watters/watters_requirements.txt b/src/jazayeri_lab_to_nwb/watters/requirements.txt
similarity index 100%
rename from src/jazayeri_lab_to_nwb/watters/watters_requirements.txt
rename to src/jazayeri_lab_to_nwb/watters/requirements.txt
diff --git a/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py 
b/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py
new file mode 100644
index 0000000..e3c6ccb
--- /dev/null
+++ b/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py
@@ -0,0 +1,212 @@
+"""Primary classes for timeseries variables.
+
+The classes here handle variables like eye position, reward line, and audio
+stimuli that are not necessarily tied to the trial structure or to display
+updates. For trial-structured variables, see trials_interface.py. For
+variables pertaining to display updates, see display_interface.py.
+"""
+
+import json
+from pathlib import Path
+
+import numpy as np
+from hdmf.backends.hdf5 import H5DataIO
+from ndx_events import LabeledEvents
+from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface
+from neuroconv.tools.nwb_helpers import get_module
+from neuroconv.utils import FolderPathType
+from pynwb import NWBFile, TimeSeries
+from pynwb.behavior import SpatialSeries
+
+
+class TimestampsFromArrayInterface(BaseTemporalAlignmentInterface):
+    """Interface implementing temporal alignment functions with timestamps."""
+
+    def __init__(self, folder_path: FolderPathType):
+        super().__init__(folder_path=folder_path)
+
+    def set_original_timestamps(self, original_timestamps: np.ndarray) -> None:
+        self._original_timestamps = original_timestamps
+        self._timestamps = np.copy(original_timestamps)
+
+    def get_original_timestamps(self) -> np.ndarray:
+        return self._original_timestamps
+
+    def set_aligned_timestamps(self, aligned_timestamps: np.ndarray) -> None:
+        self._timestamps = aligned_timestamps
+
+    def get_timestamps(self):
+        return self._timestamps
+
+
+class EyePositionInterface(TimestampsFromArrayInterface):
+    """Eye position interface."""
+
+    def __init__(self, folder_path: FolderPathType):
+        folder_path = Path(folder_path)
+        super().__init__(folder_path=folder_path)
+
+        # Find eye position files and check they all exist
+        eye_h_file = folder_path / 'eye_h_calibrated.json'
+        eye_v_file = folder_path / 'eye_v_calibrated.json'
+        assert eye_h_file.exists(), f'Could not find {eye_h_file}'
+        assert eye_v_file.exists(), f'Could not find {eye_v_file}'
+
+        # Load eye data
+        eye_h_data = json.load(open(eye_h_file, 'r'))
+        eye_v_data = json.load(open(eye_v_file, 'r'))
+        eye_h_times = np.array(eye_h_data['times'])
+        eye_h_values = 0.5 + (np.array(eye_h_data['values']) / 20)
+        eye_v_times = np.array(eye_v_data['times'])
+        eye_v_values = 0.5 + (np.array(eye_v_data['values']) / 20)
+
+        # Check eye_h and eye_v have the same number of samples
+        if len(eye_h_times) != len(eye_v_times):
+            raise ValueError(
+                f'len(eye_h_times) = {len(eye_h_times)}, but len(eye_v_times) '
+                f'= {len(eye_v_times)}'
+            )
+        # Check that eye_h_times and eye_v_times are similar to within 0.5ms
+        if not np.allclose(eye_h_times, eye_v_times, atol=0.0005):
+            raise ValueError(
+                'eye_h_times and eye_v_times are not sufficiently similar'
+            )
+
+        # Set data attributes
+        self.set_original_timestamps(eye_h_times)
+        self._eye_pos = np.stack([eye_h_values, eye_v_values], axis=1)
+
+    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict):
+        # Make SpatialSeries
+        eye_position = SpatialSeries(
+            name='eye_position',
+            data=H5DataIO(self._eye_pos, compression='gzip'),
+            reference_frame='(0,0) is bottom left corner of screen',
+            unit='meters',
+            conversion=0.257,
+            timestamps=H5DataIO(self._timestamps, compression='gzip'),
+            description='Eye position data recorded by EyeLink camera',
+        )
+
+        # Get processing module
+        module_description = 'Contains behavioral data from 
experiment.' + processing_module = get_module( + nwbfile=nwbfile, name='behavior', description=module_description) + + # Add data to module + processing_module.add_data_interface(eye_position) + + return nwbfile + + +class PupilSizeInterface(TimestampsFromArrayInterface): + """Pupil size interface.""" + + def __init__(self, folder_path: FolderPathType): + # Find pupil size file + folder_path = Path(folder_path) + pupil_size_file = folder_path / 'pupil_size_r.json' + assert pupil_size_file.exists(), f'Could not find {pupil_size_file}' + + # Load pupil size data and set data attributes + pupil_size_data = json.load(open(pupil_size_file, 'r')) + self.set_original_timestamps(np.array(pupil_size_data['times'])) + self._pupil_size = np.array(pupil_size_data['values']) + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + # Make TimeSeries + pupil_size = TimeSeries( + name='pupil_size', + data=H5DataIO(self._pupil_size, compression='gzip'), + unit='pixels', + conversion=1.0, + timestamps=H5DataIO(self._timestamps, compression='gzip'), + description='Pupil size data recorded by EyeLink camera', + ) + + # Get processing module + module_description = 'Contains behavioral data from experiment.' + processing_module = get_module( + nwbfile=nwbfile, name='behavior', description=module_description) + + # Add data to module + processing_module.add_data_interface(pupil_size) + + return nwbfile + + +class RewardLineInterface(TimestampsFromArrayInterface): + """Reward line interface.""" + + def __init__(self, folder_path: FolderPathType): + # Find reward line file + folder_path = Path(folder_path) + reward_line_file = folder_path / 'reward_line.json' + assert reward_line_file.exists(), f'Could not find {reward_line_file}' + + # Load reward line data and set data attributes + reward_line_data = json.load(open(reward_line_file, 'r')) + self.set_original_timestamps(np.array(reward_line_data['times'])) + self._reward_line = reward_line_data['values'] + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + # Make LabeledEvents + reward_line = LabeledEvents( + name='reward_line', + description=( + 'Reward line data representing events of reward dispenser' + ), + timestamps=H5DataIO(self._timestamps, compression='gzip'), + data=self._reward_line, + labels=['closed', 'open'], + ) + + # Get processing module + module_description = 'Contains audio and reward data from experiment.' 
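+        # Note: get_module returns the existing 'behavior' module if a
+        # previous interface (e.g. EyePositionInterface) already created it;
+        # the description is only applied when the module is first created.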
+ processing_module = get_module( + nwbfile=nwbfile, name='behavior', description=module_description) + + # Add data to module + processing_module.add_data_interface(reward_line) + + return nwbfile + + +class AudioInterface(TimestampsFromArrayInterface): + """Audio interface.""" + + SOUNDS = ['failure_sound', 'success_sound'] + + def __init__(self, folder_path: FolderPathType): + # Find sound file + folder_path = Path(folder_path) + sound_file = folder_path / 'sound.json' + assert sound_file.exists(), f'Could not find {sound_file}' + + # Load sound data and set data attributes + sound_data = json.load(open(sound_file, 'r')) + self.set_original_timestamps(np.array(sound_data['times'])) + audio = np.array(sound_data['values']) + + sound_to_code = {k: i for i, k in enumerate(AudioInterface.SOUNDS)} + self._sound_codes = [sound_to_code[x] for x in audio] + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + # Make LabeledEvents + audio = LabeledEvents( + name='audio', + description='Audio data representing auditory stimuli events', + timestamps=H5DataIO(self._timestamps, compression='gzip'), + data=self._sound_codes, + labels=AudioInterface.SOUNDS, + ) + + # Get processing module + module_description = 'Contains audio and reward data from experiment.' + processing_module = get_module( + nwbfile=nwbfile, name='behavior', description=module_description) + + # Add data to module + processing_module.add_data_interface(audio) + + return nwbfile diff --git a/src/jazayeri_lab_to_nwb/watters/trials_interface.py b/src/jazayeri_lab_to_nwb/watters/trials_interface.py new file mode 100644 index 0000000..d130307 --- /dev/null +++ b/src/jazayeri_lab_to_nwb/watters/trials_interface.py @@ -0,0 +1,190 @@ +"""Class for converting trial-structured data.""" + +import json +import warnings +from pathlib import Path +from typing import Optional + +import numpy as np +import pandas as pd +from neuroconv.datainterfaces.text.timeintervalsinterface import TimeIntervalsInterface +from neuroconv.utils import FolderPathType +from pynwb import NWBFile + + +class TrialsInterface(TimeIntervalsInterface): + """Class for converting trial-structured data. + + All events that occur exactly once per trial are contained in this + interface. 
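+
+    A standalone usage sketch (the folder path is hypothetical; the folder
+    must contain the trials.json file read by `_read_file`):
+
+        interface = TrialsInterface(folder_path='/path/to/task_behavior_data')
+        interface.add_to_nwbfile(nwbfile=nwbfile, metadata=metadata)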
+ """ + + KEY_MAP = { + 'background_indices': 'background_indices', + 'broke_fixation': 'broke_fixation', + 'stimulus_object_identities': 'stimulus_object_identities', + 'stimulus_object_positions': 'stimulus_object_positions', + 'stimulus_object_velocities': 'stimulus_object_velocities', + 'stimulus_object_target': 'stimulus_object_target', + 'delay_object_blanks': 'delay_object_blanks', + 'closed_loop_response_position': 'closed_loop_response_position', + 'closed_loop_response_time': 'closed_loop_response_time', + 'time_start': 'start_time', + 'time_phase_fixation': 'phase_fixation_time', + 'time_phase_stimulus': 'phase_stimulus_time', + 'time_phase_delay': 'phase_delay_time', + 'time_phase_cue': 'phase_cue_time', + 'time_phase_response': 'phase_response_time', + 'time_phase_reveal': 'phase_reveal_time', + 'time_phase_iti': 'phase_iti_time', + 'reward_time': 'reward_time', + 'reward_duration': 'reward_duration', + 'response_position': 'response_position', + 'response_time': 'response_time', + } + + def __init__(self, folder_path: FolderPathType, verbose: bool = True): + super().__init__(file_path=folder_path, verbose=verbose) + + def get_metadata(self) -> dict: + metadata = super().get_metadata() + metadata['TimeIntervals'] = dict( + trials=dict( + table_name='trials', + table_description='data about each trial', + ) + ) + return metadata + + def get_timestamps(self) -> np.ndarray: + return super(TrialsInterface, self).get_timestamps(column='start_time') + + def set_aligned_starting_time(self, aligned_starting_time: float) -> None: + self.dataframe.closed_loop_response_time += aligned_starting_time + self.dataframe.start_time += aligned_starting_time + self.dataframe.phase_fixation_time += aligned_starting_time + self.dataframe.phase_stimulus_time += aligned_starting_time + self.dataframe.phase_delay_time += aligned_starting_time + self.dataframe.phase_cue_time += aligned_starting_time + self.dataframe.phase_response_time += aligned_starting_time + self.dataframe.phase_reveal_time += aligned_starting_time + self.dataframe.phase_iti_time += aligned_starting_time + self.dataframe.reward_time += aligned_starting_time + self.dataframe.response_time += aligned_starting_time + + def _read_file(self, file_path: FolderPathType): + # Create dataframe with data for each trial + trials = json.load(open(Path(file_path) / 'trials.json', 'r')) + trials = { + k_mapped: [d[k] for d in trials] + for k, k_mapped in TrialsInterface.KEY_MAP.items() + } + + # Field closed_loop_response_position may have None values, so replace + # those with NaN to make hdf5 conversion work + trials['closed_loop_response_position'] = [ + [np.nan, np.nan] if x is None else x + for x in trials['closed_loop_response_position'] + ] + + # Serialize fields with variable-length lists for hdf5 conversion + for k in [ + 'stimulus_object_identities', + 'stimulus_object_positions', + 'stimulus_object_velocities', + 'stimulus_object_target', + ]: + trials[k] = [json.dumps(x) for x in trials[k]] + + return pd.DataFrame(trials) + + def add_to_nwbfile(self, + nwbfile: NWBFile, + metadata: Optional[dict] = None, + tag: str = 'trials'): + return super(TrialsInterface, self).add_to_nwbfile( + nwbfile=nwbfile, + metadata=metadata, + tag=tag, + column_descriptions=self.column_descriptions, + ) + + @property + def column_descriptions(self): + column_descriptions = { + 'background_indices': ( + 'For each trial, the indices of the background noise pattern ' + 'patch.' 
+ ), + 'broke_fixation': ( + 'For each trial, whether the subject broke fixation and the ' + 'trial was aborted' + ), + 'stimulus_object_identities': ( + 'For each trial, a serialized list with one element for each ' + 'object. Each element is the identity symbol (e.g. "a", "b", ' + '"c", ...) of the corresponding object.' + ), + 'stimulus_object_positions': ( + 'For each trial, a serialized list with one element for each ' + 'object. Each element is the initial (x, y) position of the ' + 'corresponding object, in coordinates of arena width.' + ), + 'stimulus_object_velocities': ( + 'For each trial, a serialized list with one element for each ' + 'object. Each element is the initial (dx/dt, dy/dt) velocity ' + 'of the corresponding object, in units of arena width per ' + 'display update.' + ), + 'stimulus_object_target': ( + 'For each trial, a serialized list with one element for each ' + 'object. Each element is a boolean indicating whether the ' + 'corresponding object is ultimately the cued target.' + ), + 'delay_object_blanks': ( + 'For each trial, a boolean indicating whether the objects were ' + 'rendered as blank discs during the delay phase.' + ), + 'closed_loop_response_position': ( + 'For each trial, the position of the response saccade used by ' + 'the closed-loop game engine. This is used for determining ' + 'reward.' + ), + 'closed_loop_response_time': ( + 'For each trial, the time of the response saccade used by ' + 'the closed-loop game engine. This is used for the timing of ' + 'reward delivery.' + ), + 'start_time': 'Start time of each trial.', + 'phase_fixation_time': ( + 'Time of fixation phase onset for each trial.' + ), + 'phase_stimulus_time': ( + 'Time of stimulus phase onset for each trial.' + ), + 'phase_delay_time': 'Time of delay phase onset for each trial.', + 'phase_cue_time': 'Time of cue phase onset for each trial.', + 'phase_response_time': ( + 'Time of response phase onset for each trial.' + ), + 'phase_reveal_time': 'Time of reveal phase onset for each trial.', + 'phase_iti_time': ( + 'Time of inter-trial interval onset for each trial.' + ), + 'reward_time': 'Time of reward delivery onset for each trial.', + 'reward_duration': 'Reward duration for each trial', + 'response_position': ( + 'Response position for each trial. This differs from ' + 'closed_loop_response_position in that this is calculated ' + 'post-hoc from high-resolution eye tracking data, hence is ' + 'more accurate.' + ), + 'response_time': ( + 'Response time for each trial. This differs from ' + 'closed_loop_response_time in that this is calculated post-hoc ' + 'from high-resolution eye tracking data, hence is more ' + 'accurate.' 
+ ), + } + + return column_descriptions diff --git a/src/jazayeri_lab_to_nwb/watters/watters_convert_session.py b/src/jazayeri_lab_to_nwb/watters/watters_convert_session.py deleted file mode 100644 index d7c1072..0000000 --- a/src/jazayeri_lab_to_nwb/watters/watters_convert_session.py +++ /dev/null @@ -1,253 +0,0 @@ -"""Primary script to run to convert an entire session for of data using the NWBConverter.""" -import os -import datetime -import glob -import json -import logging -from pathlib import Path -from typing import Union -from uuid import uuid4 -from zoneinfo import ZoneInfo - -from neuroconv.tools.data_transfers import automatic_dandi_upload -from neuroconv.utils import load_dict_from_file, dict_deep_update - -from jazayeri_lab_to_nwb.watters import WattersNWBConverter - -# Set logger level for info is displayed in console -logging.getLogger().setLevel(logging.INFO) - - -def _get_single_file(directory, suffix=""): - """Get path to a file in given directory with given suffix. - - Raisees error if not exactly one satisfying file. - """ - files = list(glob.glob(str(directory / f"*{suffix}"))) - if len(files) == 0: - raise ValueError(f"No {suffix} files found in {directory}") - if len(files) > 1: - raise ValueError(f"Multiple {suffix} files found in {directory}") - return files[0] - - -def session_to_nwb( - data_dir: Union[str, Path], - output_dir_path: Union[str, Path], - stub_test: bool = False, - overwrite: bool = True, - dandiset_id: Union[str, None] = None, -): - """ - Convert a single session to an NWB file. - - Parameters - ---------- - data_dir : string or Path - Source data directory. - output_dir_path : string or Path - Output data directory. - stub_test : boolean - Whether or not to generate a preview file by limiting data write to a few MB. - Default is False. - overwrite : boolean - If the file exists already, True will delete and replace with a new file, False will append the contents. - Default is True. - dandiset_id : string, optional - If you want to upload the file to the DANDI archive, specify the six-digit ID here. - Requires the DANDI_API_KEY environment variable to be set. - To set this in your bash terminal in Linux or macOS, run - export DANDI_API_KEY=... - or in Windows - set DANDI_API_KEY=... - Default is None. - """ - if dandiset_id is not None: - import dandi # check importability - assert os.getenv("DANDI_API_KEY"), ( - "Unable to find environment variable 'DANDI_API_KEY'. " - "Please retrieve your token from DANDI and set this environment variable." 
- ) - - logging.info("") - logging.info(f"data_dir = {data_dir}") - logging.info(f"output_dir_path = {output_dir_path}") - logging.info(f"stub_test = {stub_test}") - - data_dir = Path(data_dir) - output_dir_path = Path(output_dir_path) - if stub_test: - output_dir_path = output_dir_path / "nwb_stub" - output_dir_path.mkdir(parents=True, exist_ok=True) - - session_id = data_dir.name - raw_nwbfile_path = output_dir_path / f"{session_id}_raw.nwb" - processed_nwbfile_path = output_dir_path / f"{session_id}_processed.nwb" - logging.info(f"raw_nwbfile_path = {raw_nwbfile_path}") - logging.info(f"processed_nwbfile_path = {processed_nwbfile_path}") - - raw_source_data = dict() - raw_conversion_options = dict() - processed_source_data = dict() - processed_conversion_options = dict() - - for probe_num in range(2): - # Add V-Probe Recording - probe_data_dir = data_dir / "raw_data" / f"v_probe_{probe_num}" - if not probe_data_dir.exists(): - continue - logging.info(f"\nAdding V-probe {probe_num} recording") - - logging.info(" Raw data") - recording_file = _get_single_file(probe_data_dir, suffix=".dat") - recording_source_data = { - f"RecordingVP{probe_num}": dict( - file_path=recording_file, - probe_metadata_file=str(data_dir / "data_open_source" / "probes.metadata.json"), - probe_key=f"probe{(probe_num + 1):02d}", - probe_name=f"vprobe{probe_num}", - es_key=f"ElectricalSeriesVP{probe_num}", - ) - } - raw_source_data.update(recording_source_data) - processed_source_data.update(recording_source_data) - raw_conversion_options.update({f"RecordingVP{probe_num}": dict(stub_test=stub_test)}) - processed_conversion_options.update( - {f"RecordingVP{probe_num}": dict(stub_test=stub_test, write_electrical_series=False)} - ) - - # Add V-Probe Sorting - logging.info(" Spike sorted data") - processed_source_data.update( - { - f"SortingVP{probe_num}": dict( - folder_path=str(data_dir / "spike_sorting_raw" / f"v_probe_{probe_num}"), - keep_good_only=False, - ) - } - ) - processed_conversion_options.update({f"SortingVP{probe_num}": dict(stub_test=stub_test, write_as="processing")}) - - # Add SpikeGLX Recording - logging.info("Adding SpikeGLX recordings") - logging.info(" AP data") - probe_data_dir = data_dir / "raw_data" / "spikeglx" / "*" / "*" - ap_file = _get_single_file(probe_data_dir, suffix=".ap.bin") - raw_source_data.update(dict(RecordingNP=dict(file_path=ap_file))) - processed_source_data.update(dict(RecordingNP=dict(file_path=ap_file))) - raw_conversion_options.update(dict(RecordingNP=dict(stub_test=stub_test))) - processed_conversion_options.update(dict(RecordingNP=dict(stub_test=stub_test, write_electrical_series=False))) - - # Add LFP - logging.info(" LFP data") - lfp_file = _get_single_file(probe_data_dir, suffix=".lf.bin") - raw_source_data.update(dict(LF=dict(file_path=lfp_file))) - processed_source_data.update(dict(LF=dict(file_path=lfp_file))) - raw_conversion_options.update(dict(LF=dict(stub_test=stub_test))) - processed_conversion_options.update(dict(LF=dict(stub_test=stub_test, write_electrical_series=False))) - - # Add Sorting - logging.info(" Spike sorted data") - processed_source_data.update( - dict( - SortingNP=dict( - folder_path=str(data_dir / "spike_sorting_raw" / "np"), - keep_good_only=False, - ) - ) - ) - processed_conversion_options.update(dict(SortingNP=dict(stub_test=stub_test, write_as="processing"))) - - # Add Behavior - logging.info("Adding behavior") - behavior_path = str(data_dir / "data_open_source" / "behavior") - 
processed_source_data.update(dict(EyePosition=dict(folder_path=behavior_path))) - processed_conversion_options.update(dict(EyePosition=dict())) - - processed_source_data.update(dict(PupilSize=dict(folder_path=behavior_path))) - processed_conversion_options.update(dict(PupilSize=dict())) - - # Add Trials - logging.info("Adding task data") - processed_source_data.update(dict(Trials=dict(folder_path=str(data_dir / "data_open_source")))) - processed_conversion_options.update(dict(Trials=dict())) - - processed_converter = WattersNWBConverter(source_data=processed_source_data, sync_dir=str(data_dir / "sync_pulses")) - - # Add datetime to conversion - metadata = processed_converter.get_metadata() - metadata["NWBFile"]["session_id"] = session_id - - # Subject name - if "monkey0" in str(data_dir): - metadata["Subject"]["subject_id"] = "Perle" - elif "monkey1" in str(data_dir): - metadata["Subject"]["subject_id"] = "Elgar" - - # EcePhys - probe_metadata_file = data_dir / "data_open_source" / "probes.metadata.json" - with open(probe_metadata_file, "r") as f: - probe_metadata = json.load(f) - neuropixel_metadata = [entry for entry in probe_metadata if entry["label"] == "probe00"][0] - for entry in metadata["Ecephys"]["ElectrodeGroup"]: - if entry["device"] == "Neuropixel-Imec": - # TODO: uncomment when fixed in pynwb - # entry.update(dict(position=[( - # neuropixel_metadata["coordinates"][0], - # neuropixel_metadata["coordinates"][1], - # neuropixel_metadata["depth_from_surface"], - # )] - logging.warning("\n\n PROBE COORDINATES NOT IMPLEMENTED\n\n") - - # Update default metadata with the editable in the corresponding yaml file - editable_metadata_path = Path(__file__).parent / "watters_metadata.yaml" - editable_metadata = load_dict_from_file(editable_metadata_path) - metadata = dict_deep_update(metadata, editable_metadata) - - # check if session_start_time was found/set - if "session_start_time" not in metadata["NWBFile"]: - try: - date = datetime.datetime.strptime(data_dir.name, "%Y-%m-%d") - date = date.replace(tzinfo=ZoneInfo("US/Eastern")) - except: - raise ValueError( - "Session start time was not auto-detected. 
Please provide it " "in `watters_metadata.yaml`" - ) - metadata["NWBFile"]["session_start_time"] = date - - # Run conversion - logging.info("Running processed conversion") - processed_converter.run_conversion( - metadata=metadata, - nwbfile_path=processed_nwbfile_path, - conversion_options=processed_conversion_options, - overwrite=overwrite, - ) - - logging.info("Running raw data conversion") - metadata["NWBFile"]["identifier"] = str(uuid4()) - raw_converter = WattersNWBConverter(source_data=raw_source_data, sync_dir=str(data_dir / "sync_pulses")) - raw_converter.run_conversion( - metadata=metadata, - nwbfile_path=raw_nwbfile_path, - conversion_options=raw_conversion_options, - overwrite=overwrite, - ) - automatic_dandi_upload(dandiset_id=dandiset_id) - - -if __name__ == "__main__": - - # Parameters for conversion - data_dir = Path("/om2/user/nwatters/catalystneuro/initial_data_transfer/" "monkey0/2022-06-01/") - output_dir_path = Path("/om/user/nwatters/nwb_data/watters_perle_combined/") - stub_test = True - overwrite = True - - session_to_nwb( - data_dir=data_dir, - output_dir_path=output_dir_path, - stub_test=stub_test, - overwrite=overwrite, - # dandiset_id = "000620", - ) diff --git a/src/jazayeri_lab_to_nwb/watters/watters_metadata.yaml b/src/jazayeri_lab_to_nwb/watters/watters_metadata.yaml deleted file mode 100644 index 8207f76..0000000 --- a/src/jazayeri_lab_to_nwb/watters/watters_metadata.yaml +++ /dev/null @@ -1,16 +0,0 @@ -NWBFile: - # related_publications: # no pubs yet - # - https://doi.org/12345 - session_description: - Data from macaque performing working memory task. Subject is presented with multiple objects at different locations - on a screen. After a delay, the subject is then cued with one of the objects, now displayed at the center of the - screen. Subject should respond by saccading to the location of the cued object at its initial presentation. 
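This YAML is merged into the converter's auto-detected metadata with `dict_deep_update` (see the conversion script above), so any field set here overrides the detected value. A minimal sketch of that override flow, using a toy `metadata` dict in place of `converter.get_metadata()` and assuming the YAML sits in the working directory:

```
from pathlib import Path

from neuroconv.utils import dict_deep_update, load_dict_from_file

# Toy stand-in for the auto-extracted metadata returned by get_metadata().
metadata = {"NWBFile": {"session_description": "auto-detected"}, "Subject": {"sex": "U"}}

# Fields present in the YAML (e.g. Subject.sex) replace the defaults.
editable = load_dict_from_file(Path("watters_metadata.yaml"))
metadata = dict_deep_update(metadata, editable)
```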
- institution: MIT - lab: Jazayeri - experimenter: - - Watters, Nicholas -Subject: - species: Macaca mulatta - # subject_id: Elgar # currently auto-detected from session path, but can be overridden here - age: P6Y # in ISO 8601, such as "P1W2D" - sex: U # One of M, F, U, or O diff --git a/src/jazayeri_lab_to_nwb/watters/watters_notes.md b/src/jazayeri_lab_to_nwb/watters/watters_notes.md deleted file mode 100644 index c23b5b8..0000000 --- a/src/jazayeri_lab_to_nwb/watters/watters_notes.md +++ /dev/null @@ -1 +0,0 @@ -# Notes concerning the watters conversion diff --git a/src/jazayeri_lab_to_nwb/watters/wattersbehaviorinterface.py b/src/jazayeri_lab_to_nwb/watters/wattersbehaviorinterface.py deleted file mode 100644 index 180e052..0000000 --- a/src/jazayeri_lab_to_nwb/watters/wattersbehaviorinterface.py +++ /dev/null @@ -1,127 +0,0 @@ -"""Primary classes for converting experiment-specific behavior.""" -import numpy as np -from pathlib import Path -from pynwb import NWBFile, TimeSeries -from pynwb.behavior import SpatialSeries -from hdmf.backends.hdf5 import H5DataIO - -from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface -from neuroconv.utils import DeepDict, FolderPathType, FilePathType -from neuroconv.tools.nwb_helpers import get_module - - -class NumpyTemporalAlignmentMixin: - """Mixin that implements temporal alignment functions with .npy timestamps""" - - timestamp_file_path: FilePathType - timestamps: np.ndarray - - def get_original_timestamps(self) -> np.ndarray: - return np.load(self.timestamp_file_path) - - def get_timestamps(self) -> np.ndarray: - return self.timestamps - - def set_aligned_timestamps(self, aligned_timestamps: np.ndarray) -> None: - self.timestamps = aligned_timestamps - - -class WattersEyePositionInterface(NumpyTemporalAlignmentMixin, BaseTemporalAlignmentInterface): - """Eye position interface for Watters conversion""" - - def __init__(self, folder_path: FolderPathType): - # initialize interface - super().__init__(folder_path=folder_path) - - # find eye position files and check they all exist - folder_path = Path(folder_path) - eye_h_file = folder_path / "eye.h.values.npy" - eye_h_times_file = folder_path / "eye.h.times.npy" - eye_v_file = folder_path / "eye.v.values.npy" - eye_v_times_file = folder_path / "eye.v.times.npy" - for file_path in [eye_h_file, eye_h_times_file, eye_v_file, eye_v_times_file]: - assert file_path.exists(), f"Could not find {file_path}" - - # load timestamps for both fields and check that they're close/equal - eye_h_times = np.load(eye_h_times_file) - eye_v_times = np.load(eye_v_times_file) - assert np.allclose(eye_h_times, eye_v_times) - - # set timestamps for temporal alignment - self.timestamp_file_path = eye_h_times_file - self.timestamps = eye_h_times - - def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): - # get file paths and load eye position data - folder_path = Path(self.source_data["folder_path"]) - eye_h = np.load(folder_path / "eye.h.values.npy") - eye_v = np.load(folder_path / "eye.v.values.npy") - - # stack and transform data into screen coordinate system - eye_pos = np.stack([eye_h, eye_v], axis=1) - eye_pos = (eye_pos + 10.0) / 20.0 # desired conversion specified by Nick - - # make SpatialSeries - eye_position = SpatialSeries( - name="eye_position", - data=H5DataIO(eye_pos, compression="gzip"), - reference_frame="(0,0) is bottom left corner of screen", - unit="meters", - conversion=0.257, - timestamps=H5DataIO(self.timestamps, compression="gzip"), - description="Eye position data 
recorded by EyeLink camera", - ) - - # get processing module - module_name = "behavior" - module_description = "Contains behavioral data from experiment." - processing_module = get_module(nwbfile=nwbfile, name=module_name, description=module_description) - - # add data to module - processing_module.add_data_interface(eye_position) - - return nwbfile - - -class WattersPupilSizeInterface(NumpyTemporalAlignmentMixin, BaseTemporalAlignmentInterface): - """Pupil size interface for Watters conversion""" - - def __init__(self, folder_path: FolderPathType): - # initialize interface with timestamps - super().__init__(folder_path=folder_path) - - # find eye position files (assume they all exist) - folder_path = Path(folder_path) - pupil_file = folder_path / "eye.pupil.values.npy" - pupil_times_file = folder_path / "eye.pupil.times.npy" - assert pupil_file.exists(), f"Could not find {pupil_file}" - assert pupil_times_file.exists(), f"Could not find {pupil_times_file}" - - # set timestamps for temporal alignment - self.timestamp_file_path = pupil_times_file - self.timestamps = np.load(pupil_times_file) - - def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): - # get file paths and load eye position data - folder_path = Path(self.source_data["folder_path"]) - pupil = np.load(folder_path / "eye.pupil.values.npy") - - # make SpatialSeries - pupil_size = TimeSeries( - name="pupil_size", - data=H5DataIO(pupil, compression="gzip"), - unit="pixels", - conversion=1.0, - timestamps=H5DataIO(self.timestamps, compression="gzip"), - description="Pupil size data recorded by EyeLink camera", - ) - - # get processing module - module_name = "behavior" - module_description = "Contains behavioral data from experiment." - processing_module = get_module(nwbfile=nwbfile, name=module_name, description=module_description) - - # add data to module - processing_module.add_data_interface(pupil_size) - - return nwbfile diff --git a/src/jazayeri_lab_to_nwb/watters/watterstrialsinterface.py b/src/jazayeri_lab_to_nwb/watters/watterstrialsinterface.py deleted file mode 100644 index fe13f50..0000000 --- a/src/jazayeri_lab_to_nwb/watters/watterstrialsinterface.py +++ /dev/null @@ -1,186 +0,0 @@ -"""Primary class for converting experiment-specific behavior.""" -import json -import numpy as np -import pandas as pd -import warnings -from pathlib import Path -from pynwb import NWBFile -from typing import Optional - -from neuroconv.datainterfaces.text.timeintervalsinterface import TimeIntervalsInterface -from neuroconv.utils import DeepDict, FolderPathType, FilePathType - - -class WattersTrialsInterface(TimeIntervalsInterface): - def __init__(self, folder_path: FolderPathType, verbose: bool = True): - super().__init__(file_path=folder_path, verbose=verbose) - - def get_metadata(self) -> dict: - metadata = super().get_metadata() - metadata["TimeIntervals"] = dict( - trials=dict( - table_name="trials", - table_description=f"experimental trials generated from JSON files", - ) - ) - - return metadata - - def _read_file(self, file_path: FolderPathType): - # define files to read - folder_path = Path(file_path) - all_fields = [ - "behavior/trials.broke_fixation.json", - "behavior/trials.response.error.json", - "behavior/trials.response.location.json", - "behavior/trials.response.object.json", - "task/trials.object_blanks.json", - "task/trials.start_times.json", - "task/trials.relative_phase_times.json", - "task/trials.reward.duration.json", - "task/trials.reward.time.json", - "task/trials.stimuli_init.json", - ] - - # check that all data 
exist - for field in all_fields: - assert (folder_path / field).exists(), f"Could not find {folder_path / field}" - - # load into a dictionary - data_dict = {} - for field in all_fields: - with open(folder_path / field, "r") as f: - data_dict[field] = json.load(f) - - # define useful helpers - get_by_index = lambda lst, idx: np.nan if (idx >= len(lst)) else lst[idx] - none_to_nan = lambda val, dim: val or (np.nan if dim <= 1 else np.full((dim,), np.nan).tolist()) - - # process trial data - processed_data = [] - n_trials = len(data_dict["task/trials.start_times.json"]) - for i in range(n_trials): - # get trial start time - start_time = data_dict["task/trials.start_times.json"][i] - if np.isnan(start_time): - warnings.warn(f"Start time for trial {i} is NaN. Dropping this trial.", stacklevel=2) - continue - - # map response object index to id - response_object = data_dict["behavior/trials.response.object.json"][i] - if response_object is None: - response_object = "" - else: - response_object = data_dict["task/trials.stimuli_init.json"][i][response_object]["id"] - - # map stimuli info from list to corresponding ids - object_info = {"a": {}, "b": {}, "c": {}} - target_object = None - for object_dict in data_dict["task/trials.stimuli_init.json"][i]: - object_id = object_dict["id"] - assert object_id in object_info.keys() - object_info[object_id]["position"] = [object_dict["x"], object_dict["y"]] - object_info[object_id]["velocity"] = [object_dict["x_vel"], object_dict["y_vel"]] - if object_dict["target"]: - target_object = object_id - assert target_object is not None - - processed_data.append( - dict( - start_time=start_time, - stop_time=start_time + data_dict["task/trials.relative_phase_times.json"][i][-1], - broke_fixation=data_dict["behavior/trials.broke_fixation.json"][i], - response_error=none_to_nan(data_dict["behavior/trials.response.error.json"][i], 1), - response_location=none_to_nan(data_dict["behavior/trials.response.location.json"][i], 2), - response_object=response_object, - object_blank=data_dict["task/trials.object_blanks.json"][i], - stimulus_time=start_time + get_by_index(data_dict["task/trials.relative_phase_times.json"][i], 0), - delay_start_time=start_time - + get_by_index(data_dict["task/trials.relative_phase_times.json"][i], 1), - cue_time=start_time + get_by_index(data_dict["task/trials.relative_phase_times.json"][i], 2), - response_time=start_time + get_by_index(data_dict["task/trials.relative_phase_times.json"][i], 3), - reveal_time=start_time + get_by_index(data_dict["task/trials.relative_phase_times.json"][i], 4), - reward_duration=none_to_nan(data_dict["task/trials.reward.duration.json"][i], 1), - reward_time=start_time + none_to_nan(data_dict["task/trials.reward.time.json"][i], 1), - target_object=target_object, - object_a_position=object_info["a"].get("position", [np.nan, np.nan]), - object_a_velocity=object_info["a"].get("velocity", [np.nan, np.nan]), - object_b_position=object_info["b"].get("position", [np.nan, np.nan]), - object_b_velocity=object_info["b"].get("velocity", [np.nan, np.nan]), - object_c_position=object_info["c"].get("position", [np.nan, np.nan]), - object_c_velocity=object_info["c"].get("velocity", [np.nan, np.nan]), - ) - ) - - return pd.DataFrame(processed_data) - - def add_to_nwbfile( - self, - nwbfile: NWBFile, - metadata: Optional[dict] = None, - tag: str = "trials", - ): - column_descriptions = { - "broke_fixation": "Whether the subject broke fixation before the response period.", - "response_error": ( - "Euclidean distance between subject's 
response fixation position and the true target " - "object's position, in units of display sidelength." - ), - "response_location": ( - "Position of the subject's response fixation, in units of display sidelength, with (0,0) " - "being the bottom left corner of the display." - ), - "response_object": ( - "The ID of the stimulus object nearest to the subject's response, one of 'a' for Apple, " - "'b' for Blueberry, or 'c' for Orange. If the trial ended prematurely, the field is left blank." - ), - "object_blank": "Whether the object locations were visible in the delay phase as blank disks.", - "stimulus_time": "Time of stimulus presentation.", - "delay_start_time": "Time of the beginning of the delay period.", - "cue_time": "Time of cue object presentation.", - "response_time": "Time of subject's response.", - "reveal_time": "Time of reveal of correct object position.", - "reward_duration": "Duration of juice reward, in seconds.", - "reward_time": "Time of reward delivery.", - "target_object": ( - "ID of the stimulus object that is the target object, one of 'a' for Apple, 'b' for Blueberry, " - "or 'c' for Orange." - ), - "object_a_position": ( - "Position of stimulus object 'a', or Apple. Values are (x,y) coordinates in units of screen " - "sidelength, with (0,0) being the bottom left corner. If the object is not presented in a " - "particular trial, the position is empty." - ), - "object_a_velocity": ( - "Velocity of stimulus object 'a', or Apple. Values are (x,y) velocity vectors, in units of " - "screen sidelength per simulation timestep. If the object is not presented in a particular " - "trial, the velocity is empty." - ), - "object_b_position": ( - "Position of stimulus object 'b', or Blueberry. Values are (x,y) coordinates in units of " - "screen sidelength, with (0,0) being the bottom left corner. If the object is not presented " - "in a particular trial, the position is empty." - ), - "object_b_velocity": ( - "Velocity of stimulus object 'b', or Blueberry. Values are (x,y) velocity vectors, in units " - "of screen sidelength per simulation timestep. If the object is not presented in a particular " - "trial, the velocity is empty." - ), - "object_c_position": ( - "Position of stimulus object 'c', or Orange. Values are (x,y) coordinates in units of screen " - "sidelength, with (0,0) being the bottom left corner. If the object is not presented in a " - "particular trial, the position is empty." - ), - "object_c_velocity": ( - "Velocity of stimulus object 'c', or Orange. Values are (x,y) velocity vectors, in units of " - "screen sidelength per simulation timestep. If the object is not presented in a particular " - "trial, the velocity is empty." 
- ), - } - - return super().add_to_nwbfile( - nwbfile=nwbfile, - metadata=metadata, - tag=tag, - column_descriptions=column_descriptions, - ) From 5f555608510f457dc48c9be87d9b44bcd44ca257 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Dec 2023 23:07:30 +0000 Subject: [PATCH 2/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- setup.py | 3 +- src/jazayeri_lab_to_nwb/watters/__init__.py | 4 +- .../watters/display_interface.py | 80 +++--- .../watters/get_session_paths.py | 109 +++---- .../watters/main_convert_session.py | 272 ++++++++---------- src/jazayeri_lab_to_nwb/watters/metadata.yaml | 8 +- .../watters/nwb_converter.py | 30 +- .../watters/recording_interface.py | 10 +- .../watters/timeseries_interface.py | 143 +++++---- .../watters/trials_interface.py | 207 ++++++------- 10 files changed, 400 insertions(+), 466 deletions(-) diff --git a/setup.py b/setup.py index 5018ad7..f8e8892 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from pathlib import Path -from setuptools import setup, find_packages + +from setuptools import find_packages, setup requirements_file_path = Path(__file__).parent / "requirements.txt" with open(requirements_file_path) as file: diff --git a/src/jazayeri_lab_to_nwb/watters/__init__.py b/src/jazayeri_lab_to_nwb/watters/__init__.py index 06f0206..47379f1 100644 --- a/src/jazayeri_lab_to_nwb/watters/__init__.py +++ b/src/jazayeri_lab_to_nwb/watters/__init__.py @@ -1,4 +1,4 @@ from .behavior_interface import EyePositionInterface, PupilSizeInterface -from .trials_interface import TrialsInterface -from .recording_interface import DatRecordingInterface from .nwb_converter import NWBConverter +from .recording_interface import DatRecordingInterface +from .trials_interface import TrialsInterface diff --git a/src/jazayeri_lab_to_nwb/watters/display_interface.py b/src/jazayeri_lab_to_nwb/watters/display_interface.py index 096f362..b819b16 100644 --- a/src/jazayeri_lab_to_nwb/watters/display_interface.py +++ b/src/jazayeri_lab_to_nwb/watters/display_interface.py @@ -14,85 +14,79 @@ class DisplayInterface(TimeIntervalsInterface): """Class for converting data about display frames. - + All events that occur exactly once per display update are contained in this interface. 
""" - + KEY_MAP = { - 'frame_object_positions': 'object_positions', - 'frame_fixation_cross_scale': 'fixation_cross_scale', - 'frame_closed_loop_gaze_position': 'closed_loop_eye_position', - 'frame_task_phase': 'task_phase', - 'frame_display_times': 'start_time', + "frame_object_positions": "object_positions", + "frame_fixation_cross_scale": "fixation_cross_scale", + "frame_closed_loop_gaze_position": "closed_loop_eye_position", + "frame_task_phase": "task_phase", + "frame_display_times": "start_time", } - + def __init__(self, folder_path: FolderPathType, verbose: bool = True): super().__init__(file_path=folder_path, verbose=verbose) def get_metadata(self) -> dict: metadata = super().get_metadata() - metadata['TimeIntervals'] = dict( + metadata["TimeIntervals"] = dict( display=dict( - table_name='display', - table_description='data about each displayed frame', + table_name="display", + table_description="data about each displayed frame", ) ) return metadata - + def get_timestamps(self) -> np.ndarray: - return super(DisplayInterface, self).get_timestamps(column='start_time') - + return super(DisplayInterface, self).get_timestamps(column="start_time") + def set_aligned_starting_time(self, aligned_starting_time: float) -> None: self.dataframe.start_time += aligned_starting_time def _read_file(self, file_path: FolderPathType): # Create dataframe with data for each frame - trials = json.load(open(Path(file_path) / 'trials.json', 'r')) + trials = json.load(open(Path(file_path) / "trials.json", "r")) frames = { - k_mapped: list(itertools.chain(*[d[k] for d in trials])) - for k, k_mapped in DisplayInterface.KEY_MAP.items() + k_mapped: list(itertools.chain(*[d[k] for d in trials])) for k, k_mapped in DisplayInterface.KEY_MAP.items() } - + # Serialize object_positions data for hdf5 conversion to work - frames['object_positions'] = [ - json.dumps(x) for x in frames['object_positions'] - ] - + frames["object_positions"] = [json.dumps(x) for x in frames["object_positions"]] + return pd.DataFrame(frames) - def add_to_nwbfile(self, - nwbfile: NWBFile, - metadata: Optional[dict] = None, - tag: str = 'display'): + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None, tag: str = "display"): return super(DisplayInterface, self).add_to_nwbfile( nwbfile=nwbfile, metadata=metadata, tag=tag, column_descriptions=self.column_descriptions, ) - + @property def column_descriptions(self): column_descriptions = { - 'object_positions': ( - 'For each frame, a serialized list with one element for each ' - 'object. Each element is an (x, y) position of the ' - 'corresponding object, in coordinates of arena width.' + "object_positions": ( + "For each frame, a serialized list with one element for each " + "object. Each element is an (x, y) position of the " + "corresponding object, in coordinates of arena width." ), - 'fixation_cross_scale': ( - 'For each frame, the scale of the central fixation cross. ' - 'Fixation cross scale grows as the eye position deviates from ' - 'the center of the fixation cross, to provide a cue to ' - 'maintain good fixation.' + "fixation_cross_scale": ( + "For each frame, the scale of the central fixation cross. " + "Fixation cross scale grows as the eye position deviates from " + "the center of the fixation cross, to provide a cue to " + "maintain good fixation." ), - 'closed_loop_eye_position': ( - 'For each frame, the eye position in the close-loop task ' - 'engine. 
This was used to for real-time eye position '
-            'computations, such as saccade detection and reward delivery.'
+        "closed_loop_eye_position": (
+            "For each frame, the eye position in the closed-loop task "
+            "engine. This was used for real-time eye position "
+            "computations, such as saccade detection and reward delivery."
+        ),
-        'task_phase': 'The phase of the task for each frame.',
-        'start_time': 'Time of display update for each frame.',
+        "task_phase": "The phase of the task for each frame.",
+        "start_time": "Time of display update for each frame.",
     }
-
+
     return column_descriptions
diff --git a/src/jazayeri_lab_to_nwb/watters/get_session_paths.py b/src/jazayeri_lab_to_nwb/watters/get_session_paths.py
index 0cb29e3..e9830ec 100644
--- a/src/jazayeri_lab_to_nwb/watters/get_session_paths.py
+++ b/src/jazayeri_lab_to_nwb/watters/get_session_paths.py
@@ -4,19 +4,19 @@
 import pathlib
 
 SUBJECT_NAME_TO_ID = {
-    'Perle': 'monkey0',
-    'Elgar': 'monkey1',
+    "Perle": "monkey0",
+    "Elgar": "monkey1",
 }
 
 SessionPaths = collections.namedtuple(
-    'SessionPaths',
+    "SessionPaths",
     [
-        'output',
-        'raw_data',
-        'data_open_source',
-        'task_behavior_data',
-        'sync_pulses',
-        'spike_sorting_raw',
+        "output",
+        "raw_data",
+        "data_open_source",
+        "task_behavior_data",
+        "sync_pulses",
+        "spike_sorting_raw",
     ],
 )
 
@@ -24,45 +24,34 @@ def _get_session_paths_openmind(subject, session, stub_test=False):
     """Get paths to all components of the data on openmind."""
     subject_id = SUBJECT_NAME_TO_ID[subject]
-
+
     # Path to write output nwb files to
-    output_path = (
-        f'/om/user/nwatters/nwb_data_multi_prediction/{subject}/{session}'
-    )
+    output_path = f"/om/user/nwatters/nwb_data_multi_prediction/{subject}/{session}"
     if stub_test:
-        output_path = f'{output_path}/stub'
-
+        output_path = f"{output_path}/stub"
+
     # Path to the raw data. This is used for reading raw physiology data.
-    raw_data_path = (
-        f'/om4/group/jazlab/nwatters/multi_prediction/phys_data/{subject}/'
-        f'{session}/raw_data'
-    )
-
+    raw_data_path = f"/om4/group/jazlab/nwatters/multi_prediction/phys_data/{subject}/" f"{session}/raw_data"
+
     # Path to task and behavior data.
     task_behavior_data_path = (
-        '/om4/group/jazlab/nwatters/multi_prediction/datasets/data_nwb_trials/'
-        f'{subject}/{session}'
+        "/om4/group/jazlab/nwatters/multi_prediction/datasets/data_nwb_trials/" f"{subject}/{session}"
    )
-
+
     # Path to open-source data. This is used for reading behavior and task data.
     data_open_source_path = (
-        '/om4/group/jazlab/nwatters/multi_prediction/datasets/data_open_source/'
-        f'Subjects/{subject_id}/{session}/001'
+        "/om4/group/jazlab/nwatters/multi_prediction/datasets/data_open_source/" f"Subjects/{subject_id}/{session}/001"
    )
-
+
     # Path to sync pulses. This is used for reading timescale transformations
     # between physiology and mworks data streams.
-    sync_pulses_path = (
-        '/om4/group/jazlab/nwatters/multi_prediction/data_processed/'
-        f'{subject}/{session}/sync_pulses'
-    )
-
+    sync_pulses_path = "/om4/group/jazlab/nwatters/multi_prediction/data_processed/" f"{subject}/{session}/sync_pulses"
+
     # Path to spike sorting. This is used for reading spike sorted data.
spike_sorting_raw_path = ( - f'/om4/group/jazlab/nwatters/multi_prediction/phys_data/{subject}/' - f'{session}/spike_sorting' + f"/om4/group/jazlab/nwatters/multi_prediction/phys_data/{subject}/" f"{session}/spike_sorting" ) - + session_paths = SessionPaths( output=pathlib.Path(output_path), raw_data=pathlib.Path(raw_data_path), @@ -71,38 +60,36 @@ def _get_session_paths_openmind(subject, session, stub_test=False): sync_pulses=pathlib.Path(sync_pulses_path), spike_sorting_raw=pathlib.Path(spike_sorting_raw_path), ) - + return session_paths def _get_session_paths_globus(subject, session, stub_test=False): """Get paths to all components of the data in the globus repo.""" subject_id = SUBJECT_NAME_TO_ID[subject] - base_data_dir = f'/shared/catalystneuro/JazLab/{subject_id}/{session}/' - + base_data_dir = f"/shared/catalystneuro/JazLab/{subject_id}/{session}/" + # Path to write output nwb files to - output_path = ( - f'~/conversion_nwb/jazayeri-lab-to-nwb/{subject}/{session}' - ) + output_path = f"~/conversion_nwb/jazayeri-lab-to-nwb/{subject}/{session}" if stub_test: - output_path = f'{output_path}/stub' - + output_path = f"{output_path}/stub" + # Path to the raw data. This is used for reading raw physiology data. - raw_data_path = f'{base_data_dir}/raw_data' - + raw_data_path = f"{base_data_dir}/raw_data" + # Path to task and behavior data. - task_behavior_data_path = f'{base_data_dir}/processed_task_data' - + task_behavior_data_path = f"{base_data_dir}/processed_task_data" + # Path to open-source data. This is used for reading behavior and task data. - data_open_source_path = f'{base_data_dir}/data_open_source' - + data_open_source_path = f"{base_data_dir}/data_open_source" + # Path to sync pulses. This is used for reading timescale transformations # between physiology and mworks data streams. - sync_pulses_path = f'{base_data_dir}/sync_pulses' - + sync_pulses_path = f"{base_data_dir}/sync_pulses" + # Path to spike sorting. This is used for reading spike sorted data. - spike_sorting_raw_path = f'{base_data_dir}/spike_sorting' - + spike_sorting_raw_path = f"{base_data_dir}/spike_sorting" + session_paths = SessionPaths( output=pathlib.Path(output_path), raw_data=pathlib.Path(raw_data_path), @@ -111,21 +98,19 @@ def _get_session_paths_globus(subject, session, stub_test=False): sync_pulses=pathlib.Path(sync_pulses_path), spike_sorting_raw=pathlib.Path(spike_sorting_raw_path), ) - + return session_paths -def get_session_paths(subject, session, stub_test=False, repo='openmind'): +def get_session_paths(subject, session, stub_test=False, repo="openmind"): """Get paths to all components of the data. - + Returns: SessionPaths namedtuple. 
""" - if repo == 'openmind': - return _get_session_paths_openmind( - subject=subject, session=session, stub_test=stub_test) - elif repo == 'globus': - return _get_session_paths_globus( - subject=subject, session=session, stub_test=stub_test) + if repo == "openmind": + return _get_session_paths_openmind(subject=subject, session=session, stub_test=stub_test) + elif repo == "globus": + return _get_session_paths_globus(subject=subject, session=session, stub_test=stub_test) else: - raise ValueError(f'Invalid repo {repo}') \ No newline at end of file + raise ValueError(f"Invalid repo {repo}") diff --git a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py index ea40aa7..8803bf9 100644 --- a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py @@ -2,17 +2,17 @@ This converts a session to NWB format and writes the nwb files to /om/user/nwatters/nwb_data_multi_prediction/{$SUBJECT}/{$SESSION} -Two NWB files are created: +Two NWB files are created: $SUBJECT_$SESSION_raw.nwb --- Raw physiology $SUBJECT_$SESSION_processed.nwb --- Task, behavior, and sorted physiology These files can be automatically uploaded to a DANDI dataset. - + Usage: $ python main_convert_session.py $SUBJECT $SESSION where $SUBJECT is the subject name and $SESSION is the session date YYYY-MM-DD. For example: $ python main_convert_session.py Perle 2022-06-01 - + Please read and consider changing the following variables: _REPO _STUB_TEST @@ -38,7 +38,7 @@ from neuroconv.utils import dict_deep_update, load_dict_from_file # Data repository. Either 'globus' or 'openmind' -_REPO = 'globus' +_REPO = "globus" # Whether to run all the physiology data or only a stub _STUB_TEST = True # Whether to overwrite output nwb files @@ -50,118 +50,108 @@ logging.getLogger().setLevel(logging.INFO) _SUBJECT_TO_SEX = { - 'Perle': 'F', - 'Elgar': 'M', + "Perle": "F", + "Elgar": "M", } _SUBJECT_TO_AGE = { - 'Perle': 'P10Y', # Born 6/11/2012 - 'Elgar': 'P10Y', # Born 5/2/2012 + "Perle": "P10Y", # Born 6/11/2012 + "Elgar": "P10Y", # Born 5/2/2012 } -def _get_single_file(directory, suffix=''): +def _get_single_file(directory, suffix=""): """Get path to a file in given directory with given suffix. - + Raises error if not exactly one satisfying file. 
""" - files = list(glob.glob(str(directory / f'*{suffix}'))) + files = list(glob.glob(str(directory / f"*{suffix}"))) if len(files) == 0: - raise ValueError(f'No {suffix} files found in {directory}') + raise ValueError(f"No {suffix} files found in {directory}") if len(files) > 1: - raise ValueError(f'Multiple {suffix} files found in {directory}') + raise ValueError(f"Multiple {suffix} files found in {directory}") return files[0] -def _add_v_probe_data(raw_source_data, - raw_conversion_options, - processed_source_data, - processed_conversion_options, - session_paths, - probe_num, - stub_test): +def _add_v_probe_data( + raw_source_data, + raw_conversion_options, + processed_source_data, + processed_conversion_options, + session_paths, + probe_num, + stub_test, +): """Add V-Probe session data.""" - probe_data_dir = session_paths.raw_data / f'v_probe_{probe_num}' + probe_data_dir = session_paths.raw_data / f"v_probe_{probe_num}" if not probe_data_dir.exists(): return - logging.info(f'Adding V-probe {probe_num} session data') - + logging.info(f"Adding V-probe {probe_num} session data") + # Raw data - recording_file = _get_single_file(probe_data_dir, suffix='.dat') - metadata_path = str(session_paths.data_open_source / 'probes.metadata.json') - raw_source_data[f'RecordingVP{probe_num}'] = dict( + recording_file = _get_single_file(probe_data_dir, suffix=".dat") + metadata_path = str(session_paths.data_open_source / "probes.metadata.json") + raw_source_data[f"RecordingVP{probe_num}"] = dict( file_path=recording_file, probe_metadata_file=metadata_path, - probe_key=f'probe{(probe_num + 1):02d}', - probe_name=f'vprobe{probe_num}', - es_key=f'ElectricalSeriesVP{probe_num}', + probe_key=f"probe{(probe_num + 1):02d}", + probe_name=f"vprobe{probe_num}", + es_key=f"ElectricalSeriesVP{probe_num}", ) - raw_conversion_options[f'RecordingVP{probe_num}'] = dict( - stub_test=stub_test) + raw_conversion_options[f"RecordingVP{probe_num}"] = dict(stub_test=stub_test) # Processed data - sorting_path = ( - session_paths.spike_sorting_raw / - f'v_probe_{probe_num}' / - 'ks_3_output_pre_v6_curated' - ) - processed_source_data[f'RecordingVP{probe_num}'] = raw_source_data[ - f'RecordingVP{probe_num}'] - processed_source_data[f'SortingVP{probe_num}'] = dict( + sorting_path = session_paths.spike_sorting_raw / f"v_probe_{probe_num}" / "ks_3_output_pre_v6_curated" + processed_source_data[f"RecordingVP{probe_num}"] = raw_source_data[f"RecordingVP{probe_num}"] + processed_source_data[f"SortingVP{probe_num}"] = dict( folder_path=str(sorting_path), keep_good_only=False, ) - processed_conversion_options[f'RecordingVP{probe_num}'] = dict( - stub_test=stub_test, write_electrical_series=False) - processed_conversion_options[f'SortingVP{probe_num}'] = dict( - stub_test=stub_test, write_as='processing') - - -def _add_spikeglx_data(raw_source_data, - raw_conversion_options, - processed_source_data, - processed_conversion_options, - session_paths, - stub_test): + processed_conversion_options[f"RecordingVP{probe_num}"] = dict(stub_test=stub_test, write_electrical_series=False) + processed_conversion_options[f"SortingVP{probe_num}"] = dict(stub_test=stub_test, write_as="processing") + + +def _add_spikeglx_data( + raw_source_data, + raw_conversion_options, + processed_source_data, + processed_conversion_options, + session_paths, + stub_test, +): """Add SpikeGLX recording data.""" - logging.info('Adding SpikeGLX data') - + logging.info("Adding SpikeGLX data") + # Raw data - spikeglx_dir = [ - x for x in (session_paths.raw_data / 
'spikeglx').iterdir() - if 'settling' not in str(x) - ] + spikeglx_dir = [x for x in (session_paths.raw_data / "spikeglx").iterdir() if "settling" not in str(x)] if len(spikeglx_dir) == 0: - logging.info('Found no SpikeGLX data') + logging.info("Found no SpikeGLX data") elif len(spikeglx_dir) == 1: spikeglx_dir = spikeglx_dir[0] else: - raise ValueError(f'Found multiple spikeglx directories {spikeglx_dir}') - ap_file = _get_single_file(spikeglx_dir, suffix='/*.ap.bin') - lfp_file = _get_single_file(spikeglx_dir, suffix='/*.lf.bin') - raw_source_data['RecordingNP'] = dict(file_path=ap_file) - raw_source_data['LF'] = dict(file_path=lfp_file) - processed_source_data['RecordingNP'] = dict(file_path=ap_file) - processed_source_data['LF'] = dict(file_path=lfp_file) - raw_conversion_options['RecordingNP'] = dict(stub_test=stub_test) - raw_conversion_options['LF'] = dict(stub_test=stub_test) - processed_conversion_options['RecordingNP'] = dict(stub_test=stub_test) - processed_conversion_options['LF'] = dict(stub_test=stub_test) + raise ValueError(f"Found multiple spikeglx directories {spikeglx_dir}") + ap_file = _get_single_file(spikeglx_dir, suffix="/*.ap.bin") + lfp_file = _get_single_file(spikeglx_dir, suffix="/*.lf.bin") + raw_source_data["RecordingNP"] = dict(file_path=ap_file) + raw_source_data["LF"] = dict(file_path=lfp_file) + processed_source_data["RecordingNP"] = dict(file_path=ap_file) + processed_source_data["LF"] = dict(file_path=lfp_file) + raw_conversion_options["RecordingNP"] = dict(stub_test=stub_test) + raw_conversion_options["LF"] = dict(stub_test=stub_test) + processed_conversion_options["RecordingNP"] = dict(stub_test=stub_test) + processed_conversion_options["LF"] = dict(stub_test=stub_test) # Processed data - sorting_path = session_paths.spike_sorting_raw / 'np_0' / 'ks_3_output_v2' - processed_source_data['SortingNP'] = dict( + sorting_path = session_paths.spike_sorting_raw / "np_0" / "ks_3_output_v2" + processed_source_data["SortingNP"] = dict( folder_path=str(sorting_path), keep_good_only=False, ) - processed_conversion_options['SortingNP'] = dict( - stub_test=stub_test, write_as='processing') + processed_conversion_options["SortingNP"] = dict(stub_test=stub_test, write_as="processing") -def session_to_nwb(subject: str, - session: str, - stub_test: bool = False, - overwrite: bool = True, - dandiset_id: Union[str, None] = None): +def session_to_nwb( + subject: str, session: str, stub_test: bool = False, overwrite: bool = True, dandiset_id: Union[str, None] = None +): """ Convert a single session to an NWB file. @@ -188,29 +178,29 @@ def session_to_nwb(subject: str, """ if dandiset_id is not None: import dandi # check importability - assert os.getenv('DANDI_API_KEY'), ( + + assert os.getenv("DANDI_API_KEY"), ( "Unable to find environment variable 'DANDI_API_KEY'. " "Please retrieve your token from DANDI and set this environment " "variable." 
) - - logging.info(f'stub_test = {stub_test}') - logging.info(f'overwrite = {overwrite}') - logging.info(f'dandiset_id = {dandiset_id}') - + + logging.info(f"stub_test = {stub_test}") + logging.info(f"overwrite = {overwrite}") + logging.info(f"dandiset_id = {dandiset_id}") + # Get paths - session_paths = get_session_paths.get_session_paths( - subject, session, stub_test=stub_test, repo=_REPO) - logging.info(f'session_paths: {session_paths}') + session_paths = get_session_paths.get_session_paths(subject, session, stub_test=stub_test, repo=_REPO) + logging.info(f"session_paths: {session_paths}") # Get paths for nwb files to write session_paths.output.mkdir(parents=True, exist_ok=True) - session_id = f'{subject}_{session}' - raw_nwb_path = session_paths.output / f'{session_id}_raw.nwb' - processed_nwb_path = session_paths.output / f'{session_id}_processed.nwb' - logging.info(f'raw_nwb_path = {raw_nwb_path}') - logging.info(f'processed_nwb_path = {processed_nwb_path}') - logging.info('') + session_id = f"{subject}_{session}" + raw_nwb_path = session_paths.output / f"{session_id}_raw.nwb" + processed_nwb_path = session_paths.output / f"{session_id}_processed.nwb" + logging.info(f"raw_nwb_path = {raw_nwb_path}") + logging.info(f"processed_nwb_path = {processed_nwb_path}") + logging.info("") # Initialize empty data dictionaries raw_source_data = {} @@ -241,28 +231,26 @@ def session_to_nwb(subject: str, ) # Add behavior data - logging.info('Adding behavior data') + logging.info("Adding behavior data") behavior_path = str(session_paths.task_behavior_data) - processed_source_data['EyePosition'] = dict(folder_path=behavior_path) - processed_conversion_options['EyePosition'] = dict() - processed_source_data['PupilSize'] = dict(folder_path=behavior_path) - processed_conversion_options['PupilSize'] = dict() - processed_source_data['RewardLine'] = dict(folder_path=behavior_path) - processed_conversion_options['RewardLine'] = dict() - processed_source_data['Audio'] = dict(folder_path=behavior_path) - processed_conversion_options['Audio'] = dict() - + processed_source_data["EyePosition"] = dict(folder_path=behavior_path) + processed_conversion_options["EyePosition"] = dict() + processed_source_data["PupilSize"] = dict(folder_path=behavior_path) + processed_conversion_options["PupilSize"] = dict() + processed_source_data["RewardLine"] = dict(folder_path=behavior_path) + processed_conversion_options["RewardLine"] = dict() + processed_source_data["Audio"] = dict(folder_path=behavior_path) + processed_conversion_options["Audio"] = dict() + # Add trials data - logging.info('Adding trials data') - processed_source_data['Trials'] = dict( - folder_path=str(session_paths.task_behavior_data)) - processed_conversion_options['Trials'] = dict() - + logging.info("Adding trials data") + processed_source_data["Trials"] = dict(folder_path=str(session_paths.task_behavior_data)) + processed_conversion_options["Trials"] = dict() + # Add display data - logging.info('Adding display data') - processed_source_data['Display'] = dict( - folder_path=str(session_paths.task_behavior_data)) - processed_conversion_options['Display'] = dict() + logging.info("Adding display data") + processed_source_data["Display"] = dict(folder_path=str(session_paths.task_behavior_data)) + processed_conversion_options["Display"] = dict() # Create processed data converter processed_converter = nwb_converter.NWBConverter( @@ -272,49 +260,43 @@ def session_to_nwb(subject: str, # Add datetime and subject name to processed converter metadata = 
processed_converter.get_metadata() - metadata['NWBFile']['session_id'] = session_id - metadata['Subject']['subject_id'] = subject - metadata['Subject']['sex'] = _SUBJECT_TO_SEX[subject] - metadata['Subject']['age'] = _SUBJECT_TO_AGE[subject] + metadata["NWBFile"]["session_id"] = session_id + metadata["Subject"]["subject_id"] = subject + metadata["Subject"]["sex"] = _SUBJECT_TO_SEX[subject] + metadata["Subject"]["age"] = _SUBJECT_TO_AGE[subject] # EcePhys - probe_metadata_file = ( - session_paths.data_open_source / 'probes.metadata.json') - with open(probe_metadata_file, 'r') as f: + probe_metadata_file = session_paths.data_open_source / "probes.metadata.json" + with open(probe_metadata_file, "r") as f: probe_metadata = json.load(f) - neuropixel_metadata = [ - x for x in probe_metadata if x['probe_type'] == 'Neuropixels' - ][0] - for entry in metadata['Ecephys']['ElectrodeGroup']: - if entry['device'] == 'Neuropixel-Imec': + neuropixel_metadata = [x for x in probe_metadata if x["probe_type"] == "Neuropixels"][0] + for entry in metadata["Ecephys"]["ElectrodeGroup"]: + if entry["device"] == "Neuropixel-Imec": # TODO: uncomment when fixed in pynwb # entry.update(dict(position=[( # neuropixel_metadata['coordinates'][0], # neuropixel_metadata['coordinates'][1], # neuropixel_metadata['depth_from_surface'], # )] - logging.info('\n\n') - logging.warning(' PROBE COORDINATES NOT IMPLEMENTED\n\n') + logging.info("\n\n") + logging.warning(" PROBE COORDINATES NOT IMPLEMENTED\n\n") # Update default metadata with the editable in the corresponding yaml file - editable_metadata_path = Path(__file__).parent / 'metadata.yaml' + editable_metadata_path = Path(__file__).parent / "metadata.yaml" editable_metadata = load_dict_from_file(editable_metadata_path) metadata = dict_deep_update(metadata, editable_metadata) # Check if session_start_time was found/set - if 'session_start_time' not in metadata['NWBFile']: + if "session_start_time" not in metadata["NWBFile"]: try: - date = datetime.datetime.strptime(session, '%Y-%m-%d') - date = date.replace(tzinfo=ZoneInfo('US/Eastern')) + date = datetime.datetime.strptime(session, "%Y-%m-%d") + date = date.replace(tzinfo=ZoneInfo("US/Eastern")) except: - raise ValueError( - 'Session start time was not auto-detected. Please provide it ' - 'in `metadata.yaml`' - ) - metadata['NWBFile']['session_start_time'] = date + raise ValueError("Session start time was not auto-detected. 
Please provide it " "in `metadata.yaml`") + metadata["NWBFile"]["session_start_time"] = date # Run conversion - logging.info('Running processed conversion') + logging.info("Running processed conversion") processed_converter.run_conversion( metadata=metadata, nwbfile_path=processed_nwb_path, @@ -322,8 +304,8 @@ def session_to_nwb(subject: str, overwrite=overwrite, ) - logging.info('Running raw data conversion') - metadata['NWBFile']['identifier'] = str(uuid4()) + logging.info("Running raw data conversion") + metadata["NWBFile"]["identifier"] = str(uuid4()) raw_converter = nwb_converter.NWBConverter( source_data=raw_source_data, sync_dir=str(session_paths.sync_pulses), @@ -334,21 +316,21 @@ def session_to_nwb(subject: str, conversion_options=raw_conversion_options, overwrite=overwrite, ) - + # Upload to DANDI if dandiset_id is not None: - logging.info(f'Uploading to dandiset id {dandiset_id}') + logging.info(f"Uploading to dandiset id {dandiset_id}") automatic_dandi_upload( dandiset_id=dandiset_id, nwb_folder_path=session_paths.output, ) - - -if __name__ == '__main__': + + +if __name__ == "__main__": """Run session conversion.""" subject = sys.argv[1] session = sys.argv[2] - logging.info(f'\nStarting conversion for {subject}/{session}\n') + logging.info(f"\nStarting conversion for {subject}/{session}\n") session_to_nwb( subject=subject, session=session, @@ -356,4 +338,4 @@ def session_to_nwb(subject: str, overwrite=_OVERWRITE, dandiset_id=_DANDISET_ID, ) - logging.info(f'\nFinished conversion for {subject}/{session}\n') + logging.info(f"\nFinished conversion for {subject}/{session}\n") diff --git a/src/jazayeri_lab_to_nwb/watters/metadata.yaml b/src/jazayeri_lab_to_nwb/watters/metadata.yaml index 216dcf7..0ed943b 100644 --- a/src/jazayeri_lab_to_nwb/watters/metadata.yaml +++ b/src/jazayeri_lab_to_nwb/watters/metadata.yaml @@ -2,10 +2,10 @@ NWBFile: # related_publications: # no pubs yet # - https://doi.org/12345 session_description: - Data from macaque performing multi-object working memory task. Subject is - presented with multiple objects at different locations on a screen. After a - delay, the subject is then cued with one of the objects, now displayed at - the center of the screen. Subject should respond by saccading to the + Data from macaque performing multi-object working memory task. Subject is + presented with multiple objects at different locations on a screen. After a + delay, the subject is then cued with one of the objects, now displayed at + the center of the screen. Subject should respond by saccading to the location of the cued object at its initial presentation. 
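As in `session_to_nwb` above, the upload step needs only a dandiset ID and the folder of written NWB files once `DANDI_API_KEY` is exported; a sketch with placeholder values:

```
import os

from neuroconv.tools.data_transfers import automatic_dandi_upload

# Placeholder ID and folder; DANDI_API_KEY must be set in the environment.
assert os.getenv("DANDI_API_KEY"), "Set DANDI_API_KEY before uploading."
automatic_dandi_upload(
    dandiset_id="000620",
    nwb_folder_path="/path/to/nwb_output",
)
```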
institution: MIT lab: Jazayeri diff --git a/src/jazayeri_lab_to_nwb/watters/nwb_converter.py b/src/jazayeri_lab_to_nwb/watters/nwb_converter.py index eb58876..c4c3919 100644 --- a/src/jazayeri_lab_to_nwb/watters/nwb_converter.py +++ b/src/jazayeri_lab_to_nwb/watters/nwb_converter.py @@ -2,11 +2,11 @@ import json import logging -import numpy as np from pathlib import Path from typing import Optional import display_interface +import numpy as np import timeseries_interfaces import trials_interface from neuroconv import NWBConverter @@ -15,8 +15,12 @@ KiloSortSortingInterface, SpikeGLXRecordingInterface, ) -from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import BaseRecordingExtractorInterface -from neuroconv.datainterfaces.ecephys.basesortingextractorinterface import BaseSortingExtractorInterface +from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import ( + BaseRecordingExtractorInterface, +) +from neuroconv.datainterfaces.ecephys.basesortingextractorinterface import ( + BaseSortingExtractorInterface, +) from neuroconv.datainterfaces.text.timeintervalsinterface import TimeIntervalsInterface from neuroconv.utils import FolderPathType from recording_interface import DatRecordingInterface @@ -43,10 +47,7 @@ class NWBConverter(NWBConverter): Display=display_interface.DisplayInterface, ) - def __init__(self, - source_data: dict[str, dict], - sync_dir: Optional[FolderPathType] = None, - verbose: bool = True): + def __init__(self, source_data: dict[str, dict], sync_dir: Optional[FolderPathType] = None, verbose: bool = True): """Validate source_data and initialize all data interfaces.""" super().__init__(source_data=source_data, verbose=verbose) self.sync_dir = sync_dir @@ -56,14 +57,14 @@ def __init__(self, if isinstance(data_interface, BaseSortingExtractorInterface): unit_ids = np.array(data_interface.sorting_extractor.unit_ids) data_interface.sorting_extractor.set_property( - key='unit_name', + key="unit_name", values=(unit_ids + unit_name_start).astype(str), ) unit_name_start += np.max(unit_ids) + 1 def temporally_align_data_interfaces(self): - logging.info('Temporally aligning data interfaces') - + logging.info("Temporally aligning data interfaces") + if self.sync_dir is None: return sync_dir = Path(self.sync_dir) @@ -76,7 +77,9 @@ def temporally_align_data_interfaces(self): for i in [0, 1]: if f"RecordingVP{i}" in self.data_interface_objects: orig_timestamps = self.data_interface_objects[f"RecordingVP{i}"].get_original_timestamps() - aligned_timestamps = open_ephys_transform["intercept"] + open_ephys_transform["coef"] * (open_ephys_start_time + orig_timestamps) + aligned_timestamps = open_ephys_transform["intercept"] + open_ephys_transform["coef"] * ( + open_ephys_start_time + orig_timestamps + ) self.data_interface_objects[f"RecordingVP{i}"].set_aligned_timestamps(aligned_timestamps) # openephys sorting alignment if f"SortingVP{i}" in self.data_interface_objects: @@ -119,7 +122,7 @@ def temporally_align_data_interfaces(self): sorting=self.data_interface_objects[f"SortingNP"].sorting_extractor, ) self.data_interface_objects[f"SortingNP"].register_recording(self.data_interface_objects[f"RecordingNP"]) - + # align recording start to 0 aligned_start_times = [] for name, data_interface in self.data_interface_objects.items(): @@ -130,5 +133,4 @@ def temporally_align_data_interfaces(self): if isinstance(data_interface, BaseSortingExtractorInterface): # Do not need to align because recording will be aligned continue - start_time = 
data_interface.set_aligned_starting_time( - aligned_starting_time=zero_time) + start_time = data_interface.set_aligned_starting_time(aligned_starting_time=zero_time) diff --git a/src/jazayeri_lab_to_nwb/watters/recording_interface.py b/src/jazayeri_lab_to_nwb/watters/recording_interface.py index be80415..ebc340e 100644 --- a/src/jazayeri_lab_to_nwb/watters/recording_interface.py +++ b/src/jazayeri_lab_to_nwb/watters/recording_interface.py @@ -1,14 +1,16 @@ """Primary class for recording data.""" -import os import json -import numpy as np -from pynwb import NWBFile +import os from pathlib import Path from typing import Optional, Union -from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import BaseRecordingExtractorInterface +import numpy as np +from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import ( + BaseRecordingExtractorInterface, +) from neuroconv.utils import FilePathType +from pynwb import NWBFile from spikeinterface import BaseRecording diff --git a/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py b/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py index e3c6ccb..5c7b922 100644 --- a/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py +++ b/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py @@ -21,20 +21,20 @@ class TimestampsFromArrayInterface(BaseTemporalAlignmentInterface): """Interface implementing temporal alignment functions with timestamps.""" - + def __init__(self, folder_path: FolderPathType): super().__init__(folder_path=folder_path) def set_original_timestamps(self, original_timestamps: np.ndarray) -> None: self._original_timestamps = original_timestamps self._timestamps = np.copy(original_timestamps) - + def get_original_timestamps(self) -> np.ndarray: return self._original_timestamps def set_aligned_timestamps(self, aligned_timestamps: np.ndarray) -> None: self._timestamps = aligned_timestamps - + def get_timestamps(self): return self._timestamps @@ -45,33 +45,28 @@ class EyePositionInterface(TimestampsFromArrayInterface): def __init__(self, folder_path: FolderPathType): folder_path = Path(folder_path) super().__init__(folder_path=folder_path) - + # Find eye position files and check they all exist - eye_h_file = folder_path / 'eye_h_calibrated.json' - eye_v_file = folder_path / 'eye_v_calibrated.json' - assert eye_h_file.exists(), f'Could not find {eye_h_file}' - assert eye_v_file.exists(), f'Could not find {eye_v_file}' - + eye_h_file = folder_path / "eye_h_calibrated.json" + eye_v_file = folder_path / "eye_v_calibrated.json" + assert eye_h_file.exists(), f"Could not find {eye_h_file}" + assert eye_v_file.exists(), f"Could not find {eye_v_file}" + # Load eye data - eye_h_data = json.load(open(eye_h_file, 'r')) - eye_v_data = json.load(open(eye_v_file, 'r')) - eye_h_times = np.array(eye_h_data['times']) - eye_h_values = 0.5 + (np.array(eye_h_data['values']) / 20) - eye_v_times = np.array(eye_v_data['times']) - eye_v_values = 0.5 + (np.array(eye_v_data['values']) / 20) - + eye_h_data = json.load(open(eye_h_file, "r")) + eye_v_data = json.load(open(eye_v_file, "r")) + eye_h_times = np.array(eye_h_data["times"]) + eye_h_values = 0.5 + (np.array(eye_h_data["values"]) / 20) + eye_v_times = np.array(eye_v_data["times"]) + eye_v_values = 0.5 + (np.array(eye_v_data["values"]) / 20) + # Check eye_h and eye_v have the same number of samples if len(eye_h_times) != len(eye_v_times): - raise ValueError( - f'len(eye_h_times) = {len(eye_h_times)}, but len(eye_v_times) ' - f'= {len(eye_v_times)}' - ) + raise 
ValueError(f"len(eye_h_times) = {len(eye_h_times)}, but len(eye_v_times) " f"= {len(eye_v_times)}") # Check that eye_h_times and eye_v_times are similar to within 0.5ms if not np.allclose(eye_h_times, eye_v_times, atol=0.0005): - raise ValueError( - 'eye_h_times and eye_v_times are not sufficiently similar' - ) - + raise ValueError("eye_h_times and eye_v_times are not sufficiently similar") + # Set data attributes self.set_original_timestamps(eye_h_times) self._eye_pos = np.stack([eye_h_values, eye_v_values], axis=1) @@ -79,19 +74,18 @@ def __init__(self, folder_path: FolderPathType): def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): # Make SpatialSeries eye_position = SpatialSeries( - name='eye_position', - data=H5DataIO(self._eye_pos, compression='gzip'), - reference_frame='(0,0) is bottom left corner of screen', - unit='meters', + name="eye_position", + data=H5DataIO(self._eye_pos, compression="gzip"), + reference_frame="(0,0) is bottom left corner of screen", + unit="meters", conversion=0.257, - timestamps=H5DataIO(self._timestamps, compression='gzip'), - description='Eye position data recorded by EyeLink camera', + timestamps=H5DataIO(self._timestamps, compression="gzip"), + description="Eye position data recorded by EyeLink camera", ) # Get processing module - module_description = 'Contains behavioral data from experiment.' - processing_module = get_module( - nwbfile=nwbfile, name='behavior', description=module_description) + module_description = "Contains behavioral data from experiment." + processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) # Add data to module processing_module.add_data_interface(eye_position) @@ -105,29 +99,28 @@ class PupilSizeInterface(TimestampsFromArrayInterface): def __init__(self, folder_path: FolderPathType): # Find pupil size file folder_path = Path(folder_path) - pupil_size_file = folder_path / 'pupil_size_r.json' - assert pupil_size_file.exists(), f'Could not find {pupil_size_file}' - + pupil_size_file = folder_path / "pupil_size_r.json" + assert pupil_size_file.exists(), f"Could not find {pupil_size_file}" + # Load pupil size data and set data attributes - pupil_size_data = json.load(open(pupil_size_file, 'r')) - self.set_original_timestamps(np.array(pupil_size_data['times'])) - self._pupil_size = np.array(pupil_size_data['values']) + pupil_size_data = json.load(open(pupil_size_file, "r")) + self.set_original_timestamps(np.array(pupil_size_data["times"])) + self._pupil_size = np.array(pupil_size_data["values"]) def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): # Make TimeSeries pupil_size = TimeSeries( - name='pupil_size', - data=H5DataIO(self._pupil_size, compression='gzip'), - unit='pixels', + name="pupil_size", + data=H5DataIO(self._pupil_size, compression="gzip"), + unit="pixels", conversion=1.0, - timestamps=H5DataIO(self._timestamps, compression='gzip'), - description='Pupil size data recorded by EyeLink camera', + timestamps=H5DataIO(self._timestamps, compression="gzip"), + description="Pupil size data recorded by EyeLink camera", ) # Get processing module - module_description = 'Contains behavioral data from experiment.' - processing_module = get_module( - nwbfile=nwbfile, name='behavior', description=module_description) + module_description = "Contains behavioral data from experiment." 
+ processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) # Add data to module processing_module.add_data_interface(pupil_size) @@ -141,30 +134,27 @@ class RewardLineInterface(TimestampsFromArrayInterface): def __init__(self, folder_path: FolderPathType): # Find reward line file folder_path = Path(folder_path) - reward_line_file = folder_path / 'reward_line.json' - assert reward_line_file.exists(), f'Could not find {reward_line_file}' - + reward_line_file = folder_path / "reward_line.json" + assert reward_line_file.exists(), f"Could not find {reward_line_file}" + # Load reward line data and set data attributes - reward_line_data = json.load(open(reward_line_file, 'r')) - self.set_original_timestamps(np.array(reward_line_data['times'])) - self._reward_line = reward_line_data['values'] + reward_line_data = json.load(open(reward_line_file, "r")) + self.set_original_timestamps(np.array(reward_line_data["times"])) + self._reward_line = reward_line_data["values"] def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): # Make LabeledEvents reward_line = LabeledEvents( - name='reward_line', - description=( - 'Reward line data representing events of reward dispenser' - ), - timestamps=H5DataIO(self._timestamps, compression='gzip'), + name="reward_line", + description=("Reward line data representing events of reward dispenser"), + timestamps=H5DataIO(self._timestamps, compression="gzip"), data=self._reward_line, - labels=['closed', 'open'], + labels=["closed", "open"], ) # Get processing module - module_description = 'Contains audio and reward data from experiment.' - processing_module = get_module( - nwbfile=nwbfile, name='behavior', description=module_description) + module_description = "Contains audio and reward data from experiment." + processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) # Add data to module processing_module.add_data_interface(reward_line) @@ -174,37 +164,36 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): class AudioInterface(TimestampsFromArrayInterface): """Audio interface.""" - - SOUNDS = ['failure_sound', 'success_sound'] + + SOUNDS = ["failure_sound", "success_sound"] def __init__(self, folder_path: FolderPathType): # Find sound file folder_path = Path(folder_path) - sound_file = folder_path / 'sound.json' - assert sound_file.exists(), f'Could not find {sound_file}' - + sound_file = folder_path / "sound.json" + assert sound_file.exists(), f"Could not find {sound_file}" + # Load sound data and set data attributes - sound_data = json.load(open(sound_file, 'r')) - self.set_original_timestamps(np.array(sound_data['times'])) - audio = np.array(sound_data['values']) - + sound_data = json.load(open(sound_file, "r")) + self.set_original_timestamps(np.array(sound_data["times"])) + audio = np.array(sound_data["values"]) + sound_to_code = {k: i for i, k in enumerate(AudioInterface.SOUNDS)} self._sound_codes = [sound_to_code[x] for x in audio] def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): # Make LabeledEvents audio = LabeledEvents( - name='audio', - description='Audio data representing auditory stimuli events', - timestamps=H5DataIO(self._timestamps, compression='gzip'), + name="audio", + description="Audio data representing auditory stimuli events", + timestamps=H5DataIO(self._timestamps, compression="gzip"), data=self._sound_codes, labels=AudioInterface.SOUNDS, ) # Get processing module - module_description = 'Contains audio and reward data from experiment.' 
- processing_module = get_module( - nwbfile=nwbfile, name='behavior', description=module_description) + module_description = "Contains audio and reward data from experiment." + processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) # Add data to module processing_module.add_data_interface(audio) diff --git a/src/jazayeri_lab_to_nwb/watters/trials_interface.py b/src/jazayeri_lab_to_nwb/watters/trials_interface.py index d130307..0470184 100644 --- a/src/jazayeri_lab_to_nwb/watters/trials_interface.py +++ b/src/jazayeri_lab_to_nwb/watters/trials_interface.py @@ -14,33 +14,33 @@ class TrialsInterface(TimeIntervalsInterface): """Class for converting trial-structured data. - + All events that occur exactly once per trial are contained in this interface. """ - + KEY_MAP = { - 'background_indices': 'background_indices', - 'broke_fixation': 'broke_fixation', - 'stimulus_object_identities': 'stimulus_object_identities', - 'stimulus_object_positions': 'stimulus_object_positions', - 'stimulus_object_velocities': 'stimulus_object_velocities', - 'stimulus_object_target': 'stimulus_object_target', - 'delay_object_blanks': 'delay_object_blanks', - 'closed_loop_response_position': 'closed_loop_response_position', - 'closed_loop_response_time': 'closed_loop_response_time', - 'time_start': 'start_time', - 'time_phase_fixation': 'phase_fixation_time', - 'time_phase_stimulus': 'phase_stimulus_time', - 'time_phase_delay': 'phase_delay_time', - 'time_phase_cue': 'phase_cue_time', - 'time_phase_response': 'phase_response_time', - 'time_phase_reveal': 'phase_reveal_time', - 'time_phase_iti': 'phase_iti_time', - 'reward_time': 'reward_time', - 'reward_duration': 'reward_duration', - 'response_position': 'response_position', - 'response_time': 'response_time', + "background_indices": "background_indices", + "broke_fixation": "broke_fixation", + "stimulus_object_identities": "stimulus_object_identities", + "stimulus_object_positions": "stimulus_object_positions", + "stimulus_object_velocities": "stimulus_object_velocities", + "stimulus_object_target": "stimulus_object_target", + "delay_object_blanks": "delay_object_blanks", + "closed_loop_response_position": "closed_loop_response_position", + "closed_loop_response_time": "closed_loop_response_time", + "time_start": "start_time", + "time_phase_fixation": "phase_fixation_time", + "time_phase_stimulus": "phase_stimulus_time", + "time_phase_delay": "phase_delay_time", + "time_phase_cue": "phase_cue_time", + "time_phase_response": "phase_response_time", + "time_phase_reveal": "phase_reveal_time", + "time_phase_iti": "phase_iti_time", + "reward_time": "reward_time", + "reward_duration": "reward_duration", + "response_position": "response_position", + "response_time": "response_time", } def __init__(self, folder_path: FolderPathType, verbose: bool = True): @@ -48,17 +48,17 @@ def __init__(self, folder_path: FolderPathType, verbose: bool = True): def get_metadata(self) -> dict: metadata = super().get_metadata() - metadata['TimeIntervals'] = dict( + metadata["TimeIntervals"] = dict( trials=dict( - table_name='trials', - table_description='data about each trial', + table_name="trials", + table_description="data about each trial", ) ) return metadata - + def get_timestamps(self) -> np.ndarray: - return super(TrialsInterface, self).get_timestamps(column='start_time') - + return super(TrialsInterface, self).get_timestamps(column="start_time") + def set_aligned_starting_time(self, aligned_starting_time: float) -> None: 
self.dataframe.closed_loop_response_time += aligned_starting_time self.dataframe.start_time += aligned_starting_time @@ -71,120 +71,99 @@ def set_aligned_starting_time(self, aligned_starting_time: float) -> None: self.dataframe.phase_iti_time += aligned_starting_time self.dataframe.reward_time += aligned_starting_time self.dataframe.response_time += aligned_starting_time - + def _read_file(self, file_path: FolderPathType): # Create dataframe with data for each trial - trials = json.load(open(Path(file_path) / 'trials.json', 'r')) - trials = { - k_mapped: [d[k] for d in trials] - for k, k_mapped in TrialsInterface.KEY_MAP.items() - } - + trials = json.load(open(Path(file_path) / "trials.json", "r")) + trials = {k_mapped: [d[k] for d in trials] for k, k_mapped in TrialsInterface.KEY_MAP.items()} + # Field closed_loop_response_position may have None values, so replace # those with NaN to make hdf5 conversion work - trials['closed_loop_response_position'] = [ - [np.nan, np.nan] if x is None else x - for x in trials['closed_loop_response_position'] + trials["closed_loop_response_position"] = [ + [np.nan, np.nan] if x is None else x for x in trials["closed_loop_response_position"] ] - + # Serialize fields with variable-length lists for hdf5 conversion for k in [ - 'stimulus_object_identities', - 'stimulus_object_positions', - 'stimulus_object_velocities', - 'stimulus_object_target', - ]: + "stimulus_object_identities", + "stimulus_object_positions", + "stimulus_object_velocities", + "stimulus_object_target", + ]: trials[k] = [json.dumps(x) for x in trials[k]] - + return pd.DataFrame(trials) - def add_to_nwbfile(self, - nwbfile: NWBFile, - metadata: Optional[dict] = None, - tag: str = 'trials'): + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None, tag: str = "trials"): return super(TrialsInterface, self).add_to_nwbfile( nwbfile=nwbfile, metadata=metadata, tag=tag, column_descriptions=self.column_descriptions, ) - + @property def column_descriptions(self): column_descriptions = { - 'background_indices': ( - 'For each trial, the indices of the background noise pattern ' - 'patch.' - ), - 'broke_fixation': ( - 'For each trial, whether the subject broke fixation and the ' - 'trial was aborted' - ), - 'stimulus_object_identities': ( - 'For each trial, a serialized list with one element for each ' + "background_indices": ("For each trial, the indices of the background noise pattern " "patch."), + "broke_fixation": ("For each trial, whether the subject broke fixation and the " "trial was aborted"), + "stimulus_object_identities": ( + "For each trial, a serialized list with one element for each " 'object. Each element is the identity symbol (e.g. "a", "b", ' '"c", ...) of the corresponding object.' ), - 'stimulus_object_positions': ( - 'For each trial, a serialized list with one element for each ' - 'object. Each element is the initial (x, y) position of the ' - 'corresponding object, in coordinates of arena width.' - ), - 'stimulus_object_velocities': ( - 'For each trial, a serialized list with one element for each ' - 'object. Each element is the initial (dx/dt, dy/dt) velocity ' - 'of the corresponding object, in units of arena width per ' - 'display update.' + "stimulus_object_positions": ( + "For each trial, a serialized list with one element for each " + "object. Each element is the initial (x, y) position of the " + "corresponding object, in coordinates of arena width." 
), - 'stimulus_object_target': ( - 'For each trial, a serialized list with one element for each ' - 'object. Each element is a boolean indicating whether the ' - 'corresponding object is ultimately the cued target.' + "stimulus_object_velocities": ( + "For each trial, a serialized list with one element for each " + "object. Each element is the initial (dx/dt, dy/dt) velocity " + "of the corresponding object, in units of arena width per " + "display update." ), - 'delay_object_blanks': ( - 'For each trial, a boolean indicating whether the objects were ' - 'rendered as blank discs during the delay phase.' + "stimulus_object_target": ( + "For each trial, a serialized list with one element for each " + "object. Each element is a boolean indicating whether the " + "corresponding object is ultimately the cued target." ), - 'closed_loop_response_position': ( - 'For each trial, the position of the response saccade used by ' - 'the closed-loop game engine. This is used for determining ' - 'reward.' + "delay_object_blanks": ( + "For each trial, a boolean indicating whether the objects were " + "rendered as blank discs during the delay phase." ), - 'closed_loop_response_time': ( - 'For each trial, the time of the response saccade used by ' - 'the closed-loop game engine. This is used for the timing of ' - 'reward delivery.' + "closed_loop_response_position": ( + "For each trial, the position of the response saccade used by " + "the closed-loop game engine. This is used for determining " + "reward." ), - 'start_time': 'Start time of each trial.', - 'phase_fixation_time': ( - 'Time of fixation phase onset for each trial.' + "closed_loop_response_time": ( + "For each trial, the time of the response saccade used by " + "the closed-loop game engine. This is used for the timing of " + "reward delivery." ), - 'phase_stimulus_time': ( - 'Time of stimulus phase onset for each trial.' + "start_time": "Start time of each trial.", + "phase_fixation_time": ("Time of fixation phase onset for each trial."), + "phase_stimulus_time": ("Time of stimulus phase onset for each trial."), + "phase_delay_time": "Time of delay phase onset for each trial.", + "phase_cue_time": "Time of cue phase onset for each trial.", + "phase_response_time": ("Time of response phase onset for each trial."), + "phase_reveal_time": "Time of reveal phase onset for each trial.", + "phase_iti_time": ("Time of inter-trial interval onset for each trial."), + "reward_time": "Time of reward delivery onset for each trial.", + "reward_duration": "Reward duration for each trial", + "response_position": ( + "Response position for each trial. This differs from " + "closed_loop_response_position in that this is calculated " + "post-hoc from high-resolution eye tracking data, hence is " + "more accurate." ), - 'phase_delay_time': 'Time of delay phase onset for each trial.', - 'phase_cue_time': 'Time of cue phase onset for each trial.', - 'phase_response_time': ( - 'Time of response phase onset for each trial.' - ), - 'phase_reveal_time': 'Time of reveal phase onset for each trial.', - 'phase_iti_time': ( - 'Time of inter-trial interval onset for each trial.' - ), - 'reward_time': 'Time of reward delivery onset for each trial.', - 'reward_duration': 'Reward duration for each trial', - 'response_position': ( - 'Response position for each trial. This differs from ' - 'closed_loop_response_position in that this is calculated ' - 'post-hoc from high-resolution eye tracking data, hence is ' - 'more accurate.' 
- ), - 'response_time': ( - 'Response time for each trial. This differs from ' - 'closed_loop_response_time in that this is calculated post-hoc ' - 'from high-resolution eye tracking data, hence is more ' - 'accurate.' + "response_time": ( + "Response time for each trial. This differs from " + "closed_loop_response_time in that this is calculated post-hoc " + "from high-resolution eye tracking data, hence is more " + "accurate." ), } - + return column_descriptions From 6a27f4c0d9313ab918914889552e4386cae317cc Mon Sep 17 00:00:00 2001 From: Nicholas Watters Date: Mon, 18 Dec 2023 23:51:26 -0500 Subject: [PATCH 3/8] Update session_id. --- src/jazayeri_lab_to_nwb/watters/main_convert_session.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py index 8803bf9..a587ae0 100644 --- a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py @@ -195,9 +195,8 @@ def session_to_nwb( # Get paths for nwb files to write session_paths.output.mkdir(parents=True, exist_ok=True) - session_id = f"{subject}_{session}" - raw_nwb_path = session_paths.output / f"{session_id}_raw.nwb" - processed_nwb_path = session_paths.output / f"{session_id}_processed.nwb" + raw_nwb_path = session_paths.output / f"{session}_raw.nwb" + processed_nwb_path = session_paths.output / f"{session}_processed.nwb" logging.info(f"raw_nwb_path = {raw_nwb_path}") logging.info(f"processed_nwb_path = {processed_nwb_path}") logging.info("") @@ -260,7 +259,7 @@ def session_to_nwb( # Add datetime and subject name to processed converter metadata = processed_converter.get_metadata() - metadata["NWBFile"]["session_id"] = session_id + metadata["NWBFile"]["session_id"] = session metadata["Subject"]["subject_id"] = subject metadata["Subject"]["sex"] = _SUBJECT_TO_SEX[subject] metadata["Subject"]["age"] = _SUBJECT_TO_AGE[subject] From b8a4b9ff8a598b38e0effc68a6c6df6df749f366 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:00:31 +0000 Subject: [PATCH 4/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/jazayeri_lab_to_nwb/watters/nwb_converter.py | 13 ++++++++++--- .../watters/recording_interface.py | 6 ++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/watters/nwb_converter.py b/src/jazayeri_lab_to_nwb/watters/nwb_converter.py index 93da58b..45a1aca 100644 --- a/src/jazayeri_lab_to_nwb/watters/nwb_converter.py +++ b/src/jazayeri_lab_to_nwb/watters/nwb_converter.py @@ -10,15 +10,22 @@ KiloSortSortingInterface, SpikeGLXRecordingInterface, ) -from neuroconv.datainterfaces.ecephys.basesortingextractorinterface import BaseSortingExtractorInterface +from neuroconv.datainterfaces.ecephys.basesortingextractorinterface import ( + BaseSortingExtractorInterface, +) from neuroconv.utils import FolderPathType from spikeinterface.core.waveform_tools import has_exceeding_spikes from spikeinterface.curation import remove_excess_spikes -from .recording_interface import DatRecordingInterface from .display_interface import DisplayInterface +from .recording_interface import DatRecordingInterface +from .timeseries_interface import ( + AudioInterface, + EyePositionInterface, + PupilSizeInterface, + RewardLineInterface, +) from .trials_interface import TrialsInterface -from .timeseries_interface 
import EyePositionInterface, PupilSizeInterface, RewardLineInterface, AudioInterface class NWBConverter(NWBConverter): diff --git a/src/jazayeri_lab_to_nwb/watters/recording_interface.py b/src/jazayeri_lab_to_nwb/watters/recording_interface.py index bec953c..9492cb3 100644 --- a/src/jazayeri_lab_to_nwb/watters/recording_interface.py +++ b/src/jazayeri_lab_to_nwb/watters/recording_interface.py @@ -2,9 +2,11 @@ import json from typing import Optional -import probeinterface import numpy as np -from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import BaseRecordingExtractorInterface +import probeinterface +from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import ( + BaseRecordingExtractorInterface, +) from neuroconv.utils import FilePathType from spikeinterface import BaseRecording From 004b37f215810434edf38e19093b471b2f137d9f Mon Sep 17 00:00:00 2001 From: Nicholas Watters Date: Tue, 19 Dec 2023 10:52:30 -0500 Subject: [PATCH 5/8] Incorporate V-Probe coordinates. --- requirements.txt | 2 +- src/jazayeri_lab_to_nwb/watters/__init__.py | 4 - .../watters/get_session_paths.py | 18 +- .../watters/main_convert_session.py | 19 +- .../watters/recording_interface.py | 139 ++++-------- .../watters/timeseries_interfaces.py | 212 ++++++++++++++++++ 6 files changed, 278 insertions(+), 116 deletions(-) create mode 100644 src/jazayeri_lab_to_nwb/watters/timeseries_interfaces.py diff --git a/requirements.txt b/requirements.txt index ee88472..da271ed 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -neuroconv==0.4.6 +neuroconv==0.4.7 spikeinterface==0.99.1 nwbwidgets==0.11.3 nwbinspector==0.4.31 diff --git a/src/jazayeri_lab_to_nwb/watters/__init__.py b/src/jazayeri_lab_to_nwb/watters/__init__.py index 47379f1..e69de29 100644 --- a/src/jazayeri_lab_to_nwb/watters/__init__.py +++ b/src/jazayeri_lab_to_nwb/watters/__init__.py @@ -1,4 +0,0 @@ -from .behavior_interface import EyePositionInterface, PupilSizeInterface -from .nwb_converter import NWBConverter -from .recording_interface import DatRecordingInterface -from .trials_interface import TrialsInterface diff --git a/src/jazayeri_lab_to_nwb/watters/get_session_paths.py b/src/jazayeri_lab_to_nwb/watters/get_session_paths.py index e9830ec..c6b5a7b 100644 --- a/src/jazayeri_lab_to_nwb/watters/get_session_paths.py +++ b/src/jazayeri_lab_to_nwb/watters/get_session_paths.py @@ -21,14 +21,12 @@ ) -def _get_session_paths_openmind(subject, session, stub_test=False): +def _get_session_paths_openmind(subject, session): """Get paths to all components of the data on openmind.""" subject_id = SUBJECT_NAME_TO_ID[subject] # Path to write output nwb files to - output_path = f"/om/user/nwatters/nwb_data_multi_prediction/{subject}/{session}" - if stub_test: - output_path = f"{output_path}/stub" + output_path = f"/om/user/nwatters/nwb_data_multi_prediction/sub-{subject}" # Path to the raw data. This is used for reading raw physiology data. 
raw_data_path = f"/om4/group/jazlab/nwatters/multi_prediction/phys_data/{subject}/" f"{session}/raw_data" @@ -64,15 +62,13 @@ def _get_session_paths_openmind(subject, session, stub_test=False): return session_paths -def _get_session_paths_globus(subject, session, stub_test=False): +def _get_session_paths_globus(subject, session): """Get paths to all components of the data in the globus repo.""" subject_id = SUBJECT_NAME_TO_ID[subject] base_data_dir = f"/shared/catalystneuro/JazLab/{subject_id}/{session}/" # Path to write output nwb files to - output_path = f"~/conversion_nwb/jazayeri-lab-to-nwb/{subject}/{session}" - if stub_test: - output_path = f"{output_path}/stub" + output_path = f"~/conversion_nwb/jazayeri-lab-to-nwb" # Path to the raw data. This is used for reading raw physiology data. raw_data_path = f"{base_data_dir}/raw_data" @@ -102,15 +98,15 @@ def _get_session_paths_globus(subject, session, stub_test=False): return session_paths -def get_session_paths(subject, session, stub_test=False, repo="openmind"): +def get_session_paths(subject, session, repo="openmind"): """Get paths to all components of the data. Returns: SessionPaths namedtuple. """ if repo == "openmind": - return _get_session_paths_openmind(subject=subject, session=session, stub_test=stub_test) + return _get_session_paths_openmind(subject=subject, session=session) elif repo == "globus": - return _get_session_paths_globus(subject=subject, session=session, stub_test=stub_test) + return _get_session_paths_globus(subject=subject, session=session) else: raise ValueError(f"Invalid repo {repo}") diff --git a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py index a587ae0..b1d33fb 100644 --- a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py @@ -190,13 +190,22 @@ def session_to_nwb( logging.info(f"dandiset_id = {dandiset_id}") # Get paths - session_paths = get_session_paths.get_session_paths(subject, session, stub_test=stub_test, repo=_REPO) + session_paths = get_session_paths.get_session_paths(subject, session, repo=_REPO) logging.info(f"session_paths: {session_paths}") # Get paths for nwb files to write session_paths.output.mkdir(parents=True, exist_ok=True) - raw_nwb_path = session_paths.output / f"{session}_raw.nwb" - processed_nwb_path = session_paths.output / f"{session}_processed.nwb" + subject_id = f"sub-{subject}" + if stub_test: + session_id = f"ses-{session}_stub" + else: + session_id = f"ses-{session}" + raw_nwb_path = ( + session_paths.output / f"{subject_id}_{session_id}_ecephys.nwb" + ) + processed_nwb_path = ( + session_paths.output / f"{subject_id}_{session_id}_behavior+ecephys.nwb" + ) logging.info(f"raw_nwb_path = {raw_nwb_path}") logging.info(f"processed_nwb_path = {processed_nwb_path}") logging.info("") @@ -259,8 +268,8 @@ def session_to_nwb( # Add datetime and subject name to processed converter metadata = processed_converter.get_metadata() - metadata["NWBFile"]["session_id"] = session - metadata["Subject"]["subject_id"] = subject + metadata["NWBFile"]["session_id"] = session_id + metadata["Subject"]["subject_id"] = subject_id metadata["Subject"]["sex"] = _SUBJECT_TO_SEX[subject] metadata["Subject"]["age"] = _SUBJECT_TO_AGE[subject] diff --git a/src/jazayeri_lab_to_nwb/watters/recording_interface.py b/src/jazayeri_lab_to_nwb/watters/recording_interface.py index ebc340e..22e73af 100644 --- a/src/jazayeri_lab_to_nwb/watters/recording_interface.py +++ 
b/src/jazayeri_lab_to_nwb/watters/recording_interface.py @@ -1,85 +1,19 @@ """Primary class for recording data.""" import json -import os -from pathlib import Path -from typing import Optional, Union +from typing import Optional import numpy as np +import probeinterface as pi from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import ( BaseRecordingExtractorInterface, ) from neuroconv.utils import FilePathType -from pynwb import NWBFile -from spikeinterface import BaseRecording - - -def add_electrode_locations( - recording_extractor: BaseRecording, - probe_metadata_file: FilePathType, - probe_name: str, - probe_key: str, -) -> list[dict]: - with open(probe_metadata_file, "r") as f: - all_probe_metadata = json.load(f) - probe_metadata = None - for entry in all_probe_metadata: - if entry["label"] == probe_key: - probe_metadata = entry - - if probe_metadata is None: - return [] - - probe_coord_system = probe_metadata["coordinate_system"] - coord_names = probe_coord_system.split("[")[1].split("]")[0].split(",") - electrode_metadata = [ - { - "name": "x", - "description": f"{coord_names[0].strip()} coordinate. {probe_coord_system}", - }, - { - "name": "y", - "description": f"{coord_names[1].strip()} coordinate. {probe_coord_system}", - }, - ] - if len(coord_names) == 3: - electrode_metadata.append( - { - "name": "z", - "description": f"{coord_names[2].strip()} coordinate. {probe_coord_system}", - }, - ) - - channel_ids = recording_extractor.get_channel_ids() - recording_extractor.set_property( - key="group_name", - ids=channel_ids, - values=[probe_name] * len(channel_ids), - ) - coordinates = probe_metadata["coordinates"] - recording_extractor.set_property( - key="x", - values=[coordinates["first_channel"][0], coordinates["last_channel"][0]], - ids=channel_ids[[0, -1]], - ) - recording_extractor.set_property( - key="y", - values=[coordinates["first_channel"][1], coordinates["last_channel"][1]], - ids=channel_ids[[0, -1]], - ) - if len(coord_names) == 3: - recording_extractor.set_property( - key="z", - values=[coordinates["first_channel"][2], coordinates["last_channel"][2]], - ids=channel_ids[[0, -1]], - ) - - return electrode_metadata class DatRecordingInterface(BaseRecordingExtractorInterface): - ExtractorName = "NumpyRecording" + ExtractorName = "BinaryRecordingExtractor" def __init__( self, @@ -91,38 +25,57 @@ def __init__( t_start: float = 0.0, sampling_frequency: float = 30000.0, channel_ids: Optional[list] = None, - gain_to_uv: list = [1.0], + gain_to_uv: list = 1.0, + offset_to_uv: list = 0.0, probe_metadata_file: Optional[FilePathType] = None, probe_name: str = "vprobe", probe_key: Optional[str] = None, ): - traces = np.memmap(file_path, dtype=dtype, mode="r").reshape(-1, channel_count) source_data = { - "traces_list": [traces], + "file_paths": [file_path], "sampling_frequency": sampling_frequency, + "num_channels": channel_count, "t_starts": [t_start], "channel_ids": channel_ids, + "gain_to_uV": gain_to_uv, + "offset_to_uV": offset_to_uv, + "dtype": dtype, } super().__init__(verbose=verbose, es_key=es_key, **source_data) - if gain_to_uv is not None: - if len(gain_to_uv) == 1: - gain_to_uv = np.full((channel_count,), gain_to_uv[0], dtype=float) - else: - assert len(gain_to_uv) == channel_count, ( - f"There are {channel_count} channels " f"but `gain_to_uv` has length {len(gain_to_uv)}" - ) - gain_to_uv = np.array(gain_to_uv, dtype=float) - self.recording_extractor.set_property("gain_to_uV", gain_to_uv) - self.probe_metadata_file = probe_metadata_file + + # this is used 
for metadata naming self.probe_name = probe_name - self.probe_key = probe_key - - self.electrode_metadata = None - if self.probe_metadata_file is not None and self.probe_key is not None: - self.electrode_metadata = add_electrode_locations( - self.recording_extractor, self.probe_metadata_file, self.probe_name, self.probe_key - ) - + + # add probe information + probe_metadata = None + if probe_metadata_file is not None and probe_key is not None: + with open(probe_metadata_file, "r") as f: + all_probe_metadata = json.load(f) + for entry in all_probe_metadata: + if entry["label"] == probe_key: + probe_metadata = entry + + if probe_metadata is not None and "electrodes_locations" in probe_metadata: + # Grab electrode position from metadata + locations_array = np.array(probe_metadata["electrodes_locations"]) + ndim = locations_array.shape[1] + probe = pi.Probe(ndim=ndim) + probe.set_contacts(locations_array) + else: + # Generate V-probe geometry: 64 channels arranged vertically with 50 um spacing + probe = pi.generate_linear_probe(num_elec=channel_count, ypitch=50) + probe.set_device_channel_indices(np.arange(channel_count)) + probe.name = probe_name + + # set probe to interface recording + self.set_probe(probe, group_mode="by_probe") + + # set group_name property to match electrode group name in metadata + self.recording_extractor.set_property( + key="group_name", + values=[probe_name] * len(self.recording_extractor.channel_ids), + ) + def get_metadata(self) -> dict: metadata = super().get_metadata() metadata["Ecephys"]["Device"] = [ @@ -141,9 +94,5 @@ def get_metadata(self) -> dict: ) ] metadata["Ecephys"]["ElectrodeGroup"] = electrode_groups - - if self.electrode_metadata is None: - return metadata - - metadata["Ecephys"]["Electrodes"] = self.electrode_metadata + return metadata diff --git a/src/jazayeri_lab_to_nwb/watters/timeseries_interfaces.py b/src/jazayeri_lab_to_nwb/watters/timeseries_interfaces.py new file mode 100644 index 0000000..e3c6ccb --- /dev/null +++ b/src/jazayeri_lab_to_nwb/watters/timeseries_interfaces.py @@ -0,0 +1,212 @@ +"""Primary classes for timeseries variables. + +The classes here handle variables like eye position, reward line, and audio +stimuli that are not necessarily tied to the trial structure of display updates. +For trial structured variables, see ../trials_interface.py. For variables +pertaining to display updates, see ../frames_interface.py. 
+""" + +import json +from pathlib import Path + +import numpy as np +from hdmf.backends.hdf5 import H5DataIO +from ndx_events import LabeledEvents +from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface +from neuroconv.tools.nwb_helpers import get_module +from neuroconv.utils import FolderPathType +from pynwb import NWBFile, TimeSeries +from pynwb.behavior import SpatialSeries + + +class TimestampsFromArrayInterface(BaseTemporalAlignmentInterface): + """Interface implementing temporal alignment functions with timestamps.""" + + def __init__(self, folder_path: FolderPathType): + super().__init__(folder_path=folder_path) + + def set_original_timestamps(self, original_timestamps: np.ndarray) -> None: + self._original_timestamps = original_timestamps + self._timestamps = np.copy(original_timestamps) + + def get_original_timestamps(self) -> np.ndarray: + return self._original_timestamps + + def set_aligned_timestamps(self, aligned_timestamps: np.ndarray) -> None: + self._timestamps = aligned_timestamps + + def get_timestamps(self): + return self._timestamps + + +class EyePositionInterface(TimestampsFromArrayInterface): + """Eye position interface.""" + + def __init__(self, folder_path: FolderPathType): + folder_path = Path(folder_path) + super().__init__(folder_path=folder_path) + + # Find eye position files and check they all exist + eye_h_file = folder_path / 'eye_h_calibrated.json' + eye_v_file = folder_path / 'eye_v_calibrated.json' + assert eye_h_file.exists(), f'Could not find {eye_h_file}' + assert eye_v_file.exists(), f'Could not find {eye_v_file}' + + # Load eye data + eye_h_data = json.load(open(eye_h_file, 'r')) + eye_v_data = json.load(open(eye_v_file, 'r')) + eye_h_times = np.array(eye_h_data['times']) + eye_h_values = 0.5 + (np.array(eye_h_data['values']) / 20) + eye_v_times = np.array(eye_v_data['times']) + eye_v_values = 0.5 + (np.array(eye_v_data['values']) / 20) + + # Check eye_h and eye_v have the same number of samples + if len(eye_h_times) != len(eye_v_times): + raise ValueError( + f'len(eye_h_times) = {len(eye_h_times)}, but len(eye_v_times) ' + f'= {len(eye_v_times)}' + ) + # Check that eye_h_times and eye_v_times are similar to within 0.5ms + if not np.allclose(eye_h_times, eye_v_times, atol=0.0005): + raise ValueError( + 'eye_h_times and eye_v_times are not sufficiently similar' + ) + + # Set data attributes + self.set_original_timestamps(eye_h_times) + self._eye_pos = np.stack([eye_h_values, eye_v_values], axis=1) + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + # Make SpatialSeries + eye_position = SpatialSeries( + name='eye_position', + data=H5DataIO(self._eye_pos, compression='gzip'), + reference_frame='(0,0) is bottom left corner of screen', + unit='meters', + conversion=0.257, + timestamps=H5DataIO(self._timestamps, compression='gzip'), + description='Eye position data recorded by EyeLink camera', + ) + + # Get processing module + module_description = 'Contains behavioral data from experiment.' 
+ processing_module = get_module( + nwbfile=nwbfile, name='behavior', description=module_description) + + # Add data to module + processing_module.add_data_interface(eye_position) + + return nwbfile + + +class PupilSizeInterface(TimestampsFromArrayInterface): + """Pupil size interface.""" + + def __init__(self, folder_path: FolderPathType): + # Find pupil size file + folder_path = Path(folder_path) + pupil_size_file = folder_path / 'pupil_size_r.json' + assert pupil_size_file.exists(), f'Could not find {pupil_size_file}' + + # Load pupil size data and set data attributes + pupil_size_data = json.load(open(pupil_size_file, 'r')) + self.set_original_timestamps(np.array(pupil_size_data['times'])) + self._pupil_size = np.array(pupil_size_data['values']) + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + # Make TimeSeries + pupil_size = TimeSeries( + name='pupil_size', + data=H5DataIO(self._pupil_size, compression='gzip'), + unit='pixels', + conversion=1.0, + timestamps=H5DataIO(self._timestamps, compression='gzip'), + description='Pupil size data recorded by EyeLink camera', + ) + + # Get processing module + module_description = 'Contains behavioral data from experiment.' + processing_module = get_module( + nwbfile=nwbfile, name='behavior', description=module_description) + + # Add data to module + processing_module.add_data_interface(pupil_size) + + return nwbfile + + +class RewardLineInterface(TimestampsFromArrayInterface): + """Reward line interface.""" + + def __init__(self, folder_path: FolderPathType): + # Find reward line file + folder_path = Path(folder_path) + reward_line_file = folder_path / 'reward_line.json' + assert reward_line_file.exists(), f'Could not find {reward_line_file}' + + # Load reward line data and set data attributes + reward_line_data = json.load(open(reward_line_file, 'r')) + self.set_original_timestamps(np.array(reward_line_data['times'])) + self._reward_line = reward_line_data['values'] + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + # Make LabeledEvents + reward_line = LabeledEvents( + name='reward_line', + description=( + 'Reward line data representing events of reward dispenser' + ), + timestamps=H5DataIO(self._timestamps, compression='gzip'), + data=self._reward_line, + labels=['closed', 'open'], + ) + + # Get processing module + module_description = 'Contains audio and reward data from experiment.' 
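[Editor's note] RewardLineInterface above stores categorical events as integer codes plus a labels vector via the ndx-events extension, and AudioInterface below uses the same pattern for sounds. A small sketch of that encoding with invented timestamps:

```python
from ndx_events import LabeledEvents  # requires the ndx-events extension

labels = ["closed", "open"]  # reward-line states, as in the interface above
states = ["open", "closed", "open"]  # hypothetical observed sequence
code_of = {label: i for i, label in enumerate(labels)}

# Events are stored as integer indices into the labels vector.
reward_line = LabeledEvents(
    name="reward_line",
    description="Example reward-line events (made-up data)",
    timestamps=[0.5, 0.8, 2.1],  # seconds; invented for illustration
    data=[code_of[s] for s in states],
    labels=labels,
)
```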
+ processing_module = get_module( + nwbfile=nwbfile, name='behavior', description=module_description) + + # Add data to module + processing_module.add_data_interface(reward_line) + + return nwbfile + + +class AudioInterface(TimestampsFromArrayInterface): + """Audio interface.""" + + SOUNDS = ['failure_sound', 'success_sound'] + + def __init__(self, folder_path: FolderPathType): + # Find sound file + folder_path = Path(folder_path) + sound_file = folder_path / 'sound.json' + assert sound_file.exists(), f'Could not find {sound_file}' + + # Load sound data and set data attributes + sound_data = json.load(open(sound_file, 'r')) + self.set_original_timestamps(np.array(sound_data['times'])) + audio = np.array(sound_data['values']) + + sound_to_code = {k: i for i, k in enumerate(AudioInterface.SOUNDS)} + self._sound_codes = [sound_to_code[x] for x in audio] + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + # Make LabeledEvents + audio = LabeledEvents( + name='audio', + description='Audio data representing auditory stimuli events', + timestamps=H5DataIO(self._timestamps, compression='gzip'), + data=self._sound_codes, + labels=AudioInterface.SOUNDS, + ) + + # Get processing module + module_description = 'Contains audio and reward data from experiment.' + processing_module = get_module( + nwbfile=nwbfile, name='behavior', description=module_description) + + # Add data to module + processing_module.add_data_interface(audio) + + return nwbfile From 36c53bb397da0672c0af68db18cb8e5610ede31b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Dec 2023 16:11:24 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../watters/main_convert_session.py | 8 +- .../watters/recording_interface.py | 1 - .../watters/timeseries_interfaces.py | 143 ++++++++---------- 3 files changed, 68 insertions(+), 84 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py index a553ae7..b68ef18 100644 --- a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py @@ -199,12 +199,8 @@ def session_to_nwb( session_id = f"{session}-stub" else: session_id = f"{session}" - raw_nwb_path = ( - session_paths.output / f"{subject}_{session_id}_raw.nwb" - ) - processed_nwb_path = ( - session_paths.output / f"{subject}_{session_id}_processed.nwb" - ) + raw_nwb_path = session_paths.output / f"{subject}_{session_id}_raw.nwb" + processed_nwb_path = session_paths.output / f"{subject}_{session_id}_processed.nwb" logging.info(f"raw_nwb_path = {raw_nwb_path}") logging.info(f"processed_nwb_path = {processed_nwb_path}") logging.info("") diff --git a/src/jazayeri_lab_to_nwb/watters/recording_interface.py b/src/jazayeri_lab_to_nwb/watters/recording_interface.py index ff02eb9..216bfaf 100644 --- a/src/jazayeri_lab_to_nwb/watters/recording_interface.py +++ b/src/jazayeri_lab_to_nwb/watters/recording_interface.py @@ -1,7 +1,6 @@ """Primary class for recording data.""" import json from typing import Optional -from typing import Optional import numpy as np import probeinterface diff --git a/src/jazayeri_lab_to_nwb/watters/timeseries_interfaces.py b/src/jazayeri_lab_to_nwb/watters/timeseries_interfaces.py index e3c6ccb..5c7b922 100644 --- a/src/jazayeri_lab_to_nwb/watters/timeseries_interfaces.py +++ 
b/src/jazayeri_lab_to_nwb/watters/timeseries_interfaces.py @@ -21,20 +21,20 @@ class TimestampsFromArrayInterface(BaseTemporalAlignmentInterface): """Interface implementing temporal alignment functions with timestamps.""" - + def __init__(self, folder_path: FolderPathType): super().__init__(folder_path=folder_path) def set_original_timestamps(self, original_timestamps: np.ndarray) -> None: self._original_timestamps = original_timestamps self._timestamps = np.copy(original_timestamps) - + def get_original_timestamps(self) -> np.ndarray: return self._original_timestamps def set_aligned_timestamps(self, aligned_timestamps: np.ndarray) -> None: self._timestamps = aligned_timestamps - + def get_timestamps(self): return self._timestamps @@ -45,33 +45,28 @@ class EyePositionInterface(TimestampsFromArrayInterface): def __init__(self, folder_path: FolderPathType): folder_path = Path(folder_path) super().__init__(folder_path=folder_path) - + # Find eye position files and check they all exist - eye_h_file = folder_path / 'eye_h_calibrated.json' - eye_v_file = folder_path / 'eye_v_calibrated.json' - assert eye_h_file.exists(), f'Could not find {eye_h_file}' - assert eye_v_file.exists(), f'Could not find {eye_v_file}' - + eye_h_file = folder_path / "eye_h_calibrated.json" + eye_v_file = folder_path / "eye_v_calibrated.json" + assert eye_h_file.exists(), f"Could not find {eye_h_file}" + assert eye_v_file.exists(), f"Could not find {eye_v_file}" + # Load eye data - eye_h_data = json.load(open(eye_h_file, 'r')) - eye_v_data = json.load(open(eye_v_file, 'r')) - eye_h_times = np.array(eye_h_data['times']) - eye_h_values = 0.5 + (np.array(eye_h_data['values']) / 20) - eye_v_times = np.array(eye_v_data['times']) - eye_v_values = 0.5 + (np.array(eye_v_data['values']) / 20) - + eye_h_data = json.load(open(eye_h_file, "r")) + eye_v_data = json.load(open(eye_v_file, "r")) + eye_h_times = np.array(eye_h_data["times"]) + eye_h_values = 0.5 + (np.array(eye_h_data["values"]) / 20) + eye_v_times = np.array(eye_v_data["times"]) + eye_v_values = 0.5 + (np.array(eye_v_data["values"]) / 20) + # Check eye_h and eye_v have the same number of samples if len(eye_h_times) != len(eye_v_times): - raise ValueError( - f'len(eye_h_times) = {len(eye_h_times)}, but len(eye_v_times) ' - f'= {len(eye_v_times)}' - ) + raise ValueError(f"len(eye_h_times) = {len(eye_h_times)}, but len(eye_v_times) " f"= {len(eye_v_times)}") # Check that eye_h_times and eye_v_times are similar to within 0.5ms if not np.allclose(eye_h_times, eye_v_times, atol=0.0005): - raise ValueError( - 'eye_h_times and eye_v_times are not sufficiently similar' - ) - + raise ValueError("eye_h_times and eye_v_times are not sufficiently similar") + # Set data attributes self.set_original_timestamps(eye_h_times) self._eye_pos = np.stack([eye_h_values, eye_v_values], axis=1) @@ -79,19 +74,18 @@ def __init__(self, folder_path: FolderPathType): def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): # Make SpatialSeries eye_position = SpatialSeries( - name='eye_position', - data=H5DataIO(self._eye_pos, compression='gzip'), - reference_frame='(0,0) is bottom left corner of screen', - unit='meters', + name="eye_position", + data=H5DataIO(self._eye_pos, compression="gzip"), + reference_frame="(0,0) is bottom left corner of screen", + unit="meters", conversion=0.257, - timestamps=H5DataIO(self._timestamps, compression='gzip'), - description='Eye position data recorded by EyeLink camera', + timestamps=H5DataIO(self._timestamps, compression="gzip"), + description="Eye position 
data recorded by EyeLink camera", ) # Get processing module - module_description = 'Contains behavioral data from experiment.' - processing_module = get_module( - nwbfile=nwbfile, name='behavior', description=module_description) + module_description = "Contains behavioral data from experiment." + processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) # Add data to module processing_module.add_data_interface(eye_position) @@ -105,29 +99,28 @@ class PupilSizeInterface(TimestampsFromArrayInterface): def __init__(self, folder_path: FolderPathType): # Find pupil size file folder_path = Path(folder_path) - pupil_size_file = folder_path / 'pupil_size_r.json' - assert pupil_size_file.exists(), f'Could not find {pupil_size_file}' - + pupil_size_file = folder_path / "pupil_size_r.json" + assert pupil_size_file.exists(), f"Could not find {pupil_size_file}" + # Load pupil size data and set data attributes - pupil_size_data = json.load(open(pupil_size_file, 'r')) - self.set_original_timestamps(np.array(pupil_size_data['times'])) - self._pupil_size = np.array(pupil_size_data['values']) + pupil_size_data = json.load(open(pupil_size_file, "r")) + self.set_original_timestamps(np.array(pupil_size_data["times"])) + self._pupil_size = np.array(pupil_size_data["values"]) def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): # Make TimeSeries pupil_size = TimeSeries( - name='pupil_size', - data=H5DataIO(self._pupil_size, compression='gzip'), - unit='pixels', + name="pupil_size", + data=H5DataIO(self._pupil_size, compression="gzip"), + unit="pixels", conversion=1.0, - timestamps=H5DataIO(self._timestamps, compression='gzip'), - description='Pupil size data recorded by EyeLink camera', + timestamps=H5DataIO(self._timestamps, compression="gzip"), + description="Pupil size data recorded by EyeLink camera", ) # Get processing module - module_description = 'Contains behavioral data from experiment.' - processing_module = get_module( - nwbfile=nwbfile, name='behavior', description=module_description) + module_description = "Contains behavioral data from experiment." 
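[Editor's note] The eye_position series being reformatted in this hunk stores values in normalized screen units while declaring unit="meters" with conversion=0.257, so readers recover physical units by scaling stored values by the conversion factor. A sketch of those semantics (the 0.257 figure comes from the patch; positions and timestamps are invented):

```python
import numpy as np
from pynwb.behavior import SpatialSeries

pos = np.array([[0.5, 0.5], [0.62, 0.41]])  # normalized screen coordinates

eye_position = SpatialSeries(
    name="eye_position",
    data=pos,
    unit="meters",
    conversion=0.257,  # stored value * conversion = meters
    reference_frame="(0,0) is bottom left corner of screen",
    timestamps=np.array([0.0, 0.001]),
)

# Readers recover physical positions by applying the conversion factor:
print(eye_position.data[:] * eye_position.conversion)
```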
+ processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) # Add data to module processing_module.add_data_interface(pupil_size) @@ -141,30 +134,27 @@ class RewardLineInterface(TimestampsFromArrayInterface): def __init__(self, folder_path: FolderPathType): # Find reward line file folder_path = Path(folder_path) - reward_line_file = folder_path / 'reward_line.json' - assert reward_line_file.exists(), f'Could not find {reward_line_file}' - + reward_line_file = folder_path / "reward_line.json" + assert reward_line_file.exists(), f"Could not find {reward_line_file}" + # Load reward line data and set data attributes - reward_line_data = json.load(open(reward_line_file, 'r')) - self.set_original_timestamps(np.array(reward_line_data['times'])) - self._reward_line = reward_line_data['values'] + reward_line_data = json.load(open(reward_line_file, "r")) + self.set_original_timestamps(np.array(reward_line_data["times"])) + self._reward_line = reward_line_data["values"] def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): # Make LabeledEvents reward_line = LabeledEvents( - name='reward_line', - description=( - 'Reward line data representing events of reward dispenser' - ), - timestamps=H5DataIO(self._timestamps, compression='gzip'), + name="reward_line", + description=("Reward line data representing events of reward dispenser"), + timestamps=H5DataIO(self._timestamps, compression="gzip"), data=self._reward_line, - labels=['closed', 'open'], + labels=["closed", "open"], ) # Get processing module - module_description = 'Contains audio and reward data from experiment.' - processing_module = get_module( - nwbfile=nwbfile, name='behavior', description=module_description) + module_description = "Contains audio and reward data from experiment." + processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) # Add data to module processing_module.add_data_interface(reward_line) @@ -174,37 +164,36 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): class AudioInterface(TimestampsFromArrayInterface): """Audio interface.""" - - SOUNDS = ['failure_sound', 'success_sound'] + + SOUNDS = ["failure_sound", "success_sound"] def __init__(self, folder_path: FolderPathType): # Find sound file folder_path = Path(folder_path) - sound_file = folder_path / 'sound.json' - assert sound_file.exists(), f'Could not find {sound_file}' - + sound_file = folder_path / "sound.json" + assert sound_file.exists(), f"Could not find {sound_file}" + # Load sound data and set data attributes - sound_data = json.load(open(sound_file, 'r')) - self.set_original_timestamps(np.array(sound_data['times'])) - audio = np.array(sound_data['values']) - + sound_data = json.load(open(sound_file, "r")) + self.set_original_timestamps(np.array(sound_data["times"])) + audio = np.array(sound_data["values"]) + sound_to_code = {k: i for i, k in enumerate(AudioInterface.SOUNDS)} self._sound_codes = [sound_to_code[x] for x in audio] def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): # Make LabeledEvents audio = LabeledEvents( - name='audio', - description='Audio data representing auditory stimuli events', - timestamps=H5DataIO(self._timestamps, compression='gzip'), + name="audio", + description="Audio data representing auditory stimuli events", + timestamps=H5DataIO(self._timestamps, compression="gzip"), data=self._sound_codes, labels=AudioInterface.SOUNDS, ) # Get processing module - module_description = 'Contains audio and reward data from experiment.' 
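[Editor's note] Throughout these interfaces, both data and timestamps are wrapped in H5DataIO so the HDF5 backend writes them gzip-compressed. A minimal sketch of the pattern with synthetic data:

```python
import numpy as np
from hdmf.backends.hdf5 import H5DataIO
from pynwb import TimeSeries

values = np.random.rand(10_000)  # placeholder samples

pupil_size = TimeSeries(
    name="pupil_size",
    data=H5DataIO(values, compression="gzip"),  # compressed when written
    unit="pixels",
    timestamps=H5DataIO(np.arange(values.size) / 1000.0, compression="gzip"),
    description="Example compressed series (synthetic data)",
)
```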
- processing_module = get_module( - nwbfile=nwbfile, name='behavior', description=module_description) + module_description = "Contains audio and reward data from experiment." + processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) # Add data to module processing_module.add_data_interface(audio) From 1c110b2f8456049e2a32fcdf672a15df8294bb03 Mon Sep 17 00:00:00 2001 From: Nicholas Watters Date: Tue, 19 Dec 2023 11:12:17 -0500 Subject: [PATCH 7/8] Remove watters_convert_session.py that is no longer in use. --- .../watters/watters_convert_session.py | 254 ------------------ 1 file changed, 254 deletions(-) delete mode 100644 src/jazayeri_lab_to_nwb/watters/watters_convert_session.py diff --git a/src/jazayeri_lab_to_nwb/watters/watters_convert_session.py b/src/jazayeri_lab_to_nwb/watters/watters_convert_session.py deleted file mode 100644 index e5e26aa..0000000 --- a/src/jazayeri_lab_to_nwb/watters/watters_convert_session.py +++ /dev/null @@ -1,254 +0,0 @@ -"""Primary script to run to convert an entire session for of data using the NWBConverter.""" -import datetime -import glob -import json -import logging -import os -from pathlib import Path -from typing import Union -from uuid import uuid4 -from zoneinfo import ZoneInfo - -from neuroconv.tools.data_transfers import automatic_dandi_upload -from neuroconv.utils import dict_deep_update, load_dict_from_file - -from jazayeri_lab_to_nwb.watters import WattersNWBConverter - -# Set logger level for info is displayed in console -logging.getLogger().setLevel(logging.INFO) - - -def _get_single_file(directory, suffix=""): - """Get path to a file in given directory with given suffix. - - Raisees error if not exactly one satisfying file. - """ - files = list(glob.glob(str(directory / f"*{suffix}"))) - if len(files) == 0: - raise ValueError(f"No {suffix} files found in {directory}") - if len(files) > 1: - raise ValueError(f"Multiple {suffix} files found in {directory}") - return files[0] - - -def session_to_nwb( - data_dir: Union[str, Path], - output_dir_path: Union[str, Path], - stub_test: bool = False, - overwrite: bool = True, - dandiset_id: Union[str, None] = None, -): - """ - Convert a single session to an NWB file. - - Parameters - ---------- - data_dir : string or Path - Source data directory. - output_dir_path : string or Path - Output data directory. - stub_test : boolean - Whether or not to generate a preview file by limiting data write to a few MB. - Default is False. - overwrite : boolean - If the file exists already, True will delete and replace with a new file, False will append the contents. - Default is True. - dandiset_id : string, optional - If you want to upload the file to the DANDI archive, specify the six-digit ID here. - Requires the DANDI_API_KEY environment variable to be set. - To set this in your bash terminal in Linux or macOS, run - export DANDI_API_KEY=... - or in Windows - set DANDI_API_KEY=... - Default is None. - """ - if dandiset_id is not None: - import dandi # check importability - - assert os.getenv("DANDI_API_KEY"), ( - "Unable to find environment variable 'DANDI_API_KEY'. " - "Please retrieve your token from DANDI and set this environment variable." 
- ) - - logging.info("") - logging.info(f"data_dir = {data_dir}") - logging.info(f"output_dir_path = {output_dir_path}") - logging.info(f"stub_test = {stub_test}") - - data_dir = Path(data_dir) - output_dir_path = Path(output_dir_path) - if stub_test: - output_dir_path = output_dir_path / "nwb_stub" - output_dir_path.mkdir(parents=True, exist_ok=True) - - session_id = data_dir.name - raw_nwbfile_path = output_dir_path / f"{session_id}_raw.nwb" - processed_nwbfile_path = output_dir_path / f"{session_id}_processed.nwb" - logging.info(f"raw_nwbfile_path = {raw_nwbfile_path}") - logging.info(f"processed_nwbfile_path = {processed_nwbfile_path}") - - raw_source_data = dict() - raw_conversion_options = dict() - processed_source_data = dict() - processed_conversion_options = dict() - - for probe_num in range(2): - # Add V-Probe Recording - probe_data_dir = data_dir / "raw_data" / f"v_probe_{probe_num}" - if not probe_data_dir.exists(): - continue - logging.info(f"\nAdding V-probe {probe_num} recording") - - logging.info(" Raw data") - recording_file = _get_single_file(probe_data_dir, suffix=".dat") - recording_source_data = { - f"RecordingVP{probe_num}": dict( - file_path=recording_file, - probe_metadata_file=str(data_dir / "data_open_source" / "probes.metadata.json"), - probe_key=f"probe{(probe_num + 1):02d}", - probe_name=f"vprobe{probe_num}", - es_key=f"ElectricalSeriesVP{probe_num}", - ) - } - raw_source_data.update(recording_source_data) - processed_source_data.update(recording_source_data) - raw_conversion_options.update({f"RecordingVP{probe_num}": dict(stub_test=stub_test)}) - processed_conversion_options.update( - {f"RecordingVP{probe_num}": dict(stub_test=stub_test, write_electrical_series=False)} - ) - - # Add V-Probe Sorting - logging.info(" Spike sorted data") - processed_source_data.update( - { - f"SortingVP{probe_num}": dict( - folder_path=str(data_dir / "spike_sorting_raw" / f"v_probe_{probe_num}"), - keep_good_only=False, - ) - } - ) - processed_conversion_options.update({f"SortingVP{probe_num}": dict(stub_test=stub_test, write_as="processing")}) - - # Add SpikeGLX Recording - logging.info("Adding SpikeGLX recordings") - logging.info(" AP data") - probe_data_dir = data_dir / "raw_data" / "spikeglx" / "*" / "*" - ap_file = _get_single_file(probe_data_dir, suffix=".ap.bin") - raw_source_data.update(dict(RecordingNP=dict(file_path=ap_file))) - processed_source_data.update(dict(RecordingNP=dict(file_path=ap_file))) - raw_conversion_options.update(dict(RecordingNP=dict(stub_test=stub_test))) - processed_conversion_options.update(dict(RecordingNP=dict(stub_test=stub_test, write_electrical_series=False))) - - # Add LFP - logging.info(" LFP data") - lfp_file = _get_single_file(probe_data_dir, suffix=".lf.bin") - raw_source_data.update(dict(LF=dict(file_path=lfp_file))) - processed_source_data.update(dict(LF=dict(file_path=lfp_file))) - raw_conversion_options.update(dict(LF=dict(stub_test=stub_test))) - processed_conversion_options.update(dict(LF=dict(stub_test=stub_test, write_electrical_series=False))) - - # Add Sorting - logging.info(" Spike sorted data") - processed_source_data.update( - dict( - SortingNP=dict( - folder_path=str(data_dir / "spike_sorting_raw" / "np"), - keep_good_only=False, - ) - ) - ) - processed_conversion_options.update(dict(SortingNP=dict(stub_test=stub_test, write_as="processing"))) - - # Add Behavior - logging.info("Adding behavior") - behavior_path = str(data_dir / "data_open_source" / "behavior") - 
-    processed_source_data.update(dict(EyePosition=dict(folder_path=behavior_path)))
-    processed_conversion_options.update(dict(EyePosition=dict()))
-
-    processed_source_data.update(dict(PupilSize=dict(folder_path=behavior_path)))
-    processed_conversion_options.update(dict(PupilSize=dict()))
-
-    # Add Trials
-    logging.info("Adding task data")
-    processed_source_data.update(dict(Trials=dict(folder_path=str(data_dir / "data_open_source"))))
-    processed_conversion_options.update(dict(Trials=dict()))
-
-    processed_converter = WattersNWBConverter(source_data=processed_source_data, sync_dir=str(data_dir / "sync_pulses"))
-
-    # Add datetime to conversion
-    metadata = processed_converter.get_metadata()
-    metadata["NWBFile"]["session_id"] = session_id
-
-    # Subject name
-    if "monkey0" in str(data_dir):
-        metadata["Subject"]["subject_id"] = "Perle"
-    elif "monkey1" in str(data_dir):
-        metadata["Subject"]["subject_id"] = "Elgar"
-
-    # Ecephys
-    probe_metadata_file = data_dir / "data_open_source" / "probes.metadata.json"
-    with open(probe_metadata_file, "r") as f:
-        probe_metadata = json.load(f)
-    neuropixel_metadata = [entry for entry in probe_metadata if entry["label"] == "probe00"][0]
-    for entry in metadata["Ecephys"]["ElectrodeGroup"]:
-        if entry["device"] == "Neuropixel-Imec":
-            # TODO: uncomment when fixed in pynwb
-            # entry.update(dict(position=[(
-            #     neuropixel_metadata["coordinates"][0],
-            #     neuropixel_metadata["coordinates"][1],
-            #     neuropixel_metadata["depth_from_surface"],
-            # )]
-            logging.warning("\n\n PROBE COORDINATES NOT IMPLEMENTED\n\n")
-
-    # Update default metadata with the editable metadata in the corresponding yaml file
-    editable_metadata_path = Path(__file__).parent / "watters_metadata.yaml"
-    editable_metadata = load_dict_from_file(editable_metadata_path)
-    metadata = dict_deep_update(metadata, editable_metadata)
-
-    # Check if session_start_time was found/set
-    if "session_start_time" not in metadata["NWBFile"]:
-        try:
-            date = datetime.datetime.strptime(data_dir.name, "%Y-%m-%d")
-            date = date.replace(tzinfo=ZoneInfo("US/Eastern"))
-        except ValueError:
-            raise ValueError(
-                "Session start time was not auto-detected. Please provide it in `watters_metadata.yaml`"
-            )
-        metadata["NWBFile"]["session_start_time"] = date
-
-    # Run conversion
-    logging.info("Running processed conversion")
-    processed_converter.run_conversion(
-        metadata=metadata,
-        nwbfile_path=processed_nwbfile_path,
-        conversion_options=processed_conversion_options,
-        overwrite=overwrite,
-    )
-
-    logging.info("Running raw data conversion")
-    metadata["NWBFile"]["identifier"] = str(uuid4())
-    raw_converter = WattersNWBConverter(source_data=raw_source_data, sync_dir=str(data_dir / "sync_pulses"))
-    raw_converter.run_conversion(
-        metadata=metadata,
-        nwbfile_path=raw_nwbfile_path,
-        conversion_options=raw_conversion_options,
-        overwrite=overwrite,
-    )
-    automatic_dandi_upload(dandiset_id=dandiset_id)
-
-
-if __name__ == "__main__":
-
-    # Parameters for conversion
-    data_dir = Path("/om2/user/nwatters/catalystneuro/initial_data_transfer/" "monkey0/2022-06-01/")
-    output_dir_path = Path("/om/user/nwatters/nwb_data/watters_perle_combined/")
-    stub_test = True
-    overwrite = True
-
-    session_to_nwb(
-        data_dir=data_dir,
-        output_dir_path=output_dir_path,
-        stub_test=stub_test,
-        overwrite=overwrite,
-        # dandiset_id = "000620",
-    )

From 055c7159d9b25f60a516cf8edc90342d73688840 Mon Sep 17 00:00:00 2001
From: Nicholas Watters
Date: Tue, 19 Dec 2023 11:16:41 -0500
Subject: [PATCH 8/8] Remove duplicate file timeseries_interfaces.py.
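
The canonical copy of these classes lives in timeseries_interface.py; only
the stale duplicate is deleted here. For reference, the temporal-alignment
API the classes implement can be exercised roughly as in the sketch below.
This is a minimal sketch, not code from the repo: the folder path and clock
offset are hypothetical, and it assumes timeseries_interface.py exposes the
same class names as the duplicate shown in this diff.

    import numpy as np

    from jazayeri_lab_to_nwb.watters.timeseries_interface import EyePositionInterface

    # Hypothetical inputs, for illustration only.
    interface = EyePositionInterface(folder_path="/path/to/data_open_source/behavior")
    offset_seconds = 0.012  # e.g. an offset measured from the sync pulses

    # Shift the original timestamps onto the master clock; get_timestamps()
    # then returns the aligned values used when writing to the NWB file.
    aligned = interface.get_original_timestamps() + offset_seconds
    interface.set_aligned_timestamps(aligned)
    assert np.allclose(interface.get_timestamps(), aligned)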
---
 .../watters/timeseries_interfaces.py          | 201 ------------------
 1 file changed, 201 deletions(-)
 delete mode 100644 src/jazayeri_lab_to_nwb/watters/timeseries_interfaces.py

diff --git a/src/jazayeri_lab_to_nwb/watters/timeseries_interfaces.py b/src/jazayeri_lab_to_nwb/watters/timeseries_interfaces.py
deleted file mode 100644
index 5c7b922..0000000
--- a/src/jazayeri_lab_to_nwb/watters/timeseries_interfaces.py
+++ /dev/null
@@ -1,201 +0,0 @@
-"""Primary classes for timeseries variables.
-
-The classes here handle variables like eye position, reward line, and audio
-stimuli that are not necessarily tied to the trial structure or display updates.
-For trial-structured variables, see ../trials_interface.py. For variables
-pertaining to display updates, see ../frames_interface.py.
-"""
-
-import json
-from pathlib import Path
-
-import numpy as np
-from hdmf.backends.hdf5 import H5DataIO
-from ndx_events import LabeledEvents
-from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface
-from neuroconv.tools.nwb_helpers import get_module
-from neuroconv.utils import FolderPathType
-from pynwb import NWBFile, TimeSeries
-from pynwb.behavior import SpatialSeries
-
-
-class TimestampsFromArrayInterface(BaseTemporalAlignmentInterface):
-    """Interface implementing temporal alignment functions with timestamps."""
-
-    def __init__(self, folder_path: FolderPathType):
-        super().__init__(folder_path=folder_path)
-
-    def set_original_timestamps(self, original_timestamps: np.ndarray) -> None:
-        self._original_timestamps = original_timestamps
-        self._timestamps = np.copy(original_timestamps)
-
-    def get_original_timestamps(self) -> np.ndarray:
-        return self._original_timestamps
-
-    def set_aligned_timestamps(self, aligned_timestamps: np.ndarray) -> None:
-        self._timestamps = aligned_timestamps
-
-    def get_timestamps(self):
-        return self._timestamps
-
-
-class EyePositionInterface(TimestampsFromArrayInterface):
-    """Eye position interface."""
-
-    def __init__(self, folder_path: FolderPathType):
-        folder_path = Path(folder_path)
-        super().__init__(folder_path=folder_path)
-
-        # Find eye position files and check they all exist
-        eye_h_file = folder_path / "eye_h_calibrated.json"
-        eye_v_file = folder_path / "eye_v_calibrated.json"
-        assert eye_h_file.exists(), f"Could not find {eye_h_file}"
-        assert eye_v_file.exists(), f"Could not find {eye_v_file}"
-
-        # Load eye data
-        eye_h_data = json.load(open(eye_h_file, "r"))
-        eye_v_data = json.load(open(eye_v_file, "r"))
-        eye_h_times = np.array(eye_h_data["times"])
-        eye_h_values = 0.5 + (np.array(eye_h_data["values"]) / 20)
-        eye_v_times = np.array(eye_v_data["times"])
-        eye_v_values = 0.5 + (np.array(eye_v_data["values"]) / 20)
-
-        # Check eye_h and eye_v have the same number of samples
-        if len(eye_h_times) != len(eye_v_times):
-            raise ValueError(f"len(eye_h_times) = {len(eye_h_times)}, but len(eye_v_times) = {len(eye_v_times)}")
-        # Check that eye_h_times and eye_v_times are similar to within 0.5ms
-        if not np.allclose(eye_h_times, eye_v_times, atol=0.0005):
-            raise ValueError("eye_h_times and eye_v_times are not sufficiently similar")
-
-        # Set data attributes
-        self.set_original_timestamps(eye_h_times)
-        self._eye_pos = np.stack([eye_h_values, eye_v_values], axis=1)
-
-    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict):
-        # Make SpatialSeries
-        eye_position = SpatialSeries(
-            name="eye_position",
-            data=H5DataIO(self._eye_pos, compression="gzip"),
-            reference_frame="(0,0) is bottom left corner of screen",
-            unit="meters",
-            conversion=0.257,
-            timestamps=H5DataIO(self._timestamps, compression="gzip"),
-            description="Eye position data recorded by EyeLink camera",
-        )
-
-        # Get processing module
-        module_description = "Contains behavioral data from experiment."
-        processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description)
-
-        # Add data to module
-        processing_module.add_data_interface(eye_position)
-
-        return nwbfile
-
-
-class PupilSizeInterface(TimestampsFromArrayInterface):
-    """Pupil size interface."""
-
-    def __init__(self, folder_path: FolderPathType):
-        # Find pupil size file
-        folder_path = Path(folder_path)
-        pupil_size_file = folder_path / "pupil_size_r.json"
-        assert pupil_size_file.exists(), f"Could not find {pupil_size_file}"
-
-        # Load pupil size data and set data attributes
-        pupil_size_data = json.load(open(pupil_size_file, "r"))
-        self.set_original_timestamps(np.array(pupil_size_data["times"]))
-        self._pupil_size = np.array(pupil_size_data["values"])
-
-    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict):
-        # Make TimeSeries
-        pupil_size = TimeSeries(
-            name="pupil_size",
-            data=H5DataIO(self._pupil_size, compression="gzip"),
-            unit="pixels",
-            conversion=1.0,
-            timestamps=H5DataIO(self._timestamps, compression="gzip"),
-            description="Pupil size data recorded by EyeLink camera",
-        )
-
-        # Get processing module
-        module_description = "Contains behavioral data from experiment."
-        processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description)
-
-        # Add data to module
-        processing_module.add_data_interface(pupil_size)
-
-        return nwbfile
-
-
-class RewardLineInterface(TimestampsFromArrayInterface):
-    """Reward line interface."""
-
-    def __init__(self, folder_path: FolderPathType):
-        # Find reward line file
-        folder_path = Path(folder_path)
-        reward_line_file = folder_path / "reward_line.json"
-        assert reward_line_file.exists(), f"Could not find {reward_line_file}"
-
-        # Load reward line data and set data attributes
-        reward_line_data = json.load(open(reward_line_file, "r"))
-        self.set_original_timestamps(np.array(reward_line_data["times"]))
-        self._reward_line = reward_line_data["values"]
-
-    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict):
-        # Make LabeledEvents
-        reward_line = LabeledEvents(
-            name="reward_line",
-            description=("Reward line data representing events of reward dispenser"),
-            timestamps=H5DataIO(self._timestamps, compression="gzip"),
-            data=self._reward_line,
-            labels=["closed", "open"],
-        )
-
-        # Get processing module
-        module_description = "Contains audio and reward data from experiment."
- processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) - - # Add data to module - processing_module.add_data_interface(reward_line) - - return nwbfile - - -class AudioInterface(TimestampsFromArrayInterface): - """Audio interface.""" - - SOUNDS = ["failure_sound", "success_sound"] - - def __init__(self, folder_path: FolderPathType): - # Find sound file - folder_path = Path(folder_path) - sound_file = folder_path / "sound.json" - assert sound_file.exists(), f"Could not find {sound_file}" - - # Load sound data and set data attributes - sound_data = json.load(open(sound_file, "r")) - self.set_original_timestamps(np.array(sound_data["times"])) - audio = np.array(sound_data["values"]) - - sound_to_code = {k: i for i, k in enumerate(AudioInterface.SOUNDS)} - self._sound_codes = [sound_to_code[x] for x in audio] - - def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): - # Make LabeledEvents - audio = LabeledEvents( - name="audio", - description="Audio data representing auditory stimuli events", - timestamps=H5DataIO(self._timestamps, compression="gzip"), - data=self._sound_codes, - labels=AudioInterface.SOUNDS, - ) - - # Get processing module - module_description = "Contains audio and reward data from experiment." - processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) - - # Add data to module - processing_module.add_data_interface(audio) - - return nwbfile
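
For reference, the LabeledEvents objects built by RewardLineInterface and
AudioInterface store integer codes that index into their `labels` list. A
minimal round-trip sketch of that mapping, mirroring the sound_to_code logic
in AudioInterface.__init__ (the event sequence is made up for illustration):

    SOUNDS = ["failure_sound", "success_sound"]
    sound_to_code = {name: code for code, name in enumerate(SOUNDS)}

    events = ["success_sound", "failure_sound", "success_sound"]  # example data
    codes = [sound_to_code[name] for name in events]  # -> [1, 0, 1]

    # Decoding is just indexing back into the labels list.
    decoded = [SOUNDS[code] for code in codes]
    assert decoded == events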