From 13d08a55c5cf4545ed83dd7f96bcfca017eba281 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Tue, 19 Dec 2023 16:48:22 -0500 Subject: [PATCH 01/28] Created new folder and update get_session_paths --- src/jazayeri_lab_to_nwb/piccato/README.md | 56 +++ src/jazayeri_lab_to_nwb/piccato/__init__.py | 0 .../piccato/display_interface.py | 91 +++++ .../piccato/get_session_paths.py | 120 ++++++ .../piccato/main_convert_session.py | 352 ++++++++++++++++++ src/jazayeri_lab_to_nwb/piccato/metadata.yaml | 15 + .../piccato/nwb_converter.py | 137 +++++++ .../piccato/recording_interface.py | 97 +++++ .../piccato/requirements.txt | 0 .../piccato/timeseries_interface.py | 200 ++++++++++ .../piccato/trials_interface.py | 167 +++++++++ 11 files changed, 1235 insertions(+) create mode 100644 src/jazayeri_lab_to_nwb/piccato/README.md create mode 100644 src/jazayeri_lab_to_nwb/piccato/__init__.py create mode 100644 src/jazayeri_lab_to_nwb/piccato/display_interface.py create mode 100644 src/jazayeri_lab_to_nwb/piccato/get_session_paths.py create mode 100644 src/jazayeri_lab_to_nwb/piccato/main_convert_session.py create mode 100644 src/jazayeri_lab_to_nwb/piccato/metadata.yaml create mode 100644 src/jazayeri_lab_to_nwb/piccato/nwb_converter.py create mode 100644 src/jazayeri_lab_to_nwb/piccato/recording_interface.py create mode 100644 src/jazayeri_lab_to_nwb/piccato/requirements.txt create mode 100644 src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py create mode 100644 src/jazayeri_lab_to_nwb/piccato/trials_interface.py diff --git a/src/jazayeri_lab_to_nwb/piccato/README.md b/src/jazayeri_lab_to_nwb/piccato/README.md new file mode 100644 index 0000000..e718409 --- /dev/null +++ b/src/jazayeri_lab_to_nwb/piccato/README.md @@ -0,0 +1,56 @@ +# Piccato data conversion pipeline +NWB conversion scripts for Piccato data to the [Neurodata Without Borders](https://nwb-overview.readthedocs.io/) data format. 


## Usage
To run a specific conversion, you might first need to install some conversion-specific dependencies that are located in each conversion directory:
```
pip install -r src/jazayeri_lab_to_nwb/piccato/requirements.txt
```

You can run a specific conversion with the following command:
```
python src/jazayeri_lab_to_nwb/piccato/main_convert_session.py $SUBJECT $SESSION
```

### Working memory task data
The conversion function for this experiment, `session_to_nwb`, is found in `src/jazayeri_lab_to_nwb/piccato/main_convert_session.py`. The function takes arguments:
* `subject` subject name, e.g. `'Elgar'`.
* `session` session date in format `'YYYY-MM-DD'`.
* `stub_test` indicates whether only a small portion of the data should be saved (mainly used by us for testing purposes).
* `overwrite` indicates whether to overwrite nwb output files.
* `dandiset_id` optional dandiset ID.

The function can be imported and run in a separate script, or you can run the file directly and specify the arguments in the `if __name__ == "__main__"` block at the bottom.

The function expects the raw data in `data_dir_path` to follow this structure:

    data_dir_path/
    ├── data_open_source
    │ ├── behavior
    │ │ └── eye.h.times.npy, etc.
    │ ├── task
    │ └── trials.start_times.json, etc.
    │ └── probes.metadata.json
    ├── raw_data
    │ ├── spikeglx
    │ └── */*/*.ap.bin, */*/*.lf.bin, etc.
    │ ├── v_probe_0
    │ └── raw_data.dat
    │ └── v_probe_{n}
    │ └── raw_data.dat
    ├── spike_sorting_raw
    │ ├── np
    │ ├── vp_0
    │ └── vp_{n}
    ├── sync_pulses
    ├── mworks
    ├── open_ephys
    └── spikeglx
    ...

The conversion will try to automatically fetch metadata from the provided data directory. However, some information, such as the subject's name and age, must be specified by the user in the file `src/jazayeri_lab_to_nwb/piccato/metadata.yaml`. If any of the automatically fetched metadata is incorrect, it can also be overridden from this file. 
+ +The converted data will be saved in two files, one called `{session_id}_raw.nwb`, which contains the raw electrophysiology data from the Neuropixels and V-Probes, and one called `{session_id}_processed.nwb` with behavioral data, trial info, and sorted unit spiking. + +If you run into memory issues when writing the `{session_id}_raw.nwb` files, you may want to set `buffer_gb` to a value smaller than 1 (its default) in the `conversion_options` dicts for the recording interfaces, i.e. [here](https://github.com/catalystneuro/jazayeri-lab-to-nwb/blob/vprobe_dev/src/jazayeri_lab_to_nwb/watters/main_convert_session.py#L189). diff --git a/src/jazayeri_lab_to_nwb/piccato/__init__.py b/src/jazayeri_lab_to_nwb/piccato/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/jazayeri_lab_to_nwb/piccato/display_interface.py b/src/jazayeri_lab_to_nwb/piccato/display_interface.py new file mode 100644 index 0000000..f5ecadd --- /dev/null +++ b/src/jazayeri_lab_to_nwb/piccato/display_interface.py @@ -0,0 +1,91 @@ +"""Class for converting data about display frames.""" +import itertools +import json +from pathlib import Path +from typing import Optional + +import numpy as np +import pandas as pd +from neuroconv.datainterfaces.text.timeintervalsinterface import TimeIntervalsInterface +from neuroconv.utils import FolderPathType +from pynwb import NWBFile + + +class DisplayInterface(TimeIntervalsInterface): + """Class for converting data about display frames. + + All events that occur exactly once per display update are contained in this + interface. 
+ """ + + KEY_MAP = { + "frame_object_positions": "object_positions", + "frame_fixation_cross_scale": "fixation_cross_scale", + "frame_closed_loop_gaze_position": "closed_loop_eye_position", + "frame_task_phase": "task_phase", + "frame_display_times": "start_time", + } + + def __init__(self, folder_path: FolderPathType, verbose: bool = True): + super().__init__(file_path=folder_path, verbose=verbose) + + def get_metadata(self) -> dict: + metadata = super().get_metadata() + metadata["TimeIntervals"] = dict( + display=dict( + table_name="display", + table_description="data about each displayed frame", + ) + ) + return metadata + + def get_timestamps(self) -> np.ndarray: + return super(DisplayInterface, self).get_timestamps(column="start_time") + + def set_aligned_starting_time(self, aligned_starting_time: float) -> None: + self.dataframe.start_time += aligned_starting_time + + def _read_file(self, file_path: FolderPathType): + # Create dataframe with data for each frame + trials = json.load(open(Path(file_path) / "trials.json", "r")) + frames = { + k_mapped: list(itertools.chain(*[d[k] for d in trials])) for k, k_mapped in DisplayInterface.KEY_MAP.items() + } + + # Serialize object_positions data for hdf5 conversion to work + frames["object_positions"] = [json.dumps(x) for x in frames["object_positions"]] + + return pd.DataFrame(frames) + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None, tag: str = "display"): + return super(DisplayInterface, self).add_to_nwbfile( + nwbfile=nwbfile, + metadata=metadata, + tag=tag, + column_descriptions=self.column_descriptions, + ) + + @property + def column_descriptions(self): + column_descriptions = { + "object_positions": ( + "For each frame, a serialized list with one element for each " + "object. Each element is an (x, y) position of the " + "corresponding object, in coordinates of arena width." + ), + "fixation_cross_scale": ( + "For each frame, the scale of the central fixation cross. 
" + "Fixation cross scale grows as the eye position deviates from " + "the center of the fixation cross, to provide a cue to " + "maintain good fixation." + ), + "closed_loop_eye_position": ( + "For each frame, the eye position in the close-loop task " + "engine. This was used to for real-time eye position " + "computations, such as saccade detection and reward delivery." + ), + "task_phase": "The phase of the task for each frame.", + "start_time": "Time of display update for each frame.", + } + + return column_descriptions diff --git a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py new file mode 100644 index 0000000..1320725 --- /dev/null +++ b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py @@ -0,0 +1,120 @@ +"""Function for getting paths to data on openmind.""" + +import collections +import pathlib + +SUBJECT_NAME_TO_ID = { + "elgar": "elgar", +} + +SessionPaths = collections.namedtuple( + "SessionPaths", + [ + "output", + "raw_data", + # "data_open_source", + "task_behavior_data", + "sync_pulses", + "spike_sorting_raw", + ], +) + + +def _get_session_paths_openmind(subject, session): + """Get paths to all components of the data on openmind.""" + # subject_id = SUBJECT_NAME_TO_ID[subject] + + # Path to write output nwb files to + output_path = f"/om2/user/apiccato/nwb_data/staging/sub-{subject}" + + # Path to the raw data. This is used for reading raw physiology data. + raw_data_path = pathlib.Path( + "/om4/group/jazlab/apiccato/", + "phys_preprocessing_open_source/phys_data/", + f"{session}/raw_data/{subject}/") + + # Path to task and behavior data. + task_behavior_data_path = pathlib.Path( + "/om4/group/jazlab/apiccato/phys_preprocessing_open_source/", + f"{subject}/{session}" + ) + + # # Path to open-source data. This is used for reading behavior and task data. 
+ # data_open_source_path = ( + # "/om4/group/jazlab/nwatters/multi_prediction/datasets/data_open_source/" f"Subjects/{subject_id}/{session}/001" + # ) + + # Path to sync pulses. This is used for reading timescale transformations + # between physiology and mworks data streams. + sync_pulses_path = pathlib.Path( + "/om4/group/jazlab/phys_preprocessing_open_source", + f"{subject}/{session}/sync_signals" + ) + + # Path to spike sorting. This is used for reading spike sorted data. + spike_sorting_raw_path = pathlib.Path( + "/om4/group/jazlab/apiccato/phys_preprocessing_open_source/" + f"{subject}/{session}/spike_sorting" + ) + + session_paths = SessionPaths( + output=output_path, + raw_data=raw_data_path, + # data_open_source=pathlib.Path(data_open_source_path), + task_behavior_data=task_behavior_data_path, + sync_pulses=sync_pulses_path, + spike_sorting_raw=spike_sorting_raw_path, + ) + + return session_paths + +# TODO: Update Globus paths when these are available + +def _get_session_paths_globus(subject, session): + """Get paths to all components of the data in the globus repo.""" + subject_id = SUBJECT_NAME_TO_ID[subject] + base_data_dir = f"/shared/catalystneuro/JazLab/{subject_id}/{session}/" + + # Path to write output nwb files to + output_path = f"~/conversion_nwb/jazayeri-lab-to-nwb" + + # Path to the raw data. This is used for reading raw physiology data. + raw_data_path = f"{base_data_dir}/raw_data" + + # Path to task and behavior data. + task_behavior_data_path = f"{base_data_dir}/processed_task_data" + + # Path to open-source data. This is used for reading behavior and task data. + data_open_source_path = f"{base_data_dir}/data_open_source" + + # Path to sync pulses. This is used for reading timescale transformations + # between physiology and mworks data streams. + sync_pulses_path = f"{base_data_dir}/sync_pulses" + + # Path to spike sorting. This is used for reading spike sorted data. 
+ spike_sorting_raw_path = f"{base_data_dir}/spike_sorting" + + session_paths = SessionPaths( + output=pathlib.Path(output_path), + raw_data=pathlib.Path(raw_data_path), + data_open_source=pathlib.Path(data_open_source_path), + task_behavior_data=pathlib.Path(task_behavior_data_path), + sync_pulses=pathlib.Path(sync_pulses_path), + spike_sorting_raw=pathlib.Path(spike_sorting_raw_path), + ) + + return session_paths + + +def get_session_paths(subject, session, repo="openmind"): + """Get paths to all components of the data. + + Returns: + SessionPaths namedtuple. + """ + if repo == "openmind": + return _get_session_paths_openmind(subject=subject, session=session) + elif repo == "globus": + return _get_session_paths_globus(subject=subject, session=session) + else: + raise ValueError(f"Invalid repo {repo}") diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py new file mode 100644 index 0000000..2606802 --- /dev/null +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -0,0 +1,352 @@ +"""Entrypoint to convert an entire session of data to NWB. + +This converts a session to NWB format and writes the nwb files to + /om/user/nwatters/nwb_data_multi_prediction/{$SUBJECT}/{$SESSION} +Two NWB files are created: + $SUBJECT_$SESSION_raw.nwb --- Raw physiology + $SUBJECT_$SESSION_processed.nwb --- Task, behavior, and sorted physiology +These files can be automatically uploaded to a DANDI dataset. + +Usage: + $ python main_convert_session.py $SUBJECT $SESSION + where $SUBJECT is the subject name and $SESSION is the session date + YYYY-MM-DD. For example: + $ python main_convert_session.py Perle 2022-06-01 + + Please read and consider changing the following variables: + _REPO + _STUB_TEST + _OVERWRITE + _DANDISET_ID + See comments below for descriptions of these variables. 
+""" + +import datetime +import glob +import json +import logging +import os +import sys +from pathlib import Path +from typing import Union +from uuid import uuid4 +from zoneinfo import ZoneInfo + +import get_session_paths +import nwb_converter +from neuroconv.tools.data_transfers import automatic_dandi_upload +from neuroconv.utils import dict_deep_update, load_dict_from_file + +# Data repository. Either 'globus' or 'openmind' +_REPO = "openmind" +# Whether to run all the physiology data or only a stub +_STUB_TEST = True +# Whether to overwrite output nwb files +_OVERWRITE = True +# ID of the dandiset to upload to, or None to not upload +_DANDISET_ID = None # '000620' + +# Set logger level for info is displayed in console +logging.getLogger().setLevel(logging.INFO) + +_SUBJECT_TO_SEX = { + "Elgar": "M", +} +_SUBJECT_TO_AGE = { + "Elgar": "P10Y", # Born 5/2/2012 +} + + +def _get_single_file(directory, suffix=""): + """Get path to a file in given directory with given suffix. + + Raises error if not exactly one satisfying file. 
+ """ + files = list(glob.glob(str(directory / f"*{suffix}"))) + if len(files) == 0: + raise ValueError(f"No {suffix} files found in {directory}") + if len(files) > 1: + raise ValueError(f"Multiple {suffix} files found in {directory}") + return files[0] + + +def _add_v_probe_data( + raw_source_data, + raw_conversion_options, + processed_source_data, + processed_conversion_options, + session_paths, + probe_num, + stub_test, +): + """Add V-Probe session data.""" + probe_data_dir = session_paths.raw_data / f"v_probe_{probe_num}" + if not probe_data_dir.exists(): + return + logging.info(f"Adding V-probe {probe_num} session data") + + # Raw data + recording_file = _get_single_file(probe_data_dir, suffix=".dat") + metadata_path = str(session_paths.data_open_source / "probes.metadata.json") + raw_source_data[f"RecordingVP{probe_num}"] = dict( + file_path=recording_file, + probe_metadata_file=metadata_path, + probe_key=f"probe{(probe_num + 1):02d}", + probe_name=f"vprobe{probe_num}", + es_key=f"ElectricalSeriesVP{probe_num}", + ) + raw_conversion_options[f"RecordingVP{probe_num}"] = dict(stub_test=stub_test) + + # Processed data + sorting_path = (session_paths.spike_sorting_raw / + f"v_probe_{probe_num}" / + "ks_3_output_pre_v6_curated") + processed_source_data[f"RecordingVP{probe_num}"] = raw_source_data[f"RecordingVP{probe_num}"] + processed_source_data[f"SortingVP{probe_num}"] = dict( + folder_path=str(sorting_path), + keep_good_only=False, + ) + processed_conversion_options[f"RecordingVP{probe_num}"] = dict(stub_test=stub_test, write_electrical_series=False) + processed_conversion_options[f"SortingVP{probe_num}"] = dict(stub_test=stub_test, write_as="processing") + + +def _add_spikeglx_data( + raw_source_data, + raw_conversion_options, + processed_source_data, + processed_conversion_options, + session_paths, + stub_test, +): + """Add SpikeGLX recording data.""" + logging.info("Adding SpikeGLX data") + + # Raw data + spikeglx_dir = [x for x in (session_paths.raw_data / 
"spikeglx").iterdir() if "settling" not in str(x)] + if len(spikeglx_dir) == 0: + logging.info("Found no SpikeGLX data") + elif len(spikeglx_dir) == 1: + spikeglx_dir = spikeglx_dir[0] + else: + raise ValueError(f"Found multiple spikeglx directories {spikeglx_dir}") + ap_file = _get_single_file(spikeglx_dir, suffix="/*.ap.bin") + lfp_file = _get_single_file(spikeglx_dir, suffix="/*.lf.bin") + raw_source_data["RecordingNP"] = dict(file_path=ap_file) + raw_source_data["LF"] = dict(file_path=lfp_file) + processed_source_data["RecordingNP"] = dict(file_path=ap_file) + processed_source_data["LF"] = dict(file_path=lfp_file) + raw_conversion_options["RecordingNP"] = dict(stub_test=stub_test) + raw_conversion_options["LF"] = dict(stub_test=stub_test) + processed_conversion_options["RecordingNP"] = dict(stub_test=stub_test) + processed_conversion_options["LF"] = dict(stub_test=stub_test) + + # Processed data + sorting_path = session_paths.spike_sorting_raw / "np_0" / "ks_3_output_v2" + processed_source_data["SortingNP"] = dict( + folder_path=str(sorting_path), + keep_good_only=False, + ) + processed_conversion_options["SortingNP"] = dict(stub_test=stub_test, write_as="processing") + + +def session_to_nwb( + subject: str, + session: str, + stub_test: bool = False, + overwrite: bool = True, + dandiset_id: Union[str, None] = None +): + """ + Convert a single session to an NWB file. + + Parameters + ---------- + subject : string + Subject, either 'Perle' or 'Elgar'. + session : string + Session date in format 'YYYY-MM-DD'. + stub_test : boolean + Whether or not to generate a preview file by limiting data write to a + few MB. + Default is False. + overwrite : boolean + If the file exists already, True will delete and replace with a new + file, False will append the contents. + Default is True. + dandiset_id : string, optional + If you want to upload the file to the DANDI archive, specify the + six-digit ID here. Requires the DANDI_API_KEY environment variable to + be set. 
To set this in your bash terminal in Linux or macOS, run + export DANDI_API_KEY=... + or in Windows + set DANDI_API_KEY=... + Default is None. + """ + if dandiset_id is not None: + import dandi # check importability + + assert os.getenv("DANDI_API_KEY"), ( + "Unable to find environment variable 'DANDI_API_KEY'. " + "Please retrieve your token from DANDI and set this environment " + "variable." + ) + + logging.info(f"stub_test = {stub_test}") + logging.info(f"overwrite = {overwrite}") + logging.info(f"dandiset_id = {dandiset_id}") + + # Get paths + session_paths = get_session_paths.get_session_paths(subject, + session, + repo=_REPO) + logging.info(f"session_paths: {session_paths}") + + # Get paths for nwb files to write + session_paths.output.mkdir(parents=True, exist_ok=True) + if stub_test: + session_id = f"{session}-stub" + else: + session_id = f"{session}" + raw_nwb_path = session_paths.output / f"sub-{subject}_ses-{session_id}_ecephys.nwb" + processed_nwb_path = session_paths.output / f"sub-{subject}_ses-{session_id}_behavior+ecephys.nwb" + logging.info(f"raw_nwb_path = {raw_nwb_path}") + logging.info(f"processed_nwb_path = {processed_nwb_path}") + logging.info("") + + # Initialize empty data dictionaries + raw_source_data = {} + raw_conversion_options = {} + processed_source_data = {} + processed_conversion_options = {} + + # Add V-Probe data + for probe_num in range(2): + _add_v_probe_data( + raw_source_data=raw_source_data, + raw_conversion_options=raw_conversion_options, + processed_source_data=processed_source_data, + processed_conversion_options=processed_conversion_options, + session_paths=session_paths, + probe_num=probe_num, + stub_test=stub_test, + ) + + # Add SpikeGLX data + _add_spikeglx_data( + raw_source_data=raw_source_data, + raw_conversion_options=raw_conversion_options, + processed_source_data=processed_source_data, + processed_conversion_options=processed_conversion_options, + session_paths=session_paths, + stub_test=stub_test, + ) + + # Add 
behavior data + logging.info("Adding behavior data") + behavior_path = str(session_paths.task_behavior_data) + processed_source_data["EyePosition"] = dict(folder_path=behavior_path) + processed_conversion_options["EyePosition"] = dict() + processed_source_data["PupilSize"] = dict(folder_path=behavior_path) + processed_conversion_options["PupilSize"] = dict() + processed_source_data["RewardLine"] = dict(folder_path=behavior_path) + processed_conversion_options["RewardLine"] = dict() + processed_source_data["Audio"] = dict(folder_path=behavior_path) + processed_conversion_options["Audio"] = dict() + + # Add trials data + logging.info("Adding trials data") + processed_source_data["Trials"] = dict(folder_path=str(session_paths.task_behavior_data)) + processed_conversion_options["Trials"] = dict() + + # Add display data + logging.info("Adding display data") + processed_source_data["Display"] = dict(folder_path=str(session_paths.task_behavior_data)) + processed_conversion_options["Display"] = dict() + + # Create processed data converter + processed_converter = nwb_converter.NWBConverter( + source_data=processed_source_data, + sync_dir=session_paths.sync_pulses, + ) + + # Add datetime and subject name to processed converter + metadata = processed_converter.get_metadata() + metadata["NWBFile"]["session_id"] = session_id + metadata["Subject"]["subject_id"] = subject + metadata["Subject"]["sex"] = _SUBJECT_TO_SEX[subject] + metadata["Subject"]["age"] = _SUBJECT_TO_AGE[subject] + + # EcePhys + probe_metadata_file = session_paths.data_open_source / "probes.metadata.json" + with open(probe_metadata_file, "r") as f: + probe_metadata = json.load(f) + neuropixel_metadata = [x for x in probe_metadata if x["probe_type"] == "Neuropixels"][0] + for entry in metadata["Ecephys"]["ElectrodeGroup"]: + if entry["device"] == "Neuropixel-Imec": + # TODO: uncomment when fixed in pynwb + # entry.update(dict(position=[( + # neuropixel_metadata['coordinates'][0], + # 
neuropixel_metadata['coordinates'][1], + # neuropixel_metadata['depth_from_surface'], + # )] + logging.info("\n\n") + logging.warning(" PROBE COORDINATES NOT IMPLEMENTED\n\n") + + # Update default metadata with the editable in the corresponding yaml file + editable_metadata_path = Path(__file__).parent / "metadata.yaml" + editable_metadata = load_dict_from_file(editable_metadata_path) + metadata = dict_deep_update(metadata, editable_metadata) + + # Check if session_start_time was found/set + if "session_start_time" not in metadata["NWBFile"]: + try: + date = datetime.datetime.strptime(session, "%Y-%m-%d") + date = date.replace(tzinfo=ZoneInfo("US/Eastern")) + except: + raise ValueError("Session start time was not auto-detected. Please provide it " "in `metadata.yaml`") + metadata["NWBFile"]["session_start_time"] = date + + # Run conversion + logging.info("Running processed conversion") + processed_converter.run_conversion( + metadata=metadata, + nwbfile_path=processed_nwb_path, + conversion_options=processed_conversion_options, + overwrite=overwrite, + ) + + logging.info("Running raw data conversion") + metadata["NWBFile"]["identifier"] = str(uuid4()) + raw_converter = nwb_converter.NWBConverter( + source_data=raw_source_data, + sync_dir=str(session_paths.sync_pulses), + ) + raw_converter.run_conversion( + metadata=metadata, + nwbfile_path=raw_nwb_path, + conversion_options=raw_conversion_options, + overwrite=overwrite, + ) + + # Upload to DANDI + if dandiset_id is not None: + logging.info(f"Uploading to dandiset id {dandiset_id}") + automatic_dandi_upload( + dandiset_id=dandiset_id, + nwb_folder_path=session_paths.output, + ) + + +if __name__ == "__main__": + """Run session conversion.""" + subject = sys.argv[1] + session = sys.argv[2] + logging.info(f"\nStarting conversion for {subject}/{session}\n") + session_to_nwb( + subject=subject, + session=session, + stub_test=_STUB_TEST, + overwrite=_OVERWRITE, + dandiset_id=_DANDISET_ID, + ) + logging.info(f"\nFinished 
conversion for {subject}/{session}\n") diff --git a/src/jazayeri_lab_to_nwb/piccato/metadata.yaml b/src/jazayeri_lab_to_nwb/piccato/metadata.yaml new file mode 100644 index 0000000..0ed943b --- /dev/null +++ b/src/jazayeri_lab_to_nwb/piccato/metadata.yaml @@ -0,0 +1,15 @@ +NWBFile: + # related_publications: # no pubs yet + # - https://doi.org/12345 + session_description: + Data from macaque performing multi-object working memory task. Subject is + presented with multiple objects at different locations on a screen. After a + delay, the subject is then cued with one of the objects, now displayed at + the center of the screen. Subject should respond by saccading to the + location of the cued object at its initial presentation. + institution: MIT + lab: Jazayeri + experimenter: + - Watters, Nicholas +Subject: + species: Macaca mulatta diff --git a/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py b/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py new file mode 100644 index 0000000..5fa299f --- /dev/null +++ b/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py @@ -0,0 +1,137 @@ +"""Primary NWBConverter class for this dataset.""" +import json +import logging +from pathlib import Path +from typing import Optional + +import numpy as np +from display_interface import DisplayInterface +from neuroconv import NWBConverter +from neuroconv.datainterfaces import ( + KiloSortSortingInterface, + SpikeGLXRecordingInterface, +) +from neuroconv.datainterfaces.ecephys.basesortingextractorinterface import ( + BaseSortingExtractorInterface, +) +from neuroconv.utils import FolderPathType +from recording_interface import DatRecordingInterface +from spikeinterface.core.waveform_tools import has_exceeding_spikes +from spikeinterface.curation import remove_excess_spikes +from timeseries_interface import ( + AudioInterface, + EyePositionInterface, + PupilSizeInterface, + RewardLineInterface, +) +from trials_interface import TrialsInterface + + +class NWBConverter(NWBConverter): + """Primary 
conversion class for extracellular electrophysiology dataset.""" + + data_interface_classes = dict( + RecordingVP0=DatRecordingInterface, + SortingVP0=KiloSortSortingInterface, + RecordingVP1=DatRecordingInterface, + SortingVP1=KiloSortSortingInterface, + RecordingNP=SpikeGLXRecordingInterface, + LF=SpikeGLXRecordingInterface, + SortingNP=KiloSortSortingInterface, + EyePosition=EyePositionInterface, + PupilSize=PupilSizeInterface, + RewardLine=RewardLineInterface, + Audio=AudioInterface, + Trials=TrialsInterface, + Display=DisplayInterface, + ) + + def __init__(self, source_data: dict[str, dict], sync_dir: Optional[FolderPathType] = None, verbose: bool = True): + """Validate source_data and initialize all data interfaces.""" + super().__init__(source_data=source_data, verbose=verbose) + self.sync_dir = sync_dir + + unit_name_start = 0 + for name, data_interface in self.data_interface_objects.items(): + if isinstance(data_interface, BaseSortingExtractorInterface): + unit_ids = np.array(data_interface.sorting_extractor.unit_ids) + data_interface.sorting_extractor.set_property( + key="unit_name", + values=(unit_ids + unit_name_start).astype(str), + ) + unit_name_start += np.max(unit_ids) + 1 + + def temporally_align_data_interfaces(self): + logging.info("Temporally aligning data interfaces") + + if self.sync_dir is None: + return + sync_dir = Path(self.sync_dir) + + # openephys alignment + with open(sync_dir / "open_ephys" / "recording_start_time") as f: + open_ephys_start_time = float(f.read().strip()) + with open(sync_dir / "open_ephys" / "transform", "r") as f: + open_ephys_transform = json.load(f) + for i in [0, 1]: + if f"RecordingVP{i}" in self.data_interface_objects: + orig_timestamps = self.data_interface_objects[f"RecordingVP{i}"].get_original_timestamps() + aligned_timestamps = open_ephys_transform["intercept"] + open_ephys_transform["coef"] * ( + open_ephys_start_time + orig_timestamps + ) + 
self.data_interface_objects[f"RecordingVP{i}"].set_aligned_timestamps(aligned_timestamps) + # openephys sorting alignment + if f"SortingVP{i}" in self.data_interface_objects: + if has_exceeding_spikes( + recording=self.data_interface_objects[f"RecordingVP{i}"].recording_extractor, + sorting=self.data_interface_objects[f"SortingVP{i}"].sorting_extractor, + ): + print( + f"Spikes exceeding recording found in SortingVP{i}! " + "Removing with `spikeinterface.curation.remove_excess_spikes()`" + ) + self.data_interface_objects[f"SortingVP{i}"].sorting_extractor = remove_excess_spikes( + recording=self.data_interface_objects[f"RecordingVP{i}"].recording_extractor, + sorting=self.data_interface_objects[f"SortingVP{i}"].sorting_extractor, + ) + self.data_interface_objects[f"SortingVP{i}"].register_recording( + self.data_interface_objects[f"RecordingVP{i}"] + ) + + # neuropixel alignment + orig_timestamps = self.data_interface_objects["RecordingNP"].get_original_timestamps() + with open(sync_dir / "spikeglx" / "transform", "r") as f: + spikeglx_transform = json.load(f) + aligned_timestamps = spikeglx_transform["intercept"] + spikeglx_transform["coef"] * orig_timestamps + self.data_interface_objects["RecordingNP"].set_aligned_timestamps(aligned_timestamps) + # neuropixel LFP alignment + orig_timestamps = self.data_interface_objects["LF"].get_original_timestamps() + aligned_timestamps = spikeglx_transform["intercept"] + spikeglx_transform["coef"] * orig_timestamps + self.data_interface_objects["LF"].set_aligned_timestamps(aligned_timestamps) + # neuropixel sorting alignment + if "SortingNP" in self.data_interface_objects: + if has_exceeding_spikes( + recording=self.data_interface_objects["RecordingNP"].recording_extractor, + sorting=self.data_interface_objects["SortingNP"].sorting_extractor, + ): + print( + "Spikes exceeding recording found in SortingNP! 
" + "Removing with `spikeinterface.curation.remove_excess_spikes()`" + ) + self.data_interface_objects["SortingNP"].sorting_extractor = remove_excess_spikes( + recording=self.data_interface_objects["RecordingNP"].recording_extractor, + sorting=self.data_interface_objects["SortingNP"].sorting_extractor, + ) + self.data_interface_objects["SortingNP"].register_recording(self.data_interface_objects["RecordingNP"]) + + # align recording start to 0 + aligned_start_times = [] + for name, data_interface in self.data_interface_objects.items(): + start_time = data_interface.get_timestamps()[0] + aligned_start_times.append(start_time) + zero_time = -1.0 * min(aligned_start_times) + for name, data_interface in self.data_interface_objects.items(): + if isinstance(data_interface, BaseSortingExtractorInterface): + # Do not need to align because recording will be aligned + continue + start_time = data_interface.set_aligned_starting_time(aligned_starting_time=zero_time) diff --git a/src/jazayeri_lab_to_nwb/piccato/recording_interface.py b/src/jazayeri_lab_to_nwb/piccato/recording_interface.py new file mode 100644 index 0000000..216bfaf --- /dev/null +++ b/src/jazayeri_lab_to_nwb/piccato/recording_interface.py @@ -0,0 +1,97 @@ +"""Primary class for recording data.""" +import json +from typing import Optional + +import numpy as np +import probeinterface +from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import ( + BaseRecordingExtractorInterface, +) +from neuroconv.utils import FilePathType +from spikeinterface import BaseRecording + + +class DatRecordingInterface(BaseRecordingExtractorInterface): + ExtractorName = "BinaryRecordingExtractor" + + def __init__( + self, + file_path: FilePathType, + verbose: bool = True, + es_key: str = "ElectricalSeries", + channel_count: int = 64, + dtype: str = "int16", + t_start: float = 0.0, + sampling_frequency: float = 30000.0, + channel_ids: Optional[list] = None, + gain_to_uv: list = 1.0, + offset_to_uv: list = 0.0, + 
probe_metadata_file: Optional[FilePathType] = None, + probe_name: str = "vprobe", + probe_key: Optional[str] = None, + ): + source_data = { + "file_paths": [file_path], + "sampling_frequency": sampling_frequency, + "num_channels": channel_count, + "t_starts": [t_start], + "channel_ids": channel_ids, + "gain_to_uV": gain_to_uv, + "offset_to_uV": offset_to_uv, + "dtype": dtype, + } + super().__init__(verbose=verbose, es_key=es_key, **source_data) + + # this is used for metadata naming + self.probe_name = probe_name + + # add probe information + probe_metadata = None + if probe_metadata_file is not None and probe_key is not None: + with open(probe_metadata_file, "r") as f: + all_probe_metadata = json.load(f) + for entry in all_probe_metadata: + if entry["label"] == probe_key: + probe_metadata = entry + + if probe_metadata is not None and "electrodes_locations" in probe_metadata: + # Grab electrode position from metadata + locations_array = np.array(probe_metadata["electrodes_locations"]) + ndim = locations_array.shape[1] + probe = probeinterface.Probe(ndim=ndim) + probeinterface.set_contacts(locations_array) + else: + # Generate V-probe geometry: 64 channels arranged vertically with 50 um spacing + probe = probeinterface.generate_linear_probe(num_elec=channel_count, ypitch=50) + probe.set_device_channel_indices(np.arange(channel_count)) + probe.name = probe_name + + # set probe to interface recording + self.set_probe(probe, group_mode="by_probe") + + # set group_name property to match electrode group name in metadata + self.recording_extractor.set_property( + key="group_name", + values=[probe_name] * len(self.recording_extractor.channel_ids), + ) + + def get_metadata(self) -> dict: + metadata = super().get_metadata() + metadata["Ecephys"]["Device"] = [ + dict( + name=self.probe_name, + description="64-channel Plexon V-Probe", + manufacturer="Plexon", + ) + ] + electrode_groups = [ + dict( + name=self.probe_name, + description=f"a group representing electrodes on 
{self.probe_name}", + location="unknown", + device=self.probe_name, + ) + ] + metadata["Ecephys"]["ElectrodeGroup"] = electrode_groups + + return metadata diff --git a/src/jazayeri_lab_to_nwb/piccato/requirements.txt b/src/jazayeri_lab_to_nwb/piccato/requirements.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py new file mode 100644 index 0000000..20e1a57 --- /dev/null +++ b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py @@ -0,0 +1,200 @@ +"""Primary classes for timeseries variables. + +The classes here handle variables like eye position, reward line, and audio +stimuli that are not necessarily tied to the trial structure of display updates. +For trial structured variables, see ../trials_interface.py. For variables +pertaining to display updates, see ../frames_interface.py. +""" +import json +from pathlib import Path + +import numpy as np +from hdmf.backends.hdf5 import H5DataIO +from ndx_events import LabeledEvents +from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface +from neuroconv.tools.nwb_helpers import get_module +from neuroconv.utils import FolderPathType +from pynwb import NWBFile, TimeSeries +from pynwb.behavior import SpatialSeries + + +class TimestampsFromArrayInterface(BaseTemporalAlignmentInterface): + """Interface implementing temporal alignment functions with timestamps.""" + + def __init__(self, folder_path: FolderPathType): + super().__init__(folder_path=folder_path) + + def set_original_timestamps(self, original_timestamps: np.ndarray) -> None: + self._original_timestamps = original_timestamps + self._timestamps = np.copy(original_timestamps) + + def get_original_timestamps(self) -> np.ndarray: + return self._original_timestamps + + def set_aligned_timestamps(self, aligned_timestamps: np.ndarray) -> None: + self._timestamps = aligned_timestamps + + def get_timestamps(self): + return 
self._timestamps + + +class EyePositionInterface(TimestampsFromArrayInterface): + """Eye position interface.""" + + def __init__(self, folder_path: FolderPathType): + folder_path = Path(folder_path) + super().__init__(folder_path=folder_path) + + # Find eye position files and check they all exist + eye_h_file = folder_path / "eye_h_calibrated.json" + eye_v_file = folder_path / "eye_v_calibrated.json" + assert eye_h_file.exists(), f"Could not find {eye_h_file}" + assert eye_v_file.exists(), f"Could not find {eye_v_file}" + + # Load eye data + eye_h_data = json.load(open(eye_h_file, "r")) + eye_v_data = json.load(open(eye_v_file, "r")) + eye_h_times = np.array(eye_h_data["times"]) + eye_h_values = 0.5 + (np.array(eye_h_data["values"]) / 20) + eye_v_times = np.array(eye_v_data["times"]) + eye_v_values = 0.5 + (np.array(eye_v_data["values"]) / 20) + + # Check eye_h and eye_v have the same number of samples + if len(eye_h_times) != len(eye_v_times): + raise ValueError(f"len(eye_h_times) = {len(eye_h_times)}, but len(eye_v_times) " f"= {len(eye_v_times)}") + # Check that eye_h_times and eye_v_times are similar to within 0.5ms + if not np.allclose(eye_h_times, eye_v_times, atol=0.0005): + raise ValueError("eye_h_times and eye_v_times are not sufficiently similar") + + # Set data attributes + self.set_original_timestamps(eye_h_times) + self._eye_pos = np.stack([eye_h_values, eye_v_values], axis=1) + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + # Make SpatialSeries + eye_position = SpatialSeries( + name="eye_position", + data=H5DataIO(self._eye_pos, compression="gzip"), + reference_frame="(0,0) is bottom left corner of screen", + unit="meters", + conversion=0.257, + timestamps=H5DataIO(self._timestamps, compression="gzip"), + description="Eye position data recorded by EyeLink camera", + ) + + # Get processing module + module_description = "Contains behavioral data from experiment." 
+ processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) + + # Add data to module + processing_module.add_data_interface(eye_position) + + return nwbfile + + +class PupilSizeInterface(TimestampsFromArrayInterface): + """Pupil size interface.""" + + def __init__(self, folder_path: FolderPathType): + # Find pupil size file + folder_path = Path(folder_path) + pupil_size_file = folder_path / "pupil_size_r.json" + assert pupil_size_file.exists(), f"Could not find {pupil_size_file}" + + # Load pupil size data and set data attributes + pupil_size_data = json.load(open(pupil_size_file, "r")) + self.set_original_timestamps(np.array(pupil_size_data["times"])) + self._pupil_size = np.array(pupil_size_data["values"]) + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + # Make TimeSeries + pupil_size = TimeSeries( + name="pupil_size", + data=H5DataIO(self._pupil_size, compression="gzip"), + unit="pixels", + conversion=1.0, + timestamps=H5DataIO(self._timestamps, compression="gzip"), + description="Pupil size data recorded by EyeLink camera", + ) + + # Get processing module + module_description = "Contains behavioral data from experiment." 
+ processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) + + # Add data to module + processing_module.add_data_interface(pupil_size) + + return nwbfile + + +class RewardLineInterface(TimestampsFromArrayInterface): + """Reward line interface.""" + + def __init__(self, folder_path: FolderPathType): + # Find reward line file + folder_path = Path(folder_path) + reward_line_file = folder_path / "reward_line.json" + assert reward_line_file.exists(), f"Could not find {reward_line_file}" + + # Load reward line data and set data attributes + reward_line_data = json.load(open(reward_line_file, "r")) + self.set_original_timestamps(np.array(reward_line_data["times"])) + self._reward_line = reward_line_data["values"] + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + # Make LabeledEvents + reward_line = LabeledEvents( + name="reward_line", + description=("Reward line data representing events of reward dispenser"), + timestamps=H5DataIO(self._timestamps, compression="gzip"), + data=self._reward_line, + labels=["closed", "open"], + ) + + # Get processing module + module_description = "Contains audio and reward data from experiment." 
+ processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) + + # Add data to module + processing_module.add_data_interface(reward_line) + + return nwbfile + + +class AudioInterface(TimestampsFromArrayInterface): + """Audio interface.""" + + SOUNDS = ["failure_sound", "success_sound"] + + def __init__(self, folder_path: FolderPathType): + # Find sound file + folder_path = Path(folder_path) + sound_file = folder_path / "sound.json" + assert sound_file.exists(), f"Could not find {sound_file}" + + # Load sound data and set data attributes + sound_data = json.load(open(sound_file, "r")) + self.set_original_timestamps(np.array(sound_data["times"])) + audio = np.array(sound_data["values"]) + + sound_to_code = {k: i for i, k in enumerate(AudioInterface.SOUNDS)} + self._sound_codes = [sound_to_code[x] for x in audio] + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + # Make LabeledEvents + audio = LabeledEvents( + name="audio", + description="Audio data representing auditory stimuli events", + timestamps=H5DataIO(self._timestamps, compression="gzip"), + data=self._sound_codes, + labels=AudioInterface.SOUNDS, + ) + + # Get processing module + module_description = "Contains audio and reward data from experiment." 
+ processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) + + # Add data to module + processing_module.add_data_interface(audio) + + return nwbfile diff --git a/src/jazayeri_lab_to_nwb/piccato/trials_interface.py b/src/jazayeri_lab_to_nwb/piccato/trials_interface.py new file mode 100644 index 0000000..c030938 --- /dev/null +++ b/src/jazayeri_lab_to_nwb/piccato/trials_interface.py @@ -0,0 +1,167 @@ +"""Class for converting trial-structured data.""" +import json +from pathlib import Path +from typing import Optional + +import numpy as np +import pandas as pd +from neuroconv.datainterfaces.text.timeintervalsinterface import TimeIntervalsInterface +from neuroconv.utils import FolderPathType +from pynwb import NWBFile + + +class TrialsInterface(TimeIntervalsInterface): + """Class for converting trial-structured data. + + All events that occur exactly once per trial are contained in this + interface. + """ + + KEY_MAP = { + "background_indices": "background_indices", + "broke_fixation": "broke_fixation", + "stimulus_object_identities": "stimulus_object_identities", + "stimulus_object_positions": "stimulus_object_positions", + "stimulus_object_velocities": "stimulus_object_velocities", + "stimulus_object_target": "stimulus_object_target", + "delay_object_blanks": "delay_object_blanks", + "closed_loop_response_position": "closed_loop_response_position", + "closed_loop_response_time": "closed_loop_response_time", + "time_start": "start_time", + "time_phase_fixation": "phase_fixation_time", + "time_phase_stimulus": "phase_stimulus_time", + "time_phase_delay": "phase_delay_time", + "time_phase_cue": "phase_cue_time", + "time_phase_response": "phase_response_time", + "time_phase_reveal": "phase_reveal_time", + "time_phase_iti": "phase_iti_time", + "reward_time": "reward_time", + "reward_duration": "reward_duration", + "response_position": "response_position", + "response_time": "response_time", + } + + def __init__(self, folder_path: 
FolderPathType, verbose: bool = True): + super().__init__(file_path=folder_path, verbose=verbose) + + def get_metadata(self) -> dict: + metadata = super().get_metadata() + metadata["TimeIntervals"] = dict( + trials=dict( + table_name="trials", + table_description="data about each trial", + ) + ) + return metadata + + def get_timestamps(self) -> np.ndarray: + return super(TrialsInterface, self).get_timestamps(column="start_time") + + def set_aligned_starting_time(self, aligned_starting_time: float) -> None: + self.dataframe.closed_loop_response_time += aligned_starting_time + self.dataframe.start_time += aligned_starting_time + self.dataframe.phase_fixation_time += aligned_starting_time + self.dataframe.phase_stimulus_time += aligned_starting_time + self.dataframe.phase_delay_time += aligned_starting_time + self.dataframe.phase_cue_time += aligned_starting_time + self.dataframe.phase_response_time += aligned_starting_time + self.dataframe.phase_reveal_time += aligned_starting_time + self.dataframe.phase_iti_time += aligned_starting_time + self.dataframe.reward_time += aligned_starting_time + self.dataframe.response_time += aligned_starting_time + + def _read_file(self, file_path: FolderPathType): + # Create dataframe with data for each trial + trials = json.load(open(Path(file_path) / "trials.json", "r")) + trials = {k_mapped: [d[k] for d in trials] for k, k_mapped in TrialsInterface.KEY_MAP.items()} + + # Field closed_loop_response_position may have None values, so replace + # those with NaN to make hdf5 conversion work + trials["closed_loop_response_position"] = [ + [np.nan, np.nan] if x is None else x for x in trials["closed_loop_response_position"] + ] + + # Serialize fields with variable-length lists for hdf5 conversion + for k in [ + "stimulus_object_identities", + "stimulus_object_positions", + "stimulus_object_velocities", + "stimulus_object_target", + ]: + trials[k] = [json.dumps(x) for x in trials[k]] + + return pd.DataFrame(trials) + + def 
add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None, tag: str = "trials"): + return super(TrialsInterface, self).add_to_nwbfile( + nwbfile=nwbfile, + metadata=metadata, + tag=tag, + column_descriptions=self.column_descriptions, + ) + + @property + def column_descriptions(self): + column_descriptions = { + "background_indices": ("For each trial, the indices of the background noise pattern " "patch."), + "broke_fixation": ("For each trial, whether the subject broke fixation and the " "trial was aborted"), + "stimulus_object_identities": ( + "For each trial, a serialized list with one element for each " + 'object. Each element is the identity symbol (e.g. "a", "b", ' + '"c", ...) of the corresponding object.' + ), + "stimulus_object_positions": ( + "For each trial, a serialized list with one element for each " + "object. Each element is the initial (x, y) position of the " + "corresponding object, in coordinates of arena width." + ), + "stimulus_object_velocities": ( + "For each trial, a serialized list with one element for each " + "object. Each element is the initial (dx/dt, dy/dt) velocity " + "of the corresponding object, in units of arena width per " + "display update." + ), + "stimulus_object_target": ( + "For each trial, a serialized list with one element for each " + "object. Each element is a boolean indicating whether the " + "corresponding object is ultimately the cued target." + ), + "delay_object_blanks": ( + "For each trial, a boolean indicating whether the objects were " + "rendered as blank discs during the delay phase." + ), + "closed_loop_response_position": ( + "For each trial, the position of the response saccade used by " + "the closed-loop game engine. This is used for determining " + "reward." + ), + "closed_loop_response_time": ( + "For each trial, the time of the response saccade used by " + "the closed-loop game engine. This is used for the timing of " + "reward delivery." 
+ ), + "start_time": "Start time of each trial.", + "phase_fixation_time": ("Time of fixation phase onset for each trial."), + "phase_stimulus_time": ("Time of stimulus phase onset for each trial."), + "phase_delay_time": "Time of delay phase onset for each trial.", + "phase_cue_time": "Time of cue phase onset for each trial.", + "phase_response_time": ("Time of response phase onset for each trial."), + "phase_reveal_time": "Time of reveal phase onset for each trial.", + "phase_iti_time": ("Time of inter-trial interval onset for each trial."), + "reward_time": "Time of reward delivery onset for each trial.", + "reward_duration": "Reward duration for each trial", + "response_position": ( + "Response position for each trial. This differs from " + "closed_loop_response_position in that this is calculated " + "post-hoc from high-resolution eye tracking data, hence is " + "more accurate." + ), + "response_time": ( + "Response time for each trial. This differs from " + "closed_loop_response_time in that this is calculated post-hoc " + "from high-resolution eye tracking data, hence is more " + "accurate." + ), + } + + return column_descriptions From 981faf59f3db8291f11809a8338be41ba72c16be Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Tue, 19 Dec 2023 17:17:44 -0500 Subject: [PATCH 02/28] Everything up to line 280 in main_convert_session works. 
Need to generate MWorks trial variables --- .../piccato/get_session_paths.py | 17 ++++---- .../piccato/main_convert_session.py | 42 +++++++++++-------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py index 1320725..ea5d314 100644 --- a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py +++ b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py @@ -7,6 +7,8 @@ "elgar": "elgar", } +OM_PATH = '/om2/user/apiccato/phys_preprocessing_open_source/phys_data' + SessionPaths = collections.namedtuple( "SessionPaths", [ @@ -25,17 +27,16 @@ def _get_session_paths_openmind(subject, session): # subject_id = SUBJECT_NAME_TO_ID[subject] # Path to write output nwb files to - output_path = f"/om2/user/apiccato/nwb_data/staging/sub-{subject}" + output_path = pathlib.Path( + f"/om2/user/apiccato/nwb_data/staging/sub-{subject}" + ) # Path to the raw data. This is used for reading raw physiology data. - raw_data_path = pathlib.Path( - "/om4/group/jazlab/apiccato/", - "phys_preprocessing_open_source/phys_data/", - f"{session}/raw_data/{subject}/") + raw_data_path = pathlib.Path(f"{OM_PATH}/{subject}/{session}/raw_data/") # Path to task and behavior data. task_behavior_data_path = pathlib.Path( - "/om4/group/jazlab/apiccato/phys_preprocessing_open_source/", + OM_PATH, f"{subject}/{session}" ) @@ -47,13 +48,13 @@ def _get_session_paths_openmind(subject, session): # Path to sync pulses. This is used for reading timescale transformations # between physiology and mworks data streams. sync_pulses_path = pathlib.Path( - "/om4/group/jazlab/phys_preprocessing_open_source", + OM_PATH, f"{subject}/{session}/sync_signals" ) # Path to spike sorting. This is used for reading spike sorted data. 
spike_sorting_raw_path = pathlib.Path( - "/om4/group/jazlab/apiccato/phys_preprocessing_open_source/" + OM_PATH, f"{subject}/{session}/spike_sorting" ) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 2606802..20c31dd 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -122,15 +122,11 @@ def _add_spikeglx_data( logging.info("Adding SpikeGLX data") # Raw data - spikeglx_dir = [x for x in (session_paths.raw_data / "spikeglx").iterdir() if "settling" not in str(x)] - if len(spikeglx_dir) == 0: - logging.info("Found no SpikeGLX data") - elif len(spikeglx_dir) == 1: - spikeglx_dir = spikeglx_dir[0] - else: - raise ValueError(f"Found multiple spikeglx directories {spikeglx_dir}") - ap_file = _get_single_file(spikeglx_dir, suffix="/*.ap.bin") - lfp_file = _get_single_file(spikeglx_dir, suffix="/*.lf.bin") + spikeglx_dir = Path(_get_single_file( + session_paths.raw_data/"spikeglx", + suffix='imec0')) + ap_file = _get_single_file(spikeglx_dir, suffix="*.ap.bin") + lfp_file = _get_single_file(spikeglx_dir, suffix="*.lf.bin") raw_source_data["RecordingNP"] = dict(file_path=ap_file) raw_source_data["LF"] = dict(file_path=lfp_file) processed_source_data["RecordingNP"] = dict(file_path=ap_file) @@ -141,13 +137,15 @@ def _add_spikeglx_data( processed_conversion_options["LF"] = dict(stub_test=stub_test) # Processed data - sorting_path = session_paths.spike_sorting_raw / "np_0" / "ks_3_output_v2" + sorting_path = (session_paths.spike_sorting_raw / + "spikeglx/kilosort2_5/sorter_output" + ) processed_source_data["SortingNP"] = dict( folder_path=str(sorting_path), keep_good_only=False, ) - processed_conversion_options["SortingNP"] = dict(stub_test=stub_test, write_as="processing") - + processed_conversion_options["SortingNP"] = dict(stub_test=stub_test, + write_as="processing") def session_to_nwb( subject: str, @@ 
-207,12 +205,19 @@ def session_to_nwb( session_id = f"{session}-stub" else: session_id = f"{session}" - raw_nwb_path = session_paths.output / f"sub-{subject}_ses-{session_id}_ecephys.nwb" - processed_nwb_path = session_paths.output / f"sub-{subject}_ses-{session_id}_behavior+ecephys.nwb" + raw_nwb_path = ( + session_paths.output / + f"sub-{subject}_ses-{session_id}_ecephys.nwb" + ) + processed_nwb_path = ( + session_paths.output / + f"sub-{subject}_ses-{session_id}_behavior+ecephys.nwb" + ) logging.info(f"raw_nwb_path = {raw_nwb_path}") logging.info(f"processed_nwb_path = {processed_nwb_path}") logging.info("") + # Initialize empty data dictionaries raw_source_data = {} raw_conversion_options = {} @@ -244,7 +249,7 @@ def session_to_nwb( # Add behavior data logging.info("Adding behavior data") behavior_path = str(session_paths.task_behavior_data) - processed_source_data["EyePosition"] = dict(folder_path=behavior_path) + processed_source_data["EyePosition"] = dict(folder_path=behavior_path) processed_conversion_options["EyePosition"] = dict() processed_source_data["PupilSize"] = dict(folder_path=behavior_path) processed_conversion_options["PupilSize"] = dict() @@ -255,12 +260,14 @@ def session_to_nwb( # Add trials data logging.info("Adding trials data") - processed_source_data["Trials"] = dict(folder_path=str(session_paths.task_behavior_data)) + processed_source_data["Trials"] = dict( + folder_path=str(session_paths.task_behavior_data)) processed_conversion_options["Trials"] = dict() # Add display data logging.info("Adding display data") - processed_source_data["Display"] = dict(folder_path=str(session_paths.task_behavior_data)) + processed_source_data["Display"] = dict( + folder_path=str(session_paths.task_behavior_data)) processed_conversion_options["Display"] = dict() # Create processed data converter @@ -269,6 +276,7 @@ def session_to_nwb( sync_dir=session_paths.sync_pulses, ) + return # Add datetime and subject name to processed converter metadata = 
processed_converter.get_metadata() metadata["NWBFile"]["session_id"] = session_id From c280948f3de5ab9fee3530c3e0c89b6359dadb82 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Tue, 2 Jan 2024 16:16:21 -0500 Subject: [PATCH 03/28] reading in display and timeseries interface data --- .../piccato/display_interface.py | 33 +++++-------- .../piccato/get_session_paths.py | 25 ++++------ .../piccato/main_convert_session.py | 16 +++---- .../piccato/timeseries_interface.py | 41 ++++++++++------ .../piccato/trials_interface.py | 48 ++++++++++--------- 5 files changed, 81 insertions(+), 82 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/display_interface.py b/src/jazayeri_lab_to_nwb/piccato/display_interface.py index f5ecadd..fdfcf1a 100644 --- a/src/jazayeri_lab_to_nwb/piccato/display_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/display_interface.py @@ -6,7 +6,9 @@ import numpy as np import pandas as pd -from neuroconv.datainterfaces.text.timeintervalsinterface import TimeIntervalsInterface +from neuroconv.datainterfaces.text.timeintervalsinterface import ( + TimeIntervalsInterface +) from neuroconv.utils import FolderPathType from pynwb import NWBFile @@ -19,8 +21,6 @@ class DisplayInterface(TimeIntervalsInterface): """ KEY_MAP = { - "frame_object_positions": "object_positions", - "frame_fixation_cross_scale": "fixation_cross_scale", "frame_closed_loop_gaze_position": "closed_loop_eye_position", "frame_task_phase": "task_phase", "frame_display_times": "start_time", @@ -40,24 +40,28 @@ def get_metadata(self) -> dict: return metadata def get_timestamps(self) -> np.ndarray: - return super(DisplayInterface, self).get_timestamps(column="start_time") + return super(DisplayInterface, self).get_timestamps( + column="start_time") def set_aligned_starting_time(self, aligned_starting_time: float) -> None: self.dataframe.start_time += aligned_starting_time def _read_file(self, file_path: FolderPathType): + # Create dataframe with data for each frame trials = 
json.load(open(Path(file_path) / "trials.json", "r")) frames = { - k_mapped: list(itertools.chain(*[d[k] for d in trials])) for k, k_mapped in DisplayInterface.KEY_MAP.items() + k_mapped: list(itertools.chain(*[d[k] for d in trials])) + for k, k_mapped in DisplayInterface.KEY_MAP.items() } - # Serialize object_positions data for hdf5 conversion to work - frames["object_positions"] = [json.dumps(x) for x in frames["object_positions"]] - + for k in frames.keys(): + print(k, len(frames[k])) + import pdb; pdb.set_trace() return pd.DataFrame(frames) - def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None, tag: str = "display"): + def add_to_nwbfile(self, nwbfile: NWBFile, + metadata: Optional[dict] = None, tag: str = "display"): return super(DisplayInterface, self).add_to_nwbfile( nwbfile=nwbfile, metadata=metadata, @@ -68,17 +72,6 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None, tag: @property def column_descriptions(self): column_descriptions = { - "object_positions": ( - "For each frame, a serialized list with one element for each " - "object. Each element is an (x, y) position of the " - "corresponding object, in coordinates of arena width." - ), - "fixation_cross_scale": ( - "For each frame, the scale of the central fixation cross. " - "Fixation cross scale grows as the eye position deviates from " - "the center of the fixation cross, to provide a cue to " - "maintain good fixation." - ), "closed_loop_eye_position": ( "For each frame, the eye position in the close-loop task " "engine. 
This was used to for real-time eye position " diff --git a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py index ea5d314..d5597bb 100644 --- a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py +++ b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py @@ -14,8 +14,7 @@ [ "output", "raw_data", - # "data_open_source", - "task_behavior_data", + "behavior_task_data", "sync_pulses", "spike_sorting_raw", ], @@ -35,34 +34,25 @@ def _get_session_paths_openmind(subject, session): raw_data_path = pathlib.Path(f"{OM_PATH}/{subject}/{session}/raw_data/") # Path to task and behavior data. - task_behavior_data_path = pathlib.Path( - OM_PATH, - f"{subject}/{session}" + behavior_task_data_path = pathlib.Path( + f"{OM_PATH}/{subject}/{session}/behavior_task" ) - # # Path to open-source data. This is used for reading behavior and task data. - # data_open_source_path = ( - # "/om4/group/jazlab/nwatters/multi_prediction/datasets/data_open_source/" f"Subjects/{subject_id}/{session}/001" - # ) - # Path to sync pulses. This is used for reading timescale transformations # between physiology and mworks data streams. sync_pulses_path = pathlib.Path( - OM_PATH, - f"{subject}/{session}/sync_signals" + f"{OM_PATH}/{subject}/{session}/sync_signals" ) # Path to spike sorting. This is used for reading spike sorted data. 
spike_sorting_raw_path = pathlib.Path( - OM_PATH, - f"{subject}/{session}/spike_sorting" + f"{OM_PATH}/{subject}/{session}/spike_sorting" ) session_paths = SessionPaths( output=output_path, raw_data=raw_data_path, - # data_open_source=pathlib.Path(data_open_source_path), - task_behavior_data=task_behavior_data_path, + behavior_task_data=pathlib.Path(behavior_task_data_path), sync_pulses=sync_pulses_path, spike_sorting_raw=spike_sorting_raw_path, ) @@ -99,7 +89,8 @@ def _get_session_paths_globus(subject, session): output=pathlib.Path(output_path), raw_data=pathlib.Path(raw_data_path), data_open_source=pathlib.Path(data_open_source_path), - task_behavior_data=pathlib.Path(task_behavior_data_path), + behavior_data=pathlib.Path(task_behavior_data_path), + task_data=pathlib.Path(task_behavior_data_path), sync_pulses=pathlib.Path(sync_pulses_path), spike_sorting_raw=pathlib.Path(spike_sorting_raw_path), ) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 20c31dd..15750da 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -147,6 +147,7 @@ def _add_spikeglx_data( processed_conversion_options["SortingNP"] = dict(stub_test=stub_test, write_as="processing") + def session_to_nwb( subject: str, session: str, @@ -217,7 +218,6 @@ def session_to_nwb( logging.info(f"processed_nwb_path = {processed_nwb_path}") logging.info("") - # Initialize empty data dictionaries raw_source_data = {} raw_conversion_options = {} @@ -248,26 +248,26 @@ def session_to_nwb( # Add behavior data logging.info("Adding behavior data") - behavior_path = str(session_paths.task_behavior_data) - processed_source_data["EyePosition"] = dict(folder_path=behavior_path) + behavior_task_path = str(session_paths.behavior_task_data) + processed_source_data["EyePosition"] = dict(folder_path=behavior_task_path) processed_conversion_options["EyePosition"] = 
dict() - processed_source_data["PupilSize"] = dict(folder_path=behavior_path) + processed_source_data["PupilSize"] = dict(folder_path=behavior_task_path) processed_conversion_options["PupilSize"] = dict() - processed_source_data["RewardLine"] = dict(folder_path=behavior_path) + processed_source_data["RewardLine"] = dict(folder_path=behavior_task_path) processed_conversion_options["RewardLine"] = dict() - processed_source_data["Audio"] = dict(folder_path=behavior_path) + processed_source_data["Audio"] = dict(folder_path=behavior_task_path) processed_conversion_options["Audio"] = dict() # Add trials data logging.info("Adding trials data") processed_source_data["Trials"] = dict( - folder_path=str(session_paths.task_behavior_data)) + folder_path=str(session_paths.behavior_task_data)) processed_conversion_options["Trials"] = dict() # Add display data logging.info("Adding display data") processed_source_data["Display"] = dict( - folder_path=str(session_paths.task_behavior_data)) + folder_path=str(session_paths.behavior_task_data)) processed_conversion_options["Display"] = dict() # Create processed data converter diff --git a/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py index 20e1a57..3ee1a71 100644 --- a/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py @@ -1,9 +1,9 @@ """Primary classes for timeseries variables. The classes here handle variables like eye position, reward line, and audio -stimuli that are not necessarily tied to the trial structure of display updates. -For trial structured variables, see ../trials_interface.py. For variables -pertaining to display updates, see ../frames_interface.py. +stimuli that are not necessarily tied to the trial structure of display +updates. For trial structured variables, see ../trials_interface.py. +For variables pertaining to display updates, see ../frames_interface.py. 
""" import json from pathlib import Path @@ -11,7 +11,9 @@ import numpy as np from hdmf.backends.hdf5 import H5DataIO from ndx_events import LabeledEvents -from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface +from neuroconv.basetemporalalignmentinterface import ( + BaseTemporalAlignmentInterface +) from neuroconv.tools.nwb_helpers import get_module from neuroconv.utils import FolderPathType from pynwb import NWBFile, TimeSeries @@ -46,8 +48,8 @@ def __init__(self, folder_path: FolderPathType): super().__init__(folder_path=folder_path) # Find eye position files and check they all exist - eye_h_file = folder_path / "eye_h_calibrated.json" - eye_v_file = folder_path / "eye_v_calibrated.json" + eye_h_file = folder_path / "eye.h.json" + eye_v_file = folder_path / "eye.v.json" assert eye_h_file.exists(), f"Could not find {eye_h_file}" assert eye_v_file.exists(), f"Could not find {eye_v_file}" @@ -61,10 +63,12 @@ def __init__(self, folder_path: FolderPathType): # Check eye_h and eye_v have the same number of samples if len(eye_h_times) != len(eye_v_times): - raise ValueError(f"len(eye_h_times) = {len(eye_h_times)}, but len(eye_v_times) " f"= {len(eye_v_times)}") + raise ValueError(f"len(eye_h_times) = {len(eye_h_times)}, " + "but len(eye_v_times) " f"= {len(eye_v_times)}") # Check that eye_h_times and eye_v_times are similar to within 0.5ms if not np.allclose(eye_h_times, eye_v_times, atol=0.0005): - raise ValueError("eye_h_times and eye_v_times are not sufficiently similar") + raise ValueError( + "eye_h_times and eye_v_times are not sufficiently similar") # Set data attributes self.set_original_timestamps(eye_h_times) @@ -84,7 +88,8 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): # Get processing module module_description = "Contains behavioral data from experiment." 
- processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) + processing_module = get_module(nwbfile=nwbfile, name="behavior", + description=module_description) # Add data to module processing_module.add_data_interface(eye_position) @@ -98,7 +103,7 @@ class PupilSizeInterface(TimestampsFromArrayInterface): def __init__(self, folder_path: FolderPathType): # Find pupil size file folder_path = Path(folder_path) - pupil_size_file = folder_path / "pupil_size_r.json" + pupil_size_file = folder_path / "pupil.size.json" assert pupil_size_file.exists(), f"Could not find {pupil_size_file}" # Load pupil size data and set data attributes @@ -119,7 +124,9 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): # Get processing module module_description = "Contains behavioral data from experiment." - processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) + processing_module = get_module(nwbfile=nwbfile, + name="behavior", + description=module_description) # Add data to module processing_module.add_data_interface(pupil_size) @@ -133,7 +140,7 @@ class RewardLineInterface(TimestampsFromArrayInterface): def __init__(self, folder_path: FolderPathType): # Find reward line file folder_path = Path(folder_path) - reward_line_file = folder_path / "reward_line.json" + reward_line_file = folder_path / "reward.line.json" assert reward_line_file.exists(), f"Could not find {reward_line_file}" # Load reward line data and set data attributes @@ -145,7 +152,8 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): # Make LabeledEvents reward_line = LabeledEvents( name="reward_line", - description=("Reward line data representing events of reward dispenser"), + description=( + "Reward line data representing events of reward dispenser"), timestamps=H5DataIO(self._timestamps, compression="gzip"), data=self._reward_line, labels=["closed", "open"], @@ -153,7 +161,9 @@ def add_to_nwbfile(self, nwbfile: 
NWBFile, metadata: dict): # Get processing module module_description = "Contains audio and reward data from experiment." - processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) + processing_module = get_module(nwbfile=nwbfile, + name="behavior", + description=module_description) # Add data to module processing_module.add_data_interface(reward_line) @@ -192,7 +202,8 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): # Get processing module module_description = "Contains audio and reward data from experiment." - processing_module = get_module(nwbfile=nwbfile, name="behavior", description=module_description) + processing_module = get_module(nwbfile=nwbfile, name="behavior", + description=module_description) # Add data to module processing_module.add_data_interface(audio) diff --git a/src/jazayeri_lab_to_nwb/piccato/trials_interface.py b/src/jazayeri_lab_to_nwb/piccato/trials_interface.py index c030938..c4a90b3 100644 --- a/src/jazayeri_lab_to_nwb/piccato/trials_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/trials_interface.py @@ -22,12 +22,11 @@ class TrialsInterface(TimeIntervalsInterface): "broke_fixation": "broke_fixation", "stimulus_object_identities": "stimulus_object_identities", "stimulus_object_positions": "stimulus_object_positions", - "stimulus_object_velocities": "stimulus_object_velocities", "stimulus_object_target": "stimulus_object_target", - "delay_object_blanks": "delay_object_blanks", "closed_loop_response_position": "closed_loop_response_position", "closed_loop_response_time": "closed_loop_response_time", "time_start": "start_time", + # 'trial_type': 'trial_type', "time_phase_fixation": "phase_fixation_time", "time_phase_stimulus": "phase_stimulus_time", "time_phase_delay": "phase_delay_time", @@ -73,26 +72,30 @@ def set_aligned_starting_time(self, aligned_starting_time: float) -> None: def _read_file(self, file_path: FolderPathType): # Create dataframe with data for each trial trials = 
json.load(open(Path(file_path) / "trials.json", "r")) - trials = {k_mapped: [d[k] for d in trials] for k, k_mapped in TrialsInterface.KEY_MAP.items()} + trials = {k_mapped: [d[k] for d in trials] + for k, k_mapped in TrialsInterface.KEY_MAP.items()} # Field closed_loop_response_position may have None values, so replace # those with NaN to make hdf5 conversion work trials["closed_loop_response_position"] = [ - [np.nan, np.nan] if x is None else x for x in trials["closed_loop_response_position"] + [np.nan, np.nan] if x is None else x + for x in trials["closed_loop_response_position"] ] # Serialize fields with variable-length lists for hdf5 conversion for k in [ "stimulus_object_identities", "stimulus_object_positions", - "stimulus_object_velocities", "stimulus_object_target", ]: trials[k] = [json.dumps(x) for x in trials[k]] return pd.DataFrame(trials) - def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None, tag: str = "trials"): + def add_to_nwbfile(self, + nwbfile: NWBFile, + metadata: Optional[dict] = None, + tag: str = "trials"): return super(TrialsInterface, self).add_to_nwbfile( nwbfile=nwbfile, metadata=metadata, @@ -103,33 +106,26 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None, tag: @property def column_descriptions(self): column_descriptions = { - "background_indices": ("For each trial, the indices of the background noise pattern " "patch."), - "broke_fixation": ("For each trial, whether the subject broke fixation and the " "trial was aborted"), + "background_indices": ("For each trial, the indices of the " + "background noise pattern patch."), + "broke_fixation": ("For each trial, whether the subject broke " + "fixation and the trial was aborted"), "stimulus_object_identities": ( "For each trial, a serialized list with one element for each " 'object. Each element is the identity symbol (e.g. "a", "b", ' '"c", ...) of the corresponding object.' 
), + "trial_type": ("For each trial, whether condition is LTM or STM"), "stimulus_object_positions": ( "For each trial, a serialized list with one element for each " "object. Each element is the initial (x, y) position of the " "corresponding object, in coordinates of arena width." ), - "stimulus_object_velocities": ( - "For each trial, a serialized list with one element for each " - "object. Each element is the initial (dx/dt, dy/dt) velocity " - "of the corresponding object, in units of arena width per " - "display update." - ), "stimulus_object_target": ( "For each trial, a serialized list with one element for each " "object. Each element is a boolean indicating whether the " "corresponding object is ultimately the cued target." ), - "delay_object_blanks": ( - "For each trial, a boolean indicating whether the objects were " - "rendered as blank discs during the delay phase." - ), "closed_loop_response_position": ( "For each trial, the position of the response saccade used by " "the closed-loop game engine. This is used for determining " @@ -141,13 +137,21 @@ def column_descriptions(self): "reward delivery." ), "start_time": "Start time of each trial.", - "phase_fixation_time": ("Time of fixation phase onset for each trial."), - "phase_stimulus_time": ("Time of stimulus phase onset for each trial."), + "phase_fixation_time": ( + "Time of fixation phase onset for each trial." + ), + "phase_stimulus_time": ( + "Time of stimulus phase onset for each trial." + ), "phase_delay_time": "Time of delay phase onset for each trial.", "phase_cue_time": "Time of cue phase onset for each trial.", - "phase_response_time": ("Time of response phase onset for each trial."), + "phase_response_time": ( + "Time of response phase onset for each trial." + ), "phase_reveal_time": "Time of reveal phase onset for each trial.", - "phase_iti_time": ("Time of inter-trial interval onset for each trial."), + "phase_iti_time": ( + "Time of inter-trial interval onset for each trial." 
+ ), "reward_time": "Time of reward delivery onset for each trial.", "reward_duration": "Reward duration for each trial", "response_position": ( From fd88d6d56a258202bcc7b4da231dcc79441b084a Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Tue, 2 Jan 2024 17:04:01 -0500 Subject: [PATCH 04/28] Display interface is now functional --- src/jazayeri_lab_to_nwb/piccato/display_interface.py | 3 --- src/jazayeri_lab_to_nwb/piccato/main_convert_session.py | 9 +++++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/display_interface.py b/src/jazayeri_lab_to_nwb/piccato/display_interface.py index fdfcf1a..59b06cf 100644 --- a/src/jazayeri_lab_to_nwb/piccato/display_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/display_interface.py @@ -55,9 +55,6 @@ def _read_file(self, file_path: FolderPathType): for k, k_mapped in DisplayInterface.KEY_MAP.items() } - for k in frames.keys(): - print(k, len(frames[k])) - import pdb; pdb.set_trace() return pd.DataFrame(frames) def add_to_nwbfile(self, nwbfile: NWBFile, diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 15750da..fc064e2 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -50,10 +50,10 @@ logging.getLogger().setLevel(logging.INFO) _SUBJECT_TO_SEX = { - "Elgar": "M", + "elgar": "M", } _SUBJECT_TO_AGE = { - "Elgar": "P10Y", # Born 5/2/2012 + "elgar": "P10Y", # Born 5/2/2012 } @@ -276,7 +276,6 @@ def session_to_nwb( sync_dir=session_paths.sync_pulses, ) - return # Add datetime and subject name to processed converter metadata = processed_converter.get_metadata() metadata["NWBFile"]["session_id"] = session_id @@ -285,9 +284,11 @@ def session_to_nwb( metadata["Subject"]["age"] = _SUBJECT_TO_AGE[subject] # EcePhys - probe_metadata_file = session_paths.data_open_source / "probes.metadata.json" + probe_metadata_file = 
(session_paths.data_open_source / + "probes.metadata.json") with open(probe_metadata_file, "r") as f: probe_metadata = json.load(f) + return neuropixel_metadata = [x for x in probe_metadata if x["probe_type"] == "Neuropixels"][0] for entry in metadata["Ecephys"]["ElectrodeGroup"]: if entry["device"] == "Neuropixel-Imec": From daebee030de5317f782ec005d7d9dc8252893b28 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Tue, 2 Jan 2024 22:16:14 -0500 Subject: [PATCH 05/28] Entire conversion pipeline seems to run! --- .../piccato/get_session_paths.py | 6 + .../piccato/logs/34316882.out | 37 +++ .../piccato/logs/34316908.out | 47 ++++ .../piccato/main_convert_session.py | 8 +- .../piccato/main_convert_session.sh | 14 ++ .../piccato/nwb_converter.py | 215 +++++++++++------- 6 files changed, 238 insertions(+), 89 deletions(-) create mode 100644 src/jazayeri_lab_to_nwb/piccato/logs/34316882.out create mode 100644 src/jazayeri_lab_to_nwb/piccato/logs/34316908.out create mode 100644 src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh diff --git a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py index d5597bb..aa5ceff 100644 --- a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py +++ b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py @@ -15,6 +15,7 @@ "output", "raw_data", "behavior_task_data", + "session_data", "sync_pulses", "spike_sorting_raw", ], @@ -49,9 +50,14 @@ def _get_session_paths_openmind(subject, session): f"{OM_PATH}/{subject}/{session}/spike_sorting" ) + session_path = pathlib.Path( + f"{OM_PATH}/{subject}/{session}/" + ) + session_paths = SessionPaths( output=output_path, raw_data=raw_data_path, + session_data=session_path, behavior_task_data=pathlib.Path(behavior_task_data_path), sync_pulses=sync_pulses_path, spike_sorting_raw=spike_sorting_raw_path, diff --git a/src/jazayeri_lab_to_nwb/piccato/logs/34316882.out b/src/jazayeri_lab_to_nwb/piccato/logs/34316882.out new file mode 100644 index 
0000000..d9b7372 --- /dev/null +++ b/src/jazayeri_lab_to_nwb/piccato/logs/34316882.out @@ -0,0 +1,37 @@ +INFO:root: +Starting conversion for elgar/2023-11-30 + +INFO:root:stub_test = True +INFO:root:overwrite = True +INFO:root:dandiset_id = None +INFO:root:session_paths: SessionPaths(output=PosixPath('/om2/user/apiccato/nwb_data/staging/sub-elgar'), raw_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/raw_data'), behavior_task_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/behavior_task'), session_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30'), sync_pulses=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/sync_signals'), spike_sorting_raw=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/spike_sorting')) +INFO:root:raw_nwb_path = /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_ecephys.nwb +INFO:root:processed_nwb_path = /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_behavior+ecephys.nwb +INFO:root: +INFO:root:Adding SpikeGLX data +INFO:root:Adding behavior data +INFO:root:Adding trials data +INFO:root:Adding display data +INFO:root: + + +WARNING:root: PROBE COORDINATES NOT IMPLEMENTED + + +INFO:root:Running processed conversion +INFO:root:Temporally aligning data interfaces +/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/spikeinterface/core/baserecording.py:413: UserWarning: Setting times with Recording.set_times() is not recommended because times are not always propagated to across preprocessingUse use this carefully! + warn( +/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/pynwb/file.py:471: UserWarning: Date is missing timezone information. Updating to local timezone. 
+ args_to_set['session_start_time'] = _add_missing_timezone(session_start_time) +/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py:22: UserWarning: Custom description given to get_module does not match existing module description! Ignoring custom description. + warn( +/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py:22: UserWarning: Custom description given to get_module does not match existing module description! Ignoring custom description. + warn( +INFO:root: +Finished conversion for elgar/2023-11-30 + +Source data is valid! +Metadata is valid! +conversion_options is valid! +NWB file saved at /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_behavior+ecephys.nwb! diff --git a/src/jazayeri_lab_to_nwb/piccato/logs/34316908.out b/src/jazayeri_lab_to_nwb/piccato/logs/34316908.out new file mode 100644 index 0000000..a633af9 --- /dev/null +++ b/src/jazayeri_lab_to_nwb/piccato/logs/34316908.out @@ -0,0 +1,47 @@ +INFO:root: +Starting conversion for elgar/2023-11-30 + +INFO:root:stub_test = True +INFO:root:overwrite = True +INFO:root:dandiset_id = None +INFO:root:session_paths: SessionPaths(output=PosixPath('/om2/user/apiccato/nwb_data/staging/sub-elgar'), raw_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/raw_data'), behavior_task_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/behavior_task'), session_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30'), sync_pulses=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/sync_signals'), spike_sorting_raw=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/spike_sorting')) +INFO:root:raw_nwb_path = 
/om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_ecephys.nwb +INFO:root:processed_nwb_path = /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_behavior+ecephys.nwb +INFO:root: +INFO:root:Adding SpikeGLX data +INFO:root:Adding behavior data +INFO:root:Adding trials data +INFO:root:Adding display data +INFO:root: + + +WARNING:root: PROBE COORDINATES NOT IMPLEMENTED + + +INFO:root:Running processed conversion +INFO:root:Temporally aligning data interfaces +/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/spikeinterface/core/baserecording.py:413: UserWarning: Setting times with Recording.set_times() is not recommended because times are not always propagated to across preprocessingUse use this carefully! + warn( +/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/pynwb/file.py:471: UserWarning: Date is missing timezone information. Updating to local timezone. + args_to_set['session_start_time'] = _add_missing_timezone(session_start_time) +/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py:22: UserWarning: Custom description given to get_module does not match existing module description! Ignoring custom description. + warn( +/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py:22: UserWarning: Custom description given to get_module does not match existing module description! Ignoring custom description. 
+ warn( +INFO:root:Running raw data conversion +INFO:root:Temporally aligning data interfaces +/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/spikeinterface/core/baserecording.py:413: UserWarning: Setting times with Recording.set_times() is not recommended because times are not always propagated to across preprocessingUse use this carefully! + warn( +/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/pynwb/file.py:471: UserWarning: Date is missing timezone information. Updating to local timezone. + args_to_set['session_start_time'] = _add_missing_timezone(session_start_time) +INFO:root: +Finished conversion for elgar/2023-11-30 + +Source data is valid! +Metadata is valid! +conversion_options is valid! +NWB file saved at /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_behavior+ecephys.nwb! +Source data is valid! +Metadata is valid! +conversion_options is valid! +NWB file saved at /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_ecephys.nwb! 
diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index fc064e2..fa96c0a 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -284,12 +284,14 @@ def session_to_nwb( metadata["Subject"]["age"] = _SUBJECT_TO_AGE[subject] # EcePhys - probe_metadata_file = (session_paths.data_open_source / + probe_metadata_file = (session_paths.session_data / "probes.metadata.json") with open(probe_metadata_file, "r") as f: probe_metadata = json.load(f) - return - neuropixel_metadata = [x for x in probe_metadata if x["probe_type"] == "Neuropixels"][0] + + neuropixel_metadata = [x for x in probe_metadata + if x["probe_type"] == "Neuropixel"][0] + for entry in metadata["Ecephys"]["ElectrodeGroup"]: if entry["device"] == "Neuropixel-Imec": # TODO: uncomment when fixed in pynwb diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh new file mode 100644 index 0000000..5a8b5d6 --- /dev/null +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +#SBATCH -o /om2/user/apiccato/jazayeri-lab-to-nwb/src/jazayeri_lab_to_nwb/piccato/logs/%A.out +#SBATCH -t 06:00:00 +#SBATCH -n 1 +#SBATCH --mem-per-cpu 30G +#SBATCH --mail-type=NONE +#SBATCH --mail-user=apiccato@mit.edu +#SBATCH --partition=jazayeri + +source ~/.bashrc +conda activate jazayeri_lab_to_nwb_env +cd /om2/user/apiccato/jazayeri-lab-to-nwb +python src/jazayeri_lab_to_nwb/piccato/main_convert_session.py elgar 2023-11-30 \ No newline at end of file diff --git a/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py b/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py index 5fa299f..36bf2ea 100644 --- a/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py +++ b/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py @@ -1,58 +1,103 @@ """Primary NWBConverter class for this dataset.""" + import json 
import logging from pathlib import Path from typing import Optional +import display_interface +import neuroconv import numpy as np -from display_interface import DisplayInterface -from neuroconv import NWBConverter -from neuroconv.datainterfaces import ( - KiloSortSortingInterface, - SpikeGLXRecordingInterface, -) +import timeseries_interface +import trials_interface from neuroconv.datainterfaces.ecephys.basesortingextractorinterface import ( BaseSortingExtractorInterface, ) from neuroconv.utils import FolderPathType from recording_interface import DatRecordingInterface -from spikeinterface.core.waveform_tools import has_exceeding_spikes -from spikeinterface.curation import remove_excess_spikes -from timeseries_interface import ( - AudioInterface, - EyePositionInterface, - PupilSizeInterface, - RewardLineInterface, -) -from trials_interface import TrialsInterface +from spikeinterface import curation + + +def _trim_excess_spikes( + recording_interface, sorting_interface, max_excess_samples=300 +): + """Trim sorting object spikes that exceed the recording number of samples. + + Args: + recording: BaseRecording instance. The recording object. + sorting: BaseSorting instance. The sorting object. + max_excess_samples: Int. If a spike exists more than this number of + samples beyond the end of the recording, an error is raised. This is + in units of samples, which is typically 30000Hz. + + Returns: + bool True if exceeding spikes, False otherwise. 
+ """ + recording_extractor = recording_interface.recording_extractor + sorting_extractor = sorting_interface.sorting_extractor + spike_vector = sorting_extractor.to_spike_vector() + has_exceeding_spikes = False + for segment_index in range(recording_extractor.get_num_segments()): + start_seg_ind, end_seg_ind = np.searchsorted( + spike_vector["segment_index"], [segment_index, segment_index + 1] + ) + spike_vector_seg = spike_vector[start_seg_ind:end_seg_ind] + if len(spike_vector_seg) > 0: + last_spike_vector_sample = spike_vector_seg["sample_index"][-1] + last_recording_sample = recording_extractor.get_num_samples( + segment_index=segment_index + ) + excess = last_spike_vector_sample - last_recording_sample + 1 + if excess > max_excess_samples: + raise ValueError( + f"Spikes detected at least {excess} samples after the end " + "of the recording." + ) + elif excess > 0: + has_exceeding_spikes = True + + if has_exceeding_spikes: + # Sometimes kilosort can detect spike that happen very + # slightly after the recording stopped + sorting_interface.sorting_extractor = curation.remove_excess_spikes( + recording=recording_extractor, + sorting=sorting_extractor, + ) + return -class NWBConverter(NWBConverter): + +class NWBConverter(neuroconv.NWBConverter): """Primary conversion class for extracellular electrophysiology dataset.""" data_interface_classes = dict( RecordingVP0=DatRecordingInterface, - SortingVP0=KiloSortSortingInterface, + SortingVP0=neuroconv.datainterfaces.KiloSortSortingInterface, RecordingVP1=DatRecordingInterface, - SortingVP1=KiloSortSortingInterface, - RecordingNP=SpikeGLXRecordingInterface, - LF=SpikeGLXRecordingInterface, - SortingNP=KiloSortSortingInterface, - EyePosition=EyePositionInterface, - PupilSize=PupilSizeInterface, - RewardLine=RewardLineInterface, - Audio=AudioInterface, - Trials=TrialsInterface, - Display=DisplayInterface, + SortingVP1=neuroconv.datainterfaces.KiloSortSortingInterface, + 
RecordingNP=neuroconv.datainterfaces.SpikeGLXRecordingInterface, + LF=neuroconv.datainterfaces.SpikeGLXRecordingInterface, + SortingNP=neuroconv.datainterfaces.KiloSortSortingInterface, + EyePosition=timeseries_interface.EyePositionInterface, + PupilSize=timeseries_interface.PupilSizeInterface, + RewardLine=timeseries_interface.RewardLineInterface, + Audio=timeseries_interface.AudioInterface, + Trials=trials_interface.TrialsInterface, + Display=display_interface.DisplayInterface, ) - def __init__(self, source_data: dict[str, dict], sync_dir: Optional[FolderPathType] = None, verbose: bool = True): + def __init__( + self, + source_data: dict[str, dict], + sync_dir: Optional[FolderPathType] = None, + verbose: bool = True, + ): """Validate source_data and initialize all data interfaces.""" super().__init__(source_data=source_data, verbose=verbose) self.sync_dir = sync_dir unit_name_start = 0 - for name, data_interface in self.data_interface_objects.items(): + for data_interface in self.data_interface_objects.values(): if isinstance(data_interface, BaseSortingExtractorInterface): unit_ids = np.array(data_interface.sorting_extractor.unit_ids) data_interface.sorting_extractor.set_property( @@ -68,70 +113,68 @@ def temporally_align_data_interfaces(self): return sync_dir = Path(self.sync_dir) - # openephys alignment - with open(sync_dir / "open_ephys" / "recording_start_time") as f: - open_ephys_start_time = float(f.read().strip()) - with open(sync_dir / "open_ephys" / "transform", "r") as f: - open_ephys_transform = json.load(f) - for i in [0, 1]: - if f"RecordingVP{i}" in self.data_interface_objects: - orig_timestamps = self.data_interface_objects[f"RecordingVP{i}"].get_original_timestamps() - aligned_timestamps = open_ephys_transform["intercept"] + open_ephys_transform["coef"] * ( - open_ephys_start_time + orig_timestamps - ) - self.data_interface_objects[f"RecordingVP{i}"].set_aligned_timestamps(aligned_timestamps) - # openephys sorting alignment - if f"SortingVP{i}" in 
self.data_interface_objects: - if has_exceeding_spikes( - recording=self.data_interface_objects[f"RecordingVP{i}"].recording_extractor, - sorting=self.data_interface_objects[f"SortingVP{i}"].sorting_extractor, - ): - print( - f"Spikes exceeding recording found in SortingVP{i}! " - "Removing with `spikeinterface.curation.remove_excess_spikes()`" - ) - self.data_interface_objects[f"SortingVP{i}"].sorting_extractor = remove_excess_spikes( - recording=self.data_interface_objects[f"RecordingVP{i}"].recording_extractor, - sorting=self.data_interface_objects[f"SortingVP{i}"].sorting_extractor, - ) - self.data_interface_objects[f"SortingVP{i}"].register_recording( - self.data_interface_objects[f"RecordingVP{i}"] - ) - - # neuropixel alignment - orig_timestamps = self.data_interface_objects["RecordingNP"].get_original_timestamps() - with open(sync_dir / "spikeglx" / "transform", "r") as f: - spikeglx_transform = json.load(f) - aligned_timestamps = spikeglx_transform["intercept"] + spikeglx_transform["coef"] * orig_timestamps - self.data_interface_objects["RecordingNP"].set_aligned_timestamps(aligned_timestamps) - # neuropixel LFP alignment - orig_timestamps = self.data_interface_objects["LF"].get_original_timestamps() - aligned_timestamps = spikeglx_transform["intercept"] + spikeglx_transform["coef"] * orig_timestamps - self.data_interface_objects["LF"].set_aligned_timestamps(aligned_timestamps) - # neuropixel sorting alignment - if "SortingNP" in self.data_interface_objects: - if has_exceeding_spikes( - recording=self.data_interface_objects["RecordingNP"].recording_extractor, - sorting=self.data_interface_objects["SortingNP"].sorting_extractor, - ): - print( - "Spikes exceeding recording found in SortingNP! 
" - "Removing with `spikeinterface.curation.remove_excess_spikes()`" + # Align each recording + for name, recording_interface in self.data_interface_objects.items(): + if "Recording" not in name: + continue + probe_name = name.split("Recording")[1] + + # Load timescale transform + if "VP" in probe_name: + start_path = sync_dir / "open_ephys" / "recording_start_time" + start = float(open(start_path).read().strip()) + transform_path = sync_dir / "open_ephys" / "transform" + transform = json.load(open(transform_path, "r")) + lf_interface = None + elif "NP" in probe_name: + start = 0.0 + transform_path = sync_dir / "spikeglx" / "transform" + transform = json.load(open(transform_path, "r")) + lf_interface = self.data_interface_objects["LF"] + else: + raise ValueError("Invalid probe_name {probe_name}") + intercept = transform["intercept"] + coef = transform["coef"] + + # Align recording timestamps + orig_timestamps = recording_interface.get_original_timestamps() + aligned_timestamps = intercept + coef * (start + orig_timestamps) + recording_interface.set_aligned_timestamps(aligned_timestamps) + + # Align LFP timestamps + if lf_interface is not None: + orig_timestamps = lf_interface.get_original_timestamps() + aligned_timestamps = intercept + coef * ( + start + orig_timestamps ) - self.data_interface_objects["SortingNP"].sorting_extractor = remove_excess_spikes( - recording=self.data_interface_objects["RecordingNP"].recording_extractor, - sorting=self.data_interface_objects["SortingNP"].sorting_extractor, + lf_interface.set_aligned_timestamps(aligned_timestamps) + + # If sorting exists, register recording to it + if f"Sorting{probe_name}" in self.data_interface_objects: + sorting_interface = self.data_interface_objects[ + f"Sorting{probe_name}" + ] + + # Trim sorted spikes that occur after recording ends from + # kilosort artifacts + _trim_excess_spikes( + recording_interface=recording_interface, + sorting_interface=sorting_interface, ) - 
self.data_interface_objects["SortingNP"].register_recording(self.data_interface_objects["RecordingNP"]) - # align recording start to 0 + # Register recording + sorting_interface.register_recording(recording_interface) + + # Align so that 0 is the first of all timestamps aligned_start_times = [] - for name, data_interface in self.data_interface_objects.items(): + for data_interface in self.data_interface_objects.values(): start_time = data_interface.get_timestamps()[0] aligned_start_times.append(start_time) zero_time = -1.0 * min(aligned_start_times) - for name, data_interface in self.data_interface_objects.items(): + for data_interface in self.data_interface_objects.values(): if isinstance(data_interface, BaseSortingExtractorInterface): # Do not need to align because recording will be aligned continue - start_time = data_interface.set_aligned_starting_time(aligned_starting_time=zero_time) + start_time = data_interface.set_aligned_starting_time( + aligned_starting_time=zero_time + ) From 258d7dd58dea6271e7f1aa48128d3187bf910611 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Wed, 3 Jan 2024 10:55:15 -0500 Subject: [PATCH 06/28] Remove electrical series from processed data --- .../piccato/main_convert_session.py | 117 +++++++++++------- 1 file changed, 75 insertions(+), 42 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index fa96c0a..bc26066 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -106,8 +106,76 @@ def _add_v_probe_data( folder_path=str(sorting_path), keep_good_only=False, ) - processed_conversion_options[f"RecordingVP{probe_num}"] = dict(stub_test=stub_test, write_electrical_series=False) - processed_conversion_options[f"SortingVP{probe_num}"] = dict(stub_test=stub_test, write_as="processing") + processed_conversion_options[f"RecordingVP{probe_num}"] = dict( + stub_test=stub_test, 
+ write_electrical_series=False) + processed_conversion_options[f"SortingVP{probe_num}"] = dict( + stub_test=stub_test, + write_as="processing") + + +def _update_metadata(metadata, subject, session_id, session_paths): + """Update metadata.""" + + # Add subject_id, session_id, sex, and age + metadata["NWBFile"]["session_id"] = session_id + metadata["Subject"]["subject_id"] = subject + metadata["Subject"]["sex"] = _SUBJECT_TO_SEX[subject] + metadata["Subject"]["age"] = _SUBJECT_TO_AGE[subject] + + # Add probe locations + probe_metadata_file = ( + session_paths.data_open_source / "probes.metadata.json" + ) + probe_metadata = json.load(open(probe_metadata_file, "r")) + for entry in metadata["Ecephys"]["ElectrodeGroup"]: + if entry["device"] == "Neuropixel-Imec": + neuropixel_metadata = [ + x for x in probe_metadata if x["probe_type"] == "Neuropixels" + ][0] + coordinate_system = neuropixel_metadata["coordinate_system"] + coordinates = neuropixel_metadata["coordinates"] + depth_from_surface = neuropixel_metadata["depth_from_surface"] + entry["description"] = ( + f"{entry['description']}\n" + f"{coordinate_system}\n" + f"coordinates = {coordinates}\n" + f"depth_from_surface = {depth_from_surface}" + ) + entry["position"] = [ + coordinates[0], + coordinates[1], + depth_from_surface, + ] + elif "vprobe" in entry["device"]: + probe_index = int(entry["device"].split("vprobe")[1]) + v_probe_metadata = [ + x for x in probe_metadata if x["probe_type"] == "V-Probe 64" + ][probe_index] + first_channel = v_probe_metadata["coordinates"]["first_channel"] + last_channel = v_probe_metadata["coordinates"]["last_channel"] + coordinate_system = v_probe_metadata["coordinate_system"] + entry["description"] = ( + f"{entry['description']}\n" + f"{coordinate_system}\n" + f"first_channel = {first_channel}\n" + f"last_channel = {last_channel}" + ) + entry["position"] = first_channel + + # Update default metadata with the editable in the corresponding yaml file + editable_metadata_path = 
Path(__file__).parent / "metadata.yaml" + editable_metadata = load_dict_from_file(editable_metadata_path) + metadata = dict_deep_update(metadata, editable_metadata) + + # Ensure session_start_time exists in metadata + if "session_start_time" not in metadata["NWBFile"]: + raise ValueError( + "Session start time was not auto-detected. Please provide it " + "in `metadata.yaml`" + ) + + return metadata def _add_spikeglx_data( @@ -133,8 +201,10 @@ def _add_spikeglx_data( processed_source_data["LF"] = dict(file_path=lfp_file) raw_conversion_options["RecordingNP"] = dict(stub_test=stub_test) raw_conversion_options["LF"] = dict(stub_test=stub_test) - processed_conversion_options["RecordingNP"] = dict(stub_test=stub_test) - processed_conversion_options["LF"] = dict(stub_test=stub_test) + processed_conversion_options["RecordingNP"] = dict( + stub_test=stub_test, write_electrical_series=False) + processed_conversion_options["LF"] = dict( + stub_test=stub_test, write_electrical_series=False) # Processed data sorting_path = (session_paths.spike_sorting_raw / @@ -278,44 +348,7 @@ def session_to_nwb( # Add datetime and subject name to processed converter metadata = processed_converter.get_metadata() - metadata["NWBFile"]["session_id"] = session_id - metadata["Subject"]["subject_id"] = subject - metadata["Subject"]["sex"] = _SUBJECT_TO_SEX[subject] - metadata["Subject"]["age"] = _SUBJECT_TO_AGE[subject] - - # EcePhys - probe_metadata_file = (session_paths.session_data / - "probes.metadata.json") - with open(probe_metadata_file, "r") as f: - probe_metadata = json.load(f) - - neuropixel_metadata = [x for x in probe_metadata - if x["probe_type"] == "Neuropixel"][0] - - for entry in metadata["Ecephys"]["ElectrodeGroup"]: - if entry["device"] == "Neuropixel-Imec": - # TODO: uncomment when fixed in pynwb - # entry.update(dict(position=[( - # neuropixel_metadata['coordinates'][0], - # neuropixel_metadata['coordinates'][1], - # neuropixel_metadata['depth_from_surface'], - # )] - 
logging.info("\n\n") - logging.warning(" PROBE COORDINATES NOT IMPLEMENTED\n\n") - - # Update default metadata with the editable in the corresponding yaml file - editable_metadata_path = Path(__file__).parent / "metadata.yaml" - editable_metadata = load_dict_from_file(editable_metadata_path) - metadata = dict_deep_update(metadata, editable_metadata) - - # Check if session_start_time was found/set - if "session_start_time" not in metadata["NWBFile"]: - try: - date = datetime.datetime.strptime(session, "%Y-%m-%d") - date = date.replace(tzinfo=ZoneInfo("US/Eastern")) - except: - raise ValueError("Session start time was not auto-detected. Please provide it " "in `metadata.yaml`") - metadata["NWBFile"]["session_start_time"] = date + metadata = _update_metadata(metadata, subject, session_id, session_paths) # Run conversion logging.info("Running processed conversion") From 1576758e753696c108a21eb8a939ab199b514995 Mon Sep 17 00:00:00 2001 From: Aida Piccato <49250051+aidapiccato@users.noreply.github.com> Date: Wed, 3 Jan 2024 15:26:41 -0500 Subject: [PATCH 07/28] Delete src/jazayeri_lab_to_nwb/piccato/logs directory --- .../piccato/logs/34316882.out | 37 --------------- .../piccato/logs/34316908.out | 47 ------------------- 2 files changed, 84 deletions(-) delete mode 100644 src/jazayeri_lab_to_nwb/piccato/logs/34316882.out delete mode 100644 src/jazayeri_lab_to_nwb/piccato/logs/34316908.out diff --git a/src/jazayeri_lab_to_nwb/piccato/logs/34316882.out b/src/jazayeri_lab_to_nwb/piccato/logs/34316882.out deleted file mode 100644 index d9b7372..0000000 --- a/src/jazayeri_lab_to_nwb/piccato/logs/34316882.out +++ /dev/null @@ -1,37 +0,0 @@ -INFO:root: -Starting conversion for elgar/2023-11-30 - -INFO:root:stub_test = True -INFO:root:overwrite = True -INFO:root:dandiset_id = None -INFO:root:session_paths: SessionPaths(output=PosixPath('/om2/user/apiccato/nwb_data/staging/sub-elgar'), 
raw_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/raw_data'), behavior_task_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/behavior_task'), session_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30'), sync_pulses=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/sync_signals'), spike_sorting_raw=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/spike_sorting')) -INFO:root:raw_nwb_path = /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_ecephys.nwb -INFO:root:processed_nwb_path = /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_behavior+ecephys.nwb -INFO:root: -INFO:root:Adding SpikeGLX data -INFO:root:Adding behavior data -INFO:root:Adding trials data -INFO:root:Adding display data -INFO:root: - - -WARNING:root: PROBE COORDINATES NOT IMPLEMENTED - - -INFO:root:Running processed conversion -INFO:root:Temporally aligning data interfaces -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/spikeinterface/core/baserecording.py:413: UserWarning: Setting times with Recording.set_times() is not recommended because times are not always propagated to across preprocessingUse use this carefully! - warn( -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/pynwb/file.py:471: UserWarning: Date is missing timezone information. Updating to local timezone. - args_to_set['session_start_time'] = _add_missing_timezone(session_start_time) -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py:22: UserWarning: Custom description given to get_module does not match existing module description! Ignoring custom description. 
- warn( -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py:22: UserWarning: Custom description given to get_module does not match existing module description! Ignoring custom description. - warn( -INFO:root: -Finished conversion for elgar/2023-11-30 - -Source data is valid! -Metadata is valid! -conversion_options is valid! -NWB file saved at /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_behavior+ecephys.nwb! diff --git a/src/jazayeri_lab_to_nwb/piccato/logs/34316908.out b/src/jazayeri_lab_to_nwb/piccato/logs/34316908.out deleted file mode 100644 index a633af9..0000000 --- a/src/jazayeri_lab_to_nwb/piccato/logs/34316908.out +++ /dev/null @@ -1,47 +0,0 @@ -INFO:root: -Starting conversion for elgar/2023-11-30 - -INFO:root:stub_test = True -INFO:root:overwrite = True -INFO:root:dandiset_id = None -INFO:root:session_paths: SessionPaths(output=PosixPath('/om2/user/apiccato/nwb_data/staging/sub-elgar'), raw_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/raw_data'), behavior_task_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/behavior_task'), session_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30'), sync_pulses=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/sync_signals'), spike_sorting_raw=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/spike_sorting')) -INFO:root:raw_nwb_path = /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_ecephys.nwb -INFO:root:processed_nwb_path = /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_behavior+ecephys.nwb -INFO:root: -INFO:root:Adding SpikeGLX data -INFO:root:Adding behavior data -INFO:root:Adding trials data -INFO:root:Adding display data 
-INFO:root: - - -WARNING:root: PROBE COORDINATES NOT IMPLEMENTED - - -INFO:root:Running processed conversion -INFO:root:Temporally aligning data interfaces -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/spikeinterface/core/baserecording.py:413: UserWarning: Setting times with Recording.set_times() is not recommended because times are not always propagated to across preprocessingUse use this carefully! - warn( -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/pynwb/file.py:471: UserWarning: Date is missing timezone information. Updating to local timezone. - args_to_set['session_start_time'] = _add_missing_timezone(session_start_time) -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py:22: UserWarning: Custom description given to get_module does not match existing module description! Ignoring custom description. - warn( -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py:22: UserWarning: Custom description given to get_module does not match existing module description! Ignoring custom description. - warn( -INFO:root:Running raw data conversion -INFO:root:Temporally aligning data interfaces -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/spikeinterface/core/baserecording.py:413: UserWarning: Setting times with Recording.set_times() is not recommended because times are not always propagated to across preprocessingUse use this carefully! - warn( -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/pynwb/file.py:471: UserWarning: Date is missing timezone information. Updating to local timezone. 
- args_to_set['session_start_time'] = _add_missing_timezone(session_start_time) -INFO:root: -Finished conversion for elgar/2023-11-30 - -Source data is valid! -Metadata is valid! -conversion_options is valid! -NWB file saved at /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_behavior+ecephys.nwb! -Source data is valid! -Metadata is valid! -conversion_options is valid! -NWB file saved at /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_ecephys.nwb! From 385ea8e7fd7ce5abd775301ca7c600d86533bdd0 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Wed, 3 Jan 2024 15:27:26 -0500 Subject: [PATCH 08/28] Upload to DANDI; adding to .gitignore; reformatting --- .gitignore | 3 + .../piccato/logs/34316882.out | 37 ----------- .../piccato/logs/34316908.out | 47 -------------- .../piccato/main_convert_session.py | 63 ++++++++++--------- .../watters/main_convert_session.py | 22 +++---- .../watters/timeseries_interface.py | 2 +- 6 files changed, 49 insertions(+), 125 deletions(-) delete mode 100644 src/jazayeri_lab_to_nwb/piccato/logs/34316882.out delete mode 100644 src/jazayeri_lab_to_nwb/piccato/logs/34316908.out diff --git a/.gitignore b/.gitignore index 7347b6c..d3f9d3f 100644 --- a/.gitignore +++ b/.gitignore @@ -147,3 +147,6 @@ dmypy.json # NWB files **.nwb + +*.out +**logs/* \ No newline at end of file diff --git a/src/jazayeri_lab_to_nwb/piccato/logs/34316882.out b/src/jazayeri_lab_to_nwb/piccato/logs/34316882.out deleted file mode 100644 index d9b7372..0000000 --- a/src/jazayeri_lab_to_nwb/piccato/logs/34316882.out +++ /dev/null @@ -1,37 +0,0 @@ -INFO:root: -Starting conversion for elgar/2023-11-30 - -INFO:root:stub_test = True -INFO:root:overwrite = True -INFO:root:dandiset_id = None -INFO:root:session_paths: SessionPaths(output=PosixPath('/om2/user/apiccato/nwb_data/staging/sub-elgar'), raw_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/raw_data'), 
behavior_task_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/behavior_task'), session_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30'), sync_pulses=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/sync_signals'), spike_sorting_raw=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/spike_sorting')) -INFO:root:raw_nwb_path = /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_ecephys.nwb -INFO:root:processed_nwb_path = /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_behavior+ecephys.nwb -INFO:root: -INFO:root:Adding SpikeGLX data -INFO:root:Adding behavior data -INFO:root:Adding trials data -INFO:root:Adding display data -INFO:root: - - -WARNING:root: PROBE COORDINATES NOT IMPLEMENTED - - -INFO:root:Running processed conversion -INFO:root:Temporally aligning data interfaces -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/spikeinterface/core/baserecording.py:413: UserWarning: Setting times with Recording.set_times() is not recommended because times are not always propagated to across preprocessingUse use this carefully! - warn( -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/pynwb/file.py:471: UserWarning: Date is missing timezone information. Updating to local timezone. - args_to_set['session_start_time'] = _add_missing_timezone(session_start_time) -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py:22: UserWarning: Custom description given to get_module does not match existing module description! Ignoring custom description. 
- warn( -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py:22: UserWarning: Custom description given to get_module does not match existing module description! Ignoring custom description. - warn( -INFO:root: -Finished conversion for elgar/2023-11-30 - -Source data is valid! -Metadata is valid! -conversion_options is valid! -NWB file saved at /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_behavior+ecephys.nwb! diff --git a/src/jazayeri_lab_to_nwb/piccato/logs/34316908.out b/src/jazayeri_lab_to_nwb/piccato/logs/34316908.out deleted file mode 100644 index a633af9..0000000 --- a/src/jazayeri_lab_to_nwb/piccato/logs/34316908.out +++ /dev/null @@ -1,47 +0,0 @@ -INFO:root: -Starting conversion for elgar/2023-11-30 - -INFO:root:stub_test = True -INFO:root:overwrite = True -INFO:root:dandiset_id = None -INFO:root:session_paths: SessionPaths(output=PosixPath('/om2/user/apiccato/nwb_data/staging/sub-elgar'), raw_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/raw_data'), behavior_task_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/behavior_task'), session_data=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30'), sync_pulses=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/sync_signals'), spike_sorting_raw=PosixPath('/om2/user/apiccato/phys_preprocessing_open_source/phys_data/elgar/2023-11-30/spike_sorting')) -INFO:root:raw_nwb_path = /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_ecephys.nwb -INFO:root:processed_nwb_path = /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_behavior+ecephys.nwb -INFO:root: -INFO:root:Adding SpikeGLX data -INFO:root:Adding behavior data -INFO:root:Adding trials data -INFO:root:Adding display data 
-INFO:root: - - -WARNING:root: PROBE COORDINATES NOT IMPLEMENTED - - -INFO:root:Running processed conversion -INFO:root:Temporally aligning data interfaces -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/spikeinterface/core/baserecording.py:413: UserWarning: Setting times with Recording.set_times() is not recommended because times are not always propagated to across preprocessingUse use this carefully! - warn( -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/pynwb/file.py:471: UserWarning: Date is missing timezone information. Updating to local timezone. - args_to_set['session_start_time'] = _add_missing_timezone(session_start_time) -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py:22: UserWarning: Custom description given to get_module does not match existing module description! Ignoring custom description. - warn( -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py:22: UserWarning: Custom description given to get_module does not match existing module description! Ignoring custom description. - warn( -INFO:root:Running raw data conversion -INFO:root:Temporally aligning data interfaces -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/spikeinterface/core/baserecording.py:413: UserWarning: Setting times with Recording.set_times() is not recommended because times are not always propagated to across preprocessingUse use this carefully! - warn( -/om2/user/apiccato/anaconda/envs/jazayeri_lab_to_nwb_env/lib/python3.10/site-packages/pynwb/file.py:471: UserWarning: Date is missing timezone information. Updating to local timezone. 
- args_to_set['session_start_time'] = _add_missing_timezone(session_start_time) -INFO:root: -Finished conversion for elgar/2023-11-30 - -Source data is valid! -Metadata is valid! -conversion_options is valid! -NWB file saved at /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_behavior+ecephys.nwb! -Source data is valid! -Metadata is valid! -conversion_options is valid! -NWB file saved at /om2/user/apiccato/nwb_data/staging/sub-elgar/sub-elgar_ses-2023-11-30-stub_ecephys.nwb! diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index bc26066..8904788 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -44,7 +44,7 @@ # Whether to overwrite output nwb files _OVERWRITE = True # ID of the dandiset to upload to, or None to not upload -_DANDISET_ID = None # '000620' +_DANDISET_ID = None # '000767' # Set logger level for info is displayed in console logging.getLogger().setLevel(logging.INFO) @@ -95,22 +95,26 @@ def _add_v_probe_data( probe_name=f"vprobe{probe_num}", es_key=f"ElectricalSeriesVP{probe_num}", ) - raw_conversion_options[f"RecordingVP{probe_num}"] = dict(stub_test=stub_test) + raw_conversion_options[f"RecordingVP{probe_num}"] = dict( + stub_test=stub_test + ) # Processed data sorting_path = (session_paths.spike_sorting_raw / f"v_probe_{probe_num}" / "ks_3_output_pre_v6_curated") - processed_source_data[f"RecordingVP{probe_num}"] = raw_source_data[f"RecordingVP{probe_num}"] + processed_source_data[f"RecordingVP{probe_num}"] = raw_source_data[ + f"RecordingVP{probe_num}" + ] processed_source_data[f"SortingVP{probe_num}"] = dict( folder_path=str(sorting_path), keep_good_only=False, ) processed_conversion_options[f"RecordingVP{probe_num}"] = dict( - stub_test=stub_test, + stub_test=stub_test, write_electrical_series=False) processed_conversion_options[f"SortingVP{probe_num}"] = dict( - 
stub_test=stub_test, + stub_test=stub_test, write_as="processing") @@ -125,28 +129,29 @@ def _update_metadata(metadata, subject, session_id, session_paths): # Add probe locations probe_metadata_file = ( - session_paths.data_open_source / "probes.metadata.json" + session_paths.session_data / "probes.metadata.json" ) probe_metadata = json.load(open(probe_metadata_file, "r")) for entry in metadata["Ecephys"]["ElectrodeGroup"]: if entry["device"] == "Neuropixel-Imec": - neuropixel_metadata = [ - x for x in probe_metadata if x["probe_type"] == "Neuropixels" - ][0] - coordinate_system = neuropixel_metadata["coordinate_system"] - coordinates = neuropixel_metadata["coordinates"] - depth_from_surface = neuropixel_metadata["depth_from_surface"] - entry["description"] = ( - f"{entry['description']}\n" - f"{coordinate_system}\n" - f"coordinates = {coordinates}\n" - f"depth_from_surface = {depth_from_surface}" - ) - entry["position"] = [ - coordinates[0], - coordinates[1], - depth_from_surface, - ] + pass + # neuropixel_metadata = [ + # x for x in probe_metadata if x["probe_type"] == "Neuropixel" + # ][0] + # coordinate_system = neuropixel_metadata["coordinate_system"] + # coordinates = neuropixel_metadata["coordinates"] + # depth_from_surface = neuropixel_metadata["depth_from_surface"] + # entry["description"] = ( + # f"{entry['description']}\n" + # f"{coordinate_system}\n" + # f"coordinates = {coordinates}\n" + # f"depth_from_surface = {depth_from_surface}" + # ) + # entry["position"] = [ + # coordinates[0], + # coordinates[1], + # depth_from_surface, + # ] elif "vprobe" in entry["device"]: probe_index = int(entry["device"].split("vprobe")[1]) v_probe_metadata = [ @@ -340,13 +345,17 @@ def session_to_nwb( folder_path=str(session_paths.behavior_task_data)) processed_conversion_options["Display"] = dict() - # Create processed data converter + # Create data converters processed_converter = nwb_converter.NWBConverter( source_data=processed_source_data, 
sync_dir=session_paths.sync_pulses, ) + raw_converter = nwb_converter.NWBConverter( + source_data=raw_source_data, + sync_dir=str(session_paths.sync_pulses), + ) - # Add datetime and subject name to processed converter + # Update metadata metadata = processed_converter.get_metadata() metadata = _update_metadata(metadata, subject, session_id, session_paths) @@ -361,10 +370,6 @@ def session_to_nwb( logging.info("Running raw data conversion") metadata["NWBFile"]["identifier"] = str(uuid4()) - raw_converter = nwb_converter.NWBConverter( - source_data=raw_source_data, - sync_dir=str(session_paths.sync_pulses), - ) raw_converter.run_conversion( metadata=metadata, nwbfile_path=raw_nwb_path, diff --git a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py index cf870a7..e820da9 100644 --- a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py @@ -284,17 +284,17 @@ def session_to_nwb( processed_source_data = {} processed_conversion_options = {} - # Add V-Probe data - for probe_num in range(2): - _add_v_probe_data( - raw_source_data=raw_source_data, - raw_conversion_options=raw_conversion_options, - processed_source_data=processed_source_data, - processed_conversion_options=processed_conversion_options, - session_paths=session_paths, - probe_num=probe_num, - stub_test=stub_test, - ) + # # Add V-Probe data + # for probe_num in range(2): + # _add_v_probe_data( + # raw_source_data=raw_source_data, + # raw_conversion_options=raw_conversion_options, + # processed_source_data=processed_source_data, + # processed_conversion_options=processed_conversion_options, + # session_paths=session_paths, + # probe_num=probe_num, + # stub_test=stub_test, + # ) # Add SpikeGLX data _add_spikeglx_data( diff --git a/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py b/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py index 2c9ffc3..459a754 100644 --- 
a/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py +++ b/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py @@ -1,7 +1,7 @@ """Primary classes for timeseries variables. The classes here handle variables like eye position, reward line, and audio -stimuli that are not necessarily tied to the trial structure of display updates. +stimuli that are not necessarily tied to the trial structure of display updates For trial structured variables, see ../trials_interface.py. For variables pertaining to display updates, see ../frames_interface.py. """ From 4613fb3b718bd4905d6d4edaa89d03cdba911199 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Wed, 3 Jan 2024 16:04:15 -0500 Subject: [PATCH 09/28] Small formatting changes --- .../piccato/main_convert_session.py | 5 ++- .../piccato/timeseries_interface.py | 35 +++++++++++-------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 8904788..5c94603 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -21,7 +21,7 @@ See comments below for descriptions of these variables. 
""" -import datetime + import glob import json import logging @@ -30,7 +30,6 @@ from pathlib import Path from typing import Union from uuid import uuid4 -from zoneinfo import ZoneInfo import get_session_paths import nwb_converter @@ -44,7 +43,7 @@ # Whether to overwrite output nwb files _OVERWRITE = True # ID of the dandiset to upload to, or None to not upload -_DANDISET_ID = None # '000767' +_DANDISET_ID = None # '000767' # Set logger level for info is displayed in console logging.getLogger().setLevel(logging.INFO) diff --git a/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py index 3ee1a71..bf116e3 100644 --- a/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py @@ -20,6 +20,15 @@ from pynwb.behavior import SpatialSeries +def get_processing_module(nwbfile: NWBFile): + module_description = ( + "Contains behavior, audio, and reward data from experiment." + ) + return get_module( + nwbfile=nwbfile, name="behavior", description=module_description + ) + + class TimestampsFromArrayInterface(BaseTemporalAlignmentInterface): """Interface implementing temporal alignment functions with timestamps.""" @@ -75,6 +84,8 @@ def __init__(self, folder_path: FolderPathType): self._eye_pos = np.stack([eye_h_values, eye_v_values], axis=1) def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + del metadata + # Make SpatialSeries eye_position = SpatialSeries( name="eye_position", @@ -87,9 +98,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): ) # Get processing module - module_description = "Contains behavioral data from experiment." 
- processing_module = get_module(nwbfile=nwbfile, name="behavior", - description=module_description) + processing_module = get_processing_module(nwbfile=nwbfile) # Add data to module processing_module.add_data_interface(eye_position) @@ -112,6 +121,8 @@ def __init__(self, folder_path: FolderPathType): self._pupil_size = np.array(pupil_size_data["values"]) def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + del metadata + # Make TimeSeries pupil_size = TimeSeries( name="pupil_size", @@ -123,10 +134,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): ) # Get processing module - module_description = "Contains behavioral data from experiment." - processing_module = get_module(nwbfile=nwbfile, - name="behavior", - description=module_description) + processing_module = get_processing_module(nwbfile=nwbfile) # Add data to module processing_module.add_data_interface(pupil_size) @@ -149,6 +157,8 @@ def __init__(self, folder_path: FolderPathType): self._reward_line = reward_line_data["values"] def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + del metadata + # Make LabeledEvents reward_line = LabeledEvents( name="reward_line", @@ -160,10 +170,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): ) # Get processing module - module_description = "Contains audio and reward data from experiment." 
- processing_module = get_module(nwbfile=nwbfile, - name="behavior", - description=module_description) + processing_module = get_processing_module(nwbfile=nwbfile) # Add data to module processing_module.add_data_interface(reward_line) @@ -191,6 +198,8 @@ def __init__(self, folder_path: FolderPathType): self._sound_codes = [sound_to_code[x] for x in audio] def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): + del metadata + # Make LabeledEvents audio = LabeledEvents( name="audio", @@ -201,9 +210,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): ) # Get processing module - module_description = "Contains audio and reward data from experiment." - processing_module = get_module(nwbfile=nwbfile, name="behavior", - description=module_description) + processing_module = get_processing_module(nwbfile=nwbfile) # Add data to module processing_module.add_data_interface(audio) From 8d5cee0783910f9e8c2b79d7f23b984f7d517f61 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Wed, 3 Jan 2024 16:10:37 -0500 Subject: [PATCH 10/28] undoing accidental changes to watters directory --- .../watters/main_convert_session.py | 22 +++++++++---------- .../watters/timeseries_interface.py | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py index e820da9..cf870a7 100644 --- a/src/jazayeri_lab_to_nwb/watters/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/watters/main_convert_session.py @@ -284,17 +284,17 @@ def session_to_nwb( processed_source_data = {} processed_conversion_options = {} - # # Add V-Probe data - # for probe_num in range(2): - # _add_v_probe_data( - # raw_source_data=raw_source_data, - # raw_conversion_options=raw_conversion_options, - # processed_source_data=processed_source_data, - # processed_conversion_options=processed_conversion_options, - # session_paths=session_paths, - # probe_num=probe_num, - # stub_test=stub_test, 
- # ) + # Add V-Probe data + for probe_num in range(2): + _add_v_probe_data( + raw_source_data=raw_source_data, + raw_conversion_options=raw_conversion_options, + processed_source_data=processed_source_data, + processed_conversion_options=processed_conversion_options, + session_paths=session_paths, + probe_num=probe_num, + stub_test=stub_test, + ) # Add SpikeGLX data _add_spikeglx_data( diff --git a/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py b/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py index 459a754..2c9ffc3 100644 --- a/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py +++ b/src/jazayeri_lab_to_nwb/watters/timeseries_interface.py @@ -1,7 +1,7 @@ """Primary classes for timeseries variables. The classes here handle variables like eye position, reward line, and audio -stimuli that are not necessarily tied to the trial structure of display updates +stimuli that are not necessarily tied to the trial structure of display updates. For trial structured variables, see ../trials_interface.py. For variables pertaining to display updates, see ../frames_interface.py. 
""" From e435ead7890f1da250ab4b8b6d5029988559b10b Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Thu, 4 Jan 2024 16:23:07 -0500 Subject: [PATCH 11/28] Added stimulus set as a field in trials_interface; small changes when reading in probe metadata --- .../piccato/main_convert_session.py | 44 ++++++------------- .../piccato/trials_interface.py | 6 ++- 2 files changed, 19 insertions(+), 31 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 5c94603..644f818 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -30,6 +30,7 @@ from pathlib import Path from typing import Union from uuid import uuid4 +import numpy as np import get_session_paths import nwb_converter @@ -128,44 +129,27 @@ def _update_metadata(metadata, subject, session_id, session_paths): # Add probe locations probe_metadata_file = ( - session_paths.session_data / "probes.metadata.json" + session_paths.session_data / "phys_metadata.json" ) probe_metadata = json.load(open(probe_metadata_file, "r")) for entry in metadata["Ecephys"]["ElectrodeGroup"]: if entry["device"] == "Neuropixel-Imec": - pass - # neuropixel_metadata = [ - # x for x in probe_metadata if x["probe_type"] == "Neuropixel" - # ][0] - # coordinate_system = neuropixel_metadata["coordinate_system"] - # coordinates = neuropixel_metadata["coordinates"] - # depth_from_surface = neuropixel_metadata["depth_from_surface"] - # entry["description"] = ( - # f"{entry['description']}\n" - # f"{coordinate_system}\n" - # f"coordinates = {coordinates}\n" - # f"depth_from_surface = {depth_from_surface}" - # ) - # entry["position"] = [ - # coordinates[0], - # coordinates[1], - # depth_from_surface, - # ] - elif "vprobe" in entry["device"]: - probe_index = int(entry["device"].split("vprobe")[1]) - v_probe_metadata = [ - x for x in probe_metadata if x["probe_type"] == "V-Probe 64" - 
][probe_index] - first_channel = v_probe_metadata["coordinates"]["first_channel"] - last_channel = v_probe_metadata["coordinates"]["last_channel"] - coordinate_system = v_probe_metadata["coordinate_system"] + neuropixel_metadata = probe_metadata + coordinate_system = neuropixel_metadata["coordinate_system"] + coordinates = np.round(neuropixel_metadata["coordinates"][:2], + decimals=2) + depth_from_surface = neuropixel_metadata["depth"] entry["description"] = ( f"{entry['description']}\n" f"{coordinate_system}\n" - f"first_channel = {first_channel}\n" - f"last_channel = {last_channel}" + f"coordinates = {coordinates}\n" + f"depth_from_surface = {depth_from_surface}" ) - entry["position"] = first_channel + entry["position"] = [ + coordinates[0], + coordinates[1], + depth_from_surface, + ] # Update default metadata with the editable in the corresponding yaml file editable_metadata_path = Path(__file__).parent / "metadata.yaml" diff --git a/src/jazayeri_lab_to_nwb/piccato/trials_interface.py b/src/jazayeri_lab_to_nwb/piccato/trials_interface.py index c4a90b3..8e49063 100644 --- a/src/jazayeri_lab_to_nwb/piccato/trials_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/trials_interface.py @@ -26,7 +26,8 @@ class TrialsInterface(TimeIntervalsInterface): "closed_loop_response_position": "closed_loop_response_position", "closed_loop_response_time": "closed_loop_response_time", "time_start": "start_time", - # 'trial_type': 'trial_type', + "trial_type": "trial_type", + "stimulus_set": "stimulus_set", "time_phase_fixation": "phase_fixation_time", "time_phase_stimulus": "phase_stimulus_time", "time_phase_delay": "phase_delay_time", @@ -116,6 +117,9 @@ def column_descriptions(self): '"c", ...) of the corresponding object.' ), "trial_type": ("For each trial, whether condition is LTM or STM"), + "stimulus_set": ("For each trial, string identity symbol of the " + "set of objects. 
String consists of sorted list " + "of object identity symbols"), "stimulus_object_positions": ( "For each trial, a serialized list with one element for each " "object. Each element is the initial (x, y) position of the " From 514751a35e67701bd9d28f4db74d171bf5f0015a Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Sun, 7 Jan 2024 15:13:57 -0500 Subject: [PATCH 12/28] Included dandiset directory when writing to staging, removed automatic upload, edited experimental metadata --- .../piccato/get_session_paths.py | 4 +- .../piccato/main_convert_session.py | 115 +++--------------- .../piccato/main_convert_session.sh | 16 ++- src/jazayeri_lab_to_nwb/piccato/metadata.yaml | 15 ++- 4 files changed, 41 insertions(+), 109 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py index aa5ceff..499793a 100644 --- a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py +++ b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py @@ -8,7 +8,7 @@ } OM_PATH = '/om2/user/apiccato/phys_preprocessing_open_source/phys_data' - +DANDISET_ID = '000767' SessionPaths = collections.namedtuple( "SessionPaths", [ @@ -28,7 +28,7 @@ def _get_session_paths_openmind(subject, session): # Path to write output nwb files to output_path = pathlib.Path( - f"/om2/user/apiccato/nwb_data/staging/sub-{subject}" + f"/om2/user/apiccato/nwb_data/staging/{DANDISET_ID}/sub-{subject}" ) # Path to the raw data. This is used for reading raw physiology data. 
diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 644f818..2289b90 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -34,17 +34,16 @@ import get_session_paths import nwb_converter -from neuroconv.tools.data_transfers import automatic_dandi_upload + from neuroconv.utils import dict_deep_update, load_dict_from_file # Data repository. Either 'globus' or 'openmind' _REPO = "openmind" # Whether to run all the physiology data or only a stub -_STUB_TEST = True +_STUB_TEST = False # Whether to overwrite output nwb files _OVERWRITE = True -# ID of the dandiset to upload to, or None to not upload -_DANDISET_ID = None # '000767' + # Set logger level for info is displayed in console logging.getLogger().setLevel(logging.INFO) @@ -70,59 +69,11 @@ def _get_single_file(directory, suffix=""): return files[0] -def _add_v_probe_data( - raw_source_data, - raw_conversion_options, - processed_source_data, - processed_conversion_options, - session_paths, - probe_num, - stub_test, -): - """Add V-Probe session data.""" - probe_data_dir = session_paths.raw_data / f"v_probe_{probe_num}" - if not probe_data_dir.exists(): - return - logging.info(f"Adding V-probe {probe_num} session data") - - # Raw data - recording_file = _get_single_file(probe_data_dir, suffix=".dat") - metadata_path = str(session_paths.data_open_source / "probes.metadata.json") - raw_source_data[f"RecordingVP{probe_num}"] = dict( - file_path=recording_file, - probe_metadata_file=metadata_path, - probe_key=f"probe{(probe_num + 1):02d}", - probe_name=f"vprobe{probe_num}", - es_key=f"ElectricalSeriesVP{probe_num}", - ) - raw_conversion_options[f"RecordingVP{probe_num}"] = dict( - stub_test=stub_test - ) - - # Processed data - sorting_path = (session_paths.spike_sorting_raw / - f"v_probe_{probe_num}" / - "ks_3_output_pre_v6_curated") - 
processed_source_data[f"RecordingVP{probe_num}"] = raw_source_data[ - f"RecordingVP{probe_num}" - ] - processed_source_data[f"SortingVP{probe_num}"] = dict( - folder_path=str(sorting_path), - keep_good_only=False, - ) - processed_conversion_options[f"RecordingVP{probe_num}"] = dict( - stub_test=stub_test, - write_electrical_series=False) - processed_conversion_options[f"SortingVP{probe_num}"] = dict( - stub_test=stub_test, - write_as="processing") - - def _update_metadata(metadata, subject, session_id, session_paths): """Update metadata.""" # Add subject_id, session_id, sex, and age - metadata["NWBFile"]["session_id"] = session_id + metadata["NWBFile"]["session_id"] = str(session_id) metadata["Subject"]["subject_id"] = subject metadata["Subject"]["sex"] = _SUBJECT_TO_SEX[subject] metadata["Subject"]["age"] = _SUBJECT_TO_AGE[subject] @@ -198,12 +149,13 @@ def _add_spikeglx_data( sorting_path = (session_paths.spike_sorting_raw / "spikeglx/kilosort2_5/sorter_output" ) - processed_source_data["SortingNP"] = dict( - folder_path=str(sorting_path), - keep_good_only=False, - ) - processed_conversion_options["SortingNP"] = dict(stub_test=stub_test, - write_as="processing") + if os.path.exists(sorting_path): + processed_source_data["SortingNP"] = dict( + folder_path=str(sorting_path), + keep_good_only=False, + ) + processed_conversion_options["SortingNP"] = dict(stub_test=stub_test, + write_as="processing") def session_to_nwb( @@ -211,7 +163,6 @@ def session_to_nwb( session: str, stub_test: bool = False, overwrite: bool = True, - dandiset_id: Union[str, None] = None ): """ Convert a single session to an NWB file. @@ -230,27 +181,9 @@ def session_to_nwb( If the file exists already, True will delete and replace with a new file, False will append the contents. Default is True. - dandiset_id : string, optional - If you want to upload the file to the DANDI archive, specify the - six-digit ID here. Requires the DANDI_API_KEY environment variable to - be set. 
To set this in your bash terminal in Linux or macOS, run - export DANDI_API_KEY=... - or in Windows - set DANDI_API_KEY=... - Default is None. """ - if dandiset_id is not None: - import dandi # check importability - - assert os.getenv("DANDI_API_KEY"), ( - "Unable to find environment variable 'DANDI_API_KEY'. " - "Please retrieve your token from DANDI and set this environment " - "variable." - ) - logging.info(f"stub_test = {stub_test}") logging.info(f"overwrite = {overwrite}") - logging.info(f"dandiset_id = {dandiset_id}") # Get paths session_paths = get_session_paths.get_session_paths(subject, @@ -282,18 +215,6 @@ def session_to_nwb( processed_source_data = {} processed_conversion_options = {} - # Add V-Probe data - for probe_num in range(2): - _add_v_probe_data( - raw_source_data=raw_source_data, - raw_conversion_options=raw_conversion_options, - processed_source_data=processed_source_data, - processed_conversion_options=processed_conversion_options, - session_paths=session_paths, - probe_num=probe_num, - stub_test=stub_test, - ) - # Add SpikeGLX data _add_spikeglx_data( raw_source_data=raw_source_data, @@ -340,7 +261,10 @@ def session_to_nwb( # Update metadata metadata = processed_converter.get_metadata() - metadata = _update_metadata(metadata, subject, session_id, session_paths) + metadata = _update_metadata(metadata=metadata, + subject=subject, + session_id=session_id, + session_paths=session_paths) # Run conversion logging.info("Running processed conversion") @@ -360,14 +284,6 @@ def session_to_nwb( overwrite=overwrite, ) - # Upload to DANDI - if dandiset_id is not None: - logging.info(f"Uploading to dandiset id {dandiset_id}") - automatic_dandi_upload( - dandiset_id=dandiset_id, - nwb_folder_path=session_paths.output, - ) - if __name__ == "__main__": """Run session conversion.""" @@ -379,6 +295,5 @@ def session_to_nwb( session=session, stub_test=_STUB_TEST, overwrite=_OVERWRITE, - dandiset_id=_DANDISET_ID, ) logging.info(f"\nFinished conversion for 
{subject}/{session}\n") diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh index 5a8b5d6..fd8676d 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh @@ -8,7 +8,21 @@ #SBATCH --mail-user=apiccato@mit.edu #SBATCH --partition=jazayeri +SUBJECT=$1 # Argument passed in by user. Should be in subject/date format +echo "SUBJECT: $SUBJECT" +if [ -z "$SUBJECT" ]; then + echo "No session specified, exiting." + exit +fi + +SESSION=$2 # Argument passed in by user. Should be in subject/date format +echo "SESSION: $SESSION" +if [ -z "$SESSION" ]; then + echo "No session specified, exiting." + exit +fi + source ~/.bashrc conda activate jazayeri_lab_to_nwb_env cd /om2/user/apiccato/jazayeri-lab-to-nwb -python src/jazayeri_lab_to_nwb/piccato/main_convert_session.py elgar 2023-11-30 \ No newline at end of file +python src/jazayeri_lab_to_nwb/piccato/main_convert_session.py $SUBJECT $SESSION \ No newline at end of file diff --git a/src/jazayeri_lab_to_nwb/piccato/metadata.yaml b/src/jazayeri_lab_to_nwb/piccato/metadata.yaml index 0ed943b..cc45ead 100644 --- a/src/jazayeri_lab_to_nwb/piccato/metadata.yaml +++ b/src/jazayeri_lab_to_nwb/piccato/metadata.yaml @@ -2,14 +2,17 @@ NWBFile: # related_publications: # no pubs yet # - https://doi.org/12345 session_description: - Data from macaque performing multi-object working memory task. Subject is - presented with multiple objects at different locations on a screen. After a - delay, the subject is then cued with one of the objects, now displayed at - the center of the screen. Subject should respond by saccading to the - location of the cued object at its initial presentation. + Data from macaque performing a working and long-term memory task. A + session switches in a block-like fashion between a long- and a short-term + memory task. 
In the short-term memory task, the subject is presented with + three objects placed in random locations in the periphery. The subject must + respond by retrieving and saccading to the location of a cued object after + a short delay. In the long-term memory task, the objects are not presented + in their corresponding locations. Instead, the subject must respond + according to a set of learned long-term object-location associations. institution: MIT lab: Jazayeri experimenter: - - Watters, Nicholas + - Piccato, Aida Subject: species: Macaca mulatta From 4114dc7ff53de608ec62b5b0d8ca1c59cfce6257 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Mon, 8 Jan 2024 17:07:55 -0500 Subject: [PATCH 13/28] Updated readme to reflect directory structure; otherwise mostly formatting changes --- src/jazayeri_lab_to_nwb/piccato/README.md | 63 +++++++++---------- .../piccato/display_interface.py | 12 ++-- .../piccato/main_convert_session.py | 7 +-- .../piccato/timeseries_interface.py | 24 ++++--- .../piccato/trials_interface.py | 40 ++++++++---- 5 files changed, 81 insertions(+), 65 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/README.md b/src/jazayeri_lab_to_nwb/piccato/README.md index e718409..753c32b 100644 --- a/src/jazayeri_lab_to_nwb/piccato/README.md +++ b/src/jazayeri_lab_to_nwb/piccato/README.md @@ -1,55 +1,50 @@ -# Watters data conversion pipeline -NWB conversion scripts for Watters data to the [Neurodata Without Borders](https://nwb-overview.readthedocs.io/) data format. +# Piccato data conversion pipeline +NWB conversion scripts for Piccato data to the [Neurodata Without Borders](https://nwb-overview.readthedocs.io/) data format. 
## Usage To run a specific conversion, you might need to install first some conversion specific dependencies that are located in each conversion directory: + ``` -pip install -r src/jazayeri_lab_to_nwb/watters/watters_requirements.txt +pip install -r src/jazayeri_lab_to_nwb/piccato/requirements.txt ``` You can run a specific conversion with the following command: ``` -python src/jazayeri_lab_to_nwb/watters/main_convert_session.py $SUBJECT $SESSION +python src/jazayeri_lab_to_nwb/piccato/main_convert_session.py $SUBJECT $SESSION ``` -### Watters working memory task data -The conversion function for this experiment, `session_to_nwb`, is found in `src/watters/main_convert_session.py`. The function takes arguments: -* `subject` subject name, either `'Perle'` or `'Elgar'`. +### Piccato working and long-term memory task data +The conversion function for this experiment, `session_to_nwb`, is found in `src/piccato/main_convert_session.py`. The function takes arguments: +* `subject` subject name (currently only `'elgar'`.) * `session` session date in format `'YYYY-MM-DD'`. * `stub_test` indicates whether only a small portion of the data should be saved (mainly used by us for testing purposes). * `overwrite` indicates whether to overwrite nwb output files. -* `dandiset_id` optional dandiset ID. + The function can be imported in a separate script with and run, or you can run the file directly and specify the arguments in the `if name == "__main__"` block at the bottom. The function expects the raw data in `data_dir_path` to follow this structure: - - data_dir_path/ - ├── data_open_source - │ ├── behavior - │ │ └── eye.h.times.npy, etc. - │ ├── task - │ └── trials.start_times.json, etc. - │ └── probes.metadata.json - ├── raw_data - │ ├── spikeglx - │ └── */*/*.ap.bin, */*/*.lf.bin, etc. 
- │ ├── v_probe_0 - │ └── raw_data.dat - │ └── v_probe_{n} - │ └── raw_data.dat - ├── spike_sorting_raw - │ ├── np - │ ├── vp_0 - │ └── vp_{n} - ├── sync_pulses - ├── mworks - ├── open_ephys - └── spikeglx - ... - -The conversion will try to automatically fetch metadata from the provided data directory. However, some information, such as the subject's name and age, must be specified by the user in the file `src/jazayeri_lab_to_nwb/watters/metadata.yaml`. If any of the automatically fetched metadata is incorrect, it can also be overriden from this file. +``` + data_dir_path/ + ├── behavior_task + │ ├── eye.h.json, eye.v.json, etc. + │ ├── trials.json + ├── raw_data + │ ├── behavior + │ └── mworks + │ └── moog + │ ├── spikeglx + │ └── */*/*.ap.bin, */*/*.lf.bin, etc. + ├── spike_sorting + │ ├── spikeglx + │ └── kilosort2_5_0 + ├── phys_metadata.json + ├── sync_signals + └── spikeglx + └── transform +``` +The conversion will try to automatically fetch metadata from the provided data directory. However, some information, such as the subject's name and age, must be specified by the user in the file `src/jazayeri_lab_to_nwb/piccato/metadata.yaml`. If any of the automatically fetched metadata is incorrect, it can also be overriden from this file. The converted data will be saved in two files, one called `{session_id}_raw.nwb`, which contains the raw electrophysiology data from the Neuropixels and V-Probes, and one called `{session_id}_processed.nwb` with behavioral data, trial info, and sorted unit spiking. 
diff --git a/src/jazayeri_lab_to_nwb/piccato/display_interface.py b/src/jazayeri_lab_to_nwb/piccato/display_interface.py index 59b06cf..f9c6743 100644 --- a/src/jazayeri_lab_to_nwb/piccato/display_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/display_interface.py @@ -1,4 +1,5 @@ """Class for converting data about display frames.""" + import itertools import json from pathlib import Path @@ -7,7 +8,7 @@ import numpy as np import pandas as pd from neuroconv.datainterfaces.text.timeintervalsinterface import ( - TimeIntervalsInterface + TimeIntervalsInterface, ) from neuroconv.utils import FolderPathType from pynwb import NWBFile @@ -47,7 +48,6 @@ def set_aligned_starting_time(self, aligned_starting_time: float) -> None: self.dataframe.start_time += aligned_starting_time def _read_file(self, file_path: FolderPathType): - # Create dataframe with data for each frame trials = json.load(open(Path(file_path) / "trials.json", "r")) frames = { @@ -57,8 +57,12 @@ def _read_file(self, file_path: FolderPathType): return pd.DataFrame(frames) - def add_to_nwbfile(self, nwbfile: NWBFile, - metadata: Optional[dict] = None, tag: str = "display"): + def add_to_nwbfile( + self, + nwbfile: NWBFile, + metadata: Optional[dict] = None, + tag: str = "display", + ): return super(DisplayInterface, self).add_to_nwbfile( nwbfile=nwbfile, metadata=metadata, diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 2289b90..ecfac49 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -28,7 +28,6 @@ import os import sys from pathlib import Path -from typing import Union from uuid import uuid4 import numpy as np @@ -261,9 +260,9 @@ def session_to_nwb( # Update metadata metadata = processed_converter.get_metadata() - metadata = _update_metadata(metadata=metadata, - subject=subject, - session_id=session_id, + metadata = 
_update_metadata(metadata=metadata, + subject=subject, + session_id=session_id, session_paths=session_paths) # Run conversion diff --git a/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py index bf116e3..89d69b7 100644 --- a/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py @@ -12,7 +12,7 @@ from hdmf.backends.hdf5 import H5DataIO from ndx_events import LabeledEvents from neuroconv.basetemporalalignmentinterface import ( - BaseTemporalAlignmentInterface + BaseTemporalAlignmentInterface, ) from neuroconv.tools.nwb_helpers import get_module from neuroconv.utils import FolderPathType @@ -20,7 +20,7 @@ from pynwb.behavior import SpatialSeries -def get_processing_module(nwbfile: NWBFile): +def _get_processing_module(nwbfile: NWBFile): module_description = ( "Contains behavior, audio, and reward data from experiment." ) @@ -29,7 +29,8 @@ def get_processing_module(nwbfile: NWBFile): ) -class TimestampsFromArrayInterface(BaseTemporalAlignmentInterface): +class TimestampsFromArrayInterface( + BaseTemporalAlignmentInterface): """Interface implementing temporal alignment functions with timestamps.""" def __init__(self, folder_path: FolderPathType): @@ -72,12 +73,15 @@ def __init__(self, folder_path: FolderPathType): # Check eye_h and eye_v have the same number of samples if len(eye_h_times) != len(eye_v_times): - raise ValueError(f"len(eye_h_times) = {len(eye_h_times)}, " - "but len(eye_v_times) " f"= {len(eye_v_times)}") + raise ValueError( + f"len(eye_h_times) = {len(eye_h_times)}, but len(eye_v_times) " + f"= {len(eye_v_times)}" + ) # Check that eye_h_times and eye_v_times are similar to within 0.5ms if not np.allclose(eye_h_times, eye_v_times, atol=0.0005): raise ValueError( - "eye_h_times and eye_v_times are not sufficiently similar") + "eye_h_times and eye_v_times are not sufficiently similar" + ) # Set data attributes 
self.set_original_timestamps(eye_h_times) @@ -98,7 +102,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): ) # Get processing module - processing_module = get_processing_module(nwbfile=nwbfile) + processing_module = _get_processing_module(nwbfile=nwbfile) # Add data to module processing_module.add_data_interface(eye_position) @@ -134,7 +138,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): ) # Get processing module - processing_module = get_processing_module(nwbfile=nwbfile) + processing_module = _get_processing_module(nwbfile=nwbfile) # Add data to module processing_module.add_data_interface(pupil_size) @@ -170,7 +174,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): ) # Get processing module - processing_module = get_processing_module(nwbfile=nwbfile) + processing_module = _get_processing_module(nwbfile=nwbfile) # Add data to module processing_module.add_data_interface(reward_line) @@ -210,7 +214,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): ) # Get processing module - processing_module = get_processing_module(nwbfile=nwbfile) + processing_module = _get_processing_module(nwbfile=nwbfile) # Add data to module processing_module.add_data_interface(audio) diff --git a/src/jazayeri_lab_to_nwb/piccato/trials_interface.py b/src/jazayeri_lab_to_nwb/piccato/trials_interface.py index 8e49063..10e0eaa 100644 --- a/src/jazayeri_lab_to_nwb/piccato/trials_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/trials_interface.py @@ -1,11 +1,14 @@ """Class for converting trial-structured data.""" + import json from pathlib import Path from typing import Optional import numpy as np import pandas as pd -from neuroconv.datainterfaces.text.timeintervalsinterface import TimeIntervalsInterface +from neuroconv.datainterfaces.text.timeintervalsinterface import ( + TimeIntervalsInterface, +) from neuroconv.utils import FolderPathType from pynwb import NWBFile @@ -73,8 +76,10 @@ def set_aligned_starting_time(self, 
aligned_starting_time: float) -> None: def _read_file(self, file_path: FolderPathType): # Create dataframe with data for each trial trials = json.load(open(Path(file_path) / "trials.json", "r")) - trials = {k_mapped: [d[k] for d in trials] - for k, k_mapped in TrialsInterface.KEY_MAP.items()} + trials = { + k_mapped: [d[k] for d in trials] + for k, k_mapped in TrialsInterface.KEY_MAP.items() + } # Field closed_loop_response_position may have None values, so replace # those with NaN to make hdf5 conversion work @@ -93,10 +98,12 @@ def _read_file(self, file_path: FolderPathType): return pd.DataFrame(trials) - def add_to_nwbfile(self, - nwbfile: NWBFile, - metadata: Optional[dict] = None, - tag: str = "trials"): + def add_to_nwbfile( + self, + nwbfile: NWBFile, + metadata: Optional[dict] = None, + tag: str = "trials", + ): return super(TrialsInterface, self).add_to_nwbfile( nwbfile=nwbfile, metadata=metadata, @@ -107,10 +114,14 @@ def add_to_nwbfile(self, @property def column_descriptions(self): column_descriptions = { - "background_indices": ("For each trial, the indices of the " - "background noise pattern patch."), - "broke_fixation": ("For each trial, whether the subject broke " - "fixation and the trial was aborted"), + "background_indices": ( + "For each trial, the indices of the background noise pattern " + "patch." + ), + "broke_fixation": ( + "For each trial, whether the subject broke fixation and the " + "trial was aborted" + ), "stimulus_object_identities": ( "For each trial, a serialized list with one element for each " 'object. Each element is the identity symbol (e.g. "a", "b", ' @@ -162,13 +173,16 @@ def column_descriptions(self): "Response position for each trial. This differs from " "closed_loop_response_position in that this is calculated " "post-hoc from high-resolution eye tracking data, hence is " - "more accurate." + "more accurate. Note that unlike " + "closed_loop_response_position, this may be inconsistent with " + "reward delivery." 
), "response_time": ( "Response time for each trial. This differs from " "closed_loop_response_time in that this is calculated post-hoc " "from high-resolution eye tracking data, hence is more " - "accurate." + "accurate. Note that unlike closed_loop_response_time, this " + "may be inconsistent with reward delivery." ), } From c3cee5bf4797fa23d3f8123d93773078ad774181 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Wed, 10 Jan 2024 14:25:59 -0500 Subject: [PATCH 14/28] Small changes to conversion --- src/jazayeri_lab_to_nwb/piccato/main_convert_session.py | 3 ++- src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index ecfac49..f74498b 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -146,9 +146,10 @@ def _add_spikeglx_data( # Processed data sorting_path = (session_paths.spike_sorting_raw / - "spikeglx/kilosort2_5/sorter_output" + "spikeglx/kilosort2_5_0/sorter_output" ) if os.path.exists(sorting_path): + logging.info("Adding spike sorted data") processed_source_data["SortingNP"] = dict( folder_path=str(sorting_path), keep_good_only=False, diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh index fd8676d..7c1c134 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh @@ -1,13 +1,14 @@ #!/bin/sh #SBATCH -o /om2/user/apiccato/jazayeri-lab-to-nwb/src/jazayeri_lab_to_nwb/piccato/logs/%A.out -#SBATCH -t 06:00:00 +#SBATCH -t 99:00:00 #SBATCH -n 1 #SBATCH --mem-per-cpu 30G #SBATCH --mail-type=NONE #SBATCH --mail-user=apiccato@mit.edu #SBATCH --partition=jazayeri + SUBJECT=$1 # Argument passed in by user. 
Should be in subject/date format echo "SUBJECT: $SUBJECT" if [ -z "$SUBJECT" ]; then From 6fa2466310a56e189497682f72b955fc8441a269 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Wed, 10 Jan 2024 14:39:43 -0500 Subject: [PATCH 15/28] Tiny formatting changes to prepare for merge --- .gitignore | 3 +- src/jazayeri_lab_to_nwb/piccato/README.md | 5 +-- .../piccato/get_session_paths.py | 40 ------------------- .../piccato/main_convert_session.py | 1 - .../piccato/main_convert_session.sh | 3 ++ .../piccato/nwb_converter.py | 4 +- .../piccato/timeseries_interface.py | 3 +- .../piccato/trials_interface.py | 8 ++-- 8 files changed, 13 insertions(+), 54 deletions(-) diff --git a/.gitignore b/.gitignore index d3f9d3f..75a6b1c 100644 --- a/.gitignore +++ b/.gitignore @@ -148,5 +148,4 @@ dmypy.json # NWB files **.nwb -*.out -**logs/* \ No newline at end of file +*.out \ No newline at end of file diff --git a/src/jazayeri_lab_to_nwb/piccato/README.md b/src/jazayeri_lab_to_nwb/piccato/README.md index 753c32b..fc5a75d 100644 --- a/src/jazayeri_lab_to_nwb/piccato/README.md +++ b/src/jazayeri_lab_to_nwb/piccato/README.md @@ -21,7 +21,6 @@ The conversion function for this experiment, `session_to_nwb`, is found in `src/ * `stub_test` indicates whether only a small portion of the data should be saved (mainly used by us for testing purposes). * `overwrite` indicates whether to overwrite nwb output files. - The function can be imported in a separate script with and run, or you can run the file directly and specify the arguments in the `if name == "__main__"` block at the bottom. The function expects the raw data in `data_dir_path` to follow this structure: @@ -46,6 +45,6 @@ The function expects the raw data in `data_dir_path` to follow this structure: ``` The conversion will try to automatically fetch metadata from the provided data directory. 
However, some information, such as the subject's name and age, must be specified by the user in the file `src/jazayeri_lab_to_nwb/piccato/metadata.yaml`. If any of the automatically fetched metadata is incorrect, it can also be overriden from this file. -The converted data will be saved in two files, one called `{session_id}_raw.nwb`, which contains the raw electrophysiology data from the Neuropixels and V-Probes, and one called `{session_id}_processed.nwb` with behavioral data, trial info, and sorted unit spiking. +The converted data will be saved in two files, one called `{session_id}_ecephys.nwb`, which contains the raw electrophysiology data from the Neuropixels and V-Probes, and one called `{session_id}_behavior+ecephys.nwb` with behavioral data, trial info, and sorted unit spiking. -If you run into memory issues when writing the `{session_id}_raw.nwb` files, you may want to set `buffer_gb` to a value smaller than 1 (its default) in the `conversion_options` dicts for the recording interfaces, i.e. [here](https://github.com/catalystneuro/jazayeri-lab-to-nwb/blob/vprobe_dev/src/jazayeri_lab_to_nwb/watters/main_convert_session.py#L189). +If you run into memory issues when writing the `{session_id}_ecephys.nwb` files, you may want to set `buffer_gb` to a value smaller than 1 (its default) in the `conversion_options` dicts for the recording interfaces, i.e. [here](https://github.com/catalystneuro/jazayeri-lab-to-nwb/blob/vprobe_dev/src/jazayeri_lab_to_nwb/watters/main_convert_session.py#L189). 
diff --git a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py index 499793a..ac9867b 100644 --- a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py +++ b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py @@ -65,44 +65,6 @@ def _get_session_paths_openmind(subject, session): return session_paths -# TODO: Update Globus paths when these are available - -def _get_session_paths_globus(subject, session): - """Get paths to all components of the data in the globus repo.""" - subject_id = SUBJECT_NAME_TO_ID[subject] - base_data_dir = f"/shared/catalystneuro/JazLab/{subject_id}/{session}/" - - # Path to write output nwb files to - output_path = f"~/conversion_nwb/jazayeri-lab-to-nwb" - - # Path to the raw data. This is used for reading raw physiology data. - raw_data_path = f"{base_data_dir}/raw_data" - - # Path to task and behavior data. - task_behavior_data_path = f"{base_data_dir}/processed_task_data" - - # Path to open-source data. This is used for reading behavior and task data. - data_open_source_path = f"{base_data_dir}/data_open_source" - - # Path to sync pulses. This is used for reading timescale transformations - # between physiology and mworks data streams. - sync_pulses_path = f"{base_data_dir}/sync_pulses" - - # Path to spike sorting. This is used for reading spike sorted data. - spike_sorting_raw_path = f"{base_data_dir}/spike_sorting" - - session_paths = SessionPaths( - output=pathlib.Path(output_path), - raw_data=pathlib.Path(raw_data_path), - data_open_source=pathlib.Path(data_open_source_path), - behavior_data=pathlib.Path(task_behavior_data_path), - task_data=pathlib.Path(task_behavior_data_path), - sync_pulses=pathlib.Path(sync_pulses_path), - spike_sorting_raw=pathlib.Path(spike_sorting_raw_path), - ) - - return session_paths - def get_session_paths(subject, session, repo="openmind"): """Get paths to all components of the data. 
@@ -112,7 +74,5 @@ def get_session_paths(subject, session, repo="openmind"): """ if repo == "openmind": return _get_session_paths_openmind(subject=subject, session=session) - elif repo == "globus": - return _get_session_paths_globus(subject=subject, session=session) else: raise ValueError(f"Invalid repo {repo}") diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index f74498b..e29287c 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -17,7 +17,6 @@ _REPO _STUB_TEST _OVERWRITE - _DANDISET_ID See comments below for descriptions of these variables. """ diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh index 7c1c134..5a9e0bb 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh @@ -8,6 +8,9 @@ #SBATCH --mail-user=apiccato@mit.edu #SBATCH --partition=jazayeri +# Script to convert a session to NWB format. Takes in two arguments from user: +# name of subject and session date. + SUBJECT=$1 # Argument passed in by user. Should be in subject/date format echo "SUBJECT: $SUBJECT" diff --git a/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py b/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py index 36bf2ea..2ff9826 100644 --- a/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py +++ b/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py @@ -27,8 +27,8 @@ def _trim_excess_spikes( recording: BaseRecording instance. The recording object. sorting: BaseSorting instance. The sorting object. max_excess_samples: Int. If a spike exists more than this number of - samples beyond the end of the recording, an error is raised. This is - in units of samples, which is typically 30000Hz. + samples beyond the end of the recording, an error is raised. 
This + is in units of samples, which is typically 30000Hz. Returns: bool True if exceeding spikes, False otherwise. diff --git a/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py index 89d69b7..4fb6e0b 100644 --- a/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py @@ -29,8 +29,7 @@ def _get_processing_module(nwbfile: NWBFile): ) -class TimestampsFromArrayInterface( - BaseTemporalAlignmentInterface): +class TimestampsFromArrayInterface(BaseTemporalAlignmentInterface): """Interface implementing temporal alignment functions with timestamps.""" def __init__(self, folder_path: FolderPathType): diff --git a/src/jazayeri_lab_to_nwb/piccato/trials_interface.py b/src/jazayeri_lab_to_nwb/piccato/trials_interface.py index 10e0eaa..69c12ac 100644 --- a/src/jazayeri_lab_to_nwb/piccato/trials_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/trials_interface.py @@ -179,10 +179,10 @@ def column_descriptions(self): ), "response_time": ( "Response time for each trial. This differs from " - "closed_loop_response_time in that this is calculated post-hoc " - "from high-resolution eye tracking data, hence is more " - "accurate. Note that unlike closed_loop_response_time, this " - "may be inconsistent with reward delivery." + "closed_loop_response_time in that this is calculated " + "post-hoc from high-resolution eye tracking data, hence is " + "more accurate. Note that unlike closed_loop_response_time, " + "this may be inconsistent with reward delivery." 
), } From e08dbb371ca74baf7d6ebdf87e5fcb3ec8afc849 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Jan 2024 19:49:25 +0000 Subject: [PATCH 16/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .gitignore | 2 +- .../piccato/display_interface.py | 3 +- .../piccato/get_session_paths.py | 8 +-- .../piccato/main_convert_session.py | 65 ++++++++++--------- .../piccato/main_convert_session.sh | 4 +- src/jazayeri_lab_to_nwb/piccato/metadata.yaml | 14 ++-- .../piccato/nwb_converter.py | 2 +- .../piccato/recording_interface.py | 9 ++- .../piccato/timeseries_interface.py | 3 +- .../piccato/trials_interface.py | 8 ++- 10 files changed, 65 insertions(+), 53 deletions(-) diff --git a/.gitignore b/.gitignore index 75a6b1c..b8d749b 100644 --- a/.gitignore +++ b/.gitignore @@ -148,4 +148,4 @@ dmypy.json # NWB files **.nwb -*.out \ No newline at end of file +*.out diff --git a/src/jazayeri_lab_to_nwb/piccato/display_interface.py b/src/jazayeri_lab_to_nwb/piccato/display_interface.py index f9c6743..60cd2f6 100644 --- a/src/jazayeri_lab_to_nwb/piccato/display_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/display_interface.py @@ -42,7 +42,8 @@ def get_metadata(self) -> dict: def get_timestamps(self) -> np.ndarray: return super(DisplayInterface, self).get_timestamps( - column="start_time") + column="start_time" + ) def set_aligned_starting_time(self, aligned_starting_time: float) -> None: self.dataframe.start_time += aligned_starting_time diff --git a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py index ac9867b..158a19f 100644 --- a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py +++ b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py @@ -7,8 +7,8 @@ "elgar": "elgar", } -OM_PATH = '/om2/user/apiccato/phys_preprocessing_open_source/phys_data' -DANDISET_ID = '000767' +OM_PATH = 
"/om2/user/apiccato/phys_preprocessing_open_source/phys_data" +DANDISET_ID = "000767" SessionPaths = collections.namedtuple( "SessionPaths", [ @@ -50,9 +50,7 @@ def _get_session_paths_openmind(subject, session): f"{OM_PATH}/{subject}/{session}/spike_sorting" ) - session_path = pathlib.Path( - f"{OM_PATH}/{subject}/{session}/" - ) + session_path = pathlib.Path(f"{OM_PATH}/{subject}/{session}/") session_paths = SessionPaths( output=output_path, diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index e29287c..979d82f 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -28,11 +28,10 @@ import sys from pathlib import Path from uuid import uuid4 -import numpy as np import get_session_paths +import numpy as np import nwb_converter - from neuroconv.utils import dict_deep_update, load_dict_from_file # Data repository. Either 'globus' or 'openmind' @@ -77,16 +76,15 @@ def _update_metadata(metadata, subject, session_id, session_paths): metadata["Subject"]["age"] = _SUBJECT_TO_AGE[subject] # Add probe locations - probe_metadata_file = ( - session_paths.session_data / "phys_metadata.json" - ) + probe_metadata_file = session_paths.session_data / "phys_metadata.json" probe_metadata = json.load(open(probe_metadata_file, "r")) for entry in metadata["Ecephys"]["ElectrodeGroup"]: if entry["device"] == "Neuropixel-Imec": neuropixel_metadata = probe_metadata coordinate_system = neuropixel_metadata["coordinate_system"] - coordinates = np.round(neuropixel_metadata["coordinates"][:2], - decimals=2) + coordinates = np.round( + neuropixel_metadata["coordinates"][:2], decimals=2 + ) depth_from_surface = neuropixel_metadata["depth"] entry["description"] = ( f"{entry['description']}\n" @@ -127,9 +125,9 @@ def _add_spikeglx_data( logging.info("Adding SpikeGLX data") # Raw data - spikeglx_dir = Path(_get_single_file( - 
session_paths.raw_data/"spikeglx", - suffix='imec0')) + spikeglx_dir = Path( + _get_single_file(session_paths.raw_data / "spikeglx", suffix="imec0") + ) ap_file = _get_single_file(spikeglx_dir, suffix="*.ap.bin") lfp_file = _get_single_file(spikeglx_dir, suffix="*.lf.bin") raw_source_data["RecordingNP"] = dict(file_path=ap_file) @@ -139,22 +137,26 @@ def _add_spikeglx_data( raw_conversion_options["RecordingNP"] = dict(stub_test=stub_test) raw_conversion_options["LF"] = dict(stub_test=stub_test) processed_conversion_options["RecordingNP"] = dict( - stub_test=stub_test, write_electrical_series=False) + stub_test=stub_test, write_electrical_series=False + ) processed_conversion_options["LF"] = dict( - stub_test=stub_test, write_electrical_series=False) + stub_test=stub_test, write_electrical_series=False + ) # Processed data - sorting_path = (session_paths.spike_sorting_raw / - "spikeglx/kilosort2_5_0/sorter_output" - ) + sorting_path = ( + session_paths.spike_sorting_raw + / "spikeglx/kilosort2_5_0/sorter_output" + ) if os.path.exists(sorting_path): logging.info("Adding spike sorted data") processed_source_data["SortingNP"] = dict( folder_path=str(sorting_path), keep_good_only=False, ) - processed_conversion_options["SortingNP"] = dict(stub_test=stub_test, - write_as="processing") + processed_conversion_options["SortingNP"] = dict( + stub_test=stub_test, write_as="processing" + ) def session_to_nwb( @@ -185,9 +187,9 @@ def session_to_nwb( logging.info(f"overwrite = {overwrite}") # Get paths - session_paths = get_session_paths.get_session_paths(subject, - session, - repo=_REPO) + session_paths = get_session_paths.get_session_paths( + subject, session, repo=_REPO + ) logging.info(f"session_paths: {session_paths}") # Get paths for nwb files to write @@ -197,12 +199,11 @@ def session_to_nwb( else: session_id = f"{session}" raw_nwb_path = ( - session_paths.output / - f"sub-{subject}_ses-{session_id}_ecephys.nwb" + session_paths.output / 
f"sub-{subject}_ses-{session_id}_ecephys.nwb" ) processed_nwb_path = ( - session_paths.output / - f"sub-{subject}_ses-{session_id}_behavior+ecephys.nwb" + session_paths.output + / f"sub-{subject}_ses-{session_id}_behavior+ecephys.nwb" ) logging.info(f"raw_nwb_path = {raw_nwb_path}") logging.info(f"processed_nwb_path = {processed_nwb_path}") @@ -239,13 +240,15 @@ def session_to_nwb( # Add trials data logging.info("Adding trials data") processed_source_data["Trials"] = dict( - folder_path=str(session_paths.behavior_task_data)) + folder_path=str(session_paths.behavior_task_data) + ) processed_conversion_options["Trials"] = dict() # Add display data logging.info("Adding display data") processed_source_data["Display"] = dict( - folder_path=str(session_paths.behavior_task_data)) + folder_path=str(session_paths.behavior_task_data) + ) processed_conversion_options["Display"] = dict() # Create data converters @@ -260,10 +263,12 @@ def session_to_nwb( # Update metadata metadata = processed_converter.get_metadata() - metadata = _update_metadata(metadata=metadata, - subject=subject, - session_id=session_id, - session_paths=session_paths) + metadata = _update_metadata( + metadata=metadata, + subject=subject, + session_id=session_id, + session_paths=session_paths, + ) # Run conversion logging.info("Running processed conversion") diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh index 5a9e0bb..3495f0a 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh @@ -9,7 +9,7 @@ #SBATCH --partition=jazayeri # Script to convert a session to NWB format. Takes in two arguments from user: -# name of subject and session date. +# name of subject and session date. SUBJECT=$1 # Argument passed in by user. 
Should be in subject/date format @@ -29,4 +29,4 @@ fi source ~/.bashrc conda activate jazayeri_lab_to_nwb_env cd /om2/user/apiccato/jazayeri-lab-to-nwb -python src/jazayeri_lab_to_nwb/piccato/main_convert_session.py $SUBJECT $SESSION \ No newline at end of file +python src/jazayeri_lab_to_nwb/piccato/main_convert_session.py $SUBJECT $SESSION diff --git a/src/jazayeri_lab_to_nwb/piccato/metadata.yaml b/src/jazayeri_lab_to_nwb/piccato/metadata.yaml index cc45ead..ba80cba 100644 --- a/src/jazayeri_lab_to_nwb/piccato/metadata.yaml +++ b/src/jazayeri_lab_to_nwb/piccato/metadata.yaml @@ -3,13 +3,13 @@ NWBFile: # - https://doi.org/12345 session_description: Data from macaque performing a working and long-term memory task. A - session switches in a block-like fashion between a long- and a short-term - memory task. In the short-term memory task, the subject is presented with - three objects placed in random locations in the periphery. The subject must - respond by retrieving and saccading to the location of a cued object after - a short delay. In the long-term memory task, the objects are not presented - in their corresponding locations. Instead, the subject must respond - according to a set of learned long-term object-location associations. + session switches in a block-like fashion between a long- and a short-term + memory task. In the short-term memory task, the subject is presented with + three objects placed in random locations in the periphery. The subject must + respond by retrieving and saccading to the location of a cued object after + a short delay. In the long-term memory task, the objects are not presented + in their corresponding locations. Instead, the subject must respond + according to a set of learned long-term object-location associations. 
institution: MIT lab: Jazayeri experimenter: diff --git a/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py b/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py index 2ff9826..56b61ea 100644 --- a/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py +++ b/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py @@ -27,7 +27,7 @@ def _trim_excess_spikes( recording: BaseRecording instance. The recording object. sorting: BaseSorting instance. The sorting object. max_excess_samples: Int. If a spike exists more than this number of - samples beyond the end of the recording, an error is raised. This + samples beyond the end of the recording, an error is raised. This is in units of samples, which is typically 30000Hz. Returns: diff --git a/src/jazayeri_lab_to_nwb/piccato/recording_interface.py b/src/jazayeri_lab_to_nwb/piccato/recording_interface.py index 216bfaf..f67dd44 100644 --- a/src/jazayeri_lab_to_nwb/piccato/recording_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/recording_interface.py @@ -54,7 +54,10 @@ def __init__( if entry["label"] == probe_key: probe_metadata = entry - if probe_metadata is not None and "electrodes_locations" in probe_metadata: + if ( + probe_metadata is not None + and "electrodes_locations" in probe_metadata + ): # Grab electrode position from metadata locations_array = np.array(probe_metadata["electrodes_locations"]) ndim = locations_array.shape[1] @@ -62,7 +65,9 @@ def __init__( probeinterface.set_contacts(locations_array) else: # Generate V-probe geometry: 64 channels arranged vertically with 50 um spacing - probe = probeinterface.generate_linear_probe(num_elec=channel_count, ypitch=50) + probe = probeinterface.generate_linear_probe( + num_elec=channel_count, ypitch=50 + ) probe.set_device_channel_indices(np.arange(channel_count)) probe.name = probe_name diff --git a/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py index 4fb6e0b..79e366c 100644 --- 
a/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py @@ -166,7 +166,8 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): reward_line = LabeledEvents( name="reward_line", description=( - "Reward line data representing events of reward dispenser"), + "Reward line data representing events of reward dispenser" + ), timestamps=H5DataIO(self._timestamps, compression="gzip"), data=self._reward_line, labels=["closed", "open"], diff --git a/src/jazayeri_lab_to_nwb/piccato/trials_interface.py b/src/jazayeri_lab_to_nwb/piccato/trials_interface.py index 69c12ac..977f06d 100644 --- a/src/jazayeri_lab_to_nwb/piccato/trials_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/trials_interface.py @@ -128,9 +128,11 @@ def column_descriptions(self): '"c", ...) of the corresponding object.' ), "trial_type": ("For each trial, whether condition is LTM or STM"), - "stimulus_set": ("For each trial, string identity symbol of the " - "set of objects. String consists of sorted list " - "of object identity symbols"), + "stimulus_set": ( + "For each trial, string identity symbol of the " + "set of objects. String consists of sorted list " + "of object identity symbols" + ), "stimulus_object_positions": ( "For each trial, a serialized list with one element for each " "object. 
Each element is the initial (x, y) position of the " From d3cfadc172bbb3a9d97f473d1aa17ea8d07930cd Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Tue, 16 Jan 2024 11:56:57 -0500 Subject: [PATCH 17/28] made session_id a string --- src/jazayeri_lab_to_nwb/piccato/main_convert_session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 979d82f..a01e37b 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -197,7 +197,7 @@ def session_to_nwb( if stub_test: session_id = f"{session}-stub" else: - session_id = f"{session}" + session_id = f"{session}-full" raw_nwb_path = ( session_paths.output / f"sub-{subject}_ses-{session_id}_ecephys.nwb" ) From dfd9da690a213d66d405e6ef810b332d4a1066ff Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Thu, 18 Jan 2024 16:23:55 -0500 Subject: [PATCH 18/28] Added option to exclude non-trial-structured data from file (to see if that will load more quickly on Neurosift) --- .../piccato/main_convert_session.py | 56 ++++++++++++------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index a01e37b..07f1b6f 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -40,7 +40,8 @@ _STUB_TEST = False # Whether to overwrite output nwb files _OVERWRITE = True - +# Whether to only inclue trial-structured data in the processed NWB file +_TRIALS_ONLY = True # Set logger level for info is displayed in console logging.getLogger().setLevel(logging.INFO) @@ -194,17 +195,24 @@ def session_to_nwb( # Get paths for nwb files to write session_paths.output.mkdir(parents=True, exist_ok=True) + session_id = str(session) if stub_test: - session_id = 
f"{session}-stub" + session_id = f"{session_id}-stub" else: session_id = f"{session}-full" raw_nwb_path = ( session_paths.output / f"sub-{subject}_ses-{session_id}_ecephys.nwb" ) - processed_nwb_path = ( - session_paths.output - / f"sub-{subject}_ses-{session_id}_behavior+ecephys.nwb" - ) + if not _TRIALS_ONLY: + processed_nwb_path = ( + session_paths.output + / f"sub-{subject}_ses-{session_id}_trials+behavior+ecephys.nwb" + ) + else: + processed_nwb_path = ( + session_paths.output + / f"sub-{subject}_ses-{session_id}_trials+ecephys.nwb" + ) logging.info(f"raw_nwb_path = {raw_nwb_path}") logging.info(f"processed_nwb_path = {processed_nwb_path}") logging.info("") @@ -226,16 +234,21 @@ def session_to_nwb( ) # Add behavior data - logging.info("Adding behavior data") - behavior_task_path = str(session_paths.behavior_task_data) - processed_source_data["EyePosition"] = dict(folder_path=behavior_task_path) - processed_conversion_options["EyePosition"] = dict() - processed_source_data["PupilSize"] = dict(folder_path=behavior_task_path) - processed_conversion_options["PupilSize"] = dict() - processed_source_data["RewardLine"] = dict(folder_path=behavior_task_path) - processed_conversion_options["RewardLine"] = dict() - processed_source_data["Audio"] = dict(folder_path=behavior_task_path) - processed_conversion_options["Audio"] = dict() + if not _TRIALS_ONLY: + logging.info("Adding behavior data") + behavior_task_path = str(session_paths.behavior_task_data) + processed_source_data["EyePosition"] = dict( + folder_path=behavior_task_path) + processed_conversion_options["EyePosition"] = dict() + processed_source_data["PupilSize"] = dict( + folder_path=behavior_task_path) + processed_conversion_options["PupilSize"] = dict() + processed_source_data["RewardLine"] = dict( + folder_path=behavior_task_path) + processed_conversion_options["RewardLine"] = dict() + processed_source_data["Audio"] = dict( + folder_path=behavior_task_path) + processed_conversion_options["Audio"] = dict() # 
Add trials data logging.info("Adding trials data") @@ -245,11 +258,12 @@ def session_to_nwb( processed_conversion_options["Trials"] = dict() # Add display data - logging.info("Adding display data") - processed_source_data["Display"] = dict( - folder_path=str(session_paths.behavior_task_data) - ) - processed_conversion_options["Display"] = dict() + if not _TRIALS_ONLY: + logging.info("Adding display data") + processed_source_data["Display"] = dict( + folder_path=str(session_paths.behavior_task_data) + ) + processed_conversion_options["Display"] = dict() # Create data converters processed_converter = nwb_converter.NWBConverter( From 1daa70309bcd681c991b3510fe947670f237e27f Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Mon, 22 Jan 2024 15:51:15 -0500 Subject: [PATCH 19/28] Final small refactoring changes --- .../piccato/main_convert_session.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 07f1b6f..aec0c76 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -40,8 +40,6 @@ _STUB_TEST = False # Whether to overwrite output nwb files _OVERWRITE = True -# Whether to only inclue trial-structured data in the processed NWB file -_TRIALS_ONLY = True # Set logger level for info is displayed in console logging.getLogger().setLevel(logging.INFO) @@ -203,16 +201,10 @@ def session_to_nwb( raw_nwb_path = ( session_paths.output / f"sub-{subject}_ses-{session_id}_ecephys.nwb" ) - if not _TRIALS_ONLY: - processed_nwb_path = ( - session_paths.output - / f"sub-{subject}_ses-{session_id}_trials+behavior+ecephys.nwb" - ) - else: - processed_nwb_path = ( - session_paths.output - / f"sub-{subject}_ses-{session_id}_trials+ecephys.nwb" - ) + processed_nwb_path = ( + session_paths.output + / f"sub-{subject}_ses-{session_id}_ecephys.nwb" + ) 
logging.info(f"raw_nwb_path = {raw_nwb_path}") logging.info(f"processed_nwb_path = {processed_nwb_path}") logging.info("") From bf8c06104b707ff8ae256e3adbb5ca97e32e9456 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jan 2024 20:51:48 +0000 Subject: [PATCH 20/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../piccato/main_convert_session.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index aec0c76..bd7d72b 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -202,8 +202,7 @@ def session_to_nwb( session_paths.output / f"sub-{subject}_ses-{session_id}_ecephys.nwb" ) processed_nwb_path = ( - session_paths.output - / f"sub-{subject}_ses-{session_id}_ecephys.nwb" + session_paths.output / f"sub-{subject}_ses-{session_id}_ecephys.nwb" ) logging.info(f"raw_nwb_path = {raw_nwb_path}") logging.info(f"processed_nwb_path = {processed_nwb_path}") @@ -230,16 +229,18 @@ def session_to_nwb( logging.info("Adding behavior data") behavior_task_path = str(session_paths.behavior_task_data) processed_source_data["EyePosition"] = dict( - folder_path=behavior_task_path) + folder_path=behavior_task_path + ) processed_conversion_options["EyePosition"] = dict() processed_source_data["PupilSize"] = dict( - folder_path=behavior_task_path) + folder_path=behavior_task_path + ) processed_conversion_options["PupilSize"] = dict() processed_source_data["RewardLine"] = dict( - folder_path=behavior_task_path) + folder_path=behavior_task_path + ) processed_conversion_options["RewardLine"] = dict() - processed_source_data["Audio"] = dict( - folder_path=behavior_task_path) + processed_source_data["Audio"] = dict(folder_path=behavior_task_path) 
processed_conversion_options["Audio"] = dict() # Add trials data From 05974c52c43da9f6613e0996817969d311ceab06 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Mon, 22 Jan 2024 16:03:46 -0500 Subject: [PATCH 21/28] Updated neuroconv requirement --- .../piccato/main_convert_session.py | 40 +++++++++---------- .../piccato/requirements.txt | 1 + 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index aec0c76..39d706f 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -226,21 +226,20 @@ def session_to_nwb( ) # Add behavior data - if not _TRIALS_ONLY: - logging.info("Adding behavior data") - behavior_task_path = str(session_paths.behavior_task_data) - processed_source_data["EyePosition"] = dict( - folder_path=behavior_task_path) - processed_conversion_options["EyePosition"] = dict() - processed_source_data["PupilSize"] = dict( - folder_path=behavior_task_path) - processed_conversion_options["PupilSize"] = dict() - processed_source_data["RewardLine"] = dict( - folder_path=behavior_task_path) - processed_conversion_options["RewardLine"] = dict() - processed_source_data["Audio"] = dict( - folder_path=behavior_task_path) - processed_conversion_options["Audio"] = dict() + logging.info("Adding behavior data") + behavior_task_path = str(session_paths.behavior_task_data) + processed_source_data["EyePosition"] = dict( + folder_path=behavior_task_path) + processed_conversion_options["EyePosition"] = dict() + processed_source_data["PupilSize"] = dict( + folder_path=behavior_task_path) + processed_conversion_options["PupilSize"] = dict() + processed_source_data["RewardLine"] = dict( + folder_path=behavior_task_path) + processed_conversion_options["RewardLine"] = dict() + processed_source_data["Audio"] = dict( + folder_path=behavior_task_path) + 
processed_conversion_options["Audio"] = dict() # Add trials data logging.info("Adding trials data") @@ -250,12 +249,11 @@ def session_to_nwb( processed_conversion_options["Trials"] = dict() # Add display data - if not _TRIALS_ONLY: - logging.info("Adding display data") - processed_source_data["Display"] = dict( - folder_path=str(session_paths.behavior_task_data) - ) - processed_conversion_options["Display"] = dict() + logging.info("Adding display data") + processed_source_data["Display"] = dict( + folder_path=str(session_paths.behavior_task_data) + ) + processed_conversion_options["Display"] = dict() # Create data converters processed_converter = nwb_converter.NWBConverter( diff --git a/src/jazayeri_lab_to_nwb/piccato/requirements.txt b/src/jazayeri_lab_to_nwb/piccato/requirements.txt index e69de29..0c14ef5 100644 --- a/src/jazayeri_lab_to_nwb/piccato/requirements.txt +++ b/src/jazayeri_lab_to_nwb/piccato/requirements.txt @@ -0,0 +1 @@ +neuroconv==0.4.7 \ No newline at end of file From a422e07cc32899815024bd940d4a08875acadf59 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Tue, 6 Feb 2024 16:20:11 -0500 Subject: [PATCH 22/28] Merge branch 'piccato' of github.com:catalystneuro/jazayeri-lab-to-nwb into piccato --- src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh index 3495f0a..55a576d 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh @@ -4,7 +4,7 @@ #SBATCH -t 99:00:00 #SBATCH -n 1 #SBATCH --mem-per-cpu 30G -#SBATCH --mail-type=NONE +#SBATCH --mail-type=END #SBATCH --mail-user=apiccato@mit.edu #SBATCH --partition=jazayeri From f336f1ff17bc8d8fe4f9b86eae46b0516b8501c5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Feb 2024 21:20:38 
+0000 Subject: [PATCH 23/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../piccato/main_convert_session.py | 16 ++++++---------- .../piccato/recording_interface.py | 1 + src/jazayeri_lab_to_nwb/piccato/requirements.txt | 2 +- .../piccato/timeseries_interface.py | 1 + 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index cb99e5b..7243d2f 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -20,7 +20,6 @@ See comments below for descriptions of these variables. """ - import glob import json import logging @@ -202,7 +201,8 @@ def session_to_nwb( session_paths.output / f"sub-{subject}_ses-{session_id}_ecephys.nwb" ) processed_nwb_path = ( - session_paths.output / f"sub-{subject}_ses-{session_id}_behavior+ecephys.nwb" + session_paths.output + / f"sub-{subject}_ses-{session_id}_behavior+ecephys.nwb" ) logging.info(f"raw_nwb_path = {raw_nwb_path}") logging.info(f"processed_nwb_path = {processed_nwb_path}") @@ -227,17 +227,13 @@ def session_to_nwb( # Add behavior data logging.info("Adding behavior data") behavior_task_path = str(session_paths.behavior_task_data) - processed_source_data["EyePosition"] = dict( - folder_path=behavior_task_path) + processed_source_data["EyePosition"] = dict(folder_path=behavior_task_path) processed_conversion_options["EyePosition"] = dict() - processed_source_data["PupilSize"] = dict( - folder_path=behavior_task_path) + processed_source_data["PupilSize"] = dict(folder_path=behavior_task_path) processed_conversion_options["PupilSize"] = dict() - processed_source_data["RewardLine"] = dict( - folder_path=behavior_task_path) + processed_source_data["RewardLine"] = dict(folder_path=behavior_task_path) processed_conversion_options["RewardLine"] = dict() - 
processed_source_data["Audio"] = dict( - folder_path=behavior_task_path) + processed_source_data["Audio"] = dict(folder_path=behavior_task_path) processed_conversion_options["Audio"] = dict() # Add trials data diff --git a/src/jazayeri_lab_to_nwb/piccato/recording_interface.py b/src/jazayeri_lab_to_nwb/piccato/recording_interface.py index f67dd44..e31b3f9 100644 --- a/src/jazayeri_lab_to_nwb/piccato/recording_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/recording_interface.py @@ -1,4 +1,5 @@ """Primary class for recording data.""" + import json from typing import Optional diff --git a/src/jazayeri_lab_to_nwb/piccato/requirements.txt b/src/jazayeri_lab_to_nwb/piccato/requirements.txt index 0c14ef5..eb36ba3 100644 --- a/src/jazayeri_lab_to_nwb/piccato/requirements.txt +++ b/src/jazayeri_lab_to_nwb/piccato/requirements.txt @@ -1 +1 @@ -neuroconv==0.4.7 \ No newline at end of file +neuroconv==0.4.7 diff --git a/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py index 79e366c..a2bb8e1 100644 --- a/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/timeseries_interface.py @@ -5,6 +5,7 @@ updates. For trial structured variables, see ../trials_interface.py. For variables pertaining to display updates, see ../frames_interface.py. 
""" + import json from pathlib import Path From 502e988b82d2941f70585a7bb1991e9239489124 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Mon, 26 Feb 2024 09:44:08 -0500 Subject: [PATCH 24/28] split up conversion --- .../piccato/main_convert_session.py | 259 +++++++++++------- .../piccato/main_convert_session.sh | 18 +- 2 files changed, 171 insertions(+), 106 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 7243d2f..2996d05 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -31,7 +31,10 @@ import get_session_paths import numpy as np import nwb_converter +from neuroconv.tools.spikeinterface import write_sorting from neuroconv.utils import dict_deep_update, load_dict_from_file +from spikeinterface.extractors import read_kilosort, read_spikeglx +from spikeinterface import curation # Data repository. Either 'globus' or 'openmind' _REPO = "openmind" @@ -111,11 +114,34 @@ def _update_metadata(metadata, subject, session_id, session_paths): return metadata +# def _add_curated_sorting_data( +# nwbfile_path: str, +# session_paths: get_session_paths.SessionPaths, +# ): +# """Adds curated sorting data to the processed NWB file.""" +# sorting = read_kilosort( +# folder_path=(session_paths.spike_sorting_raw / +# "spikeglx/kilosort2_5_0/sorter_output") +# ) +# spikeglx_dir = Path( +# _get_single_file(session_paths.raw_data / "spikeglx", suffix="imec0") +# ) +# sorting = curation.remove_excess_spikes(sorting=sorting) +# keep_unit_ids = json.load(open( +# session_paths.spike_sorting_raw / "keep_unit_ids.json")) +# write_sorting( +# nwbfile_path=nwbfile_path, +# sorting=sorting, +# units_ids=keep_unit_ids, +# overwrite=False, +# write_as='units', +# ) + + def _add_spikeglx_data( - raw_source_data, - raw_conversion_options, - processed_source_data, - processed_conversion_options, + source_data, + 
conversion_options, + conversion_type, session_paths, stub_test, ): @@ -128,38 +154,43 @@ def _add_spikeglx_data( ) ap_file = _get_single_file(spikeglx_dir, suffix="*.ap.bin") lfp_file = _get_single_file(spikeglx_dir, suffix="*.lf.bin") - raw_source_data["RecordingNP"] = dict(file_path=ap_file) - raw_source_data["LF"] = dict(file_path=lfp_file) - processed_source_data["RecordingNP"] = dict(file_path=ap_file) - processed_source_data["LF"] = dict(file_path=lfp_file) - raw_conversion_options["RecordingNP"] = dict(stub_test=stub_test) - raw_conversion_options["LF"] = dict(stub_test=stub_test) - processed_conversion_options["RecordingNP"] = dict( - stub_test=stub_test, write_electrical_series=False - ) - processed_conversion_options["LF"] = dict( - stub_test=stub_test, write_electrical_series=False - ) - - # Processed data - sorting_path = ( - session_paths.spike_sorting_raw - / "spikeglx/kilosort2_5_0/sorter_output" - ) - if os.path.exists(sorting_path): - logging.info("Adding spike sorted data") - processed_source_data["SortingNP"] = dict( - folder_path=str(sorting_path), - keep_good_only=False, + if conversion_type == "raw": + source_data["RecordingNP"] = dict(file_path=ap_file) + source_data["LF"] = dict(file_path=lfp_file) + conversion_options["RecordingNP"] = dict(stub_test=stub_test) + conversion_options["LF"] = dict(stub_test=stub_test) + + elif conversion_type == 'processed': + source_data["RecordingNP"] = dict(file_path=ap_file) + source_data["LF"] = dict(file_path=lfp_file) + + conversion_options["RecordingNP"] = dict( + stub_test=stub_test, write_electrical_series=False ) - processed_conversion_options["SortingNP"] = dict( - stub_test=stub_test, write_as="processing" + conversion_options["LF"] = dict( + stub_test=stub_test, write_electrical_series=False ) + # Processed data + sorting_path = ( + session_paths.spike_sorting_raw + / "spikeglx/kilosort2_5_0/sorter_output" + ) + if os.path.exists(sorting_path): + logging.info("Adding spike sorted data") + 
source_data["SortingNP"] = dict( + folder_path=str(sorting_path), + keep_good_only=False, + ) + conversion_options["SortingNP"] = dict( + stub_test=stub_test, write_as="processing" + ) + def session_to_nwb( subject: str, session: str, + conversion_type: str, stub_test: bool = False, overwrite: bool = True, ): @@ -172,6 +203,8 @@ def session_to_nwb( Subject, either 'Perle' or 'Elgar'. session : string Session date in format 'YYYY-MM-DD'. + conversion_type: string + Conversion type, either 'raw' or 'processed'. stub_test : boolean Whether or not to generate a preview file by limiting data write to a few MB. @@ -183,6 +216,7 @@ def session_to_nwb( """ logging.info(f"stub_test = {stub_test}") logging.info(f"overwrite = {overwrite}") + logging.info(f"conversion_type = {conversion_type}") # Get paths session_paths = get_session_paths.get_session_paths( @@ -214,88 +248,119 @@ def session_to_nwb( processed_source_data = {} processed_conversion_options = {} - # Add SpikeGLX data - _add_spikeglx_data( - raw_source_data=raw_source_data, - raw_conversion_options=raw_conversion_options, - processed_source_data=processed_source_data, - processed_conversion_options=processed_conversion_options, - session_paths=session_paths, - stub_test=stub_test, - ) + if conversion_type == "raw": + # Add SpikeGLX data + _add_spikeglx_data( + source_data=raw_source_data, + conversion_options=raw_conversion_options, + conversion_type='raw', + session_paths=session_paths, + stub_test=stub_test, + ) + raw_converter = nwb_converter.NWBConverter( + source_data=raw_source_data, + sync_dir=str(session_paths.sync_pulses), + ) + logging.info("Running raw data conversion") + + # Get metadata + # NOTE: This might not work. 
Previously, metadata was from processed + metadata = raw_converter.get_metadata() + metadata = _update_metadata( + metadata=metadata, + subject=subject, + session_id=session_id, + session_paths=session_paths, + ) + metadata["NWBFile"]["identifier"] = str(uuid4()) + + # Run conversion + raw_converter.run_conversion( + metadata=metadata, + nwbfile_path=raw_nwb_path, + conversion_options=raw_conversion_options, + overwrite=overwrite, + ) - # Add behavior data - logging.info("Adding behavior data") - behavior_task_path = str(session_paths.behavior_task_data) - processed_source_data["EyePosition"] = dict(folder_path=behavior_task_path) - processed_conversion_options["EyePosition"] = dict() - processed_source_data["PupilSize"] = dict(folder_path=behavior_task_path) - processed_conversion_options["PupilSize"] = dict() - processed_source_data["RewardLine"] = dict(folder_path=behavior_task_path) - processed_conversion_options["RewardLine"] = dict() - processed_source_data["Audio"] = dict(folder_path=behavior_task_path) - processed_conversion_options["Audio"] = dict() - - # Add trials data - logging.info("Adding trials data") - processed_source_data["Trials"] = dict( - folder_path=str(session_paths.behavior_task_data) - ) - processed_conversion_options["Trials"] = dict() + elif conversion_type == "processed": + # Add behavior data + logging.info("Adding behavior data") + _add_spikeglx_data( + source_data=processed_source_data, + conversion_options=processed_conversion_options, + conversion_type='processed', + session_paths=session_paths, + stub_test=stub_test, + ) + behavior_task_path = str(session_paths.behavior_task_data) + processed_source_data["EyePosition"] = dict( + folder_path=behavior_task_path) + processed_conversion_options["EyePosition"] = dict() + processed_source_data["PupilSize"] = dict( + folder_path=behavior_task_path) + processed_conversion_options["PupilSize"] = dict() + processed_source_data["RewardLine"] = dict( + folder_path=behavior_task_path) + 
processed_conversion_options["RewardLine"] = dict() + processed_source_data["Audio"] = dict(folder_path=behavior_task_path) + processed_conversion_options["Audio"] = dict() + + # Add trials data + logging.info("Adding trials data") + processed_source_data["Trials"] = dict( + folder_path=str(session_paths.behavior_task_data) + ) + processed_conversion_options["Trials"] = dict() - # Add display data - logging.info("Adding display data") - processed_source_data["Display"] = dict( - folder_path=str(session_paths.behavior_task_data) - ) - processed_conversion_options["Display"] = dict() + # Add display data + logging.info("Adding display data") + processed_source_data["Display"] = dict( + folder_path=str(session_paths.behavior_task_data) + ) + processed_conversion_options["Display"] = dict() - # Create data converters - processed_converter = nwb_converter.NWBConverter( - source_data=processed_source_data, - sync_dir=session_paths.sync_pulses, - ) - raw_converter = nwb_converter.NWBConverter( - source_data=raw_source_data, - sync_dir=str(session_paths.sync_pulses), - ) + # Create data converters + processed_converter = nwb_converter.NWBConverter( + source_data=processed_source_data, + sync_dir=session_paths.sync_pulses, + ) - # Update metadata - metadata = processed_converter.get_metadata() - metadata = _update_metadata( - metadata=metadata, - subject=subject, - session_id=session_id, - session_paths=session_paths, - ) + # Get metadata + metadata = processed_converter.get_metadata() + metadata = _update_metadata( + metadata=metadata, + subject=subject, + session_id=session_id, + session_paths=session_paths, + ) - # Run conversion - logging.info("Running processed conversion") - processed_converter.run_conversion( - metadata=metadata, - nwbfile_path=processed_nwb_path, - conversion_options=processed_conversion_options, - overwrite=overwrite, - ) + # Run conversion + logging.info("Running processed conversion") + processed_converter.run_conversion( + metadata=metadata, + 
nwbfile_path=processed_nwb_path, + conversion_options=processed_conversion_options, + overwrite=overwrite, + ) - logging.info("Running raw data conversion") - metadata["NWBFile"]["identifier"] = str(uuid4()) - raw_converter.run_conversion( - metadata=metadata, - nwbfile_path=raw_nwb_path, - conversion_options=raw_conversion_options, - overwrite=overwrite, - ) + # logging.info("Writing curated sorting output to processed NWB") + # _add_curated_sorting_data( + # nwbfile_path=processed_nwb_path, + # session_paths=session_paths, + # ) if __name__ == "__main__": """Run session conversion.""" - subject = sys.argv[1] - session = sys.argv[2] + session = sys.argv[1] + conversion_type = sys.argv[2] + subject = session.split('/')[0] + session = session.split('/')[1] logging.info(f"\nStarting conversion for {subject}/{session}\n") session_to_nwb( subject=subject, session=session, + conversion_type=conversion_type, stub_test=_STUB_TEST, overwrite=_OVERWRITE, ) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh index 55a576d..0307d67 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.sh @@ -11,22 +11,22 @@ # Script to convert a session to NWB format. Takes in two arguments from user: # name of subject and session date. - -SUBJECT=$1 # Argument passed in by user. Should be in subject/date format -echo "SUBJECT: $SUBJECT" -if [ -z "$SUBJECT" ]; then +SESSION=$1 # Argument passed in by user. Should be in subject/date format +echo "SESSION: $SESSION" +if [ -z "$SESSION" ]; then echo "No session specified, exiting." exit fi -SESSION=$2 # Argument passed in by user. Should be in subject/date format -echo "SESSION: $SESSION" -if [ -z "$SESSION" ]; then - echo "No session specified, exiting." +CONVERSION_TYPE=$2 # Argument passed in by user. 
Should be either 'raw' or 'processed' +echo "CONVERSION_TYPE: $CONVERSION_TYPE" +if [ -z "$CONVERSION_TYPE" ]; then + echo "No conversion type specified, exiting." exit fi + source ~/.bashrc conda activate jazayeri_lab_to_nwb_env cd /om2/user/apiccato/jazayeri-lab-to-nwb -python src/jazayeri_lab_to_nwb/piccato/main_convert_session.py $SUBJECT $SESSION +python src/jazayeri_lab_to_nwb/piccato/main_convert_session.py $SESSION $CONVERSION_TYPE From 101abe68ce986eb914159f475a7c45015cb3b38e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 26 Feb 2024 14:44:26 +0000 Subject: [PATCH 25/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../piccato/main_convert_session.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 2996d05..a1baf4f 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -33,8 +33,8 @@ import nwb_converter from neuroconv.tools.spikeinterface import write_sorting from neuroconv.utils import dict_deep_update, load_dict_from_file -from spikeinterface.extractors import read_kilosort, read_spikeglx from spikeinterface import curation +from spikeinterface.extractors import read_kilosort, read_spikeglx # Data repository. 
Either 'globus' or 'openmind' _REPO = "openmind" @@ -160,7 +160,7 @@ def _add_spikeglx_data( conversion_options["RecordingNP"] = dict(stub_test=stub_test) conversion_options["LF"] = dict(stub_test=stub_test) - elif conversion_type == 'processed': + elif conversion_type == "processed": source_data["RecordingNP"] = dict(file_path=ap_file) source_data["LF"] = dict(file_path=lfp_file) @@ -253,7 +253,7 @@ def session_to_nwb( _add_spikeglx_data( source_data=raw_source_data, conversion_options=raw_conversion_options, - conversion_type='raw', + conversion_type="raw", session_paths=session_paths, stub_test=stub_test, ) @@ -288,19 +288,22 @@ def session_to_nwb( _add_spikeglx_data( source_data=processed_source_data, conversion_options=processed_conversion_options, - conversion_type='processed', + conversion_type="processed", session_paths=session_paths, stub_test=stub_test, ) behavior_task_path = str(session_paths.behavior_task_data) processed_source_data["EyePosition"] = dict( - folder_path=behavior_task_path) + folder_path=behavior_task_path + ) processed_conversion_options["EyePosition"] = dict() processed_source_data["PupilSize"] = dict( - folder_path=behavior_task_path) + folder_path=behavior_task_path + ) processed_conversion_options["PupilSize"] = dict() processed_source_data["RewardLine"] = dict( - folder_path=behavior_task_path) + folder_path=behavior_task_path + ) processed_conversion_options["RewardLine"] = dict() processed_source_data["Audio"] = dict(folder_path=behavior_task_path) processed_conversion_options["Audio"] = dict() @@ -354,8 +357,8 @@ def session_to_nwb( """Run session conversion.""" session = sys.argv[1] conversion_type = sys.argv[2] - subject = session.split('/')[0] - session = session.split('/')[1] + subject = session.split("/")[0] + session = session.split("/")[1] logging.info(f"\nStarting conversion for {subject}/{session}\n") session_to_nwb( subject=subject, From ffea6abb60a651eaa40670003f0132e65924d532 Mon Sep 17 00:00:00 2001 From: Aida 
Piccato Date: Mon, 26 Feb 2024 15:12:51 -0500 Subject: [PATCH 26/28] added postprocessing data --- .../piccato/get_session_paths.py | 12 +- .../piccato/main_convert_session.py | 267 ++++++++++++------ 2 files changed, 191 insertions(+), 88 deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py index 158a19f..efe3c80 100644 --- a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py +++ b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py @@ -13,11 +13,12 @@ "SessionPaths", [ "output", - "raw_data", + "ecephys_data", "behavior_task_data", "session_data", "sync_pulses", "spike_sorting_raw", + "postprocessed_data" ], ) @@ -32,7 +33,7 @@ def _get_session_paths_openmind(subject, session): ) # Path to the raw data. This is used for reading raw physiology data. - raw_data_path = pathlib.Path(f"{OM_PATH}/{subject}/{session}/raw_data/") + ecephys_data_path = pathlib.Path(f"{OM_PATH}/{subject}/{session}/raw_data/") # Path to task and behavior data. 
behavior_task_data_path = pathlib.Path( @@ -52,13 +53,18 @@ def _get_session_paths_openmind(subject, session): session_path = pathlib.Path(f"{OM_PATH}/{subject}/{session}/") + postprocessed_data_path = pathlib.Path( + f"{OM_PATH}/{subject}/{session}/kilosort2_5_0" + ) + session_paths = SessionPaths( output=output_path, - raw_data=raw_data_path, + ecephys_data=ecephys_data_path, session_data=session_path, behavior_task_data=pathlib.Path(behavior_task_data_path), sync_pulses=sync_pulses_path, spike_sorting_raw=spike_sorting_raw_path, + postprocessed_data=postprocessed_data_path ) return session_paths diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 2996d05..90609f8 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -3,7 +3,7 @@ This converts a session to NWB format and writes the nwb files to /om/user/nwatters/nwb_data_multi_prediction/{$SUBJECT}/{$SESSION} Two NWB files are created: - $SUBJECT_$SESSION_raw.nwb --- Raw physiology + $SUBJECT_$SESSION_ecephys.nwb --- Raw physiology $SUBJECT_$SESSION_processed.nwb --- Task, behavior, and sorted physiology These files can be automatically uploaded to a DANDI dataset. @@ -27,14 +27,15 @@ import sys from pathlib import Path from uuid import uuid4 - +import time import get_session_paths import numpy as np import nwb_converter -from neuroconv.tools.spikeinterface import write_sorting +from neuroconv.tools.spikeinterface import write_sorting, write_waveforms from neuroconv.utils import dict_deep_update, load_dict_from_file -from spikeinterface.extractors import read_kilosort, read_spikeglx -from spikeinterface import curation +from spikeinterface.extractors import read_kilosort +import spikeinterface.core as sc +import pynwb # Data repository. 
Either 'globus' or 'openmind' _REPO = "openmind" @@ -53,6 +54,10 @@ "elgar": "P10Y", # Born 5/2/2012 } +_BEHAVIOR_TASK_CONV_TYPE = 'behavior+task' +_ECEPHYS_CONV_TYPE = 'ecephys' +_SPIKES_CONV_TYPE = 'spikes' + def _get_single_file(directory, suffix=""): """Get path to a file in given directory with given suffix. @@ -114,28 +119,68 @@ def _update_metadata(metadata, subject, session_id, session_paths): return metadata -# def _add_curated_sorting_data( -# nwbfile_path: str, -# session_paths: get_session_paths.SessionPaths, -# ): -# """Adds curated sorting data to the processed NWB file.""" -# sorting = read_kilosort( -# folder_path=(session_paths.spike_sorting_raw / -# "spikeglx/kilosort2_5_0/sorter_output") -# ) -# spikeglx_dir = Path( -# _get_single_file(session_paths.raw_data / "spikeglx", suffix="imec0") -# ) -# sorting = curation.remove_excess_spikes(sorting=sorting) -# keep_unit_ids = json.load(open( -# session_paths.spike_sorting_raw / "keep_unit_ids.json")) -# write_sorting( -# nwbfile_path=nwbfile_path, -# sorting=sorting, -# units_ids=keep_unit_ids, -# overwrite=False, -# write_as='units', -# ) +def _add_curated_sorting_data( + nwbfile_path: Path, + session_paths: get_session_paths.SessionPaths): + """Add curated sorting data to spikes NWB file.""" + sorting = read_kilosort( + folder_path=( + session_paths.spike_sorting_raw / + 'spikeglx/kilosort2_5_0/sorter_output') + ) + + # Adding curated units + unit_ids = list(json.load(open( + session_paths.postprocessed_data / 'manual_curation.json', 'r') + ).keys()) + unit_ids = [int(unit_id) for unit_id in unit_ids] + print(f"unit_ids: {unit_ids}") + write_sorting( + sorting=sorting, + nwbfile_path=nwbfile_path, + unit_ids=unit_ids, + overwrite=False, + write_as='units', + ) + + # # Adding waveform template + # waveform_extractor = sc.load_waveforms( + # session_paths.postprocessed_data / 'waveforms' + # ) + # write_waveforms( + # waveform_extractor=waveform_extractor, + # nwbfile_path=nwbfile_path, + # 
overwrite=False, + # unit_ids=unit_ids, + # write_as='units', + # ) + + # Adding stable trials information + read_io = pynwb.NWBHDF5IO( + nwbfile_path, mode='r', load_namespaces=True, + ) + stable_trials = json.load(open( + session_paths.postprocessed_data / 'stability.json', 'r' + )) + units_stable_trials = [ + stable_trials[unit_id] for unit_id in unit_ids + ] + description = ( + "For each trial, whether this unit was stable in the recording." + ) + + nwbfile = read_io.read() + units_data = nwbfile['units'] + units_data.add_column( + name='stable_trials', + description=description, + data=units_stable_trials) + + os.remove(nwbfile_path) + with pynwb.NWBHDF5IO(nwbfile_path, mode='w') as write_io: + write_io.export( + src_io=read_io, nwbfile=nwbfile, write_args={'link_data': False}, + ) def _add_spikeglx_data( @@ -150,28 +195,28 @@ def _add_spikeglx_data( # Raw data spikeglx_dir = Path( - _get_single_file(session_paths.raw_data / "spikeglx", suffix="imec0") + _get_single_file( + session_paths.ecephys_data / "spikeglx", suffix="imec0") ) ap_file = _get_single_file(spikeglx_dir, suffix="*.ap.bin") lfp_file = _get_single_file(spikeglx_dir, suffix="*.lf.bin") - if conversion_type == "raw": + if conversion_type == _ECEPHYS_CONV_TYPE: source_data["RecordingNP"] = dict(file_path=ap_file) source_data["LF"] = dict(file_path=lfp_file) conversion_options["RecordingNP"] = dict(stub_test=stub_test) conversion_options["LF"] = dict(stub_test=stub_test) - elif conversion_type == 'processed': + elif conversion_type == _SPIKES_CONV_TYPE: source_data["RecordingNP"] = dict(file_path=ap_file) source_data["LF"] = dict(file_path=lfp_file) conversion_options["RecordingNP"] = dict( stub_test=stub_test, write_electrical_series=False ) + conversion_options["LF"] = dict( stub_test=stub_test, write_electrical_series=False ) - - # Processed data sorting_path = ( session_paths.spike_sorting_raw / "spikeglx/kilosort2_5_0/sorter_output" @@ -186,6 +231,16 @@ def _add_spikeglx_data( 
stub_test=stub_test, write_as="processing" ) + elif conversion_type == "behavior+task": + source_data["RecordingNP"] = dict(file_path=ap_file) + source_data["LF"] = dict(file_path=lfp_file) + conversion_options["RecordingNP"] = dict( + stub_test=stub_test, write_electrical_series=False + ) + conversion_options["LF"] = dict( + stub_test=stub_test, write_electrical_series=False + ) + def session_to_nwb( subject: str, @@ -204,7 +259,7 @@ def session_to_nwb( session : string Session date in format 'YYYY-MM-DD'. conversion_type: string - Conversion type, either 'raw' or 'processed'. + Conversion type, either 'ecephys', 'behavior+task', or 'spikes'. stub_test : boolean Whether or not to generate a preview file by limiting data write to a few MB. @@ -231,41 +286,47 @@ def session_to_nwb( session_id = f"{session_id}-stub" else: session_id = f"{session}-full" - raw_nwb_path = ( + ecephys_nwb_path = ( session_paths.output / f"sub-{subject}_ses-{session_id}_ecephys.nwb" ) - processed_nwb_path = ( + behavior_task_nwb_path = ( session_paths.output - / f"sub-{subject}_ses-{session_id}_behavior+ecephys.nwb" + / f"sub-{subject}_ses-{session_id}_behavior+task.nwb" ) - logging.info(f"raw_nwb_path = {raw_nwb_path}") - logging.info(f"processed_nwb_path = {processed_nwb_path}") + spikes_nwb_path = ( + session_paths.output + / f"sub-{subject}_ses-{session_id}_spikes.nwb" + ) + logging.info(f"ecephys_nwb_path = {ecephys_nwb_path}") + logging.info(f"behavior_task_nwb_path = {behavior_task_nwb_path}") + logging.info(f"spikes_nwb_path = {spikes_nwb_path}") logging.info("") # Initialize empty data dictionaries - raw_source_data = {} - raw_conversion_options = {} - processed_source_data = {} - processed_conversion_options = {} - - if conversion_type == "raw": + ecephys_source_data = {} + ecephys_conversion_options = {} + behavior_task_source_data = {} + behavior_task_conversion_options = {} + spikes_source_data = {} + spikes_conversion_options = {} + + if conversion_type == 
_ECEPHYS_CONV_TYPE: # Add SpikeGLX data _add_spikeglx_data( - source_data=raw_source_data, - conversion_options=raw_conversion_options, - conversion_type='raw', + source_data=ecephys_source_data, + conversion_options=ecephys_conversion_options, + conversion_type=_ECEPHYS_CONV_TYPE, session_paths=session_paths, stub_test=stub_test, ) - raw_converter = nwb_converter.NWBConverter( - source_data=raw_source_data, + ecephys_converter = nwb_converter.NWBConverter( + source_data=ecephys_source_data, sync_dir=str(session_paths.sync_pulses), ) - logging.info("Running raw data conversion") + logging.info("Running ecephys data conversion") # Get metadata - # NOTE: This might not work. Previously, metadata was from processed - metadata = raw_converter.get_metadata() + metadata = ecephys_converter.get_metadata() metadata = _update_metadata( metadata=metadata, subject=subject, @@ -275,58 +336,61 @@ def session_to_nwb( metadata["NWBFile"]["identifier"] = str(uuid4()) # Run conversion - raw_converter.run_conversion( + ecephys_converter.run_conversion( metadata=metadata, - nwbfile_path=raw_nwb_path, - conversion_options=raw_conversion_options, + nwbfile_path=ecephys_nwb_path, + conversion_options=ecephys_conversion_options, overwrite=overwrite, ) - elif conversion_type == "processed": - # Add behavior data - logging.info("Adding behavior data") + elif conversion_type == "behavior+task": + # Add SpikeGLX data _add_spikeglx_data( - source_data=processed_source_data, - conversion_options=processed_conversion_options, - conversion_type='processed', + source_data=behavior_task_source_data, + conversion_options=behavior_task_conversion_options, + conversion_type=_BEHAVIOR_TASK_CONV_TYPE, session_paths=session_paths, stub_test=stub_test, ) + + # Add behavior data + logging.info("Adding behavior data") behavior_task_path = str(session_paths.behavior_task_data) - processed_source_data["EyePosition"] = dict( + behavior_task_source_data["EyePosition"] = dict( + folder_path=behavior_task_path) 
+ behavior_task_conversion_options["EyePosition"] = dict() + behavior_task_source_data["PupilSize"] = dict( folder_path=behavior_task_path) - processed_conversion_options["EyePosition"] = dict() - processed_source_data["PupilSize"] = dict( + behavior_task_conversion_options["PupilSize"] = dict() + behavior_task_source_data["RewardLine"] = dict( folder_path=behavior_task_path) - processed_conversion_options["PupilSize"] = dict() - processed_source_data["RewardLine"] = dict( + behavior_task_conversion_options["RewardLine"] = dict() + behavior_task_source_data["Audio"] = dict( folder_path=behavior_task_path) - processed_conversion_options["RewardLine"] = dict() - processed_source_data["Audio"] = dict(folder_path=behavior_task_path) - processed_conversion_options["Audio"] = dict() + behavior_task_conversion_options["Audio"] = dict() # Add trials data logging.info("Adding trials data") - processed_source_data["Trials"] = dict( + behavior_task_source_data["Trials"] = dict( folder_path=str(session_paths.behavior_task_data) ) - processed_conversion_options["Trials"] = dict() + behavior_task_conversion_options["Trials"] = dict() # Add display data logging.info("Adding display data") - processed_source_data["Display"] = dict( + behavior_task_source_data["Display"] = dict( folder_path=str(session_paths.behavior_task_data) ) - processed_conversion_options["Display"] = dict() + behavior_task_conversion_options["Display"] = dict() # Create data converters - processed_converter = nwb_converter.NWBConverter( - source_data=processed_source_data, + behavior_task_converter = nwb_converter.NWBConverter( + source_data=behavior_task_source_data, sync_dir=session_paths.sync_pulses, ) # Get metadata - metadata = processed_converter.get_metadata() + metadata = behavior_task_converter.get_metadata() metadata = _update_metadata( metadata=metadata, subject=subject, @@ -335,19 +399,52 @@ def session_to_nwb( ) # Run conversion - logging.info("Running processed conversion") - 
processed_converter.run_conversion( + logging.info("Running behavior+task conversion") + behavior_task_converter.run_conversion( metadata=metadata, - nwbfile_path=processed_nwb_path, - conversion_options=processed_conversion_options, + nwbfile_path=behavior_task_nwb_path, + conversion_options=behavior_task_conversion_options, overwrite=overwrite, ) + elif conversion_type == 'spikes': + _add_spikeglx_data( + source_data=spikes_source_data, + conversion_options=spikes_conversion_options, + conversion_type='spikes', + session_paths=session_paths, + stub_test=stub_test, + ) - # logging.info("Writing curated sorting output to processed NWB") - # _add_curated_sorting_data( - # nwbfile_path=processed_nwb_path, - # session_paths=session_paths, - # ) + # Create data converter + spikes_converter = nwb_converter.NWBConverter( + source_data=spikes_source_data, + sync_dir=session_paths.sync_pulses, + ) + + # Get metadata + metadata = spikes_converter.get_metadata() + metadata = _update_metadata( + metadata=metadata, + subject=subject, + session_id=session_id, + session_paths=session_paths, + ) + + # Run conversion + logging.info('Running spikes conversion') + spikes_converter.run_conversion( + metadata=metadata, + nwbfile_path=spikes_nwb_path, + conversion_options=spikes_conversion_options, + overwrite=overwrite, + ) + time.sleep(10) + # Adding curated spike sorting and waveform data + logging.info("Writing curated sorting output to processed NWB") + _add_curated_sorting_data( + nwbfile_path=spikes_nwb_path, + session_paths=session_paths, + ) if __name__ == "__main__": From 0c4ebbf8c5f919b1deb0303658262db0886ff2e8 Mon Sep 17 00:00:00 2001 From: Aida Piccato Date: Fri, 15 Mar 2024 15:35:30 -0400 Subject: [PATCH 27/28] editing timestamp --- .../piccato/main_convert_session.py | 166 ++++++++---------- .../piccato/nwb_converter.py | 4 +- .../piccato/recording_interface.py | 1 - 3 files changed, 80 insertions(+), 91 deletions(-) diff --git 
a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 90609f8..091993e 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -40,7 +40,7 @@ # Data repository. Either 'globus' or 'openmind' _REPO = "openmind" # Whether to run all the physiology data or only a stub -_STUB_TEST = False +_STUB_TEST = True # Whether to overwrite output nwb files _OVERWRITE = True @@ -130,20 +130,22 @@ def _add_curated_sorting_data( ) # Adding curated units - unit_ids = list(json.load(open( + curated_unit_idxs = list(json.load(open( session_paths.postprocessed_data / 'manual_curation.json', 'r') ).keys()) - unit_ids = [int(unit_id) for unit_id in unit_ids] - print(f"unit_ids: {unit_ids}") + curated_unit_idxs = [int(unit_id) for unit_id in curated_unit_idxs] + unit_ids = sorting.get_unit_ids() + curated_unit_ids = [unit_ids[idx] for idx in curated_unit_idxs] + write_sorting( sorting=sorting, nwbfile_path=nwbfile_path, - unit_ids=unit_ids, + unit_ids=curated_unit_ids, overwrite=False, write_as='units', ) - # # Adding waveform template + # Adding waveform template # waveform_extractor = sc.load_waveforms( # session_paths.postprocessed_data / 'waveforms' # ) @@ -159,23 +161,21 @@ def _add_curated_sorting_data( read_io = pynwb.NWBHDF5IO( nwbfile_path, mode='r', load_namespaces=True, ) + nwbfile = read_io.read() stable_trials = json.load(open( session_paths.postprocessed_data / 'stability.json', 'r' )) units_stable_trials = [ - stable_trials[unit_id] for unit_id in unit_ids + stable_trials[unit_idx] for unit_idx in curated_unit_idxs ] description = ( "For each trial, whether this unit was stable in the recording." 
) - - nwbfile = read_io.read() - units_data = nwbfile['units'] + units_data = nwbfile.units units_data.add_column( name='stable_trials', description=description, data=units_stable_trials) - os.remove(nwbfile_path) with pynwb.NWBHDF5IO(nwbfile_path, mode='w') as write_io: write_io.export( @@ -200,13 +200,16 @@ def _add_spikeglx_data( ) ap_file = _get_single_file(spikeglx_dir, suffix="*.ap.bin") lfp_file = _get_single_file(spikeglx_dir, suffix="*.lf.bin") + if conversion_type == _ECEPHYS_CONV_TYPE: - source_data["RecordingNP"] = dict(file_path=ap_file) + source_data["RecordingNP"] = dict( + file_path=ap_file, + ) source_data["LF"] = dict(file_path=lfp_file) conversion_options["RecordingNP"] = dict(stub_test=stub_test) conversion_options["LF"] = dict(stub_test=stub_test) - - elif conversion_type == _SPIKES_CONV_TYPE: + return + if conversion_type == _SPIKES_CONV_TYPE: source_data["RecordingNP"] = dict(file_path=ap_file) source_data["LF"] = dict(file_path=lfp_file) @@ -230,8 +233,8 @@ def _add_spikeglx_data( conversion_options["SortingNP"] = dict( stub_test=stub_test, write_as="processing" ) - - elif conversion_type == "behavior+task": + return + if conversion_type == "behavior+task": source_data["RecordingNP"] = dict(file_path=ap_file) source_data["LF"] = dict(file_path=lfp_file) conversion_options["RecordingNP"] = dict( @@ -240,6 +243,7 @@ def _add_spikeglx_data( conversion_options["LF"] = dict( stub_test=stub_test, write_electrical_series=False ) + return def session_to_nwb( @@ -286,47 +290,32 @@ def session_to_nwb( session_id = f"{session_id}-stub" else: session_id = f"{session}-full" - ecephys_nwb_path = ( - session_paths.output / f"sub-{subject}_ses-{session_id}_ecephys.nwb" - ) - behavior_task_nwb_path = ( - session_paths.output - / f"sub-{subject}_ses-{session_id}_behavior+task.nwb" - ) - spikes_nwb_path = ( - session_paths.output - / f"sub-{subject}_ses-{session_id}_spikes.nwb" - ) - logging.info(f"ecephys_nwb_path = {ecephys_nwb_path}") - 
logging.info(f"behavior_task_nwb_path = {behavior_task_nwb_path}") - logging.info(f"spikes_nwb_path = {spikes_nwb_path}") - logging.info("") - # Initialize empty data dictionaries - ecephys_source_data = {} - ecephys_conversion_options = {} - behavior_task_source_data = {} - behavior_task_conversion_options = {} - spikes_source_data = {} - spikes_conversion_options = {} + source_data = {} + conversion_options = {} + _add_spikeglx_data( + source_data=source_data, + conversion_options=conversion_options, + conversion_type=conversion_type, + session_paths=session_paths, + stub_test=stub_test, + ) if conversion_type == _ECEPHYS_CONV_TYPE: # Add SpikeGLX data - _add_spikeglx_data( - source_data=ecephys_source_data, - conversion_options=ecephys_conversion_options, - conversion_type=_ECEPHYS_CONV_TYPE, - session_paths=session_paths, - stub_test=stub_test, + nwb_path = ( + session_paths.output / + f"sub-{subject}_ses-{session_id}_ecephys.nwb" ) - ecephys_converter = nwb_converter.NWBConverter( - source_data=ecephys_source_data, + + converter = nwb_converter.NWBConverter( + source_data=source_data, sync_dir=str(session_paths.sync_pulses), ) logging.info("Running ecephys data conversion") # Get metadata - metadata = ecephys_converter.get_metadata() + metadata = converter.get_metadata() metadata = _update_metadata( metadata=metadata, subject=subject, @@ -334,63 +323,64 @@ def session_to_nwb( session_paths=session_paths, ) metadata["NWBFile"]["identifier"] = str(uuid4()) - + + for interface_name, data_interface in converter.data_interface_objects.items(): + if 'Recording' in interface_name: + print(data_interface.sampling_frequency) # Run conversion - ecephys_converter.run_conversion( + converter.run_conversion( metadata=metadata, - nwbfile_path=ecephys_nwb_path, - conversion_options=ecephys_conversion_options, + nwbfile_path=nwb_path, + conversion_options=conversion_options, overwrite=overwrite, ) - elif conversion_type == "behavior+task": + return + if conversion_type == 
_BEHAVIOR_TASK_CONV_TYPE: # Add SpikeGLX data - _add_spikeglx_data( - source_data=behavior_task_source_data, - conversion_options=behavior_task_conversion_options, - conversion_type=_BEHAVIOR_TASK_CONV_TYPE, - session_paths=session_paths, - stub_test=stub_test, + nwb_path = ( + session_paths.output + / f"sub-{subject}_ses-{session_id}_behavior+task.nwb" ) # Add behavior data logging.info("Adding behavior data") behavior_task_path = str(session_paths.behavior_task_data) - behavior_task_source_data["EyePosition"] = dict( + source_data["EyePosition"] = dict( folder_path=behavior_task_path) - behavior_task_conversion_options["EyePosition"] = dict() - behavior_task_source_data["PupilSize"] = dict( + conversion_options["EyePosition"] = dict() + source_data["PupilSize"] = dict( folder_path=behavior_task_path) - behavior_task_conversion_options["PupilSize"] = dict() - behavior_task_source_data["RewardLine"] = dict( + conversion_options["PupilSize"] = dict() + source_data["RewardLine"] = dict( folder_path=behavior_task_path) - behavior_task_conversion_options["RewardLine"] = dict() - behavior_task_source_data["Audio"] = dict( + conversion_options["RewardLine"] = dict() + source_data["Audio"] = dict( folder_path=behavior_task_path) - behavior_task_conversion_options["Audio"] = dict() + conversion_options["Audio"] = dict() # Add trials data logging.info("Adding trials data") - behavior_task_source_data["Trials"] = dict( + source_data["Trials"] = dict( folder_path=str(session_paths.behavior_task_data) ) - behavior_task_conversion_options["Trials"] = dict() + conversion_options["Trials"] = dict() # Add display data logging.info("Adding display data") - behavior_task_source_data["Display"] = dict( + source_data["Display"] = dict( folder_path=str(session_paths.behavior_task_data) ) - behavior_task_conversion_options["Display"] = dict() + conversion_options["Display"] = dict() # Create data converters - behavior_task_converter = nwb_converter.NWBConverter( - 
source_data=behavior_task_source_data, + converter = nwb_converter.NWBConverter( + source_data=source_data, sync_dir=session_paths.sync_pulses, ) # Get metadata - metadata = behavior_task_converter.get_metadata() + metadata = converter.get_metadata() metadata = _update_metadata( metadata=metadata, subject=subject, @@ -400,24 +390,21 @@ def session_to_nwb( # Run conversion logging.info("Running behavior+task conversion") - behavior_task_converter.run_conversion( + converter.run_conversion( metadata=metadata, - nwbfile_path=behavior_task_nwb_path, - conversion_options=behavior_task_conversion_options, + nwbfile_path=nwb_path, + conversion_options=conversion_options, overwrite=overwrite, ) - elif conversion_type == 'spikes': - _add_spikeglx_data( - source_data=spikes_source_data, - conversion_options=spikes_conversion_options, - conversion_type='spikes', - session_paths=session_paths, - stub_test=stub_test, + return + if conversion_type == _SPIKES_CONV_TYPE: + nwb_path = ( + session_paths.output + / f"sub-{subject}_ses-{session_id}_spikes.nwb" ) - # Create data converter spikes_converter = nwb_converter.NWBConverter( - source_data=spikes_source_data, + source_data=source_data, sync_dir=session_paths.sync_pulses, ) @@ -434,15 +421,16 @@ def session_to_nwb( logging.info('Running spikes conversion') spikes_converter.run_conversion( metadata=metadata, - nwbfile_path=spikes_nwb_path, - conversion_options=spikes_conversion_options, + nwbfile_path=nwb_path, + conversion_options=conversion_options, overwrite=overwrite, ) - time.sleep(10) + # Adding curated spike sorting and waveform data + time.sleep(10) logging.info("Writing curated sorting output to processed NWB") _add_curated_sorting_data( - nwbfile_path=spikes_nwb_path, + nwbfile_path=nwb_path, session_paths=session_paths, ) diff --git a/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py b/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py index 56b61ea..253bcfa 100644 --- a/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py 
+++ b/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py @@ -133,6 +133,7 @@ def temporally_align_data_interfaces(self): lf_interface = self.data_interface_objects["LF"] else: raise ValueError("Invalid probe_name {probe_name}") + intercept = transform["intercept"] coef = transform["coef"] @@ -140,7 +141,6 @@ def temporally_align_data_interfaces(self): orig_timestamps = recording_interface.get_original_timestamps() aligned_timestamps = intercept + coef * (start + orig_timestamps) recording_interface.set_aligned_timestamps(aligned_timestamps) - # Align LFP timestamps if lf_interface is not None: orig_timestamps = lf_interface.get_original_timestamps() @@ -174,7 +174,9 @@ def temporally_align_data_interfaces(self): for data_interface in self.data_interface_objects.values(): if isinstance(data_interface, BaseSortingExtractorInterface): # Do not need to align because recording will be aligned + # before spike sorting continue start_time = data_interface.set_aligned_starting_time( aligned_starting_time=zero_time ) + diff --git a/src/jazayeri_lab_to_nwb/piccato/recording_interface.py b/src/jazayeri_lab_to_nwb/piccato/recording_interface.py index e31b3f9..c469fc9 100644 --- a/src/jazayeri_lab_to_nwb/piccato/recording_interface.py +++ b/src/jazayeri_lab_to_nwb/piccato/recording_interface.py @@ -9,7 +9,6 @@ BaseRecordingExtractorInterface, ) from neuroconv.utils import FilePathType -from spikeinterface import BaseRecording class DatRecordingInterface(BaseRecordingExtractorInterface): From b23703b7ddd6523a899b12cc109d83088ff80a3b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 15 Mar 2024 20:30:01 +0000 Subject: [PATCH 28/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../piccato/get_session_paths.py | 8 +- .../piccato/main_convert_session.py | 93 ++++++++++--------- .../piccato/nwb_converter.py | 1 - 3 files changed, 56 insertions(+), 46 
deletions(-) diff --git a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py index efe3c80..b8be150 100644 --- a/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py +++ b/src/jazayeri_lab_to_nwb/piccato/get_session_paths.py @@ -18,7 +18,7 @@ "session_data", "sync_pulses", "spike_sorting_raw", - "postprocessed_data" + "postprocessed_data", ], ) @@ -33,7 +33,9 @@ def _get_session_paths_openmind(subject, session): ) # Path to the raw data. This is used for reading raw physiology data. - ecephys_data_path = pathlib.Path(f"{OM_PATH}/{subject}/{session}/raw_data/") + ecephys_data_path = pathlib.Path( + f"{OM_PATH}/{subject}/{session}/raw_data/" + ) # Path to task and behavior data. behavior_task_data_path = pathlib.Path( @@ -64,7 +66,7 @@ def _get_session_paths_openmind(subject, session): behavior_task_data=pathlib.Path(behavior_task_data_path), sync_pulses=sync_pulses_path, spike_sorting_raw=spike_sorting_raw_path, - postprocessed_data=postprocessed_data_path + postprocessed_data=postprocessed_data_path, ) return session_paths diff --git a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py index 62ae532..7873ca6 100644 --- a/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py +++ b/src/jazayeri_lab_to_nwb/piccato/main_convert_session.py @@ -25,17 +25,18 @@ import logging import os import sys +import time from pathlib import Path from uuid import uuid4 -import time + import get_session_paths import numpy as np import nwb_converter +import pynwb +import spikeinterface.core as sc from neuroconv.tools.spikeinterface import write_sorting, write_waveforms from neuroconv.utils import dict_deep_update, load_dict_from_file from spikeinterface.extractors import read_kilosort -import spikeinterface.core as sc -import pynwb # Data repository. 
Either 'globus' or 'openmind' _REPO = "openmind" @@ -54,9 +55,9 @@ "elgar": "P10Y", # Born 5/2/2012 } -_BEHAVIOR_TASK_CONV_TYPE = 'behavior+task' -_ECEPHYS_CONV_TYPE = 'ecephys' -_SPIKES_CONV_TYPE = 'spikes' +_BEHAVIOR_TASK_CONV_TYPE = "behavior+task" +_ECEPHYS_CONV_TYPE = "ecephys" +_SPIKES_CONV_TYPE = "spikes" def _get_single_file(directory, suffix=""): @@ -120,19 +121,24 @@ def _update_metadata(metadata, subject, session_id, session_paths): def _add_curated_sorting_data( - nwbfile_path: Path, - session_paths: get_session_paths.SessionPaths): + nwbfile_path: Path, session_paths: get_session_paths.SessionPaths +): """Add curated sorting data to spikes NWB file.""" sorting = read_kilosort( folder_path=( - session_paths.spike_sorting_raw / - 'spikeglx/kilosort2_5_0/sorter_output') + session_paths.spike_sorting_raw + / "spikeglx/kilosort2_5_0/sorter_output" + ) ) # Adding curated units - curated_unit_idxs = list(json.load(open( - session_paths.postprocessed_data / 'manual_curation.json', 'r') - ).keys()) + curated_unit_idxs = list( + json.load( + open( + session_paths.postprocessed_data / "manual_curation.json", "r" + ) + ).keys() + ) curated_unit_idxs = [int(unit_id) for unit_id in curated_unit_idxs] unit_ids = sorting.get_unit_ids() curated_unit_ids = [unit_ids[idx] for idx in curated_unit_idxs] @@ -142,29 +148,31 @@ def _add_curated_sorting_data( nwbfile_path=nwbfile_path, unit_ids=curated_unit_ids, overwrite=False, - write_as='units', + write_as="units", ) # Adding waveform template waveform_extractor = sc.load_waveforms( - session_paths.postprocessed_data / 'waveforms' + session_paths.postprocessed_data / "waveforms" ) write_waveforms( waveform_extractor=waveform_extractor, nwbfile_path=nwbfile_path, overwrite=False, unit_ids=unit_ids, - write_as='units', + write_as="units", ) # Adding stable trials information read_io = pynwb.NWBHDF5IO( - nwbfile_path, mode='r', load_namespaces=True, + nwbfile_path, + mode="r", + load_namespaces=True, ) nwbfile = read_io.read() 
- stable_trials = json.load(open( - session_paths.postprocessed_data / 'stability.json', 'r' - )) + stable_trials = json.load( + open(session_paths.postprocessed_data / "stability.json", "r") + ) units_stable_trials = [ stable_trials[unit_idx] for unit_idx in curated_unit_idxs ] @@ -173,13 +181,14 @@ def _add_curated_sorting_data( ) units_data = nwbfile.units units_data.add_column( - name='stable_trials', - description=description, - data=units_stable_trials) + name="stable_trials", description=description, data=units_stable_trials + ) os.remove(nwbfile_path) - with pynwb.NWBHDF5IO(nwbfile_path, mode='w') as write_io: + with pynwb.NWBHDF5IO(nwbfile_path, mode="w") as write_io: write_io.export( - src_io=read_io, nwbfile=nwbfile, write_args={'link_data': False}, + src_io=read_io, + nwbfile=nwbfile, + write_args={"link_data": False}, ) @@ -196,7 +205,8 @@ def _add_spikeglx_data( # Raw data spikeglx_dir = Path( _get_single_file( - session_paths.ecephys_data / "spikeglx", suffix="imec0") + session_paths.ecephys_data / "spikeglx", suffix="imec0" + ) ) ap_file = _get_single_file(spikeglx_dir, suffix="*.ap.bin") lfp_file = _get_single_file(spikeglx_dir, suffix="*.lf.bin") @@ -304,8 +314,9 @@ def session_to_nwb( if conversion_type == _ECEPHYS_CONV_TYPE: # Add SpikeGLX data nwb_path = ( - session_paths.output / - f"sub-{subject}_ses-{session_id}_ecephys.nwb") + session_paths.output + / f"sub-{subject}_ses-{session_id}_ecephys.nwb" + ) _add_spikeglx_data( source_data=raw_source_data, conversion_options=raw_conversion_options, @@ -329,9 +340,12 @@ def session_to_nwb( session_paths=session_paths, ) metadata["NWBFile"]["identifier"] = str(uuid4()) - - for interface_name, data_interface in converter.data_interface_objects.items(): - if 'Recording' in interface_name: + + for ( + interface_name, + data_interface, + ) in converter.data_interface_objects.items(): + if "Recording" in interface_name: print(data_interface.sampling_frequency) # Run conversion converter.run_conversion( @@ 
-352,17 +366,13 @@ def session_to_nwb( # Add behavior data logging.info("Adding behavior data") behavior_task_path = str(session_paths.behavior_task_data) - source_data["EyePosition"] = dict( - folder_path=behavior_task_path) + source_data["EyePosition"] = dict(folder_path=behavior_task_path) conversion_options["EyePosition"] = dict() - source_data["PupilSize"] = dict( - folder_path=behavior_task_path) + source_data["PupilSize"] = dict(folder_path=behavior_task_path) conversion_options["PupilSize"] = dict() - source_data["RewardLine"] = dict( - folder_path=behavior_task_path) + source_data["RewardLine"] = dict(folder_path=behavior_task_path) conversion_options["RewardLine"] = dict() - source_data["Audio"] = dict( - folder_path=behavior_task_path) + source_data["Audio"] = dict(folder_path=behavior_task_path) conversion_options["Audio"] = dict() _add_spikeglx_data( source_data=source_data, @@ -412,8 +422,7 @@ def session_to_nwb( return if conversion_type == _SPIKES_CONV_TYPE: nwb_path = ( - session_paths.output - / f"sub-{subject}_ses-{session_id}_spikes.nwb" + session_paths.output / f"sub-{subject}_ses-{session_id}_spikes.nwb" ) # Create data converter spikes_converter = nwb_converter.NWBConverter( @@ -431,7 +440,7 @@ def session_to_nwb( ) # Run conversion - logging.info('Running spikes conversion') + logging.info("Running spikes conversion") spikes_converter.run_conversion( metadata=metadata, nwbfile_path=nwb_path, diff --git a/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py b/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py index 253bcfa..7b748af 100644 --- a/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py +++ b/src/jazayeri_lab_to_nwb/piccato/nwb_converter.py @@ -179,4 +179,3 @@ def temporally_align_data_interfaces(self): start_time = data_interface.set_aligned_starting_time( aligned_starting_time=zero_time ) -