Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Neuropixels] Extend metadata #24

Merged
merged 7 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,75 +1,13 @@
from pathlib import Path
from typing import Union, Optional
from warnings import warn

import pandas as pd
from dateutil import tz
from neuroconv.utils import load_dict_from_file, dict_deep_update
from pymatreader import read_mat

from constantinople_lab_to_nwb.utils import get_subject_metadata_from_rat_info_folder
from constantinople_lab_to_nwb.mah_2024 import Mah2024NWBConverter


def _is_missing_registry_value(value) -> bool:
    """Return True when a registry cell holds no usable value (None, NaN/NaT, empty or falsy)."""
    if value is None:
        return True
    if isinstance(value, str):
        return not value.strip()
    try:
        # Sized containers (e.g. empty arrays loaded from MATLAB) are missing when empty.
        return len(value) == 0
    except TypeError:
        # Scalars have no length: NaN/NaT are truthy in Python, so check them explicitly.
        return bool(pd.isna(value)) or not value


def get_subject_metadata_from_rat_info_folder(
    folder_path: Union[str, Path],
    subject_id: str,
    date: str,
) -> dict:
    """
    Load subject metadata from the rat info files.

    The "registry.mat" file contains information about the subject such as date of birth, sex, and vendor.
    The "Mass_registry.mat" file contains information about the weight of the subject.
    Either file may be absent; only the metadata that could be found is returned.

    Parameters
    ----------
    folder_path: Union[str, Path]
        The folder path containing the rat info files.
    subject_id: str
        The subject ID.
    date: str
        The date of the session in the format "yyyy-mm-dd".

    Returns
    -------
    dict
        The subject metadata. Possible keys are "date_of_birth" (or "age" when the date of birth
        is missing), "sex", "description" and "weight" (in kilograms, as string). The dictionary
        is empty when no rat info file exists or the subject is not listed.
    """
    folder_path = Path(folder_path)
    subject_metadata = dict()

    rat_registry_file_path = folder_path / "registry.mat"
    if rat_registry_file_path.exists():
        rat_registry = read_mat(str(rat_registry_file_path))
        rat_registry = pd.DataFrame(rat_registry["Registry"])

        filtered_rat_registry = rat_registry[rat_registry["RatName"] == subject_id]
        if not filtered_rat_registry.empty:
            date_of_birth = filtered_rat_registry["DOB"].values[0]
            if _is_missing_registry_value(date_of_birth):
                # TODO: what to do if date of birth is missing?
                warn("Date of birth is missing. We recommend adding this information to the rat info files.")
                # Using age range specified in the manuscript
                subject_metadata.update(age="P6M/P24M")
            else:
                # convert date of birth to datetime with format "yyyy-mm-dd"
                date_of_birth = pd.to_datetime(date_of_birth, format="%Y-%m-%d")
                subject_metadata.update(date_of_birth=date_of_birth)

            subject_metadata.update(sex=filtered_rat_registry["sex"].values[0])

            vendor = filtered_rat_registry["vendor"].values[0]
            # A NaN vendor would otherwise be written as "Vendor: nan".
            if not _is_missing_registry_value(vendor):
                subject_metadata.update(description=f"Vendor: {vendor}")

    mass_registry_file_path = folder_path / "Mass_registry.mat"
    if mass_registry_file_path.exists():
        mass_registry = read_mat(str(mass_registry_file_path))
        mass_registry = pd.DataFrame(mass_registry["Mass_registry"])

        is_subject_on_date = (mass_registry["rat"] == subject_id) & (mass_registry["date"] == date)
        filtered_mass_registry = mass_registry[is_subject_on_date]
        if not filtered_mass_registry.empty:
            weight_g = filtered_mass_registry["mass"].astype(int).values[0]  # in grams
            # convert mass to kg
            weight_kg = weight_g / 1000
            subject_metadata.update(weight=str(weight_kg))

    return subject_metadata


def session_to_nwb(
raw_behavior_file_path: Union[str, Path],
processed_behavior_file_path: Union[str, Path],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Ecephys:
Device:
- name: DeviceEcephys
description: OpenEphys recording
ElectrodeGroup:
- name: ElectrodeGroup
description: The electrode group on the Neuropixels probe.
location: LO
device: DeviceEcephys
electrical_series:
name: electrical_series
description: The raw acquisition traces from Neuropixels probe (384 channels, 30 kHz sampling rate) using Neuropix-PXI hardware and OpenEphys.
lfp_electrical_series:
name: lfp_electrical_series
description: The processed traces from the Neuropixels probe (384 channels, 1 kHz sampling rate) using Neuropix-PXI hardware and OpenEphys.
UnitProperties:
- name: channel_depth_um
description: The distance of the channel from the tip of the Neuropixels probe in micrometers.
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
NWBFile:
# related_publications:
# https://doi.org/### or link to APA or MLA citation of the publication
experiment_description: |
This dataset contains in vivo extracellular electrophysiology recordings from rats performing a value-based
decision-making task. Neural data were acquired using Neuropixels probes (384 channels, 30 kHz sampling rate) with
Neuropix-PXI hardware and OpenEphys, and preprocessed using Kilosort 2.5 with manual curation in Phy.
Trials were initiated by a nose-poke in a lit center port and required maintaining a center fixation for 0.8 to 1.2
seconds, during which a tone indicated the possible reward size. A subsequent side LED indicated the potential
reward location, followed by a delay period drawn from an exponential distribution (mean = 2.5 s). Rats could opt
out at any time by poking the unlit port, restarting the trial. Catch trials, where the delay period only ended if
the rat opted out, constituted 15-25% of the trials. Rats received penalties for premature fixation breaks.
Additionally, the tasks introduced semi-observable hidden states by varying reward statistics across uncued blocks
(high, low, and mixed), structured hierarchically, with blocks transitioning after 40 successfully completed trials.
session_description: |
This session contains extracellular electrophysiology acquired from 384 channels at 30 kHz using Neuropix-PXI hardware and OpenEphys.
The neural data were preprocessed using Kilosort 2.5. After preprocessing, clusters that were identified from Kilosort as single-units
were manually inspected using Phy. The behavioral tasks were conducted in a high-throughput facility where rats
were trained in increasingly complex protocols. Trials were initiated by a nose-poke in a lit center port and
required maintaining a center fixation for 0.8 to 1.2 seconds, during which a tone indicated the possible reward
size. A subsequent side LED indicated the potential reward location, followed by a delay period drawn from an
exponential distribution (mean = 2.5 s). Rats could opt out at any time by poking the unlit port, restarting the
trial. Catch trials, where the delay period only ended if the rat opted out, constituted 15-25% of the trials.
Rats received penalties for premature fixation breaks. Additionally, the tasks introduced semi-observable hidden
states by varying reward statistics across uncued blocks (high, low, and mixed), structured hierarchically, with
blocks transitioning after 40 successfully completed trials.
institution: NYU Center for Neural Science
lab: Constantinople
keywords:
- decision making
- reinforcement learning
- hidden state inference
- extracellular electrophysiology
- single-unit activity
experimenter:
- Schiereck, Shannon
Subject:
species: Rattus norvegicus
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,62 @@
from pathlib import Path
from typing import Union, Optional

import numpy as np
import pandas as pd
from dateutil import tz
from neuroconv.datainterfaces import OpenEphysRecordingInterface
from neuroconv.utils import load_dict_from_file, dict_deep_update
from nwbinspector import inspect_nwbfile, save_report, format_messages
from pymatreader import read_mat
from spikeinterface.extractors import OpenEphysBinaryRecordingExtractor

from constantinople_lab_to_nwb.utils import get_subject_metadata_from_rat_info_folder
from constantinople_lab_to_nwb.schierek_embargo_2024 import SchierekEmbargo2024NWBConverter


def update_ephys_device_metadata_for_subject(
    epys_registry_file_path: Union[str, Path],
    subject_id: str,
    metadata: dict,
):
    """
    Update the description of the ecephys device with the probe information of the subject.

    The registry is loaded from the .mat file and the first row whose "ratname" equals the
    subject ID is used to build the description of the first device in metadata["Ecephys"]["Device"].
    When no row matches the subject, the metadata is returned unchanged.

    Parameters
    ----------
    epys_registry_file_path: Union[str, Path]
        The path to the ephys registry (.mat) file.
    subject_id: str
        The subject ID used to select the row in the registry.
    metadata: dict
        The metadata dictionary; it must contain metadata["Ecephys"]["Device"] with at least one entry.

    Returns
    -------
    dict
        The same metadata dictionary (it is updated in place).

    Raises
    ------
    FileNotFoundError
        If the registry file does not exist.
    ValueError
        If the file has no "Registry" entry or the registry has no "ratname" column.
    """
    registry_file_path = Path(epys_registry_file_path)
    if not registry_file_path.exists():
        raise FileNotFoundError(f"File not found: {epys_registry_file_path}")

    ephys_registry = read_mat(str(registry_file_path))
    if "Registry" not in ephys_registry:
        raise ValueError(f"'Registry' key not found in {epys_registry_file_path}.")
    ephys_registry = pd.DataFrame(ephys_registry["Registry"])
    if "ratname" not in ephys_registry.columns:
        raise ValueError(f"'ratname' column not found in {epys_registry_file_path}.")

    filtered_ephys_registry = ephys_registry[ephys_registry["ratname"] == subject_id]
    if filtered_ephys_registry.empty:
        # Nothing known about this subject; leave the default device metadata untouched.
        return metadata

    ap_value = filtered_ephys_registry["AP"].values[0]
    ml_value = filtered_ephys_registry["ML"].values[0]
    dv_value = filtered_ephys_registry["DV"].values[0]

    coordinates_in_mm = f"AP: {ap_value} mm, ML: {ml_value} mm"
    # pd.isna also accepts non-numeric cells, where np.isnan would raise a TypeError.
    if not pd.isna(dv_value):
        # No trailing period here: the coordinates are embedded in the middle of a sentence.
        coordinates_in_mm += f", DV: {dv_value} mm"

    recording_hemisphere = filtered_ephys_registry["recordinghemisphere"].values[0]
    # Spell out the single-letter codes; any other value is kept as is.
    recording_hemisphere = dict(L="left", R="right").get(recording_hemisphere, recording_hemisphere)
    probe_type = filtered_ephys_registry["probetype"].values[0]
    brain_region = filtered_ephys_registry["recordingsite"].values[0]

    description = (
        f"The {probe_type} probe implanted in {brain_region} brain region, "
        f"at {coordinates_in_mm}, {recording_hemisphere} hemisphere."
    )
    if "distance2LO" in filtered_ephys_registry.columns:
        distance_to_LO_um = filtered_ephys_registry["distance2LO"].values[0]
        # Skip missing distances so the description never reads "nan μm".
        if not pd.isna(distance_to_LO_um):
            # TODO: confirm unit
            description += f" Distance to LO: {distance_to_LO_um} μm."

    metadata["Ecephys"]["Device"][0].update(
        description=description,
    )

    return metadata


def session_to_nwb(
openephys_recording_folder_path: Union[str, Path],
spike_sorting_folder_path: Union[str, Path],
Expand All @@ -20,6 +68,8 @@ def session_to_nwb(
nwbfile_path: Union[str, Path],
column_name_mapping: Optional[dict] = None,
column_descriptions: Optional[dict] = None,
ephys_registry_file_path: Optional[Union[str, Path]] = None,
subject_metadata: Optional[dict] = None,
stub_test: bool = False,
overwrite: bool = False,
):
Expand All @@ -36,6 +86,10 @@ def session_to_nwb(
The path to the processed spike sorting file (.mat).
nwbfile_path : str or Path
The path to the NWB file to write.
ephys_registry_file_path: str or Path
The path to the ephys registry (.mat) file.
subject_metadata: dict, optional
Additional subject metadata. e.g. dict(
stub_test : bool, default: False
Whether to run a stub test conversion.
overwrite : bool, default: False
Expand Down Expand Up @@ -66,7 +120,14 @@ def session_to_nwb(

# Add Sorting
source_data.update(dict(PhySorting=dict(folder_path=spike_sorting_folder_path)))
conversion_options.update(dict(PhySorting=dict(stub_test=False)))
conversion_options.update(
dict(
PhySorting=dict(
stub_test=False,
units_description="Units table with spike times from Kilosort 2.5 and manually curated using Phy.",
)
)
)

# Add processed sorting output
if processed_spike_sorting_file_path is not None:
Expand All @@ -85,7 +146,13 @@ def session_to_nwb(
),
)
)
conversion_options.update(dict(ProcessedSorting=dict(write_as="processing", stub_test=False)))
conversion_options.update(
dict(
ProcessedSorting=dict(
write_as="processing", stub_test=False, units_description="The curated single-units from Phy."
),
),
)
conversion_options.update(
dict(
ProcessedBehavior=dict(column_name_mapping=column_name_mapping, column_descriptions=column_descriptions)
Expand Down Expand Up @@ -136,7 +203,7 @@ def session_to_nwb(
)

# Update default metadata with the editable in the corresponding yaml file
editable_metadata_path = Path(__file__).parent / "schierek_embargo_2024_metadata.yaml"
editable_metadata_path = Path(__file__).parent / "metadata" / "schierek_embargo_2024_general_metadata.yaml"
editable_metadata = load_dict_from_file(editable_metadata_path)
metadata = dict_deep_update(metadata, editable_metadata)

Expand All @@ -145,7 +212,20 @@ def session_to_nwb(
behavior_metadata = load_dict_from_file(behavior_metadata_path)
metadata = dict_deep_update(metadata, behavior_metadata)

metadata["Subject"].update(subject_id=subject_id)
# Update ecephys metadata
ephys_metadata_path = Path(__file__).parent / "metadata" / "schierek_embargo_2024_ecephys_metadata.yaml"
ephys_metadata = load_dict_from_file(ephys_metadata_path)
metadata = dict_deep_update(metadata, ephys_metadata)

if ephys_registry_file_path is not None:
metadata = update_ephys_device_metadata_for_subject(
epys_registry_file_path=ephys_registry_file_path,
subject_id=subject_id,
metadata=metadata,
)

if subject_metadata is not None:
metadata["Subject"].update(subject_id=subject_id, **subject_metadata)

# Run conversion
converter.run_conversion(
Expand All @@ -155,6 +235,17 @@ def session_to_nwb(
overwrite=overwrite,
)

results = list(inspect_nwbfile(nwbfile_path=nwbfile_path))
report_path = Path(nwbfile_path).parent / f"{subject_id}-{session_id}_nwbinspector_result.txt"
if not report_path.exists():
save_report(
report_file_path=report_path,
formatted_messages=format_messages(
results,
levels=["importance", "file_path"],
),
)


if __name__ == "__main__":

Expand Down Expand Up @@ -225,20 +316,33 @@ def session_to_nwb(
wait_thresh="The threshold in seconds to remove wait-times (mean + 1*std of all cumulative wait-times).",
)

nwbfile_path = Path("/Volumes/T9/Constantinople/nwbfiles/J076_2023-12-12_14-52-04.nwb")
nwbfile_path = Path("/Users/weian/data/demo/J076_2023-12-12_14-52-04.nwb")
if not nwbfile_path.parent.exists():
os.makedirs(nwbfile_path.parent, exist_ok=True)

# Ephys registry file path (contains metadata for the Neuropixels probe)
ephys_registry_file_path = "/Volumes/T9/Constantinople/Ephys Data/Ephys_registry.mat"

stub_test = True
overwrite = True

# Get subject metadata from rat registry
rat_registry_folder_path = "/Volumes/T9/Constantinople/Rat_info"
subject_metadata = get_subject_metadata_from_rat_info_folder(
folder_path=rat_registry_folder_path,
subject_id="J076",
date="2023-12-12",
)

session_to_nwb(
openephys_recording_folder_path=openephys_recording_folder_path,
spike_sorting_folder_path=phy_sorting_folder_path,
processed_spike_sorting_file_path=processed_sorting_file_path,
raw_behavior_file_path=bpod_file_path,
column_name_mapping=column_name_mapping,
column_descriptions=column_descriptions,
ephys_registry_file_path=ephys_registry_file_path,
subject_metadata=subject_metadata,
nwbfile_path=nwbfile_path,
stub_test=stub_test,
overwrite=overwrite,
Expand Down

This file was deleted.

1 change: 1 addition & 0 deletions src/constantinople_lab_to_nwb/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .get_subject_metadata import get_subject_metadata_from_rat_info_folder
Loading