Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ticket/PSB-260: Bug fix to VBN #2731

Merged
merged 6 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Change Log
All notable changes to this project will be documented in this file.

## [2.16.2] = 2023-11-29
- See release notes on Github
- Commit code used to create new VBN data release.
- Update VBN changelog.

## [2.16.1] = 2023-11-13
- See release notes on Github
- Update testing
Expand Down
2 changes: 1 addition & 1 deletion allensdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#
import logging

__version__ = '2.16.1'
__version__ = '2.16.2'


try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,49 +11,68 @@
class BehaviorSessionMetadataSchema(RaisingSchema):
# Metadata fields for a behavior session. All fields except age_in_days are
# optional (required=False, allow_none=True) so NWB files can still be
# written when a column is absent from the metadata table.
age_in_days = Int(required=True, description="Subject age")
behavior_session_id = Int(
    required=False,
    allow_none=True,
    description=(
        "Unique identifier for the "
        "behavior session to write into "
        "NWB format"
    ),
)
cre_line = String(
    required=False,
    allow_none=True,
    description="Genetic cre line of the subject."
)
date_of_acquisition = String(
    required=False,
    allow_none=True,
    description=(
        "Date of acquisition of " "behavior session, in string " "format"
    ),
)
driver_line = List(
    String,
    required=False,
    allow_none=True,
    cli_as_single_argument=True,
    description="Genetic driver line(s) of subject",
)
equipment_name = String(
    required=False,
    allow_none=True,
    description=("Name of the equipment used.")
)
full_genotype = String(
    required=False,
    allow_none=True,
    description="Full genotype of subject"
)
mouse_id = String(
    required=False,
    allow_none=True,
    description="LabTracks ID of the subject. aka external_specimen_name.",
)
project_code = String(
    # Bug fix: was misspelled 'rquired', which marshmallow silently
    # swallowed into field metadata instead of setting the flag.
    required=False,
    allow_none=True,
    # Bug fix: description was a copy-paste of mouse_id's.
    description="Project code associated with the behavior session.",
)
reporter_line = String(
    required=False,
    allow_none=True,
    description="Genetic reporter line(s) of subject"
)
session_type = String(
    required=False,
    allow_none=True,
    description="Full name of session type."
)
sex = String(
    required=False,
    allow_none=True,
    description="Subject sex"
)

@mm.post_load
def convert_date_time(self, data, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -596,11 +596,28 @@ def _check_for_errant_omitted_stimulus(
Dataframe with omitted stimulus removed from first row or if not
found, return input_df unmodified.
"""
if "omitted" in input_df.columns and len(input_df) > 0:
first_row = input_df.iloc[0]

def safe_omitted_check(input_df: pd.DataFrame,
                       stimulus_block: Optional[int]):
    """Drop the first presentation of a (block of a) stimulus table if
    it is an errant omitted stimulus.

    Parameters
    ----------
    input_df : pd.DataFrame
        Stimulus presentations table. Must contain an ``omitted``
        column, and a ``stimulus_block`` column when ``stimulus_block``
        is not None.
    stimulus_block : int, optional
        When given, inspect the first row of that stimulus block;
        otherwise inspect the first row of the whole table.

    Returns
    -------
    pd.DataFrame
        ``input_df`` with the errant omitted row dropped, or unmodified
        if the first row is not an omitted stimulus.
    """
    if stimulus_block is not None:
        # Bug fix: the original compared against the enclosing loop's
        # ``stim_block`` variable instead of this parameter, relying on
        # accidental closure capture.
        first_row = input_df[
            input_df['stimulus_block'] == stimulus_block].iloc[0]
    else:
        first_row = input_df.iloc[0]

    if not pd.isna(first_row["omitted"]):
        if first_row["omitted"]:
            input_df = input_df.drop(first_row.name, axis=0)
    return input_df

if "omitted" in input_df.columns and len(input_df) > 0:
if "stimulus_block" in input_df.columns:
for stim_block in input_df['stimulus_block'].unique():
input_df = safe_omitted_check(input_df=input_df,
stimulus_block=stim_block)
else:
input_df = safe_omitted_check(input_df=input_df,
stimulus_block=None)
return input_df

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ def run(self):
nwb_filepath=bs_id_dir / f"behavior_session_{bs_id}.nwb",
skip_metadata=self.args["skip_metadata_key"],
skip_stim=self.args["skip_stimulus_file_key"],
include_experiment_description=self.args[
'include_experiment_description'
]
)
logging.info("File successfully created")

Expand All @@ -43,6 +46,7 @@ def write_behavior_nwb(
nwb_filepath: Path,
skip_metadata: List[str],
skip_stim: List[str],
include_experiment_description=True
) -> str:
"""Load and write a BehaviorSession as NWB.

Expand All @@ -61,6 +65,8 @@ def write_behavior_nwb(
List of metadata keys to skip when comparing data.
skip_stim : list of str
List of stimulus file keys to skip when comparing data.
include_experiment_description : bool
If True, include experiment description in NWB file.

Returns
-------
Expand All @@ -79,7 +85,11 @@ def write_behavior_nwb(
session_data=behavior_session_metadata,
serializer=BehaviorSession,
)
nwb_writer.write_nwb(skip_metadata=skip_metadata, skip_stim=skip_stim)
nwb_writer.write_nwb(
skip_metadata=skip_metadata,
skip_stim=skip_stim,
include_experiment_description=include_experiment_description
)

return str(nwb_filepath)

Expand Down
83 changes: 69 additions & 14 deletions allensdk/brain_observatory/behavior/write_nwb/behavior/schemas.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import marshmallow as mm
from warnings import warn
import pandas as pd
from allensdk.brain_observatory.argschema_utilities import (
InputFile,
RaisingSchema,
Expand All @@ -18,6 +20,7 @@
OutputDir,
OutputFile,
String,
Bool
)


Expand Down Expand Up @@ -54,29 +57,81 @@ class Meta:
required=True,
description="Path of output.json to be written",
)
include_experiment_description = Bool(
required=False,
description="If True, include experiment description in NWB file.",
default=True
)

def _get_behavior_metadata(self, bs_row):
    """Assemble behavior-session metadata from a metadata-table row.

    Every value is pulled through ``_retrieve_value`` so that a column
    missing from the table yields None (with a warning) instead of a
    KeyError, and list values (e.g. driver_line) come back sorted.

    Parameters
    ----------
    bs_row : pd.Series
        Row of a behavior session metadata table.

    Returns
    -------
    dict
        Metadata key -> value (None for absent columns).
    """
    # NOTE(review): the literal diff text also retained the pre-change
    # direct bs_row[...] assignments after these; keeping them would
    # clobber the safe lookups and reintroduce the KeyError this change
    # fixes, so only the _retrieve_value form is kept.
    metadata_columns = (
        "age_in_days",
        "cre_line",
        "date_of_acquisition",
        "driver_line",
        "equipment_name",
        "full_genotype",
        "mouse_id",
        "project_code",
        "reporter_line",
        "session_type",
        "sex",
    )
    return {
        column_name: self._retrieve_value(
            bs_row=bs_row, column_name=column_name
        )
        for column_name in metadata_columns
    }

def _retrieve_value(self, bs_row: pd.Series, column_name: str):
"""Pull a column safely, return None otherwise.

Parameters
----------
bs_row : pd.Series
Row of a BehaviorSessionTable
column_name : str
Name of column to retrieve

Returns
-------
value : object
Value of column_name in bs_row, or None if column_name is not in
bs_row
"""
if column_name not in bs_row.index:
warn(f"Warning, {column_name} not in metadata table. Unless this "
"has been added to the inputs skip_metadata_key or "
"skip_stimulus_file_key, creating the NWB file "
"may fail.")
return None
else:
value = bs_row[column_name]
if isinstance(value, list):
value = sorted(value)
return value


class BehaviorInputSchema(BaseInputSchema):
behavior_session_id = Int(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def _update_session(
self,
lims_session: BehaviorSession,
ophys_experiment_ids: Optional[List[int]] = None,
**kwargs
) -> BehaviorSession:
"""Call session methods to update certain values within the session.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ def run(self):
nwb_filepath=oe_id_dir / f"behavior_ophys_experiment_{oe_id}.nwb",
skip_metadata=self.args["skip_metadata_key"],
skip_stim=self.args["skip_stimulus_file_key"],
include_experiment_description=self.args[
'include_experiment_description'
]
)
logging.info("File successfully created")

Expand All @@ -43,6 +46,7 @@ def write_experiment_nwb(
nwb_filepath: Path,
skip_metadata: List[str],
skip_stim: List[str],
include_experiment_description=True
) -> str:
"""Load and write a BehaviorOphysExperiment as NWB.

Expand All @@ -61,6 +65,8 @@ def write_experiment_nwb(
List of metadata keys to skip when comparing data.
skip_stim : list of str
List of stimulus file keys to skip when comparing data.
include_experiment_description : bool
If True, include experiment description in NWB file.

Returns
-------
Expand All @@ -83,6 +89,7 @@ def write_experiment_nwb(
ophys_experiment_ids=self.args["ophys_container_experiment_ids"],
skip_metadata=skip_metadata,
skip_stim=skip_stim,
include_experiment_description=include_experiment_description
)

return str(nwb_filepath)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ def channel_locations(self) -> np.ndarray:

@classmethod
def from_json(cls, probe_meta: dict) -> "CurrentSourceDensity":
    """Construct a CurrentSourceDensity from probe metadata.

    Parameters
    ----------
    probe_meta : dict
        Must contain 'csd_path' (path to an HDF5 file with
        'current_source_density', 'timestamps' and 'csd_locations'
        datasets). May contain 'scale_mean_waveform_and_csd', a divisor
        applied to the CSD data (defaults to 1, i.e. no scaling).

    Returns
    -------
    CurrentSourceDensity
    """
    # Divide out a legacy scaling applied to some sessions' data;
    # see the change log entry referenced by the schema help text.
    scale = probe_meta.get("scale_mean_waveform_and_csd", 1)
    with h5py.File(probe_meta['csd_path'], "r") as csd_file:
        # Use cls (not the hard-coded class name) so subclasses
        # construct instances of themselves.
        return cls(
            data=csd_file["current_source_density"][:] / scale,
            timestamps=csd_file["timestamps"][:],
            interpolated_channel_locations=csd_file["csd_locations"][:]
        )
Expand Down
12 changes: 9 additions & 3 deletions allensdk/brain_observatory/ecephys/_units.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def from_json(
)
mean_waveforms = _read_waveforms_to_dictionary(
probe['mean_waveforms_path'],
local_to_global_unit_map
local_to_global_unit_map,
mean_waveform_scale=probe.get('scale_mean_waveform_and_csd', 1)
)
spike_amplitudes = _read_spike_amplitudes_to_dictionary(
probe["spike_amplitudes_path"],
Expand Down Expand Up @@ -129,7 +130,10 @@ def _read_spike_amplitudes_to_dictionary(


def _read_waveforms_to_dictionary(
waveforms_path, local_to_global_unit_map=None, peak_channel_map=None
waveforms_path,
local_to_global_unit_map=None,
peak_channel_map=None,
mean_waveform_scale=1,
):
""" Builds a lookup table for unitwise waveform data

Expand All @@ -144,6 +148,8 @@ def _read_waveforms_to_dictionary(
Maps unit identifiers to indices of peak channels. If provided,
the output will contain only samples on the peak
channel for each unit.
mean_waveform_scale : float, optional
Divide out a scaling from the mean_waveform. Default 1.

Returns
-------
Expand All @@ -169,7 +175,7 @@ def _read_waveforms_to_dictionary(
if peak_channel_map is not None:
waveform = waveform[:, peak_channel_map[unit_id]]

output_waveforms[unit_id] = np.squeeze(waveform)
output_waveforms[unit_id] = np.squeeze(waveform) / mean_waveform_scale

return output_waveforms

Expand Down
10 changes: 10 additions & 0 deletions allensdk/brain_observatory/ecephys/write_nwb/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,16 @@ class Probe(RaisingSchema):
help="""amplitude scale factor converting raw amplitudes to Volts.
Default converts from bits -> uV -> V""",
)
scale_mean_waveform_and_csd = Float(
default=1,
allow_none=True,
help="""Amount to scale the mean waveform and CSD by. (data / scale).
This is a fix for a set of data documented in the change log.
The values for unit amplitudes were changed in the input_json
file and do not use this scale.
If the data in LIMS for these sessions is updated, this scaling
is not needed. Default is 1"""
)


class InvalidEpoch(RaisingSchema):
Expand Down
Loading
Loading