From 5dcc24ab8435ae67b865ef2a9453e35737fc79c4 Mon Sep 17 00:00:00 2001 From: Christopher Morrison Date: Tue, 31 Oct 2023 13:35:45 -0700 Subject: [PATCH 1/6] Check for bad omitted stimulus at start of both active and passive blocks. --- .../data_objects/stimuli/presentations.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/allensdk/brain_observatory/behavior/data_objects/stimuli/presentations.py b/allensdk/brain_observatory/behavior/data_objects/stimuli/presentations.py index 8cb6a0430..20c42ac4d 100644 --- a/allensdk/brain_observatory/behavior/data_objects/stimuli/presentations.py +++ b/allensdk/brain_observatory/behavior/data_objects/stimuli/presentations.py @@ -597,10 +597,18 @@ def _check_for_errant_omitted_stimulus( found, return input_df unmodified. """ if "omitted" in input_df.columns and len(input_df) > 0: - first_row = input_df.iloc[0] - if not pd.isna(first_row["omitted"]): - if first_row["omitted"]: - input_df = input_df.drop(first_row.name, axis=0) + if "stimulus_block" in input_df.columns: + for stim_block in input_df['stimulus_block'].unique(): + first_row = input_df[ + input_df['stimulus_block'] == stim_block].iloc[0] + if not pd.isna(first_row["omitted"]): + if first_row["omitted"]: + input_df = input_df.drop(first_row.name, axis=0) + else: + first_row = input_df.iloc[0] + if not pd.isna(first_row["omitted"]): + if first_row["omitted"]: + input_df = input_df.drop(first_row.name, axis=0) return input_df @staticmethod From dd4275e5088cf55dd2125dfcf13815bafaa66a96 Mon Sep 17 00:00:00 2001 From: Christopher Morrison Date: Fri, 10 Nov 2023 16:05:03 -0800 Subject: [PATCH 2/6] Allow for missing data in input metadata table. Warn that the column is missing. Add include_experiment_description to write nwb. 
--- allensdk/__init__.py | 2 +- .../tables/metadata_table_schemas.py | 41 ++++++--- .../data_objects/stimuli/presentations.py | 27 ++++-- .../behavior/write_nwb/behavior/__main__.py | 12 ++- .../behavior/write_nwb/behavior/schemas.py | 83 +++++++++++++++---- .../behavior/write_nwb/nwb_writer_utils.py | 1 + .../behavior/write_nwb/ophys/__main__.py | 7 ++ 7 files changed, 137 insertions(+), 36 deletions(-) diff --git a/allensdk/__init__.py b/allensdk/__init__.py index f2f248ebd..83a37b4b3 100644 --- a/allensdk/__init__.py +++ b/allensdk/__init__.py @@ -35,7 +35,7 @@ # import logging -__version__ = '2.16.1' +__version__ = '2.16.2' try: diff --git a/allensdk/brain_observatory/behavior/behavior_project_cache/tables/metadata_table_schemas.py b/allensdk/brain_observatory/behavior/behavior_project_cache/tables/metadata_table_schemas.py index d579114c8..1f6a1e0b1 100644 --- a/allensdk/brain_observatory/behavior/behavior_project_cache/tables/metadata_table_schemas.py +++ b/allensdk/brain_observatory/behavior/behavior_project_cache/tables/metadata_table_schemas.py @@ -11,7 +11,8 @@ class BehaviorSessionMetadataSchema(RaisingSchema): age_in_days = Int(required=True, description="Subject age") behavior_session_id = Int( - required=True, + required=False, + allow_none=True, description=( "Unique identifier for the " "behavior session to write into " @@ -19,41 +20,59 @@ class BehaviorSessionMetadataSchema(RaisingSchema): ), ) cre_line = String( - required=True, description="Genetic cre line of the subject." + required=False, + allow_none=True, + description="Genetic cre line of the subject." 
) date_of_acquisition = String( - required=True, + required=False, + allow_none=True, description=( "Date of acquisition of " "behavior session, in string " "format" ), ) driver_line = List( String, - required=True, + required=False, + allow_none=True, cli_as_single_argument=True, description="Genetic driver line(s) of subject", ) equipment_name = String( - required=True, description=("Name of the equipment used.") + required=False, + allow_none=True, + description=("Name of the equipment used.") ) full_genotype = String( - required=True, description="Full genotype of subject" + required=False, + allow_none=True, + description="Full genotype of subject" ) mouse_id = String( - required=True, + required=False, + allow_none=True, description="LabTracks ID of the subject. aka external_specimen_name.", ) project_code = String( - rquired=True, + rquired=False, + allow_none=True, description="LabTracks ID of the subject. aka external_specimen_name.", ) reporter_line = String( - required=True, description="Genetic reporter line(s) of subject" + required=False, + allow_none=True, + description="Genetic reporter line(s) of subject" ) session_type = String( - required=True, description="Full name of session type." + required=False, + allow_none=True, + description="Full name of session type." 
+ ) + sex = String( + required=False, + allow_none=True, + description="Subject sex" ) - sex = String(required=True, description="Subject sex") @mm.post_load def convert_date_time(self, data, **kwargs): diff --git a/allensdk/brain_observatory/behavior/data_objects/stimuli/presentations.py b/allensdk/brain_observatory/behavior/data_objects/stimuli/presentations.py index 20c42ac4d..f511b03d0 100644 --- a/allensdk/brain_observatory/behavior/data_objects/stimuli/presentations.py +++ b/allensdk/brain_observatory/behavior/data_objects/stimuli/presentations.py @@ -596,19 +596,28 @@ def _check_for_errant_omitted_stimulus( Dataframe with omitted stimulus removed from first row or if not found, return input_df unmodified. """ + + def safe_omitted_check(input_df: pd.Series, + stimulus_block: Optional[int]): + if stimulus_block is not None: + first_row = input_df[ + input_df['stimulus_block'] == stim_block].iloc[0] + else: + first_row = input_df.iloc[0] + + if not pd.isna(first_row["omitted"]): + if first_row["omitted"]: + input_df = input_df.drop(first_row.name, axis=0) + return input_df + if "omitted" in input_df.columns and len(input_df) > 0: if "stimulus_block" in input_df.columns: for stim_block in input_df['stimulus_block'].unique(): - first_row = input_df[ - input_df['stimulus_block'] == stim_block].iloc[0] - if not pd.isna(first_row["omitted"]): - if first_row["omitted"]: - input_df = input_df.drop(first_row.name, axis=0) + input_df = safe_omitted_check(input_df=input_df, + stimulus_block=stim_block) else: - first_row = input_df.iloc[0] - if not pd.isna(first_row["omitted"]): - if first_row["omitted"]: - input_df = input_df.drop(first_row.name, axis=0) + input_df = safe_omitted_check(input_df=input_df, + stimulus_block=None) return input_df @staticmethod diff --git a/allensdk/brain_observatory/behavior/write_nwb/behavior/__main__.py b/allensdk/brain_observatory/behavior/write_nwb/behavior/__main__.py index f5079b463..fb13a434d 100644 --- 
a/allensdk/brain_observatory/behavior/write_nwb/behavior/__main__.py +++ b/allensdk/brain_observatory/behavior/write_nwb/behavior/__main__.py @@ -27,6 +27,9 @@ def run(self): nwb_filepath=bs_id_dir / f"behavior_session_{bs_id}.nwb", skip_metadata=self.args["skip_metadata_key"], skip_stim=self.args["skip_stimulus_file_key"], + include_experiment_description=self.args[ + 'include_experiment_description' + ] ) logging.info("File successfully created") @@ -43,6 +46,7 @@ def write_behavior_nwb( nwb_filepath: Path, skip_metadata: List[str], skip_stim: List[str], + include_experiment_description=True ) -> str: """Load and write a BehaviorSession as NWB. @@ -61,6 +65,8 @@ def write_behavior_nwb( List of metadata keys to skip when comparing data. skip_stim : list of str List of stimulus file keys to skip when comparing data. + include_experiment_description : bool + If True, include experiment description in NWB file. Returns ------- @@ -79,7 +85,11 @@ def write_behavior_nwb( session_data=behavior_session_metadata, serializer=BehaviorSession, ) - nwb_writer.write_nwb(skip_metadata=skip_metadata, skip_stim=skip_stim) + nwb_writer.write_nwb( + skip_metadata=skip_metadata, + skip_stim=skip_stim, + include_experiment_description=include_experiment_description + ) return str(nwb_filepath) diff --git a/allensdk/brain_observatory/behavior/write_nwb/behavior/schemas.py b/allensdk/brain_observatory/behavior/write_nwb/behavior/schemas.py index feacbc7c6..843e6a505 100644 --- a/allensdk/brain_observatory/behavior/write_nwb/behavior/schemas.py +++ b/allensdk/brain_observatory/behavior/write_nwb/behavior/schemas.py @@ -1,4 +1,6 @@ import marshmallow as mm +from warnings import warn +import pandas as pd from allensdk.brain_observatory.argschema_utilities import ( InputFile, RaisingSchema, @@ -18,6 +20,7 @@ OutputDir, OutputFile, String, + Bool ) @@ -54,29 +57,81 @@ class Meta: required=True, description="Path of output.json to be written", ) + include_experiment_description = Bool( + 
required=False, + description="If True, include experiment description in NWB file.", + default=True + ) def _get_behavior_metadata(self, bs_row): """ """ behavior_session_metadata = {} - behavior_session_metadata["age_in_days"] = bs_row["age_in_days"] - behavior_session_metadata["cre_line"] = bs_row["cre_line"] - behavior_session_metadata["date_of_acquisition"] = bs_row[ - "date_of_acquisition" - ] - behavior_session_metadata["driver_line"] = sorted( - bs_row["driver_line"] + behavior_session_metadata["age_in_days"] = self._retrieve_value( + bs_row=bs_row, column_name="age_in_days" + ) + behavior_session_metadata["cre_line"] = self._retrieve_value( + bs_row=bs_row, column_name="cre_line" + ) + behavior_session_metadata["date_of_acquisition"] = self._retrieve_value( # noqa: E501 + bs_row=bs_row, column_name="date_of_acquisition" + ) + behavior_session_metadata["driver_line"] = self._retrieve_value( + bs_row=bs_row, column_name="driver_line" + ) + behavior_session_metadata["equipment_name"] = self._retrieve_value( + bs_row=bs_row, column_name="equipment_name" + ) + behavior_session_metadata["full_genotype"] = self._retrieve_value( + bs_row=bs_row, column_name="full_genotype" + ) + behavior_session_metadata["mouse_id"] = self._retrieve_value( + bs_row=bs_row, column_name="mouse_id" + ) + behavior_session_metadata["project_code"] = self._retrieve_value( + bs_row=bs_row, column_name="project_code" + ) + behavior_session_metadata["reporter_line"] = self._retrieve_value( + bs_row=bs_row, column_name="reporter_line" + ) + behavior_session_metadata["session_type"] = self._retrieve_value( + bs_row=bs_row, column_name="session_type" + ) + behavior_session_metadata["sex"] = self._retrieve_value( + bs_row=bs_row, + column_name="sex" ) - behavior_session_metadata["equipment_name"] = bs_row["equipment_name"] - behavior_session_metadata["full_genotype"] = bs_row["full_genotype"] - behavior_session_metadata["mouse_id"] = bs_row["mouse_id"] - 
behavior_session_metadata["project_code"] = bs_row["project_code"] - behavior_session_metadata["reporter_line"] = bs_row["reporter_line"] - behavior_session_metadata["session_type"] = bs_row["session_type"] - behavior_session_metadata["sex"] = bs_row["sex"] return behavior_session_metadata + def _retrieve_value(self, bs_row: pd.Series, column_name: str): + """Pull a column safely, return None otherwise. + + Parameters + ---------- + bs_row : pd.Series + Row of a BehaviorSessionTable + column_name : str + Name of column to retrieve + + Returns + ------- + value : object + Value of column_name in bs_row, or None if column_name is not in + bs_row + """ + if column_name not in bs_row.index: + warn(f"Warning, {column_name} not in metadata table. Unless this " + "has been added to the inputs skip_metadata_key or " + "skip_stimulus_file_key, creating the NWB file " + "may fail.") + return None + else: + value = bs_row[column_name] + if isinstance(value, list): + value = sorted(value) + return value + class BehaviorInputSchema(BaseInputSchema): behavior_session_id = Int( diff --git a/allensdk/brain_observatory/behavior/write_nwb/nwb_writer_utils.py b/allensdk/brain_observatory/behavior/write_nwb/nwb_writer_utils.py index 0ad369be9..1b1c3836e 100644 --- a/allensdk/brain_observatory/behavior/write_nwb/nwb_writer_utils.py +++ b/allensdk/brain_observatory/behavior/write_nwb/nwb_writer_utils.py @@ -13,6 +13,7 @@ def _update_session( self, lims_session: BehaviorSession, ophys_experiment_ids: Optional[List[int]] = None, + **kwargs ) -> BehaviorSession: """Call session methods to update certain values within the session. 
diff --git a/allensdk/brain_observatory/behavior/write_nwb/ophys/__main__.py b/allensdk/brain_observatory/behavior/write_nwb/ophys/__main__.py index 718674234..77e92c692 100644 --- a/allensdk/brain_observatory/behavior/write_nwb/ophys/__main__.py +++ b/allensdk/brain_observatory/behavior/write_nwb/ophys/__main__.py @@ -29,6 +29,9 @@ def run(self): nwb_filepath=oe_id_dir / f"behavior_ophys_experiment_{oe_id}.nwb", skip_metadata=self.args["skip_metadata_key"], skip_stim=self.args["skip_stimulus_file_key"], + include_experiment_description=self.args[ + 'include_experiment_description' + ] ) logging.info("File successfully created") @@ -43,6 +46,7 @@ def write_experiment_nwb( nwb_filepath: Path, skip_metadata: List[str], skip_stim: List[str], + include_experiment_description=True ) -> str: """Load and write a BehaviorOphysExperiment as NWB. @@ -61,6 +65,8 @@ def write_experiment_nwb( List of metadata keys to skip when comparing data. skip_stim : list of str List of stimulus file keys to skip when comparing data. + include_experiment_description : bool + If True, include experiment description in NWB file. 
Returns ------- @@ -83,6 +89,7 @@ def write_experiment_nwb( ophys_experiment_ids=self.args["ophys_container_experiment_ids"], skip_metadata=skip_metadata, skip_stim=skip_stim, + include_experiment_description=include_experiment_description ) return str(nwb_filepath) From ef35a6c7a91cdfb168afd154ddda901b70a3462f Mon Sep 17 00:00:00 2001 From: Christopher Morrison Date: Tue, 28 Nov 2023 14:02:49 -0800 Subject: [PATCH 3/6] Add VBN changelog --- allensdk/brain_observatory/ecephys/_units.py | 4 +++- doc_template/visual_behavior_neuropixels.rst | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/allensdk/brain_observatory/ecephys/_units.py b/allensdk/brain_observatory/ecephys/_units.py index 654dc2b7d..9d61964cb 100644 --- a/allensdk/brain_observatory/ecephys/_units.py +++ b/allensdk/brain_observatory/ecephys/_units.py @@ -129,7 +129,9 @@ def _read_spike_amplitudes_to_dictionary( def _read_waveforms_to_dictionary( - waveforms_path, local_to_global_unit_map=None, peak_channel_map=None + waveforms_path, + local_to_global_unit_map=None, + peak_channel_map=None, ): """ Builds a lookup table for unitwise waveform data diff --git a/doc_template/visual_behavior_neuropixels.rst b/doc_template/visual_behavior_neuropixels.rst index 89aa57c86..d2e195f2d 100644 --- a/doc_template/visual_behavior_neuropixels.rst +++ b/doc_template/visual_behavior_neuropixels.rst @@ -36,6 +36,18 @@ Visual Behavior - Neuropixels DATA FILE CHANGELOG ------------------- +**v0.5.0** + +- Updated stimulus presentations tables. +- Add LFP sample rate when loading from cache. +- Fixed scaling of LFP data in multiple sessions. Amplitudes were off by a + factor of 2 for most ecephys sessions. The scaling is also + corrected in released metadata tables. 
+ - All ecephys sessions except those listed here had the issue with the + LFP amplitude scaling: 1059678195, 1108334384, 1108531612, 1109680280, + 1109889304, 1111013640, 1111216934, 1112302803, 1112515874, 1113751921, + 1113957627, 1115077618, 1115356973, 1118324999, 1118512505 + **v0.4.0-fix** - Added stimulus presentations columns that are computed on load: From 1c61e2688258f27fe474bdf9320e409e121bd3e5 Mon Sep 17 00:00:00 2001 From: Christopher Morrison Date: Tue, 28 Nov 2023 14:07:35 -0800 Subject: [PATCH 4/6] Add scaling factor to fix mean_waveforms and CSD. --- .../ecephys/_current_source_density.py | 3 ++- allensdk/brain_observatory/ecephys/_units.py | 8 ++++++-- .../brain_observatory/ecephys/write_nwb/schemas.py | 10 ++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/allensdk/brain_observatory/ecephys/_current_source_density.py b/allensdk/brain_observatory/ecephys/_current_source_density.py index d088d9081..b319812e6 100644 --- a/allensdk/brain_observatory/ecephys/_current_source_density.py +++ b/allensdk/brain_observatory/ecephys/_current_source_density.py @@ -50,9 +50,10 @@ def channel_locations(self) -> np.ndarray: @classmethod def from_json(cls, probe_meta: dict) -> "CurrentSourceDensity": + scale = probe_meta.get("scale_mean_waveform_and_csd", 1) with h5py.File(probe_meta['csd_path'], "r") as csd_file: return CurrentSourceDensity( - data=csd_file["current_source_density"][:], + data=csd_file["current_source_density"][:] / scale, timestamps=csd_file["timestamps"][:], interpolated_channel_locations=csd_file["csd_locations"][:] ) diff --git a/allensdk/brain_observatory/ecephys/_units.py b/allensdk/brain_observatory/ecephys/_units.py index 9d61964cb..7128b8879 100644 --- a/allensdk/brain_observatory/ecephys/_units.py +++ b/allensdk/brain_observatory/ecephys/_units.py @@ -47,7 +47,8 @@ def from_json( ) mean_waveforms = _read_waveforms_to_dictionary( probe['mean_waveforms_path'], - local_to_global_unit_map + local_to_global_unit_map, 
+ mean_waveform_scale=probe.get('scale_mean_waveform_and_csd', 1) ) spike_amplitudes = _read_spike_amplitudes_to_dictionary( probe["spike_amplitudes_path"], @@ -132,6 +133,7 @@ def _read_waveforms_to_dictionary( waveforms_path, local_to_global_unit_map=None, peak_channel_map=None, + mean_waveform_scale=1, ): """ Builds a lookup table for unitwise waveform data @@ -146,6 +148,8 @@ def _read_waveforms_to_dictionary( Maps unit identifiers to indices of peak channels. If provided, the output will contain only samples on the peak channel for each unit. + mean_waveform_scale : float, optional + Divide out a scaling from the mean_waveform. Default 1. Returns ------- @@ -171,7 +175,7 @@ def _read_waveforms_to_dictionary( if peak_channel_map is not None: waveform = waveform[:, peak_channel_map[unit_id]] - output_waveforms[unit_id] = np.squeeze(waveform) + output_waveforms[unit_id] = np.squeeze(waveform) / mean_waveform_scale return output_waveforms diff --git a/allensdk/brain_observatory/ecephys/write_nwb/schemas.py b/allensdk/brain_observatory/ecephys/write_nwb/schemas.py index 9626923a2..8b0bb0f34 100644 --- a/allensdk/brain_observatory/ecephys/write_nwb/schemas.py +++ b/allensdk/brain_observatory/ecephys/write_nwb/schemas.py @@ -204,6 +204,16 @@ class Probe(RaisingSchema): help="""amplitude scale factor converting raw amplitudes to Volts. Default converts from bits -> uV -> V""", ) + scale_mean_waveform_and_csd = Float( + default=1, + allow_none=True, + help="""Amount to scale the mean waveform and CSD by. (data / scale). + This is a fix for a set of data documented in the change log. + The values for unit amplitudes were changed in the input_json + file and do not use this scale. + If the data in LIMS for these sessions is updated, this scaling + is not needed. 
Default is 1""" + ) class InvalidEpoch(RaisingSchema): From 9e79b447365e3021009fd0c6d156cde55e134dca Mon Sep 17 00:00:00 2001 From: Christopher Morrison Date: Wed, 29 Nov 2023 20:01:14 -0800 Subject: [PATCH 5/6] Update changelog --- doc_template/visual_behavior_neuropixels.rst | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/doc_template/visual_behavior_neuropixels.rst b/doc_template/visual_behavior_neuropixels.rst index d2e195f2d..d567df90a 100644 --- a/doc_template/visual_behavior_neuropixels.rst +++ b/doc_template/visual_behavior_neuropixels.rst @@ -39,14 +39,18 @@ DATA FILE CHANGELOG **v0.5.0** - Updated stimulus presentations tables. -- Add LFP sample rate when loading from cache. +- Add LFP sample rate when loading from cache. (Value is duplicated in the + probes metadata table.) - Fixed scaling of LFP data in multiple sessions. Amplitudes were off by a - factor of 2 for most ecephys sessions. The scaling is also - corrected in released metadata tables. - - All ecephys sessions except those listed here had the issue with the - LFP amplitude scaling: 1059678195, 1108334384, 1108531612, 1109680280, - 1109889304, 1111013640, 1111216934, 1112302803, 1112515874, 1113751921, - 1113957627, 1115077618, 1115356973, 1118324999, 1118512505 + factor of 2 for most ecephys sessions. Specifically changed were units + table in the session amplitude, recovery_slope, and repolarization_slope, lfp data, mean + waveform, and current source density data in the NWB files. The scaling is + also corrected in released metadata tables. + - All ecephys sessions had this issue except those listed here: 1059678195, + 1108334384, 1108531612, 1109680280, 1109889304, 1111013640, 1111216934, + 1112302803, 1112515874, 1113751921, 1113957627, 1115077618, 1115356973, + 1118324999, 1118512505 +- Added quality column to units table.
**v0.4.0-fix** From f77212f96016220930d6809b6e54f5cbf7f8aa14 Mon Sep 17 00:00:00 2001 From: Christopher Morrison Date: Wed, 29 Nov 2023 20:10:45 -0800 Subject: [PATCH 6/6] Update changelog and index.rst --- CHANGELOG.md | 5 +++++ doc_template/index.rst | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 73a39f69a..de92af669 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # Change Log All notable changes to this project will be documented in this file. +## [2.16.2] = 2023-11-29 +- See release notes on Github +- Commit code used to create new VBN data release. +- Update VBN changelog. + ## [2.16.1] = 2023-11-13 - See release notes on Github - Update testing diff --git a/doc_template/index.rst b/doc_template/index.rst index b66692319..f6c94f96e 100644 --- a/doc_template/index.rst +++ b/doc_template/index.rst @@ -118,6 +118,12 @@ The Allen SDK provides Python code for accessing experimental metadata along wit See the `mouse connectivity section `_ for more details. +What's new - 2.16.2 +---------------------------------------------------------------------- +- See full release notes on Github +- Commit code used to create new VBN data release. +- Update VBN changelog. + What's new - 2.16.1 ---------------------------------------------------------------------- - See full release notes on Github