From de341fac1b1b4932eb0d59f0c925658658391fce Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Fri, 2 Feb 2024 13:48:02 -0500 Subject: [PATCH] Add in vitro support (#431) * add injection and test * extend form to allow other subject ID value for DANDI organization * adjust test docstring * expand docs and changelog * remove mock until we move to new pynwb permanently * swap from invitro to protein * Update tests/test_inspector.py Co-authored-by: Ryan Ly --------- Co-authored-by: Ryan Ly --- CHANGELOG.md | 6 ++++ docs/best_practices/nwbfile_metadata.rst | 2 ++ src/nwbinspector/nwbinspector.py | 36 +++++++++++++++++++++++- tests/test_inspector.py | 19 +++++++++++-- 4 files changed, 59 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 705f3f30c..21e39475c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,12 @@ * Use cached extension namespaces when calling pynwb validate instead of just the core namespace. [#425](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/425) +### Improvements + +* Added automatic suppression of certain subject related checks when inspecting files using the "dandi" configuration that have a `subject_id` that starts with the keyphrase "protein"; _e.g._, "proteinCaMPARI3" to indicate the _in vitro_ subject of the experiment is a purified CaMPARI3 protein. + + + # v0.4.30 ### Fixes diff --git a/docs/best_practices/nwbfile_metadata.rst b/docs/best_practices/nwbfile_metadata.rst index 1915172f9..5e897ce7e 100644 --- a/docs/best_practices/nwbfile_metadata.rst +++ b/docs/best_practices/nwbfile_metadata.rst @@ -184,6 +184,8 @@ A ``subject_id`` is required for upload to the :dandi-archive:`DANDI archive <>` not intended for DANDI upload, if the :ref:`nwb-schema:sec-Subject` is specified at all it should be given a ``subject_id`` for reference. 
# TODO: deprecate once subject types and dandi schemas have been extended
def _intercept_in_vitro_protein(
    nwbfile_object: pynwb.NWBFile, checks: Optional[list] = None
) -> Optional[List[callable]]:
    """
    Drop the subject-related checks when the file describes an in vitro 'protein' subject.

    The checks are only truncated when both of the following hold:

    1. at least one of the subject-related checks in `checks` has been elevated to
       ``Importance.CRITICAL`` (as done for DANDI requirements), and
    2. the file has a subject whose ``subject_id`` is a string starting with "protein".

    In every other case `checks` is returned unchanged (the very same object).

    This is a temporary method for allowing upload of certain in vitro data to DANDI and
    is expected to be replaced in future versions.

    Parameters
    ----------
    nwbfile_object : pynwb.NWBFile
        The in-memory file whose ``subject`` attribute is examined.
    checks : list, optional
        The check functions that are about to be run. Each must expose ``__name__`` and,
        for the subject-related ones, ``importance``.

    Returns
    -------
    list of callable, or None
        `checks` without the subject-related entries when the 'protein' keyphrase applies,
        otherwise `checks` exactly as passed in (including ``None`` or an empty list).
    """
    # Nothing to filter; also prevents iterating over the `None` default.
    if not checks:
        return checks

    subject_related_check_names = frozenset(
        (
            "check_subject_exists",
            "check_subject_id_exists",
            "check_subject_sex",
            "check_subject_species_exists",
            "check_subject_species_form",
            "check_subject_age",
            "check_subject_proper_age_range",
        )
    )

    # Only intercept when a subject check has been elevated to CRITICAL; otherwise the
    # subject checks keep their usual behavior.
    has_critical_subject_check = any(
        check.importance == Importance.CRITICAL
        for check in checks
        if check.__name__ in subject_related_check_names
    )
    if not has_critical_subject_check:
        return checks

    # `subject` may be absent or None, and `subject_id` may be present but set to None;
    # the `getattr` default only covers a *missing* attribute, so check the type explicitly.
    subject = getattr(nwbfile_object, "subject", None)
    subject_id = getattr(subject, "subject_id", None)
    if not isinstance(subject_id, str) or not subject_id.startswith("protein"):
        return checks

    return [check for check in checks if check.__name__ not in subject_related_check_names]
def test_dandi_config_in_vitro_injection():
    """A 'protein'-prefixed subject_id must yield no CRITICAL messages under the 'dandi' configuration."""
    in_vitro_nwbfile = make_minimal_nwbfile()
    # Only the ID and a description are provided; no other subject fields are set.
    in_vitro_nwbfile.subject = Subject(
        subject_id="proteinCaMPARI3",
        description="A detailed description about the in vitro setup.",
    )

    dandi_config = load_config(filepath_or_keyword="dandi")
    critical_messages = list(
        inspect_nwbfile_object(
            nwbfile_object=in_vitro_nwbfile,
            config=dandi_config,
            importance_threshold="CRITICAL",
        )
    )

    assert critical_messages == []