From e9a5155e97e19e842a06916081cff0e0e44e15b6 Mon Sep 17 00:00:00 2001 From: Jason Kai Date: Mon, 13 Mar 2023 09:04:42 -0400 Subject: [PATCH] wip - bids validation plugin --- snakebids/app.py | 3 +- snakebids/core/input_generation.py | 56 +------------------ snakebids/plugins/__init__.py | 6 +++ snakebids/plugins/validation.py | 48 +++++++++++++++++ snakebids/tests/test_app.py | 1 - snakebids/tests/test_generate_inputs.py | 71 ++----------------------- 6 files changed, 60 insertions(+), 125 deletions(-) create mode 100644 snakebids/plugins/__init__.py create mode 100644 snakebids/plugins/validation.py diff --git a/snakebids/app.py b/snakebids/app.py index dccb02b5..cd072213 100644 --- a/snakebids/app.py +++ b/snakebids/app.py @@ -20,6 +20,7 @@ parse_snakebids_args, ) from snakebids.exceptions import ConfigError, RunError +from snakebids.plugins.validation import bids_validate from snakebids.utils.output import ( prepare_bidsapp_output, write_config_file, @@ -171,8 +172,6 @@ def run_snakemake(self): self.config["pybids_db_dir"] = args.pybidsdb_dir self.config["pybids_db_reset"] = args.reset_db - self.config["skip_bids_validation"] = args.skip_bids_validation - update_config(self.config, args) # First, handle outputs in snakebids_root or results folder diff --git a/snakebids/core/input_generation.py b/snakebids/core/input_generation.py index b73cce86..946581c9 100644 --- a/snakebids/core/input_generation.py +++ b/snakebids/core/input_generation.py @@ -3,8 +3,6 @@ import json import logging import re -import subprocess -import tempfile from collections import defaultdict from pathlib import Path from typing import Dict, Generator, Iterable, List, Optional, Tuple, Union, overload @@ -268,20 +266,10 @@ def generate_inputs( participant_label, exclude_participant_label ) - # Attempt to validate with node bids-validator - validated = ( - _validate_bids_dir - if not skip_bids_validation and not _all_custom_paths(pybids_inputs) - else None - ) - - # If not skipping validation, set validate indicator to opposite of output - # from _validate_bids_dir, otherwise do not validate layout = ( _gen_bids_layout( bids_dir=bids_dir, derivatives=derivatives, - validate=not validated if validated else False, pybids_config=pybids_config, pybids_database_dir=pybids_database_dir, pybids_reset_database=pybids_reset_database, @@ -335,7 +323,6 @@ def _all_custom_paths(config: InputsConfig): def _gen_bids_layout( bids_dir: Union[Path, str], derivatives: bool, - validate: Union[bool, None], pybids_database_dir: Union[Path, str, None], pybids_reset_database: bool, pybids_config: Union[Path, str, None] = None, @@ -353,10 +340,6 @@ def _gen_bids_layout( determines whether snakebids will search in the derivatives subdirectory of the input dataset. - validate : bool - A boolean that indicates whether validation should be performed on - input dataset - pybids_database_dir : str Path to database directory. If None is provided, database is not used @@ -385,50 +368,13 @@ def _gen_bids_layout( return BIDSLayout( bids_dir, derivatives=derivatives, - validate=validate, config=pybids_config, database_path=pybids_database_dir, reset_database=pybids_reset_database, - indexer=BIDSLayoutIndexer(validate=validate, index_metadata=False), + indexer=BIDSLayoutIndexer(index_metadata=False), ) -def _validate_bids_dir( - bids_dir: Union[Path, str], -) -> bool: - """Perform validation of dataset. Initial attempt at validation performed - with node-version of bids-validator. If not found, will fall back to - validation integrated into pybids. - - Parameters - ---------- - bids_dir : str - Path to bids directory - - Returns - ------- - bool - Indication of whether validation was successfully performed using - the node-version of bids-validator - """ - try: - validator_config_dict = {"ignoredFiles": ["/participants.tsv"]} - - with tempfile.NamedTemporaryFile(mode="w+", suffix=".json") as temp: - temp.write(json.dumps(validator_config_dict)) - temp.flush() - - subprocess.check_call(["bids-validator", str(bids_dir), "-c", temp.name]) - - return True - except FileNotFoundError: - _logger.warning( - "Bids-validator does not appear to be installed - will use pybids " - "validation." - ) - return False - - def write_derivative_json(snakemake, **kwargs): """Snakemake function to read a json file, and write to a new one, adding BIDS derivatives fields for Sources and Parameters. diff --git a/snakebids/plugins/__init__.py b/snakebids/plugins/__init__.py new file mode 100644 index 00000000..ad1770bd --- /dev/null +++ b/snakebids/plugins/__init__.py @@ -0,0 +1,6 @@ +__submodules__ = [] + +# +__all__ = [] + +# diff --git a/snakebids/plugins/validation.py b/snakebids/plugins/validation.py new file mode 100644 index 00000000..bdada5bc --- /dev/null +++ b/snakebids/plugins/validation.py @@ -0,0 +1,48 @@ +import json +import logging +import subprocess +import tempfile + +from snakebids.app import SnakeBidsApp + +_logger = logging.getLogger(__name__) + + +class InvalidBidsError(Exception): + """Error raised if an input BIDS dataset is invalid.""" + + +def bids_validate(app: SnakeBidsApp, bids_dir: str) -> None: + """Perform validation of dataset. Initial attempt at validation performed + with node-version of bids-validator. If not found, will fall back to Python + version of validation (same as pybids). + + Parameters + ---------- + app + Snakebids application to be run + bids_dir + BIDS organized directory to be validated + """ + + # Skip bids validation + if app.config["skip_bids_validation"]: + return + + try: + validator_config_dict = {"ignoredFiles": ["/participants.tsv"]} + + with tempfile.NamedTemporaryFile(mode="w+", suffix=".json") as temp: + temp.write(json.dumps(validator_config_dict)) + temp.flush() + + subprocess.check_call(["bids-validator", str(bids_dir), "-c", temp.name]) + # If the bids-validator call can't be made + except FileNotFoundError: + _logger.warning( + "Bids-validator does not appear to be installed - will use python " + "validation." + ) + # Any other bids-validator error + except subprocess.CalledProcessError as err: + raise InvalidBidsError from err diff --git a/snakebids/tests/test_app.py b/snakebids/tests/test_app.py index 97281be0..2d252a41 100644 --- a/snakebids/tests/test_app.py +++ b/snakebids/tests/test_app.py @@ -100,7 +100,6 @@ def test_runs_in_correct_mode( "pybids_db_reset": True, "snakefile": Path("Snakefile"), "output_dir": outputdir.resolve(), - "skip_bids_validation": False, } ) if root == "app" and tail == "": diff --git a/snakebids/tests/test_generate_inputs.py b/snakebids/tests/test_generate_inputs.py index d45f7448..4632a2ec 100644 --- a/snakebids/tests/test_generate_inputs.py +++ b/snakebids/tests/test_generate_inputs.py @@ -29,7 +29,6 @@ _generate_filters, _get_lists_from_bids, _parse_custom_path, - _validate_bids_dir, generate_inputs, ) from snakebids.exceptions import ConfigError, PybidsError @@ -97,9 +96,7 @@ def test_ambiguous_paths_with_extra_entities_leads_to_error( } with pytest.raises(ConfigError): - generate_inputs( - root, pybids_inputs, skip_bids_validation=True, use_bids_inputs=True - ) + generate_inputs(root, pybids_inputs, use_bids_inputs=True) @settings( deadline=800, @@ -125,9 +122,7 @@ def test_ambiguous_paths_with_missing_entity_leads_to_error( } with pytest.raises(ConfigError): - generate_inputs( - root, pybids_inputs, skip_bids_validation=True, use_bids_inputs=True - ) + generate_inputs(root, pybids_inputs, use_bids_inputs=True) @settings( deadline=800, @@ -162,9 +157,7 @@ def test_entity_excluded_when_filter_false( } ) - data = generate_inputs( - root, pybids_inputs, skip_bids_validation=True, use_bids_inputs=True - ) + data = generate_inputs(root, pybids_inputs, use_bids_inputs=True) assert data == expected @example( @@ -242,9 +235,7 @@ def test_entity_excluded_when_filter_true(self, tmpdir: Path, dataset: BidsDatas } ) - data = generate_inputs( - root, pybids_inputs, skip_bids_validation=True, use_bids_inputs=True - ) + data = generate_inputs(root, pybids_inputs, use_bids_inputs=True) assert data == expected @@ -279,7 +270,6 @@ def test_missing_filters(self, tmpdir: Path): bids_dir=tmpdir, derivatives=derivatives, pybids_config=str(Path(__file__).parent / "data" / "custom_config.json"), - skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset({"t1": BidsComponent("t1", config["t1"].path, zip_list)}) @@ -304,7 +294,6 @@ def test_missing_wildcards(self, tmpdir: Path): bids_dir=tmpdir, derivatives=derivatives, pybids_config=str(Path(__file__).parent / "data" / "custom_config.json"), - skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset({"t1": BidsComponent("t1", config["t1"].path, {})}) @@ -565,7 +554,6 @@ def test_custom_pybids_config(tmpdir: Path): bids_dir=tmpdir, derivatives=derivatives, pybids_config=Path(__file__).parent / "data" / "custom_config.json", - skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset( @@ -618,7 +606,6 @@ def test_nonstandard_custom_pybids_config(tmpdir: Path): pybids_config=( Path(__file__).parent / "data" / "custom_config_nonstandard.json" ), - skip_bids_validation=True, use_bids_inputs=True, ) @@ -642,7 +629,6 @@ def test_t1w(): derivatives=derivatives, participant_label="001", exclude_participant_label="002", - skip_bids_validation=True, ) assert v_error.value.args[0] == ( "Cannot define both participant_label and " @@ -654,7 +640,6 @@ def test_t1w(): pybids_inputs=pybids_inputs, bids_dir=real_bids_dir, derivatives=derivatives, - skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset( @@ -690,7 +675,6 @@ def test_t1w(): bids_dir=real_bids_dir, derivatives=derivatives, participant_label="001", - skip_bids_validation=True, use_bids_inputs=True, ) assert result["scan"].entities == { @@ -762,7 +746,6 @@ def test_t1w(): pybids_inputs=pybids_inputs, bids_dir=bids_dir, derivatives=derivatives, - skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset( @@ -801,7 +784,6 @@ def test_t1w_with_dict(): pybids_inputs=pybids_inputs, bids_dir=real_bids_dir, derivatives=derivatives, - skip_bids_validation=True, ) # Order of the subjects is not deterministic assert config["input_lists"] == BidsListCompare( @@ -833,7 +815,6 @@ def test_t1w_with_dict(): bids_dir=real_bids_dir, derivatives=derivatives, participant_label="001", - skip_bids_validation=True, ) assert config["input_lists"] == { "scan": {"acq": ["mprage"], "subject": ["001"], "suffix": ["T1w"]} @@ -987,45 +968,6 @@ def test_when_all_custom_paths_no_layout_indexed( spy.assert_not_called() -class TestValidate: - @pytest.fixture(autouse=True) - def start(self, tmpdir): - self.bids_dir = "snakebids/tests/data/bids_t1w" - self.tmp_dir = tmpdir.strpath - - # Copy test data - shutil.copytree(self.bids_dir, f"{self.tmp_dir}/data") - assert filecmp.dircmp("snakebids/tests/data/bids_t1w", f"{self.tmp_dir}/data") - - # Copy dataset description - shutil.copy( - "snakebids/tests/data/dataset_description.json", f"{self.tmp_dir}/data" - ) - - def test_check_validator(self): - """Test validator defaults to pybids (i.e. False)""" - assert _validate_bids_dir(self.bids_dir) == False - - def test_pybids_validation_fail(self): - with pytest.raises(BIDSValidationError): - _gen_bids_layout( - bids_dir=self.bids_dir, - derivatives=False, - pybids_database_dir=None, - pybids_reset_database=False, - validate=True, - ) - - def test_pybids_validation_pass(self): - assert _gen_bids_layout( - bids_dir=f"{self.tmp_dir}/data", - derivatives=False, - pybids_database_dir=None, - pybids_reset_database=False, - validate=True, - ) - - class TestDB: @pytest.fixture(autouse=True) def start(self, tmpdir): @@ -1046,7 +988,6 @@ def test_database_dir_blank(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), - validate=False, ) assert not os.path.exists(self.pybids_db.get("database_dir")) @@ -1060,7 +1001,6 @@ def test_database_dir_relative(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), - validate=False, ) assert not os.path.exists(f"{self.tmpdir}/data/.db/") @@ -1075,7 +1015,6 @@ def test_database_dir_absolute(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), - validate=False, ) assert os.path.exists(f"{self.tmpdir}/data/.db/") @@ -1091,7 +1030,6 @@ def test_database_dir_absolute(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), - validate=False, ) assert not layout.get(subject="003") @@ -1103,6 +1041,5 @@ def test_database_dir_absolute(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), - validate=False, ) assert layout.get(subject="003")