From 21aa9745cde9ca6cac04d65a876f838e38e4efa2 Mon Sep 17 00:00:00 2001 From: Jason Kai Date: Mon, 27 Feb 2023 06:47:12 -0500 Subject: [PATCH 01/11] add skip-validation to snakebids cli --- snakebids/cli.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/snakebids/cli.py b/snakebids/cli.py index 7278fe6a..4e26a5c6 100644 --- a/snakebids/cli.py +++ b/snakebids/cli.py @@ -55,20 +55,25 @@ class SnakebidsArgs: Directory to place outputs pybidsdb_dir : Path Directory to place pybids database + reset_db : bool + Update the pybids database snakemake_args : list of strings Arguments to pass on to Snakemake args_dict : Dict[str, Any] Contains all the snakebids specific args. Meant to contain custom user args defined in config, as well as dynamic --filter-xx and --wildcard-xx args. These will eventually be printed in the new config. + skip_validation : bool + Skip bids validation of input dataset """ force: bool outputdir: Path = attr.ib(converter=lambda p: Path(p).resolve()) - snakemake_args: List[str] - args_dict: Dict[str, Any] pybidsdb_dir: Optional[Path] = None reset_db: bool = False + snakemake_args: List[str] + args_dict: Dict[str, Any] + skip_validation: bool = False def create_parser(include_snakemake=False): @@ -141,6 +146,13 @@ def create_parser(include_snakemake=False): help="Force output in a new directory that already has contents", ) + standard_group.add_argument( + "--skip-validation", + "--skip_validation", + action="store_true", + help=("Skip BIDS validation of input dataset") + ) + standard_group.add_argument( "--retrofit", action="store_true", From 6aa4f1a88d13e0152ba7402b9041505e5f77aa10 Mon Sep 17 00:00:00 2001 From: Jason Kai Date: Mon, 27 Feb 2023 07:54:54 -0500 Subject: [PATCH 02/11] Add opt-out validation of bids_dir - Currently performs validation for entire dataset, can look into performing validation for only subjects that are included in run --- snakebids/core/input_generation.py | 65 +++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/snakebids/core/input_generation.py b/snakebids/core/input_generation.py index c75d9b0e..d336d758 100644 --- a/snakebids/core/input_generation.py +++ b/snakebids/core/input_generation.py @@ -3,6 +3,8 @@ import json import logging import re +import subprocess +import tempfile from collections import defaultdict from pathlib import Path from typing import Dict, Generator, Iterable, List, Optional, Tuple, Union, overload @@ -34,6 +36,7 @@ def generate_inputs( limit_to=..., participant_label=..., exclude_participant_label=..., + skip_bids_validation=..., use_bids_inputs: Union[Literal[False], None] = ..., ) -> BidsDatasetDict: ... @@ -51,6 +54,7 @@ def generate_inputs( limit_to=..., participant_label=..., exclude_participant_label=..., + skip_bids_validation=..., use_bids_inputs: Literal[True] = ..., ) -> BidsDataset: ... @@ -67,6 +71,7 @@ def generate_inputs( limit_to=None, participant_label=None, exclude_participant_label=None, + skip_bids_validation=False, use_bids_inputs=None, ): """Dynamically generate snakemake inputs using pybids_inputs @@ -129,6 +134,12 @@ def generate_inputs( cause errors if subject filters are also specified in pybids_inputs. It may not be specified if participant_label is specified + skip_bids_validation : bool, optional + If True, will not perform validation of the input dataset. Otherwise, + validation is first attempted by performing a system call to `bids-validator` + (e.g. node version), which is has more comprehensive coverage, before falling + back on the python version of the validator. + use_bids_inputs : bool, optional If True, opts in to the new :class:`BidsDataset` output, otherwise returns the classic dict. Currently, the classic dict will be returned by default, however, @@ -257,11 +268,18 @@ def generate_inputs( participant_label, exclude_participant_label ) + # Attempt to validate with node bids-validator, if needed + if not skip_bids_validation: + validated = _validate_input_dir(bids_dir) + # Generates a BIDSLayout + # If not skipping validation, set validate indicator to opposite of output + # from _validate_input_dir, otherwise do not validate layout = ( _gen_bids_layout( bids_dir=bids_dir, derivatives=derivatives, + validate=not validated if validated else False, pybids_config=pybids_config, pybids_database_dir=pybids_database_dir, pybids_reset_database=pybids_reset_database, @@ -315,6 +333,7 @@ def _all_custom_paths(config: InputsConfig): def _gen_bids_layout( bids_dir: Union[Path, str], derivatives: bool, + validate: Union[bool, None], pybids_database_dir: Union[Path, str, None], pybids_reset_database: bool, pybids_config: Union[Path, str, None] = None, @@ -332,6 +351,10 @@ def _gen_bids_layout( determines whether snakebids will search in the derivatives subdirectory of the input dataset. + validate : bool + A boolean that indicates whether validation should be performed on + input dataset + pybids_database_dir : str Path to database directory. If None is provided, database is not used @@ -360,14 +383,52 @@ def _gen_bids_layout( return BIDSLayout( bids_dir, derivatives=derivatives, - validate=False, + validate=validate, config=pybids_config, database_path=pybids_database_dir, reset_database=pybids_reset_database, - indexer=BIDSLayoutIndexer(validate=False, index_metadata=False), + indexer=BIDSLayoutIndexer(validate=validate, index_metadata=False), ) +def _validate_input_dir( + bids_dir: Union[Path, str], +) -> bool: + """Perform validation of dataset. Initial attempt at validation performed + with node-version of bids-validator. If not found, will fall back to + validation integrated into pybids. + + Parameters + ---------- + bids_dir : str + Path to bids directory + + Returns + ------- + bool + Indication of whether validation was successfully performed using + the node-version of bids-validator + """ + try: + validator_config_dict = { + "ignoredFiles": ['/participants.tsv'] + } + + with tempfile.NamedTemporaryFile(mode="w+", suffix=".json") as temp: + temp.write(json.dumps(validator_config_dict)) + temp.flush() + + subprocess.check_call(['bids-validator', str(bids_dir), '-c', temp.name]) + + return True + except FileNotFoundError: + _logger.warning( + "Bids-validator does not appear to be installed - will use pybids " + "validation." + ) + return False + + def write_derivative_json(snakemake, **kwargs): """Snakemake function to read a json file, and write to a new one, adding BIDS derivatives fields for Sources and Parameters. From b2bbf93512966c25edd11bc64977ca5391fd3a3a Mon Sep 17 00:00:00 2001 From: Jason Kai Date: Mon, 27 Feb 2023 09:32:55 -0500 Subject: [PATCH 03/11] skip bids validation for existing tests --- snakebids/tests/test_generate_inputs.py | 48 +++++++++++++++++++++---- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/snakebids/tests/test_generate_inputs.py b/snakebids/tests/test_generate_inputs.py index 283ae27e..39fbac4e 100644 --- a/snakebids/tests/test_generate_inputs.py +++ b/snakebids/tests/test_generate_inputs.py @@ -28,6 +28,7 @@ _generate_filters, _get_lists_from_bids, _parse_custom_path, + _validate_input_dir, generate_inputs, ) from snakebids.exceptions import ConfigError, PybidsError @@ -95,7 +96,12 @@ def test_ambiguous_paths_with_extra_entities_leads_to_error( } with pytest.raises(ConfigError): - generate_inputs(root, pybids_inputs, use_bids_inputs=True) + generate_inputs( + root, + pybids_inputs, + skip_bids_validation=True, + use_bids_inputs=True + ) @settings( deadline=800, @@ -121,7 +127,12 @@ def test_ambiguous_paths_with_missing_entity_leads_to_error( } with pytest.raises(ConfigError): - generate_inputs(root, pybids_inputs, use_bids_inputs=True) + generate_inputs( + root, + pybids_inputs, + skip_bids_validation=True, + use_bids_inputs=True + ) @settings( deadline=800, @@ -156,7 +167,12 @@ def test_entity_excluded_when_filter_false( } ) - data = generate_inputs(root, pybids_inputs, use_bids_inputs=True) + data = generate_inputs( + root, + pybids_inputs, + skip_bids_validation=True, + use_bids_inputs=True + ) assert data == expected @example( @@ -234,7 +250,12 @@ def test_entity_excluded_when_filter_true(self, tmpdir: Path, dataset: BidsDatas } ) - data = generate_inputs(root, pybids_inputs, use_bids_inputs=True) + data = generate_inputs( + root, + pybids_inputs, + skip_bids_validation=True, + use_bids_inputs=True + ) assert data == expected @@ -269,6 +290,7 @@ def test_missing_filters(self, tmpdir: Path): bids_dir=tmpdir, derivatives=derivatives, pybids_config=str(Path(__file__).parent / "data" / "custom_config.json"), + skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset({"t1": BidsComponent("t1", config["t1"].path, zip_list)}) @@ -293,6 +315,7 @@ def test_missing_wildcards(self, tmpdir: Path): bids_dir=tmpdir, derivatives=derivatives, pybids_config=str(Path(__file__).parent / "data" / "custom_config.json"), + skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset({"t1": BidsComponent("t1", config["t1"].path, {})}) @@ -553,6 +576,7 @@ def test_custom_pybids_config(tmpdir: Path): bids_dir=tmpdir, derivatives=derivatives, pybids_config=Path(__file__).parent / "data" / "custom_config.json", + skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset( @@ -605,6 +629,7 @@ def test_nonstandard_custom_pybids_config(tmpdir: Path): pybids_config=( Path(__file__).parent / "data" / "custom_config_nonstandard.json" ), + skip_bids_validation=True, use_bids_inputs=True, ) @@ -628,6 +653,7 @@ def test_t1w(): derivatives=derivatives, participant_label="001", exclude_participant_label="002", + skip_bids_validation=True, ) assert v_error.value.args[0] == ( "Cannot define both participant_label and " @@ -639,6 +665,7 @@ def test_t1w(): pybids_inputs=pybids_inputs, bids_dir=real_bids_dir, derivatives=derivatives, + skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset( @@ -674,6 +701,7 @@ def test_t1w(): bids_dir=real_bids_dir, derivatives=derivatives, participant_label="001", + skip_bids_validation=True, use_bids_inputs=True, ) assert result["scan"].entities == { @@ -745,6 +773,7 @@ def test_t1w(): pybids_inputs=pybids_inputs, bids_dir=bids_dir, derivatives=derivatives, + skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset( @@ -783,6 +812,7 @@ def test_t1w_with_dict(): pybids_inputs=pybids_inputs, bids_dir=real_bids_dir, derivatives=derivatives, + skip_bids_validation=True, ) # Order of the subjects is not deterministic assert config["input_lists"] == BidsListCompare( @@ -814,6 +844,7 @@ def test_t1w_with_dict(): bids_dir=real_bids_dir, derivatives=derivatives, participant_label="001", + skip_bids_validation=True, ) assert config["input_lists"] == { "scan": {"acq": ["mprage"], "subject": ["001"], "suffix": ["T1w"]} @@ -914,11 +945,11 @@ def bids_fs(self, bids_fs: Optional[FakeFilesystem]): def test_gen_layout_returns_valid_dataset(self, tmpdir: Path): dataset = sb_st.datasets().example() create_dataset(tmpdir, dataset) - assert _gen_bids_layout(tmpdir, False, None, False, None) + assert _gen_bids_layout(tmpdir, False, False, None, False, None) def test_invalid_path_raises_error(self, tmpdir: Path): with pytest.raises(ValueError): - _gen_bids_layout(tmpdir / "foo", False, None, False) + _gen_bids_layout(tmpdir / "foo", False, False, None, False) @pytest.mark.parametrize("count", tuple(range(6))) @@ -987,6 +1018,7 @@ def test_database_dir_blank(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), + validate=False, ) assert not os.path.exists(self.pybids_db.get("database_dir")) @@ -1000,6 +1032,7 @@ def test_database_dir_relative(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), + validate=False, ) assert not os.path.exists(f"{self.tmpdir}/data/.db/") @@ -1014,6 +1047,7 @@ def test_database_dir_absolute(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), + validate=False, ) assert os.path.exists(f"{self.tmpdir}/data/.db/") @@ -1029,6 +1063,7 @@ def test_database_dir_absolute(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), + validate=False, ) assert not layout.get(subject="003") @@ -1040,5 +1075,6 @@ def test_database_dir_absolute(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), + validate=False, ) assert layout.get(subject="003") From a3cc17c6db14b45689a4d344eac3dbb3dc70dddf Mon Sep 17 00:00:00 2001 From: Jason Kai Date: Mon, 27 Feb 2023 09:49:55 -0500 Subject: [PATCH 04/11] add skip_validation options to app --- snakebids/app.py | 2 ++ snakebids/cli.py | 3 ++- .../{{cookiecutter.app_name}}/config/snakebids.yml | 3 +++ .../{{cookiecutter.app_name}}/workflow/Snakefile | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/snakebids/app.py b/snakebids/app.py index 79757085..85eb0486 100644 --- a/snakebids/app.py +++ b/snakebids/app.py @@ -171,6 +171,8 @@ def run_snakemake(self): self.config["pybids_db_dir"] = args.pybidsdb_dir self.config["pybids_db_reset"] = args.reset_db + self.config["skip_bids_validation"] = args.skip_validation + update_config(self.config, args) # First, handle outputs in snakebids_root or results folder diff --git a/snakebids/cli.py b/snakebids/cli.py index 4e26a5c6..c1289b20 100644 --- a/snakebids/cli.py +++ b/snakebids/cli.py @@ -150,7 +150,7 @@ def create_parser(include_snakemake=False): "--skip-validation", "--skip_validation", action="store_true", - help=("Skip BIDS validation of input dataset") + help=("Skip bids validation of input dataset") ) standard_group.add_argument( @@ -283,6 +283,7 @@ def parse_snakebids_args(parser: argparse.ArgumentParser): else Path(all_args[0].pybidsdb_dir).resolve() ), reset_db=all_args[0].reset_db, + skip_bids_validation=all_args[0].skip_validation, ) diff --git a/snakebids/project_template/{{cookiecutter.app_name}}/config/snakebids.yml b/snakebids/project_template/{{cookiecutter.app_name}}/config/snakebids.yml index 0db60574..b7f6858c 100644 --- a/snakebids/project_template/{{cookiecutter.app_name}}/config/snakebids.yml +++ b/snakebids/project_template/{{cookiecutter.app_name}}/config/snakebids.yml @@ -40,6 +40,9 @@ pybids_inputs: # pybids_db_dir: '/path/to/db_dir' # Leave blank if you do not wish to use this # pybids_db_reset: False # Change this to true to update the database +# Skipping of bids validation +skip_bids_validation: False + #configuration for the command-line parameters to make available # passed on the argparse add_argument() parse_args: diff --git a/snakebids/project_template/{{cookiecutter.app_name}}/workflow/Snakefile b/snakebids/project_template/{{cookiecutter.app_name}}/workflow/Snakefile index a09a583f..870a873e 100644 --- a/snakebids/project_template/{{cookiecutter.app_name}}/workflow/Snakefile +++ b/snakebids/project_template/{{cookiecutter.app_name}}/workflow/Snakefile @@ -14,6 +14,7 @@ inputs = snakebids.generate_inputs( derivatives=config.get("derivatives", None), participant_label=config.get("participant_label", None), exclude_participant_label=config.get("exclude_participant_label", None), + skip_bids_validation=config.get("skip_bids_validation", False), use_bids_inputs=True, ) From 737bb020e1f21485c456dd2bca92c3ca0892e080 Mon Sep 17 00:00:00 2001 From: Jason Kai Date: Mon, 27 Feb 2023 10:34:39 -0500 Subject: [PATCH 05/11] fix unbound var + vars with no defaults --- snakebids/cli.py | 4 ++-- snakebids/core/input_generation.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/snakebids/cli.py b/snakebids/cli.py index c1289b20..945aa6c9 100644 --- a/snakebids/cli.py +++ b/snakebids/cli.py @@ -69,10 +69,10 @@ class SnakebidsArgs: force: bool outputdir: Path = attr.ib(converter=lambda p: Path(p).resolve()) - pybidsdb_dir: Optional[Path] = None - reset_db: bool = False snakemake_args: List[str] args_dict: Dict[str, Any] + pybidsdb_dir: Optional[Path] = None + reset_db: bool = False skip_validation: bool = False diff --git a/snakebids/core/input_generation.py b/snakebids/core/input_generation.py index d336d758..ba6ee421 100644 --- a/snakebids/core/input_generation.py +++ b/snakebids/core/input_generation.py @@ -269,9 +269,8 @@ def generate_inputs( ) # Attempt to validate with node bids-validator, if needed - if not skip_bids_validation: - validated = _validate_input_dir(bids_dir) - + validated = _validate_input_dir if not skip_bids_validation else None + # Generates a BIDSLayout # If not skipping validation, set validate indicator to opposite of output # from _validate_input_dir, otherwise do not validate From a4ff1a9fcc3068c9230e24e82628007680b57534 Mon Sep 17 00:00:00 2001 From: Jason Kai Date: Mon, 27 Feb 2023 10:35:01 -0500 Subject: [PATCH 06/11] update mock config --- snakebids/tests/mock/config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/snakebids/tests/mock/config.yaml b/snakebids/tests/mock/config.yaml index d2f92d65..466de22b 100644 --- a/snakebids/tests/mock/config.yaml +++ b/snakebids/tests/mock/config.yaml @@ -22,6 +22,8 @@ pybids_inputs: pybids_db_dir: '/path/to/db_dir' pybids_db_reset: False +skip_bids_validation: False + targets_by_analysis_level: participant: - '' # if '', then the first rule is run From b2239c4ae464a381021b96c72a2772898e7cf104 Mon Sep 17 00:00:00 2001 From: Jason Kai Date: Mon, 27 Feb 2023 11:16:47 -0500 Subject: [PATCH 07/11] add initial tests for bids-validation --- snakebids/tests/test_app.py | 1 + snakebids/tests/test_generate_inputs.py | 28 +++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/snakebids/tests/test_app.py b/snakebids/tests/test_app.py index 2d252a41..66035d1a 100644 --- a/snakebids/tests/test_app.py +++ b/snakebids/tests/test_app.py @@ -100,6 +100,7 @@ def test_runs_in_correct_mode( "pybids_db_reset": True, "snakefile": Path("Snakefile"), "output_dir": outputdir.resolve(), + "skip_bids_validation": False } ) if root == "app" and tail == "": diff --git a/snakebids/tests/test_generate_inputs.py b/snakebids/tests/test_generate_inputs.py index 39fbac4e..629debb5 100644 --- a/snakebids/tests/test_generate_inputs.py +++ b/snakebids/tests/test_generate_inputs.py @@ -15,6 +15,7 @@ import more_itertools as itx import pytest from bids import BIDSLayout +from bids.exceptions import BIDSValidationError from hypothesis import HealthCheck, assume, example, given, settings from hypothesis import strategies as st from pyfakefs.fake_filesystem import FakeFilesystem @@ -998,6 +999,33 @@ def test_when_all_custom_paths_no_layout_indexed( spy.assert_not_called() +class TestValidate: + @pytest.fixture(autouse=True) + def start(self, tmpdir): + self.bids_dir = "snakebids/tests/data/bids_t1w" + self.tmp_dir = tmpdir.strpath + + # Copy test data + shutil.copytree(self.bids_dir, f"{self.tmp_dir}/data") + + + def test_check_validator(self): + """Test validator defaults to pybids""" + assert _validate_input_dir(self.bids_dir) == False + + + # Test for validation failure + def test_pybids_validation_fail(self): + with pytest.raises(BIDSValidationError): + _gen_bids_layout( + bids_dir=f"{self.tmp_dir}/data", + derivatives=False, + pybids_database_dir=None, + pybids_reset_database=False, + validate=True, + ) + + class TestDB: @pytest.fixture(autouse=True) def start(self, tmpdir): From 92ea98440fda3e75a3cb66d88ffcadfa349209a7 Mon Sep 17 00:00:00 2001 From: Jason Kai Date: Mon, 27 Feb 2023 11:18:20 -0500 Subject: [PATCH 08/11] skip_validation -> skip_bids_validation for clarity --- snakebids/app.py | 2 +- snakebids/cli.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/snakebids/app.py b/snakebids/app.py index 85eb0486..dccb02b5 100644 --- a/snakebids/app.py +++ b/snakebids/app.py @@ -171,7 +171,7 @@ def run_snakemake(self): self.config["pybids_db_dir"] = args.pybidsdb_dir self.config["pybids_db_reset"] = args.reset_db - self.config["skip_bids_validation"] = args.skip_validation + self.config["skip_bids_validation"] = args.skip_bids_validation update_config(self.config, args) diff --git a/snakebids/cli.py b/snakebids/cli.py index 945aa6c9..fe55a8ed 100644 --- a/snakebids/cli.py +++ b/snakebids/cli.py @@ -63,7 +63,7 @@ class SnakebidsArgs: Contains all the snakebids specific args. Meant to contain custom user args defined in config, as well as dynamic --filter-xx and --wildcard-xx args. These will eventually be printed in the new config. - skip_validation : bool + skip_bids_validation : bool Skip bids validation of input dataset """ @@ -73,7 +73,7 @@ class SnakebidsArgs: args_dict: Dict[str, Any] pybidsdb_dir: Optional[Path] = None reset_db: bool = False - skip_validation: bool = False + skip_bids_validation: bool = False def create_parser(include_snakemake=False): @@ -147,8 +147,8 @@ def create_parser(include_snakemake=False): ) standard_group.add_argument( - "--skip-validation", - "--skip_validation", + "--skip-bids-validation", + "--skip_bids-validation", action="store_true", help=("Skip bids validation of input dataset") ) @@ -283,7 +283,7 @@ def parse_snakebids_args(parser: argparse.ArgumentParser): else Path(all_args[0].pybidsdb_dir).resolve() ), reset_db=all_args[0].reset_db, - skip_bids_validation=all_args[0].skip_validation, + skip_bids_validation=all_args[0].skip_bids_validation, ) From 9ccfd1a5881938ff5e5d4e14a52188fc5cb62627 Mon Sep 17 00:00:00 2001 From: Jason Kai Date: Mon, 27 Feb 2023 11:41:07 -0500 Subject: [PATCH 09/11] add test + description.json for passing validation - also includes quality fixes" --- snakebids/cli.py | 2 +- snakebids/core/input_generation.py | 16 ++++--- snakebids/tests/data/dataset_description.json | 4 ++ snakebids/tests/test_app.py | 2 +- snakebids/tests/test_generate_inputs.py | 42 +++++++++---------- 5 files changed, 34 insertions(+), 32 deletions(-) create mode 100644 snakebids/tests/data/dataset_description.json diff --git a/snakebids/cli.py b/snakebids/cli.py index fe55a8ed..3aed8ef1 100644 --- a/snakebids/cli.py +++ b/snakebids/cli.py @@ -150,7 +150,7 @@ def create_parser(include_snakemake=False): "--skip-bids-validation", "--skip_bids-validation", action="store_true", - help=("Skip bids validation of input dataset") + help=("Skip bids validation of input dataset"), ) standard_group.add_argument( diff --git a/snakebids/core/input_generation.py b/snakebids/core/input_generation.py index ba6ee421..0185bf14 100644 --- a/snakebids/core/input_generation.py +++ b/snakebids/core/input_generation.py @@ -270,7 +270,7 @@ def generate_inputs( # Attempt to validate with node bids-validator, if needed validated = _validate_input_dir if not skip_bids_validation else None - + # Generates a BIDSLayout # If not skipping validation, set validate indicator to opposite of output # from _validate_input_dir, otherwise do not validate @@ -351,7 +351,7 @@ def _gen_bids_layout( derivatives subdirectory of the input dataset. validate : bool - A boolean that indicates whether validation should be performed on + A boolean that indicates whether validation should be performed on input dataset pybids_database_dir : str @@ -394,9 +394,9 @@ def _validate_input_dir( bids_dir: Union[Path, str], ) -> bool: """Perform validation of dataset. Initial attempt at validation performed - with node-version of bids-validator. If not found, will fall back to + with node-version of bids-validator. If not found, will fall back to validation integrated into pybids. - + Parameters ---------- bids_dir : str @@ -408,16 +408,14 @@ def _validate_input_dir( Indication of whether validation was successfully performed using the node-version of bids-validator """ - try: - validator_config_dict = { - "ignoredFiles": ['/participants.tsv'] - } + try: + validator_config_dict = {"ignoredFiles": ["/participants.tsv"]} with tempfile.NamedTemporaryFile(mode="w+", suffix=".json") as temp: temp.write(json.dumps(validator_config_dict)) temp.flush() - subprocess.check_call(['bids-validator', str(bids_dir), '-c', temp.name]) + subprocess.check_call(["bids-validator", str(bids_dir), "-c", temp.name]) return True except FileNotFoundError: diff --git a/snakebids/tests/data/dataset_description.json b/snakebids/tests/data/dataset_description.json new file mode 100644 index 00000000..4ee35cf3 --- /dev/null +++ b/snakebids/tests/data/dataset_description.json @@ -0,0 +1,4 @@ +{ + "Name": "Snakebids - test dataset", + "BIDSVersion": "1.8.0" +} \ No newline at end of file diff --git a/snakebids/tests/test_app.py b/snakebids/tests/test_app.py index 66035d1a..97281be0 100644 --- a/snakebids/tests/test_app.py +++ b/snakebids/tests/test_app.py @@ -100,7 +100,7 @@ def test_runs_in_correct_mode( "pybids_db_reset": True, "snakefile": Path("Snakefile"), "output_dir": outputdir.resolve(), - "skip_bids_validation": False + "skip_bids_validation": False, } ) if root == "app" and tail == "": diff --git a/snakebids/tests/test_generate_inputs.py b/snakebids/tests/test_generate_inputs.py index 629debb5..d459c9bd 100644 --- a/snakebids/tests/test_generate_inputs.py +++ b/snakebids/tests/test_generate_inputs.py @@ -98,10 +98,7 @@ def test_ambiguous_paths_with_extra_entities_leads_to_error( with pytest.raises(ConfigError): generate_inputs( - root, - pybids_inputs, - skip_bids_validation=True, - use_bids_inputs=True + root, pybids_inputs, skip_bids_validation=True, use_bids_inputs=True ) @settings( @@ -129,10 +126,7 @@ def test_ambiguous_paths_with_missing_entity_leads_to_error( with pytest.raises(ConfigError): generate_inputs( - root, - pybids_inputs, - skip_bids_validation=True, - use_bids_inputs=True + root, pybids_inputs, skip_bids_validation=True, use_bids_inputs=True ) @settings( @@ -169,10 +163,7 @@ def test_entity_excluded_when_filter_false( ) data = generate_inputs( - root, - pybids_inputs, - skip_bids_validation=True, - use_bids_inputs=True + root, pybids_inputs, skip_bids_validation=True, use_bids_inputs=True ) assert data == expected @@ -252,10 +243,7 @@ def test_entity_excluded_when_filter_true(self, tmpdir: Path, dataset: BidsDatas ) data = generate_inputs( - root, - pybids_inputs, - skip_bids_validation=True, - use_bids_inputs=True + root, pybids_inputs, skip_bids_validation=True, use_bids_inputs=True ) assert data == expected @@ -1007,24 +995,36 @@ def start(self, tmpdir): # Copy test data shutil.copytree(self.bids_dir, f"{self.tmp_dir}/data") + assert filecmp.dircmp("snakebids/tests/data/bids_t1w", f"{self.tmp_dir}/data") + # Copy dataset description + shutil.copy( + "snakebids/tests/data/dataset_description.json", f"{self.tmp_dir}/data" + ) def test_check_validator(self): - """Test validator defaults to pybids""" - assert _validate_input_dir(self.bids_dir) == False - + """Test validator defaults to pybids (i.e. False)""" + assert _validate_input_dir(self.bids_dir) == False - # Test for validation failure def test_pybids_validation_fail(self): with pytest.raises(BIDSValidationError): _gen_bids_layout( - bids_dir=f"{self.tmp_dir}/data", + bids_dir=self.bids_dir, derivatives=False, pybids_database_dir=None, pybids_reset_database=False, validate=True, ) + def test_pybids_validation_pass(self): + assert _gen_bids_layout( + bids_dir=f"{self.tmp_dir}/data", + derivatives=False, + pybids_database_dir=None, + pybids_reset_database=False, + validate=True, + ) + class TestDB: @pytest.fixture(autouse=True) From 430f28aeda220fcda77b023a9190b6ee473d006c Mon Sep 17 00:00:00 2001 From: Jason Kai Date: Mon, 27 Feb 2023 11:56:43 -0500 Subject: [PATCH 10/11] update docs, rename validation fn - update docs to include information about skipping bids validation - renamed function from validate_input_dir -> validate_bids_dir - added exception to skipping bids-validation with node if all paths provided are custom - quality fixes --- docs/bids_app/workflow.rst | 2 +- docs/running_snakebids/overview.md | 2 ++ snakebids/core/input_generation.py | 19 +++++++++++-------- snakebids/tests/test_generate_inputs.py | 4 ++-- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/docs/bids_app/workflow.rst b/docs/bids_app/workflow.rst index ee1c7ae2..96646c0c 100644 --- a/docs/bids_app/workflow.rst +++ b/docs/bids_app/workflow.rst @@ -14,10 +14,10 @@ To get access to these additions, the base Snakefile for a snakebids workflow sh inputs = snakebids.generate_inputs( bids_dir=config["bids_dir"], pybids_inputs=config["pybids_inputs"], + skip_bids_validation=config["skip_bids_validation"], derivatives=config.get("derivatives", None), participant_label=config.get("participant_label", None), exclude_participant_label=config.get("exclude_participant_label", None) - ) #this adds constraints to the bids naming diff --git a/docs/running_snakebids/overview.md b/docs/running_snakebids/overview.md index d78ab291..085bed42 100644 --- a/docs/running_snakebids/overview.md +++ b/docs/running_snakebids/overview.md @@ -19,6 +19,8 @@ Indexing of large datasets can be a time-consuming process. Snakebids, through ` 1. Uncomment the lines in `snakebids.yml` containing `pybids_db_dir` and `pybids_db_reset`. 1. The variables can be updated directly in this file or through the CLI by using `-pybidsdb-dir {dir}` to specify the database path and `--reset-db` to indicate that the database should be updated. _Note: CLI arguments take precendence if both CLI and config variables are set._ +Input BIDS datasets are also validated via the bids-validator. By default, this feature uses the command-line (node.js) version of the [validator](https://www.npmjs.com/package/bids-validator). If this is not found to be installed on the system, the `pybids` version of validation will be performed instead. To opt-out validation, one can invoke `--skip-bids-validation`. + Workflow mode ============= diff --git a/snakebids/core/input_generation.py b/snakebids/core/input_generation.py index 0185bf14..b73cce86 100644 --- a/snakebids/core/input_generation.py +++ b/snakebids/core/input_generation.py @@ -135,10 +135,10 @@ def generate_inputs( be specified if participant_label is specified skip_bids_validation : bool, optional - If True, will not perform validation of the input dataset. Otherwise, + If True, will not perform validation of the input dataset. Otherwise, validation is first attempted by performing a system call to `bids-validator` - (e.g. node version), which is has more comprehensive coverage, before falling - back on the python version of the validator. + (e.g. node version), which is has more comprehensive coverage, before falling + back on the python version of the validator. use_bids_inputs : bool, optional If True, opts in to the new :class:`BidsDataset` output, otherwise returns the @@ -268,12 +268,15 @@ def generate_inputs( participant_label, exclude_participant_label ) - # Attempt to validate with node bids-validator, if needed - validated = _validate_input_dir if not skip_bids_validation else None + # Attempt to validate with node bids-validator + validated = ( + _validate_bids_dir + if not skip_bids_validation and not _all_custom_paths(pybids_inputs) + else None + ) - # Generates a BIDSLayout # If not skipping validation, set validate indicator to opposite of output - # from _validate_input_dir, otherwise do not validate + # from _validate_bids_dir, otherwise do not validate layout = ( _gen_bids_layout( bids_dir=bids_dir, @@ -390,7 +393,7 @@ def _gen_bids_layout( ) -def _validate_input_dir( +def _validate_bids_dir( bids_dir: Union[Path, str], ) -> bool: """Perform validation of dataset. Initial attempt at validation performed diff --git a/snakebids/tests/test_generate_inputs.py b/snakebids/tests/test_generate_inputs.py index d459c9bd..d45f7448 100644 --- a/snakebids/tests/test_generate_inputs.py +++ b/snakebids/tests/test_generate_inputs.py @@ -29,7 +29,7 @@ _generate_filters, _get_lists_from_bids, _parse_custom_path, - _validate_input_dir, + _validate_bids_dir, generate_inputs, ) from snakebids.exceptions import ConfigError, PybidsError @@ -1004,7 +1004,7 @@ def start(self, tmpdir): def test_check_validator(self): """Test validator defaults to pybids (i.e. False)""" - assert _validate_input_dir(self.bids_dir) == False + assert _validate_bids_dir(self.bids_dir) == False def test_pybids_validation_fail(self): with pytest.raises(BIDSValidationError): From e9a5155e97e19e842a06916081cff0e0e44e15b6 Mon Sep 17 00:00:00 2001 From: Jason Kai Date: Mon, 13 Mar 2023 09:04:42 -0400 Subject: [PATCH 11/11] wip - bids validation plugin --- snakebids/app.py | 3 +- snakebids/core/input_generation.py | 56 +------------------ snakebids/plugins/__init__.py | 6 +++ snakebids/plugins/validation.py | 48 +++++++++++++++++ snakebids/tests/test_app.py | 1 - snakebids/tests/test_generate_inputs.py | 71 ++----------------------- 6 files changed, 60 insertions(+), 125 deletions(-) create mode 100644 snakebids/plugins/__init__.py create mode 100644 snakebids/plugins/validation.py diff --git a/snakebids/app.py b/snakebids/app.py index dccb02b5..cd072213 100644 --- a/snakebids/app.py +++ b/snakebids/app.py @@ -20,6 +20,7 @@ parse_snakebids_args, ) from snakebids.exceptions import ConfigError, RunError +from snakebids.plugins.validation import bids_validate from snakebids.utils.output import ( prepare_bidsapp_output, write_config_file, @@ -171,8 +172,6 @@ def run_snakemake(self): self.config["pybids_db_dir"] = args.pybidsdb_dir self.config["pybids_db_reset"] = args.reset_db - self.config["skip_bids_validation"] = args.skip_bids_validation - update_config(self.config, args) # First, handle outputs in snakebids_root or results folder diff --git a/snakebids/core/input_generation.py b/snakebids/core/input_generation.py index b73cce86..946581c9 100644 --- a/snakebids/core/input_generation.py +++ b/snakebids/core/input_generation.py @@ -3,8 +3,6 @@ import json import logging import re -import subprocess -import tempfile from collections import defaultdict from pathlib import Path from typing import Dict, Generator, Iterable, List, Optional, Tuple, Union, overload @@ -268,20 +266,10 @@ def generate_inputs( participant_label, exclude_participant_label ) - # Attempt to validate with node bids-validator - validated = ( - _validate_bids_dir - if not skip_bids_validation and not _all_custom_paths(pybids_inputs) - else None - ) - - # If not skipping validation, set validate indicator to opposite of output - # from _validate_bids_dir, otherwise do not validate layout = ( _gen_bids_layout( bids_dir=bids_dir, derivatives=derivatives, - validate=not validated if validated else False, pybids_config=pybids_config, pybids_database_dir=pybids_database_dir, pybids_reset_database=pybids_reset_database, @@ -335,7 +323,6 @@ def _all_custom_paths(config: InputsConfig): def _gen_bids_layout( bids_dir: Union[Path, str], derivatives: bool, - validate: Union[bool, None], pybids_database_dir: Union[Path, str, None], pybids_reset_database: bool, pybids_config: Union[Path, str, None] = None, @@ -353,10 +340,6 @@ def _gen_bids_layout( determines whether snakebids will search in the derivatives subdirectory of the input dataset. - validate : bool - A boolean that indicates whether validation should be performed on - input dataset - pybids_database_dir : str Path to database directory. If None is provided, database is not used @@ -385,50 +368,13 @@ def _gen_bids_layout( return BIDSLayout( bids_dir, derivatives=derivatives, - validate=validate, config=pybids_config, database_path=pybids_database_dir, reset_database=pybids_reset_database, - indexer=BIDSLayoutIndexer(validate=validate, index_metadata=False), + indexer=BIDSLayoutIndexer(index_metadata=False), ) -def _validate_bids_dir( - bids_dir: Union[Path, str], -) -> bool: - """Perform validation of dataset. Initial attempt at validation performed - with node-version of bids-validator. If not found, will fall back to - validation integrated into pybids. - - Parameters - ---------- - bids_dir : str - Path to bids directory - - Returns - ------- - bool - Indication of whether validation was successfully performed using - the node-version of bids-validator - """ - try: - validator_config_dict = {"ignoredFiles": ["/participants.tsv"]} - - with tempfile.NamedTemporaryFile(mode="w+", suffix=".json") as temp: - temp.write(json.dumps(validator_config_dict)) - temp.flush() - - subprocess.check_call(["bids-validator", str(bids_dir), "-c", temp.name]) - - return True - except FileNotFoundError: - _logger.warning( - "Bids-validator does not appear to be installed - will use pybids " - "validation." - ) - return False - - def write_derivative_json(snakemake, **kwargs): """Snakemake function to read a json file, and write to a new one, adding BIDS derivatives fields for Sources and Parameters. diff --git a/snakebids/plugins/__init__.py b/snakebids/plugins/__init__.py new file mode 100644 index 00000000..ad1770bd --- /dev/null +++ b/snakebids/plugins/__init__.py @@ -0,0 +1,6 @@ +__submodules__ = [] + +# +__all__ = [] + +# diff --git a/snakebids/plugins/validation.py b/snakebids/plugins/validation.py new file mode 100644 index 00000000..bdada5bc --- /dev/null +++ b/snakebids/plugins/validation.py @@ -0,0 +1,48 @@ +import json +import logging +import subprocess +import tempfile + +from snakebids.app import SnakeBidsApp + +_logger = logging.getLogger(__name__) + + +class InvalidBidsError(Exception): + """Error raised if an input BIDS dataset is invalid.""" + + +def bids_validate(app: SnakeBidsApp, bids_dir: str) -> None: + """Perform validation of dataset. Initial attempt at validation performed + with node-version of bids-validator. If not found, will fall back to Python + version of validation (same as pybids). + + Parameters + ---------- + app + Snakebids application to be run + bids_dir + BIDS organized directory to be validated + """ + + # Skip bids validation + if app.config["skip_bids_validation"]: + return + + try: + validator_config_dict = {"ignoredFiles": ["/participants.tsv"]} + + with tempfile.NamedTemporaryFile(mode="w+", suffix=".json") as temp: + temp.write(json.dumps(validator_config_dict)) + temp.flush() + + subprocess.check_call(["bids-validator", str(bids_dir), "-c", temp.name]) + # If the bids-validator call can't be made + except FileNotFoundError: + _logger.warning( + "Bids-validator does not appear to be installed - will use python " + "validation." + ) + # Any other bids-validator error + except subprocess.CalledProcessError as err: + raise InvalidBidsError from err diff --git a/snakebids/tests/test_app.py b/snakebids/tests/test_app.py index 97281be0..2d252a41 100644 --- a/snakebids/tests/test_app.py +++ b/snakebids/tests/test_app.py @@ -100,7 +100,6 @@ def test_runs_in_correct_mode( "pybids_db_reset": True, "snakefile": Path("Snakefile"), "output_dir": outputdir.resolve(), - "skip_bids_validation": False, } ) if root == "app" and tail == "": diff --git a/snakebids/tests/test_generate_inputs.py b/snakebids/tests/test_generate_inputs.py index d45f7448..4632a2ec 100644 --- a/snakebids/tests/test_generate_inputs.py +++ b/snakebids/tests/test_generate_inputs.py @@ -29,7 +29,6 @@ _generate_filters, _get_lists_from_bids, _parse_custom_path, - _validate_bids_dir, generate_inputs, ) from snakebids.exceptions import ConfigError, PybidsError @@ -97,9 +96,7 @@ def test_ambiguous_paths_with_extra_entities_leads_to_error( } with pytest.raises(ConfigError): - generate_inputs( - root, pybids_inputs, skip_bids_validation=True, use_bids_inputs=True - ) + generate_inputs(root, pybids_inputs, use_bids_inputs=True) @settings( deadline=800, @@ -125,9 +122,7 @@ def test_ambiguous_paths_with_missing_entity_leads_to_error( } with pytest.raises(ConfigError): - generate_inputs( - root, pybids_inputs, skip_bids_validation=True, use_bids_inputs=True - ) + generate_inputs(root, pybids_inputs, use_bids_inputs=True) @settings( deadline=800, @@ -162,9 +157,7 @@ def test_entity_excluded_when_filter_false( } ) - data = generate_inputs( - root, pybids_inputs, skip_bids_validation=True, use_bids_inputs=True - ) + data = generate_inputs(root, pybids_inputs, use_bids_inputs=True) assert data == expected @example( @@ -242,9 +235,7 @@ def test_entity_excluded_when_filter_true(self, tmpdir: Path, dataset: BidsDatas } ) - data = generate_inputs( - root, pybids_inputs, skip_bids_validation=True, use_bids_inputs=True - ) + data = generate_inputs(root, pybids_inputs, use_bids_inputs=True) assert data == expected @@ -279,7 +270,6 @@ def test_missing_filters(self, tmpdir: Path): bids_dir=tmpdir, derivatives=derivatives, pybids_config=str(Path(__file__).parent / "data" / "custom_config.json"), - skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset({"t1": BidsComponent("t1", config["t1"].path, zip_list)}) @@ -304,7 +294,6 @@ def test_missing_wildcards(self, tmpdir: Path): bids_dir=tmpdir, derivatives=derivatives, pybids_config=str(Path(__file__).parent / "data" / "custom_config.json"), - skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset({"t1": BidsComponent("t1", config["t1"].path, {})}) @@ -565,7 +554,6 @@ def test_custom_pybids_config(tmpdir: Path): bids_dir=tmpdir, derivatives=derivatives, pybids_config=Path(__file__).parent / "data" / "custom_config.json", - skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset( @@ -618,7 +606,6 @@ def test_nonstandard_custom_pybids_config(tmpdir: Path): pybids_config=( Path(__file__).parent / "data" / "custom_config_nonstandard.json" ), - skip_bids_validation=True, use_bids_inputs=True, ) @@ -642,7 +629,6 @@ def test_t1w(): derivatives=derivatives, participant_label="001", exclude_participant_label="002", - skip_bids_validation=True, ) assert v_error.value.args[0] == ( "Cannot define both participant_label and " @@ -654,7 +640,6 @@ def test_t1w(): pybids_inputs=pybids_inputs, bids_dir=real_bids_dir, derivatives=derivatives, - skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset( @@ -690,7 +675,6 @@ def test_t1w(): bids_dir=real_bids_dir, derivatives=derivatives, participant_label="001", - skip_bids_validation=True, use_bids_inputs=True, ) assert result["scan"].entities == { @@ -762,7 +746,6 @@ def test_t1w(): pybids_inputs=pybids_inputs, bids_dir=bids_dir, derivatives=derivatives, - skip_bids_validation=True, use_bids_inputs=True, ) template = BidsDataset( @@ -801,7 +784,6 @@ def test_t1w_with_dict(): pybids_inputs=pybids_inputs, bids_dir=real_bids_dir, derivatives=derivatives, - skip_bids_validation=True, ) # Order of the subjects is not deterministic assert config["input_lists"] == BidsListCompare( @@ -833,7 +815,6 @@ def test_t1w_with_dict(): bids_dir=real_bids_dir, derivatives=derivatives, participant_label="001", - skip_bids_validation=True, ) assert config["input_lists"] == { "scan": {"acq": ["mprage"], "subject": ["001"], "suffix": ["T1w"]} @@ -987,45 +968,6 @@ def test_when_all_custom_paths_no_layout_indexed( spy.assert_not_called() -class TestValidate: - @pytest.fixture(autouse=True) - def start(self, tmpdir): - self.bids_dir = "snakebids/tests/data/bids_t1w" - self.tmp_dir = tmpdir.strpath - - # Copy test data - shutil.copytree(self.bids_dir, f"{self.tmp_dir}/data") - assert filecmp.dircmp("snakebids/tests/data/bids_t1w", f"{self.tmp_dir}/data") - - # Copy dataset description - shutil.copy( - "snakebids/tests/data/dataset_description.json", f"{self.tmp_dir}/data" - ) - - def test_check_validator(self): - """Test validator defaults to pybids (i.e. False)""" - assert _validate_bids_dir(self.bids_dir) == False - - def test_pybids_validation_fail(self): - with pytest.raises(BIDSValidationError): - _gen_bids_layout( - bids_dir=self.bids_dir, - derivatives=False, - pybids_database_dir=None, - pybids_reset_database=False, - validate=True, - ) - - def test_pybids_validation_pass(self): - assert _gen_bids_layout( - bids_dir=f"{self.tmp_dir}/data", - derivatives=False, - pybids_database_dir=None, - pybids_reset_database=False, - validate=True, - ) - - class TestDB: @pytest.fixture(autouse=True) def start(self, tmpdir): @@ -1046,7 +988,6 @@ def test_database_dir_blank(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), - validate=False, ) assert not os.path.exists(self.pybids_db.get("database_dir")) @@ -1060,7 +1001,6 @@ def test_database_dir_relative(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), - validate=False, ) assert not os.path.exists(f"{self.tmpdir}/data/.db/") @@ -1075,7 +1015,6 @@ def test_database_dir_absolute(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), - validate=False, ) assert os.path.exists(f"{self.tmpdir}/data/.db/") @@ -1091,7 +1030,6 @@ def test_database_dir_absolute(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), - validate=False, ) assert not layout.get(subject="003") @@ -1103,6 +1041,5 @@ def test_database_dir_absolute(self): derivatives=False, pybids_database_dir=self.pybids_db.get("database_dir"), pybids_reset_database=self.pybids_db.get("reset_database"), - validate=False, ) assert layout.get(subject="003")