diff --git a/docs/bids_app/workflow.rst b/docs/bids_app/workflow.rst index ee1c7ae2..96646c0c 100644 --- a/docs/bids_app/workflow.rst +++ b/docs/bids_app/workflow.rst @@ -14,10 +14,10 @@ To get access to these additions, the base Snakefile for a snakebids workflow sh inputs = snakebids.generate_inputs( bids_dir=config["bids_dir"], pybids_inputs=config["pybids_inputs"], + skip_bids_validation=config["skip_bids_validation"], derivatives=config.get("derivatives", None), participant_label=config.get("participant_label", None), exclude_participant_label=config.get("exclude_participant_label", None) - ) #this adds constraints to the bids naming diff --git a/docs/running_snakebids/overview.md b/docs/running_snakebids/overview.md index d78ab291..085bed42 100644 --- a/docs/running_snakebids/overview.md +++ b/docs/running_snakebids/overview.md @@ -19,6 +19,8 @@ Indexing of large datasets can be a time-consuming process. Snakebids, through ` 1. Uncomment the lines in `snakebids.yml` containing `pybids_db_dir` and `pybids_db_reset`. 1. The variables can be updated directly in this file or through the CLI by using `-pybidsdb-dir {dir}` to specify the database path and `--reset-db` to indicate that the database should be updated. _Note: CLI arguments take precendence if both CLI and config variables are set._ +Input BIDS datasets are also validated via the bids-validator. By default, this feature uses the command-line (node.js) version of the [validator](https://www.npmjs.com/package/bids-validator). If it is not installed on the system, the `pybids` version of validation is performed instead. To opt out of validation, pass `--skip-bids-validation`. 
+ Workflow mode ============= diff --git a/snakebids/app.py b/snakebids/app.py index 79757085..cd072213 100644 --- a/snakebids/app.py +++ b/snakebids/app.py @@ -20,6 +20,7 @@ parse_snakebids_args, ) from snakebids.exceptions import ConfigError, RunError +from snakebids.plugins.validation import bids_validate from snakebids.utils.output import ( prepare_bidsapp_output, write_config_file, diff --git a/snakebids/cli.py b/snakebids/cli.py index 7278fe6a..3aed8ef1 100644 --- a/snakebids/cli.py +++ b/snakebids/cli.py @@ -55,12 +55,16 @@ class SnakebidsArgs: Directory to place outputs pybidsdb_dir : Path Directory to place pybids database + reset_db : bool + Update the pybids database snakemake_args : list of strings Arguments to pass on to Snakemake args_dict : Dict[str, Any] Contains all the snakebids specific args. Meant to contain custom user args defined in config, as well as dynamic --filter-xx and --wildcard-xx args. These will eventually be printed in the new config. + skip_bids_validation : bool + Skip bids validation of input dataset """ force: bool @@ -69,6 +73,7 @@ class SnakebidsArgs: args_dict: Dict[str, Any] pybidsdb_dir: Optional[Path] = None reset_db: bool = False + skip_bids_validation: bool = False def create_parser(include_snakemake=False): @@ -141,6 +146,13 @@ def create_parser(include_snakemake=False): help="Force output in a new directory that already has contents", ) + standard_group.add_argument( + "--skip-bids-validation", + "--skip_bids-validation", + action="store_true", + help=("Skip bids validation of input dataset"), + ) + standard_group.add_argument( "--retrofit", action="store_true", @@ -271,6 +283,7 @@ def parse_snakebids_args(parser: argparse.ArgumentParser): else Path(all_args[0].pybidsdb_dir).resolve() ), reset_db=all_args[0].reset_db, + skip_bids_validation=all_args[0].skip_bids_validation, ) diff --git a/snakebids/core/input_generation.py b/snakebids/core/input_generation.py index c75d9b0e..946581c9 100644 --- 
a/snakebids/core/input_generation.py +++ b/snakebids/core/input_generation.py @@ -34,6 +34,7 @@ def generate_inputs( limit_to=..., participant_label=..., exclude_participant_label=..., + skip_bids_validation=..., use_bids_inputs: Union[Literal[False], None] = ..., ) -> BidsDatasetDict: ... @@ -51,6 +52,7 @@ def generate_inputs( limit_to=..., participant_label=..., exclude_participant_label=..., + skip_bids_validation=..., use_bids_inputs: Literal[True] = ..., ) -> BidsDataset: ... @@ -67,6 +69,7 @@ def generate_inputs( limit_to=None, participant_label=None, exclude_participant_label=None, + skip_bids_validation=False, use_bids_inputs=None, ): """Dynamically generate snakemake inputs using pybids_inputs @@ -129,6 +132,12 @@ def generate_inputs( cause errors if subject filters are also specified in pybids_inputs. It may not be specified if participant_label is specified + skip_bids_validation : bool, optional + If True, will not perform validation of the input dataset. Otherwise, + validation is first attempted by performing a system call to `bids-validator` + (the node.js version), which has more comprehensive coverage, before falling + back on the python version of the validator. + use_bids_inputs : bool, optional If True, opts in to the new :class:`BidsDataset` output, otherwise returns the classic dict. 
Currently, the classic dict will be returned by default, however, @@ -257,7 +266,6 @@ def generate_inputs( participant_label, exclude_participant_label ) - # Generates a BIDSLayout layout = ( _gen_bids_layout( bids_dir=bids_dir, @@ -360,11 +368,10 @@ def _gen_bids_layout( return BIDSLayout( bids_dir, derivatives=derivatives, - validate=False, config=pybids_config, database_path=pybids_database_dir, reset_database=pybids_reset_database, - indexer=BIDSLayoutIndexer(validate=False, index_metadata=False), + indexer=BIDSLayoutIndexer(index_metadata=False), ) diff --git a/snakebids/plugins/__init__.py b/snakebids/plugins/__init__.py new file mode 100644 index 00000000..ad1770bd --- /dev/null +++ b/snakebids/plugins/__init__.py @@ -0,0 +1,6 @@ +__submodules__ = [] + +# +__all__ = [] + +# diff --git a/snakebids/plugins/validation.py b/snakebids/plugins/validation.py new file mode 100644 index 00000000..bdada5bc --- /dev/null +++ b/snakebids/plugins/validation.py @@ -0,0 +1,48 @@ +import json +import logging +import subprocess +import tempfile + +from snakebids.app import SnakeBidsApp + +_logger = logging.getLogger(__name__) + + +class InvalidBidsError(Exception): + """Error raised if an input BIDS dataset is invalid.""" + + +def bids_validate(app: SnakeBidsApp, bids_dir: str) -> None: + """Perform validation of dataset. Initial attempt at validation performed + with node-version of bids-validator. If not found, will fall back to Python + version of validation (same as pybids). 
+ + Parameters + ---------- + app + Snakebids application to be run + bids_dir + BIDS organized directory to be validated + """ + + # Skip bids validation + if app.config["skip_bids_validation"]: + return + + try: + validator_config_dict = {"ignoredFiles": ["/participants.tsv"]} + + with tempfile.NamedTemporaryFile(mode="w+", suffix=".json") as temp: + temp.write(json.dumps(validator_config_dict)) + temp.flush() + + subprocess.check_call(["bids-validator", str(bids_dir), "-c", temp.name]) + # If the bids-validator call can't be made + except FileNotFoundError: + _logger.warning( + "Bids-validator does not appear to be installed - will use python " + "validation." + ) + # Any other bids-validator error + except subprocess.CalledProcessError as err: + raise InvalidBidsError from err diff --git a/snakebids/project_template/{{cookiecutter.app_name}}/config/snakebids.yml b/snakebids/project_template/{{cookiecutter.app_name}}/config/snakebids.yml index 0db60574..b7f6858c 100644 --- a/snakebids/project_template/{{cookiecutter.app_name}}/config/snakebids.yml +++ b/snakebids/project_template/{{cookiecutter.app_name}}/config/snakebids.yml @@ -40,6 +40,9 @@ pybids_inputs: # pybids_db_dir: '/path/to/db_dir' # Leave blank if you do not wish to use this # pybids_db_reset: False # Change this to true to update the database +# Skipping of bids validation +skip_bids_validation: False + #configuration for the command-line parameters to make available # passed on the argparse add_argument() parse_args: diff --git a/snakebids/project_template/{{cookiecutter.app_name}}/workflow/Snakefile b/snakebids/project_template/{{cookiecutter.app_name}}/workflow/Snakefile index a09a583f..870a873e 100644 --- a/snakebids/project_template/{{cookiecutter.app_name}}/workflow/Snakefile +++ b/snakebids/project_template/{{cookiecutter.app_name}}/workflow/Snakefile @@ -14,6 +14,7 @@ inputs = snakebids.generate_inputs( derivatives=config.get("derivatives", None), 
participant_label=config.get("participant_label", None), exclude_participant_label=config.get("exclude_participant_label", None), + skip_bids_validation=config.get("skip_bids_validation", False), use_bids_inputs=True, ) diff --git a/snakebids/tests/data/dataset_description.json b/snakebids/tests/data/dataset_description.json new file mode 100644 index 00000000..4ee35cf3 --- /dev/null +++ b/snakebids/tests/data/dataset_description.json @@ -0,0 +1,4 @@ +{ + "Name": "Snakebids - test dataset", + "BIDSVersion": "1.8.0" +} \ No newline at end of file diff --git a/snakebids/tests/mock/config.yaml b/snakebids/tests/mock/config.yaml index d2f92d65..466de22b 100644 --- a/snakebids/tests/mock/config.yaml +++ b/snakebids/tests/mock/config.yaml @@ -22,6 +22,8 @@ pybids_inputs: pybids_db_dir: '/path/to/db_dir' pybids_db_reset: False +skip_bids_validation: False + targets_by_analysis_level: participant: - '' # if '', then the first rule is run diff --git a/snakebids/tests/test_generate_inputs.py b/snakebids/tests/test_generate_inputs.py index 283ae27e..4632a2ec 100644 --- a/snakebids/tests/test_generate_inputs.py +++ b/snakebids/tests/test_generate_inputs.py @@ -15,6 +15,7 @@ import more_itertools as itx import pytest from bids import BIDSLayout +from bids.exceptions import BIDSValidationError from hypothesis import HealthCheck, assume, example, given, settings from hypothesis import strategies as st from pyfakefs.fake_filesystem import FakeFilesystem @@ -914,11 +915,11 @@ def bids_fs(self, bids_fs: Optional[FakeFilesystem]): def test_gen_layout_returns_valid_dataset(self, tmpdir: Path): dataset = sb_st.datasets().example() create_dataset(tmpdir, dataset) - assert _gen_bids_layout(tmpdir, False, None, False, None) + assert _gen_bids_layout(tmpdir, False, False, None, False, None) def test_invalid_path_raises_error(self, tmpdir: Path): with pytest.raises(ValueError): - _gen_bids_layout(tmpdir / "foo", False, None, False) + _gen_bids_layout(tmpdir / "foo", False, False, None, 
False) @pytest.mark.parametrize("count", tuple(range(6)))