diff --git a/docs/api.rst b/docs/api.rst index c028ca40..c06b1a3f 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -2,6 +2,8 @@ API ================ + + snakebids --------- @@ -20,3 +22,22 @@ snakemake_io .. automodule:: snakebids.utils.snakemake_io :members: + + +cli +--- + +.. automodule:: snakebids.cli + :members: + +exceptions +---------- + +.. automodule:: snakebids.exceptions + :members: + +types +----- + +.. automodule:: snakebids.types + :members: diff --git a/docs/bids_app/config.md b/docs/bids_app/config.md index 2492a02f..d014468c 100644 --- a/docs/bids_app/config.md +++ b/docs/bids_app/config.md @@ -1,3 +1,4 @@ +{#bids-app-config} Configuration ============= @@ -21,10 +22,13 @@ The value of ``filters`` should be a dictionary where each key corresponds to a extension: '.nii.gz' datatype: 'func' ``` + the bold component would match any paths under the `func/` datatype folder, with the suffix `bold` and the extension `.nii.gz`. + ``` sub-xxx/.../func/ent1-xxx_ent2-xxx_..._bold.nii.gz ``` + * [boolean](python:bool): constrains presence or absence of the entity without restricting its value. `False` requires that the entity be **absent**, while `True` requires that the entity be **present**, regardless of value. ```yaml pybids_inputs: @@ -38,7 +42,7 @@ The value of ``filters`` should be a dictionary where each key corresponds to a In addition, the special filter `regex_search` can be set to `true`, which causes all other filters in the component to use regex matching instead of exact matching. -The value of ``wildcards`` should be a list of BIDS entities. Snakebids collects the values of any entities specified and saves them in the {attr}`entities ` and `zip_lists` entries of the corresponding {class}`BidsComponent `. In other words, these are the entities to be preserved in output paths derived from the input being described. Placing an entity in `wildcards` does not require the entity be present. 
If an entity is not found, it will be left out of {attr}`entities `. To require the presence of an entity, place it under `filters` set to `true`. +The value of ``wildcards`` should be a list of BIDS entities. Snakebids collects the values of any entities specified and saves them in the {attr}`entities ` and {attr}`~snakebids.BidsComponent.zip_lists` entries of the corresponding {class}`BidsComponent `. In other words, these are the entities to be preserved in output paths derived from the input being described. Placing an entity in `wildcards` does not require the entity be present. If an entity is not found, it will be left out of {attr}`entities `. To require the presence of an entity, place it under `filters` set to `true`. In the following (YAML-formatted) example, the ``bold`` input type is specified. BIDS files with the datatype ``func``, suffix ``bold``, and extension ``.nii.gz`` will be grabbed, and the ``subject``, ``session``, ``acquisition``, ``task``, and ``run`` entities of those files will be left as wildcards. The `task` entity must be present, but there must not be any `desc`. @@ -61,24 +65,23 @@ pybids_inputs: ### `pybids_db_dir` -PyBIDS allows for the use of a cached layout to be used in order to reduce the time required to index a BIDS dataset. A path (if provided) to save the ``PyBIDS`` layout. If ``None`` or ``''`` is provided, the layout is not saved / used. The path provided must be absolute, otherwise the database will not be used. Note, this is a variable used for an opt-in feature and must first be uncommented in the ``snakebids.yml`` file. +PyBIDS allows for the use of a cached layout to be used in order to reduce the time required to index a BIDS dataset. A path (if provided) to save the *pybids* [layout](#bids.layout.BIDSLayout). If `None` or `''` is provided, the layout is not saved or used. The path provided must be absolute, otherwise the database will not be used. 
### `pybids_db_reset` -A boolean determining whether the existing layout should be be updated. Default behaviour does not update the existing database if one is used. Note, this is a variable used for an opt-in feature and must first be uncommented in the ``snakebids.yml`` file. + +A boolean determining whether the existing layout should be updated. Default behaviour does not update the existing database if one is used. ### `analysis_levels` A list of analysis levels in the BIDS app. Typically, this will include participant and/or group. Note that the default (YAML) configuration file expects this mapping to be identified with the anchor ``analysis_levels`` to be aliased by ``parse_args``. - ### `targets_by_analysis_level` A mapping from the name of each ``analysis_level`` to the list of rules or files to be run for that analysis level. - ### `parse_args` -A dictionary of command-line parameters to make available as part of the BIDS app. Each item of the mapping is passed to [argparse's add_argument function](https://docs.python.org/3/library/argparse.html#the-add-argument-method). A number of default entries are present in a new snakebids project's config file that structure the BIDS app's CLI, but additional command-line arguments can be added as necessary. +A dictionary of command-line parameters to make available as part of the BIDS app. Each item of the mapping is passed to [argparse's add_argument function](#argparse.ArgumentParser.add_argument). A number of default entries are present in a new snakebids project's config file that structure the BIDS app's CLI, but additional command-line arguments can be added as necessary. 
### `debug` diff --git a/docs/bids_app/workflow.md b/docs/bids_app/workflow.md new file mode 100644 index 00000000..bb456d89 --- /dev/null +++ b/docs/bids_app/workflow.md @@ -0,0 +1,57 @@ +Workflows +========= + +Snakebids workflows are constructed the same way as any other [Snakemake workflows](inv:snakemake#snakefiles/rules), but with a few additions that make it easier to work with BIDS datasets. + +To get access to these additions, the base Snakefile for a snakebids workflow should begin with the following boilerplate: + +```python +import snakebids +from snakebids import bids + +configfile: 'config/snakebids.yml' + +# Get input wildcards +inputs = snakebids.generate_inputs( + bids_dir=config["bids_dir"], + pybids_inputs=config["pybids_inputs"], + pybids_database_dir=config.get("pybids_db_dir"), + pybids_reset_database=config.get("pybids_db_reset"), + derivatives=config.get("derivatives"), + participant_label=config.get("participant_label"), + exclude_participant_label=config.get("exclude_participant_label"), + use_bids_inputs=True, +) + +``` + +Snakebids workflow features +--------------------------- + +The [](#snakebids.bids) function generates a properly-formatted BIDS filename with the specified entities, as documented in more detail elsewhere in this documentation. + +[](#snakebids.generate_inputs) returns an instance of [](#snakebids.BidsDataset), a special [](#dict) with keys mapping to the {class}`BidsComponents <snakebids.BidsComponent>` defined in [the config file](/bids_app/config). Each {class}`~snakebids.BidsComponent` contains a number of attributes to assist processing a BIDS dataset with snakemake. {func}`~snakebids.generate_inputs` should be called at the beginning of the workflow and assigned to a variable called `inputs`. + +The {attr}`~snakebids.BidsComponent.path` member of [](#snakebids.BidsComponent) is generated by snakebids and contains a list of matched files for every input type. 
Often, the first rule to be invoked will use one or more entries in `inputs.path` as the input file specification. + +The {attr}`~snakebids.BidsComponent.zip_lists` member of [](#snakebids.BidsComponent) is used with [`bids()`](#snakebids.bids) and [`expand()`](#snakefiles_expand) to fill the wildcards with corresponding values from input files. The usage pattern is as follows: + +```py +expand( + bids( + root="results", + datatype="func", + suffix="bold.nii.gz", + **inputs.wildcards["bold"] + ), + zip, + **inputs.zip_lists["bold"] +) +``` + +The {attr}`~snakebids.BidsComponent.wildcards` member of {class}`snakebids.BidsComponent` is generated by snakebids and contains a dictionary mapping the wildcards for each input type to snakemake-formatted wildcards, for convenient use in the ``bids`` function. + + +## Accessing the underlying *pybids* dataset + +In addition to mapping all of the {class}`~snakebids.BidsComponents` to their names, {class}`~snakebids.BidsDataset` also has a {attr}`~snakebids.BidsDataset.layout` member which gives access to the underlying {class}`BIDSLayout `. This can be used to access advanced pybids features not covered by `snakebids`. Note that if `custom_paths` are specified for every {class}`BidsComponent `, pybids indexing will be skipped and `layout` will be set to `None`. If your workflow relies on accessing this `layout`, you must ensure your users do not provide a `custom_path` for every single component, either in the config file or [via the CLI](/running_snakebids/overview) (``--path_{component}``). diff --git a/docs/bids_app/workflow.rst b/docs/bids_app/workflow.rst deleted file mode 100644 index ee1c7ae2..00000000 --- a/docs/bids_app/workflow.rst +++ /dev/null @@ -1,50 +0,0 @@ -Workflows -========= - -Snakebids workflows are constructed the same way as any other `Snakemake workflows `_, but with a few additions that make it easier to work with BIDS datasets. 
- -To get access to these additions, the base Snakefile for a snakebids workflow should begin with the following boilerplate:: - - import snakebids - from snakebids import bids - - configfile: 'config/snakebids.yml' - - # Get input wildcards - inputs = snakebids.generate_inputs( - bids_dir=config["bids_dir"], - pybids_inputs=config["pybids_inputs"], - derivatives=config.get("derivatives", None), - participant_label=config.get("participant_label", None), - exclude_participant_label=config.get("exclude_participant_label", None) - - ) - - #this adds constraints to the bids naming - wildcard_constraints: **snakebids.get_wildcard_constraints(config) - -Snakebids workflow features ---------------------------- - -The :py:func:`snakebids.bids()` function generates a properly-formatted BIDS filename with the specified entities, as documented in more detail elsewhere in this documentation. - -:py:func:`snakebids.generate_inputs` returns an instance of :py:class:`snakebids.BidsDataset`, which contains a number attributes to assist processing a BIDS dataset with snakemake. It should be called at the beginning of the workflow and assigned to a variable called ``inputs``. - -The ``path`` member of :py:class:`snakebids.BidsDataset` is generated by snakebids and contains a list of matched files for every input type. Often, the first rule to be invoked will use one or more entries in ``inputs.path`` as the input file specification. - -The ``zip_lists`` member of :py:class:`snakebids.BidsDataset` is used with the :py:func:`bids ` function to fill the wildcards with corresponding values from input files. 
The usage pattern is as follows:: - - expand( - bids( - root="results", - datatype="func", - suffix="bold.nii.gz", - **inputs.wildcards["bold"] - ), - zip, - **inputs.zip_lists["bold"] - ) - -The ``wildcards`` member of :py:class:`snakebids.BidsDataset` is generated by snakebids and contains a dictionary mapping the wildcards for each input type to snakemake-formatted wildcards, for convenient use in the ``bids`` function. - -The ``layout`` member gives access to the underlying :py:class:`BIDSLayout `, which can be used to access advanced pybids features not covered by ``snakebids``. Note that if ``custom_paths`` are specified for every :py:class:`BidsComponent `, pybids indexing will be skipped and ``layout`` will be set to ``None``. If your workflow relies on accessing this ``layout``, you must ensure your users do not provide a ``custom_path`` for every single component, either in the config file or :ref:`via the CLI ` (``--path_{component}``). diff --git a/docs/conf.py b/docs/conf.py index d52cdbb3..ff10a633 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -34,16 +34,18 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - "sphinx_rtd_theme", "sphinxarg.ext", "sphinx.ext.intersphinx", "sphinx.ext.napoleon", "sphinx.ext.autodoc", + "sphinx.ext.autosummary", "sphinxcontrib.asciinema", "myst_parser", "sphinx_copybutton", + "sphinx_toolbox.more_autosummary", ] + myst_enable_extensions = [ "attrs_block", ] @@ -60,6 +62,10 @@ napoleon_google_docstring = False napoleon_numpy_docstring = True +autodoc_member_order = "bysource" +autodoc_typehints = "description" +autosummary_imported_members = True + # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -90,6 +96,7 @@ # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
html_static_path = ["_static"] +# templates_path = ["_templates"] sphinxcontrib_asciinema_defaults = { "preload": 1, diff --git a/docs/requirements.txt b/docs/requirements.txt index dcd1a149..1391e95d 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,6 +1,6 @@ -docutils<0.18 +# docutils<0.18 sphinx-argparse -sphinx_rtd_theme +# sphinx_rtd_theme sphinxcontrib-asciinema myst-parser furo==2023.3.23 diff --git a/docs/running_snakebids/overview.md b/docs/running_snakebids/overview.md index d78ab291..17613e0b 100644 --- a/docs/running_snakebids/overview.md +++ b/docs/running_snakebids/overview.md @@ -1,5 +1,3 @@ -(running overview)= - Overview ======== diff --git a/docs/tutorial/step0/Snakefile b/docs/tutorial/step0/Snakefile index 6c4b629d..9ba9f116 100644 --- a/docs/tutorial/step0/Snakefile +++ b/docs/tutorial/step0/Snakefile @@ -1,6 +1,6 @@ rule smooth: input: - '../bids/sub-001/func/sub-001_task-rest_run-1_bold.nii.gz' + 'data/sub-001/func/sub-001_task-rest_run-1_bold.nii.gz' params: sigma = '2.12' output: diff --git a/docs/tutorial/step1/Snakefile b/docs/tutorial/step1/Snakefile index d33bfd83..7fc07377 100644 --- a/docs/tutorial/step1/Snakefile +++ b/docs/tutorial/step1/Snakefile @@ -1,6 +1,6 @@ rule smooth: input: - '../bids/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_bold.nii.gz' + 'data/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_bold.nii.gz' params: sigma = '2.12' output: diff --git a/docs/tutorial/step2/Snakefile b/docs/tutorial/step2/Snakefile index e7fd11e0..ee851f58 100644 --- a/docs/tutorial/step2/Snakefile +++ b/docs/tutorial/step2/Snakefile @@ -3,7 +3,7 @@ def calc_sigma_from_fwhm(wildcards): rule smooth: input: - '../bids/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_bold.nii.gz' + 'data/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_bold.nii.gz' params: sigma = calc_sigma_from_fwhm output: diff --git a/docs/tutorial/step3/Snakefile b/docs/tutorial/step3/Snakefile index 3e93604c..ec211b88 100644 
--- a/docs/tutorial/step3/Snakefile +++ b/docs/tutorial/step3/Snakefile @@ -14,7 +14,7 @@ def calc_sigma_from_fwhm(wildcards): rule smooth: input: - '../bids/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_bold.nii.gz' + 'data/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_bold.nii.gz' params: sigma = calc_sigma_from_fwhm, output: diff --git a/docs/tutorial/step4/config.yml b/docs/tutorial/step4/config.yml index 01847028..2ca7ab6b 100644 --- a/docs/tutorial/step4/config.yml +++ b/docs/tutorial/step4/config.yml @@ -15,4 +15,4 @@ fwhm: - 20 -in_bold: '../bids/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_bold.nii.gz' +in_bold: 'data/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_bold.nii.gz' diff --git a/docs/tutorial/step5/Snakefile b/docs/tutorial/step5/Snakefile index 27ea6097..485607e2 100644 --- a/docs/tutorial/step5/Snakefile +++ b/docs/tutorial/step5/Snakefile @@ -29,11 +29,13 @@ rule smooth: params: sigma = calc_sigma_from_fwhm output: - bids(root='results', - subject='{subject}', - task='{task}', - run='{run}', - fwhm='{fwhm}', - suffix='bold.nii.gz') + bids( + root='results', + subject='{subject}', + task='{task}', + run='{run}', + fwhm='{fwhm}', + suffix='bold.nii.gz', + ) shell: 'fslmaths {input} -s {params.sigma} {output}' diff --git a/docs/tutorial/step5/config.yml b/docs/tutorial/step5/config.yml index 01847028..2ca7ab6b 100644 --- a/docs/tutorial/step5/config.yml +++ b/docs/tutorial/step5/config.yml @@ -15,4 +15,4 @@ fwhm: - 20 -in_bold: '../bids/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_bold.nii.gz' +in_bold: 'data/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_bold.nii.gz' diff --git a/docs/tutorial/step6/Snakefile b/docs/tutorial/step6/Snakefile index e84586cb..be3bdbcc 100644 --- a/docs/tutorial/step6/Snakefile +++ b/docs/tutorial/step6/Snakefile @@ -1,30 +1,30 @@ -import snakebids -from snakebids import bids +from snakebids import bids, generate_inputs configfile: 'config.yml' -config.update( - 
snakebids.generate_inputs( - bids_dir=config['bids_dir'], - pybids_inputs=config['pybids_inputs'], - ) +inputs = generate_inputs( + bids_dir=config['bids_dir'], + pybids_inputs=config['pybids_inputs'], + use_bids_inputs=True, ) -import pprint -pprint.pp(config) +print(inputs) rule all: input: - expand(bids(root='results', + expand( + bids(root='results', subject='{subject}', task='{task}', run='{run}', fwhm='{fwhm}', - suffix='bold.nii.gz'), - subject=config['subjects'], - task=config['tasks'], - run=config['runs'], - fwhm=config['fwhm']) + suffix='bold.nii.gz' + ), + subject=config['subjects'], + task=config['tasks'], + run=config['runs'], + fwhm=config['fwhm'] + ) def calc_sigma_from_fwhm(wildcards): @@ -32,15 +32,17 @@ def calc_sigma_from_fwhm(wildcards): rule smooth: input: - config['path']['bold'] + inputs['bold'].path params: sigma = calc_sigma_from_fwhm output: - bids(root='results', - subject='{subject}', - task='{task}', - run='{run}', - fwhm='{fwhm}', - suffix='bold.nii.gz') + bids( + root='results', + subject='{subject}', + task='{task}', + run='{run}', + fwhm='{fwhm}', + suffix='bold.nii.gz' + ) shell: 'fslmaths {input} -s {params.sigma} {output}' diff --git a/docs/tutorial/step6/config.yml b/docs/tutorial/step6/config.yml index dd6dfbd6..17493d3f 100644 --- a/docs/tutorial/step6/config.yml +++ b/docs/tutorial/step6/config.yml @@ -1,15 +1,4 @@ -bids_dir: '../bids' - -subjects: - - '001' - -tasks: - - rest - -runs: - - 1 - - 2 - +bids_dir: 'data' fwhm: - 5 @@ -29,4 +18,3 @@ pybids_inputs: - acquisition - task - run - diff --git a/docs/tutorial/step7/config.yml b/docs/tutorial/step7/config.yml index 6c4d6580..17493d3f 100644 --- a/docs/tutorial/step7/config.yml +++ b/docs/tutorial/step7/config.yml @@ -1,4 +1,4 @@ -bids_dir: '../bids' +bids_dir: 'data' fwhm: - 5 @@ -18,4 +18,3 @@ pybids_inputs: - acquisition - task - run - diff --git a/docs/tutorial/step8/config.yml b/docs/tutorial/step8/config.yml index b020137a..9008a5ce 100644 --- 
a/docs/tutorial/step8/config.yml +++ b/docs/tutorial/step8/config.yml @@ -1,4 +1,4 @@ -bids_dir: '../bids' +bids_dir: 'data' fwhm: - 5 diff --git a/docs/tutorial/tutorial.md b/docs/tutorial/tutorial.md index d69a7f79..eca683cf 100644 --- a/docs/tutorial/tutorial.md +++ b/docs/tutorial/tutorial.md @@ -1,5 +1,9 @@ -Getting started -=============== +# Tutorial + +% links +[expand_func]: inv:snakemake:std:label#snakefiles_expand + +## Getting started In this example we will make a workflow to smooth ``bold`` scans from a bids dataset. @@ -9,20 +13,59 @@ We will start by creating a simple rule, then make this more generalizable in ea This command performs smoothing with a sigma=2.12 Gaussian kernel (equivalent to 5mm FWHM, with sigma=fwhm/2.355), and saves the smoothed file as ``results/sub-001/func/sub-001_task-rest_run-1_fwhm-5mm_bold.nii.gz``. -Prerequisites -------------- +### Installation + +Start by making a new directory: + +```console +$ mkdir snakebids-tutorial +$ cd snakebids-tutorial +``` + +Check your python version to make sure you have version 3.7 or higher: +```console +$ python --version +Python 3.10.0 +``` + +Make a new virtual environment: + +```console +$ python -m venv .venv +$ source .venv/bin/activate +``` + +And use pip to install snakebids: + +```console +$ pip install snakebids +``` + +In our example, we'll be using the [`fslmaths`](https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/Fslutils) tool from [*FSL*](https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/). If you want to actually run the workflow, you'll need to have FSL installed. This is not actually necessary to follow along the tutorial however, as we can use "dry runs" to see what snakemake *would* do if FSL were installed. + +### Getting the dataset + +We will be running the tutorial on a test dataset consisting only of empty files. We won't actually be able to run our workflow on it (`fslmaths` will fail), but as mentioned above, we can use dry runs to see what would normally happen. 
+ +If you wish to follow along using the same dataset, currently the easiest way is to start by cloning snakebids: + +```console +$ git clone https://github.com/akhanf/snakebids.git +``` + +Then copy the following directory: -- To go through this tutorial you only need to have snakemake and snakebids installed (``pip install snakebids``) -- Since we will just make use of snakemake dry-runs (``-n`` option ), the tutorial data are just placeholders (zero-sized files), and you don't actually need to have FSL installed for ``fslmaths``. +```console +$ cp -r snakebids/docs/tutorial/bids ./data +``` -* TODO: instructions to get or recreate the bids tutorial data +It's also perfectly possible (and probably better!) to try the tutorial on your own dataset. Just adjust any paths below so that they match your data! Part I: Snakemake -================= +==================== (step_0)= -Step 0: a basic non-generic workflow ------------------------------------- +## Step 0: a basic non-generic workflow In this rule, we start by creating a rule that is effectively hard-coding the paths for input and output to re-create the command as above. @@ -44,12 +87,11 @@ When we invoke ``snakemake``, it uses the first rule in the snakefile as the ``t So far, we just have a fancy way of specifying the exact same command we started with, so there is no added benefit (yet). But we will soon add to this rule to make it more generalizable. (step_1)= -Step 1: adding wildcards ------------------------- +## Step 1: adding wildcards First step to make the workflow generalizeable is to replace the hard-coded identifiers (e.g. the subject, task and run) with wildcards. -In the Snakefile, we can replace sub-001 with sub-{subject}, and so forth for task and run. Now the rule is generic for any subject, task, or run. +In the Snakefile, we can replace `sub-001` with `sub-{subject}`, and so forth for task and run. Now the rule is generic for any subject, task, or run. 
```{literalinclude} step1/Snakefile :language: python @@ -63,7 +105,7 @@ However, if we try to execute (dry-run) the workflow as before, we get an error. ```{asciinema} step1/step1.cast ``` -So for the time being, we will make use of the snakemake command-line argument to specify ``targets``, and specify the file we want generated from the command-line, by running:: +So for the time being, we will make use of the snakemake command-line argument to specify ``targets``, and specify the file we want generated from the command-line, by running: ```console $ snakemake -np results/sub-001/func/sub-001_task-rest_run-1_fwhm-5mm_bold.nii.gz @@ -81,16 +123,17 @@ Now, try changing the output smoothing value, e.g. ``fwhm-10mm``, and see what h As expected the command still uses a smoothing value of 2.12, since that has been hard-coded, but we will see how to rectify this in the next step. (step_2)= -Step 2: adding a params function --------------------------------- +## Step 2: adding a params function As we noted, the sigma parameter needs to be computed from the FWHM. We can use a function to do this. Functions can be used for any ``input`` or ``params``, and must take ``wildcards`` as an input argument, which provides a mechanism to pass the wildcards (determined from the output file) to the function. We can thus define a simple function that returns a string representing ``FWHM/2.355`` as follows: -```python -def calc_sigma_from_fwhm(wildcards): - return f'{float(wildcards.fwhm)/2.355:0.2f}' + +```{literalinclude} step2/Snakefile +:lines: 1-4 +:caption: Snakefile +:linenos: ``` Note 1: We now have to make the fwhm in the output filename a wildcard, so that it can be passed to the function (via the wildcards object). @@ -121,8 +164,7 @@ Now try running the workflow again, with `fwhm-5` as well as `fwhm-10`. 
``` (step_3)= -Step 3: adding a target rule ----------------------------- +## Step 3: adding a target rule Now we have a generic rule, but it is pretty tedious to have to type out the filename of each target from the command-line in order to use it. @@ -130,20 +172,15 @@ This is where target rules come in. If you recall from earlier, the first rule i In this case, we have a BIDS dataset with two runs (run-1, run-2), and suppose we wanted to compute smoothing with several different FWHM kernels (5,10,15,20). We can thus make a target rule that has all these resulting filenames as inputs. -A very useful function in snakemake is [`expand()`](). It is a way to perform array expansion to create lists of strings (input filenames). +A very useful function in snakemake is [`expand()`][expand_func]. It is a way to perform array expansion to create lists of strings (input filenames). -```python -rule all: - input: - expand( - 'results/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_fwhm-{fwhm}mm_bold.nii.gz', - subject='001', - task='rest', - run=[1,2], - fwhm=[5,10,15,20] - ) -``` +```{literalinclude} step3/Snakefile + :language: python + :caption: Snakefile + :linenos: + :lines: 1-11 +``` Now, we don't need to specify any targets from the command-line, and can just run: @@ -163,8 +200,7 @@ The entire Snakefile for reference is: ``` (step_4)= -Step 4: adding a config file ----------------------------- +## Step 4: adding a config file We have a functional workflow, but suppose you need to configure or run it on another bids dataset with different subjects, tasks, runs, or you want to run it for different smoothing values. You have to actually modify your workflow in order to do this. 
@@ -172,13 +208,17 @@ It is a better practice instead to keep your configuration variables separate fr To do this, we simply add a line to our workflow: -```py -configfile: 'config.yml' +```{literalinclude} step4/Snakefile + :language: python + :caption: Snakefile + :linenos: + :emphasize-lines: 1 + :lines: 1-4 ``` Snakemake will then handle reading it in, and making the configuration variables available via dictionary called ``config``. -In our config file, we will add variables for everything in the target rule [`expand()`](): +In our config file, we will add variables for everything in the target rule [`expand()`][expand_func]: ```{code-block} yaml :caption: config.yaml @@ -224,41 +264,35 @@ Part II: Snakebids Now that we have a fully functioning and generic Snakemake workflow, let's see what Snakebids can add. (step_5)= -Step 5: the bids() function ---------------------------- +## Step 5: the bids() function -The first thing we can make use of is the {func}`bids() ` function. This provides an easy way to generate bids filenames. This is especially useful when defining output files in your workflow and you have many bids entities. +The first thing we can make use of is the {func}`~snakebids.bids` function. This provides an easy way to generate bids filenames. This is especially useful when defining output files in your workflow and you have many bids entities. 
In our existing workflow, this was our output file: -```{code-block} python -:linenos: -:lineno-start: 22 -:caption: Snakefile - output: - 'results/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_fwhm-{fwhm}mm_bold.nii.gz' +```{literalinclude} step4/Snakefile + :language: python + :caption: Snakefile + :linenos: + :start-at: output + :end-before: shell + :lineno-match: ``` To create the same path using {func}`bids() `, we just need to specify the root directory (`results`), all the bids tags (subject, task, run, fwhm), and the suffix (which includes the extension): -```{code-block} python +```{literalinclude} step5/Snakefile +:language: python :linenos: -:lineno-start: 22 :caption: Snakefile - - output: - bids( - root='results', - subject='{subject}', - task='{task}', - run='{run}', - fwhm='{fwhm}', - suffix='bold.nii.gz' - ) +:start-at: output +:end-before: shell +:lineno-match: ``` + ```{note} To make a snakemake wildcard, we wrapped the `'value'` in curly braces (e.g. `'{value}'`). ``` @@ -276,36 +310,26 @@ The Snakefile with the output filename replaced (in both rules) is below: ``` -Step 6: parsing the BIDS dataset --------------------------------- +## Step 6: parsing the BIDS dataset So far, we have had to manually enter the path to input bold file in the config file, and also specify what subjects, tasks, and runs we want processed. Can't we use the fact that we have a BIDS dataset to automate this a bit more? With Snakemake, there are ways to glob the files to figure out what wildcards are present (e.g. [`glob_wildcards()`](inv:snakemake#glob-wildcards)), however, this is not so straightforward with BIDS, since filenames in BIDS often have optional components. E.g. some datasets may have a `ses` tag/sub-directory, and others do not. Also there are often optional user-defined values, such as the `acq` tag, that a workflow in most cases should ignore. 
Thus, the input that we use in our workflow, `in_bold`, that has wildcards to be generic, would need to be altered for any given BIDS dataset, along with the workflow itself, making this automated BIDS parsing difficult within Snakemake. -Snakebids lets you parse a bids dataset (using [pybids](inv:pybids:std:doc#index) under the hood) using a configfile that contains the required wildcards, along with data structures that specify all the wildcard values for all the subjects. This, in combination with the {func}`bids() ` function, can allow one to make snakemake workflows that are compatible with any general bids dataset. +Snakebids lets you parse a bids dataset (using [pybids](inv:pybids:std:doc#index) under the hood) using a configfile that contains the required wildcards, along with data structures that specify all the wildcard values for all the subjects. This, in combination with the {func}`bids() ` function, can allow one to make snakemake workflows that are compatible with any general bids dataset. + +To add this parsing to the workflow, we call the {func}`generate_inputs() ` function before our rules are defined, and pass along some configuration data to specify the location of the bids directory (`bids_dir`) and the inputs we want to parse for the workflow (`pybids_inputs`). The function returns a {class}`BidsDataset `, which we'll assign to a variable called `inputs`: -To add this parsing to the workflow, we call the {func}`generate_inputs() ` function before our rules are defined, and pass along some configuration data to specify the location of the bids directory (`bids_dir`) and the inputs we want to parse for the workflow (`pybids_inputs`). 
The function returns a {class}`BidsDataset() `, which we'll assign to a variable called `inputs`: -```{code-block} python +```{literalinclude} step6/Snakefile +:language: python :linenos: -:lineno-start: 1 :caption: Snakefile +:end-before: print(inputs) :emphasize-lines: 1, 5-9 - -from snakebids import bids, generate_inputs - -configfile: 'config.yml' - -inputs = generate_inputs( - bids_dir=config['bids_dir'], - pybids_inputs=config['pybids_inputs'], - use_bids_inputs=True, -) - -rule all: ``` + ```{note} Snakebids is transitioning to a new format for {func}`generate_inputs() `. Currently, you need to opt-in to the new features by setting `use_bids_inputs=True` in `generate_inputs()`, but it will become the default in an upcoming version. We recommend all new users opt-in to maintain long term support, so the tutorial is written using the new syntax. A tutorial for the old syntax can be found on [the v0.5.0 docs](https://snakebids.readthedocs.io/en/v0.5.0/tutorial/tutorial.html#part-ii-snakebids). ``` @@ -313,24 +337,15 @@ Snakebids is transitioning to a new format for {func}`generate_inputs() ) function in [pybids](inv:pybids#index), and thus is quite customizable. @@ -342,24 +357,12 @@ Entries in the `wildcards` list do not have to be in your bids dataset, but if t The {class}`BidsDataset ` class returned by {func}`generate_inputs() ` summarizes the wildcards found in your bids dataset and has parameters to plug those wildcards into your workflow. 
To investigate it, add a print statement following `generate_inputs(...)`: -```{code-block} python +```{literalinclude} step6/Snakefile +:language: python :linenos: -:lineno-start: 1 :caption: Snakefile +:end-before: rule all :emphasize-lines: 11 - -from snakebids import bids, generate_inputs - -configfile: 'config.yml' - -inputs = generate_inputs( - bids_dir=config['bids_dir'], - pybids_inputs=config['pybids_inputs'], - use_bids_inputs=True, -) - -print(inputs) - ``` Run the workflow: @@ -369,7 +372,7 @@ $ snakemake -nq BidsDataset({ "bold": BidsComponent( name="bold", - path="/home/pvandyken/projects/snakebids/dummy/bids_bold/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_bold.nii.gz", + path="/path/to/bids_bold/sub-{subject}/func/sub-{subject}_task-{task}_run-{run}_bold.nii.gz", zip_lists={ "subject": ["1", "2", "2" ], "task": ["rest", "rest", "rest"], @@ -383,25 +386,22 @@ BidsDataset({ As you can see, {class}`BidsDataset ` is just a special kind of {class}`dict`. Its keys refer to the names of the input types you specified in the config file (in `pybids_inputs`). You can test this by running `print(list(inputs.keys))`{l=python}. Each value contains an object summarizing that input type. We refer to these input types, and the objects that describe them, as {class}`BidsComponents `. -Each {class}`BidsComponents ` has three primary attributes. {attr}`.name ` is the name of the component, this will be the same as the dictionary key in the dataset. {attr}`.path ` is the generic path of the component. Note the wildcards: `{subject}`, `{task}`, and `{run}`. These wildcards can be substituted for values that will uniquely define each specific path. {attr}`.zip_lists ` contains these unique values. It's a simple {class}`dict` whose keys are bids entities and whose values are {attr}`lists ` of entity-values. Note the tabular format that printed in your console: each of the columns of this "table" correspond to the entity-values of one specific file. 
+Each {class}`BidsComponent ` has three primary attributes. {attr}`.name ` is the name of the component; this will be the same as the dictionary key in the dataset. {attr}`.path ` is the generic path of the component. Note the wildcards: `{subject}`, `{task}`, and `{run}`. These wildcards can be substituted for values that will uniquely define each specific path. {attr}`.zip_lists ` contains these unique values. It's a simple {class}`dict` whose keys are bids entities and whose values are {attr}`lists ` of entity-values. Note the tabular format that printed in your console: each of the columns of this "table" corresponds to the entity-values of one specific file. Notice that `inputs['bold'].path`{l=python} is the same as the path we wrote under `in_bold:` in our `config.yaml` file in [step 4](#step_4). In fact, we can go ahead and replace `config['in_bold']`{l=python} in our `Snakemake` file with `inputs['bold'].path`{l=python} and delete `in_bold` from `config.yaml`. -```{code-block} python -:caption: Snakefile +```{literalinclude} step6/Snakefile +:language: python :linenos: -:lineno-start: 29 +:caption: Snakefile +:start-at: rule smooth +:end-at: params +:lineno-match: :emphasize-lines: 3 - -rule smooth: - input: - inputs['bold'].path - params: - ``` -Step 7: using input wildcards: ------------------------------- + +## Step 7: using input wildcards: {attr}`BidsComponent.path ` already grants us a lot of flexibility, but we can still do more! In addition to the three main attributes of {class}`BidsComponents ` already described, the class offers a number of special properties we can use in our workflows. First, we'll look at {attr}`BidsComponent.wildcards `. This is a dict that maps each entity to the brace-wrapped `{wildcards}` we specified in `pybids_config`.
If you printed this value in our test workflow, it would look like this: @@ -413,54 +413,28 @@ inputs['bold'].wildcards == { } ``` -This is super useful when combined with {func}`bids() `, as we can use the keyword expansion (`**inputs[].wildcards`{l=python} to set all the wildcard parameters to the {func}`bids() ` function. Thus, we can make our workflow even more general, by replacing: +This is super useful when combined with {func}`bids() `, as we can use the keyword expansion (`**inputs[].wildcards`{l=python}) to set all the wildcard parameters to the {func}`bids() ` function. Thus, we can make our workflow even more general, by replacing this: -```{code-block} -:caption: Snakefile +```{literalinclude} step6/Snakefile +:language: python :linenos: -:lineno-start: 29 +:caption: Snakefile +:start-at: rule smooth +:end-at: fslmaths {input} +:lineno-match: :emphasize-lines: 9-11 - -rule smooth: - input: - config['in_bold'] - params: - sigma = calc_sigma_from_fwhm - output: - bids( - root='results', - subject='{subject}', - task='{task}', - run='{run}', - fwhm='{fwhm}', - suffix='bold.nii.gz' - ) - shell: - 'fslmaths {input} -s {params.sigma} {output}' ``` with this: -```{code-block} -:caption: Snakefile +```{literalinclude} step7/Snakefile +:language: python :linenos: -:lineno-start: 29 +:caption: Snakefile +:start-at: rule smooth +:end-at: fslmaths {input} +:lineno-match: :emphasize-lines: 11 - -rule smooth: - input: - config['in_bold'] - params: - sigma = calc_sigma_from_fwhm - output: - bids( - root='results', - fwhm='{fwhm}', - suffix='bold.nii.gz' - **inputs['bold'].wildcards - ) - shell: - 'fslmaths {input} -s {params.sigma} {output}' ``` This effectively ensures that any bids entities from the input filenames (that are listed as pybids wildcards) get carried over to the output filenames. Note that we still have the ability to add on additional entities, such as `fwhm` here, and set the root directory and suffix.
@@ -475,26 +449,16 @@ inputs['bold'].entities == { } ``` -This is a useful dict to use with [`expand()`]() in a target rule, letting us avoid having to specify e.g. the run numbers or task names in the config, and rely on pybids to determine these: +This is a useful dict to use with [`expand()`][expand_func] in a target rule, letting us avoid having to specify e.g. the run numbers or task names in the config, and rely on pybids to determine these: -```{code-block} -:caption: Snakefile +```{literalinclude} step7/Snakefile +:language: python :linenos: -:lineno-start: 11 +:caption: Snakefile +:start-at: rule all +:end-before: def calc +:lineno-match: :emphasize-lines: 11 - -rule all: - input: - expand( - bids( - root='results', - fwhm='{fwhm}', - suffix='bold.nii.gz', - **config['wildcards']['bold'] - ), - fwhm=config['fwhm'], - **inputs['bold'].entities, - ) ``` @@ -518,10 +482,9 @@ For reference, here is the updated config file and Snakefile after these changes :caption: Snakefile ``` -Step 8: creating a command-line executable ------------------------------------------- +## Step 8: creating a command-line executable -Now that we have pybids parsing to dynamically configure our workflow inputs based on our BIDS dataset, we are ready to turn our workflow into a [BIDS App](http://bids-apps.neuroimaging.io/), e.g. an app with a standardized command-line interface (e.g. three required positional arguments: ``bids_directory``, ``output_directory``, and ``analysis_level``). +Now that we have pybids parsing to dynamically configure our workflow inputs based on our BIDS dataset, we are ready to turn our workflow into a [BIDS App](http://bids-apps.neuroimaging.io/). BIDS Apps are command-line apps with a standardized interface (e.g. three required positional arguments: ``bids_directory``, ``output_directory``, and ``analysis_level``). 
We do this in snakebids by creating an executable python script, which uses the {class}`SnakeBidsApp ` class from {mod}`snakebids.app` to run snakemake. An example of this `run.py` script is shown below. @@ -533,51 +496,14 @@ We do this in snakebids by creating an executable python script, which uses the However, we will first need to add some additional information to our config file, mainly to define how to parse command-line arguments for this app. This is done with a new `parse_args` dict in the config: -```yaml -parse_args: - bids_dir: - help: | - The directory with the input dataset formatted according to the BIDS - standard. - - output_dir: - help: | - The directory where the output files should be stored. If you are running - group level analysis this folder should be prepopulated with the results - of the participant level analysis. - - analysis_level: - help: Level of the analysis that will be performed. - choices: *analysis_levels - - --participant_label: - help: | - The label(s) of the participant(s) that should be analyzed. The label - corresponds to sub- from the BIDS spec (so it does not - include "sub-"). If this parameter is not provided all subjects should be - analyzed. Multiple participants can be specified with a space separated - list. - nargs: '+' - - --exclude_participant_label: - help: | - The label(s) of the participant(s) that should be excluded. The label - corresponds to sub- from the BIDS spec (so it does not - include "sub-"). If this parameter is not provided all subjects should be - analyzed. Multiple participants can be specified with a space separated - list. - nargs: '+' - - --derivatives: - help: | - Path(s) to a derivatives dataset, for folder(s) that contains multiple - derivatives datasets. - default: False - type: Path - nargs: '+' +```{literalinclude} step8/config.yml +:language: yaml +:caption: config.yml +:linenos: +:start-at: parse_args +:lineno-match: ``` - The above is standard boilerplate for any BIDS app. 
You can also define any new command-line arguments you wish. Snakebids uses the {mod}`argparse` module, and each entry in this `parse_args` dict thus becomes a call to {meth}`add_argument() ` from {class}`argparse.ArgumentParser`. When you run the workflow, snakebids adds the named argument values to the config dict, so your workflow can make use of it as if you had manually added the variable to your configfile. Arguments that will receive paths should be given the item `type: Path`, as is done for `--derivatives` in the example above. Without this annotation, paths given to keyword arguments will be interpreted relative to the output directory. Indicating `type: Path` will tell Snakebids to first resolve the path according to your current working directory. @@ -588,13 +514,13 @@ Arguments that will receive paths should be given the item `type: Path`, as is d BIDS apps also have a required `analysis_level` positional argument, so there are some config variables to set this as well. The analysis levels are in an `analysis_levels` list in the config, and also as keys in a `targets_by_analysis_level` dict, which can be used to map each analysis level to the name of a target rule: -```yaml -targets_by_analysis_level: - participant: - - '' # if '', then the first rule is run - -analysis_levels: &analysis_levels - - participant +```{literalinclude} step8/config.yml +:language: yaml +:caption: config.yml +:linenos: +:start-at: targets_by_analysis_level +:end-before: parse_args +:lineno-match: ``` Note: since we specified a `''` for the target rule, no target rule will be specified, so snakemake will just default to the first rule in the workflow. 
diff --git a/snakebids/__init__.py b/snakebids/__init__.py index d39ab8c6..b9067b58 100644 --- a/snakebids/__init__.py +++ b/snakebids/__init__.py @@ -1,3 +1,4 @@ +"""Top-level namespace containing the core classes and functions""" __submodules__ = ["core"] __version__ = "0.0.0" diff --git a/snakebids/app.py b/snakebids/app.py index 0fbddfd7..4060da5b 100644 --- a/snakebids/app.py +++ b/snakebids/app.py @@ -75,26 +75,26 @@ def wrapper(self: "SnakeBidsApp"): class SnakeBidsApp: """Snakebids app with config and arguments. - Attributes + Parameters ---------- - snakemake_dir : str + snakemake_dir : str | Path Root directory of the snakebids app, containing the config file and workflow files. - parser : ArgumentParser, optional + parser Parser including only the arguments specific to this Snakebids app, as specified in the config file. By default, it will use `create_parser()` from `cli.py` - configfile_path : str, optional + configfile_path Relative path to config file (relative to snakemake_dir). By default, autocalculates based on snamake_dir - snakefile_path : str, optional + snakefile_path Absolute path to the input Snakefile. By default, autocalculates based on snakemake_dir:: join(snakemake_dir, snakefile_path) - config : dict, optional + config Contains all the configuration variables parsed from the config file and generated during the initialization of the SnakeBidsApp. - args : SnakebidsArgs, optional + args Arguments to use when running the app. By default, generated using the parser attribute, autopopulated with args from `config.py` """ @@ -117,34 +117,34 @@ class SnakeBidsApp: def add_plugins( self, plugins: Iterable[Callable[[SnakeBidsApp], None | SnakeBidsApp]] - ): + ) -> None: """Supply list of methods to be called after CLI parsing. - Each callable in ``plugins`` should take, as a single argument, a - reference to the ``SnakeBidsApp``. 
Plugins may perform any arbitrary - side effects, including updates to the config dictionary, validation - of inputs, optimization, or other enhancements to the snakebids app. + Each callable in ``plugins`` should take, as a single argument, a reference to + the ``SnakeBidsApp``. Plugins may perform any arbitrary side effects, including + updates to the config dictionary, validation of inputs, optimization, or other + enhancements to the snakebids app. - CLI parameters may be read from ``SnakeBidsApp.config``. Plugins - are responsible for documenting what properties they expect to find - in the config. + CLI parameters may be read from ``SnakeBidsApp.config``. Plugins are responsible + for documenting what properties they expect to find in the config. Every plugin should return either: - - Nothing, in which case any changes to the SnakeBidsApp will - persist in the workflow. - - A ``SnakeBidsApp``, which will replace the existing instance, - so this option should be used with care. + - Nothing, in which case any changes to the SnakeBidsApp will persist in the + workflow. + - A ``SnakeBidsApp``, which will replace the existing instance, so this option + should be used with care. + + Parameters + ---------- + plugins + List of plugins to be added """ # pylint: disable=no-member self.plugins.extend(plugins) - def run_snakemake(self): - """Run snakemake with that config. - - Workflow snakefile will read snakebids config, create inputs_config, - and read that in. 
- """ + def run_snakemake(self) -> None: + """Run snakemake with the given config, after applying plugins""" # If no SnakebidsArgs were provided on class instantiation, we compute args # using the provided parser diff --git a/snakebids/core/datasets.py b/snakebids/core/datasets.py index 9d70566c..fa673e35 100644 --- a/snakebids/core/datasets.py +++ b/snakebids/core/datasets.py @@ -35,25 +35,23 @@ class BidsComponent: """Component of a BidsDataset mapping entities to their resolved values BidsComponents are immutable: their values cannot be altered. - - Attributes - ---------- - name - Name of the component - path - Wildcard-filled path that matches the files for this component. - zip_lists - Dictionary where each key is a wildcard entity and each value is a list of the - values found for that entity. Each of these lists has length equal to the number - of images matched for this modality, so they can be zipped together to get a - list of the wildcard values for each file. """ name: str = attr.field(on_setattr=attr.setters.frozen) + """Name of the component""" + path: str = attr.field(on_setattr=attr.setters.frozen) + """Wildcard-filled path that matches the files for this component.""" zip_lists: dict[str, list[str]] = attr.field( on_setattr=attr.setters.frozen, converter=dict ) + """Table of unique wildcard groupings for each member in the component. + + Dictionary where each key is a wildcard entity and each value is a list of the + values found for that entity. Each of these lists has length equal to the number + of images matched for this modality, so they can be zipped together to get a + list of the wildcard values for each file. 
+ """ @zip_lists.validator # type: ignore def _validate_zip_lists(self, _, value: dict[str, list[str]]): @@ -70,33 +68,6 @@ def _validate_zip_lists(self, _, value: dict[str, list[str]]): f"{self.path}: {fields} != zip_lists: {set(value)}" ) - @property - def input_name(self): - """Alias of :attr:`name ` - - Name of the component - """ - return self.name - - @property - def input_path(self): - """Alias of :attr:`path ` - - Wildcard-filled path that matches the files for this component. - """ - return self.path - - @property - def input_zip_lists(self): - """Alias of :attr:`zip_lists ` - - Dictionary where each key is a wildcard entity and each value is a list of the - values found for that entity. Each of these lists has length equal to the number - of images matched for this modality, so they can be zipped together to get a - list of the wildcard values for each file. - """ - return self.zip_lists - # Note: we can't use cached property here because it's incompatible with slots. _input_lists: Optional[dict[str, list[str]]] = attr.field( default=None, init=False, eq=False, repr=False @@ -109,7 +80,7 @@ def input_zip_lists(self): ) @property - def entities(self): + def entities(self) -> dict[str, list[str]]: """Component entities and their associated values Dictionary where each key is an entity and each value is a list of the @@ -121,12 +92,8 @@ def entities(self): } return self._input_lists - @property_alias(entities, "entities", "snakebids.BidsComponent.entities") - def input_lists(self): - return self.entities - @property - def wildcards(self): + def wildcards(self) -> dict[str, str]: """Wildcards in brace-wrapped syntax Dictionary where each key is the name of a wildcard entity, and each value is @@ -138,6 +105,37 @@ def wildcards(self): } return self._input_wildcards + @property + def input_name(self) -> str: + """Alias of :attr:`name ` + + Name of the component + """ + return self.name + + @property + def input_path(self) -> str: + """Alias of :attr:`path ` + + 
Wildcard-filled path that matches the files for this component. + """ + return self.path + + @property + def input_zip_lists(self) -> dict[str, list[str]]: + """Alias of :attr:`zip_lists ` + + Dictionary where each key is a wildcard entity and each value is a list of the + values found for that entity. Each of these lists has length equal to the number + of images matched for this modality, so they can be zipped together to get a + list of the wildcard values for each file. + """ + return self.zip_lists + + @property_alias(entities, "entities", "snakebids.BidsComponent.entities") + def input_lists(self): + return self.entities + @property_alias(wildcards, "wildcards", "snakebids.BidsComponent.wildcards") def input_wildcards(self): return self.wildcards @@ -219,16 +217,12 @@ def __setitem__(self, _: Any, __: Any): deprecated_in="0.8.0", admonition="warning", ) - def path(self): + def path(self) -> dict[str, str]: """Dict mapping :class:`BidsComponents ` names to \ their ``paths``. """ return {key: value.path for key, value in self.data.items()} - @property_alias(path, "path", "snakebids.BidsDataset.path") - def input_path(self): - return self.path - @cached_property @deprecated( details=""" @@ -240,16 +234,12 @@ def input_path(self): deprecated_in="0.8.0", admonition="warning", ) - def zip_lists(self): + def zip_lists(self) -> dict[str, dict[str, list[str]]]: """Dict mapping :class:`BidsComponents ` names to \ their ``zip_lists`` """ return {key: value.zip_lists for key, value in self.data.items()} - @property_alias(zip_lists, "zip_lists", "snakebids.BidsDataset.zip_lists") - def input_zip_lists(self): - return self.zip_lists - @cached_property @deprecated( details=""" @@ -261,16 +251,12 @@ def input_zip_lists(self): deprecated_in="0.8.0", admonition="warning", ) - def entities(self): + def entities(self) -> dict[str, dict[str, list[str]]]: """Dict mapping :class:`BidsComponents ` names to \ to their :attr:`entities ` """ return {key: value.entities for key, value 
in self.data.items()} - @property_alias(entities, "entities", "snakebids.BidsDataset.entities") - def input_lists(self): - return self.entities - @cached_property @deprecated( details=""" @@ -282,16 +268,12 @@ def input_lists(self): deprecated_in="0.8.0", admonition="warning", ) - def wildcards(self): + def wildcards(self) -> dict[str, dict[str, str]]: """Dict mapping :class:`BidsComponents ` names to \ their :attr:`wildcards ` """ return {key: value.input_wildcards for key, value in self.data.items()} - @property_alias(wildcards, "wildcards", "snakebids.BidsDataset.wildcards") - def input_wildcards(self): - return self.wildcards - @cached_property def subjects(self): """A list of the subjects in the dataset.""" @@ -304,7 +286,7 @@ def subjects(self): ] @cached_property - def sessions(self): + def sessions(self) -> list[str]: """A list of the sessions in the dataset.""" return [ *{ @@ -315,7 +297,7 @@ def sessions(self): ] @cached_property - def subj_wildcards(self): + def subj_wildcards(self) -> dict[str, str]: """The subject and session wildcards applicable to this dataset. 
``{"subject":"{subject}"}`` if there is only one session, ``{"subject": @@ -328,6 +310,22 @@ def subj_wildcards(self): "session": "{session}", } + @property_alias(path, "path", "snakebids.BidsDataset.path") + def input_path(self) -> dict[str, str]: + return self.path + + @property_alias(entities, "entities", "snakebids.BidsDataset.entities") + def input_lists(self) -> dict[str, dict[str, list[str]]]: + return self.entities + + @property_alias(zip_lists, "zip_lists", "snakebids.BidsDataset.zip_lists") + def input_zip_lists(self) -> dict[str, dict[str, list[str]]]: + return self.zip_lists + + @property_alias(wildcards, "wildcards", "snakebids.BidsDataset.wildcards") + def input_wildcards(self) -> dict[str, dict[str, str]]: + return self.wildcards + @property def as_dict(self): """Get the layout as a legacy dict diff --git a/snakebids/core/filtering.py b/snakebids/core/filtering.py index 45f2a43b..7a603b02 100644 --- a/snakebids/core/filtering.py +++ b/snakebids/core/filtering.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import operator as op import re from typing import Dict, List, TypeVar, Union, overload @@ -13,52 +15,48 @@ @overload def filter_list( - zip_list, - filters: Dict[str, T_co], + zip_list: dict[str, list[str]], + filters: dict[str, list[str] | str], return_indices_only: Literal[False] = ..., regex_search: bool = ..., -) -> Dict[str, List[str]]: +) -> dict[str, list[str]]: ... @overload def filter_list( - zip_list, - filters: Dict[str, T_co], + zip_list: dict[str, list[str]], + filters: dict[str, list[str] | str], return_indices_only: Literal[True] = ..., regex_search: bool = ..., -) -> List[int]: +) -> list[int]: ... 
def filter_list( - zip_list: Dict[str, List[str]], - filters: Dict[str, T_co], + zip_list: dict[str, list[str]], + filters: dict[str, list[str] | str], return_indices_only: bool = False, - regex_search=False, -): + regex_search: bool = False, +) -> dict[str, list[str]] | list[int]: """This function is used when you are expanding over some subset of the wildcards i.e. if your output file doesn't contain all the wildcards in :attr:`BidsComponent.wildcards ` Parameters ---------- - zip_list : dict + zip_list generated zip lists dict from config file to filter - filters : dict + filters wildcard values to filter the zip lists - return_indices_only : bool, default=False + return_indices_only return the indices of the matching wildcards - regex_search : bool, default=False + regex_search Use regex matching to filter instead of the default equality check. - Returns - ------- - dict - zip list with non-matching elements removed Examples -------- @@ -135,7 +133,7 @@ def filter_list( else: match_func = op.eq - keep_indices = set.intersection( + keep_indices: set[int] = set.intersection( # Get a set {0,1,2,3...n-1} where n is the length of any one of the lists in # zip_list {*_get_zip_list_indices(zip_list)}, @@ -147,7 +145,7 @@ def filter_list( } for key, val in filters.items() if key in zip_list - ) + ), ) # Now we have the indices, so filter the lists diff --git a/snakebids/core/input_generation.py b/snakebids/core/input_generation.py index c75d9b0e..bc498c92 100644 --- a/snakebids/core/input_generation.py +++ b/snakebids/core/input_generation.py @@ -1,4 +1,5 @@ """Utilities for converting Snakemake apps to BIDS apps.""" +from __future__ import annotations import json import logging @@ -25,15 +26,15 @@ # pylint: disable=too-many-arguments @overload def generate_inputs( - bids_dir, + bids_dir: Path | str, pybids_inputs: InputsConfig, - pybids_database_dir=..., - pybids_reset_database=..., - derivatives=..., - pybids_config=..., - limit_to=..., - participant_label=..., - 
exclude_participant_label=..., + pybids_database_dir: Path | str | None = ..., + pybids_reset_database: bool = ..., + derivatives: bool | Path | str = ..., + pybids_config: str | None = ..., + limit_to: list[str] | None = ..., + participant_label: list[str] | None = ..., + exclude_participant_label: list[str] | None = ..., use_bids_inputs: Union[Literal[False], None] = ..., ) -> BidsDatasetDict: ... @@ -42,15 +43,15 @@ def generate_inputs( # pylint: disable=too-many-arguments @overload def generate_inputs( - bids_dir, + bids_dir: Path | str, pybids_inputs: InputsConfig, - pybids_database_dir=..., - pybids_reset_database=..., - derivatives=..., - pybids_config=..., - limit_to=..., - participant_label=..., - exclude_participant_label=..., + pybids_database_dir: Path | str | None = ..., + pybids_reset_database: bool = ..., + derivatives: bool | Path | str = ..., + pybids_config: str | None = ..., + limit_to: list[str] | None = ..., + participant_label: list[str] | None = ..., + exclude_participant_label: list[str] | None = ..., use_bids_inputs: Literal[True] = ..., ) -> BidsDataset: ... @@ -58,17 +59,17 @@ def generate_inputs( # pylint: disable=too-many-arguments, too-many-locals def generate_inputs( - bids_dir, + bids_dir: Path | str, pybids_inputs: InputsConfig, - pybids_database_dir=None, - pybids_reset_database=False, - derivatives=False, - pybids_config=None, - limit_to=None, - participant_label=None, - exclude_participant_label=None, - use_bids_inputs=None, -): + pybids_database_dir: Path | str | None = None, + pybids_reset_database: bool = False, + derivatives: bool | Path | str = False, + pybids_config: str | None = None, + limit_to: list[str] | None = None, + participant_label: list[str] | None = None, + exclude_participant_label: list[str] | None = None, + use_bids_inputs: bool | None = None, +) -> BidsDataset | BidsDatasetDict: """Dynamically generate snakemake inputs using pybids_inputs Pybids is used to parse the bids_dir. 
Custom paths can also be parsed by including @@ -76,13 +77,14 @@ def generate_inputs( Parameters ---------- - bids_dir : str + bids_dir Path to bids directory - pybids_inputs : dict + pybids_inputs Configuration for bids inputs, with keys as the names (``str``) - Nested `dicts` with the following required keys: + Nested `dicts` with the following required keys (for complete info, see + :class:`~snakebids.types.InputConfig`): * ``"filters"``: Dictionary of entity: "values" (dict of str -> str or list of str). The entity keywords should the bids tags on which to filter. The values @@ -100,36 +102,36 @@ def generate_inputs( as in ``/path/to/sub-{subject}/{wildcard_1}-{wildcard_2}``. This path will be parsed without pybids, allowing the use of non-bids-compliant paths. - pybids_database_dir : str + pybids_database_dir Path to database directory. If None is provided, database is not used - pybids_reset_database : bool + pybids_reset_database A boolean that determines whether to reset / overwrite existing database. - derivatives : bool + derivatives Indicates whether pybids should look for derivative datasets under bids_dir. These datasets must be properly formatted according to bids specs to be recognized. Defaults to False. - limit_to : list of str, optional + limit_to If provided, indicates which input descriptors from pybids_inputs should be parsed. For example, if pybids_inputs describes ``"bold"`` and ``"dwi"`` inputs, and ``limit_to = ["bold"]``, only the "bold" inputs will be parsed. "dwi" will be ignored - participant_label : str or list of str, optional + participant_label Indicate one or more participants to be included from input parsing. This may cause errors if subject filters are also specified in pybids_inputs. It may not be specified if exclude_participant_label is specified - exclude_participant_label : str or list of str, optional + exclude_participant_label Indicate one or more participants to be excluded from input parsing. 
This may cause errors if subject filters are also specified in pybids_inputs. It may not be specified if participant_label is specified - use_bids_inputs : bool, optional + use_bids_inputs If True, opts in to the new :class:`BidsDataset` output, otherwise returns the classic dict. Currently, the classic dict will be returned by default, however, this will change in a future release. If you do not wish to migrate to the new @@ -137,7 +139,7 @@ def generate_inputs( Returns ------- - BidsDataset or BidsDatasetDict: + BidsDataset | BidsDatasetDict Object containing organized information about the bids inputs for consumption in snakemake. See the documentation of :class:`BidsDataset` for details and examples. @@ -314,7 +316,7 @@ def _all_custom_paths(config: InputsConfig): def _gen_bids_layout( bids_dir: Union[Path, str], - derivatives: bool, + derivatives: Path | str | bool, pybids_database_dir: Union[Path, str, None], pybids_reset_database: bool, pybids_config: Union[Path, str, None] = None, @@ -324,19 +326,19 @@ def _gen_bids_layout( Parameters ---------- - bids_dir : str + bids_dir Path to bids directory - derivatives : bool + derivatives A boolean (or path(s) to derivatives datasets) that determines whether snakebids will search in the derivatives subdirectory of the input dataset. - pybids_database_dir : str + pybids_database_dir Path to database directory. If None is provided, database is not used - pybids_reset_database : bool + pybids_reset_database A boolean that determines whether to reset / overwrite existing database. 
@@ -359,7 +361,7 @@ def _gen_bids_layout( return BIDSLayout( bids_dir, - derivatives=derivatives, + derivatives=derivatives, # type: ignore (mistake in BIDSLayout typing) validate=False, config=pybids_config, database_path=pybids_database_dir, diff --git a/snakebids/types.py b/snakebids/types.py index f690a7cc..e27273e3 100644 --- a/snakebids/types.py +++ b/snakebids/types.py @@ -7,7 +7,36 @@ class InputConfig(TypedDict, total=False): """Configuration passed in snakebids.yaml file""" filters: dict[str, str | bool | list[str]] + """Filters to pass on to :class:`BIDSLayout.get() <bids.layout.BIDSLayout.get>` + + Each key refers to the name of an entity. Values may take the following forms: + + * :class:`string <str>`: Restricts the entity to the exact string given + * :class:`bool`: ``True`` requires the entity to be present (with any value). + ``False`` requires the entity to be absent. + * :class:`list` [:class:`str`]: List of allowable values the entity may take. + + In addition, a few special filters may be added which carry different meanings: + + * ``regex_search: True``: If present, all strings will be interpreted as regex + * ``scope``: Restricts the scope of the component. It may take the following values: + - ``"all"``: search everything (default behaviour) + - ``"raw"``: only search the top-level raw dataset + - ``"derivatives"``: only search derivative datasets + - ``<PipelineName>``: only search derivative datasets with a matching pipeline + name + """ + wildcards: list[str] + """Wildcards to allow in the component. + + Each value in the list refers to the name of an entity. If the entity is present, + the generated :class:`~snakebids.BidsComponent` will have values of this entity + substituted for wildcards in the :attr:`~snakebids.BidsComponent.path`, and the + entity will be included in the :attr:`~snakebids.BidsComponent.zip_lists`. + + If the entity is not found, it will be ignored. + """ custom_path: str