From ec3d21e8daecaf264319d5d6a6a562c6bb063f12 Mon Sep 17 00:00:00 2001 From: Jason Kai Date: Wed, 13 Dec 2023 19:51:06 -0500 Subject: [PATCH] Update URLs to new repo link Search and replace of `akhanf` -> `khanlab` to update the repo URL. --- README.md | 33 ++++++++------ docs/conf.py | 2 +- docs/tutorial/tutorial.md | 43 ++++++++----------- pyproject.toml | 2 +- .../tests/data/dataset_description.json | 29 ++++++------- 5 files changed, 53 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index 69dff0eb..677131d6 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,34 @@ - # Snakebids -[![Tests](https://github.com/akhanf/snakebids/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/akhanf/snakebids/actions/workflows/test.yml?query=branch%3Amain) + +[![Tests](https://github.com/khanlab/snakebids/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/khanlab/snakebids/actions/workflows/test.yml?query=branch%3Amain) [![Documentation Status](https://readthedocs.org/projects/snakebids/badge/?version=stable)](https://snakebids.readthedocs.io/en/stable/?badge=stable) -[![Version](https://img.shields.io/github/v/tag/akhanf/snakebids?label=version)](https://pypi.org/project/snakebids/) +[![Version](https://img.shields.io/github/v/tag/khanlab/snakebids?label=version)](https://pypi.org/project/snakebids/) [![Python versions](https://img.shields.io/pypi/pyversions/snakebids)](https://pypi.org/project/snakebids/) [![DOI](https://zenodo.org/badge/309495236.svg)](https://zenodo.org/badge/latestdoi/309495236) - [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) Snakebids is a Python package that extends [Snakemake](https://snakemake.github.io), enabling users to create reproducible, scalable pipelines for processing neuroimaging data in the [BIDS 
format](https://bids.neuroimaging.io). Snakebids workflows expose a CLI that conforms to the [BIDS App](https://bids-apps.neuroimaging.io) guidelines. ## Features + Snakebids includes all of the features of Snakemake, including flexible configuration, parallel execution, and Docker/Singularity support, plus: -* **Built-in support for BIDS datasets**: Seamless workflow functionality with a wide range of BIDS datasets, accomodating various levels of complexity. -* **BIDS App Creation**: Provide command-line invocations of your workflow following BIDS App guidelines, ensuring reproducibility and enhancing accessibility of your workflow. -* **BIDS Path Construction**: Easy, flexible construction of valid BIDS paths following BIDS guiding principles, promoting data organization and sharing. -* **Plugin System**: Extend the functionality of Snakebids by creating and using plugins to meet your workflow's needs. -* **Pybids Querying**: Leverages [Pybids](https://bids-standard.github.io/pybids/) to efficiently retrieve specific data required. +- **Built-in support for BIDS datasets**: Seamless workflow functionality with a wide range of BIDS datasets, accommodating various levels of complexity. +- **BIDS App Creation**: Provide command-line invocations of your workflow following BIDS App guidelines, ensuring reproducibility and enhancing accessibility of your workflow. +- **BIDS Path Construction**: Easy, flexible construction of valid BIDS paths following BIDS guiding principles, promoting data organization and sharing. +- **Plugin System**: Extend the functionality of Snakebids by creating and using plugins to meet your workflow's needs. +- **Pybids Querying**: Leverages [Pybids](https://bids-standard.github.io/pybids/) to efficiently retrieve specific data required. ## Installation + Snakebids can be installed using pip: -```bash +```bash pip install snakebids ``` ## Usage + To create and run a Snakebids workflow, you need to: 1. 
**Create a Snakefile**: Define the steps of your workflow, including input / output files, processing rules, and dependencies @@ -35,9 +38,10 @@ To create and run a Snakebids workflow, you need to: For detailed instructions and examples, please refer to the [**documentation**](https://snakebids.readthedocs.io/en/stable/index.html). ## Contributing -Snakebids is an open-source project, and contributions are welcome! If you have any bug reports, feature requests, or improvements, please submit them to the [**issues page**](https://github.com/akhanf/snakebids). -To contribute, first clone the Github repository. Snakebids dependencies are managed with Poetry (version 1.2 or higher). Please refer to the [poetry website](https://python-poetry.org/docs/master/#installation) for installation instructions. +Snakebids is an open-source project, and contributions are welcome! If you have any bug reports, feature requests, or improvements, please submit them to the [**issues page**](https://github.com/khanlab/snakebids/issues). + +To contribute, first clone the GitHub repository. Snakebids dependencies are managed with Poetry (version 1.2 or higher). Please refer to the [poetry website](https://python-poetry.org/docs/master/#installation) for installation instructions. _Note: Snakebids makes use of Poetry's dynamic versioning. To see a version number on locally installed Snakebids versions, you will have to also install `poetry-dynamic-versioning` plugin to your poetry installation (`poetry self add "poetry-dynamic-versioning\[plugin\]"). This is **not required** for contribution._ @@ -63,10 +67,13 @@ poetry run poe test Additionally, Snakebids uses pre-commit hooks (installed via the `poe setup` command above) to lint and format code (we use [black](https://github.com/psf/black), [isort](https://github.com/PyCQA/isort) and [ruff](https://beta.ruff.rs/docs/). By default, these hooks are run on every commit. Please be sure they all pass before making a PR. 
## License + Snakebids is distributed under the MIT License. ## Acknowledgements + Snakebids extends the Snakemake workflow management system and follows the guidelines outlined by the BIDS specification. ## Relevant papers -* Mölder F, Jablonski KP, Letcher B et al. Sustainable data analysis with Snakemake [version 2; peer review: 2 approved]. F1000Research. 2021. doi: [10.12688/f1000research.29032.2](https://doi.org/10.12688/f1000research.29032.2) \ No newline at end of file + +- Mölder F, Jablonski KP, Letcher B et al. Sustainable data analysis with Snakemake [version 2; peer review: 2 approved]. F1000Research. 2021. doi: [10.12688/f1000research.29032.2](https://doi.org/10.12688/f1000research.29032.2) diff --git a/docs/conf.py b/docs/conf.py index 68cade4e..8ee6d312 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -97,7 +97,7 @@ # html_theme = "furo" html_theme_options = { - "source_repository": "https://github.com/akhanf/snakebids", + "source_repository": "https://github.com/khanlab/snakebids", "source_branch": "main", "source_directory": "docs/", } diff --git a/docs/tutorial/tutorial.md b/docs/tutorial/tutorial.md index 816097ad..5bca6bef 100644 --- a/docs/tutorial/tutorial.md +++ b/docs/tutorial/tutorial.md @@ -23,6 +23,7 @@ $ cd snakebids-tutorial ``` Check your python version to make sure you have at least version 3.7 or higher: + ```console $ python --version Python 3.10.0 @@ -41,7 +42,7 @@ And use pip to install snakebids: $ pip install snakebids ``` -In our example, we'll be using the [`fslmaths`](https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/Fslutils) tool from [*FSL*](https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/). If you want to actually run the workflow, you'll need to have FSL installed. This is not actually necessary to follow along the tutorial however, as we can use "dry runs" to see what snakemake *would* do if FSL were installed. 
+In our example, we'll be using the [`fslmaths`](https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/Fslutils) tool from [_FSL_](https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/). If you want to actually run the workflow, you'll need to have FSL installed. This is not actually necessary to follow along with the tutorial, however, as we can use "dry runs" to see what snakemake _would_ do if FSL were installed. ### Getting the dataset @@ -50,7 +51,7 @@ We will be running the tutorial on a test dataset consisting only of empty files If you wish to follow along using the same dataset, currently the easiest way is to start by cloning snakebids: ```console -$ git clone https://github.com/akhanf/snakebids.git +$ git clone https://github.com/khanlab/snakebids.git ``` Then copy the following directory: @@ -61,15 +62,15 @@ $ cp -r snakebids/docs/tutorial/bids ./data It's also perfectly possible (and probably better!) to try the tutorial on your own dataset. Just adjust any paths below so that they match your data! -Part I: Snakemake -==================== +# Part I: Snakemake (step_0)= + ## Step 0: a basic non-generic workflow In this rule, we start by creating a rule that is effectively hard-coding the paths for input and output to re-create the command as above. -In this rule we have an ``input:`` section for input **files**, a ``params:`` section for **non-file** parameters, and an ``output:`` section for output files. The shell section is used to build the shell command, and can refer to the input, output or params using curly braces. Note that any of these can also be named inputs, but we have only used this for the ``sigma`` parameter in this case. +In this rule we have an ``input:`` section for input **files**, a ``params:`` section for **non-file** parameters, and an ``output:`` section for output files. The shell section is used to build the shell command, and can refer to the input, output or params using curly braces. 
Note that any of these can also be named inputs, but we have only used this for the ``sigma`` parameter in this case. ```{literalinclude} step0/Snakefile :language: python @@ -80,6 +81,7 @@ In this rule we have an ``input:`` section for input **files**, a ``params:`` se With this rule in our Snakefile, we can then run ``snakemake -np`` to execute a dry-run of the workflow. Here the ``-n`` specifies dry-run, and the ``-p`` prints any shell commands that are to be executed. ```{asciinema} step0/step0.cast + ``` When we invoke ``snakemake``, it uses the first rule in the snakefile as the ``target`` rule. The target rule is what snakemake uses as a starting point to determine what other rules need to be run in order to generate the inputs. We'll learn a bit more about that in the next step. @@ -87,11 +89,12 @@ When we invoke ``snakemake``, it uses the first rule in the snakefile as the ``t So far, we just have a fancy way of specifying the exact same command we started with, so there is no added benefit (yet). But we will soon add to this rule to make it more generalizable. (step_1)= + ## Step 1: adding wildcards First step to make the workflow generalizeable is to replace the hard-coded identifiers (e.g. the subject, task and run) with wildcards. -In the Snakefile, we can replace `sub-001` with `sub-{subject}`, and so forth for task and run. Now the rule is generic for any subject, task, or run. +In the Snakefile, we can replace `sub-001` with `sub-{subject}`, and so forth for task and run. Now the rule is generic for any subject, task, or run. ```{literalinclude} step1/Snakefile :language: python @@ -103,6 +106,7 @@ In the Snakefile, we can replace `sub-001` with `sub-{subject}`, and so forth fo However, if we try to execute (dry-run) the workflow as before, we get an error. This is because the ``target`` rule now has wildcards in it. So snakemake is unable to determine what rules need to be run to generate the inputs, since the wildcards can take any value. 
```{asciinema} step1/step1.cast + ``` So for the time being, we will make use of the snakemake command-line argument to specify ``targets``, and specify the file we want generated from the command-line, by running: @@ -123,13 +127,13 @@ Now, try changing the output smoothing value, e.g. ``fwhm-10mm``, and see what h As expected the command still uses a smoothing value of 2.12, since that has been hard-coded, but we will see how to rectify this in the next step. (step_2)= + ## Step 2: adding a params function As we noted, the sigma parameter needs to be computed from the FWHM. We can use a function to do this. Functions can be used for any ``input`` or ``params``, and must take ``wildcards`` as an input argument, which provides a mechanism to pass the wildcards (determined from the output file) to the function. We can thus define a simple function that returns a string representing ``FWHM/2.355`` as follows: - ```{literalinclude} step2/Snakefile :lines: 1-2 :caption: Snakefile @@ -161,9 +165,11 @@ Here is the full Snakefile: Now try running the workflow again, with `fwhm-5` as well as `fwhm-10`. ```{asciinema} step2/step2.cast + ``` (step_3)= + ## Step 3: adding a target rule Now we have a generic rule, but it is pretty tedious to have to type out the filename of each target from the command-line in order to use it. @@ -174,7 +180,6 @@ In this case, we have a BIDS dataset with two runs (run-1, run-2), and suppose w A very useful function in snakemake is [`expand()`][expand_func]. It is a way to perform array expansion to create lists of strings (input filenames). 
- ```{literalinclude} step3/Snakefile :language: python :caption: Snakefile @@ -189,6 +194,7 @@ $ snakemake -np ``` ```{asciinema} step3/step3.cast + ``` The entire Snakefile for reference is: @@ -200,6 +206,7 @@ The entire Snakefile for reference is: ``` (step_4)= + ## Step 4: adding a config file We have a functional workflow, but suppose you need to configure or run it on another bids dataset with different subjects, tasks, runs, or you want to run it for different smoothing values. You have to actually modify your workflow in order to do this. @@ -220,7 +227,6 @@ Snakemake will then handle reading it in, and making the configuration variables In our config file, we will add variables for everything in the target rule [`expand()`][expand_func]: - ```{literalinclude} step4/config.yml :language: yaml :caption: config.yaml @@ -238,22 +244,21 @@ In our Snakefile, we then need to replace these hardcoded values with ``config[k After these changes, the workflow should still run just like the last step, but now you can make any changes via the config file. ```{asciinema} step4/step4.cast -``` +``` -Part II: Snakebids -================== +# Part II: Snakebids Now that we have a fully functioning and generic Snakemake workflow, let's see what Snakebids can add. (step_5)= + ## Step 5: the bids() function The first thing we can make use of is the {func}`~snakebids.bids` function. This provides an easy way to generate bids filenames. This is especially useful when defining output files in your workflow and you have many bids entities. In our existing workflow, this was our output file: - ```{literalinclude} step4/Snakefile :language: python :caption: Snakefile @@ -274,8 +279,6 @@ To create the same path using {func}`bids() `, we just need to s :lineno-match: ``` - - ```{note} To make a snakemake wildcard, we wrapped the `'value'` in curly braces (e.g. `'{value}'`). 
``` @@ -292,7 +295,6 @@ The Snakefile with the output filename replaced (in both rules) is below: :caption: Snakefile ``` - ## Step 6: parsing the BIDS dataset So far, we have had to manually enter the path to input bold file in the config file, and also specify what subjects, tasks, and runs we want processed. Can't we use the fact that we have a BIDS dataset to automate this a bit more? @@ -303,7 +305,6 @@ Snakebids lets you parse a bids dataset (using [pybids](inv:pybids:std:doc#index To add this parsing to the workflow, we call the {func}`generate_inputs() ` function before our rules are defined, and pass along some configuration data to specify the location of the bids directory (`bids_dir`) and the inputs we want to parse for the workflow (`pybids_inputs`). The function returns a {class}`BidsDataset `, which we'll assign to a variable called `inputs`: - ```{literalinclude} step6/Snakefile :language: python :linenos: @@ -312,12 +313,10 @@ To add this parsing to the workflow, we call the {func}`generate_inputs() `. To get access to the old dict style return, the `use_bids_inputs` parameter must be set to False. A tutorial for the old syntax can be found on [the v0.5.0 docs](https://snakebids.readthedocs.io/en/v0.5.0/tutorial/tutorial.html#part-ii-snakebids). ``` - The config variables we need pre-defined are as follows:: ```{literalinclude} step6/config.yml @@ -327,7 +326,6 @@ The config variables we need pre-defined are as follows:: :emphasize-lines: 1, 9-20 ``` - The `pybids_inputs` dict defines what types of inputs the workflow can make use of (i.e. the top-level keys, `bold` in this case), and for each input, how to filter for them (i.e. the `filters` dict), and what BIDS entities to replace with wildcards in the snakemake workflow (i.e. the `wildcards` dict). 
```{note} @@ -383,7 +381,6 @@ Notice that `inputs['bold'].path`{l=python} is the same as the path we wrote und :emphasize-lines: 3 ``` - ## Step 7: using input wildcards {attr}`BidsComponent.path ` already grants us a lot of flexibility, but we can still do more! In addition to the three main attributes of {class}`BidsComponents ` already described, the class offers a number of special properties we can use in our workflows. First, we'll look at {attr}`BidsComponent.wildcards `. This is a dict that maps each entity to the brace-wrapped `{wildcards}` we specified in `pybids_config`. If you printed this value in our test workflow, it would look like this: @@ -438,7 +435,6 @@ Finally, we can use our {class}`BidsComponents ` to eas [`BidsComponent.expand()`](#snakebids.BidsComponent.expand) still uses snakemake's [`expand()`][expand_func] under the hood, but applies extra logic to ensure only entity groups *actually* found in your dataset are used. If need to expand over additional wildcards, just add them as keyword args. They'll expand over every possible combination, just like snakemake's [`expand()`][expand_func]. ``` - For reference, here is the updated config file and Snakefile after these changes: ```{literalinclude} step7/config.yml @@ -447,7 +443,6 @@ For reference, here is the updated config file and Snakefile after these changes :caption: config.yml ``` - ```{literalinclude} step7/Snakefile :language: python :linenos: @@ -476,7 +471,7 @@ However, we will first need to add some additional information to our config fil :lineno-match: ``` -The above is standard boilerplate for any BIDS app. You can also define any new command-line arguments you wish. Snakebids uses the {mod}`argparse` module, and each entry in this `parse_args` dict thus becomes a call to {meth}`add_argument() ` from {class}`argparse.ArgumentParser`. 
When you run the workflow, snakebids adds the named argument values to the config dict, so your workflow can make use of it as if you had manually added the variable to your configfile. +The above is standard boilerplate for any BIDS app. You can also define any new command-line arguments you wish. Snakebids uses the {mod}`argparse` module, and each entry in this `parse_args` dict thus becomes a call to {meth}`add_argument() ` from {class}`argparse.ArgumentParser`. When you run the workflow, snakebids adds the named argument values to the config dict, so your workflow can make use of it as if you had manually added the variable to your configfile. Arguments that will receive paths should be given the item `type: Path`, as is done for `--derivatives` in the example above. Without this annotation, paths given to keyword arguments will be interpreted relative to the output directory. Indicating `type: Path` will tell Snakebids to first resolve the path according to your current working directory. 
diff --git a/pyproject.toml b/pyproject.toml index 5c82721a..09e1676d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "snakebids" version = "0.0.0" description = "BIDS integration into snakemake workflows" readme = "README.md" -repository = "https://github.com/akhanf/snakebids" +repository = "https://github.com/khanlab/snakebids" documentation = "https://snakebids.readthedocs.io/" authors = [ "Ali Khan ", diff --git a/snakebids/project_template/tests/data/dataset_description.json b/snakebids/project_template/tests/data/dataset_description.json index cec4e821..42b8aa44 100644 --- a/snakebids/project_template/tests/data/dataset_description.json +++ b/snakebids/project_template/tests/data/dataset_description.json @@ -1,18 +1,13 @@ { - "Acknowledgements": "Snakebids Development Team", - "Authors": [ - "Peter Van Dyken", - "Ali Khan", - "Tristan Kuehn", - "Jason Kai" - ], - "DatasetType": "raw", - "BIDSVersion": "1.4.1", - "HowToAcknowledge": "https://zenodo.org/record/8274278", - "License": "PDLL", - "Name": "Snakebids Test T1w Dataset (empty files)", - "ReferencesAndLinks": [ - "https://github.com/akhanf/snakebids", - "List of papers or websites" - ] - } + "Acknowledgements": "Snakebids Development Team", + "Authors": ["Peter Van Dyken", "Ali Khan", "Tristan Kuehn", "Jason Kai"], + "DatasetType": "raw", + "BIDSVersion": "1.4.1", + "HowToAcknowledge": "https://zenodo.org/record/8274278", + "License": "PDLL", + "Name": "Snakebids Test T1w Dataset (empty files)", + "ReferencesAndLinks": [ + "https://github.com/khanlab/snakebids", + "List of papers or websites" + ] +}