From 2aadd74c0f8f8c0c13e6fb5030e65d1aad9470b5 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Fri, 21 Jun 2024 15:09:20 +0200
Subject: [PATCH 01/23] Update operational code (#63)

* update readme

* switch ci to toolbox

* update to viash 0.9.0-RC6

* edit keywords

* fix version
---
 .github/workflows/test.yaml | 111 +-----------------------------------
 README.md                   |  30 +++++++---
 README.qmd                  |  25 +++++---
 _viash.yaml                 |   4 +-
 4 files changed, 43 insertions(+), 127 deletions(-)
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 6e1fc4b3..2591978f 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -3,114 +3,7 @@ name: Component Testing
 on:
   pull_request:
   push:
-    branches: [ '**' ]
 
 jobs:
-  run_ci_check_job:
-    runs-on: ubuntu-latest
-    outputs:
-      run_ci: ${{ steps.github_cli.outputs.check }}
-    steps:
-      - name: 'Check if branch has an existing pull request and the trigger was a push'
-        id: github_cli
-        run: |
-          pull_request=$(gh pr list -R ${{ github.repository }} -H ${{ github.ref_name }} --json url --state open --limit 1 | jq '.[0].url')
-          # If the branch has a PR and this run was triggered by a push event, do not run
-          if [[ "$pull_request" != "null" && "$GITHUB_REF_NAME" != "main" && "${{ github.event_name == 'push' }}" == "true" && "${{ !contains(github.event.head_commit.message, 'ci force') }}" == "true" ]]; then
-            echo "check=false" >> $GITHUB_OUTPUT
-          else
-            echo "check=true" >> $GITHUB_OUTPUT
-          fi
-        env:
-           GH_TOKEN: ${{ github.token }}
-
-  # phase 1
-  list:
-    needs: run_ci_check_job
-    runs-on: ubuntu-latest
-    if: ${{ needs.run_ci_check_job.outputs.run_ci == 'true' }}
-
-    outputs:
-      matrix: ${{ steps.set_matrix.outputs.matrix }}
-
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
-        
-    - name: Get head git commit message
-      id: get_head_commit_message
-      run: echo "HEAD_COMMIT_MESSAGE=$(git show -s --format=%s ${{ github.event.pull_request.head.sha || github.sha }})" >> "$GITHUB_OUTPUT"
-
-    - uses: viash-io/viash-actions/setup@v5
-
-    - name: Check if all config can be parsed if there is no unicode support
-      run: |
-        LANG=C viash ns list > /dev/null
-
-    # see https://github.com/viash-io/viash/issues/654
-    # and https://github.com/viash-io/viash-actions/pull/27
-    # - name: Get changed files
-    #   id: changed-files
-    #   uses: tj-actions/changed-files@v42
-    #   with:
-    #     separator: ";"
-    #     diff_relative: true
-    # - id: ns_list
-    #   uses: viash-io/viash-actions/ns-list@v5
-    #   with:
-    #     platform: docker
-    #     format: json
-    #     query: ^(?!workflows)
-    # - id: ns_list_filtered
-    #   uses: viash-io/viash-actions/project/detect-changed-components@v5
-    #   with:
-    #     input_file: "${{ steps.ns_list.outputs.output_file }}"
-    # - id: set_matrix
-    #   run: |
-    #     echo "matrix=$(jq -c '[ .[] | 
-    #       { 
-    #         "name": (.functionality.namespace + "/" + .functionality.name),
-    #         "config": .info.config,
-    #         "dir": .info.config | capture("^(?<dir>.*\/)").dir
-    #       }
-    #     ]' ${{ contains(steps.get_head_commit_message.outputs.HEAD_COMMIT_MESSAGE, 'ci force') && steps.ns_list.outputs.output_file || steps.ns_list_filtered.outputs.output_file }} )" >> $GITHUB_OUTPUT
-
-
-    - id: set_matrix
-      run: |
-        viash ns list --format json > ns_list.json
-        echo "matrix=$(jq -c '[ .[] | 
-          { 
-            "name": (.namespace + "/" + .name),
-            "config": .build_info.config,
-            "dir": .build_info.config | capture("^(?<dir>.*\/)").dir
-          }
-        ]' ns_list.json )" >> $GITHUB_OUTPUT
-
-  # phase 2
-  viash_test:
-    needs: list
-    if: ${{ needs.list.outputs.matrix != '[]' && needs.list.outputs.matrix != '' }}
-    runs-on: ubuntu-latest
-
-    strategy:
-      fail-fast: false
-      matrix:
-        component: ${{ fromJson(needs.list.outputs.matrix) }}
-
-    steps:
-    # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.'
-    - uses: data-intuitive/reclaim-the-bytes@v2
-
-    - uses: actions/checkout@v4
-
-    - uses: viash-io/viash-actions/setup@v5
-
-    - name: Run test
-      timeout-minutes: 30
-      run: |
-        viash test \
-          "${{ matrix.component.config }}" \
-          --cpus 2 \
-          --memory "6gb"
\ No newline at end of file
+  test:
+    uses: viash-hub/toolbox/.github/workflows/test.yaml@main
\ No newline at end of file
diff --git a/README.md b/README.md
index ecf807ca..29ace00b 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,24 @@
-# Base repository for reusable Viash components
 
 
-This repository is a collection of reproducible and reusable Viash
-components.
+# biobox
+
+[![ViashHub](https://img.shields.io/badge/ViashHub-biobox-7a4baa.png)](https://web.viash-hub.com/packages/biobox)
+[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2Fbiobox-blue.png)](https://github.com/viash-hub/biobox)
+[![GitHub
+License](https://img.shields.io/github/license/viash-hub/biobox.png)](https://github.com/viash-hub/biobox/blob/main/LICENSE)
+[![GitHub
+Issues](https://img.shields.io/github/issues/viash-hub/biobox.png)](https://github.com/viash-hub/biobox/issues)
+[![Viash
+version](https://img.shields.io/badge/Viash-v0.9.0--RC6-blue)](https://viash.io)
+
+A collection of bioinformatics tools for working with sequence data.
 
 ## Objectives
 
 - **Reusability**: Facilitating the use of components across various
   projects and contexts.
-- **Reproducibility**: Guaranteeing that bioinformatics analyses can be
-  reliably replicated.
+- **Reproducibility**: Ensuring that components are reproducible and can
+  be easily shared.
 - **Best Practices**: Adhering to established standards in software
   development and bioinformatics.
 
@@ -43,18 +52,21 @@ contribute a component to this repository.
 12. Create test script
 13. Create a `/var/software_versions.txt` file
 
-See the [CONTRIBUTING](CONTRIBUTING.md) file for more details.
+See the
+[CONTRIBUTING](https://github.com/viash-hub/biobox/blob/main/CONTRIBUTING.md)
+file for more details.
 
 ## Support and Community
 
 For support, questions, or to join our community:
 
 - **Issues**: Submit questions or issues via the [GitHub issue
-  tracker](https://github.com/viash-hub/base/issues).
+  tracker](https://github.com/viash-hub/biobox/issues).
 - **Discussions**: Join our discussions via [GitHub
-  Discussions](https://github.com/viash-hub/base/discussions).
+  Discussions](https://github.com/viash-hub/biobox/discussions).
 
 ## License
 
 This repository is licensed under an MIT license. See the
-[LICENSE](LICENSE) file for details.
+[LICENSE](https://github.com/viash-hub/biobox/blob/main/LICENSE) file
+for details.
diff --git a/README.qmd b/README.qmd
index 656cdac7..33e9461b 100644
--- a/README.qmd
+++ b/README.qmd
@@ -1,14 +1,25 @@
 ---
-title: Base repository for reusable Viash components
 format: gfm
 ---
+```{r setup, include=FALSE}
+project <- yaml::read_yaml("_viash.yaml")
+license <- paste0(project$links$repository, "/blob/main/LICENSE")
+contributing <- paste0(project$links$repository, "/blob/main/CONTRIBUTING.md")
+```
+# `r project$name`
+
+[![ViashHub](https://img.shields.io/badge/ViashHub-`r project$name`-7a4baa)](https://web.viash-hub.com/packages/`r project$name`) 
+[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2F`r project$name`-blue)](`r project$links$repository`) 
+[![GitHub License](https://img.shields.io/github/license/viash-hub/`r project$name`)](`r license`) 
+[![GitHub Issues](https://img.shields.io/github/issues/viash-hub/`r project$name`)](`r project$links$issue_tracker`) 
+[![Viash version](https://img.shields.io/badge/Viash-v`r gsub("-", "--", project$viash_version)`-blue)](https://viash.io)
 
-This repository is a collection of reproducible and reusable Viash components.
+`r project$description`
 
 ## Objectives
 
 - **Reusability**: Facilitating the use of components across various projects and contexts.
-- **Reproducibility**: Guaranteeing that bioinformatics analyses can be reliably replicated.
+- **Reproducibility**: Ensuring that components are reproducible and can be easily shared.
 - **Best Practices**: Adhering to established standards in software development and bioinformatics.
 
 ## Contributing
@@ -37,15 +48,15 @@ knitr::asis_output(
 )
 ```
 
-See the [CONTRIBUTING](CONTRIBUTING.md) file for more details.
+See the [CONTRIBUTING](`r contributing`) file for more details.
 
 
 ## Support and Community
 
 For support, questions, or to join our community:
 
-- **Issues**: Submit questions or issues via the [GitHub issue tracker](https://github.com/viash-hub/base/issues).
-- **Discussions**: Join our discussions via [GitHub Discussions](https://github.com/viash-hub/base/discussions).
+- **Issues**: Submit questions or issues via the [GitHub issue tracker](`r project$links$issue_tracker`).
+- **Discussions**: Join our discussions via [GitHub Discussions](`r project$links$repository`/discussions).
 
 ## License
-This repository is licensed under an MIT license. See the [LICENSE](LICENSE) file for details.
+This repository is licensed under an MIT license. See the [LICENSE](`r license`) file for details.
diff --git a/_viash.yaml b/_viash.yaml
index 5b6cf3f7..12b81586 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -2,12 +2,12 @@ name: biobox
 description: |
   A collection of bioinformatics tools for working with sequence data.
 license: MIT
-keywords: [bioinformatics, sequence, alignment, variant calling, dna, rna]
+keywords: [bioinformatics, modules, sequencing]
 links:
   issue_tracker: https://github.com/viash-hub/biobox/issues
-  repository: https://github.com/viash-hub/biobbox
+  repository: https://github.com/viash-hub/biobox
 
-viash_version: 0.9.0-RC3
+viash_version: 0.9.0-RC6
 
 config_mods: |
   .requirements.commands := ['ps']

From 069720a81e5e188f2c20078556f2393a8ef65e92 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Fri, 21 Jun 2024 17:44:47 +0200
Subject: [PATCH 02/23] update biobox

---
 CHANGELOG.md | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 99c859d2..973a0699 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-# base unreleased
+# biobox unreleased
 
 ## BREAKING CHANGES
 
@@ -51,8 +51,6 @@
 * `falco`: A C++ drop-in replacement of FastQC to assess the quality of sequence read data (PR #43).
 
 
-## MAJOR CHANGES
-
 ## MINOR CHANGES
 
 * Uniformize component metadata (PR #23).
@@ -61,8 +59,14 @@
 
 * Update to Viash 0.9.0-RC3 (PR #51).
 
+* Update to Viash 0.9.0-RC6 (PR #63).
+
+* Switch to viash-hub/toolbox actions (PR #64).
+
 ## DOCUMENTATION
 
+* Update README (PR #64).
+
 ## BUG FIXES
 
 * Add escaping character before leading hashtag in the description field of the config file (PR #50).

From ce9b8456e329aa152acb81530db9fd7d6f0c5cbf Mon Sep 17 00:00:00 2001
From: Toni Verbeiren <toni.verbeiren@gmail.com>
Date: Fri, 21 Jun 2024 18:03:21 +0200
Subject: [PATCH 03/23] cutadapt (#7)

* First commit, clone of cutadapt in htrnaseq + help.txt

* Add config

* Don't allow multiple: true when providing a FASTA file with adapters

* First version of script

* Updates and fixes - se/pe

* Add tests and fix --json argument

* Add software version

* Better consistency in using snake_case

* Update src/cutadapt/config.vsh.yaml

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>

* Update src/cutadapt/config.vsh.yaml

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>

* Update src/cutadapt/config.vsh.yaml

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>

* Specify --input and --input_r2 as separate arguments

* Avoid specifying default arg values

* Add more information to `--minimum_length` and `maximum_length`

* Add --cpus by means of $meta_cpus and set proper default

* Allow multiple for adapters/fasta and add test

* change multiple_sep to ';'

* add example

* simplify code with a helper function

* create directories in test

* use a different output extension if --fasta is provided

* decrease code duplication by separating optional outputs from paired/unpaired output arguments

* write custom tests for cutadapt

* fix _r2 arguments

* add debug flag as not to always print the cli command

* remove comment

* Update to Viash 0.9.0-RC4

* Ability to specify output globbing patterns

* Avoid the need for both output_dir and output

* Move fields from `info` to `links`

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>

* Move references back to the info field

* apologies, I proposed a wrong syntax

---------

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>
---
 src/cutadapt/config.vsh.yaml | 463 +++++++++++++++++++++++++++++++++++
 src/cutadapt/help.txt        | 218 +++++++++++++++++
 src/cutadapt/script.sh       | 237 ++++++++++++++++++
 src/cutadapt/test.sh         | 256 +++++++++++++++++++
 4 files changed, 1174 insertions(+)
 create mode 100644 src/cutadapt/config.vsh.yaml
 create mode 100644 src/cutadapt/help.txt
 create mode 100644 src/cutadapt/script.sh
 create mode 100644 src/cutadapt/test.sh

diff --git a/src/cutadapt/config.vsh.yaml b/src/cutadapt/config.vsh.yaml
new file mode 100644
index 00000000..a62f0aa9
--- /dev/null
+++ b/src/cutadapt/config.vsh.yaml
@@ -0,0 +1,463 @@
+name: cutadapt
+description: |
+  Cutadapt removes adapter sequences from high-throughput sequencing reads.
+keywords: [RNA-seq, scRNA-seq, high-throughput]
+links:
+  homepage: https://cutadapt.readthedocs.io
+  documentation: https://cutadapt.readthedocs.io
+  repository: https://github.com/marcelm/cutadapt
+references:
+  doi: 10.14806/ej.17.1.200
+license: MIT
+argument_groups:
+  ####################################################################
+  - name: Specify Adapters for R1
+    arguments:
+      - name: --adapter
+        alternatives: [-a]
+        type: string
+        multiple: true
+        description: |
+          Sequence of an adapter ligated to the 3' end (paired data:
+          of the first read). The adapter and subsequent bases are
+          trimmed. If a '$' character is appended ('anchoring'), the
+          adapter is only found if it is a suffix of the read.
+        required: false
+      - name: --front
+        alternatives: [-g]
+        type: string
+        multiple: true
+        description: |
+          Sequence of an adapter ligated to the 5' end (paired data:
+          of the first read). The adapter and any preceding bases
+          are trimmed. Partial matches at the 5' end are allowed. If
+          a '^' character is prepended ('anchoring'), the adapter is
+          only found if it is a prefix of the read.
+        required: false
+      - name: --anywhere
+        alternatives: [-b]
+        type: string
+        multiple: true
+        description: |
+          Sequence of an adapter that may be ligated to the 5' or 3'
+          end (paired data: of the first read). Both types of
+          matches as described under -a and -g are allowed. If the
+          first base of the read is part of the match, the behavior
+          is as with -g, otherwise as with -a. This option is mostly
+          for rescuing failed library preparations - do not use if
+          you know which end your adapter was ligated to!
+        required: false
+
+  ####################################################################
+  - name: Specify Adapters using Fasta files for R1
+    arguments:
+      - name: --adapter_fasta
+        type: file
+        multiple: true
+        description: |
+          Fasta file containing sequences of an adapter ligated to the 3' end (paired data:
+          of the first read). The adapter and subsequent bases are
+          trimmed. If a '$' character is appended ('anchoring'), the
+          adapter is only found if it is a suffix of the read.
+        required: false
+      - name: --front_fasta
+        type: file
+        description: |
+          Fasta file containing sequences of an adapter ligated to the 5' end (paired data:
+          of the first read). The adapter and any preceding bases
+          are trimmed. Partial matches at the 5' end are allowed. If
+          a '^' character is prepended ('anchoring'), the adapter is
+          only found if it is a prefix of the read.
+        required: false
+      - name: --anywhere_fasta
+        type: file
+        description: |
+          Fasta file containing sequences of an adapter that may be ligated to the 5' or 3'
+          end (paired data: of the first read). Both types of
+          matches as described under -a and -g are allowed. If the
+          first base of the read is part of the match, the behavior
+          is as with -g, otherwise as with -a. This option is mostly
+          for rescuing failed library preparations - do not use if
+          you know which end your adapter was ligated to!
+        required: false
+
+  ####################################################################
+  - name: Specify Adapters for R2
+    arguments:
+      - name: --adapter_r2
+        alternatives: [-A]
+        type: string
+        multiple: true
+        description: |
+          Sequence of an adapter ligated to the 3' end (paired data:
+          of the second read). The adapter and subsequent bases are
+          trimmed. If a '$' character is appended ('anchoring'), the
+          adapter is only found if it is a suffix of the read.
+        required: false
+      - name: --front_r2
+        alternatives: [-G]
+        type: string
+        multiple: true
+        description: |
+          Sequence of an adapter ligated to the 5' end (paired data:
+          of the second read). The adapter and any preceding bases
+          are trimmed. Partial matches at the 5' end are allowed. If
+          a '^' character is prepended ('anchoring'), the adapter is
+          only found if it is a prefix of the read.
+        required: false
+      - name: --anywhere_r2
+        alternatives: [-B]
+        type: string
+        multiple: true
+        description: |
+          Sequence of an adapter that may be ligated to the 5' or 3'
+          end (paired data: of the second read). Both types of
+          matches as described under -A and -G are allowed. If the
+          first base of the read is part of the match, the behavior
+          is as with -g, otherwise as with -a. This option is mostly
+          for rescuing failed library preparations - do not use if
+          you know which end your adapter was ligated to!
+        required: false
+
+  ####################################################################
+  - name: Specify Adapters using Fasta files for R2
+    arguments:
+      - name: --adapter_r2_fasta
+        type: file
+        description: |
+          Fasta file containing sequences of an adapter ligated to the 3' end (paired data:
+          of the second read). The adapter and subsequent bases are
+          trimmed. If a '$' character is appended ('anchoring'), the
+          adapter is only found if it is a suffix of the read.
+        required: false
+      - name: --front_r2_fasta
+        type: file
+        description: |
+          Fasta file containing sequences of an adapter ligated to the 5' end (paired data:
+          of the second read). The adapter and any preceding bases
+          are trimmed. Partial matches at the 5' end are allowed. If
+          a '^' character is prepended ('anchoring'), the adapter is
+          only found if it is a prefix of the read.
+        required: false
+      - name: --anywhere_r2_fasta
+        type: file
+        description: |
+          Fasta file containing sequences of an adapter that may be ligated to the 5' or 3'
+          end (paired data: of the second read). Both types of
+          matches as described under -A and -G are allowed. If the
+          first base of the read is part of the match, the behavior
+          is as with -g, otherwise as with -a. This option is mostly
+          for rescuing failed library preparations - do not use if
+          you know which end your adapter was ligated to!
+        required: false
+
+  ####################################################################
+  - name: Paired-end options
+    arguments:
+      - name: --pair_adapters
+        type: boolean_true
+        description: |
+          Treat adapters given with -a/-A etc. as pairs. Either both
+          or none are removed from each read pair.
+      - name: --pair_filter
+        type: string
+        choices: [any, both, first]
+        description: |
+          Which of the reads in a paired-end read have to match the
+          filtering criterion in order for the pair to be filtered.
+      - name: --interleaved
+        type: boolean_true
+        description: |
+          Read and/or write interleaved paired-end reads.
+
+  ####################################################################
+  - name: Input parameters
+    arguments:
+      - name: --input
+        type: file
+        required: true
+        description: |
+          Input fastq file for single-end reads or R1 for paired-end reads.
+      - name: --input_r2
+        type: file
+        required: false
+        description: |
+          Input fastq file for R2 in the case of paired-end reads.
+      - name: --error_rate
+        alternatives: [-E, --errors]
+        type: double
+        description: |
+          Maximum allowed error rate (if 0 <= E < 1), or absolute
+          number of errors for full-length adapter match (if E is an
+          integer >= 1). Error rate = no. of errors divided by
+          length of matching region. Default: 0.1 (10%).
+        example: 0.1
+      - name: --no_indels
+        type: boolean_false
+        description: |
+          Allow only mismatches in alignments.
+
+      - name: --times
+        type: integer
+        alternatives: [-n]
+        description: |
+          Remove up to COUNT adapters from each read. Default: 1.
+        example: 1
+      - name: --overlap
+        alternatives: [-O]
+        type: integer
+        description: |
+          Require MINLENGTH overlap between read and adapter for an
+          adapter to be found. The default is 3.
+        example: 3
+      - name: --match_read_wildcards
+        type: boolean_true
+        description: |
+          Interpret IUPAC wildcards in reads.
+      - name: --no_match_adapter_wildcards
+        type: boolean_false
+        description: |
+          Do not interpret IUPAC wildcards in adapters.
+      - name: --action
+        type: string
+        choices:
+          - trim
+          - retain
+          - mask
+          - lowercase
+          - none
+        description: |
+          What to do if a match was found. trim: trim adapter and
+          up- or downstream sequence; retain: trim, but retain
+          adapter; mask: replace with 'N' characters; lowercase:
+          convert to lowercase; none: leave unchanged.
+          The default is trim.
+        example: trim
+      - name: --revcomp
+        alternatives: [--rc]
+        type: boolean_true
+        description: |
+          Check both the read and its reverse complement for adapter
+          matches. If match is on reverse-complemented version,
+          output that one.
+
+  ####################################################################
+  - name: Read modifications
+    arguments:
+      - name: --cut
+        alternatives: [-u]
+        type: integer
+        multiple: true
+        description: |
+          Remove LEN bases from each read (or R1 if paired; use --cut_r2
+          option for R2). If LEN is positive, remove bases from the
+          beginning. If LEN is negative, remove bases from the end.
+          Can be used twice if LENs have different signs. Applied
+          *before* adapter trimming.
+      - name: --cut_r2
+        type: integer
+        multiple: true
+        description: |
+          Remove LEN bases from each read (for R2). If LEN is positive, remove bases from the
+          beginning. If LEN is negative, remove bases from the end.
+          Can be used twice if LENs have different signs. Applied
+          *before* adapter trimming.
+      - name: --nextseq_trim
+        type: string
+        description: |
+          NextSeq-specific quality trimming (each read). Trims also
+          dark cycles appearing as high-quality G bases.
+      - name: --quality_cutoff
+        alternatives: [-q]
+        type: string
+        description: |
+          Trim low-quality bases from 5' and/or 3' ends of each read
+          before adapter removal. Applied to both reads if data is
+          paired. If one value is given, only the 3' end is trimmed.
+          If two comma-separated cutoffs are given, the 5' end is
+          trimmed with the first cutoff, the 3' end with the second.
+      - name: --quality_cutoff_r2
+        alternatives: [-Q]
+        type: string
+        description: |
+          Quality-trimming cutoff for R2. Default: same as for R1
+      - name: --quality_base
+        type: integer
+        description: |
+          Assume that quality values in FASTQ are encoded as
+          ascii(quality + N). This needs to be set to 64 for some
+          old Illumina FASTQ files. The default is 33.
+        example: 33
+      - name: --poly_a
+        type: boolean_true
+        description: Trim poly-A tails
+      - name: --length
+        alternatives: [-l]
+        type: integer
+        description: |
+          Shorten reads to LENGTH. Positive values remove bases at
+          the end while negative ones remove bases at the beginning.
+          This and the following modifications are applied after
+          adapter trimming.
+      - name: --trim_n
+        type: boolean_true
+        description: Trim N's on ends of reads.
+      - name: --length_tag
+        type: string
+        description: |
+          Search for TAG followed by a decimal number in the
+          description field of the read. Replace the decimal number
+          with the correct length of the trimmed read. For example,
+          use --length-tag 'length=' to correct fields like
+          'length=123'.
+        example: "length="
+      - name: --strip_suffix
+        type: string
+        description: |
+          Remove this suffix from read names if present. Can be
+          given multiple times.
+      - name: --prefix
+        alternatives: [-x]
+        type: string
+        description: |
+          Add this prefix to read names. Use {name} to insert the
+          name of the matching adapter.
+      - name: --suffix
+        alternatives: [-y]
+        type: string
+        description: |
+          Add this suffix to read names; can also include {name}
+      - name: --rename
+        type: string
+        description: |
+          Rename reads using TEMPLATE containing variables such as
+          {id}, {adapter_name} etc. (see documentation)
+      - name: --zero_cap
+        alternatives: [-z]
+        type: boolean_true
+        description: Change negative quality values to zero.
+
+  ####################################################################
+  - name: Filtering of processed reads
+    description: |
+      Filters are applied after above read modifications. Paired-end reads are
+      always discarded pairwise (see also --pair_filter).
+    arguments:
+      - name: --minimum_length
+        alternatives: [-m]
+        type: string
+        description: |
+          Discard reads shorter than LEN. Default is 0.
+          When trimming paired-end reads, the minimum lengths for R1 and R2 can be specified separately by separating them with a colon (:).
+          If the colon syntax is not used, the same minimum length applies to both reads, as discussed above.
+          Also, one of the values can be omitted to impose no restrictions.
+          For example, with -m 17:, the length of R1 must be at least 17, but the length of R2 is ignored.
+        example: "0"
+      - name: --maximum_length
+        alternatives: [-M]
+        type: string
+        description: |
+          Discard reads longer than LEN. Default: no limit.
+          For paired reads, see the remark for --minimum_length
+      - name: --max_n
+        type: string
+        description: |
+          Discard reads with more than COUNT 'N' bases. If COUNT is
+          a number between 0 and 1, it is interpreted as a fraction
+          of the read length.
+      - name: --max_expected_errors
+        alternatives: [--max_ee]
+        type: double  # expected errors is a sum of probabilities, so fractional thresholds must be accepted
+        description: |
+          Discard reads whose expected number of errors (computed
+          from quality values) exceeds ERRORS.
+      - name: --max_average_error_rate
+        alternatives: [--max_aer]
+        type: double  # a rate (errors / read length) lies between 0 and 1; an integer type rejects every useful value
+        description: |
+          As --max_expected_errors (see above), but divided by
+          length to account for reads of varying length.
+      - name: --discard_trimmed
+        alternatives: [--discard]
+        type: boolean_true
+        description: |
+          Discard reads that contain an adapter. Use also -O to
+          avoid discarding too many randomly matching reads.
+      - name: --discard_untrimmed
+        alternatives: [--trimmed_only]
+        type: boolean_true
+        description: |
+          Discard reads that do not contain an adapter.
+      - name: --discard_casava
+        type: boolean_true
+        description: |
+          Discard reads that did not pass CASAVA filtering (header
+          has :Y:).
+
+  ####################################################################
+  - name: Output parameters
+    arguments:
+      - name: --report
+        type: string
+        choices: [full, minimal]
+        description: |
+          Which type of report to print: 'full' (default) or 'minimal'.
+        example: full
+      - name: --json
+        type: boolean_true
+        description: |
+          Write the report in JSON format (flag; no file name is passed).
+      - name: --output
+        type: file
+        description: |
+          Glob pattern for matching the expected output files.
+          Should include `$output_dir`.
+        example: "fastq/*_001.fast[a,q]"
+        direction: output
+        required: true
+        must_exist: true
+        multiple: true
+      - name: --fasta
+        type: boolean_true
+        description: |
+          Output FASTA instead of FASTQ, even on FASTQ input.
+      - name: --info_file
+        type: boolean_true
+        description: |
+          Write information about each read and its adapter matches
+          into info.txt in the output directory.
+          See the documentation for the file format.
+      # - name: -Z
+      # - name: --rest_file
+      # - name: --wildcard-file
+      # - name: --too_short_output
+      # - name: --too_long_output
+      # - name: --untrimmed_output
+      # - name: --untrimmed_paired_output
+      # - name: too_short_paired_output
+      # - name: too_long_paired_output
+  - name: Debug
+    arguments:
+      - type: boolean_true
+        name: --debug
+        description: Print debug information
+resources:
+  - type: bash_script
+    path: script.sh
+test_resources:
+  - type: bash_script
+    path: test.sh
+
+engines:
+  - type: docker
+    image: python:3.12
+    setup:
+      - type: python
+        pip:
+          - cutadapt
+      - type: docker
+        run: |
+          cutadapt --version | sed 's/\(.*\)/cutadapt: "\1"/' > /var/software_versions.txt
+runners:
+  - type: executable
+  - type: nextflow
diff --git a/src/cutadapt/help.txt b/src/cutadapt/help.txt
new file mode 100644
index 00000000..2280c3e2
--- /dev/null
+++ b/src/cutadapt/help.txt
@@ -0,0 +1,218 @@
+cutadapt version 4.6
+
+Copyright (C) 2010 Marcel Martin <marcel.martin@scilifelab.se> and contributors
+
+Cutadapt removes adapter sequences from high-throughput sequencing reads.
+
+Usage:
+    cutadapt -a ADAPTER [options] [-o output.fastq] input.fastq
+
+For paired-end reads:
+    cutadapt -a ADAPT1 -A ADAPT2 [options] -o out1.fastq -p out2.fastq in1.fastq in2.fastq
+
+Replace "ADAPTER" with the actual sequence of your 3' adapter. IUPAC wildcard
+characters are supported. All reads from input.fastq will be written to
+output.fastq with the adapter sequence removed. Adapter matching is
+error-tolerant. Multiple adapter sequences can be given (use further -a
+options), but only the best-matching adapter will be removed.
+
+Input may also be in FASTA format. Compressed input and output is supported and
+auto-detected from the file name (.gz, .xz, .bz2). Use the file name '-' for
+standard input/output. Without the -o option, output is sent to standard output.
+
+Citation:
+
+Marcel Martin. Cutadapt removes adapter sequences from high-throughput
+sequencing reads. EMBnet.Journal, 17(1):10-12, May 2011.
+http://dx.doi.org/10.14806/ej.17.1.200
+
+Run "cutadapt --help" to see all command-line options.
+See https://cutadapt.readthedocs.io/ for full documentation.
+
+Options:
+  -h, --help            Show this help message and exit
+  --version             Show version number and exit
+  --debug               Print debug log. Use twice to also print DP matrices
+  -j CORES, --cores CORES
+                        Number of CPU cores to use. Use 0 to auto-detect. Default:
+                        1
+
+Finding adapters:
+  Parameters -a, -g, -b specify adapters to be removed from each read (or from
+  R1 if data is paired-end. If specified multiple times, only the best matching
+  adapter is trimmed (but see the --times option). Use notation 'file:FILE' to
+  read adapter sequences from a FASTA file.
+
+  -a ADAPTER, --adapter ADAPTER
+                        Sequence of an adapter ligated to the 3' end (paired data:
+                        of the first read). The adapter and subsequent bases are
+                        trimmed. If a '$' character is appended ('anchoring'), the
+                        adapter is only found if it is a suffix of the read.
+  -g ADAPTER, --front ADAPTER
+                        Sequence of an adapter ligated to the 5' end (paired data:
+                        of the first read). The adapter and any preceding bases
+                        are trimmed. Partial matches at the 5' end are allowed. If
+                        a '^' character is prepended ('anchoring'), the adapter is
+                        only found if it is a prefix of the read.
+  -b ADAPTER, --anywhere ADAPTER
+                        Sequence of an adapter that may be ligated to the 5' or 3'
+                        end (paired data: of the first read). Both types of
+                        matches as described under -a and -g are allowed. If the
+                        first base of the read is part of the match, the behavior
+                        is as with -g, otherwise as with -a. This option is mostly
+                        for rescuing failed library preparations - do not use if
+                        you know which end your adapter was ligated to!
+  -e E, --error-rate E, --errors E
+                        Maximum allowed error rate (if 0 <= E < 1), or absolute
+                        number of errors for full-length adapter match (if E is an
+                        integer >= 1). Error rate = no. of errors divided by
+                        length of matching region. Default: 0.1 (10%)
+  --no-indels           Allow only mismatches in alignments. Default: allow both
+                        mismatches and indels
+  -n COUNT, --times COUNT
+                        Remove up to COUNT adapters from each read. Default: 1
+  -O MINLENGTH, --overlap MINLENGTH
+                        Require MINLENGTH overlap between read and adapter for an
+                        adapter to be found. Default: 3
+  --match-read-wildcards
+                        Interpret IUPAC wildcards in reads. Default: False
+  -N, --no-match-adapter-wildcards
+                        Do not interpret IUPAC wildcards in adapters.
+  --action {trim,retain,mask,lowercase,none}
+                        What to do if a match was found. trim: trim adapter and
+                        up- or downstream sequence; retain: trim, but retain
+                        adapter; mask: replace with 'N' characters; lowercase:
+                        convert to lowercase; none: leave unchanged. Default: trim
+  --rc, --revcomp       Check both the read and its reverse complement for adapter
+                        matches. If match is on reverse-complemented version,
+                        output that one. Default: check only read
+
+Additional read modifications:
+  -u LEN, --cut LEN     Remove LEN bases from each read (or R1 if paired; use -U
+                        option for R2). If LEN is positive, remove bases from the
+                        beginning. If LEN is negative, remove bases from the end.
+                        Can be used twice if LENs have different signs. Applied
+                        *before* adapter trimming.
+  --nextseq-trim 3'CUTOFF
+                        NextSeq-specific quality trimming (each read). Trims also
+                        dark cycles appearing as high-quality G bases.
+  -q [5'CUTOFF,]3'CUTOFF, --quality-cutoff [5'CUTOFF,]3'CUTOFF
+                        Trim low-quality bases from 5' and/or 3' ends of each read
+                        before adapter removal. Applied to both reads if data is
+                        paired. If one value is given, only the 3' end is trimmed.
+                        If two comma-separated cutoffs are given, the 5' end is
+                        trimmed with the first cutoff, the 3' end with the second.
+  --quality-base N      Assume that quality values in FASTQ are encoded as
+                        ascii(quality + N). This needs to be set to 64 for some
+                        old Illumina FASTQ files. Default: 33
+  --poly-a              Trim poly-A tails
+  --length LENGTH, -l LENGTH
+                        Shorten reads to LENGTH. Positive values remove bases at
+                        the end while negative ones remove bases at the beginning.
+                        This and the following modifications are applied after
+                        adapter trimming.
+  --trim-n              Trim N's on ends of reads.
+  --length-tag TAG      Search for TAG followed by a decimal number in the
+                        description field of the read. Replace the decimal number
+                        with the correct length of the trimmed read. For example,
+                        use --length-tag 'length=' to correct fields like
+                        'length=123'.
+  --strip-suffix STRIP_SUFFIX
+                        Remove this suffix from read names if present. Can be
+                        given multiple times.
+  -x PREFIX, --prefix PREFIX
+                        Add this prefix to read names. Use {name} to insert the
+                        name of the matching adapter.
+  -y SUFFIX, --suffix SUFFIX
+                        Add this suffix to read names; can also include {name}
+  --rename TEMPLATE     Rename reads using TEMPLATE containing variables such as
+                        {id}, {adapter_name} etc. (see documentation)
+  --zero-cap, -z        Change negative quality values to zero.
+
+Filtering of processed reads:
+  Filters are applied after above read modifications. Paired-end reads are
+  always discarded pairwise (see also --pair-filter).
+
+  -m LEN[:LEN2], --minimum-length LEN[:LEN2]
+                        Discard reads shorter than LEN. Default: 0
+  -M LEN[:LEN2], --maximum-length LEN[:LEN2]
+                        Discard reads longer than LEN. Default: no limit
+  --max-n COUNT         Discard reads with more than COUNT 'N' bases. If COUNT is
+                        a number between 0 and 1, it is interpreted as a fraction
+                        of the read length.
+  --max-expected-errors ERRORS, --max-ee ERRORS
+                        Discard reads whose expected number of errors (computed
+                        from quality values) exceeds ERRORS.
+  --max-average-error-rate ERROR_RATE, --max-aer ERROR_RATE
+                        as --max-expected-errors (see above), but divided by
+                        length to account for reads of varying length.
+  --discard-trimmed, --discard
+                        Discard reads that contain an adapter. Use also -O to
+                        avoid discarding too many randomly matching reads.
+  --discard-untrimmed, --trimmed-only
+                        Discard reads that do not contain an adapter.
+  --discard-casava      Discard reads that did not pass CASAVA filtering (header
+                        has :Y:).
+
+Output:
+  --quiet               Print only error messages.
+  --report {full,minimal}
+                        Which type of report to print: 'full' or 'minimal'.
+                        Default: full
+  --json FILE           Dump report in JSON format to FILE
+  -o FILE, --output FILE
+                        Write trimmed reads to FILE. FASTQ or FASTA format is
+                        chosen depending on input. Summary report is sent to
+                        standard output. Use '{name}' for demultiplexing (see
+                        docs). Default: write to standard output
+  --fasta               Output FASTA to standard output even on FASTQ input.
+  -Z                    Use compression level 1 for gzipped output files (faster,
+                        but uses more space)
+  --info-file FILE      Write information about each read and its adapter matches
+                        into FILE. See the documentation for the file format.
+  -r FILE, --rest-file FILE
+                        When the adapter matches in the middle of a read, write
+                        the rest (after the adapter) to FILE.
+  --wildcard-file FILE  When the adapter has N wildcard bases, write adapter bases
+                        matching wildcard positions to FILE. (Inaccurate with
+                        indels.)
+  --too-short-output FILE
+                        Write reads that are too short (according to length
+                        specified by -m) to FILE. Default: discard reads
+  --too-long-output FILE
+                        Write reads that are too long (according to length
+                        specified by -M) to FILE. Default: discard reads
+  --untrimmed-output FILE
+                        Write reads that do not contain any adapter to FILE.
+                        Default: output to same file as trimmed reads
+
+Paired-end options:
+  The -A/-G/-B/-U/-Q options work like their lowercase counterparts, but are
+  applied to R2 (second read in pair)
+
+  -A ADAPTER            3' adapter to be removed from R2
+  -G ADAPTER            5' adapter to be removed from R2
+  -B ADAPTER            5'/3 adapter to be removed from R2
+  -U LENGTH             Remove LENGTH bases from R2
+  -Q [5'CUTOFF,]3'CUTOFF
+                        Quality-trimming cutoff for R2. Default: same as for R1
+  -p FILE, --paired-output FILE
+                        Write R2 to FILE.
+  --pair-adapters       Treat adapters given with -a/-A etc. as pairs. Either both
+                        or none are removed from each read pair.
+  --pair-filter {any,both,first}
+                        Which of the reads in a paired-end read have to match the
+                        filtering criterion in order for the pair to be filtered.
+                        Default: any
+  --interleaved         Read and/or write interleaved paired-end reads.
+  --untrimmed-paired-output FILE
+                        Write second read in a pair to this FILE when no adapter
+                        was found. Use with --untrimmed-output. Default: output to
+                        same file as trimmed reads
+  --too-short-paired-output FILE
+                        Write second read in a pair to this file if pair is too
+                        short.
+  --too-long-paired-output FILE
+                        Write second read in a pair to this file if pair is too
+                        long.
+
diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh
new file mode 100644
index 00000000..5e1f9e30
--- /dev/null
+++ b/src/cutadapt/script.sh
@@ -0,0 +1,237 @@
+#!/bin/bash
+
+## VIASH START
+par_adapter='AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC;GGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'
+par_input='src/cutadapt/test_data/se/a.fastq'
+par_report='full'
+par_json='false'
+par_fasta='false'
+par_info_file='false'
+par_debug='true'
+## VIASH END
+
+function debug {
+  [[ "$par_debug" == "true" ]] && echo "DEBUG: $@"
+}
+
+output_dir=$(dirname "$par_output")  # quoted: $par_output is a glob pattern and must stay literal
+[[ ! -d "$output_dir" ]] && mkdir -p "$output_dir"
+
+# Init
+###########################################################
+
+echo ">> Paired-end data or not?"
+
+mode=""
+if [[ -z $par_input_r2 ]]; then
+  mode="se"
+  echo "  Single end"
+  input="$par_input"
+else
+  echo "  Paired end"
+  mode="pe"
+  input="$par_input $par_input_r2"
+fi
+
+# Adapter arguments
+#   - paired and single-end
+#   - string and fasta
+###########################################################
+
+function add_flags {
+  # Usage: add_flags "<v1;v2;...>" <flag> [prefix]; prints "<flag> <prefix>v1 <flag> <prefix>v2 ..."
+  local values=$1
+  local flag=$2
+  local prefix=${3:-}
+
+  # Callers already guard against empty input;
+  # bail out quietly if it happens anyway
+  if [[ -z $values ]]; then
+    return
+  fi
+
+  local joined="" item items
+  IFS=';' read -r -a items <<< "$values"
+  for item in "${items[@]}"; do
+    joined="$joined $flag $prefix$item"
+  done
+  echo $joined
+}
+
+debug ">> Parsing arguments dealing with adapters"
+adapter_args=$(echo \
+  ${par_adapter:+$(add_flags "$par_adapter" "--adapter")} \
+  ${par_adapter_fasta:+$(add_flags "$par_adapter_fasta" "--adapter" "file:")} \
+  ${par_front:+$(add_flags "$par_front" "--front")} \
+  ${par_front_fasta:+$(add_flags "$par_front_fasta" "--front" "file:")} \
+  ${par_anywhere:+$(add_flags "$par_anywhere" "--anywhere")} \
+  ${par_anywhere_fasta:+$(add_flags "$par_anywhere_fasta" "--anywhere" "file:")} \
+  ${par_adapter_r2:+$(add_flags "$par_adapter_r2" "-A")} \
+  ${par_adapter_fasta_r2:+$(add_flags "$par_adapter_fasta_r2" "-A" "file:")} \
+  ${par_front_r2:+$(add_flags "$par_front_r2" "-G")} \
+  ${par_front_fasta_r2:+$(add_flags "$par_front_fasta_r2" "-G" "file:")} \
+  ${par_anywhere_r2:+$(add_flags "$par_anywhere_r2" "-B")} \
+  ${par_anywhere_fasta_r2:+$(add_flags "$par_anywhere_fasta_r2" "-B" "file:")} \
+)
+
+debug "Arguments to cutadapt:"
+debug "$adapter_args"
+debug
+
+# Paired-end options
+###########################################################
+echo ">> Parsing arguments for paired-end reads"
+[[ "$par_pair_adapters" == "false" ]] && unset par_pair_adapters
+[[ "$par_interleaved" == "false" ]] && unset par_interleaved
+
+paired_args=$(echo \
+  ${par_pair_adapters:+--pair-adapters} \
+  ${par_pair_filter:+--pair-filter "${par_pair_filter}"} \
+  ${par_interleaved:+--interleaved}
+)
+debug "Arguments to cutadapt:"
+debug $paired_args
+debug
+
+# Input arguments 
+###########################################################
+echo ">> Parsing input arguments"
+[[ "$par_no_indels" == "true" ]] && unset par_no_indels
+[[ "$par_match_read_wildcards" == "false" ]] && unset par_match_read_wildcards
+[[ "$par_no_match_adapter_wildcards" == "true" ]] && unset par_no_match_adapter_wildcards
+[[ "$par_revcomp" == "false" ]] && unset par_revcomp
+
+input_args=$(echo \
+  ${par_error_rate:+--error-rate "${par_error_rate}"} \
+  ${par_no_indels:+--no-indels} \
+  ${par_times:+--times "${par_times}"} \
+  ${par_overlap:+--overlap "${par_overlap}"} \
+  ${par_match_read_wildcards:+--match-read-wildcards} \
+  ${par_no_match_adapter_wildcards:+--no-match-adapter-wildcards} \
+  ${par_action:+--action "${par_action}"} \
+  ${par_revcomp:+--revcomp} \
+)
+debug "Arguments to cutadapt:"
+debug $input_args
+debug
+
+# Read modifications
+###########################################################
+echo ">> Parsing read modification arguments"
+[[ "$par_poly_a" == "false" ]] && unset par_poly_a
+[[ "$par_trim_n" == "false" ]] && unset par_trim_n
+[[ "$par_zero_cap" == "false" ]] && unset par_zero_cap
+
+mod_args=$(echo \
+  ${par_cut:+--cut "${par_cut}"} \
+  ${par_cut_r2:+--cut_r2 "${par_cut_r2}"} \
+  ${par_nextseq_trim:+--nextseq-trim "${par_nextseq_trim}"} \
+  ${par_quality_cutoff:+--quality-cutoff "${par_quality_cutoff}"} \
+  ${par_quality_cutoff_r2:+--quality-cutoff_r2 "${par_quality_cutoff_r2}"} \
+  ${par_quality_base:+--quality-base "${par_quality_base}"} \
+  ${par_poly_a:+--poly-a} \
+  ${par_length:+--length "${par_length}"} \
+  ${par_trim_n:+--trim-n} \
+  ${par_length_tag:+--length-tag "${par_length_tag}"} \
+  ${par_strip_suffix:+--strip-suffix "${par_strip_suffix}"} \
+  ${par_prefix:+--prefix "${par_prefix}"} \
+  ${par_suffix:+--suffix "${par_suffix}"} \
+  ${par_rename:+--rename "${par_rename}"} \
+  ${par_zero_cap:+--zero-cap} \
+)
+debug "Arguments to cutadapt:"
+debug $mod_args
+debug
+
+# Filtering of processed reads arguments
+###########################################################
+echo ">> Filtering of processed reads arguments"
+[[ "$par_discard_trimmed" == "false" ]] && unset par_discard_trimmed
+[[ "$par_discard_untrimmed" == "false" ]] && unset par_discard_untrimmed
+[[ "$par_discard_casava" == "false" ]] && unset par_discard_casava
+
+# Minimum and maximum length need no transformation:
+# cutadapt parses the LEN[:LEN2] syntax itself.
+
+filter_args=$(echo \
+  ${par_minimum_length:+--minimum-length "${par_minimum_length}"} \
+  ${par_maximum_length:+--maximum-length "${par_maximum_length}"} \
+  ${par_max_n:+--max-n "${par_max_n}"} \
+  ${par_max_expected_errors:+--max-expected-errors "${par_max_expected_errors}"} \
+  ${par_max_average_error_rate:+--max-average-error-rate "${par_max_average_error_rate}"} \
+  ${par_discard_trimmed:+--discard-trimmed} \
+  ${par_discard_untrimmed:+--discard-untrimmed} \
+  ${par_discard_casava:+--discard-casava} \
+)
+debug "Arguments to cutadapt:"
+debug $filter_args
+debug
+
+# Optional output arguments
+###########################################################
+echo ">> Optional arguments"
+[[ "$par_json" == "false" ]] && unset par_json
+[[ "$par_fasta" == "false" ]] && unset par_fasta
+[[ "$par_info_file" == "false" ]] && unset par_info_file
+
+optional_output_args=$(echo \
+  ${par_report:+--report "${par_report}"} \
+  ${par_json:+--json "report.json"} \
+  ${par_fasta:+--fasta} \
+  ${par_info_file:+--info-file "info.txt"} \
+)
+
+debug "Arguments to cutadapt:"
+debug $optional_output_args
+debug
+
+# Output arguments
+# We write the output to a directory rather than
+# individual files.
+###########################################################
+
+if [[ -z $par_fasta ]]; then
+  ext="fastq"
+else
+  ext="fasta"
+fi
+
+if [ $mode = "se" ]; then
+  output_args=$(echo \
+    --output "$output_dir/{name}_001.$ext" \
+  )
+else
+  output_args=$(echo \
+    --output "$output_dir/{name}_R1_001.$ext" \
+    --paired-output "$output_dir/{name}_R2_001.$ext" \
+  )
+fi
+
+debug "Arguments to cutadapt:"
+debug $output_args
+debug
+
+# Full CLI
+# Set the --cores argument to 0 unless meta_cpus is set
+###########################################################
+echo ">> Running cutadapt"
+par_cpus=0
+[[ ! -z $meta_cpus ]] && par_cpus=$meta_cpus
+
+cli=$(echo \
+  $input \
+  $adapter_args \
+  $paired_args \
+  $input_args \
+  $mod_args \
+  $filter_args \
+  $optional_output_args \
+  $output_args \
+  --cores $par_cpus
+)
+
+debug ">> Full CLI to be run:"
+debug cutadapt $cli | sed -e 's/--/\r\n  --/g'
+debug
+
+cutadapt $cli
diff --git a/src/cutadapt/test.sh b/src/cutadapt/test.sh
new file mode 100644
index 00000000..eff997d7
--- /dev/null
+++ b/src/cutadapt/test.sh
@@ -0,0 +1,256 @@
+#!/bin/bash
+
+set -e
+
+#############################################
+# helper functions
+assert_file_exists() {
+  [ -f "$1" ] || (echo "File '$1' does not exist" && exit 1)
+}
+assert_file_doesnt_exist() {
+  [ ! -f "$1" ] || (echo "File '$1' exists but shouldn't" && exit 1)
+}
+assert_file_empty() {
+  [ ! -s "$1" ] || (echo "File '$1' is not empty but should be" && exit 1)
+}
+assert_file_not_empty() {
+  [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1)
+}
+assert_file_contains() {
+  grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1)
+}
+assert_file_not_contains() {  # succeed when file $1 has no line matching grep pattern $2, else exit 1
+  ! grep -q "$2" "$1" || (echo "File '$1' contains '$2' but shouldn't" && exit 1)
+}
+
+#############################################
+mkdir test_multiple_output
+cd test_multiple_output
+
+echo "#############################################"
+echo "> Run cutadapt with multiple outputs"
+
+cat > example.fa <<'EOF'
+>read1
+MYSEQUENCEADAPTER
+>read2
+MYSEQUENCEADAP
+>read3
+MYSEQUENCEADAPTERSOMETHINGELSE
+>read4
+MYSEQUENCEADABTER
+>read5
+MYSEQUENCEADAPTR
+>read6
+MYSEQUENCEADAPPTER
+>read7
+ADAPTERMYSEQUENCE
+>read8
+PTERMYSEQUENCE
+>read9
+SOMETHINGADAPTERMYSEQUENCE
+EOF
+
+"$meta_executable" \
+  --report minimal \
+  --output "out_test/*.fasta" \
+  --adapter ADAPTER \
+  --input example.fa \
+  --fasta \
+  --no_match_adapter_wildcards \
+  --json
+
+echo ">> Checking output"
+assert_file_exists "report.json"
+assert_file_exists "out_test/1_001.fasta"
+assert_file_exists "out_test/unknown_001.fasta"
+
+cd ..
+echo
+
+#############################################
+mkdir test_simple_single_end
+cd test_simple_single_end
+
+echo "#############################################"
+echo "> Run cutadapt on single-end data"
+
+cat > example.fa <<'EOF'
+>read1
+MYSEQUENCEADAPTER
+>read2
+MYSEQUENCEADAP
+>read3
+MYSEQUENCEADAPTERSOMETHINGELSE
+>read4
+MYSEQUENCEADABTER
+>read5
+MYSEQUENCEADAPTR
+>read6
+MYSEQUENCEADAPPTER
+>read7
+ADAPTERMYSEQUENCE
+>read8
+PTERMYSEQUENCE
+>read9
+SOMETHINGADAPTERMYSEQUENCE
+EOF
+
+"$meta_executable" \
+  --report minimal \
+  --output "out_test1/*.fasta" \
+  --adapter ADAPTER \
+  --input example.fa \
+  --fasta \
+  --no_match_adapter_wildcards \
+  --json
+
+echo ">> Checking output"
+assert_file_exists "report.json"
+assert_file_exists "out_test1/1_001.fasta"
+assert_file_exists "out_test1/unknown_001.fasta"
+
+echo ">> Check if output is empty"
+assert_file_not_empty "report.json"
+assert_file_not_empty "out_test1/1_001.fasta"
+assert_file_not_empty "out_test1/unknown_001.fasta"
+
+echo ">> Check contents"
+for i in 1 2 3 7 9; do
+  assert_file_contains "out_test1/1_001.fasta" ">read$i"
+done
+for i in 4 5 6 8; do
+  assert_file_contains "out_test1/unknown_001.fasta" ">read$i"
+done
+
+cd ..
+echo
+
+#############################################
+mkdir test_multiple_single_end
+cd test_multiple_single_end
+
+echo "#############################################"
+echo "> Run with a combination of inputs"
+
+cat > example.fa <<'EOF'
+>read1
+ACGTACGTACGTAAAAA
+>read2
+ACGTACGTACGTCCCCC
+>read3
+ACGTACGTACGTGGGGG
+>read4
+ACGTACGTACGTTTTTT
+EOF
+
+cat > adapters1.fasta <<'EOF'
+>adapter1
+CCCCC
+EOF
+
+cat > adapters2.fasta <<'EOF'
+>adapter2
+GGGGG
+EOF
+
+"$meta_executable" \
+  --report minimal \
+  --output "out_test2/*.fasta" \
+  --adapter AAAAA \
+  --adapter_fasta adapters1.fasta \
+  --adapter_fasta adapters2.fasta \
+  --input example.fa \
+  --fasta \
+  --json
+
+echo ">> Checking output"
+assert_file_exists "report.json"
+assert_file_exists "out_test2/1_001.fasta"
+assert_file_exists "out_test2/adapter1_001.fasta"
+assert_file_exists "out_test2/adapter2_001.fasta"
+assert_file_exists "out_test2/unknown_001.fasta"
+
+echo ">> Check if output is empty"
+assert_file_not_empty "report.json"
+assert_file_not_empty "out_test2/1_001.fasta"
+assert_file_not_empty "out_test2/adapter1_001.fasta"
+assert_file_not_empty "out_test2/adapter2_001.fasta"
+assert_file_not_empty "out_test2/unknown_001.fasta"
+
+echo ">> Check contents"
+assert_file_contains "out_test2/1_001.fasta" ">read1"
+assert_file_contains "out_test2/adapter1_001.fasta" ">read2"
+assert_file_contains "out_test2/adapter2_001.fasta" ">read3"
+assert_file_contains "out_test2/unknown_001.fasta" ">read4"
+
+cd ..
+echo
+
+#############################################
+mkdir test_simple_paired_end
+cd test_simple_paired_end
+
+echo "#############################################"
+echo "> Run cutadapt on paired-end data"
+
+cat > example_R1.fastq <<'EOF'
+@read1
+ACGTACGTACGTAAAAA
++
+IIIIIIIIIIIIIIIII
+@read2
+ACGTACGTACGTCCCCC
++
+IIIIIIIIIIIIIIIII
+EOF
+
+cat > example_R2.fastq <<'EOF'
+@read1
+ACGTACGTACGTGGGGG
++
+IIIIIIIIIIIIIIIII
+@read2
+ACGTACGTACGTTTTTT
++
+IIIIIIIIIIIIIIIII
+EOF
+
+"$meta_executable" \
+  --report minimal \
+  --output "out_test3/*.fastq" \
+  --adapter AAAAA \
+  --adapter_r2 GGGGG \
+  --input example_R1.fastq \
+  --input_r2 example_R2.fastq \
+  --quality_cutoff 20 \
+  --json \
+  ---cpus 1
+
+echo ">> Checking output"
+assert_file_exists "report.json"
+assert_file_exists "out_test3/1_R1_001.fastq"
+assert_file_exists "out_test3/1_R2_001.fastq"
+assert_file_exists "out_test3/unknown_R1_001.fastq"
+assert_file_exists "out_test3/unknown_R2_001.fastq"
+
+echo ">> Check if output is empty"
+assert_file_not_empty "report.json"
+assert_file_not_empty "out_test3/1_R1_001.fastq"
+assert_file_not_empty "out_test3/1_R2_001.fastq"
+assert_file_not_empty "out_test3/unknown_R1_001.fastq"
+
+echo ">> Check contents"
+assert_file_contains "out_test3/1_R1_001.fastq" "@read1"
+assert_file_contains "out_test3/1_R2_001.fastq" "@read1"
+assert_file_contains "out_test3/unknown_R1_001.fastq" "@read2"
+assert_file_contains "out_test3/unknown_R2_001.fastq" "@read2"
+
+cd ..
+echo
+
+#############################################
+
+echo "#############################################"
+echo "> Test successful"
+

From 2b06a959e9553727ccb282007ef007a09ecb7955 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Fri, 21 Jun 2024 18:04:24 +0200
Subject: [PATCH 04/23] update changelog

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 973a0699..a71db3b4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,8 @@
     - `busco/busco_list_datasets`: Lists available busco datasets (PR #18).
     - `busco/busco_download_datasets`: Download busco datasets (PR #19).
 
+* `cutadapt`: Remove adapter sequences from high-throughput sequencing reads (PR #7).
+
 * `featurecounts`: Assign sequence reads to genomic features (PR #11).
 
 * `bgzip`: Add bgzip functionality to compress and decompress files (PR #13).

From 76d86a2f30e0393536ff490ce96adc0302f8a75d Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Fri, 21 Jun 2024 18:08:28 +0200
Subject: [PATCH 05/23] update readme

---
 README.md  | 2 +-
 README.qmd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 29ace00b..984a1929 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 
 
-# biobox
+# 🌱📦 biobox
 
 [![ViashHub](https://img.shields.io/badge/ViashHub-biobox-7a4baa.png)](https://web.viash-hub.com/packages/biobox)
 [![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2Fbiobox-blue.png)](https://github.com/viash-hub/biobbox)
diff --git a/README.qmd b/README.qmd
index 33e9461b..7d36430b 100644
--- a/README.qmd
+++ b/README.qmd
@@ -6,7 +6,7 @@ project <- yaml::read_yaml("_viash.yaml")
 license <- paste0(project$links$repository, "/blob/main/LICENSE")
 contributing <- paste0(project$links$repository, "/blob/main/CONTRIBUTING.md")
 ```
-# `r project$name`
+# 🌱📦 `r project$name`
 
 [![ViashHub](https://img.shields.io/badge/ViashHub-`r project$name`-7a4baa)](https://web.viash-hub.com/packages/`r project$name`) 
 [![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2F`r project$name`-blue)](`r project$links$repository`) 

From 9d74c3b554f74d9e87c0fd4d7f18533dad1c9bdc Mon Sep 17 00:00:00 2001
From: Sai Nirmayi Yasa <92786623+sainirmayi@users.noreply.github.com>
Date: Fri, 21 Jun 2024 22:28:44 +0200
Subject: [PATCH 06/23] Update salmon quant arguments (#57)

* Make index an optional argument

* FIx argument type and add optional argument
---
 src/salmon/salmon_quant/config.vsh.yaml | 11 +++++++----
 src/salmon/salmon_quant/script.sh       |  3 ++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/salmon/salmon_quant/config.vsh.yaml b/src/salmon/salmon_quant/config.vsh.yaml
index 47d72665..b7e303f4 100644
--- a/src/salmon/salmon_quant/config.vsh.yaml
+++ b/src/salmon/salmon_quant/config.vsh.yaml
@@ -42,7 +42,7 @@ argument_groups:
         type: file
         description: |
           Salmon index.
-        required: true
+        required: false
         example: transcriptome_index
       - name: --unmated_reads
         alternatives: ["-r"]
@@ -320,12 +320,15 @@ argument_groups:
         example: 0.00001
       - name: --write_mappings
         alternatives: ["-z"]
-        type: file
-        direction: output
+        type: boolean_true
         description: |
           If this option is provided, then the selective-alignment results will be written out in SAM-compatible format. By default, output will be directed to stdout, but an alternative file name can be provided instead.
+      - name: --mapping_sam
+        type: file
+        description: Path to the file to which the selective-alignment results are written in SAM-compatible format. This option must be provided when using --write_mappings.
         required: false
-        example: mappings.sam 
+        direction: output
+        example: mappings.sam
       - name: --write_qualities
         type: boolean_true
         description: |
diff --git a/src/salmon/salmon_quant/script.sh b/src/salmon/salmon_quant/script.sh
index ace79711..4c9f69d5 100644
--- a/src/salmon/salmon_quant/script.sh
+++ b/src/salmon/salmon_quant/script.sh
@@ -21,6 +21,7 @@ set -e
 [[ "$par_softclip_overhangs" == "false" ]] && unset par_softclip_overhangs
 [[ "$par_full_length_alignment" == "false" ]] && unset par_full_length_alignment
 [[ "$par_hard_filter" == "false" ]] && unset par_hard_filter
+[[ "$par_write_mappings" == "false" ]] && unset par_write_mappings
 [[ "$par_write_qualities" == "false" ]] && unset par_write_qualities
 [[ "$par_alternative_init_mode" == "false" ]] && unset par_alternative_init_mode
 [[ "$par_skip_quant" == "false" ]] && unset par_skip_quant
@@ -96,7 +97,7 @@ salmon quant \
     ${par_full_length_alignment:+--fullLengthAlignment} \
     ${par_hard_filter:+--hardFilter} \
     ${par_min_aln_prob:+--minAlnProb "${par_min_aln_prob}"} \
-    ${par_write_mappings:+-z "${par_write_mappings}"} \
+    ${par_write_mappings:+--writeMappings="${par_mapping_sam}"} \
     ${par_write_qualities:+--writeQualities} \
     ${par_hit_filter_policy:+--hitFilterPolicy "${par_hit_filter_policy}"} \
     ${par_alternative_init_mode:+--alternativeInitMode} \

From b68f1edd7ae4774e1971cefcd18e3fc9832fbe18 Mon Sep 17 00:00:00 2001
From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com>
Date: Fri, 21 Jun 2024 22:30:34 +0200
Subject: [PATCH 07/23] FEAT: add bedtools getfasta. (#59)

* FEAT: add bedtools getfasta.

* Add PR number to CHANGELOG
---
 CHANGELOG.md                                  |   3 +
 .../bedtools_getfasta/config.vsh.yaml         | 103 +++++++++++++++
 src/bedtools/bedtools_getfasta/script.sh      |  22 ++++
 src/bedtools/bedtools_getfasta/test.sh        | 119 ++++++++++++++++++
 4 files changed, 247 insertions(+)
 create mode 100644 src/bedtools/bedtools_getfasta/config.vsh.yaml
 create mode 100644 src/bedtools/bedtools_getfasta/script.sh
 create mode 100644 src/bedtools/bedtools_getfasta/test.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a71db3b4..a3e3fa4d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -52,6 +52,9 @@
 
 * `falco`: A C++ drop-in replacement of FastQC to assess the quality of sequence read data (PR #43).
 
+* `bedtools`:
+    - `bedtools_getfasta`: extract sequences from a FASTA file for each of the
+                           intervals defined in a BED/GFF/VCF file (PR #59).
 
 ## MINOR CHANGES
 
diff --git a/src/bedtools/bedtools_getfasta/config.vsh.yaml b/src/bedtools/bedtools_getfasta/config.vsh.yaml
new file mode 100644
index 00000000..f1f49a87
--- /dev/null
+++ b/src/bedtools/bedtools_getfasta/config.vsh.yaml
@@ -0,0 +1,103 @@
+name: bedtools_getfasta
+namespace: bedtools
+description: Extract sequences from a FASTA file for each of the intervals defined in a BED/GFF/VCF file.
+keywords: [sequencing, fasta, BED, GFF, VCF]
+links:
+  documentation: https://bedtools.readthedocs.io/en/latest/content/tools/getfasta.html
+  repository: https://github.com/arq5x/bedtools2
+references:
+  doi: 10.1093/bioinformatics/btq033
+license: GPL-2.0
+requirements:
+  commands: [bedtools]
+
+argument_groups:
+  - name: Input arguments
+    arguments:
+      - name: --input_fasta
+        type: file
+        description: |
+          FASTA file containing sequences for each interval specified in the input BED file.
+          The headers in the input FASTA file must exactly match the chromosome column in the BED file.
+      - name: "--input_bed"
+        type: file
+        description: |
+          BED file containing intervals to extract from the FASTA file.
+          BED files containing a single region require a newline character
+          at the end of the line, otherwise a blank output file is produced.
+      - name: --rna
+        type: boolean_true
+        description: |
+          The FASTA is RNA not DNA. Reverse complementation handled accordingly.
+
+  - name: Run arguments
+    arguments:
+      - name: "--strandedness"
+        type: boolean_true
+        alternatives: ["-s"]
+        description: |
+          Force strandedness. If the feature occupies the antisense strand, the output sequence will
+          be reverse complemented. By default strandedness is not taken into account.
+
+  - name: Output arguments
+    arguments:
+      - name: --output
+        alternatives: [-o]
+        required: true
+        type: file
+        direction: output
+        description: |
+          Output file where the output from the 'bedtools getfasta' command will
+          be written to.
+      - name: --tab
+        type: boolean_true
+        description: |
+          Report extracted sequences in a tab-delimited format instead of in FASTA format.
+      - name: --bed_out
+        type: boolean_true
+        description: |
+          Report extracted sequences in a tab-delimited BED format instead of in FASTA format.
+      - name: "--name"
+        type: boolean_true
+        description: |
+          Set the FASTA header for each extracted sequence to be the "name" and coordinate columns from the BED feature.
+      - name: "--name_only"
+        type: boolean_true
+        description: |
+          Set the FASTA header for each extracted sequence to be the "name" column from the BED feature.
+      - name: "--split"
+        type: boolean_true
+        description: |
+          When --input_bed is in BED12 format, create a separate fasta entry for each block in a BED12 record,
+          blocks being described in the 11th and 12th column of the BED.
+      - name: "--full_header"
+        type: boolean_true
+        description: |
+          Use full fasta header. By default, only the word before the first space or tab is used.
+
+# Arguments not taken into account:
+#
+#       -fo           Specify an output file name. By default, output goes to stdout.
+#                     Not exposed: script.sh redirects stdout to --output instead.
+
+resources:
+  - type: bash_script
+    path: script.sh
+
+test_resources:
+  - type: bash_script
+    path: test.sh
+
+engines:
+  - type: docker
+    image: debian:stable-slim
+    setup:
+      - type: apt
+        packages: [bedtools, procps]
+      - type: docker
+        run: |
+          echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt
+
+runners:
+  - type: executable
+  - type: nextflow
diff --git a/src/bedtools/bedtools_getfasta/script.sh b/src/bedtools/bedtools_getfasta/script.sh
new file mode 100644
index 00000000..8e88b318
--- /dev/null
+++ b/src/bedtools/bedtools_getfasta/script.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+unset_if_false=( par_rna par_strandedness par_tab par_bed_out par_name par_name_only par_split par_full_header )
+
+# Unset flags whose value is "false" so the ${par_x:+...} expansions below add nothing.
+for par in "${unset_if_false[@]}"; do
+    test_val="${!par}"
+    [[ "$test_val" == "false" ]] && unset "$par"
+done
+
+bedtools getfasta \
+    -fi "$par_input_fasta" \
+    -bed "$par_input_bed" \
+    ${par_rna:+-rna} \
+    ${par_name:+-name} \
+    ${par_name_only:+-nameOnly} \
+    ${par_tab:+-tab} \
+    ${par_bed_out:+-bedOut} \
+    ${par_strandedness:+-s} \
+    ${par_split:+-split} \
+    ${par_full_header:+-fullHeader} > "$par_output"
diff --git a/src/bedtools/bedtools_getfasta/test.sh b/src/bedtools/bedtools_getfasta/test.sh
new file mode 100644
index 00000000..a28e3a7e
--- /dev/null
+++ b/src/bedtools/bedtools_getfasta/test.sh
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+TMPDIR=$(mktemp -d)
+function clean_up {
+  [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR"
+}
+trap clean_up EXIT
+
+# Create dummy test fasta file
+cat > "$TMPDIR/test.fa" <<EOF
+>chr1
+AAAAAAAACCCCCCCCCCCCCGCTACTGGGGGGGGGGGGGGGGGG
+EOF
+
+TAB="$(printf '\t')"
+
+# Create dummy bed file
+cat > "$TMPDIR/test.bed" <<EOF
+chr1${TAB}5${TAB}10${TAB}myseq
+EOF
+
+# Create expected FASTA file for a run with default options
+cat > "$TMPDIR/expected.fasta" <<EOF
+>chr1:5-10
+AAACC
+EOF
+
+"$meta_executable" \
+  --input_bed "$TMPDIR/test.bed" \
+  --input_fasta "$TMPDIR/test.fa" \
+  --output "$TMPDIR/output.fasta"
+
+cmp --silent "$TMPDIR/output.fasta" "$TMPDIR/expected.fasta" || { echo "files are different:"; exit 1; }
+
+
+# Create expected FASTA file for --name
+cat > "$TMPDIR/expected_with_name.fasta" <<EOF
+>myseq::chr1:5-10
+AAACC
+EOF
+
+"$meta_executable" \
+  --input_bed "$TMPDIR/test.bed" \
+  --input_fasta "$TMPDIR/test.fa" \
+  --name \
+  --output "$TMPDIR/output_with_name.fasta"
+
+
+cmp --silent "$TMPDIR/output_with_name.fasta" "$TMPDIR/expected_with_name.fasta" || { echo "Files when using --name are different."; exit 1; }
+
+# Create expected FASTA file for --name_only
+cat > "$TMPDIR/expected_with_name_only.fasta" <<EOF
+>myseq
+AAACC
+EOF
+
+"$meta_executable" \
+  --input_bed "$TMPDIR/test.bed" \
+  --input_fasta "$TMPDIR/test.fa" \
+  --name_only \
+  --output "$TMPDIR/output_with_name_only.fasta"
+
+cmp --silent "$TMPDIR/output_with_name_only.fasta" "$TMPDIR/expected_with_name_only.fasta" || { echo "Files when using --name_only are different."; exit 1; }
+
+
+# Create expected tab-delimited file for --tab (combined with --name_only)
+cat > "$TMPDIR/expected_tab.out" <<EOF
+myseq${TAB}AAACC
+EOF
+
+"$meta_executable" \
+  --input_bed "$TMPDIR/test.bed" \
+  --input_fasta "$TMPDIR/test.fa" \
+  --name_only \
+  --tab \
+  --output "$TMPDIR/tab.out"
+
+cmp --silent "$TMPDIR/expected_tab.out" "$TMPDIR/tab.out" || { echo "Files when using --tab are different."; exit 1; }
+
+
+# Create expected BED file for --bed_out
+cat > "$TMPDIR/expected.bed" <<EOF
+chr1${TAB}5${TAB}10${TAB}myseq${TAB}AAACC
+EOF
+
+"$meta_executable" \
+  --input_bed "$TMPDIR/test.bed" \
+  --input_fasta "$TMPDIR/test.fa" \
+  --bed_out \
+  --output "$TMPDIR/output.bed"
+
+
+cmp --silent "$TMPDIR/expected.bed" "$TMPDIR/output.bed" || { echo "Files when using --bed_out are different."; exit 1; }
+
+# Create dummy bed file for strandedness
+cat > "$TMPDIR/test_strandedness.bed" <<EOF
+chr1${TAB}20${TAB}25${TAB}forward${TAB}1${TAB}+
+chr1${TAB}20${TAB}25${TAB}reverse${TAB}1${TAB}-
+EOF
+
+# Create expected FASTA file for -s (combined with --name_only)
+cat > "$TMPDIR/expected_strandedness.fasta" <<EOF
+>forward(+)
+CGCTA
+>reverse(-)
+TAGCG
+EOF
+
+"$meta_executable" \
+  --input_bed "$TMPDIR/test_strandedness.bed" \
+  --input_fasta "$TMPDIR/test.fa" \
+  -s \
+  --name_only \
+  --output "$TMPDIR/output_strandedness.fasta"
+
+
+cmp --silent "$TMPDIR/expected_strandedness.fasta" "$TMPDIR/output_strandedness.fasta" || { echo "Files when using -s are different."; exit 1; }
+

From 8191140b7bf964c0439847021bb188253e2a40f9 Mon Sep 17 00:00:00 2001
From: Sai Nirmayi Yasa <92786623+sainirmayi@users.noreply.github.com>
Date: Mon, 24 Jun 2024 10:01:06 +0200
Subject: [PATCH 08/23] Add star genomegenerate component (#58)

* Add star genomegenerate component

* Update changelog

* Rename component

* Update test

* Update CHANGELOG.md

---------

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>
---
 CHANGELOG.md                                  |   4 +-
 src/star/star_genome_generate/config.vsh.yaml | 139 +++
 src/star/star_genome_generate/help.txt        | 927 ++++++++++++++++++
 src/star/star_genome_generate/script.sh       |  29 +
 src/star/star_genome_generate/test.sh         |  48 +
 5 files changed, 1146 insertions(+), 1 deletion(-)
 create mode 100644 src/star/star_genome_generate/config.vsh.yaml
 create mode 100644 src/star/star_genome_generate/help.txt
 create mode 100644 src/star/star_genome_generate/script.sh
 create mode 100644 src/star/star_genome_generate/test.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a3e3fa4d..c89bb9b4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,7 +31,9 @@
 
 * `multiqc`: Aggregate results from bioinformatics analyses across many samples into a single report (PR #42).
 
-* `star/star_align_reads`: Align reads to a reference genome (PR #22).
+* `star`:
+    - `star/star_align_reads`: Align reads to a reference genome (PR #22).
+    - `star/star_genome_generate`: Generate a genome index for STAR alignment (PR #58).
 
 * `gffread`: Validate, filter, convert and perform other operations on GFF files (PR #29).  
 
diff --git a/src/star/star_genome_generate/config.vsh.yaml b/src/star/star_genome_generate/config.vsh.yaml
new file mode 100644
index 00000000..3adaf7a2
--- /dev/null
+++ b/src/star/star_genome_generate/config.vsh.yaml
@@ -0,0 +1,139 @@
+name: star_genome_generate
+namespace: star
+description: |
+  Create index for STAR
+keywords: [genome, index, align]
+links:
+  repository: https://github.com/alexdobin/STAR
+  documentation: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf
+references:
+  doi: 10.1093/bioinformatics/bts635
+license: MIT
+requirements:
+  commands: [ STAR ]
+
+argument_groups:
+- name: "Input"
+  arguments:
+  - name: "--genomeFastaFiles"
+    type: file
+    description: |
+      Path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.
+    required: true
+    multiple: true
+    multiple_sep: ";"
+  - name: "--sjdbGTFfile"
+    type: file
+    description: Path to the GTF file with annotations
+  - name: --sjdbOverhang
+    type: integer
+    description: Length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)
+    example: 100
+  - name: --sjdbGTFchrPrefix
+    type: string
+    description: Prefix for chromosome names in a GTF file (e.g. 'chr' for using ENSEMBL annotations with UCSC genomes)
+  - name: --sjdbGTFfeatureExon
+    type: string
+    description: Feature type in GTF file to be used as exons for building transcripts
+    example: exon
+  - name: --sjdbGTFtagExonParentTranscript
+    type: string
+    description: GTF attribute name for parent transcript ID (default "transcript_id" works for GTF files)
+    example: transcript_id
+  - name: --sjdbGTFtagExonParentGene
+    type: string
+    description: GTF attribute name for parent gene ID (default "gene_id" works for GTF files)
+    example: gene_id
+  - name: --sjdbGTFtagExonParentGeneName
+    type: string
+    description: GTF attribute name for parent gene name
+    example: gene_name
+    multiple: true
+    multiple_sep: ";"
+  - name: --sjdbGTFtagExonParentGeneType
+    type: string
+    description: GTF attribute name for parent gene type
+    example:
+    - gene_type
+    - gene_biotype
+    multiple: true
+    multiple_sep: ";"
+  - name: --limitGenomeGenerateRAM
+    type: long
+    description: Maximum available RAM (bytes) for genome generation
+    example: '31000000000'
+  - name: --genomeSAindexNbases
+    type: integer
+    description: Length (bases) of the SA pre-indexing string. Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, this parameter must be scaled down to min(14, log2(GenomeLength)/2 - 1).
+    example: 14
+  - name: --genomeChrBinNbits
+    type: integer
+    description: Defined as log2(chrBin), where chrBin is the size of the bins for genome storage. Each chromosome will occupy an integer number of bins. For a genome with large number of contigs, it is recommended to scale this parameter as min(18, log2[max(GenomeLength/NumberOfReferences,ReadLength)]).
+    example: 18
+  - name: --genomeSAsparseD
+    type: integer
+    min: 0
+    example: 1
+    description: Suffix array sparsity, i.e. distance between indices. Use bigger numbers to decrease needed RAM at the cost of mapping speed reduction.
+  - name: --genomeSuffixLengthMax
+    type: integer
+    description: Maximum length of the suffixes, has to be longer than read length. Use -1 for infinite length.
+    example: -1
+  - name: --genomeTransformType
+    type: string
+    description: |
+      Type of genome transformation
+        None       ... no transformation
+        Haploid    ... replace reference alleles with alternative alleles from VCF file (e.g. consensus allele)
+        Diploid    ... create two haplotypes for each chromosome listed in VCF file, for genotypes 1|2, assumes perfect phasing (e.g. personal genome)
+    example: None
+  - name: --genomeTransformVCF
+    type: file
+    description: path to VCF file for genome transformation
+
+- name: "Output"
+  arguments:
+  - name: "--index"
+    type: file
+    direction: output
+    description: STAR index directory.
+    default: STAR_index
+    required: true
+
+resources:
+  - type: bash_script
+    path: script.sh
+
+test_resources:
+  - type: bash_script
+    path: test.sh
+
+engines:
+- type: docker
+  image: ubuntu:22.04
+  setup:
+    # setup derived from https://github.com/alexdobin/STAR/blob/master/extras/docker/Dockerfile
+    - type: docker
+      env:
+        - STAR_VERSION 2.7.11b
+        - PACKAGES gcc g++ make wget zlib1g-dev unzip xxd
+      run: |
+        apt-get update && \
+          apt-get install -y --no-install-recommends ${PACKAGES} && \
+          cd /tmp && \
+          wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \
+          unzip ${STAR_VERSION}.zip && \
+          cd STAR-${STAR_VERSION}/source && \
+          make STARstatic CXXFLAGS_SIMD=-std=c++11 && \
+          cp STAR /usr/local/bin && \
+          cd / && \
+          rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \
+          apt-get --purge autoremove -y ${PACKAGES} && \
+          apt-get clean
+    - type: docker
+      run: |
+        STAR --version | sed 's#\(.*\)#star: "\1"#' > /var/software_versions.txt
+
+runners:
+  - type: executable
+  - type: nextflow
diff --git a/src/star/star_genome_generate/help.txt b/src/star/star_genome_generate/help.txt
new file mode 100644
index 00000000..940f639d
--- /dev/null
+++ b/src/star/star_genome_generate/help.txt
@@ -0,0 +1,927 @@
+Usage: STAR  [options]... --genomeDir /path/to/genome/index/   --readFilesIn R1.fq R2.fq
+Spliced Transcripts Alignment to a Reference (c) Alexander Dobin, 2009-2022
+
+STAR version=2.7.11b
+STAR compilation time,server,dir=2024-02-11T19:36:26+00:00 :/tmp/STAR-2.7.11b/source
+For more details see:
+<https://github.com/alexdobin/STAR>
+<https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf>
+### versions
+versionGenome           2.7.4a
+    string: earliest genome index version compatible with this STAR release. Please do not change this value!
+
+### Parameter Files
+parametersFiles          -
+    string: name of a user-defined parameters file, "-": none. Can only be defined on the command line.
+
+### System
+sysShell            -
+    string: path to the shell binary, preferably bash, e.g. /bin/bash.
+                    - ... the default shell is executed, typically /bin/sh. This was reported to fail on some Ubuntu systems - then you need to specify path to bash.
+
+### Run Parameters
+runMode                         alignReads
+    string: type of the run.
+                                alignReads             ... map reads
+                                genomeGenerate         ... generate genome files
+                                inputAlignmentsFromBAM ... input alignments from BAM. Presently only works with --outWigType and --bamRemoveDuplicates options.
+                                liftOver               ... lift-over of GTF files (--sjdbGTFfile) between genome assemblies using chain file(s) from --genomeChainFiles.
+                                soloCellFiltering  </path/to/raw/count/dir/>   </path/to/output/prefix>    ... STARsolo cell filtering ("calling") without remapping, followed by the path to raw count directory and output (filtered) prefix
+
+runThreadN                      1
+    int: number of threads to run STAR
+
+runDirPerm                      User_RWX
+    string: permissions for the directories created at the run-time.
+                                User_RWX ... user-read/write/execute
+                                All_RWX  ... all-read/write/execute (same as chmod 777)
+
+runRNGseed                      777
+    int: random number generator seed.
+
+
+### Genome Parameters
+genomeDir                   ./GenomeDir/
+    string: path to the directory where genome files are stored (for --runMode alignReads) or will be generated (for --runMode generateGenome)
+
+genomeLoad                NoSharedMemory
+    string: mode of shared memory usage for the genome files. Only used with --runMode alignReads.
+                          LoadAndKeep     ... load genome into shared and keep it in memory after run
+                          LoadAndRemove   ... load genome into shared but remove it after run
+                          LoadAndExit     ... load genome into shared memory and exit, keeping the genome in memory for future runs
+                          Remove          ... do not map anything, just remove loaded genome from memory
+                          NoSharedMemory  ... do not use shared memory, each job will have its own private copy of the genome
+
+genomeFastaFiles            -
+    string(s): path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.
+                            Required for the genome generation (--runMode genomeGenerate). Can also be used in the mapping (--runMode alignReads) to add extra (new) sequences to the genome (e.g. spike-ins).
+
+genomeChainFiles            -
+    string: chain files for genomic liftover. Only used with --runMode liftOver .
+
+genomeFileSizes             0
+    uint(s)>0: genome files exact sizes in bytes. Typically, this should not be defined by the user.
+    
+genomeTransformOutput       None
+    string(s):              which output to transform back to original genome
+                            SAM     ... SAM/BAM alignments
+                            SJ      ... splice junctions (SJ.out.tab)
+                            Quant   ... quantifications (from --quantMode option)
+                            None    ... no transformation of the output        
+
+genomeChrSetMitochondrial   chrM M MT
+    string(s):              names of the mitochondrial chromosomes. Presently only used for STARsolo statistics output/
+
+### Genome Indexing Parameters - only used with --runMode genomeGenerate
+genomeChrBinNbits           18
+    int: =log2(chrBin), where chrBin is the size of the bins for genome storage: each chromosome will occupy an integer number of bins. For a genome with large number of contigs, it is recommended to scale this parameter as min(18, log2[max(GenomeLength/NumberOfReferences,ReadLength)]).
+
+genomeSAindexNbases         14
+    int: length (bases) of the SA pre-indexing string. Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, the parameter --genomeSAindexNbases must be scaled down to min(14, log2(GenomeLength)/2 - 1).
+
+genomeSAsparseD             1
+    int>0: suffux array sparsity, i.e. distance between indices: use bigger numbers to decrease needed RAM at the cost of mapping speed reduction
+
+genomeSuffixLengthMax       -1
+    int: maximum length of the suffixes, has to be longer than read length. -1 = infinite.
+    
+genomeTransformType         None
+    string: type of genome transformation
+                            None       ... no transformation
+                            Haploid    ... replace reference alleles with alternative alleles from VCF file (e.g. consensus allele)
+                            Diploid    ... create two haplotypes for each chromosome listed in VCF file, for genotypes 1|2, assumes perfect phasing (e.g. personal genome)
+
+genomeTransformVCF          -
+    string: path to VCF file for genome transformation
+
+
+    
+#####UnderDevelopment_begin : not supported - do not use
+genomeType                  Full
+    string: type of genome to generate
+                            Full                ... full (normal) genome
+                            Transcriptome       ... genome consists of transcript sequences
+                            SuperTransriptome   ... genome consists of superTranscript sequences
+#####UnderDevelopment_end
+
+# DEPRECATED: please use --genomeTransformVCF and --genomeTransformType options instead.
+#genomeConsensusFile         -
+#    string: VCF file with consensus SNPs (i.e. alternative allele is the major (AF>0.5) allele)
+# DEPRECATED 
+
+
+
+### Splice Junctions Database
+sjdbFileChrStartEnd                     -
+    string(s): path to the files with genomic coordinates (chr <tab> start <tab> end <tab> strand) for the splice junction introns. Multiple files can be supplied and will be concatenated.
+
+sjdbGTFfile                             -
+    string: path to the GTF file with annotations
+
+sjdbGTFchrPrefix                        -
+    string: prefix for chromosome names in a GTF file (e.g. 'chr' for using ENSMEBL annotations with UCSC genomes)
+
+sjdbGTFfeatureExon                      exon
+    string: feature type in GTF file to be used as exons for building transcripts
+
+sjdbGTFtagExonParentTranscript          transcript_id
+    string: GTF attribute name for parent transcript ID (default "transcript_id" works for GTF files)
+
+sjdbGTFtagExonParentGene                gene_id
+    string: GTF attribute name for parent gene ID (default "gene_id" works for GTF files)
+
+sjdbGTFtagExonParentGeneName            gene_name
+    string(s): GTF attribute name for parent gene name
+
+sjdbGTFtagExonParentGeneType            gene_type gene_biotype
+    string(s): GTF attribute name for parent gene type
+
+sjdbOverhang                            100
+    int>0: length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)
+
+sjdbScore                               2
+    int: extra alignment score for alignments that cross database junctions
+
+sjdbInsertSave                          Basic
+    string: which files to save when sjdb junctions are inserted on the fly at the mapping step
+                    Basic ... only small junction / transcript files
+                    All   ... all files including big Genome, SA and SAindex - this will create a complete genome directory
+
+### Variation parameters
+varVCFfile                              -
+    string: path to the VCF file that contains variation data. The 10th column should contain the genotype information, e.g. 0/1
+
+### Input Files
+inputBAMfile                -
+    string: path to BAM input file, to be used with --runMode inputAlignmentsFromBAM
+
+### Read Parameters
+readFilesType               Fastx
+    string: format of input read files
+                            Fastx       ... FASTA or FASTQ
+                            SAM SE      ... SAM or BAM single-end reads; for BAM use --readFilesCommand samtools view
+                            SAM PE      ... SAM or BAM paired-end reads; for BAM use --readFilesCommand samtools view
+                            
+readFilesSAMattrKeep        All
+    string(s): for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL
+                            All     ... keep all tags
+                            None    ... do not keep any tags
+
+readFilesIn                 Read1 Read2
+    string(s): paths to files that contain input read1 (and, if needed,  read2)
+
+readFilesManifest           -
+    string: path to the "manifest" file with the names of read files. The manifest file should contain 3 tab-separated columns:
+            paired-end reads: read1_file_name $tab$ read2_file_name $tab$ read_group_line.
+            single-end reads: read1_file_name $tab$ -               $tab$ read_group_line.
+            Spaces, but not tabs are allowed in file names.
+            If read_group_line does not start with ID:, it can only contain one ID field, and ID: will be added to it.
+            If read_group_line starts with ID:, it can contain several fields separated by $tab$, and all fields will be be copied verbatim into SAM @RG header line.
+
+readFilesPrefix             -
+    string: prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn
+
+readFilesCommand             -
+    string(s): command line to execute for each of the input file. This command should generate FASTA or FASTQ text and send it to stdout
+               For example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc.
+
+readMapNumber               -1
+    int: number of reads to map from the beginning of the file
+                            -1: map all reads
+
+readMatesLengthsIn          NotEqual
+    string: Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same  / not the same. NotEqual is safe in all situations.
+
+readNameSeparator           /
+    string(s): character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed)
+
+readQualityScoreBase        33
+    int>=0: number to be subtracted from the ASCII code to get Phred quality score
+
+### Read Clipping
+
+clipAdapterType             Hamming
+    string:                 adapter clipping type
+                            Hamming ... adapter clipping based on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp
+                            CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes Opal package by Martin Šošić: https://github.com/Martinsos/opal
+                            None ... no adapter clipping, all other clip* parameters are disregarded
+                            
+clip3pNbases                 0
+    int(s): number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.
+
+clip3pAdapterSeq            -
+    string(s): adapter sequences to clip from 3p of each mate.  If one value is given, it will be assumed the same for both mates.
+                            polyA ... polyA sequence with the length equal to read length
+
+clip3pAdapterMMp            0.1
+    double(s): max proportion of mismatches for 3p adapter clipping for each mate.  If one value is given, it will be assumed the same for both mates.
+
+clip3pAfterAdapterNbases    0
+    int(s): number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates.
+
+clip5pNbases                 0
+    int(s): number(s) of bases to clip from 5p of each mate. If one value is given, it will be assumed the same for both mates.
+
+#####UnderDevelopment_begin : not supported - do not use   
+clip5pAdapterSeq            -
+    string(s): adapter sequences to clip from 5p of each mate, separated by space.
+
+clip5pAdapterMMp            0.1
+    double(s): max proportion of mismatches for 5p adapter clipping for each mate, separated by space
+
+clip5pAfterAdapterNbases    0
+    int(s): number of bases to clip from 5p of each mate after the adapter clipping, separated by space.
+#####UnderDevelopment_end
+
+### Limits
+limitGenomeGenerateRAM               31000000000
+    int>0: maximum available RAM (bytes) for genome generation
+
+limitIObufferSize                    30000000 50000000
+    int(s)>0: max available buffers size (bytes) for input/output, per thread
+
+limitOutSAMoneReadBytes              100000
+    int>0: max size of the SAM record (bytes) for one read. Recommended value: >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax
+
+limitOutSJoneRead                    1000
+    int>0: max number of junctions for one read (including all multi-mappers)
+
+limitOutSJcollapsed                  1000000
+    int>0: max number of collapsed junctions
+
+limitBAMsortRAM                         0
+    int>=0: maximum available RAM (bytes) for sorting BAM. If =0, it will be set to the genome index size. 0 value can only be used with --genomeLoad NoSharedMemory option.
+
+limitSjdbInsertNsj                     1000000
+    int>=0: maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run
+
+limitNreadsSoft                        -1
+    int: soft limit on the number of reads
+
+### Output: general
+outFileNamePrefix               ./
+    string: output files name prefix (including full or relative path). Can only be defined on the command line.
+
+outTmpDir                       -
+    string: path to a directory that will be used as temporary by STAR. All contents of this directory will be removed!
+                                - ... the temp directory will default to outFileNamePrefix_STARtmp
+
+outTmpKeep                      None
+    string: whether to keep the temporary files after STAR runs is finished
+                                None ... remove all temporary files
+                                All ... keep all files
+
+outStd                          Log
+    string: which output will be directed to stdout (standard out)
+                                Log                    ... log messages
+                                SAM                    ... alignments in SAM format (which normally are output to Aligned.out.sam file), normal standard output will go into Log.std.out
+                                BAM_Unsorted           ... alignments in BAM format, unsorted. Requires --outSAMtype BAM Unsorted
+                                BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype BAM SortedByCoordinate
+                                BAM_Quant              ... alignments to transcriptome in BAM format, unsorted. Requires --quantMode TranscriptomeSAM
+
+outReadsUnmapped                None
+   string: output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s).
+                                None    ... no output
+                                Fastx   ... output in separate fasta/fastq files, Unmapped.out.mate1/2
+
+outQSconversionAdd              0
+   int: add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31)
+
+outMultimapperOrder             Old_2.4
+    string: order of multimapping alignments in the output files
+                                Old_2.4             ... quasi-random order used before 2.5.0
+                                Random              ... random order of alignments for each multi-mapper. Read mates (pairs) are always adjacent, all alignments for each read stay together. This option will become default in the future releases.
+
+### Output: SAM and BAM
+outSAMtype                      SAM
+    strings: type of SAM/BAM output
+                                1st word:
+                                BAM  ... output BAM without sorting
+                                SAM  ... output SAM without sorting
+                                None ... no SAM/BAM output
+                                2nd, 3rd:
+                                Unsorted           ... standard unsorted
+                                SortedByCoordinate ... sorted by coordinate. This option will allocate extra memory for sorting which can be specified by --limitBAMsortRAM.
+
+outSAMmode                      Full
+    string: mode of SAM output
+                                None ... no SAM output
+                                Full ... full SAM output
+                                NoQS ... full SAM but without quality scores
+
+outSAMstrandField               None
+    string: Cufflinks-like strand field flag
+                                None        ... not used
+                                intronMotif ... strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out.
+
+outSAMattributes                Standard
+    string(s): a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order.
+                                ***Presets:
+                                None        ... no attributes
+                                Standard    ... NH HI AS nM
+                                All         ... NH HI AS nM NM MD jM jI MC ch                                                                    
+                                ***Alignment:
+                                NH          ... number of loci the read maps to: =1 for unique mappers, >1 for multimappers. Standard SAM tag.
+                                HI          ... multiple alignment index, starts with --outSAMattrIHstart (=1 by default). Standard SAM tag.
+                                AS          ... local alignment score, +1/-1 for matches/mismatches, score* penalties for indels and gaps. For PE reads, total score for two mates. Standard SAM tag.
+                                nM          ... number of mismatches. For PE reads, sum over two mates.
+                                NM          ... edit distance to the reference (number of mismatched + inserted + deleted bases) for each mate. Standard SAM tag.
+                                MD          ... string encoding mismatched and deleted reference bases (see standard SAM specifications). Standard SAM tag.
+                                jM          ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.
+                                jI          ... start and end of introns for all junctions (1-based).
+                                XS          ... alignment strand according to --outSAMstrandField.
+                                MC          ... mate's CIGAR string. Standard SAM tag.
+                                ch          ... marks all segments of all chimeric alignments for --chimOutType WithinBAM output.
+                                cN          ... number of bases clipped from the read ends: 5' and 3'
+                                ***Variation:
+                                vA          ... variant allele
+                                vG          ... genomic coordinate of the variant overlapped by the read.
+                                vW          ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode SAMtag.
+                                ha          ... haplotype (1/2) when mapping to the diploid genome. Requires genome generated with --genomeTransformType Diploid .                               
+                                ***STARsolo:
+                                CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing.
+                                GX GN       ... gene ID and gene name for unique-gene reads.
+                                gx gn       ... gene IDs and gene names for unique- and multi-gene reads.
+                                CB UB       ... error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM SortedByCoordinate.
+                                sM          ... assessment of CB and UMI.
+                                sS          ... sequence of the entire barcode (CB,UMI,adapter).
+                                sQ          ... quality of the entire barcode.
+                                sF          ... type of feature overlap and number of features for each alignment
+                                ***Unsupported/undocumented:
+                                rB          ... alignment block read/genomic coordinates.
+                                vR          ... read coordinate of the variant.
+
+outSAMattrIHstart               1
+    int>=0:                     start value for the HI attribute. 0 may be required by some downstream software, such as Cufflinks or StringTie.
+
+outSAMunmapped                  None
+    string(s): output of unmapped reads in the SAM format
+                                1st word:
+                                None   ... no output
+                                Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam)
+                                2nd word:
+                                KeepPairs ... record unmapped mate for each alignment, and, in case of unsorted output, keep it adjacent to its mapped mate. Only affects multi-mapping reads.
+
+outSAMorder                     Paired
+    string: type of sorting for the SAM output
+                                Paired: one mate after the other for all paired alignments
+                                PairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files
+
+outSAMprimaryFlag        OneBestScore
+    string: which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG
+                                OneBestScore ... only one alignment with the best score is primary
+                                AllBestScore ... all alignments with the best score are primary
+
+outSAMreadID            Standard
+    string: read ID record type
+                                Standard ... first word (until space) from the FASTx read ID line, removing /1,/2 from the end
+                                Number   ... read number (index) in the FASTx file
+
+outSAMmapqUnique        255
+    int: 0 to 255: the MAPQ value for unique mappers
+
+outSAMflagOR           0
+    int: 0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e. FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by STAR, and after outSAMflagAND. Can be used to set specific bits that are not set otherwise.
+
+outSAMflagAND           65535
+    int: 0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e. FLAG=FLAG & outSAMflagAND. This is applied after all flags have been set by STAR, but before outSAMflagOR. Can be used to unset specific bits that are not set otherwise.
+
+outSAMattrRGline        -
+    string(s): SAM/BAM read group line. The first word contains the read group identifier and must start with "ID:", e.g. --outSAMattrRGline ID:xxx CN:yy "DS:z z z".
+            xxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted.
+            Comma separated RG lines correspond to different (comma separated) input files in --readFilesIn. Commas have to be surrounded by spaces, e.g.
+            --outSAMattrRGline ID:xxx , ID:zzz "DS:z z" , ID:yyy DS:yyyy
+
+outSAMheaderHD          -
+    strings: @HD (header) line of the SAM header
+
+outSAMheaderPG          -
+    strings: extra @PG (software) line of the SAM header (in addition to STAR)
+
+outSAMheaderCommentFile -
+    string: path to the file with @CO (comment) lines of the SAM header
+
+outSAMfilter            None
+    string(s): filter the output into main SAM/BAM files
+                        KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genomeFastaFiles at the mapping stage.
+                        KeepAllAddedReferences ...  keep all alignments to the extra reference sequences added with --genomeFastaFiles at the mapping stage.
+
+
+outSAMmultNmax          -1
+    int: max number of multiple alignments for a read that will be output to the SAM/BAM files. Note that if this value is not equal to -1, the top scoring alignment will be output first
+                        -1 ... all alignments (up to --outFilterMultimapNmax) will be output
+
+outSAMtlen              1
+    int: calculation method for the TLEN field in the SAM/BAM files
+                        1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate
+                        2 ... leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. This is different from 1 for overlapping mates with protruding ends
+
+outBAMcompression       1
+    int: -1 to 10  BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression
+
+outBAMsortingThreadN    0
+    int: >=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN).
+
+outBAMsortingBinsN      50
+    int: >0:  number of genome bins for coordinate-sorting
+
+### BAM processing
+bamRemoveDuplicatesType  -
+    string: mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only
+                        -                       ... no duplicate removal/marking
+                        UniqueIdentical         ... mark all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical
+                        UniqueIdenticalNotMulti  ... mark duplicate unique mappers but not multimappers.
+
+bamRemoveDuplicatesMate2basesN   0
+    int>0: number of bases from the 5' of mate 2 to use in collapsing (e.g. for RAMPAGE)
+
+### Output Wiggle
+outWigType          None
+    string(s): type of signal output, e.g. "bedGraph" OR "bedGraph read1_5p". Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .
+                    1st word:
+                    None       ... no signal output
+                    bedGraph   ... bedGraph format
+                    wiggle     ... wiggle format
+                    2nd word:
+                    read1_5p   ... signal from only 5' of the 1st read, useful for CAGE/RAMPAGE etc
+                    read2      ... signal from only 2nd read
+
+outWigStrand        Stranded
+    string: strandedness of wiggle/bedGraph output
+                    Stranded   ...  separate strands, str1 and str2
+                    Unstranded ...  collapsed strands
+
+outWigReferencesPrefix    -
+    string: prefix matching reference names to include in the output wiggle file, e.g. "chr", default "-" - include all references
+
+outWigNorm              RPM
+    string: type of normalization for the signal
+                        RPM    ... reads per million of mapped reads
+                        None   ... no normalization, "raw" counts
+
+### Output Filtering
+outFilterType                   Normal
+    string: type of filtering
+                                Normal  ... standard filtering using only current alignment
+                                BySJout ... keep only those reads that contain junctions that passed filtering into SJ.out.tab
+
+outFilterMultimapScoreRange     1
+    int: the score range below the maximum score for multimapping alignments
+
+outFilterMultimapNmax           10
+    int: maximum number of loci the read is allowed to map to. Alignments (all of them) will be output only if the read maps to no more loci than this value.
+         Otherwise no alignments will be output, and the read will be counted as "mapped to too many loci" in the Log.final.out .
+
+outFilterMismatchNmax           10
+    int: alignment will be output only if it has no more mismatches than this value.
+
+outFilterMismatchNoverLmax      0.3
+    real: alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value.
+
+outFilterMismatchNoverReadLmax  1.0
+    real: alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value.
+
+
+outFilterScoreMin               0
+    int: alignment will be output only if its score is higher than or equal to this value.
+
+outFilterScoreMinOverLread      0.66
+    real: same as outFilterScoreMin, but normalized to read length (sum of mates' lengths for paired-end reads)
+
+outFilterMatchNmin              0
+    int: alignment will be output only if the number of matched bases is higher than or equal to this value.
+
+outFilterMatchNminOverLread     0.66
+    real: same as outFilterMatchNmin, but normalized to the read length (sum of mates' lengths for paired-end reads).
+
+outFilterIntronMotifs           None
+    string: filter alignment using their motifs
+                None                           ... no filtering
+                RemoveNoncanonical             ... filter out alignments that contain non-canonical junctions
+                RemoveNoncanonicalUnannotated  ... filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database. The annotated non-canonical junctions will be kept.
+
+outFilterIntronStrands          RemoveInconsistentStrands
+    string: filter alignments
+                RemoveInconsistentStrands      ... remove alignments that have junctions with inconsistent strands
+                None                           ... no filtering
+
+### Output splice junctions (SJ.out.tab)
+outSJtype                       Standard
+    string: type of splice junction output
+                                Standard    ... standard SJ.out.tab output
+                                None        ... no splice junction output
+
+### Output Filtering: Splice Junctions
+outSJfilterReads                All
+    string: which reads to consider for collapsed splice junctions output
+                                All     ... all reads, unique- and multi-mappers
+                                Unique  ... uniquely mapping reads only
+
+outSJfilterOverhangMin          30  12  12  12
+    4 integers:    minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif
+                                does not apply to annotated junctions
+
+outSJfilterCountUniqueMin       3   1   1   1
+    4 integers: minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif
+                                Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied
+                                does not apply to annotated junctions
+
+outSJfilterCountTotalMin     3   1   1   1
+    4 integers: minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif
+                                Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied
+                                does not apply to annotated junctions
+
+outSJfilterDistToOtherSJmin     10  0   5   10
+    4 integers>=0: minimum allowed distance to other junctions' donor/acceptor
+                                does not apply to annotated junctions
+
+outSJfilterIntronMaxVsReadN        50000 100000 200000
+    N integers>=0: maximum gap allowed for junctions supported by 1,2,3,,,N reads
+                                i.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap <=alignIntronMax
+                                does not apply to annotated junctions
+
+### Scoring
+scoreGap                     0
+    int: splice junction penalty (independent of intron motif)
+
+scoreGapNoncan               -8
+    int: non-canonical junction penalty (in addition to scoreGap)
+
+scoreGapGCAG                 -4
+    int: GC/AG and CT/GC junction penalty (in addition to scoreGap)
+
+scoreGapATAC                 -8
+    int: AT/AC  and GT/AT junction penalty  (in addition to scoreGap)
+
+scoreGenomicLengthLog2scale   -0.25
+    int: extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)
+
+scoreDelOpen                 -2
+    int: deletion open penalty
+
+scoreDelBase                 -2
+    int: deletion extension penalty per base (in addition to scoreDelOpen)
+
+scoreInsOpen                 -2
+    int: insertion open penalty
+
+scoreInsBase                 -2
+    int: insertion extension penalty per base (in addition to scoreInsOpen)
+
+scoreStitchSJshift           1
+    int: maximum score reduction while searching for SJ boundaries in the stitching step
+
+
+### Alignments and Seeding
+
+seedSearchStartLmax             50
+    int>0: defines the search start point through the read - the read is split into pieces no longer than this value
+
+seedSearchStartLmaxOverLread    1.0
+    real: seedSearchStartLmax normalized to read length (sum of mates' lengths for paired-end reads)
+
+seedSearchLmax       0
+    int>=0: defines the maximum length of the seeds, if =0 seed length is not limited
+
+seedMultimapNmax      10000
+    int>0: only pieces that map fewer than this value are utilized in the stitching procedure
+
+seedPerReadNmax       1000
+    int>0: max number of seeds per read
+
+seedPerWindowNmax     50
+    int>0: max number of seeds per window
+
+seedNoneLociPerWindow    10
+    int>0: max number of one seed loci per window
+
+seedSplitMin                12
+    int>0: min length of the seed sequences split by Ns or mate gap
+
+seedMapMin              5
+    int>0: min length of seeds to be mapped
+
+alignIntronMin              21
+    int: minimum intron size, genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion
+
+alignIntronMax              0
+    int: maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins
+
+alignMatesGapMax            0
+    int: maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins
+
+alignSJoverhangMin          5
+    int>0: minimum overhang (i.e. block size) for spliced alignments
+
+alignSJstitchMismatchNmax   0 -1 0 0
+    4*int>=0: maximum number of mismatches for stitching of the splice junctions (-1: no limit).
+                            (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif.
+
+alignSJDBoverhangMin        3
+    int>0: minimum overhang (i.e. block size) for annotated (sjdb) spliced alignments
+
+alignSplicedMateMapLmin     0
+    int>0: minimum mapped length for a read mate that is spliced
+
+alignSplicedMateMapLminOverLmate 0.66
+    real>0: alignSplicedMateMapLmin normalized to mate length
+
+alignWindowsPerReadNmax     10000
+    int>0: max number of windows per read
+
+alignTranscriptsPerWindowNmax       100
+    int>0: max number of transcripts per window
+
+alignTranscriptsPerReadNmax               10000
+    int>0: max number of different alignments per read to consider
+
+alignEndsType           Local
+    string: type of read ends alignment
+                        Local             ... standard local alignment with soft-clipping allowed
+                        EndToEnd          ... force end-to-end read alignment, do not soft-clip
+                        Extend5pOfRead1   ... fully extend only the 5p of the read1, all other ends: local alignment
+                        Extend5pOfReads12 ... fully extend only the 5p of the both read1 and read2, all other ends: local alignment
+
+alignEndsProtrude       0    ConcordantPair
+    int, string:        allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate
+                        1st word: int: maximum number of protrusion bases allowed
+                        2nd word: string:
+                                            ConcordantPair ... report alignments with non-zero protrusion as concordant pairs
+                                            DiscordantPair ... report alignments with non-zero protrusion as discordant pairs
+
+alignSoftClipAtReferenceEnds    Yes
+    string: allow the soft-clipping of the alignments past the end of the chromosomes
+                                Yes ... allow
+                                No  ... prohibit, useful for compatibility with Cufflinks
+
+alignInsertionFlush     None
+    string: how to flush ambiguous insertion positions
+                        None    ... insertions are not flushed
+                        Right   ... insertions are flushed to the right
+
+### Paired-End reads
+peOverlapNbasesMin          0
+    int>=0:             minimum number of overlapping bases to trigger mates merging and realignment. Specify >0 value to switch on the "merging of overlapping mates" algorithm.
+
+peOverlapMMp                0.01
+    real, >=0 & <1:     maximum proportion of mismatched bases in the overlap area
+
+### Windows, Anchors, Binning
+
+winAnchorMultimapNmax           50
+    int>0: max number of loci anchors are allowed to map to
+
+winBinNbits                     16
+    int>0: =log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins.
+
+winAnchorDistNbins              9
+    int>0: max number of bins between two anchors that allows aggregation of anchors into one window
+
+winFlankNbins                   4
+    int>0: log2(winFlank), where winFlank is the size of the left and right flanking regions for each window
+
+winReadCoverageRelativeMin      0.5
+    real>=0: minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only.
+
+winReadCoverageBasesMin      0
+    int>0: minimum number of bases covered by the seeds in a window, for STARlong algorithm only.
+
+### Chimeric Alignments
+chimOutType                 Junctions
+    string(s): type of chimeric output
+                            Junctions       ... Chimeric.out.junction
+                            SeparateSAMold  ... output old SAM into separate Chimeric.out.sam file
+                            WithinBAM       ... output into main aligned BAM files (Aligned.*.bam)
+                            WithinBAM HardClip  ... (default) hard-clipping in the CIGAR for supplemental chimeric alignments (default if no 2nd word is present)
+                            WithinBAM SoftClip  ... soft-clipping in the CIGAR for supplemental chimeric alignments
+
+chimSegmentMin              0
+    int>=0: minimum length of chimeric segment, if ==0, no chimeric output
+
+chimScoreMin                0
+    int>=0: minimum total (summed) score of the chimeric segments
+
+chimScoreDropMax            20
+    int>=0: max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length
+
+chimScoreSeparation         10
+    int>=0: minimum difference (separation) between the best chimeric score and the next one
+
+chimScoreJunctionNonGTAG    -1
+    int: penalty for a non-GT/AG chimeric junction
+
+chimJunctionOverhangMin     20
+    int>=0: minimum overhang for a chimeric junction
+
+chimSegmentReadGapMax       0
+    int>=0: maximum gap in the read sequence between chimeric segments
+
+chimFilter                  banGenomicN
+    string(s): different filters for chimeric alignments
+                            None ... no filtering
+                            banGenomicN ... Ns are not allowed in the genome sequence around the chimeric junction
+
+chimMainSegmentMultNmax        10
+    int>=1: maximum number of multi-alignments for the main chimeric segment. =1 will prohibit multimapping main segments.
+
+chimMultimapNmax                    0
+    int>=0: maximum number of chimeric multi-alignments
+                                0 ... use the old scheme for chimeric detection which only considered unique alignments
+
+chimMultimapScoreRange          1
+    int>=0: the score range for multi-mapping chimeras below the best chimeric score. Only works with --chimMultimapNmax > 1
+
+chimNonchimScoreDropMin         20
+    int>=0: to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value
+
+chimOutJunctionFormat           0
+    int: formatting type for the Chimeric.out.junction file
+                                0 ... no comment lines/headers
+                                1 ... comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping
+
+### Quantification of Annotations
+quantMode                   -
+    string(s): types of quantification requested
+                            -                ... none
+                            TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate file
+                            GeneCounts       ... count reads per gene
+
+quantTranscriptomeBAMcompression    1
+    int: -2 to 10  transcriptome BAM compression level
+                            -2  ... no BAM output
+                            -1  ... default compression (6?)
+                             0  ... no compression
+                             10 ... maximum compression
+
+quantTranscriptomeSAMoutput BanSingleEnd_BanIndels_ExtendSoftclip
+    string: alignment filtering for TranscriptomeSAM output
+                            BanSingleEnd_BanIndels_ExtendSoftclip ... prohibit indels and single-end alignments, extend softclips - compatible with RSEM
+                            BanSingleEnd               ... prohibit single-end alignments, allow indels and softclips
+                            BanSingleEnd_ExtendSoftclip ... prohibit single-end alignments, extend softclips, allow indels
+
+
+### 2-pass Mapping
+twopassMode                 None
+    string: 2-pass mapping mode.
+                            None        ... 1-pass mapping
+                            Basic       ... basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly
+
+twopass1readsN              -1
+    int: number of reads to process for the 1st step. Use very large number (or default -1) to map all reads in the first step.
+
+
+### WASP parameters
+waspOutputMode              None
+    string: WASP allele-specific output type. This is a re-implementation of the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, 1061–1063 (2015), https://www.nature.com/articles/nmeth.3582 .
+                            SAMtag      ... add WASP tags to the alignments that pass WASP filtering
+
+### STARsolo (single cell RNA-seq) parameters
+soloType                    None
+    string(s): type of single-cell RNA-seq
+                            CB_UMI_Simple   ... (a.k.a. Droplet) one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X Chromium.
+                            CB_UMI_Complex  ... multiple Cell Barcodes of varying length, one UMI of fixed length and one adapter sequence of fixed length are allowed in read2 only (e.g. inDrop, ddSeq).
+                            CB_samTagOut    ... output Cell Barcode as CR and/or CB SAM tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or SortedByCoordinate]
+                            SmartSeq        ... Smart-seq: each cell in a separate FASTQ (paired- or single-end), barcodes are corresponding read-groups, no UMI sequences, alignments deduplicated according to alignment start and end (after extending soft-clipped bases)
+
+soloCBtype                  Sequence
+    string: cell barcode type
+                            Sequence: cell barcode is a sequence (standard option)
+                            String: cell barcode is an arbitrary string
+
+soloCBwhitelist             -
+    string(s): file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex allows more than one whitelist file.
+                            None            ... no whitelist: all cell barcodes are allowed
+
+soloCBstart                 1
+    int>0: cell barcode start base
+
+soloCBlen                   16
+    int>0: cell barcode length
+
+soloUMIstart                17
+    int>0: UMI start base
+
+soloUMIlen                  10
+    int>0: UMI length
+
+soloBarcodeReadLength       1
+    int: length of the barcode read
+                            1   ... equal to sum of soloCBlen+soloUMIlen
+                            0   ... not defined, do not check
+
+soloBarcodeMate             0
+    int: identifies which read mate contains the barcode (CB+UMI) sequence
+                            0   ... barcode sequence is on separate read, which should always be the last file in the --readFilesIn listed
+                            1   ... barcode sequence is a part of mate 1
+                            2   ... barcode sequence is a part of mate 2
+
+soloCBposition              -
+    strings(s):             position of Cell Barcode(s) on the barcode read.
+                            Presently only works with --soloType CB_UMI_Complex, and barcodes are assumed to be on Read2.
+                            Format for each barcode: startAnchor_startPosition_endAnchor_endPosition
+                            start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter start; 3: adapter end
+                            start(end)Position is the 0-based position of the CB start(end) with respect to the Anchor Base
+                            Strings for different barcodes are separated by spaces.
+                            Example: inDrop (Zilionis et al, Nat. Protocols, 2017):
+                            --soloCBposition  0_0_2_-1  3_1_3_8
+
+soloUMIposition             -
+    string:                  position of the UMI on the barcode read, same as soloCBposition
+                            Example: inDrop (Zilionis et al, Nat. Protocols, 2017):
+                            --soloUMIposition  3_9_3_14
+
+soloAdapterSequence         -
+    string:                 adapter sequence to anchor barcodes. Only one adapter sequence is allowed.
+
+soloAdapterMismatchesNmax   1
+    int>0:                  maximum number of mismatches allowed in adapter sequence.
+
+soloCBmatchWLtype           1MM_multi
+    string:                 matching the Cell Barcodes to the WhiteList
+                            Exact                           ... only exact matches allowed
+                            1MM                             ... only one match in whitelist with 1 mismatched base allowed. Allowed CBs have to have at least one read with exact match.
+                            1MM_multi                       ... multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used to choose one of the matches.
+                                                                Allowed CBs have to have at least one read with exact match. This option matches best with CellRanger 2.2.0
+                            1MM_multi_pseudocounts          ... same as 1MM_multi, but pseudocounts of 1 are added to all whitelist barcodes.
+                            1MM_multi_Nbase_pseudocounts    ... same as 1MM_multi_pseudocounts, multimatching to WL is allowed for CBs with N-bases. This option matches best with CellRanger >= 3.0.0
+                            EditDist_2                      ... allow up to edit distance of 3 for each of the barcodes. May include one deletion + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple passlist barcodes are not allowed. Similar to ParseBio Split-seq pipeline.
+
+soloInputSAMattrBarcodeSeq  -
+    string(s):              when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order).
+                            For instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq CR UR .
+                            This parameter is required when running STARsolo with input from SAM. 
+    
+soloInputSAMattrBarcodeQual  -
+    string(s):              when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order).
+                            For instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual CY UY .
+                            If this parameter is '-' (default), the quality 'H' will be assigned to all bases.
+
+soloStrand                  Forward
+    string: strandedness of the solo libraries:
+                            Unstranded  ... no strand information
+                            Forward     ... read strand same as the original RNA molecule
+                            Reverse     ... read strand opposite to the original RNA molecule
+
+soloFeatures                Gene
+    string(s): genomic features for which the UMI counts per Cell Barcode are collected
+                            Gene            ... genes: reads match the gene transcript
+                            SJ              ... splice junctions: reported in SJ.out.tab
+                            GeneFull        ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns
+                            GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns: prioritize 100% overlap with exons
+                            GeneFull_Ex50pAS        ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns: prioritize >50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction.
+
+#####UnderDevelopment_begin : not supported - do not use
+                            Transcript3p    ... quantification of transcript for 3' protocols
+#####UnderDevelopment_end
+
+soloMultiMappers            Unique
+    string(s): counting method for reads mapping to multiple genes           
+                            Unique     ... count only reads that map to unique genes
+                            Uniform    ... uniformly distribute multi-genic UMIs to all genes
+                            Rescue     ... distribute UMIs proportionally to unique+uniform counts (~ first iteration of EM)
+                            PropUnique ... distribute UMIs proportionally to unique mappers, if present, and uniformly if not.
+                            EM         ... multi-gene UMIs are distributed using Expectation Maximization algorithm
+
+soloUMIdedup                1MM_All
+    string(s):              type of UMI deduplication (collapsing) algorithm
+                            1MM_All                     ... all UMIs with 1 mismatch distance to each other are collapsed (i.e. counted once).
+                            1MM_Directional_UMItools    ... follows the "directional" method from the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017).
+                            1MM_Directional             ... same as 1MM_Directional_UMItools, but with more stringent criteria for duplicate UMIs
+                            Exact                       ... only exactly matching UMIs are collapsed.
+                            NoDedup                     ... no deduplication of UMIs, count all reads.
+                            1MM_CR                      ... CellRanger2-4 algorithm for 1MM UMI collapsing.
+
+soloUMIfiltering            -
+    string(s):              type of UMI filtering (for reads uniquely mapping to genes)
+                            -                  ... basic filtering: remove UMIs with N and homopolymers (similar to CellRanger 2.2.0).
+                            MultiGeneUMI       ... basic + remove lower-count UMIs that map to more than one gene.
+                            MultiGeneUMI_All   ... basic + remove all UMIs that map to more than one gene.
+                            MultiGeneUMI_CR    ... basic + remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0 .
+                                                   Only works with --soloUMIdedup 1MM_CR
+                                                
+soloOutFileNames            Solo.out/          features.tsv barcodes.tsv        matrix.mtx
+    string(s):              file names for STARsolo output:
+                            file_name_prefix   gene_names   barcode_sequences   cell_feature_count_matrix
+
+soloCellFilter              CellRanger2.2 3000 0.99 10
+    string(s):              cell filtering type and parameters
+                            None            ... do not output filtered cells
+                            TopCells        ... only report top cells by UMI count, followed by the exact number of cells
+                            CellRanger2.2   ... simple filtering of CellRanger 2.2. 
+                                                Can be followed by numbers: number of expected cells, robust maximum percentile for UMI count, maximum to minimum ratio for UMI count
+                                                The hardcoded values are from CellRanger: nExpectedCells=3000;  maxPercentile=0.99;  maxMinRatio=10
+                            EmptyDrops_CR   ... EmptyDrops filtering in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y
+                                                Can be followed by 10 numeric parameters:  nExpectedCells   maxPercentile   maxMinRatio   indMin   indMax   umiMin   umiMinFracMedian   candMaxN   FDR   simN 
+                                                The hardcoded values are from CellRanger:            3000            0.99            10    45000    90000      500               0.01      20000  0.01  10000
+
+soloOutFormatFeaturesGeneField3    "Gene Expression"
+    string(s):                field 3 in the Gene features.tsv file. If "-", then no 3rd field is output.
+
+soloCellReadStats           None
+    string:                 Output reads statistics for each CB
+                            Standard    ... standard output
+
+#####UnderDevelopment_begin : not supported - do not use
+soloClusterCBfile           -
+    string:                 file containing the cluster information for cell barcodes, two columns: CB cluster_index. Only used with --soloFeatures Transcript3p
+#####UnderDevelopment_end
diff --git a/src/star/star_genome_generate/script.sh b/src/star/star_genome_generate/script.sh
new file mode 100644
index 00000000..cb3b906c
--- /dev/null
+++ b/src/star/star_genome_generate/script.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Generate a STAR genome index from the par_* / meta_* variables read below.
+set -e
+
+## VIASH START
+## VIASH END
+# Make sure the index output directory exists.
+mkdir -p "$par_index"
+# Optional flags use ${var:+...}: each is passed only when its variable is set and non-empty.
+STAR \
+    --runMode genomeGenerate \
+    --genomeDir "$par_index" \
+    --genomeFastaFiles $par_genomeFastaFiles \
+    ${meta_cpus:+--runThreadN "${meta_cpus}"} \
+    ${par_sjdbGTFfile:+--sjdbGTFfile "${par_sjdbGTFfile}"} \
+    ${par_sjdbOverhang:+--sjdbOverhang "${par_sjdbOverhang}"} \
+    ${par_genomeSAindexNbases:+--genomeSAindexNbases "${par_genomeSAindexNbases}"} \
+    ${par_sjdbGTFchrPrefix:+--sjdbGTFchrPrefix "${par_sjdbGTFchrPrefix}"} \
+    ${par_sjdbGTFfeatureExon:+--sjdbGTFfeatureExon "${par_sjdbGTFfeatureExon}"} \
+    ${par_sjdbGTFtagExonParentTranscript:+--sjdbGTFtagExonParentTranscript "${par_sjdbGTFtagExonParentTranscript}"} \
+    ${par_sjdbGTFtagExonParentGene:+--sjdbGTFtagExonParentGene "${par_sjdbGTFtagExonParentGene}"} \
+    ${par_sjdbGTFtagExonParentGeneName:+--sjdbGTFtagExonParentGeneName "${par_sjdbGTFtagExonParentGeneName}"} \
+    ${par_sjdbGTFtagExonParentGeneType:+--sjdbGTFtagExonParentGeneType "${par_sjdbGTFtagExonParentGeneType}"} \
+    ${par_limitGenomeGenerateRAM:+--limitGenomeGenerateRAM "${par_limitGenomeGenerateRAM}"} \
+    ${par_genomeChrBinNbits:+--genomeChrBinNbits "${par_genomeChrBinNbits}"} \
+    ${par_genomeSAsparseD:+--genomeSAsparseD "${par_genomeSAsparseD}"} \
+    ${par_genomeSuffixLengthMax:+--genomeSuffixLengthMax "${par_genomeSuffixLengthMax}"} \
+    ${par_genomeTransformType:+--genomeTransformType "${par_genomeTransformType}"} \
+    ${par_genomeTransformVCF:+--genomeTransformVCF "${par_genomeTransformVCF}"}
diff --git a/src/star/star_genome_generate/test.sh b/src/star/star_genome_generate/test.sh
new file mode 100644
index 00000000..fd0e4775
--- /dev/null
+++ b/src/star/star_genome_generate/test.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Smoke test: build an index from a tiny genome and check the files it produces.
+set -e
+
+## VIASH START
+## VIASH END
+
+#########################################################################################
+# Single-chromosome FASTA plus a one-gene GTF; the checks below expect a length of 200.
+echo "> Prepare test data"
+
+cat > genome.fasta <<'EOF'
+>chr1
+TGGCATGAGCCAACGAACGCTGCCTCATAAGCCTCACACATCCGCGCCTATGTTGTGACTCTCTGTGAGCGTTCGTGGG
+GCTCGTCACCACTATGGTTGGCCGGTTAGTAGTGTGACTCCTGGTTTTCTGGAGCTTCTTTAAACCGTAGTCCAGTCAA
+TGCGAATGGCACTTCACGACGGACTGTCCTTAGCTCAGGGGA
+EOF
+
+cat > genes.gtf <<'EOF'
+chr1    example_source  gene    0    50   .   +   .   gene_id "gene1"; transcript_id "transcript1";
+chr1    example_source  exon    20   40   .   +   .   gene_id "gene1"; transcript_id "transcript1"; 
+EOF
+
+#########################################################################################
+
+echo "> Generate index"
+"$meta_executable" \
+  ${meta_cpus:+---cpus $meta_cpus} \
+  --index "star_index/" \
+  --genomeFastaFiles "genome.fasta" \
+  --sjdbGTFfile "genes.gtf" \
+  --genomeSAindexNbases 2
+# Files that must be present in the generated index directory.
+files=("Genome" "Log.out" "SA" "SAindex" "chrLength.txt" "chrName.txt" "chrNameLength.txt" "chrStart.txt" "exonGeTrInfo.tab" "exonInfo.tab" "geneInfo.tab" "genomeParameters.txt" "sjdbInfo.txt" "sjdbList.fromGTF.out.tab" "sjdbList.out.tab" "transcriptInfo.tab")
+
+echo ">> Check if output exists"
+[ ! -d "star_index" ] && echo "Directory 'star_index' does not exist!" && exit 1
+for file in "${files[@]}"; do
+    [ ! -f "star_index/$file" ] && echo "File '$file' does not exist in 'star_index'." && exit 1
+done
+# Brace groups (not subshells) so that 'exit 1' terminates this script directly.
+echo ">> Check contents of output files"
+grep -q "200" "star_index/chrLength.txt" || { echo "Chromosome length in file 'chrLength.txt' is incorrect! "; exit 1; }
+grep -q "chr1" "star_index/chrName.txt" || { echo "Chromosome name in file 'chrName.txt' is incorrect! "; exit 1; }
+grep -q "chr1	200" "star_index/chrNameLength.txt" || { echo "Chromosome name in file 'chrNameLength.txt' is incorrect! "; exit 1; }
+
+echo ">>> Test finished successfully"
+exit 0

From 528c08c9324440b4fb6eb94fba1d0a5e17a88945 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Mon, 24 Jun 2024 10:10:28 +0200
Subject: [PATCH 09/23] fix package config (#65)

---
 README.md   | 10 +++++-----
 _viash.yaml |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 984a1929..4b497dcd 100644
--- a/README.md
+++ b/README.md
@@ -3,9 +3,9 @@
 # 🌱📦 biobox
 
 [![ViashHub](https://img.shields.io/badge/ViashHub-biobox-7a4baa.png)](https://web.viash-hub.com/packages/biobox)
-[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2Fbiobox-blue.png)](https://github.com/viash-hub/biobbox)
+[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2Fbiobox-blue.png)](https://github.com/viash-hub/biobox)
 [![GitHub
-License](https://img.shields.io/github/license/viash-hub/biobox.png)](https://github.com/viash-hub/biobbox/blob/main/LICENSE)
+License](https://img.shields.io/github/license/viash-hub/biobox.png)](https://github.com/viash-hub/biobox/blob/main/LICENSE)
 [![GitHub
 Issues](https://img.shields.io/github/issues/viash-hub/biobox.png)](https://github.com/viash-hub/biobox/issues)
 [![Viash
@@ -53,7 +53,7 @@ contribute a component to this repository.
 13. Create a `/var/software_versions.txt` file
 
 See the
-[CONTRIBUTING](https://github.com/viash-hub/biobbox/blob/main/CONTRIBUTING.md)
+[CONTRIBUTING](https://github.com/viash-hub/biobox/blob/main/CONTRIBUTING.md)
 file for more details.
 
 ## Support and Community
@@ -63,10 +63,10 @@ For support, questions, or to join our community:
 - **Issues**: Submit questions or issues via the [GitHub issue
   tracker](https://github.com/viash-hub/biobox/issues).
 - **Discussions**: Join our discussions via [GitHub
-  Discussions](https://github.com/viash-hub/biobbox/discussions).
+  Discussions](https://github.com/viash-hub/biobox/discussions).
 
 ## License
 
 This repository is licensed under an MIT license. See the
-[LICENSE](https://github.com/viash-hub/biobbox/blob/main/LICENSE) file
+[LICENSE](https://github.com/viash-hub/biobox/blob/main/LICENSE) file
 for details.
diff --git a/_viash.yaml b/_viash.yaml
index 12b81586..9a240c24 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -5,7 +5,7 @@ license: MIT
 keywords: [bioinformatics, modules, sequencing]
 links:
   issue_tracker: https://github.com/viash-hub/biobox/issues
-  repository: https://github.com/viash-hub/biobbox
+  repository: https://github.com/viash-hub/biobox
 
 viash_version: 0.9.0-RC6
 

From d0c648fb7eefe067f5b5b3d402a204354bb37198 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Mon, 24 Jun 2024 10:15:10 +0200
Subject: [PATCH 10/23] Delete src/bgzip directory (#64)

It was moved to toolbox
---
 src/bgzip/config.vsh.yaml     | 128 ----------------------------------
 src/bgzip/help.txt            |  22 ------
 src/bgzip/test_data/script.sh |  10 ---
 src/bgzip/test_data/test.vcf  |  23 ------
 4 files changed, 183 deletions(-)
 delete mode 100644 src/bgzip/config.vsh.yaml
 delete mode 100644 src/bgzip/help.txt
 delete mode 100644 src/bgzip/test_data/script.sh
 delete mode 100644 src/bgzip/test_data/test.vcf

diff --git a/src/bgzip/config.vsh.yaml b/src/bgzip/config.vsh.yaml
deleted file mode 100644
index 26e31ae4..00000000
--- a/src/bgzip/config.vsh.yaml
+++ /dev/null
@@ -1,128 +0,0 @@
-name: bgzip
-description: Block compression/decompression utility
-links:
-  homepage: https://www.htslib.org/
-  documentation: https://www.htslib.org/doc/bgzip.html
-  repository: https://github.com/samtools/htslib
-references:
-  doi: 10.1093/gigascience/giab007
-license: MIT
-requirements:
-  commands: [ bgzip ]
-argument_groups:
-  - name: Inputs
-    arguments:
-    - name: --input
-      type: file
-      direction: input
-      description: file to be compressed or decompressed
-      required: true
-  - name: Outputs
-    arguments:
-    - name: --output
-      type: file
-      direction: output
-      description: compressed or decompressed output
-      required: true
-    - name: --index_name
-      alternatives: -I
-      type: file
-      direction: output
-      description: name of BGZF index file [file.gz.gzi]
-  - name: Arguments
-    arguments:
-    - name: --offset
-      alternatives: -b
-      type: integer
-      description: decompress at virtual file pointer (0-based uncompressed offset)
-    - name: --decompress
-      alternatives: -d
-      type: boolean_true
-      description: decompress the input file
-    - name: --rebgzip
-      alternatives: -g
-      type: boolean_true
-      description: use an index file to bgzip a file
-    - name: --index
-      alternatives: -i
-      type: boolean_true
-      description: compress and create BGZF index
-    - name: --compress_level
-      alternatives: -l
-      type: integer
-      description: compression level to use when compressing; 0 to 9, or -1 for default [-1]
-      min: -1
-      max: 9
-    - name: --reindex
-      alternatives: -r
-      type: boolean_true
-      description: (re)index the output file
-    - name: --size
-      alternatives: -s
-      type: integer
-      description: decompress INT bytes (uncompressed size)
-      min: 0
-    - name: --test
-      alternatives: -t
-      type: boolean_true
-      description: test integrity of compressed file
-    - name: --binary
-      type: boolean_true
-      description: Don't align blocks with text lines
-resources:
-  - type: bash_script
-    text: |
-      [[ "$par_decompress" == "false" ]] && unset par_decompress
-      [[ "$par_rebgzip" == "false" ]] && unset par_rebgzip
-      [[ "$par_index" == "false" ]] && unset par_index
-      [[ "$par_reindex" == "false" ]] && unset par_reindex
-      [[ "$par_test" == "false" ]] && unset par_test
-      [[ "$par_binary" == "false" ]] && unset par_binary
-      bgzip -c \
-        ${meta_cpus:+--threads "${meta_cpus}"} \
-        ${par_offset:+-b "${par_offset}"} \
-        ${par_decompress:+-d} \
-        ${par_rebgzip:+-g} \
-        ${par_index:+-i} \
-        ${par_index_name:+-I "${par_index_name}"} \
-        ${par_compress_level:+-l "${par_compress_level}"} \
-        ${par_reindex:+-r} \
-        ${par_size:+-s "${par_size}"} \
-        ${par_test:+-t} \
-        ${par_binary:+--binary} \
-        "$par_input" > "$par_output"
-test_resources:
-  - type: bash_script
-    text: |
-      set -e
-
-      "$meta_executable" --input "$meta_resources_dir/test_data/test.vcf" --output "test.vcf.gz"
-
-      echo ">> Checking output of compressing"
-      [ ! -f "test.vcf.gz" ] && echo "Output file test.vcf.gz does not exist" && exit 1
-
-      "$meta_executable" --input "test.vcf.gz" --output "test.vcf" --decompress
-
-      echo ">> Checking output of decompressing"
-      [ ! -f "test.vcf" ] && echo "Output file test.vcf does not exist" && exit 1
-
-      echo ">> Checking original and decompressed files are the same"
-      set +e
-      cmp --silent -- "$meta_resources_dir/test_data/test.vcf" "test.vcf"
-      [ $? -ne 0 ] && echo "files are different" && exit 1
-      set -e
-      
-      echo "> Test successful"
-  - type: file
-    path: test_data
-
-engines:
-  - type: docker
-    image: quay.io/biocontainers/htslib:1.19--h81da01d_0
-    setup:
-      - type: docker
-        run: |
-          bgzip -h | grep 'Version:' 2>&1 |  sed 's/Version:\s\(.*\)/bgzip: "\1"/' > /var/software_versions.txt
-runners:
-  - type: executable
-  - type: nextflow
\ No newline at end of file
diff --git a/src/bgzip/help.txt b/src/bgzip/help.txt
deleted file mode 100644
index d4012efd..00000000
--- a/src/bgzip/help.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-```bash
-bgzip -h
-```
-
-Version: 1.19
-Usage:   bgzip [OPTIONS] [FILE] ...
-Options:
-   -b, --offset INT           decompress at virtual file pointer (0-based uncompressed offset)
-   -c, --stdout               write on standard output, keep original files unchanged
-   -d, --decompress           decompress
-   -f, --force                overwrite files without asking
-   -g, --rebgzip              use an index file to bgzip a file
-   -h, --help                 give this help
-   -i, --index                compress and create BGZF index
-   -I, --index-name FILE      name of BGZF index file [file.gz.gzi]
-   -k, --keep                 don't delete input files during operation
-   -l, --compress-level INT   Compression level to use when compressing; 0 to 9, or -1 for default [-1]
-   -r, --reindex              (re)index compressed file
-   -s, --size INT             decompress INT bytes (uncompressed size)
-   -t, --test                 test integrity of compressed file
-       --binary               Don't align blocks with text lines
-   -@, --threads INT          number of compression threads to use [1]
diff --git a/src/bgzip/test_data/script.sh b/src/bgzip/test_data/script.sh
deleted file mode 100644
index c9114473..00000000
--- a/src/bgzip/test_data/script.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-# bgzip test data
-
-# Test data was obtained from https://github.com/snakemake/snakemake-wrappers/tree/master/bio/bgzip/test.
-
-if [ ! -d /tmp/snakemake-wrappers ]; then
-  git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers
-fi
-
-cp -r /tmp/snakemake-wrappers/bio/bgzip/test/* src/bgzip/test_data
-
diff --git a/src/bgzip/test_data/test.vcf b/src/bgzip/test_data/test.vcf
deleted file mode 100644
index 11b5400e..00000000
--- a/src/bgzip/test_data/test.vcf
+++ /dev/null
@@ -1,23 +0,0 @@
-##fileformat=VCFv4.0
-##fileDate=20090805
-##source=https://www.internationalgenome.org/wiki/Analysis/vcf4.0/
-##reference=1000GenomesPilot-NCBI36
-##phasing=partial
-##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
-##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
-##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
-##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
-##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
-##FILTER=<ID=q10,Description="Quality below 10">
-##FILTER=<ID=s50,Description="Less than 50% of samples have data">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
-#CHROM POS     ID        REF ALT    QUAL FILTER INFO                              FORMAT      NA00001        NA00002        NA00003
-20     14370   rs6054257 G      A       29   PASS   NS=3;DP=14;AF=0.5;DB;H2           GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.
-20     17330   .         T      A       3    q10    NS=3;DP=11;AF=0.017               GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3   0/0:41:3
-20     1110696 rs6040355 A      G,T     67   PASS   NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2   2/2:35:4
-20     1230237 .         T      .       47   PASS   NS=3;DP=13;AA=T                   GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2
-20     1234567 microsat1 GTCT   G,GTACT 50   PASS   NS=3;DP=9;AA=G                    GT:GQ:DP    0/1:35:4       0/2:17:2       1/1:40:3

From 3481750f6e2d77defa28423d832ff251da3a1f9e Mon Sep 17 00:00:00 2001
From: Sai Nirmayi Yasa <92786623+sainirmayi@users.noreply.github.com>
Date: Mon, 24 Jun 2024 11:11:53 +0200
Subject: [PATCH 11/23] Output alignments to the transcriptome (#56)

* Output alignments to  the transcriptome

* Change argument name
---
 src/star/star_align_reads/config.vsh.yaml | 6 ++++++
 src/star/star_align_reads/script.py       | 3 ++-
 src/star/star_align_reads/test.sh         | 3 +++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/star/star_align_reads/config.vsh.yaml b/src/star/star_align_reads/config.vsh.yaml
index 8fdd5256..eab65b35 100644
--- a/src/star/star_align_reads/config.vsh.yaml
+++ b/src/star/star_align_reads/config.vsh.yaml
@@ -72,6 +72,12 @@ argument_groups:
         description: The output file containing the splice junctions.
         direction: output
         example: splice_junctions.tsv
+      - type: file
+        name: --reads_aligned_to_transcriptome
+        required: false
+        description: The output file containing the alignments to transcriptome in BAM formats. This file is generated when --quantMode is set to TranscriptomeSAM.
+        direction: output
+        example: transcriptome_aligned.bam
 # other arguments are defined in a separate file
 __merge__: argument_groups.yaml
 resources:
diff --git a/src/star/star_align_reads/script.py b/src/star/star_align_reads/script.py
index 2bde8798..f3d64a57 100644
--- a/src/star/star_align_reads/script.py
+++ b/src/star/star_align_reads/script.py
@@ -58,7 +58,8 @@
     "log": "Log.final.out",
     "splice_junctions": "SJ.out.tab",
     "unmapped": "Unmapped.out.mate1",
-    "unmapped_r2": "Unmapped.out.mate2"
+    "unmapped_r2": "Unmapped.out.mate2", 
+    "reads_aligned_to_transcriptome": "Aligned.toTranscriptome.out.bam"
 }
 output_paths = {name: par[name] for name in expected_outputs.keys()}
 for name in expected_outputs.keys():
diff --git a/src/star/star_align_reads/test.sh b/src/star/star_align_reads/test.sh
index 374b9014..a15ea599 100644
--- a/src/star/star_align_reads/test.sh
+++ b/src/star/star_align_reads/test.sh
@@ -98,6 +98,7 @@ echo "> Run star_align_reads on SE"
   --reads_per_gene "reads_per_gene.tsv" \
   --outSJtype Standard \
   --splice_junctions "splice_junctions.tsv" \
+  --reads_aligned_to_transcriptome "transcriptome_aligned.bam" \
   ${meta_cpus:+---cpus $meta_cpus}
 
 # TODO: Test data doesn't contain any chimeric reads yet
@@ -111,6 +112,7 @@ assert_file_exists "reads_per_gene.tsv"
 # assert_file_exists "chimeric_junctions.tsv"
 assert_file_exists "splice_junctions.tsv"
 assert_file_exists "unmapped.sam"
+assert_file_exists "transcriptome_aligned.bam"
 
 echo ">> Check if output contents are not empty"
 assert_file_not_empty "output.sam"
@@ -119,6 +121,7 @@ assert_file_not_empty "reads_per_gene.tsv"
 # assert_file_not_empty "chimeric_junctions.tsv"
 # assert_file_not_empty "splice_junctions.tsv" # TODO: test data doesn't contain any splice junctions yet
 assert_file_not_empty "unmapped.sam"
+assert_file_not_empty "transcriptome_aligned.bam"
 
 echo ">> Check if output contents are correct"
 assert_file_contains "log.txt" "Number of input reads \\|	2"

From 3e08b5983fe0fd7d9c3b222044363e760bf6dd4a Mon Sep 17 00:00:00 2001
From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com>
Date: Mon, 1 Jul 2024 12:10:23 +0200
Subject: [PATCH 12/23] BUG: pear component failure is ignored (#70)

---
 CHANGELOG.md       | 8 +++++++-
 src/pear/script.sh | 2 ++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c89bb9b4..bfe2b1b6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,10 @@
-# biobox unreleased
+# biobox x.x.x
+
+## BUG FIXES
+
+* `pear`: fix component not exiting with the correct exitcode when PEAR fails.
+
+# biobox 0.1.0
 
 ## BREAKING CHANGES
 
diff --git a/src/pear/script.sh b/src/pear/script.sh
index f7d6a28f..9eff147b 100644
--- a/src/pear/script.sh
+++ b/src/pear/script.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+set -eo pipefail
+
 ## VIASH START
 ## VIASH END
 

From 1f076bdff73cb9af8151ae3b014f113e69e66f4d Mon Sep 17 00:00:00 2001
From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com>
Date: Mon, 1 Jul 2024 20:35:08 +0200
Subject: [PATCH 13/23] FEAT + BUG: cutadapt; allowing disabling demultiplexing
 and fix par_quality_cutoff_r2 (#69)

* FEAT: Disable cutadapt demultiplexing by default

* Cutadapt: fix --par_quality_cutoff_r2
---
 CHANGELOG.md                 |  5 +++++
 src/cutadapt/config.vsh.yaml | 18 ++++++++++++++++++
 src/cutadapt/script.sh       | 31 ++++++++++++++++++++++++++-----
 src/cutadapt/test.sh         |  5 +++++
 4 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bfe2b1b6..b5c403af 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,10 @@
 
 * `pear`: fix component not exiting with the correct exitcode when PEAR fails.
 
+* `cutadapt`: fix `--par_quality_cutoff_r2` argument.
+
+* `cutadapt`: demultiplexing is now disabled by default. It can be re-enabled by using `demultiplex_mode`.
+
 # biobox 0.1.0
 
 ## BREAKING CHANGES
@@ -12,6 +16,7 @@
   Viash 0.9.0 in order to avoid issues with the current default separator `:` unintentionally
   splitting up certain file paths.
 
+
 ## NEW FEATURES
 
 * `arriba`: Detect gene fusions from RNA-seq data (PR #1).
diff --git a/src/cutadapt/config.vsh.yaml b/src/cutadapt/config.vsh.yaml
index a62f0aa9..b315d0ce 100644
--- a/src/cutadapt/config.vsh.yaml
+++ b/src/cutadapt/config.vsh.yaml
@@ -240,6 +240,24 @@ argument_groups:
           Check both the read and its reverse complement for adapter
           matches. If match is on reverse-complemented version,
           output that one.
+  
+  ####################################################################
+  - name: "Demultiplexing options"
+    arguments:
+      - name: "--demultiplex_mode"
+        type: string
+        choices: ["single", "unique_dual", "combinatorial_dual"]
+        required: false
+        description: |
+          Enable demultiplexing and set the mode for it.
+          With mode 'unique_dual', adapters from the first and second read are used,
+          and the indexes from the reads are only used in pairs. This implies
+          --pair_adapters.
+          Enabling mode 'combinatorial_dual' allows all combinations of the sets of indexes
+          on R1 and R2. It is necessary to write each read pair to an output
+          file depending on the adapters found on both R1 and R2.
+          Mode 'single', uses indexes or barcodes located at the 5'
+          end of the R1 read (single). 
 
   ####################################################################
   - name: Read modifications
diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh
index 5e1f9e30..20c92724 100644
--- a/src/cutadapt/script.sh
+++ b/src/cutadapt/script.sh
@@ -127,7 +127,7 @@ mod_args=$(echo \
   ${par_cut_r2:+--cut_r2 "${par_cut_r2}"} \
   ${par_nextseq_trim:+--nextseq-trim "${par_nextseq_trim}"} \
   ${par_quality_cutoff:+--quality-cutoff "${par_quality_cutoff}"} \
-  ${par_quality_cutoff_r2:+--quality-cutoff_r2 "${par_quality_cutoff_r2}"} \
+  ${par_quality_cutoff_r2:+-Q "${par_quality_cutoff_r2}"} \
   ${par_quality_base:+--quality-base "${par_quality_base}"} \
   ${par_poly_a:+--poly-a} \
   ${par_length:+--length "${par_length}"} \
@@ -196,14 +196,35 @@ else
   ext="fasta"
 fi
 
-if [ $mode = "se" ]; then
+demultiplex_mode="$par_demultiplex_mode"
+if [[ $mode == "se" ]]; then
+  if [[ "$demultiplex_mode" == "unique_dual" ]] || [[ "$demultiplex_mode" == "combinatorial_dual" ]]; then
+    echo "Demultiplexing dual indexes is not possible with single-end data."
+    exit 1
+  fi
+  prefix="trimmed_"
+  if [[ ! -z "$demultiplex_mode" ]]; then
+    prefix="{name}_"
+  fi
   output_args=$(echo \
-    --output "$output_dir/{name}_001.$ext" \
+    --output "$output_dir/${prefix}001.$ext" \
   )
 else
+  demultiplex_indicator_r1='{name}_'
+  demultiplex_indicator_r2=$demultiplex_indicator_r1
+  if [[ "$demultiplex_mode" == "combinatorial_dual" ]]; then
+    demultiplex_indicator_r1='{name1}_{name2}_'
+    demultiplex_indicator_r2='{name1}_{name2}_'
+  fi
+  prefix_r1="trimmed_"
+  prefix_r2="trimmed_"
+  if [[ ! -z "$demultiplex_mode" ]]; then
+    prefix_r1=$demultiplex_indicator_r1
+    prefix_r2=$demultiplex_indicator_r2
+  fi
   output_args=$(echo \
-    --output "$output_dir/{name}_R1_001.$ext" \
-    --paired-output "$output_dir/{name}_R2_001.$ext" \
+    --output "$output_dir/${prefix_r1}R1_001.$ext" \
+    --paired-output "$output_dir/${prefix_r2}R2_001.$ext" \
   )
 fi
 
diff --git a/src/cutadapt/test.sh b/src/cutadapt/test.sh
index eff997d7..1d6d9c18 100644
--- a/src/cutadapt/test.sh
+++ b/src/cutadapt/test.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
 set -e
+set -eo pipefail
 
 #############################################
 # helper functions
@@ -57,6 +58,7 @@ EOF
   --adapter ADAPTER \
   --input example.fa \
   --fasta \
+  --demultiplex_mode single \
   --no_match_adapter_wildcards \
   --json
 
@@ -101,6 +103,7 @@ EOF
   --output "out_test1/*.fasta" \
   --adapter ADAPTER \
   --input example.fa \
+  --demultiplex_mode single \
   --fasta \
   --no_match_adapter_wildcards \
   --json
@@ -160,6 +163,7 @@ EOF
   --adapter AAAAA \
   --adapter_fasta adapters1.fasta \
   --adapter_fasta adapters2.fasta \
+  --demultiplex_mode single \
   --input example.fa \
   --fasta \
   --json
@@ -224,6 +228,7 @@ EOF
   --input example_R1.fastq \
   --input_r2 example_R2.fastq \
   --quality_cutoff 20 \
+  --demultiplex_mode unique_dual \
   --json \
   ---cpus 1
 

From ec69d9af7a59c2618a49bef7d0bf9afc743ca065 Mon Sep 17 00:00:00 2001
From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com>
Date: Mon, 1 Jul 2024 21:58:03 +0200
Subject: [PATCH 14/23] FEAT: update busco to 5.7.1 (#72)

* FEAT: update busco to 5.7.1

* Typo
---
 CHANGELOG.md                                      | 4 ++++
 src/busco/busco_download_datasets/config.vsh.yaml | 2 +-
 src/busco/busco_list_datasets/config.vsh.yaml     | 2 +-
 src/busco/busco_run/config.vsh.yaml               | 6 +++++-
 src/busco/busco_run/help.txt                      | 9 ++++++---
 src/busco/busco_run/script.sh                     | 1 +
 6 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b5c403af..3a036fba 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,10 @@
 
 * `cutadapt`: demultiplexing is now disabled by default. It can be re-enabled by using `demultiplex_mode`.
 
+## MINOR CHANGES
+
+* `busco` components: update BUSCO to `5.7.1`.
+
 # biobox 0.1.0
 
 ## BREAKING CHANGES
diff --git a/src/busco/busco_download_datasets/config.vsh.yaml b/src/busco/busco_download_datasets/config.vsh.yaml
index 04d76dd6..5297af2e 100644
--- a/src/busco/busco_download_datasets/config.vsh.yaml
+++ b/src/busco/busco_download_datasets/config.vsh.yaml
@@ -37,7 +37,7 @@ test_resources:
     path: test.sh
 engines:
   - type: docker
-    image: quay.io/biocontainers/busco:5.6.1--pyhdfd78af_0
+    image: quay.io/biocontainers/busco:5.7.1--pyhdfd78af_0
     setup:
       - type: docker
         run: |
diff --git a/src/busco/busco_list_datasets/config.vsh.yaml b/src/busco/busco_list_datasets/config.vsh.yaml
index 6ada7c84..cac34cc6 100644
--- a/src/busco/busco_list_datasets/config.vsh.yaml
+++ b/src/busco/busco_list_datasets/config.vsh.yaml
@@ -29,7 +29,7 @@ test_resources:
     path: test.sh
 engines:
   - type: docker
-    image: quay.io/biocontainers/busco:5.6.1--pyhdfd78af_0
+    image: quay.io/biocontainers/busco:5.7.1--pyhdfd78af_0
     setup:
       - type: docker
         run: |
diff --git a/src/busco/busco_run/config.vsh.yaml b/src/busco/busco_run/config.vsh.yaml
index d79f03f5..23ee95fb 100644
--- a/src/busco/busco_run/config.vsh.yaml
+++ b/src/busco/busco_run/config.vsh.yaml
@@ -181,6 +181,10 @@ argument_groups:
 
   - name: MetaEuk Settings
     arguments:
+      - name: --metaeuk
+        type: boolean_true
+        description: |
+          Use Metaeuk gene predictor.
       - name: --metaeuk_parameters
         type: string
         description: |
@@ -204,7 +208,7 @@ test_resources:
     path: test_data
 engines:
   - type: docker
-    image: quay.io/biocontainers/busco:5.6.1--pyhdfd78af_0
+    image: quay.io/biocontainers/busco:5.7.1--pyhdfd78af_0
     setup:
       - type: docker
         run: |
diff --git a/src/busco/busco_run/help.txt b/src/busco/busco_run/help.txt
index 2cacec4d..6d83f9be 100644
--- a/src/busco/busco_run/help.txt
+++ b/src/busco/busco_run/help.txt
@@ -2,7 +2,9 @@
 busco -h
 ```
 
-Welcome to BUSCO 5.6.1: the Benchmarking Universal Single-Copy Ortholog assessment tool.
+usage: busco -i [SEQUENCE_FILE] -l [LINEAGE] -o [OUTPUT_NAME] -m [MODE] [OTHER OPTIONS]
+
+Welcome to BUSCO 5.7.1: the Benchmarking Universal Single-Copy Ortholog assessment tool.
 For more detailed usage information, please review the README file provided with this distribution and the BUSCO user guide. Visit this page https://gitlab.com/ezlab/busco#how-to-cite-busco to see how to cite BUSCO
 
 optional arguments:
@@ -18,7 +20,7 @@ optional arguments:
   -l LINEAGE, --lineage_dataset LINEAGE
                         Specify the name of the BUSCO lineage to be used.
   --augustus            Use augustus gene predictor for eukaryote runs
-  --augustus_parameters --PARAM1=VALUE1,--PARAM2=VALUE2
+  --augustus_parameters "--PARAM1=VALUE1,--PARAM2=VALUE2"
                         Pass additional arguments to Augustus. All arguments should be contained within a single string with no white space, with each argument separated by a comma.
   --augustus_species AUGUSTUS_SPECIES
                         Specify a species for Augustus training.
@@ -42,11 +44,12 @@ optional arguments:
   --limit N             How many candidate regions (contig or transcript) to consider per BUSCO (default: 3)
   --list-datasets       Print the list of available BUSCO datasets
   --long                Optimization Augustus self-training mode (Default: Off); adds considerably to the run time, but can improve results for some non-model organisms
+  --metaeuk             Use Metaeuk gene predictor
   --metaeuk_parameters "--PARAM1=VALUE1,--PARAM2=VALUE2"
                         Pass additional arguments to Metaeuk for the first run. All arguments should be contained within a single string with no white space, with each argument separated by a comma.
   --metaeuk_rerun_parameters "--PARAM1=VALUE1,--PARAM2=VALUE2"
                         Pass additional arguments to Metaeuk for the second run. All arguments should be contained within a single string with no white space, with each argument separated by a comma.
-  --miniprot            Use miniprot gene predictor
+  --miniprot            Use Miniprot gene predictor
   --skip_bbtools        Skip BBTools for assembly statistics
   --offline             To indicate that BUSCO cannot attempt to download files
   --opt-out-run-stats   Opt out of data collection. Information on the data collected is available in the user guide.
diff --git a/src/busco/busco_run/script.sh b/src/busco/busco_run/script.sh
index 5b562f83..a0ef24de 100644
--- a/src/busco/busco_run/script.sh
+++ b/src/busco/busco_run/script.sh
@@ -39,6 +39,7 @@ busco \
     ${par_force:+--force} \
     ${par_limit:+--limit "$par_limit"} \
     ${par_long:+--long} \
+    ${par_metaeuk:+--metaeuk} \
     ${par_metaeuk_parameters:+--metaeuk_parameters "$par_metaeuk_parameters"} \
     ${par_metaeuk_rerun_parameters:+--metaeuk_rerun_parameters "$par_metaeuk_rerun_parameters"} \
     ${par_miniprot:+--miniprot} \

From 7544a75a51760ca4c7980e73b3147526c5bd7fc2 Mon Sep 17 00:00:00 2001
From: emmarousseau <emmarou1@icloud.com>
Date: Fri, 5 Jul 2024 14:50:17 +0100
Subject: [PATCH 15/23] Samtools fasta (#53)

* initial commit dedup

* Revert "initial commit dedup"

This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2.

* Fasta component

* change script resource to samtools_fastq script, with dummy argument to specify the command

* add dummy argument to samtools_fastq to share the script with samtools_fasta

* fix path to script in config

* Update src/samtools/samtools_fastq/script.sh

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>

* Change default fields to examples

* Two more default fields changed to examples

* Minor formatting changes

* Markdown formatting changes in configs

---------

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>
---
 CHANGELOG.md                                  |   2 +
 src/samtools/samtools_fasta/config.vsh.yaml   | 191 ++++++++++++++++++
 src/samtools/samtools_fasta/help.txt          |  80 ++++++++
 src/samtools/samtools_fasta/test.sh           |  96 +++++++++
 src/samtools/samtools_fasta/test_data/a.1.fa  |   6 +
 src/samtools/samtools_fasta/test_data/a.2.fa  |   6 +
 src/samtools/samtools_fasta/test_data/a.bam   | Bin 0 -> 184 bytes
 src/samtools/samtools_fasta/test_data/a.fa    |  12 ++
 src/samtools/samtools_fasta/test_data/a.sam   |   7 +
 src/samtools/samtools_fasta/test_data/half.fa |   6 +
 .../samtools_fasta/test_data/script.sh        |  11 +
 src/samtools/samtools_fastq/config.vsh.yaml   |  47 +++--
 src/samtools/samtools_fastq/script.sh         |   9 +-
 13 files changed, 450 insertions(+), 23 deletions(-)
 create mode 100644 src/samtools/samtools_fasta/config.vsh.yaml
 create mode 100644 src/samtools/samtools_fasta/help.txt
 create mode 100644 src/samtools/samtools_fasta/test.sh
 create mode 100644 src/samtools/samtools_fasta/test_data/a.1.fa
 create mode 100644 src/samtools/samtools_fasta/test_data/a.2.fa
 create mode 100644 src/samtools/samtools_fasta/test_data/a.bam
 create mode 100644 src/samtools/samtools_fasta/test_data/a.fa
 create mode 100644 src/samtools/samtools_fasta/test_data/a.sam
 create mode 100644 src/samtools/samtools_fasta/test_data/half.fa
 create mode 100755 src/samtools/samtools_fasta/test_data/script.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3a036fba..2e612a11 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -66,6 +66,8 @@
     - `samtools/samtools_collate`: Shuffles and groups reads in SAM/BAM/CRAM files together by their names (PR #42).
     - `samtools/samtools_view`: Views and converts SAM/BAM/CRAM files (PR #48).
     - `samtools/samtools_fastq`: Converts a SAM/BAM/CRAM file to FASTQ (PR #52).
+    - `samtools/samtools_fasta`: Converts a SAM/BAM/CRAM file to FASTA (PR #53).
+
 
 * `falco`: A C++ drop-in replacement of FastQC to assess the quality of sequence read data (PR #43).
 
diff --git a/src/samtools/samtools_fasta/config.vsh.yaml b/src/samtools/samtools_fasta/config.vsh.yaml
new file mode 100644
index 00000000..23517f6c
--- /dev/null
+++ b/src/samtools/samtools_fasta/config.vsh.yaml
@@ -0,0 +1,191 @@
+name: samtools_fasta
+namespace: samtools
+description: Converts a SAM, BAM or CRAM to FASTA format.
+keywords: [fasta, bam, sam, cram]
+links:
+  homepage: https://www.htslib.org/
+  documentation: https://www.htslib.org/doc/samtools-fasta.html
+  repository: https://github.com/samtools/samtools
+references: 
+  doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
+license: MIT/Expat
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --input
+        type: file
+        description: input SAM/BAM/CRAM file
+        required: true
+  - name: Outputs
+    arguments:
+      - name: --output
+        type: file
+        description: output FASTA file
+        required: true
+        direction: output
+  - name: Options
+    arguments:
+      - name: --no_suffix
+        alternatives: -n
+        type: boolean_true
+        description: |
+          By default, either '/1' or '/2' is added to the end of read names where the corresponding 
+          READ1 or READ2 FLAG bit is set. Using -n causes read names to be left as they are.
+      - name: --suffix
+        alternatives: -N
+        type: boolean_true
+        description: |
+          Always add either '/1' or '/2' to the end of read names even when put into different files.
+      - name: --use_oq
+        alternatives: -O
+        type: boolean_true
+        description: |
+          Use quality values from OQ tags in preference to standard quality string if available.
+      - name: --singleton
+        alternatives: -s
+        type: file
+        description: write singleton reads to FILE.
+      - name: --copy_tags
+        alternatives: -t
+        type: boolean_true
+        description: |
+          Copy RG, BC and QT tags to the FASTA header line, if they exist.
+      - name: --copy_tags_list
+        alternatives: -T
+        type: string
+        description: |
+          Specify a comma-separated list of tags to copy to the FASTA header line, if they exist. 
+          TAGLIST can be blank or `*` to indicate all tags should be copied to the output. If using `*`, 
+          be careful to quote it to avoid unwanted shell expansion.
+      - name: --read1
+        alternatives: -1
+        type: file
+        description: |
+          Write reads with the READ1 FLAG set (and READ2 not set) to FILE instead of outputting them. 
+          If the -s option is used, only paired reads will be written to this file.
+        direction: output
+      - name: --read2
+        alternatives: -2
+        type: file
+        description: |
+          Write reads with the READ2 FLAG set (and READ1 not set) to FILE instead of outputting them. 
+          If the -s option is used, only paired reads will be written to this file.
+        direction: output
+      - name: --output_reads
+        alternatives: -o
+        type: file
+        description: |
+          Write reads with either READ1 FLAG or READ2 flag set to FILE instead of outputting them to stdout. 
+          This is equivalent to -1 FILE -2 FILE.
+        direction: output
+      - name: --output_reads_both
+        alternatives: -0
+        type: file
+        description: |
+          Write reads where the READ1 and READ2 FLAG bits set are either both set or both unset to FILE 
+          instead of outputting them.
+        direction: output
+      - name: --filter_flags
+        alternatives: -f
+        type: integer
+        description: |
+          Only output alignments with all bits set in INT present in the FLAG field. INT can be specified 
+          in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with '0' 
+          (i.e. /^0[0-7]+/). Default: `0`.
+        example: 0
+      - name: --excl_flags
+        alternatives: -F
+        type: string
+        description: |
+          Do not output alignments with any bits set in INT present in the FLAG field. INT can be specified 
+          in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with '0'
+          (i.e. /^0[0-7]+/). This defaults to 0x900 representing filtering of secondary and 
+          supplementary alignments. Default: `0x900`.
+        example: "0x900"
+      - name: --incl_flags
+        alternatives: --rf
+        type: string
+        description: |
+          Only output alignments with any bits set in INT present in the FLAG field. INT can be specified 
+          in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/), in octal by beginning with '0'
+          (i.e. /^0[0-7]+/), as a decimal number not beginning with '0' or as a comma-separated list of 
+          flag names. Default: `0`.
+        example: 0
+      - name: --excl_flags_all
+        alternatives: -G
+        type: integer
+        description: |
+          Only EXCLUDE reads with all of the bits set in INT present in the FLAG field. INT can be specified 
+          in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with '0' (i.e. /^0[0-7]+/).
+          Default: `0`.
+        example: 0
+      - name: --aux_tag
+        alternatives: -d
+        type: string
+        description: |
+          Only output alignments containing an auxiliary tag matching both TAG and VAL. If VAL is omitted 
+          then any value is accepted. The tag types supported are i, f, Z, A and H. "B" arrays are not 
+          supported. This is comparable to the method used in samtools view --tag. The option may be specified 
+          multiple times and is equivalent to using the --aux_tag_file option.
+      - name: --aux_tag_file
+        alternatives: -D
+        type: string
+        description: |
+          Only output alignments containing an auxiliary tag matching TAG and having a value listed in FILE. 
+          The format of the file is one line per value. This is equivalent to specifying --aux_tag multiple times.
+      - name: --casava
+        alternatives: -i
+        type: boolean_true
+        description: add Illumina Casava 1.8 format entry to header (eg 1:N:0:ATCACG)
+      - name: --compression
+        alternatives: -c
+        type: integer
+        description: set compression level when writing gz or bgzf fasta files.
+        example: 0
+      - name: --index1
+        alternatives: --i1
+        type: file
+        description: write first index reads to FILE.
+      - name: --index2
+        alternatives: --i2
+        type: file
+        description: write second index reads to FILE.
+      - name: --barcode_tag
+        type: string
+        description: |
+          Auxiliary tag to find index reads in. Default: `BC`.
+        example: "BC"
+      - name: --quality_tag
+        type: string
+        description: |
+          Auxiliary tag to find index quality in. Default: `QT`.
+        example: "QT"
+      - name: --index_format
+        type: string
+        description: |
+          string to describe how to parse the barcode and quality tags. For example:
+          * `i14i8`: the first 14 characters are index 1, the next 8 characters are index 2.
+          * `n8i14`: ignore the first 8 characters, and use the next 14 characters for index 1.
+          If the tag contains a separator, then the numeric part can be replaced with `*` to mean 
+          'read until the separator or end of tag', for example: `n*i*`.
+
+resources:
+  - type: bash_script
+    path: ../samtools_fastq/script.sh
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - type: file
+    path: test_data
+engines:
+  - type: docker
+    image: quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1
+    setup:
+      - type: docker
+        run: |
+          samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \
+          sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt
+runners:
+- type: executable
+- type: nextflow
diff --git a/src/samtools/samtools_fasta/help.txt b/src/samtools/samtools_fasta/help.txt
new file mode 100644
index 00000000..39ed0d00
--- /dev/null
+++ b/src/samtools/samtools_fasta/help.txt
@@ -0,0 +1,80 @@
+```
+samtools fasta
+```
+
+Usage: samtools fasta [options...] <in.bam>
+
+Description:
+Converts a SAM, BAM or CRAM to FASTA format.
+
+Options:
+  -0 FILE      write reads designated READ_OTHER to FILE
+  -1 FILE      write reads designated READ1 to FILE
+  -2 FILE      write reads designated READ2 to FILE
+  -o FILE      write reads designated READ1 or READ2 to FILE
+               note: if a singleton file is specified with -s, only
+               paired reads will be written to the -1 and -2 files.
+  -d, --tag TAG[:VAL]
+               only include reads containing TAG, optionally with value VAL
+  -f, --require-flags INT
+               only include reads with all  of the FLAGs in INT present [0]
+  -F, --excl[ude]-flags INT
+               only include reads with none of the FLAGs in INT present [0x900]
+      --rf, --incl[ude]-flags INT
+               only include reads with any  of the FLAGs in INT present [0]
+  -G INT       only EXCLUDE reads with all  of the FLAGs in INT present [0]
+  -n           don't append /1 and /2 to the read name
+  -N           always append /1 and /2 to the read name
+  -O           output quality in the OQ tag if present
+  -s FILE      write singleton reads designated READ1 or READ2 to FILE
+  -t           copy RG, BC and QT tags to the FASTA header line
+  -T TAGLIST   copy arbitrary tags to the FASTA header line, '*' for all
+  -v INT       default quality score if not given in file [1]
+  -i           add Illumina Casava 1.8 format entry to header (eg 1:N:0:ATCACG)
+  -c INT       compression level [0..9] to use when writing bgzf files [1]
+  --i1 FILE    write first index reads to FILE
+  --i2 FILE    write second index reads to FILE
+  --barcode-tag TAG
+               Barcode tag [BC]
+  --quality-tag TAG
+               Quality tag [QT]
+  --index-format STR
+               How to parse barcode and quality tags
+
+      --input-fmt-option OPT[=VAL]
+               Specify a single input file format option in the form
+               of OPTION or OPTION=VALUE
+      --reference FILE
+               Reference sequence FASTA FILE [null]
+  -@, --threads INT
+               Number of additional threads to use [0]
+      --verbosity INT
+               Set level of verbosity
+
+The files will be automatically compressed if the file names have a .gz
+or .bgzf extension.  The input to this program must be collated by name.
+Run 'samtools collate' or 'samtools sort -n' to achieve this.
+
+Reads are designated READ1 if FLAG READ1 is set and READ2 is not set.
+Reads are designated READ2 if FLAG READ1 is not set and READ2 is set.
+Otherwise reads are designated READ_OTHER (both flags set or both flags unset).
+Run 'samtools flags' for more information on flag codes and meanings.
+
+The index-format string describes how to parse the barcode and quality tags.
+It is made up of 'i' or 'n' followed by a length or '*'.  For example:
+   i14i8       The first 14 characters are index 1, the next 8 are index 2
+   n8i14       Ignore the first 8 characters, and use the next 14 for index 1
+
+If the tag contains a separator, then the numeric part can be replaced with
+'*' to mean 'read until the separator or end of tag', for example:
+   i*i*        Break the tag at the separator into index 1 and index 2
+   n*i*        Ignore the left part of the tag until the separator,
+               then use the second part of the tag as index 1
+
+Examples:
+To get just the paired reads in separate files, use:
+   samtools fastq -1 pair1.fq -2 pair2.fq -0 /dev/null -s /dev/null -n in.bam
+
+To get all non-supplementary/secondary reads in a single file, redirect
+the output:
+   samtools fastq in.bam > all_reads.fq
\ No newline at end of file
diff --git a/src/samtools/samtools_fasta/test.sh b/src/samtools/samtools_fasta/test.sh
new file mode 100644
index 00000000..687965ae
--- /dev/null
+++ b/src/samtools/samtools_fasta/test.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+test_dir="${meta_resources_dir}/test_data"
+out_dir="${meta_resources_dir}/out_data"
+
+############################################################################################
+
+echo ">>> Test 1: Convert all reads from a bam file to fasta format"
+"$meta_executable" \
+  --input "$test_dir/a.bam" \
+  --output "$out_dir/a.fa"
+
+echo ">>> Check if output file exists"
+[ ! -f "$out_dir/a.fa" ] && echo "Output file a.fa does not exist" && exit 1
+
+echo ">>> Check if output is empty"
+[ ! -s "$out_dir/a.fa" ] && echo "Output file a.fa is empty" && exit 1
+
+echo ">>> Check if output matches expected output"
+diff "$out_dir/a.fa" "$test_dir/a.fa" || 
+  (echo "Output file a.fa does not match expected output" && exit 1)
+
+rm "$out_dir/a.fa"
+
+############################################################################################
+
+echo ">>> Test 2: Convert all reads from a sam file to fasta format"
+"$meta_executable" \
+  --input "$test_dir/a.sam" \
+  --output "$out_dir/a.fa"
+
+echo ">>> Check if output file exists"
+[ ! -f "$out_dir/a.fa" ] && echo "Output file a.fa does not exist" && exit 1
+
+echo ">>> Check if output is empty"
+[ ! -s "$out_dir/a.fa" ] && echo "Output file a.fa is empty" && exit 1
+
+echo ">>> Check if output matches expected output"
+diff "$out_dir/a.fa" "$test_dir/a.fa" || 
+  (echo "Output file a.fa does not match expected output" && exit 1)
+
+rm "$out_dir/a.fa"
+
+############################################################################################
+
+echo ">>> Test 3: Output reads from bam file to separate files"
+
+"$meta_executable" \
+  --input "$test_dir/a.bam" \
+  --read1 "$out_dir/a.1.fa" \
+  --read2 "$out_dir/a.2.fa" \
+  --output "$out_dir/a.fa"
+
+echo ">>> Check if output files exist"
+[ ! -f "$out_dir/a.1.fa" ] && echo "Output file a.1.fa does not exist" && exit 1
+[ ! -f "$out_dir/a.2.fa" ] && echo "Output file a.2.fa does not exist" && exit 1
+[ ! -f "$out_dir/a.fa" ] && echo "Output file a.fa does not exist" && exit 1
+
+echo ">>> Check if output files are empty"
+[ ! -s "$out_dir/a.1.fa" ] && echo "Output file a.1.fa is empty" && exit 1
+[ ! -s "$out_dir/a.2.fa" ] && echo "Output file a.2.fa is empty" && exit 1
+# output should be empty since input has no singleton reads
+
+echo ">>> Check if output files match expected output"
+diff "$out_dir/a.1.fa" "$test_dir/a.1.fa" || 
+  (echo "Output file a.1.fa does not match expected output" && exit 1)
+diff "$out_dir/a.2.fa" "$test_dir/a.2.fa" ||
+  (echo "Output file a.2.fa does not match expected output" && exit 1)
+
+rm "$out_dir/a.1.fa" "$out_dir/a.2.fa" "$out_dir/a.fa"
+
+############################################################################################
+
+echo ">>> Test 4: Output only forward reads from bam file to fasta format"
+
+"$meta_executable" \
+  --input "$test_dir/a.sam" \
+  --excl_flags "0x80" \
+  --output "$out_dir/half.fa"
+
+echo ">>> Check if output file exists"
+[ ! -f "$out_dir/half.fa" ] && echo "Output file half.fa does not exist" && exit 1
+
+echo ">>> Check if output is empty"
+[ ! -s "$out_dir/half.fa" ] && echo "Output file half.fa is empty" && exit 1
+
+echo ">>> Check if output matches expected output"
+diff "$out_dir/half.fa" "$test_dir/half.fa" || 
+  (echo "Output file half.fa does not match expected output" && exit 1)
+
+rm "$out_dir/half.fa"
+
+############################################################################################
+
+echo "All tests succeeded!"
+exit 0
\ No newline at end of file
diff --git a/src/samtools/samtools_fasta/test_data/a.1.fa b/src/samtools/samtools_fasta/test_data/a.1.fa
new file mode 100644
index 00000000..2c9fdbe5
--- /dev/null
+++ b/src/samtools/samtools_fasta/test_data/a.1.fa
@@ -0,0 +1,6 @@
+>a1
+AAAAAAAAAA
+>b1
+AAAAAAAAAA
+>c1
+AAAAAAAAAA
diff --git a/src/samtools/samtools_fasta/test_data/a.2.fa b/src/samtools/samtools_fasta/test_data/a.2.fa
new file mode 100644
index 00000000..2c9fdbe5
--- /dev/null
+++ b/src/samtools/samtools_fasta/test_data/a.2.fa
@@ -0,0 +1,6 @@
+>a1
+AAAAAAAAAA
+>b1
+AAAAAAAAAA
+>c1
+AAAAAAAAAA
diff --git a/src/samtools/samtools_fasta/test_data/a.bam b/src/samtools/samtools_fasta/test_data/a.bam
new file mode 100644
index 0000000000000000000000000000000000000000..dba1268acbd8446e4fde54d7da33434597fbe635
GIT binary patch
literal 184
zcmb2|=3rp}f&Xj_PR>jWb_~TuUs6R95)ukH_@3~5+q`PUgD)R98yP)FV(BtuE_7vW
z=9s|5aI{h|P#vgC9!+};gK@G0Lz<hvqFY-Eo1TWeOE*K|BE`c@3aT7Ta|#mNTF$9#
zDph-Y^(5<cOUZS@EcgEW|1baQ7N3BEZ@SP;tH~Q*u`nF>-KrbIh-tVq12fpEAOZjf
CvNY8I

literal 0
HcmV?d00001

diff --git a/src/samtools/samtools_fasta/test_data/a.fa b/src/samtools/samtools_fasta/test_data/a.fa
new file mode 100644
index 00000000..693cd395
--- /dev/null
+++ b/src/samtools/samtools_fasta/test_data/a.fa
@@ -0,0 +1,12 @@
+>a1/1
+AAAAAAAAAA
+>b1/1
+AAAAAAAAAA
+>c1/1
+AAAAAAAAAA
+>a1/2
+AAAAAAAAAA
+>b1/2
+AAAAAAAAAA
+>c1/2
+AAAAAAAAAA
diff --git a/src/samtools/samtools_fasta/test_data/a.sam b/src/samtools/samtools_fasta/test_data/a.sam
new file mode 100644
index 00000000..aa8c77b3
--- /dev/null
+++ b/src/samtools/samtools_fasta/test_data/a.sam
@@ -0,0 +1,7 @@
+@SQ	SN:xx	LN:20
+a1	99	xx	1	1	10M	=	11	20	AAAAAAAAAA	**********
+b1	99	xx	1	1	10M	=	11	20	AAAAAAAAAA	**********
+c1	99	xx	1	1	10M	=	11	20	AAAAAAAAAA	**********
+a1	147	xx	11	1	10M	=	1	-20	TTTTTTTTTT	**********
+b1	147	xx	11	1	10M	=	1	-20	TTTTTTTTTT	**********
+c1	147	xx	11	1	10M	=	1	-20	TTTTTTTTTT	**********
diff --git a/src/samtools/samtools_fasta/test_data/half.fa b/src/samtools/samtools_fasta/test_data/half.fa
new file mode 100644
index 00000000..36cd438c
--- /dev/null
+++ b/src/samtools/samtools_fasta/test_data/half.fa
@@ -0,0 +1,6 @@
+>a1/1
+AAAAAAAAAA
+>b1/1
+AAAAAAAAAA
+>c1/1
+AAAAAAAAAA
diff --git a/src/samtools/samtools_fasta/test_data/script.sh b/src/samtools/samtools_fasta/test_data/script.sh
new file mode 100755
index 00000000..b59bc1bd
--- /dev/null
+++ b/src/samtools/samtools_fasta/test_data/script.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# download test data from snakemake wrapper
+if [ ! -d /tmp/fastq_source ]; then
+  git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers.git /tmp/fastq_source
+fi
+
+cp -r /tmp/fastq_source/bio/samtools/fastx/test/*.sam src/samtools/samtools_fastq/test_data/
+cp -r /tmp/fastq_source/bio/samtools/fastq/interleaved/test/mapped/*.bam src/samtools/samtools_fastq/test_data/
+cp -r /tmp/fastq_source/bio/samtools/fastq/interleaved/test/reads/*.fq src/samtools/samtools_fastq/test_data/
+cp -r /tmp/fastq_source/bio/samtools/fastq/separate/test/reads/*.fq src/samtools/samtools_fastq/test_data/
\ No newline at end of file
diff --git a/src/samtools/samtools_fastq/config.vsh.yaml b/src/samtools/samtools_fastq/config.vsh.yaml
index 39e926f0..cac7653b 100644
--- a/src/samtools/samtools_fastq/config.vsh.yaml
+++ b/src/samtools/samtools_fastq/config.vsh.yaml
@@ -56,7 +56,7 @@ argument_groups:
         type: string
         description: |
           Specify a comma-separated list of tags to copy to the FASTQ header line, if they exist. 
-          TAGLIST can be blank or * to indicate all tags should be copied to the output. If using *, 
+          TAGLIST can be blank or `*` to indicate all tags should be copied to the output. If using `*`, 
           be careful to quote it to avoid unwanted shell expansion.
       - name: --read1
         alternatives: -1
@@ -91,35 +91,35 @@ argument_groups:
         type: integer
         description: |
           Only output alignments with all bits set in INT present in the FLAG field. INT can be specified 
-          in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0' 
-          (i.e. /^0[0-7]+/).
-        default: 0
+          in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with '0'
+          (i.e. /^0[0-7]+/). Default: `0`.
+        example: 0
       - name: --excl_flags
         alternatives: -F
         type: string
         description: |
           Do not output alignments with any bits set in INT present in the FLAG field. INT can be specified 
-          in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0' 
+          in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with '0' 
           (i.e. /^0[0-7]+/). This defaults to 0x900 representing filtering of secondary and 
-          supplementary alignments.
-        default: 0x900
+          supplementary alignments. Default: `0x900`.
+        example: "0x900"
       - name: --incl_flags
         alternatives: --rf
         type: string
         description: |
           Only output alignments with any bits set in INT present in the FLAG field. INT can be specified 
-          in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/), in octal by beginning with `0' 
+          in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/), in octal by beginning with '0'
           (i.e. /^0[0-7]+/), as a decimal number not beginning with '0' or as a comma-separated list of 
-          flag names.
-        default: 0
+          flag names. Default: `0`.
+        example: 0
       - name: --excl_flags_all
         alternatives: -G
         type: integer
         description: |
           Only EXCLUDE reads with all of the bits set in INT present in the FLAG field. INT can be specified 
-          in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0' 
-          (i.e. /^0[0-7]+/).
-        default: 0
+          in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with '0' (i.e. /^0[0-7]+/).
+          Default: `0`.
+        example: 0
       - name: --aux_tag
         alternatives: -d
         type: string
@@ -137,12 +137,13 @@ argument_groups:
       - name: --casava
         alternatives: -i
         type: boolean_true
-        description: add Illumina Casava 1.8 format entry to header (eg 1:N:0:ATCACG)
+        description: |
+          Add Illumina Casava 1.8 format entry to header, for example: `1:N:0:ATCACG`.
       - name: --compression
         alternatives: -c
         type: integer
         description: set compression level when writing gz or bgzf fastq files.
-        default: 0
+        example: 0
       - name: --index1
         alternatives: --i1
         type: file
@@ -153,20 +154,22 @@ argument_groups:
         description: write second index reads to FILE.
       - name: --barcode_tag
         type: string
-        description: Auxiliary tag to find index reads in.
-        default: BC
+        description: |
+          Auxiliary tag to find index reads in. Default: `BC`.
+        example: "BC"
       - name: --quality_tag
         type: string
-        description: Auxiliary tag to find index quality in.
-        default: QT
+        description: |
+          Auxiliary tag to find index quality in. Default: `QT`.
+        example: QT
       - name: --index_format
         type: string
         description: |
           string to describe how to parse the barcode and quality tags. For example:
-          [i14i8]: the first 14 characters are index 1, the next 8 characters are index 2.
-          [n8i14]: ignore the first 8 characters, and use the next 14 characters for index 1.
+          * `i14i8`: the first 14 characters are index 1, the next 8 characters are index 2.
+          * `n8i14`: ignore the first 8 characters, and use the next 14 characters for index 1.
           If the tag contains a separator, then the numeric part can be replaced with '*' to mean 
-          'read until the separator or end of tag', for example: [n*i*].
+          'read until the separator or end of tag', for example: `n*i*`.
 
 resources:
   - type: bash_script
diff --git a/src/samtools/samtools_fastq/script.sh b/src/samtools/samtools_fastq/script.sh
index 367432f9..0cad9cfe 100644
--- a/src/samtools/samtools_fastq/script.sh
+++ b/src/samtools/samtools_fastq/script.sh
@@ -11,7 +11,14 @@ set -e
 [[ "$par_copy_tags" == "false" ]] && unset par_copy_tags
 [[ "$par_casava" == "false" ]] && unset par_casava
 
-samtools fastq \
+if [[ "$meta_name" == "samtools_fasta" ]]; then
+  subcommand=fasta
+elif [[ "$meta_name" == "samtools_fastq" ]]; then
+  subcommand=fastq
+else
+  echo "Unrecognized component name" && exit 1
+fi
+samtools "$subcommand" \
     ${par_no_suffix:+-n} \
     ${par_suffix:+-N} \
     ${par_use_oq:+-O} \

From 6a9d990c3a9786f13ab2eca290fdd27053056bac Mon Sep 17 00:00:00 2001
From: emmarousseau <emmarou1@icloud.com>
Date: Fri, 5 Jul 2024 14:51:09 +0100
Subject: [PATCH 16/23] Umi tools dedup (#54)

* initial commit dedup

* Revert "initial commit dedup"

This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2.

* inital commit dedup

* Working component with one test

* Update test 1 and test data, fix some arg types in config and script

* test data files and changes to script

* Add third test and test data

* Fix typo in script

* remove utf8 characters in config

* Add choices fields and change default fields to exampels

* Minor formatting changes

* md formatting changes in config
---
 CHANGELOG.md                                  |   3 +
 src/umi_tools/umi_tools_dedup/config.vsh.yaml | 303 ++++++++++++++++++
 src/umi_tools/umi_tools_dedup/help.txt        | 113 +++++++
 src/umi_tools/umi_tools_dedup/script.sh       |  72 +++++
 src/umi_tools/umi_tools_dedup/test.sh         |  87 +++++
 .../test_data/dedup_edit_distance.tsv         |   5 +
 .../test_data/dedup_per_umi.tsv               |   6 +
 .../test_data/dedup_per_umi_per_position.tsv  |   5 +
 .../umi_tools_dedup/test_data/deduped.sam     |  30 ++
 .../test_data/deduped_fraction.sam            |  29 ++
 .../test_data/deduped_unique.sam              |  31 ++
 .../umi_tools_dedup/test_data/sample.bam      | Bin 0 -> 1584 bytes
 .../umi_tools_dedup/test_data/sample.bam.bai  | Bin 0 -> 2656 bytes
 .../umi_tools_dedup/test_data/script.sh       |   8 +
 14 files changed, 692 insertions(+)
 create mode 100644 src/umi_tools/umi_tools_dedup/config.vsh.yaml
 create mode 100644 src/umi_tools/umi_tools_dedup/help.txt
 create mode 100644 src/umi_tools/umi_tools_dedup/script.sh
 create mode 100644 src/umi_tools/umi_tools_dedup/test.sh
 create mode 100644 src/umi_tools/umi_tools_dedup/test_data/dedup_edit_distance.tsv
 create mode 100644 src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi.tsv
 create mode 100644 src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi_per_position.tsv
 create mode 100644 src/umi_tools/umi_tools_dedup/test_data/deduped.sam
 create mode 100644 src/umi_tools/umi_tools_dedup/test_data/deduped_fraction.sam
 create mode 100644 src/umi_tools/umi_tools_dedup/test_data/deduped_unique.sam
 create mode 100644 src/umi_tools/umi_tools_dedup/test_data/sample.bam
 create mode 100644 src/umi_tools/umi_tools_dedup/test_data/sample.bam.bai
 create mode 100755 src/umi_tools/umi_tools_dedup/test_data/script.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2e612a11..3fc960f4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -71,6 +71,9 @@
 
 * `falco`: A C++ drop-in replacement of FastQC to assess the quality of sequence read data (PR #43).
 
+* `umi_tools`:
+    - `umi_tools_dedup`: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read (PR #54).
+
 * `bedtools`:
     - `bedtools_getfasta`: extract sequences from a FASTA file for each of the
                            intervals defined in a BED/GFF/VCF file (PR #59).
diff --git a/src/umi_tools/umi_tools_dedup/config.vsh.yaml b/src/umi_tools/umi_tools_dedup/config.vsh.yaml
new file mode 100644
index 00000000..a02e70a1
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/config.vsh.yaml
@@ -0,0 +1,303 @@
+name: umi_tools_dedup
+namespace: umi_tools
+description: |
+  Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
+keywords: [umi_tools, deduplication, dedup]
+links:
+  homepage: https://umi-tools.readthedocs.io/en/latest/
+  documentation: https://umi-tools.readthedocs.io/en/latest/reference/dedup.html
+  repository: https://github.com/CGATOxford/UMI-tools
+references: 
+  doi: 10.1101/gr.209601.116
+license: MIT
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --input
+        alternatives: --stdin
+        type: file
+        description: Input BAM or SAM file. Use --in_sam to specify SAM format.
+        required: true
+      - name: --in_sam
+        type: boolean_true
+        description: |
+          By default, inputs are assumed to be in BAM format. Use this option to specify the use of SAM
+          format for input.
+      - name: --bai
+        type: file
+        description: BAM index
+      - name: --random_seed
+        type: integer
+        description: Random seed to initialize number generator with.
+
+  - name: Outputs
+    arguments:
+      - name: --output
+        alternatives: --stdout
+        type: file
+        description: Deduplicated BAM file.
+        required: true
+        direction: output
+      - name: --out_sam
+        type: boolean_true
+        description: |
+          By default, outputs are written in BAM format. Use this option to specify the use of SAM format
+          for output.
+      - name: --paired
+        type: boolean_true
+        description: |
+          BAM is paired end - output both read pairs. This will also force the use of the template length
+          to determine reads with the same mapping coordinates.
+      - name: --output_stats
+        type: string
+        description: |
+          Generate files containing UMI based deduplication statistics files with this prefix in the file names.
+      - name: --extract_umi_method
+        type: string
+        choices: [read_id, tag, umis]
+        description: |
+          Specify the method by which the barcodes were encoded in the read.
+          The options are:
+            * read_id (default) 
+            * tag
+            * umis
+        example: "read_id"
+      - name: --umi_tag
+        type: string
+        description: |
+          The tag containing the UMI sequence. This is only required if the extract_umi_method is set to tag.
+      - name: --umi_separator
+        type: string
+        description: |
+          The separator used to separate the UMI from the read sequence. This is only required if the
+          extract_umi_method is set to read_id. Default: `_`.
+        example: '_'
+      - name: --umi_tag_split
+        type: string
+        description: Separate the UMI in tag by <SPLIT> and take the first element.
+      - name: --umi_tag_delimiter
+        type: string
+        description: Separate the UMI in by <DELIMITER> and concatenate the elements.
+      - name: --cell_tag
+        type: string
+        description: |
+          The tag containing the cell barcode sequence. This is only required if the extract_umi_method
+          is set to tag.
+      - name: --cell_tag_split
+        type: string
+        description: Separate the cell barcode in tag by <SPLIT> and take the first element.
+      - name: --cell_tag_delimiter
+        type: string
+        description: Separate the cell barcode in by <DELIMITER> and concatenate the elements.
+  
+  - name: Grouping Options
+    arguments:    
+      - name: --method
+        type: string
+        choices: [unique, percentile, cluster, adjacency, directional]
+        description: |
+          The method to use for grouping reads. 
+          The options are: 
+            * unique
+            * percentile
+            * cluster
+            * adjacency
+            * directional (default)
+        example: "directional"
+      - name: --edit_distance_threshold
+        type: integer
+        description: |
+          For the adjacency and cluster methods the threshold for the edit distance to connect two
+          UMIs in the network can be increased. The default value of 1 works best unless the UMI is
+          very long (>14bp). Default: `1`.
+        example: 1
+      - name: --spliced_is_unique
+        type: boolean_true
+        description: |
+          Causes two reads that start in the same position on the same strand and having the same UMI
+          to be considered unique if one is spliced and the other is not. (Uses the 'N' cigar operation
+          to test for splicing).
+      - name: --soft_clip_threshold
+        type: integer
+        description: |
+          Mappers that soft clip will sometimes do so rather than mapping a spliced read if there is only
+          a small overhang over the exon junction. By setting this option, you can treat reads with at
+          least this many bases soft-clipped at the 3' end as spliced. Default: `4`.
+        example: 4
+      - name: --multimapping_detection_method
+        type: string
+        description: |
+          If the sam/bam contains tags to identify multimapping reads, you can specify for use when selecting
+          the best read at a given loci. Supported tags are `NH`, `X0` and `XT`. If not specified, the read
+          with the highest mapping quality will be selected.
+      - name: --read_length
+        type: boolean_true
+        description: Use the read length as a criteria when deduping, for e.g. sRNA-Seq.
+  
+  - name: Single-cell RNA-Seq Options
+    arguments:
+      - name: --per_gene
+        type: boolean_true
+        description: |
+          Reads will be grouped together if they have the same gene. This is useful if your library prep
+          generates PCR duplicates with non identical alignment positions such as CEL-Seq. Note this option
+          is hardcoded to be on with the count command. I.e. counting is always performed per-gene. Must be
+          combined with either --gene_tag or --per_contig option.
+      - name: --gene_tag
+        type: string
+        description: |
+          Deduplicate per gene. The gene information is encoded in the bam read tag specified.
+      - name: --assigned_status_tag
+        type: string
+        description: |
+          BAM tag which describes whether a read is assigned to a gene. Defaults to the same value as given
+          for --gene_tag.
+      - name: --skip_tags_regex
+        type: string
+        description: |
+          Use in conjunction with the --assigned_status_tag option to skip any reads where the tag matches
+          this regex. Default ("^[__|Unassigned]") matches anything which starts with "__" or "Unassigned".
+      - name: --per_contig
+        type: boolean_true
+        description: |
+          Deduplicate per contig (field 3 in BAM; RNAME). All reads with the same contig will be considered to
+          have the same alignment position. This is useful if you have aligned to a reference transcriptome
+          with one transcript per gene. If you have aligned to a transcriptome with more than one transcript
+          per gene, you can supply a map between transcripts and gene using the --gene_transcript_map option.
+      - name: --gene_transcript_map
+        type: file
+        description: |
+          A file containing a mapping between gene names and transcript names. The file should be tab
+          separated with the gene name in the first column and the transcript name in the second column.
+      - name: --per_cell
+        type: boolean_true
+        description: |
+          Reads will only be grouped together if they have the same cell barcode. Can be combined with
+          --per_gene.
+  
+  - name: SAM/BAM Options
+    arguments:
+      - name: --mapping_quality
+        type: integer
+        description: |
+          Minimum mapping quality (MAPQ) for a read to be retained. Default: `0`.
+        example: 0
+      - name: --unmapped_reads
+        type: string
+        choices: [discard, use, output]
+        description: |
+          How unmapped reads should be handled. The options are:
+            * "discard": Discard all unmapped reads. (default)
+            * "use":     If read2 is unmapped, deduplicate using read1 only. Requires --paired.
+            * "output":  Output unmapped reads/read pairs without UMI grouping/deduplication. Only available in umi_tools group.
+        example: "discard"
+      - name: --chimeric_pairs
+        type: string
+        choices: [discard, use, output]
+        description: |
+          How chimeric pairs should be handled. 
+          The options are:
+            * "discard": Discard all chimeric read pairs.
+            * "use":     Deduplicate using read1 only. (default)
+            * "output":  Output chimeric pairs without UMI grouping/deduplication. Only available in
+                         umi_tools group.
+        example: "use"
+      - name: --unpaired_reads
+        type: string
+        choices: [discard, use, output]
+        description: |
+          How unpaired reads should be handled.
+          The options are:
+            * "discard": Discard all unpaired reads.
+            * "use":     Deduplicate using read1 only. (default)
+            * "output":  Output unpaired reads without UMI grouping/deduplication. Only available
+                         in umi_tools group.
+        example: "use"
+      - name: --ignore_umi
+        type: boolean_true
+        description: Ignore the UMI and group reads using mapping coordinates only.
+      - name: --subset
+        type: double
+        description: |
+          Only consider a fraction of the reads, chosen at random. This is useful for doing saturation
+          analyses.
+      - name: --chrom
+        type: string
+        description: Only consider a single chromosome. This is useful for debugging/testing purposes.
+  
+  - name: Group/Dedup Options
+    arguments:
+      - name: --no_sort_output
+        type: boolean_true
+        description: |
+          By default, output is sorted. This involves the use of a temporary unsorted file (saved in
+          --temp_dir). Use this option to turn off sorting.
+      - name: --buffer_whole_contig
+        type: boolean_true
+        description: |
+          Forces dedup to parse an entire contig before yielding any reads for deduplication. This is the
+          only way to absolutely guarantee that all reads with the same start position are grouped together
+          for deduplication since dedup uses the start position of the read, not the alignment coordinate on
+          which the reads are sorted. However, by default, dedup reads for another 1000bp before outputting
+          read groups which will avoid any reads being missed with short read sequencing (<1000bp).
+  
+  - name: Common Options
+    arguments:
+      - name: --log
+        alternatives: -L
+        type: file
+        description: File with logging information.
+      - name: --log2stderr
+        type: boolean_true
+        description: Send logging information to stderr.
+      - name: --verbose
+        alternatives: -v
+        type: integer
+        description: |
+          Log level. The higher, the more output. Default: `1`.
+        example: 1
+      - name: --error
+        alternatives: -E
+        type: file
+        description: File with error information.
+      - name: --temp_dir
+        type: string
+        description: |
+          Directory for temporary files. If not set, the bash environmental variable TMPDIR is used.
+      - name: --compresslevel
+        type: integer
+        description: |
+          Level of Gzip compression to use. Default=6 matches GNU gzip rather than python gzip default.
+          Default: `6`.
+        example: 6
+      - name: --timeit
+        type: file
+        description: Store timing information in file.
+      - name: --timeit_name
+        type: string
+        description: |
+          Name in timing file for this class of jobs. Default: `all`.
+        example: "all"
+      - name: --timeit_header
+        type: string
+        description: Add header for timing information.
+
+resources:
+  - type: bash_script
+    path: script.sh
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - type: file
+    path: test_data
+engines:
+  - type: docker
+    image: quay.io/biocontainers/umi_tools:1.1.5--py39hf95cd2a_1
+    setup:
+      - type: docker
+        run: |
+            umi_tools -v | sed 's/ version//g' > /var/software_versions.txt
+runners:
+  - type: executable
+  - type: nextflow
\ No newline at end of file
diff --git a/src/umi_tools/umi_tools_dedup/help.txt b/src/umi_tools/umi_tools_dedup/help.txt
new file mode 100644
index 00000000..87baf322
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/help.txt
@@ -0,0 +1,113 @@
+'''
+Generated from the following UMI-tools documentation:
+      https://umi-tools.readthedocs.io/en/latest/common_options.html#common-options
+      https://umi-tools.readthedocs.io/en/latest/reference/dedup.html
+'''
+
+
+dedup - Deduplicate reads using UMI and mapping coordinates
+
+Usage: umi_tools dedup [OPTIONS] [--stdin=IN_BAM] [--stdout=OUT_BAM]
+
+       note: If --stdout is ommited, standard out is output. To
+             generate a valid BAM file on standard out, please
+             redirect log with --log=LOGFILE or --log2stderr 
+
+Common UMI-tools Options:
+
+      -S, --stdout                  File where output is to go [default = stdout].
+      -L, --log                     File with logging information [default = stdout].
+      --log2stderr                  Send logging information to stderr [default = False].
+      -v, --verbose                 Log level. The higher, the more output [default = 1].
+      -E, --error                   File with error information [default = stderr].
+      --temp-dir                    Directory for temporary files. If not set, the bash environmental variable TMPDIR is used[default = None].
+      --compresslevel               Level of Gzip compression to use. Default=6 matches GNU gzip rather than python gzip default (which is 9)
+
+      profiling and debugging options:
+      --timeit                      Store timing information in file [default=none].
+      --timeit-name                 Name in timing file for this class of jobs [default=all].
+      --timeit-header               Add header for timing information [default=none].
+      --random-seed                 Random seed to initialize number generator with [default=none].
+
+Dedup Options:
+      --output-stats=<prefix>             One can use the edit distance between UMIs at the same position as an quality control for the 
+                                          deduplication process by comparing with a null expectation of random sampling. For the random
+                                          sampling, the observed frequency of UMIs is used to more reasonably model the null expectation.
+                                          Use this option to generate a stats outfiles called: 
+                                                [PREFIX]_stats_edit_distance.tsv   
+                                                      Reports the (binned) average edit distance between the UMIs at each position.
+                                          In addition, this option will trigger reporting of further summary statistics for the UMIs which
+                                          may be informative for selecting the optimal deduplication method or debugging.
+                                          Each unique UMI sequence may be observed [0-many] times at multiple positions in the BAM. The
+                                          following files report the distribution for the frequencies of each UMI.
+                                                [PREFIX]_stats_per_umi_per_position.tsv
+                                                      Tabulates the counts for unique combinations of UMI and position.
+                                                [PREFIX]_stats_per_umi_per.tsv
+                                                      The _stats_per_umi_per.tsv table provides UMI-level summary statistics. 
+      --extract-umi-method=<method>       How are the barcodes encoded in the read?
+                                          Options are: read_id (default), tag, umis
+      --umi-separator=<separator>         Separator between read id and UMI. See --extract-umi-method above. Default=_
+      --umi-tag=<tag>                     Tag which contains UMI. See --extract-umi-method above
+      --umi-tag-split=<split>             Separate the UMI in tag by SPLIT and take the first element
+      --umi-tag-delimiter=<delimiter>     Separate the UMI in by DELIMITER and concatenate the elements
+      --cell-tag=<tag>                    Tag which contains cell barcode. See --extract-umi-method above
+      --cell-tag-split=<split>            Separate the cell barcode in tag by SPLIT and take the first element
+      --cell-tag-delimiter=<delimiter>    Separate the cell barcode in by DELIMITER and concatenate the elements
+      --method=<method>                   What method to use to identify group of reads with the same (or similar) UMI(s)?
+                                          All methods start by identifying the reads with the same mapping position.
+                                          The simplest methods, unique and percentile, group reads with the exact same UMI.
+                                          The network-based methods, cluster, adjacency and directional, build networks where
+                                          nodes are UMIs and edges connect UMIs with an edit distance <= threshold (usually 1).
+                                          The groups of reads are then defined from the network in a method-specific manner.
+                                          For all the network-based methods, each read group is equivalent to one read count for the gene.
+      --edit-distance-threshold=<threshold>     For the adjacency and cluster methods the threshold for the edit distance to connect
+                                                two UMIs in the network can be increased. The default value of 1 works best unless
+                                                the UMI is very long (>14bp).
+      --spliced-is-unique           Causes two reads that start in the same position on the same strand and having the
+                                    same UMI to be considered unique if one is spliced and the other is not.
+                                    (Uses the 'N' cigar operation to test for splicing).
+      --soft-clip-threshold=<threshold>    Mappers that soft clip will sometimes do so rather than mapping a spliced read if
+                                          there is only a small overhang over the exon junction. By setting this option, you
+                                          can treat reads with at least this many bases soft-clipped at the 3' end as spliced.
+                                          Default=4.
+      --multimapping-detection-method=<method>  If the sam/bam contains tags to identify multimapping reads, you can specify
+                                                for use when selecting the best read at a given loci. Supported tags are "NH",
+                                                "X0" and "XT". If not specified, the read with the highest mapping quality will be selected.
+      --read-length                              Use the read length as a criteria when deduping, for e.g sRNA-Seq.
+      --per-gene                    Reads will be grouped together if they have the same gene. This is useful if your
+                                    library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq.
+                                    Note this option is hardcoded to be on with the count command. I.e counting is always
+                                    performed per-gene. Must be combined with either --gene-tag or --per-contig option.
+      --gene-tag=<tag>              Deduplicate per gene. The gene information is encoded in the bam read tag specified
+      --assigned-status-tag=<tag>   BAM tag which describes whether a read is assigned to a gene. Defaults to the same value
+                                    as given for --gene-tag
+      --skip-tags-regex=<regex>     Use in conjunction with the --assigned-status-tag option to skip any reads where the
+                                    tag matches this regex. Default ("^[__|Unassigned]") matches anything which starts with "__"
+                                    or "Unassigned":
+      --per-contig                  Deduplicate per contig (field 3 in BAM; RNAME). All reads with the same contig will be
+                                    considered to have the same alignment position. This is useful if you have aligned to a
+                                    reference transcriptome with one transcript per gene. If you have aligned to a transcriptome
+                                    with more than one transcript per gene, you can supply a map between transcripts and gene
+                                    using the --gene-transcript-map option
+      --gene-transcript-map=<file>  File mapping genes to transcripts (tab separated)
+      --per-cell                    Reads will only be grouped together if they have the same cell barcode. Can be combined with --per-gene.
+      --mapping-quality=<quality>   Minimium mapping quality (MAPQ) for a read to be retained. Default is 0.
+      --unmapped-reads=<option>     How should unmapped reads be handled.
+      --chimeric-pairs=<option>     How should chimeric read pairs be handled.
+      --unpaired-reads=<option>     How should unpaired reads be handled.
+      --ignore-umi                  Ignore the UMI and group reads using mapping coordinates only
+      --subset=<fraction>           Only consider a fraction of the reads, chosen at random. This is useful for doing saturation analyses.
+      --chrom=<chromosome>          Only consider a single chromosome. This is useful for debugging/testing purposes
+      --in-sam                      Input is in SAM format
+      --out-sam                     Output is in SAM format
+      --paired                      BAM is paired end - output both read pairs. This will also force the use of the template
+                                    length to determine reads with the same mapping coordinates.
+      --no-sort-output              By default, output is sorted. This involves the use of a temporary unsorted file since
+                                    reads are considered in the order of their start position which may not be the same as
+                                    their alignment coordinate due to soft-clipping and reverse alignments. The temp file
+                                    will be saved (in --temp-dir) and deleted when it has been sorted to the outfile. Use
+                                    this option to turn off sorting.
+      --buffer-whole-contig         Forces dedup to parse an entire contig before yielding any reads for deduplication.
+                                    This is the only way to absolutely guarantee that all reads with the same start position
+                                    are grouped together for deduplication since dedup uses the start position of the read,
+                                    not the alignment coordinate on which the reads are
\ No newline at end of file
diff --git a/src/umi_tools/umi_tools_dedup/script.sh b/src/umi_tools/umi_tools_dedup/script.sh
new file mode 100644
index 00000000..d57a5e76
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/script.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+set -e
+
+# The checks below treat a boolean flag whose value is the string "false" as
+# "not requested": the variable is unset so that the ${par_x:+--flag}
+# expansions in the umi_tools call further down emit nothing for it.
+[[ "$par_paired" == "false" ]] && unset par_paired
+[[ "$par_in_sam" == "false" ]] && unset par_in_sam
+[[ "$par_out_sam" == "false" ]] && unset par_out_sam
+[[ "$par_spliced_is_unique" == "false" ]] && unset par_spliced_is_unique
+[[ "$par_per_gene" == "false" ]] && unset par_per_gene
+[[ "$par_per_contig" == "false" ]] && unset par_per_contig
+[[ "$par_per_cell" == "false" ]] && unset par_per_cell
+[[ "$par_no_sort_output" == "false" ]] && unset par_no_sort_output
+[[ "$par_buffer_whole_contig" == "false" ]] && unset par_buffer_whole_contig
+[[ "$par_ignore_umi" == "false" ]] && unset par_ignore_umi
+[[ "$par_log2stderr" == "false" ]] && unset par_log2stderr
+[[ "$par_read_length" == "false" ]] && unset par_read_length
+
+umi_tools dedup \
+    --stdin "$par_input" \
+    ${par_in_sam:+--in-sam} \
+    --stdout "$par_output" \
+    ${par_out_sam:+--out-sam} \
+    ${par_paired:+--paired} \
+    ${par_output_stats:+--output-stats "$par_output_stats"} \
+    ${par_extract_umi_method:+--extract-umi-method "$par_extract_umi_method"} \
+    ${par_umi_tag:+--umi-tag "$par_umi_tag"} \
+    ${par_umi_separator:+--umi-separator "$par_umi_separator"} \
+    ${par_umi_tag_split:+--umi-tag-split "$par_umi_tag_split"} \
+    ${par_umi_tag_delimiter:+--umi-tag-delimiter "$par_umi_tag_delimiter"} \
+    ${par_cell_tag:+--cell-tag "$par_cell_tag"} \
+    ${par_cell_tag_split:+--cell-tag-split "$par_cell_tag_split"} \
+    ${par_cell_tag_delimiter:+--cell-tag-delimiter "$par_cell_tag_delimiter"} \
+    ${par_method:+--method "$par_method"} \
+    ${par_edit_distance_threshold:+--edit-distance-threshold "$par_edit_distance_threshold"} \
+    ${par_spliced_is_unique:+--spliced-is-unique} \
+    ${par_soft_clip_threshold:+--soft-clip-threshold "$par_soft_clip_threshold"} \
+    ${par_multimapping_detection_method:+--multimapping-detection-method "$par_multimapping_detection_method"} \
+    ${par_read_length:+--read-length} \
+    ${par_per_gene:+--per-gene} \
+    ${par_gene_tag:+--gene-tag "$par_gene_tag"} \
+    ${par_assigned_status_tag:+--assigned-status-tag "$par_assigned_status_tag"} \
+    ${par_skip_tags_regex:+--skip-tags-regex "$par_skip_tags_regex"} \
+    ${par_per_contig:+--per-contig} \
+    ${par_gene_transcript_map:+--gene-transcript-map "$par_gene_transcript_map"} \
+    ${par_per_cell:+--per-cell} \
+    ${par_mapping_quality:+--mapping-quality "$par_mapping_quality"} \
+    ${par_unmapped_reads:+--unmapped-reads "$par_unmapped_reads"} \
+    ${par_chimeric_pairs:+--chimeric-pairs "$par_chimeric_pairs"} \
+    ${par_unpaired_reads:+--unpaired-reads "$par_unpaired_reads"} \
+    ${par_ignore_umi:+--ignore-umi} \
+    ${par_subset:+--subset "$par_subset"} \
+    ${par_chrom:+--chrom "$par_chrom"} \
+    ${par_no_sort_output:+--no-sort-output} \
+    ${par_buffer_whole_contig:+--buffer-whole-contig} \
+    ${par_log:+-L "$par_log"} \
+    ${par_log2stderr:+--log2stderr} \
+    ${par_verbose:+-v "$par_verbose"} \
+    ${par_error:+-E "$par_error"} \
+    ${par_temp_dir:+--temp-dir "$par_temp_dir"} \
+    ${par_compresslevel:+--compresslevel "$par_compresslevel"} \
+    ${par_timeit:+--timeit "$par_timeit"} \
+    ${par_timeit_name:+--timeit-name "$par_timeit_name"} \
+    ${par_timeit_header:+--timeit-header "$par_timeit_header"} \
+    ${par_random_seed:+--random-seed "$par_random_seed"}
+
+exit 0
diff --git a/src/umi_tools/umi_tools_dedup/test.sh b/src/umi_tools/umi_tools_dedup/test.sh
new file mode 100644
index 00000000..adadb410
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/test.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+test_dir="${meta_resources_dir}/test_data"
+out_dir="${meta_resources_dir}/out"
+
+mkdir -p "$out_dir"
+
+############################################################################################
+
+echo ">>> Test 1: Basic usage of $meta_functionality_name with statistics output"
+
+"$meta_executable" \
+  --paired \
+  --input "$test_dir/sample.bam" \
+  --bai "$test_dir/sample.bam.bai" \
+  --output "$out_dir/deduped.sam" \
+  --out_sam \
+  --output_stats "$out_dir/dedup" \
+  --random_seed 1
+
+echo ">>> Checking whether output exists"
+[ ! -f "$out_dir/deduped.sam" ] && echo "File 'deduped.sam' does not exist!" && exit 1
+[ ! -f "$out_dir/dedup_edit_distance.tsv" ] && echo "File 'dedup_edit_distance.tsv' does not exist!" && exit 1
+
+echo ">>> Checking whether output is non-empty"
+[ ! -s "$out_dir/deduped.sam" ] && echo "File 'deduped.sam' is empty!" && exit 1
+[ ! -s "$out_dir/dedup_edit_distance.tsv" ] && echo "File 'dedup_edit_distance.tsv' is empty!" && exit 1
+
+echo ">>> Checking whether output is correct"
+diff "$out_dir/deduped.sam" "$test_dir/deduped.sam" || \
+    (echo "Output file deduped.sam does not match expected output" && exit 1)
+diff "$out_dir/dedup_edit_distance.tsv" "$test_dir/dedup_edit_distance.tsv" || \
+    (echo "Output file dedup_edit_distance.tsv does not match expected output" && exit 1)
+
+############################################################################################
+
+echo ">>> Test 2: $meta_functionality_name with random subset selection"
+
+"$meta_executable" \
+  --paired \
+  --input "$test_dir/sample.bam" \
+  --bai "$test_dir/sample.bam.bai" \
+  --output "$out_dir/deduped_fraction.sam" \
+  --out_sam \
+  --subset 0.5 \
+  --random_seed 1
+
+
+echo ">>> Checking whether output exists"
+[ ! -f "$out_dir/deduped_fraction.sam" ] && echo "File 'deduped_fraction.sam' does not exist!" && exit 1
+
+echo ">>> Checking whether output is non-empty"
+[ ! -s "$out_dir/deduped_fraction.sam" ] && echo "File 'deduped_fraction.sam' is empty!" && exit 1
+
+echo ">>> Checking whether output is correct"
+diff "$out_dir/deduped_fraction.sam" "$test_dir/deduped_fraction.sam" || \
+    (echo "Output file deduped_fraction.sam does not match expected output" && exit 1)
+
+############################################################################################
+
+echo ">>> Test 3: $meta_functionality_name with --method unique"
+
+"$meta_executable" \
+  --paired \
+  --input "$test_dir/sample.bam" \
+  --bai "$test_dir/sample.bam.bai" \
+  --output "$out_dir/deduped_unique.sam" \
+  --out_sam \
+  --method "unique" \
+  --random_seed 1
+
+echo ">>> Checking whether output exists"
+[ ! -f "$out_dir/deduped_unique.sam" ] && echo "File 'deduped_unique.sam' does not exist!" && exit 1
+
+echo ">>> Checking whether output is non-empty"
+[ ! -s "$out_dir/deduped_unique.sam" ] && echo "File 'deduped_unique.sam' is empty!" && exit 1
+
+echo ">>> Checking whether output is correct"
+diff "$out_dir/deduped_unique.sam" "$test_dir/deduped_unique.sam" || \
+    (echo "Output file deduped_unique.sam does not match expected output" && exit 1)
+
+############################################################################################
+
+rm -rf "$out_dir"
+
+echo "All tests succeeded!"
+exit 0
\ No newline at end of file
diff --git a/src/umi_tools/umi_tools_dedup/test_data/dedup_edit_distance.tsv b/src/umi_tools/umi_tools_dedup/test_data/dedup_edit_distance.tsv
new file mode 100644
index 00000000..89684b04
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/test_data/dedup_edit_distance.tsv
@@ -0,0 +1,5 @@
+unique	unique_null	directional	directional_null	edit_distance
+3	3	4	4	Single_UMI
+0	1	0	0	0
+1	0	0	0	1
+0	0	0	0	2
diff --git a/src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi.tsv b/src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi.tsv
new file mode 100644
index 00000000..a1d364e2
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi.tsv
@@ -0,0 +1,6 @@
+UMI	median_counts_pre	times_observed_pre	total_counts_pre	median_counts_post	times_observed_post	total_counts_post
+ACCGGTTTA	74	1	74	74	1	74
+ACTGGTTTC	48	1	48	49	1	49
+AGCGGTTAC	1	1	1	1	1	1
+CCAGGTTCT	1	1	1	1	1	1
+TCTGGTTTC	1	1	1	0	0	0
diff --git a/src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi_per_position.tsv b/src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi_per_position.tsv
new file mode 100644
index 00000000..d9211d0a
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi_per_position.tsv
@@ -0,0 +1,5 @@
+counts	instances_pre	instances_post
+1	3	2
+48	1	0
+49	0	1
+74	1	1
diff --git a/src/umi_tools/umi_tools_dedup/test_data/deduped.sam b/src/umi_tools/umi_tools_dedup/test_data/deduped.sam
new file mode 100644
index 00000000..cce2efb4
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/test_data/deduped.sam
@@ -0,0 +1,30 @@
+@HD	VN:1.0	SO:coordinate
+@SQ	SN:chr1	LN:197195432
+@SQ	SN:chr10	LN:129993255
+@SQ	SN:chr11	LN:121843856
+@SQ	SN:chr12	LN:121257530
+@SQ	SN:chr13	LN:120284312
+@SQ	SN:chr14	LN:125194864
+@SQ	SN:chr15	LN:103494974
+@SQ	SN:chr16	LN:98319150
+@SQ	SN:chr17	LN:95272651
+@SQ	SN:chr18	LN:90772031
+@SQ	SN:chr19	LN:61342430
+@SQ	SN:chr2	LN:181748087
+@SQ	SN:chr3	LN:159599783
+@SQ	SN:chr4	LN:155630120
+@SQ	SN:chr5	LN:152537259
+@SQ	SN:chr6	LN:149517037
+@SQ	SN:chr7	LN:152524553
+@SQ	SN:chr8	LN:131738871
+@SQ	SN:chr9	LN:124076172
+@SQ	SN:chrM	LN:16299
+@SQ	SN:chrX	LN:166650296
+@SQ	SN:chrY	LN:15902555
+@PG	ID:Bowtie	VN:1.1.2	CL:"bowtie --wrapper basic-0 --threads 4 -v 2 -m 10 -k 1 /ifs/mirror/genomes/bowtie/mm9 /dev/fd/63 --sam"
+@PG	ID:samtools	PN:samtools	PP:Bowtie	VN:1.19.2	CL:samtools view -h example.bam
+@PG	ID:samtools.1	PN:samtools	PP:samtools	VN:1.19.2	CL:samtools view -bS -
+SRR2057595.5052066_ACCGGTTTA	16	chr1	3812794	255	51M	*	0	0	*	*	XA:i:2	MD:Z:42T2T5	NM:i:2
+SRR2057595.13520751_CCAGGTTCT	16	chr1	3967622	255	20M	*	0	0	*	*	XA:i:2	MD:Z:12A0C6	NM:i:2
+SRR2057595.8901432_AGCGGTTAC	0	chr1	4369756	255	20M	*	0	0	*	*	XA:i:2	MD:Z:1T4A13	NM:i:2
+SRR2057595.1210348_ACTGGTTTC	0	chr1	4762503	255	45M	*	0	0	*	*	XA:i:2	MD:Z:0C7A36	NM:i:2
diff --git a/src/umi_tools/umi_tools_dedup/test_data/deduped_fraction.sam b/src/umi_tools/umi_tools_dedup/test_data/deduped_fraction.sam
new file mode 100644
index 00000000..cf9e651a
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/test_data/deduped_fraction.sam
@@ -0,0 +1,29 @@
+@HD	VN:1.0	SO:coordinate
+@SQ	SN:chr1	LN:197195432
+@SQ	SN:chr10	LN:129993255
+@SQ	SN:chr11	LN:121843856
+@SQ	SN:chr12	LN:121257530
+@SQ	SN:chr13	LN:120284312
+@SQ	SN:chr14	LN:125194864
+@SQ	SN:chr15	LN:103494974
+@SQ	SN:chr16	LN:98319150
+@SQ	SN:chr17	LN:95272651
+@SQ	SN:chr18	LN:90772031
+@SQ	SN:chr19	LN:61342430
+@SQ	SN:chr2	LN:181748087
+@SQ	SN:chr3	LN:159599783
+@SQ	SN:chr4	LN:155630120
+@SQ	SN:chr5	LN:152537259
+@SQ	SN:chr6	LN:149517037
+@SQ	SN:chr7	LN:152524553
+@SQ	SN:chr8	LN:131738871
+@SQ	SN:chr9	LN:124076172
+@SQ	SN:chrM	LN:16299
+@SQ	SN:chrX	LN:166650296
+@SQ	SN:chrY	LN:15902555
+@PG	ID:Bowtie	VN:1.1.2	CL:"bowtie --wrapper basic-0 --threads 4 -v 2 -m 10 -k 1 /ifs/mirror/genomes/bowtie/mm9 /dev/fd/63 --sam"
+@PG	ID:samtools	PN:samtools	PP:Bowtie	VN:1.19.2	CL:samtools view -h example.bam
+@PG	ID:samtools.1	PN:samtools	PP:samtools	VN:1.19.2	CL:samtools view -bS -
+SRR2057595.4062788_ACCGGTTTA	16	chr1	3812793	255	52M	*	0	0	*	*	XA:i:2	MD:Z:43T2T5	NM:i:2
+SRR2057595.8901432_AGCGGTTAC	0	chr1	4369756	255	20M	*	0	0	*	*	XA:i:2	MD:Z:1T4A13	NM:i:2
+SRR2057595.1999468_ACTGGTTTC	0	chr1	4762503	255	45M	*	0	0	*	*	XA:i:2	MD:Z:0C7A36	NM:i:2
diff --git a/src/umi_tools/umi_tools_dedup/test_data/deduped_unique.sam b/src/umi_tools/umi_tools_dedup/test_data/deduped_unique.sam
new file mode 100644
index 00000000..570ea153
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/test_data/deduped_unique.sam
@@ -0,0 +1,31 @@
+@HD	VN:1.0	SO:coordinate
+@SQ	SN:chr1	LN:197195432
+@SQ	SN:chr10	LN:129993255
+@SQ	SN:chr11	LN:121843856
+@SQ	SN:chr12	LN:121257530
+@SQ	SN:chr13	LN:120284312
+@SQ	SN:chr14	LN:125194864
+@SQ	SN:chr15	LN:103494974
+@SQ	SN:chr16	LN:98319150
+@SQ	SN:chr17	LN:95272651
+@SQ	SN:chr18	LN:90772031
+@SQ	SN:chr19	LN:61342430
+@SQ	SN:chr2	LN:181748087
+@SQ	SN:chr3	LN:159599783
+@SQ	SN:chr4	LN:155630120
+@SQ	SN:chr5	LN:152537259
+@SQ	SN:chr6	LN:149517037
+@SQ	SN:chr7	LN:152524553
+@SQ	SN:chr8	LN:131738871
+@SQ	SN:chr9	LN:124076172
+@SQ	SN:chrM	LN:16299
+@SQ	SN:chrX	LN:166650296
+@SQ	SN:chrY	LN:15902555
+@PG	ID:Bowtie	VN:1.1.2	CL:"bowtie --wrapper basic-0 --threads 4 -v 2 -m 10 -k 1 /ifs/mirror/genomes/bowtie/mm9 /dev/fd/63 --sam"
+@PG	ID:samtools	PN:samtools	PP:Bowtie	VN:1.19.2	CL:samtools view -h example.bam
+@PG	ID:samtools.1	PN:samtools	PP:samtools	VN:1.19.2	CL:samtools view -bS -
+SRR2057595.5052066_ACCGGTTTA	16	chr1	3812794	255	51M	*	0	0	*	*	XA:i:2	MD:Z:42T2T5	NM:i:2
+SRR2057595.13520751_CCAGGTTCT	16	chr1	3967622	255	20M	*	0	0	*	*	XA:i:2	MD:Z:12A0C6	NM:i:2
+SRR2057595.8901432_AGCGGTTAC	0	chr1	4369756	255	20M	*	0	0	*	*	XA:i:2	MD:Z:1T4A13	NM:i:2
+SRR2057595.1210348_ACTGGTTTC	0	chr1	4762503	255	45M	*	0	0	*	*	XA:i:2	MD:Z:0C7A36	NM:i:2
+SRR2057595.1169423_TCTGGTTTC	0	chr1	4762503	255	45M	*	0	0	*	*	XA:i:2	MD:Z:0C7A36	NM:i:2
diff --git a/src/umi_tools/umi_tools_dedup/test_data/sample.bam b/src/umi_tools/umi_tools_dedup/test_data/sample.bam
new file mode 100644
index 0000000000000000000000000000000000000000..32192fc6e9bb58449cc20acd27d747a6c60399d5
GIT binary patch
literal 1584
zcmV-02G98)iwFb&00000{{{d;LjnL<0)3J@ZxcZffCr)^T<9pEU8MmjoNwQcL&{q`
zq$C(DK_Y0*_648F<*c)1)6*bCLCHTzq=+6-06jGgLP(SpP*6b2`CeM5NO97B^WDzQ
z?Ckp5_PNJ10IRn*od-KUX<(;y*Y6}r+Kmo_VYsl`y63cZ{7x?=&Q|t=QN#$&s4Z}A
zs4<37A#79Zp+s}0g|rPV4HZf-Y#UY>OtVd7>KtAeK@8WD+eYLDGj6z1Q$|jV(To@(
zrjAj$5L8hqh%Iz3uu>GWDPbl;62>W?s<>!H6U8;w$`VB<!U$uOW|rWE5R&1nQA>zI
zP{9-x#u73Y+z6sDv(He~4$6hF&(OJGL@}+E-G?b|kz*x^vhR007c!fp72GX?QVL9s
zJ;A-QIh>6=n|bq=b7#|EPsYP2EM5|6P-kPyUu@?w<htWD==Z}EwSz&_ad8$I_R=uu
z4iHD~2vOw5h-BfXh#)W8A9!(;rb+5O2@jJv9C$?{FOCiJy5Y#%?|PDDSq4G8SQU`1
zhe>iUaGE>Tx;b5#DJrWx8b#q4xjhs<3*!Dk*k}jw>3j`xI%B;)D`&ff+=a^kFq?go
zHw4FjKV6(F0mIdoALlC~@EUK_M8RM7Yhv)GyHpd0{{2x+0+z+nTtyNZH@?kQq~P<1
zw>4>4xfIo8;QRUGnX)hn<FzjfB``SJdhV3K;oAF~6$m(aaiszYztW!<N>JcDtc`|$
z*T2q}VBpniSiv@&tkk!$3-5mXuJ-J~qoY6YAMlm5?2rTi03VA81ONa4009360763o
z0J#IT+B=TqR1^kace4ScNvB6_V9<F#27)P!)FgvYmjeVrld}mD2-pfDLp){`?5L2c
znvpF)Hl^Q{Ir((--*X?nu7~0B`{m`U;rWT5-3;%apHEM}VHk$v!-J5gw}05i-tl-C
z$NT$fnhwKv?+wG3hw=8!r(ao4GWqcNoALJer2hKy)o{12x|S2?jQwWD`qivSX%w6d
zBGvV3ry2WVon@{JQdxB$p4{KI4Vea!_e}j_rrNb?P)EqBdYq{nqLnU}NHiOso$6>d
z*@dLwY*>-6v(t>-)L%kvxr3V>p$vDGxXpZX=l;I2b6a~cdtc8~y;jYX7F|`3Gj&5N
zaB@;Nnl%c^DmWWv#tCODy$znNTZ=TD%}&cj*Zw;@&Dbrofha0c@Gh4a7u;1g)e5|8
z<jt~q{{FFT{#sY-ycuYw%C)MS7a3X&Ez4L{k27`4S`(JUgl2_Uy&cX<W?R8o)i4q`
zYg``iAyZQ!hi7XU9R#n|$%XJCxQnwCo-ND8W`<!Si5$*`6sy3q4NrB$S6gmTie^)}
zqYTcva5=oeia7(nFm_faHJmMT%k@r}X6&|OO{rwTU8WZ89b7SOR=k_+%S#1}t4z7Y
z{rd>=cH0qziI-OuAIwz#N0YtXcmxskCPuR&DedrT#ZyTM&8l%2IhwWTwrn&T$|Q>y
zUkq#Pve?uyY^p71FN966QioTYVaf_;%dHt7LWgjqt+LaM-L^w8$KoJ{aoO0y8SV;K
zmF(bZx+y*f=_uJDUlnVyJ=|8Q!n;N(zP*3H|MBDD#p$DU*M-vgaU2i#_tP{^!|R)O
zF3W+&SWLICfBs~+I~||hJU#!{EN)^`c|6>o%RP(>cCrKOVq^c|r_YAxr*GF-&c56X
zPUjHD;m_N5Dvf#2xJs?!b1OhqDd!Tjs?;R}S4{$+fQHfeMIEcAl;IPox_sk<s>YHA
z?}2PdfzN_!>hqf!RBelLcmnN|_|a--J#URzHBxe2J<im-4Qm#emjs^mu=CvqmSy3R
z#IsIR_-r}aL?0(GD@<wyuU3rO1J5R@IGj~26~(g|wqaNJzpOcn;;Xd~cYL;?)nRxx
zS*q}CXE=Vyvbzeu)RYwM(6`-f0nb*5Ylk5d67e0X<%kG|v!a~%6->mNnfG8l6V9qq
z5S}$^Yr?ZE)s1H*xbQ<(?V|W<o7HLuHskWgEihYr`A~#rMH_yqQKG6`Xa53j9M$l7
iDgXc<iwFb&00000{{{d;LjnLB00RI3000000000a8~aoM

literal 0
HcmV?d00001

diff --git a/src/umi_tools/umi_tools_dedup/test_data/sample.bam.bai b/src/umi_tools/umi_tools_dedup/test_data/sample.bam.bai
new file mode 100644
index 0000000000000000000000000000000000000000..e9e2eee1d579c04b0b58bff33862d56ff931ff5d
GIT binary patch
literal 2656
zcmZ>A^kfucU|?VcVnbmD21X#wz!1d*B8?;=wAUmC2Cx8BoPj}v4J2O+B9K5ZRDTy#
z-$N*E4Hbu(2b0f+io?{w%vA<E0%AAJEu-{kctDccXgG|911JPW)4^yu7)=L+@&T;Q
Wr4|jVyN4<bYikTu8j|OS4haD7sht1-

literal 0
HcmV?d00001

diff --git a/src/umi_tools/umi_tools_dedup/test_data/script.sh b/src/umi_tools/umi_tools_dedup/test_data/script.sh
new file mode 100755
index 00000000..2253a0d1
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/test_data/script.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Download test data
+wget https://github.com/CGATOxford/UMI-tools/releases/download/v0.2.3/example.bam
+# extract 150 reads with a maximum of two reads having the same start position
+samtools view -h example.bam | head -n 150 | samtools view -bS - > sample.bam
+samtools index sample.bam
+rm example.bam
\ No newline at end of file

From 4a8d7d82d5ff9e572ff1fba030cbf15fe87bf281 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Tue, 9 Jul 2024 12:03:36 +0200
Subject: [PATCH 17/23] Fix typo (#79)

---
 src/salmon/salmon_index/config.vsh.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/salmon/salmon_index/config.vsh.yaml b/src/salmon/salmon_index/config.vsh.yaml
index f24cd3a9..41c1e05b 100644
--- a/src/salmon/salmon_index/config.vsh.yaml
+++ b/src/salmon/salmon_index/config.vsh.yaml
@@ -19,7 +19,7 @@ argument_groups:
       - name: --genome
         type: file
         description: | 
-          Genome of the organism to prepare the set of decoy sequences. Required to build decoy-aware transccriptome.
+          Genome of the organism to prepare the set of decoy sequences. Required to build decoy-aware transcriptome.
         required: false
         example: genome.fasta
       - name: --transcripts
@@ -110,4 +110,4 @@ engines:
           salmon index -v 2>&1 | sed 's/salmon \([0-9.]*\)/salmon: \1/' > /var/software_versions.txt
 runners:
   - type: executable
-  - type: nextflow
\ No newline at end of file
+  - type: nextflow

From 508a40aab1912a332cfeacc737d827a545d78a04 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Tue, 9 Jul 2024 16:43:10 +0200
Subject: [PATCH 18/23] add vscode to gitignore

---
 .gitignore                |    1 +
 .vscode/settings.json     |    9 -
 .vscode/viash_config.yaml | 3005 -------------------------------------
 3 files changed, 1 insertion(+), 3014 deletions(-)
 delete mode 100644 .vscode/settings.json
 delete mode 100644 .vscode/viash_config.yaml

diff --git a/.gitignore b/.gitignore
index ca5262bc..2a64eaac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
 
 # IDE ignores
 .idea/
+.vscode/
 
 # R specific ignores
 .Rhistory
diff --git a/.vscode/settings.json b/.vscode/settings.json
deleted file mode 100644
index df05379a..00000000
--- a/.vscode/settings.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
-  "yaml.schemas": {
-    ".vscode/viash_config.yaml": "**.vsh.yaml",
-  },
-  "files.watcherExclude": {
-    "**/target": true,
-    ".github": true
-  }
-}
\ No newline at end of file
diff --git a/.vscode/viash_config.yaml b/.vscode/viash_config.yaml
deleted file mode 100644
index 0e38195f..00000000
--- a/.vscode/viash_config.yaml
+++ /dev/null
@@ -1,3005 +0,0 @@
-$schema: "https://json-schema.org/draft-07/schema#"
-definitions:
-  Config:
-    description: "A Viash configuration is a YAML file which contains metadata to\
-      \ describe the behaviour and build target(s) of a component.  \nWe commonly\
-      \ name this file `config.vsh.yaml` in our examples, but you can name it however\
-      \ you choose.  \n"
-    type: "object"
-    properties:
-      organization:
-        description: "The organization of the package."
-        type: "string"
-      license:
-        description: "The license of the package."
-        type: "string"
-      authors:
-        description: "A list of authors. An author must at least have a name, but\
-          \ can also have a list of roles, an e-mail address, and a map of custom\
-          \ properties.\n\nSuggested values for roles are:\n \n| Role | Abbrev. |\
-          \ Description |\n|------|---------|-------------|\n| maintainer | mnt |\
-          \ for the maintainer of the code. Ideally, exactly one maintainer is specified.\
-          \ |\n| author | aut | for persons who have made substantial contributions\
-          \ to the software. |\n| contributor | ctb| for persons who have made smaller\
-          \ contributions (such as code patches).\n| datacontributor | dtc | for persons\
-          \ or organisations that contributed data sets for the software\n| copyrightholder\
-          \ | cph | for all copyright holders. This is a legal concept so should use\
-          \ the legal name of an institution or corporate body.\n| funder | fnd |\
-          \ for persons or organizations that furnished financial support for the\
-          \ development of the software\n\nThe [full list of roles](https://www.loc.gov/marc/relators/relaterm.html)\
-          \ is extremely comprehensive.\n"
-        type: "array"
-        items:
-          $ref: "#/definitions/Author"
-      status:
-        description: "Allows setting a component to active, deprecated or disabled."
-        $ref: "#/definitions/Status"
-      requirements:
-        description: "Computational requirements related to running the component.\
-          \ \n`cpus` specifies the maximum number of (logical) cpus a component is\
-          \ allowed to use., whereas\n`memory` specifies the maximum amount of memory\
-          \ a component is allowed to allicate. Memory units must be\nin B, KB, MB,\
-          \ GB, TB or PB."
-        $ref: "#/definitions/ComputationalRequirements"
-      repositories:
-        description: "(Pre-)defines repositories that can be used as repository in\
-          \ dependencies.\nAllows reusing repository definitions in case it is used\
-          \ in multiple dependencies."
-        type: "array"
-        items:
-          $ref: "#/definitions/RepositoryWithName"
-      dependencies:
-        description: "Allows listing Viash components required by this Viash component"
-        type: "array"
-        items:
-          $ref: "#/definitions/Dependency"
-      namespace:
-        description: "Namespace this component is a part of. See the Namespaces guide\
-          \ for more information on namespaces."
-        type: "string"
-      functionality:
-        description: "The functionality describes the behaviour of the script in terms\
-          \ of arguments and resources.\nBy specifying a few restrictions (e.g. mandatory\
-          \ arguments) and adding some descriptions, Viash will automatically generate\
-          \ a stylish command-line interface for you.\n"
-        $ref: "#/definitions/Functionality"
-      runners:
-        description: "A list of runners to execute target artifacts.\n\n - ExecutableRunner\n\
-          \ - NextflowRunner\n"
-        type: "array"
-        items:
-          $ref: "#/definitions/Runner"
-      name:
-        description: "Name of the component and the filename of the executable when\
-          \ built with `viash build`."
-        type: "string"
-      build_info:
-        $ref: "#/definitions/BuildInfo"
-      argument_groups:
-        description: "A grouping of the arguments, used to display the help message.\n\
-          \n - `name: foo`, the name of the argument group. \n - `description: Description\
-          \ of foo`, a description of the argument group. Multiline descriptions are\
-          \ supported.\n - `arguments: [arg1, arg2, ...]`, list of the arguments.\n\
-          \n"
-        type: "array"
-        items:
-          $ref: "#/definitions/ArgumentGroup"
-      description:
-        description: "A description of the component. This will be displayed with\
-          \ `--help`."
-        type: "string"
-      usage:
-        description: "A description on how to use the component. This will be displayed\
-          \ with `--help` under the 'Usage:' section."
-        type: "string"
-      info:
-        description: "Structured information. Can be any shape: a string, vector,\
-          \ map or even nested map."
-        type: "object"
-      package_config:
-        description: "The package config content used during build."
-        $ref: "#/definitions/PackageConfig"
-      platforms:
-        description: "A list of platforms to generate target artifacts for.\n\n -\
-          \ Native\n - Docker\n - Nextflow\n"
-        type: "array"
-        items:
-          $ref: "#/definitions/Platform"
-      version:
-        description: "Version of the component. This field will be used to version\
-          \ the executable and the Docker container."
-        type: "string"
-      links:
-        description: "External links of the component."
-        $ref: "#/definitions/Links"
-      references:
-        description: "References to external resources related to the component."
-        $ref: "#/definitions/References"
-      engines:
-        description: "A list of engine environments to execute target artifacts in.\n\
-          \n - NativeEngine\n - DockerEngine\n"
-        type: "array"
-        items:
-          $ref: "#/definitions/Engine"
-      resources:
-        description: "Resources are files that support the component. The first resource\
-          \ should be a script that will be executed when the component is run. Additional\
-          \ resources will be copied to the same directory.\n\nCommon properties:\n\
-          \n * type: `file` / `r_script` / `python_script` / `bash_script` / `javascript_script`\
-          \ / `scala_script` / `csharp_script`, specifies the type of the resource.\
-          \ The first resource cannot be of type `file`. When the type is not specified,\
-          \ the default type is simply `file`.\n * dest: filename, the resulting name\
-          \ of the resource.  From within a script, the file can be accessed at `meta[\"\
-          resources_dir\"] + \"/\" + dest`. If unspecified, `dest` will be set to\
-          \ the basename of the `path` parameter.\n * path: `path/to/file`, the path\
-          \ of the input file. Can be a relative or an absolute path, or a URI. Mutually\
-          \ exclusive with `text`.\n * text: ...multiline text..., the content of\
-          \ the resulting file specified as a string. Mutually exclusive with `path`.\n\
-          \ * is_executable: `true` / `false`, whether the resulting resource file\
-          \ should be made executable.\n"
-        type: "array"
-        items:
-          $ref: "#/definitions/Resource"
-      keywords:
-        description: "The keywords of the components."
-        type: "array"
-        items:
-          type: "string"
-      test_resources:
-        description: "One or more scripts to be used to test the component behaviour\
-          \ when `viash test` is invoked. Additional files of type `file` will be\
-          \ made available only during testing. Each test script should expect no\
-          \ command-line inputs, be platform-independent, and return an exit code\
-          \ >0 when unexpected behaviour occurs during testing. See Unit Testing for\
-          \ more info."
-        type: "array"
-        items:
-          $ref: "#/definitions/Resource"
-    required:
-    - "name"
-    additionalProperties: false
-  PackageConfig:
-    description: "A Viash package configuration file. It's name should be `_viash.yaml`."
-    type: "object"
-    properties:
-      organization:
-        description: "The organization of the package."
-        type: "string"
-      name:
-        description: "The name of the package."
-        type: "string"
-      source:
-        description: "Which source directory to use for the `viash ns` commands."
-        type: "string"
-      description:
-        description: "A description of the package."
-        type: "string"
-      viash_version:
-        description: "Which version of Viash to use."
-        type: "string"
-      config_mods:
-        oneOf:
-        - description: "Which config mods to apply."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Which config mods to apply."
-            type: "string"
-      info:
-        description: "Structured information. Can be any shape: a string, vector,\
-          \ map or even nested map."
-        type: "object"
-      license:
-        description: "The license of the package."
-        type: "string"
-      references:
-        description: "References to external resources related to the package."
-        $ref: "#/definitions/References"
-      authors:
-        description: "The authors of the package."
-        type: "array"
-        items:
-          $ref: "#/definitions/Author"
-      repositories:
-        description: "Common repository definitions for component dependencies."
-        type: "array"
-        items:
-          $ref: "#/definitions/RepositoryWithName"
-      keywords:
-        description: "The keywords of the package."
-        type: "array"
-        items:
-          type: "string"
-      target:
-        description: "Which target directory to use for `viash ns build`."
-        type: "string"
-      version:
-        description: "The version of the package."
-        type: "string"
-      links:
-        description: "External links of the package."
-        $ref: "#/definitions/Links"
-    required: []
-    additionalProperties: false
-  BuildInfo:
-    description: "Meta information fields filled in by Viash during build."
-    type: "object"
-    properties:
-      git_tag:
-        description: "Git tag."
-        type: "string"
-      git_remote:
-        description: "Git remote name."
-        type: "string"
-      viash_version:
-        description: "The Viash version that was used to build the component."
-        type: "string"
-      output:
-        description: "Folder path to the build artifacts."
-        type: "string"
-      git_commit:
-        description: "Git commit hash."
-        type: "string"
-      executable:
-        description: "Output folder with main executable path."
-        type: "string"
-      engine:
-        description: "The engine id used during build."
-        type: "string"
-      runner:
-        description: "The runner id used during build."
-        type: "string"
-      config:
-        description: "Path to the config used during build."
-        type: "string"
-    required:
-    - "config"
-    additionalProperties: false
-  Functionality:
-    description: "The functionality-part of the config file describes the behaviour\
-      \ of the script in terms of arguments and resources.\nBy specifying a few restrictions\
-      \ (e.g. mandatory arguments) and adding some descriptions, Viash will automatically\
-      \ generate a stylish command-line interface for you.\n"
-    type: "object"
-    properties:
-      organization:
-        description: "The organization of the package."
-        type: "string"
-      name:
-        description: "Name of the component and the filename of the executable when\
-          \ built with `viash build`."
-        type: "string"
-      argument_groups:
-        description: "A grouping of the arguments, used to display the help message.\n\
-          \n - `name: foo`, the name of the argument group. \n - `description: Description\
-          \ of foo`, a description of the argument group. Multiline descriptions are\
-          \ supported.\n - `arguments: [arg1, arg2, ...]`, list of the arguments.\n\
-          \n"
-        type: "array"
-        items:
-          $ref: "#/definitions/ArgumentGroup"
-      info:
-        description: "Structured information. Can be any shape: a string, vector,\
-          \ map or even nested map."
-        type: "object"
-      license:
-        description: "The license of the package."
-        type: "string"
-      references:
-        description: "References to external resources related to the component."
-        $ref: "#/definitions/References"
-      authors:
-        description: "A list of authors. An author must at least have a name, but\
-          \ can also have a list of roles, an e-mail address, and a map of custom\
-          \ properties.\n\nSuggested values for roles are:\n \n| Role | Abbrev. |\
-          \ Description |\n|------|---------|-------------|\n| maintainer | mnt |\
-          \ for the maintainer of the code. Ideally, exactly one maintainer is specified.\
-          \ |\n| author | aut | for persons who have made substantial contributions\
-          \ to the software. |\n| contributor | ctb| for persons who have made smaller\
-          \ contributions (such as code patches).\n| datacontributor | dtc | for persons\
-          \ or organisations that contributed data sets for the software\n| copyrightholder\
-          \ | cph | for all copyright holders. This is a legal concept so should use\
-          \ the legal name of an institution or corporate body.\n| funder | fnd |\
-          \ for persons or organizations that furnished financial support for the\
-          \ development of the software\n\nThe [full list of roles](https://www.loc.gov/marc/relators/relaterm.html)\
-          \ is extremely comprehensive.\n"
-        type: "array"
-        items:
-          $ref: "#/definitions/Author"
-      status:
-        description: "Allows setting a component to active, deprecated or disabled."
-        $ref: "#/definitions/Status"
-      requirements:
-        description: "Computational requirements related to running the component.\
-          \ \n`cpus` specifies the maximum number of (logical) cpus a component is\
-          \ allowed to use., whereas\n`memory` specifies the maximum amount of memory\
-          \ a component is allowed to allicate. Memory units must be\nin B, KB, MB,\
-          \ GB, TB or PB."
-        $ref: "#/definitions/ComputationalRequirements"
-      repositories:
-        description: "(Pre-)defines repositories that can be used as repository in\
-          \ dependencies.\nAllows reusing repository definitions in case it is used\
-          \ in multiple dependencies."
-        type: "array"
-        items:
-          $ref: "#/definitions/RepositoryWithName"
-      test_resources:
-        description: "One or more scripts to be used to test the component behaviour\
-          \ when `viash test` is invoked. Additional files of type `file` will be\
-          \ made available only during testing. Each test script should expect no\
-          \ command-line inputs, be platform-independent, and return an exit code\
-          \ >0 when unexpected behaviour occurs during testing. See Unit Testing for\
-          \ more info."
-        type: "array"
-        items:
-          $ref: "#/definitions/Resource"
-      dependencies:
-        description: "Allows listing Viash components required by this Viash component"
-        type: "array"
-        items:
-          $ref: "#/definitions/Dependency"
-      description:
-        description: "A description of the component. This will be displayed with\
-          \ `--help`."
-        type: "string"
-      usage:
-        description: "A description on how to use the component. This will be displayed\
-          \ with `--help` under the 'Usage:' section."
-        type: "string"
-      version:
-        description: "Version of the component. This field will be used to version\
-          \ the executable and the Docker container."
-        type: "string"
-      links:
-        description: "External links of the component."
-        $ref: "#/definitions/Links"
-      resources:
-        description: "Resources are files that support the component. The first resource\
-          \ should be a script that will be executed when the functionality is run.\
-          \ Additional resources will be copied to the same directory.\n\nCommon properties:\n\
-          \n * type: `file` / `r_script` / `python_script` / `bash_script` / `javascript_script`\
-          \ / `scala_script` / `csharp_script`, specifies the type of the resource.\
-          \ The first resource cannot be of type `file`. When the type is not specified,\
-          \ the default type is simply `file`.\n * dest: filename, the resulting name\
-          \ of the resource.  From within a script, the file can be accessed at `meta[\"\
-          resources_dir\"] + \"/\" + dest`. If unspecified, `dest` will be set to\
-          \ the basename of the `path` parameter.\n * path: `path/to/file`, the path\
-          \ of the input file. Can be a relative or an absolute path, or a URI. Mutually\
-          \ exclusive with `text`.\n * text: ...multiline text..., the content of\
-          \ the resulting file specified as a string. Mutually exclusive with `path`.\n\
-          \ * is_executable: `true` / `false`, whether the resulting resource file\
-          \ should be made executable.\n"
-        type: "array"
-        items:
-          $ref: "#/definitions/Resource"
-      keywords:
-        description: "The keywords of the components."
-        type: "array"
-        items:
-          type: "string"
-      namespace:
-        description: "Namespace this component is a part of. See the Namespaces guide\
-          \ for more information on namespaces."
-        type: "string"
-      arguments:
-        description: "A list of arguments for this component. For each argument, a\
-          \ type and a name must be specified. Depending on the type of argument,\
-          \ different properties can be set. See these reference pages per type for\
-          \ more information:  \n\n - string\n - file\n - integer\n - double\n - boolean\n\
-          \ - boolean_true\n - boolean_false\n"
-        type: "array"
-        items:
-          $ref: "#/definitions/Argument"
-    required:
-    - "name"
-    additionalProperties: false
-  Author:
-    description: "Author metadata."
-    type: "object"
-    properties:
-      name:
-        description: "Full name of the author, usually in the name of FirstName MiddleName\
-          \ LastName."
-        type: "string"
-      email:
-        description: "E-mail of the author."
-        type: "string"
-      info:
-        description: "Structured information. Can be any shape: a string, vector,\
-          \ map or even nested map."
-        type: "object"
-      roles:
-        oneOf:
-        - description: "Role of the author. Suggested items:\n\n* \"author\": Authors\
-            \ who have made substantial contributions to the component.\n* \"maintainer\"\
-            : The maintainer of the component.\n* \"contributor\": Authors who have\
-            \ made smaller contributions (such as code patches etc.).\n"
-          type: "string"
-        - type: "array"
-          items:
-            description: "Role of the author. Suggested items:\n\n* \"author\": Authors\
-              \ who have made substantial contributions to the component.\n* \"maintainer\"\
-              : The maintainer of the component.\n* \"contributor\": Authors who have\
-              \ made smaller contributions (such as code patches etc.).\n"
-            type: "string"
-    required:
-    - "name"
-    additionalProperties: false
-  ComputationalRequirements:
-    description: "Computational requirements related to running the component."
-    type: "object"
-    properties:
-      cpus:
-        description: "The maximum number of (logical) cpus a component is allowed\
-          \ to use."
-        type: "integer"
-      commands:
-        description: "A list of commands which should be present on the system for\
-          \ the script to function."
-        type: "array"
-        items:
-          type: "string"
-      memory:
-        description: "The maximum amount of memory a component is allowed to allocate.\
-          \ Unit must be one of B, KB, MB, GB, TB or PB."
-        type: "string"
-    required: []
-    additionalProperties: false
-  ArgumentGroup:
-    description: "A grouping of the arguments, used to display the help message."
-    type: "object"
-    properties:
-      name:
-        description: "The name of the argument group."
-        type: "string"
-      description:
-        description: "Description of foo`, a description of the argument group. Multiline\
-          \ descriptions are supported."
-        type: "string"
-      arguments:
-        description: "List of arguments."
-        type: "array"
-        items:
-          $ref: "#/definitions/Argument"
-    required:
-    - "name"
-    - "arguments"
-    additionalProperties: false
-  Links:
-    description: "Links to external resources related to the component."
-    type: "object"
-    properties:
-      repository:
-        description: "Source repository url."
-        type: "string"
-      documentation:
-        description: "Documentation website url."
-        type: "string"
-      docker_registry:
-        description: "Docker registry url."
-        type: "string"
-      homepage:
-        description: "Homepage website url."
-        type: "string"
-      issue_tracker:
-        description: "Issue tracker url."
-        type: "string"
-    required: []
-    additionalProperties: false
-  References:
-    description: "References to external resources related to the component."
-    type: "object"
-    properties:
-      bibtex:
-        oneOf:
-        - description: "One or multiple BibTeX reference(s) of the component."
-          type: "string"
-        - type: "array"
-          items:
-            description: "One or multiple BibTeX reference(s) of the component."
-            type: "string"
-      doi:
-        oneOf:
-        - description: "One or multiple DOI reference(s) of the component."
-          type: "string"
-        - type: "array"
-          items:
-            description: "One or multiple DOI reference(s) of the component."
-            type: "string"
-    additionalProperties: false
-  Runner:
-    oneOf:
-    - $ref: "#/definitions/ExecutableRunner"
-    - $ref: "#/definitions/NextflowRunner"
-  ExecutableRunner:
-    description: "Run code as an executable.\n\nThis runner is the default runner.\
-      \ It will generate a bash script that can be run directly.\n\nThis runner is\
-      \ also used for the native engine.\n\nThis runner is also used for the docker\
-      \ engine.\n"
-    type: "object"
-    properties:
-      docker_setup_strategy:
-        description: "The Docker setup strategy to use when building a docker engine\
-          \ enrivonment.\n\n| Strategy | Description |\n|-----|----------|\n| `alwaysbuild`\
-          \ / `build` / `b` | Always build the image from the dockerfile. This is\
-          \ the default setup strategy.\n| `alwayscachedbuild` / `cachedbuild` / `cb`\
-          \ | Always build the image from the dockerfile, with caching enabled.\n\
-          | `ifneedbebuild` |  Build the image if it does not exist locally.\n| `ifneedbecachedbuild`\
-          \ | Build the image with caching enabled if it does not exist locally, with\
-          \ caching enabled.\n| `alwayspull` / `pull` / `p` |  Try to pull the container\
-          \ from [Docker Hub](https://hub.docker.com) or the specified docker registry.\n\
-          | `alwayspullelsebuild` / `pullelsebuild` |  Try to pull the image from\
-          \ a registry and build it if it doesn't exist.\n| `alwayspullelsecachedbuild`\
-          \ / `pullelsecachedbuild` |  Try to pull the image from a registry and build\
-          \ it with caching if it doesn't exist.\n| `ifneedbepull` |  If the image\
-          \ does not exist locally, pull the image.\n| `ifneedbepullelsebuild` | \
-          \ If the image does not exist locally, pull the image. If the image does\
-          \ exist, build it.\n| `ifneedbepullelsecachedbuild` | If the image does\
-          \ not exist locally, pull the image. If the image does exist, build it with\
-          \ caching enabled.\n| `push` | Push the container to [Docker Hub](https://hub.docker.com)\
-          \  or the specified docker registry.\n| `pushifnotpresent` | Push the container\
-          \ to [Docker Hub](https://hub.docker.com) or the specified docker registry\
-          \ if the tag does not exist yet.\n| `donothing` / `meh` | Do not build or\
-          \ pull anything.\n\n"
-        $ref: "#/definitions/DockerSetupStrategy"
-      workdir:
-        description: "The working directory when starting the engine. This doesn't\
-          \ change the Dockerfile but gets added as a command-line argument at runtime."
-        type: "string"
-      docker_run_args:
-        oneOf:
-        - description: "Provide runtime arguments to Docker. See the documentation\
-            \ on [`docker run`](https://docs.docker.com/engine/reference/run/) for\
-            \ more information."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Provide runtime arguments to Docker. See the documentation\
-              \ on [`docker run`](https://docs.docker.com/engine/reference/run/) for\
-              \ more information."
-            type: "string"
-      id:
-        description: "Name of the runner. As with all runners, you can give an runner\
-          \ a different name. By specifying `id: foo`, you can target this executor\
-          \ (only) by specifying `...` in any of the Viash commands."
-        type: "string"
-      port:
-        oneOf:
-        - description: "A list of enabled ports. This doesn't change the Dockerfile\
-            \ but gets added as a command-line argument at runtime."
-          type: "integer"
-        - description: "A list of enabled ports. This doesn't change the Dockerfile\
-            \ but gets added as a command-line argument at runtime."
-          type: "string"
-        - description: "A list of enabled ports. This doesn't change the Dockerfile\
-            \ but gets added as a command-line argument at runtime."
-          type: "array"
-          items:
-            type: "integer"
-        - description: "A list of enabled ports. This doesn't change the Dockerfile\
-            \ but gets added as a command-line argument at runtime."
-          type: "array"
-          items:
-            type: "string"
-      type:
-        description: "Run code as an executable.\n\nThis runner is the default runner.\
-          \ It will generate a bash script that can be run directly.\n\nThis runner\
-          \ is also used for the native engine.\n\nThis runner is also used for the\
-          \ docker engine.\n"
-        const: "executable"
-    required:
-    - "type"
-    additionalProperties: false
-  NextflowRunner:
-    description: "Run a Viash component on a Nextflow backend engine.\n"
-    type: "object"
-    properties:
-      auto:
-        description: "Automated processing flags which can be toggled on or off:\n\
-          \n| Flag | Description | Default |\n|---|---------|----|\n| `simplifyInput`\
-          \ | If `true`, an input tuple only containing only a single File (e.g. `[\"\
-          foo\", file(\"in.h5ad\")]`) is automatically transformed to a map (i.e.\
-          \ `[\"foo\", [ input: file(\"in.h5ad\") ] ]`). | `true` |\n| `simplifyOutput`\
-          \ | If `true`, an output tuple containing a map with a File (e.g. `[\"foo\"\
-          , [ output: file(\"out.h5ad\") ] ]`) is automatically transformed to a map\
-          \ (i.e. `[\"foo\", file(\"out.h5ad\")]`). | `false` |\n| `transcript` |\
-          \ If `true`, the module's transcripts from `work/` are automatically published\
-          \ to `params.transcriptDir`. If not defined, `params.publishDir + \"/_transcripts\"\
-          ` will be used. Will throw an error if neither are defined. | `false` |\n\
-          | `publish` | If `true`, the module's outputs are automatically published\
-          \ to `params.publishDir`. If equal to \"state\", also a `.state.yaml` file\
-          \ will be published in the publish dir. Will throw an error if `params.publishDir`\
-          \ is not defined. | `false` |\n\n"
-        $ref: "#/definitions/NextflowAuto"
-      directives:
-        description: "Directives are optional settings that affect the execution of\
-          \ the process. These mostly match up with the Nextflow counterparts.  \n"
-        $ref: "#/definitions/NextflowDirectives"
-      container:
-        description: "Specifies the Docker engine id to be used to run Nextflow."
-        type: "string"
-      config:
-        description: "Allows tweaking how the Nextflow Config file is generated."
-        $ref: "#/definitions/NextflowConfig"
-      debug:
-        description: "Whether or not to print debug messages."
-        type: "boolean"
-      id:
-        description: "Name of the runner. As with all runners, you can give an runner\
-          \ a different name. By specifying `id: foo`, you can target this runner\
-          \ (only) by specifying `...` in any of the Viash commands."
-        type: "string"
-      type:
-        description: "Run a Viash component on a Nextflow backend engine.\n"
-        const: "nextflow"
-    required:
-    - "type"
-    additionalProperties: false
-  Engine:
-    oneOf:
-    - $ref: "#/definitions/DockerEngine"
-    - $ref: "#/definitions/NativeEngine"
-  NativeEngine:
-    description: "Running a Viash component on a native engine means that the script\
-      \ will be executed in your current environment.\nAny dependencies are assumed\
-      \ to have been installed by the user, so the native engine is meant for developers\
-      \ (who know what they're doing) or for simple bash scripts (which have no extra\
-      \ dependencies).\n"
-    type: "object"
-    properties:
-      id:
-        description: "Name of the engine. As with all engines, you can give an engine\
-          \ a different name. By specifying `id: foo`, you can target this engine\
-          \ (only) by specifying `...` in any of the Viash commands."
-        type: "string"
-      type:
-        description: "Running a Viash component on a native engine means that the\
-          \ script will be executed in your current environment.\nAny dependencies\
-          \ are assumed to have been installed by the user, so the native engine is\
-          \ meant for developers (who know what they're doing) or for simple bash\
-          \ scripts (which have no extra dependencies).\n"
-        const: "native"
-    required:
-    - "type"
-    additionalProperties: false
-  DockerEngine:
-    description: "Run a Viash component on a Docker backend engine.\nBy specifying\
-      \ which dependencies your component needs, users will be able to build a docker\
-      \ container from scratch using the setup flag, or pull it from a docker repository.\n"
-    type: "object"
-    properties:
-      organization:
-        description: "Name of a container's [organization](https://docs.docker.com/docker-hub/orgs/)."
-        type: "string"
-      registry:
-        description: "The URL to the a [custom Docker registry](https://docs.docker.com/registry/)"
-        type: "string"
-      image:
-        description: "The base container to start from. You can also add the tag here\
-          \ if you wish."
-        type: "string"
-      tag:
-        description: "Specify a Docker image based on its tag."
-        type: "string"
-      target_image:
-        description: "If anything is specified in the setup section, running the `---setup`\
-          \ will result in an image with the name of `<target_image>:<version>`. If\
-          \ nothing is specified in the `setup` section, simply `image` will be used.\
-          \ Advanced usage only."
-        type: "string"
-      target_tag:
-        description: "The tag the resulting image gets. Advanced usage only."
-        type: "string"
-      namespace_separator:
-        description: "The separator between the namespace and the name of the component,\
-          \ used for determining the image name. Default: \"/\"."
-        type: "string"
-      id:
-        description: "Name of the engine. As with all engines, you can give a engine\
-          \ a different name. By specifying `id: foo`, you can target this engine\
-          \ (only) by specifying `...` in any of the Viash commands."
-        type: "string"
-      target_registry:
-        description: "The URL where the resulting image will be pushed to. Advanced\
-          \ usage only."
-        type: "string"
-      type:
-        description: "Run a Viash component on a Docker backend engine.\nBy specifying\
-          \ which dependencies your component needs, users will be able to build a\
-          \ docker container from scratch using the setup flag, or pull it from a\
-          \ docker repository.\n"
-        const: "docker"
-      target_organization:
-        description: "The organization set in the resulting image. Advanced usage\
-          \ only."
-        type: "string"
-      setup:
-        description: "A list of requirements for installing the following types of\
-          \ packages:\n\n - apt\n - apk\n - Docker setup instructions\n - JavaScript\n\
-          \ - Python\n - R\n - Ruby\n - yum\n\nThe order in which these dependencies\
-          \ are specified determines the order in which they will be installed.\n"
-        type: "array"
-        items:
-          $ref: "#/definitions/Requirements"
-      cmd:
-        oneOf:
-        - description: "Set the default command being executed when running the Docker\
-            \ container."
-          type: "string"
-        - description: "Set the default command being executed when running the Docker\
-            \ container."
-          type: "array"
-          items:
-            type: "string"
-      target_image_source:
-        description: "The source of the target image. This is used for defining labels\
-          \ in the dockerfile."
-        type: "string"
-      test_setup:
-        description: "Additional requirements specific for running unit tests."
-        type: "array"
-        items:
-          $ref: "#/definitions/Requirements"
-      entrypoint:
-        oneOf:
-        - description: "Override the entrypoint of the base container. Default set\
-            \ `ENTRYPOINT []`."
-          type: "string"
-        - description: "Override the entrypoint of the base container. Default set\
-            \ `ENTRYPOINT []`."
-          type: "array"
-          items:
-            type: "string"
-    required:
-    - "image"
-    - "type"
-    additionalProperties: false
-  Platform:
-    oneOf:
-    - $ref: "#/definitions/NativePlatform"
-    - $ref: "#/definitions/DockerPlatform"
-    - $ref: "#/definitions/NextflowPlatform"
-  NativePlatform:
-    description: "Running a Viash component on a native platform means that the script\
-      \ will be executed in your current environment.\nAny dependencies are assumed\
-      \ to have been installed by the user, so the native platform is meant for developers\
-      \ (who know what they're doing) or for simple bash scripts (which have no extra\
-      \ dependencies).\n"
-    type: "object"
-    properties:
-      id:
-        description: "As with all platforms, you can give a platform a different name.\
-          \ By specifying `id: foo`, you can target this platform (only) by specifying\
-          \ `-p foo` in any of the Viash commands."
-        type: "string"
-      type:
-        description: "Running a Viash component on a native platform means that the\
-          \ script will be executed in your current environment.\nAny dependencies\
-          \ are assumed to have been installed by the user, so the native platform\
-          \ is meant for developers (who know what they're doing) or for simple bash\
-          \ scripts (which have no extra dependencies).\n"
-        const: "native"
-    required:
-    - "type"
-    additionalProperties: false
-  DockerPlatform:
-    description: "Run a Viash component on a Docker backend platform.\nBy specifying\
-      \ which dependencies your component needs, users will be able to build a docker\
-      \ container from scratch using the setup flag, or pull it from a docker repository.\n"
-    type: "object"
-    properties:
-      organization:
-        description: "Name of a container's [organization](https://docs.docker.com/docker-hub/orgs/)."
-        type: "string"
-      registry:
-        description: "The URL to the a [custom Docker registry](https://docs.docker.com/registry/)"
-        type: "string"
-      image:
-        description: "The base container to start from. You can also add the tag here\
-          \ if you wish."
-        type: "string"
-      tag:
-        description: "Specify a Docker image based on its tag."
-        type: "string"
-      target_tag:
-        description: "The tag the resulting image gets. Advanced usage only."
-        type: "string"
-      run_args:
-        oneOf:
-        - description: "Add [docker run](https://docs.docker.com/engine/reference/run/)\
-            \ arguments."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Add [docker run](https://docs.docker.com/engine/reference/run/)\
-              \ arguments."
-            type: "string"
-      namespace_separator:
-        description: "The separator between the namespace and the name of the component,\
-          \ used for determining the image name. Default: \"/\"."
-        type: "string"
-      resolve_volume:
-        description: "Enables or disables automatic volume mapping. Enabled when set\
-          \ to `Automatic` or disabled when set to `Manual`. Default: `Automatic`."
-        $ref: "#/definitions/DockerResolveVolume"
-      cmd:
-        oneOf:
-        - description: "Set the default command being executed when running the Docker\
-            \ container."
-          type: "string"
-        - description: "Set the default command being executed when running the Docker\
-            \ container."
-          type: "array"
-          items:
-            type: "string"
-      id:
-        description: "As with all platforms, you can give a platform a different name.\
-          \ By specifying `id: foo`, you can target this platform (only) by specifying\
-          \ `-p foo` in any of the Viash commands."
-        type: "string"
-      port:
-        oneOf:
-        - description: "A list of enabled ports. This doesn't change the Dockerfile\
-            \ but gets added as a command-line argument at runtime."
-          type: "string"
-        - type: "array"
-          items:
-            description: "A list of enabled ports. This doesn't change the Dockerfile\
-              \ but gets added as a command-line argument at runtime."
-            type: "string"
-      target_registry:
-        description: "The URL where the resulting image will be pushed to. Advanced\
-          \ usage only."
-        type: "string"
-      setup:
-        description: "A list of requirements for installing the following types of\
-          \ packages:\n\n - apt\n - apk\n - Docker setup instructions\n - JavaScript\n\
-          \ - Python\n - R\n - Ruby\n - yum\n\nThe order in which these dependencies\
-          \ are specified determines the order in which they will be installed.\n"
-        type: "array"
-        items:
-          $ref: "#/definitions/Requirements"
-      workdir:
-        description: "The working directory when starting the container. This doesn't\
-          \ change the Dockerfile but gets added as a command-line argument at runtime."
-        type: "string"
-      target_image:
-        description: "If anything is specified in the setup section, running the `---setup`\
-          \ will result in an image with the name of `<target_image>:<version>`. If\
-          \ nothing is specified in the `setup` section, simply `image` will be used.\
-          \ Advanced usage only."
-        type: "string"
-      target_image_source:
-        description: "The source of the target image. This is used for defining labels\
-          \ in the dockerfile."
-        type: "string"
-      test_setup:
-        description: "Additional requirements specific for running unit tests."
-        type: "array"
-        items:
-          $ref: "#/definitions/Requirements"
-      entrypoint:
-        oneOf:
-        - description: "Override the entrypoint of the base container. Default set\
-            \ `ENTRYPOINT []`."
-          type: "string"
-        - description: "Override the entrypoint of the base container. Default set\
-            \ `ENTRYPOINT []`."
-          type: "array"
-          items:
-            type: "string"
-      setup_strategy:
-        description: "The Docker setup strategy to use when building a container.\n\
-          \n| Strategy | Description |\n|-----|----------|\n| `alwaysbuild` / `build`\
-          \ / `b` | Always build the image from the dockerfile. This is the default\
-          \ setup strategy.\n| `alwayscachedbuild` / `cachedbuild` / `cb` | Always\
-          \ build the image from the dockerfile, with caching enabled.\n| `ifneedbebuild`\
-          \ |  Build the image if it does not exist locally.\n| `ifneedbecachedbuild`\
-          \ | Build the image with caching enabled if it does not exist locally, with\
-          \ caching enabled.\n| `alwayspull` / `pull` / `p` |  Try to pull the container\
-          \ from [Docker Hub](https://hub.docker.com) or the specified docker registry.\n\
-          | `alwayspullelsebuild` / `pullelsebuild` |  Try to pull the image from\
-          \ a registry and build it if it doesn't exist.\n| `alwayspullelsecachedbuild`\
-          \ / `pullelsecachedbuild` |  Try to pull the image from a registry and build\
-          \ it with caching if it doesn't exist.\n| `ifneedbepull` |  If the image\
-          \ does not exist locally, pull the image.\n| `ifneedbepullelsebuild` | \
-          \ If the image does not exist locally, pull the image. If the image does\
-          \ exist, build it.\n| `ifneedbepullelsecachedbuild` | If the image does\
-          \ not exist locally, pull the image. If the image does exist, build it with\
-          \ caching enabled.\n| `push` | Push the container to [Docker Hub](https://hub.docker.com)\
-          \  or the specified docker registry.\n| `pushifnotpresent` | Push the container\
-          \ to [Docker Hub](https://hub.docker.com) or the specified docker registry\
-          \ if the tag does not exist yet.\n| `donothing` / `meh` | Do not build or\
-          \ pull anything.\n\n"
-        $ref: "#/definitions/DockerSetupStrategy"
-      type:
-        description: "Run a Viash component on a Docker backend platform.\nBy specifying\
-          \ which dependencies your component needs, users will be able to build a\
-          \ docker container from scratch using the setup flag, or pull it from a\
-          \ docker repository.\n"
-        const: "docker"
-      target_organization:
-        description: "The organization set in the resulting image. Advanced usage\
-          \ only."
-        type: "string"
-    required:
-    - "image"
-    - "type"
-    additionalProperties: false
-  NextflowPlatform:
-    description: "Platform for generating Nextflow VDSL3 modules."
-    type: "object"
-    properties:
-      auto:
-        description: "Automated processing flags which can be toggled on or off:\n\
-          \n| Flag | Description | Default |\n|---|---------|----|\n| `simplifyInput`\
-          \ | If `true`, an input tuple only containing only a single File (e.g. `[\"\
-          foo\", file(\"in.h5ad\")]`) is automatically transformed to a map (i.e.\
-          \ `[\"foo\", [ input: file(\"in.h5ad\") ] ]`). | `true` |\n| `simplifyOutput`\
-          \ | If `true`, an output tuple containing a map with a File (e.g. `[\"foo\"\
-          , [ output: file(\"out.h5ad\") ] ]`) is automatically transformed to a map\
-          \ (i.e. `[\"foo\", file(\"out.h5ad\")]`). | `false` |\n| `transcript` |\
-          \ If `true`, the module's transcripts from `work/` are automatically published\
-          \ to `params.transcriptDir`. If not defined, `params.publishDir + \"/_transcripts\"\
-          ` will be used. Will throw an error if neither are defined. | `false` |\n\
-          | `publish` | If `true`, the module's outputs are automatically published\
-          \ to `params.publishDir`. If equal to \"state\", also a `.state.yaml` file\
-          \ will be published in the publish dir. Will throw an error if `params.publishDir`\
-          \ is not defined. | `false` |\n\n"
-        $ref: "#/definitions/NextflowAuto"
-      directives:
-        description: "Directives are optional settings that affect the execution of\
-          \ the process. These mostly match up with the Nextflow counterparts.  \n"
-        $ref: "#/definitions/NextflowDirectives"
-      container:
-        description: "Specifies the Docker platform id to be used to run Nextflow."
-        type: "string"
-      config:
-        description: "Allows tweaking how the Nextflow Config file is generated."
-        $ref: "#/definitions/NextflowConfig"
-      debug:
-        description: "Whether or not to print debug messages."
-        type: "boolean"
-      id:
-        description: "Every platform can be given a specific id that can later be\
-          \ referred to explicitly when running or building the Viash component."
-        type: "string"
-      type:
-        description: "Platform for generating Nextflow VDSL3 modules."
-        const: "nextflow"
-    required:
-    - "type"
-    additionalProperties: false
-  Requirements:
-    oneOf:
-    - $ref: "#/definitions/ApkRequirements"
-    - $ref: "#/definitions/AptRequirements"
-    - $ref: "#/definitions/DockerRequirements"
-    - $ref: "#/definitions/JavaScriptRequirements"
-    - $ref: "#/definitions/PythonRequirements"
-    - $ref: "#/definitions/RRequirements"
-    - $ref: "#/definitions/RubyRequirements"
-    - $ref: "#/definitions/YumRequirements"
-  ApkRequirements:
-    description: "Specify which apk packages should be available in order to run the\
-      \ component."
-    type: "object"
-    properties:
-      type:
-        description: "Specify which apk packages should be available in order to run\
-          \ the component."
-        const: "apk"
-      packages:
-        oneOf:
-        - description: "Specifies which packages to install."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install."
-            type: "string"
-    required:
-    - "type"
-    additionalProperties: false
-  AptRequirements:
-    description: "Specify which apt packages should be available in order to run the\
-      \ component."
-    type: "object"
-    properties:
-      interactive:
-        description: "If `false`, the Debian frontend is set to non-interactive (recommended).\
-          \ Default: false."
-        type: "boolean"
-      type:
-        description: "Specify which apt packages should be available in order to run\
-          \ the component."
-        const: "apt"
-      packages:
-        oneOf:
-        - description: "Specifies which packages to install."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install."
-            type: "string"
-    required:
-    - "type"
-    additionalProperties: false
-  DockerRequirements:
-    description: "Specify which Docker commands should be run during setup."
-    type: "object"
-    properties:
-      run:
-        oneOf:
-        - description: "Specifies which `RUN` entries to add to the Dockerfile while\
-            \ building it."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which `RUN` entries to add to the Dockerfile while\
-              \ building it."
-            type: "string"
-      label:
-        oneOf:
-        - description: "Specifies which `LABEL` entries to add to the Dockerfile while\
-            \ building it."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which `LABEL` entries to add to the Dockerfile\
-              \ while building it."
-            type: "string"
-      build_args:
-        oneOf:
-        - description: "Specifies which `ARG` entries to add to the Dockerfile while\
-            \ building it."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which `ARG` entries to add to the Dockerfile while\
-              \ building it."
-            type: "string"
-      copy:
-        oneOf:
-        - description: "Specifies which `COPY` entries to add to the Dockerfile while\
-            \ building it."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which `COPY` entries to add to the Dockerfile\
-              \ while building it."
-            type: "string"
-      type:
-        description: "Specify which Docker commands should be run during setup."
-        const: "docker"
-      add:
-        oneOf:
-        - description: "Specifies which `ADD` entries to add to the Dockerfile while\
-            \ building it."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which `ADD` entries to add to the Dockerfile while\
-              \ building it."
-            type: "string"
-      env:
-        oneOf:
-        - description: "Specifies which `ENV` entries to add to the Dockerfile while\
-            \ building it. Unlike `ARG`, `ENV` entries are also accessible from inside\
-            \ the container."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which `ENV` entries to add to the Dockerfile while\
-              \ building it. Unlike `ARG`, `ENV` entries are also accessible from\
-              \ inside the container."
-            type: "string"
-    required:
-    - "type"
-    additionalProperties: false
-  JavaScriptRequirements:
-    description: "Specify which JavaScript packages should be available in order to\
-      \ run the component."
-    type: "object"
-    properties:
-      github:
-        oneOf:
-        - description: "Specifies which packages to install from GitHub."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from GitHub."
-            type: "string"
-      url:
-        oneOf:
-        - description: "Specifies which packages to install using a generic URI."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install using a generic URI."
-            type: "string"
-      git:
-        oneOf:
-        - description: "Specifies which packages to install using a Git URI."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install using a Git URI."
-            type: "string"
-      npm:
-        oneOf:
-        - description: "Specifies which packages to install from npm."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from npm."
-            type: "string"
-      type:
-        description: "Specify which JavaScript packages should be available in order\
-          \ to run the component."
-        const: "javascript"
-      packages:
-        oneOf:
-        - description: "Specifies which packages to install from npm."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from npm."
-            type: "string"
-    required:
-    - "type"
-    additionalProperties: false
-  PythonRequirements:
-    description: "Specify which Python packages should be available in order to run\
-      \ the component."
-    type: "object"
-    properties:
-      github:
-        oneOf:
-        - description: "Specifies which packages to install from GitHub."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from GitHub."
-            type: "string"
-      gitlab:
-        oneOf:
-        - description: "Specifies which packages to install from GitLab."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from GitLab."
-            type: "string"
-      pip:
-        oneOf:
-        - description: "Specifies which packages to install from pip."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from pip."
-            type: "string"
-      pypi:
-        oneOf:
-        - description: "Specifies which packages to install from PyPI using pip."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from PyPI using pip."
-            type: "string"
-      git:
-        oneOf:
-        - description: "Specifies which packages to install using a Git URI."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install using a Git URI."
-            type: "string"
-      upgrade:
-        description: "Sets the `--upgrade` flag when set to true. Default: true."
-        type: "boolean"
-      packages:
-        oneOf:
-        - description: "Specifies which packages to install from pip."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from pip."
-            type: "string"
-      url:
-        oneOf:
-        - description: "Specifies which packages to install using a generic URI."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install using a generic URI."
-            type: "string"
-      svn:
-        oneOf:
-        - description: "Specifies which packages to install using an SVN URI."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install using an SVN URI."
-            type: "string"
-      bazaar:
-        oneOf:
-        - description: "Specifies which packages to install using a Bazaar URI."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install using a Bazaar URI."
-            type: "string"
-      script:
-        oneOf:
-        - description: "Specifies a code block to run as part of the build."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies a code block to run as part of the build."
-            type: "string"
-      type:
-        description: "Specify which Python packages should be available in order to\
-          \ run the component."
-        const: "python"
-      mercurial:
-        oneOf:
-        - description: "Specifies which packages to install using a Mercurial URI."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install using a Mercurial URI."
-            type: "string"
-      user:
-        description: "Sets the `--user` flag when set to true. Default: false."
-        type: "boolean"
-    required:
-    - "type"
-    additionalProperties: false
-  RRequirements:
-    description: "Specify which R packages should be available in order to run the\
-      \ component."
-    type: "object"
-    properties:
-      bioc:
-        oneOf:
-        - description: "Specifies which packages to install from BioConductor."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from BioConductor."
-            type: "string"
-      github:
-        oneOf:
-        - description: "Specifies which packages to install from GitHub."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from GitHub."
-            type: "string"
-      gitlab:
-        oneOf:
-        - description: "Specifies which packages to install from GitLab."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from GitLab."
-            type: "string"
-      url:
-        oneOf:
-        - description: "Specifies which packages to install using a generic URI."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install using a generic URI."
-            type: "string"
-      bioc_force_install:
-        description: "Forces packages specified in `bioc` to be reinstalled, even\
-          \ if they are already present in the container. Default: false."
-        type: "boolean"
-      git:
-        oneOf:
-        - description: "Specifies which packages to install using a Git URI."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install using a Git URI."
-            type: "string"
-      cran:
-        oneOf:
-        - description: "Specifies which packages to install from CRAN."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from CRAN."
-            type: "string"
-      bitbucket:
-        oneOf:
-        - description: "Specifies which packages to install from Bitbucket."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from Bitbucket."
-            type: "string"
-      svn:
-        oneOf:
-        - description: "Specifies which packages to install using an SVN URI."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install using an SVN URI."
-            type: "string"
-      packages:
-        oneOf:
-        - description: "Specifies which packages to install from CRAN."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install from CRAN."
-            type: "string"
-      script:
-        oneOf:
-        - description: "Specifies a code block to run as part of the build."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies a code block to run as part of the build."
-            type: "string"
-      type:
-        description: "Specify which R packages should be available in order to run\
-          \ the component."
-        const: "r"
-    required:
-    - "type"
-    additionalProperties: false
-  RubyRequirements:
-    description: "Specify which Ruby packages should be available in order to run\
-      \ the component."
-    type: "object"
-    properties:
-      type:
-        description: "Specify which Ruby packages should be available in order to\
-          \ run the component."
-        const: "ruby"
-      packages:
-        oneOf:
-        - description: "Specifies which packages to install."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install."
-            type: "string"
-    required:
-    - "type"
-    additionalProperties: false
-  YumRequirements:
-    description: "Specify which yum packages should be available in order to run the\
-      \ component."
-    type: "object"
-    properties:
-      type:
-        description: "Specify which yum packages should be available in order to run\
-          \ the component."
-        const: "yum"
-      packages:
-        oneOf:
-        - description: "Specifies which packages to install."
-          type: "string"
-        - type: "array"
-          items:
-            description: "Specifies which packages to install."
-            type: "string"
-    required:
-    - "type"
-    additionalProperties: false
-  Argument:
-    oneOf:
-    - $ref: "#/definitions/BooleanArgument"
-    - $ref: "#/definitions/BooleanTrueArgument"
-    - $ref: "#/definitions/BooleanFalseArgument"
-    - $ref: "#/definitions/DoubleArgument"
-    - $ref: "#/definitions/FileArgument"
-    - $ref: "#/definitions/IntegerArgument"
-    - $ref: "#/definitions/LongArgument"
-    - $ref: "#/definitions/StringArgument"
-  BooleanArgument:
-    description: "A `boolean` type argument has two possible values: `true` or `false`."
-    type: "object"
-    properties:
-      alternatives:
-        oneOf:
-        - description: "List of alternative format variations for this argument."
-          type: "string"
-        - type: "array"
-          items:
-            description: "List of alternative format variations for this argument."
-            type: "string"
-      name:
-        description: "The name of the argument. Can be in the formats `--trim`, `-t`\
-          \ or `trim`. The number of dashes determines how values can be passed: \
-          \ \n\n  - `--trim` is a long option, which can be passed with `executable_name\
-          \ --trim`\n  - `-t` is a short option, which can be passed with `executable_name\
-          \ -t`\n  - `trim` is an argument, which can be passed with `executable_name\
-          \ trim`  \n"
-        type: "string"
-      direction:
-        $ref: "#/definitions/Direction"
-      info:
-        description: "Structured information. Can be any shape: a string, vector,\
-          \ map or even nested map."
-        type: "object"
-      default:
-        oneOf:
-        - description: "The default value when no argument value is provided. This\
-            \ will not work if the [`required`](#required) property is enabled."
-          type: "boolean"
-        - type: "array"
-          items:
-            description: "The default value when no argument value is provided. This\
-              \ will not work if the [`required`](#required) property is enabled."
-            type: "boolean"
-      example:
-        oneOf:
-        - description: "An example value for this argument. If no [`default`](#default)\
-            \ property was specified, this will be used for that purpose."
-          type: "boolean"
-        - type: "array"
-          items:
-            description: "An example value for this argument. If no [`default`](#default)\
-              \ property was specified, this will be used for that purpose."
-            type: "boolean"
-      description:
-        description: "A description of the argument. This will be displayed with `--help`."
-        type: "string"
-      multiple_sep:
-        description: "The delimiter character for providing [`multiple`](#multiple)\
-          \ values. `:` by default."
-        type: "string"
-      multiple:
-        description: "Treat the argument value as an array. Arrays can be passed using\
-          \ the delimiter `--foo=1:2:3` or by providing the same argument multiple\
-          \ times `--foo 1 --foo 2`. You can use a custom delimiter by using the [`multiple_sep`](#multiple_sep)\
-          \ property. `false` by default."
-        type: "boolean"
-      type:
-        description: "A `boolean` type argument has two possible values: `true` or\
-          \ `false`."
-        const: "boolean"
-      required:
-        description: "Make the value for this argument required. If set to `true`,\
-          \ an error will be produced if no value was provided. `false` by default."
-        type: "boolean"
-    required:
-    - "name"
-    - "type"
-    additionalProperties: false
-  BooleanTrueArgument:
-    description: "An argument of the `boolean_true` type acts like a `boolean` flag\
-      \ with a default value of `false`. When called as an argument it sets the `boolean`\
-      \ to `true`."
-    type: "object"
-    properties:
-      alternatives:
-        oneOf:
-        - description: "List of alternative format variations for this argument."
-          type: "string"
-        - type: "array"
-          items:
-            description: "List of alternative format variations for this argument."
-            type: "string"
-      name:
-        description: "The name of the argument. Can be in the formats `--silent`,\
-          \ `-s` or `silent`. The number of dashes determines how values can be passed:\
-          \  \n\n  - `--silent` is a long option, which can be passed with `executable_name\
-          \ --silent`\n  - `-s` is a short option, which can be passed with `executable_name\
-          \ -s`\n  - `silent` is an argument, which can be passed with `executable_name\
-          \ silent`  \n"
-        type: "string"
-      direction:
-        $ref: "#/definitions/Direction"
-      info:
-        description: "Structured information. Can be any shape: a string, vector,\
-          \ map or even nested map."
-        type: "object"
-      description:
-        description: "A description of the argument. This will be displayed with `--help`."
-        type: "string"
-      type:
-        description: "An argument of the `boolean_true` type acts like a `boolean`\
-          \ flag with a default value of `false`. When called as an argument it sets\
-          \ the `boolean` to `true`."
-        const: "boolean_true"
-    required:
-    - "name"
-    - "type"
-    additionalProperties: false
-  BooleanFalseArgument:
-    description: "An argument of the `boolean_false` type acts like an inverted `boolean`\
-      \ flag with a default value of `true`. When called as an argument it sets the\
-      \ `boolean` to `false`."
-    type: "object"
-    properties:
-      alternatives:
-        oneOf:
-        - description: "List of alternative format variations for this argument."
-          type: "string"
-        - type: "array"
-          items:
-            description: "List of alternative format variations for this argument."
-            type: "string"
-      name:
-        description: "The name of the argument. Can be in the formats `--no-log`,\
-          \ `-n` or `no-log`. The number of dashes determines how values can be passed:\
-          \  \n\n  - `--no-log` is a long option, which can be passed with `executable_name\
-          \ --no-log`\n  - `-n` is a short option, which can be passed with `executable_name\
-          \ -n`\n  - `no-log` is an argument, which can be passed with `executable_name\
-          \ no-log`  \n"
-        type: "string"
-      direction:
-        $ref: "#/definitions/Direction"
-      info:
-        description: "Structured information. Can be any shape: a string, vector,\
-          \ map or even nested map."
-        type: "object"
-      description:
-        description: "A description of the argument. This will be displayed with `--help`."
-        type: "string"
-      type:
-        description: "An argument of the `boolean_false` type acts like an inverted\
-          \ `boolean` flag with a default value of `true`. When called as an argument\
-          \ it sets the `boolean` to `false`."
-        const: "boolean_false"
-    required:
-    - "name"
-    - "type"
-    additionalProperties: false
-  DoubleArgument:
-    description: "A `double` type argument has a numeric value with decimal points"
-    type: "object"
-    properties:
-      alternatives:
-        oneOf:
-        - description: "List of alternative format variations for this argument."
-          type: "string"
-        - type: "array"
-          items:
-            description: "List of alternative format variations for this argument."
-            type: "string"
-      name:
-        description: "The name of the argument. Can be in the formats `--foo`, `-f`\
-          \ or `foo`. The number of dashes determines how values can be passed:  \n\
-          \n  - `--foo` is a long option, which can be passed with `executable_name\
-          \ --foo=value` or `executable_name --foo value`\n  - `-f` is a short option,\
-          \ which can be passed with `executable_name -f value`\n  - `foo` is an argument,\
-          \ which can be passed with `executable_name value`  \n"
-        type: "string"
-      info:
-        description: "Structured information. Can be any shape: a string, vector,\
-          \ map or even nested map."
-        type: "object"
-      max:
-        description: "Maximum allowed value for this argument. If set and the provided\
-          \ value is higher than the maximum, an error will be produced. Can be combined\
-          \ with [`min`](#min) to clamp values."
-        $ref: "#/definitions/DoubleWithInf"
-      default:
-        oneOf:
-        - description: "The default value when no argument value is provided. This\
-            \ will not work if the [`required`](#required) property is enabled."
-          $ref: "#/definitions/DoubleWithInf"
-        - type: "array"
-          items:
-            description: "The default value when no argument value is provided. This\
-              \ will not work if the [`required`](#required) property is enabled."
-            $ref: "#/definitions/DoubleWithInf"
-      example:
-        oneOf:
-        - description: "An example value for this argument. If no [`default`](#default)\
-            \ property was specified, this will be used for that purpose."
-          $ref: "#/definitions/DoubleWithInf"
-        - type: "array"
-          items:
-            description: "An example value for this argument. If no [`default`](#default)\
-              \ property was specified, this will be used for that purpose."
-            $ref: "#/definitions/DoubleWithInf"
-      description:
-        description: "A description of the argument. This will be displayed with `--help`."
-        type: "string"
-      multiple_sep:
-        description: "The delimiter character for providing [`multiple`](#multiple)\
-          \ values. `:` by default."
-        type: "string"
-      min:
-        description: "Minimum allowed value for this argument. If set and the provided\
-          \ value is lower than the minimum, an error will be produced. Can be combined\
-          \ with [`max`](#max) to clamp values."
-        $ref: "#/definitions/DoubleWithInf"
-      direction:
-        $ref: "#/definitions/Direction"
-      multiple:
-        description: "Treat the argument value as an array. Arrays can be passed using\
-          \ the delimiter `--foo=1:2:3` or by providing the same argument multiple\
-          \ times `--foo 1 --foo 2`. You can use a custom delimiter by using the [`multiple_sep`](#multiple_sep)\
-          \ property. `false` by default."
-        type: "boolean"
-      type:
-        description: "A `double` type argument has a numeric value with decimal points"
-        const: "double"
-      required:
-        description: "Make the value for this argument required. If set to `true`,\
-          \ an error will be produced if no value was provided. `false` by default."
-        type: "boolean"
-    required:
-    - "name"
-    - "type"
-    additionalProperties: false
-  FileArgument:
-    description: "A `file` type argument has a string value that points to a file\
-      \ or folder path."
-    type: "object"
-    properties:
-      alternatives:
-        oneOf:
-        - description: "List of alternative format variations for this argument."
-          type: "string"
-        - type: "array"
-          items:
-            description: "List of alternative format variations for this argument."
-            type: "string"
-      name:
-        description: "The name of the argument. Can be in the formats `--foo`, `-f`\
-          \ or `foo`. The number of dashes determines how values can be passed:  \n\
-          \n  - `--foo` is a long option, which can be passed with `executable_name\
-          \ --foo=value` or `executable_name --foo value`\n  - `-f` is a short option,\
-          \ which can be passed with `executable_name -f value`\n  - `foo` is an argument,\
-          \ which can be passed with `executable_name value`  \n"
-        type: "string"
-      create_parent:
-        description: "If the output filename is a path and it does not exist, create\
-          \ it before executing the script (only for `direction: output`)."
-        type: "boolean"
-      direction:
-        description: "Makes this argument an `input` or an `output`, as in does the\
-          \ file/folder needs to be read or written. `input` by default."
-        $ref: "#/definitions/Direction"
-      info:
-        description: "Structured information. Can be any shape: a string, vector,\
-          \ map or even nested map."
-        type: "object"
-      must_exist:
-        description: "Checks whether the file or folder exists. For input files, this\
-          \ check will happen before the execution of the script, while for output\
-          \ files the check will happen afterwards."
-        type: "boolean"
-      default:
-        oneOf:
-        - description: "The default value when no argument value is provided. This\
-            \ will not work if the [`required`](#required) property is enabled."
-          type: "string"
-        - type: "array"
-          items:
-            description: "The default value when no argument value is provided. This\
-              \ will not work if the [`required`](#required) property is enabled."
-            type: "string"
-      example:
-        oneOf:
-        - description: "An example value for this argument. If no [`default`](#default)\
-            \ property was specified, this will be used for that purpose."
-          type: "string"
-        - type: "array"
-          items:
-            description: "An example value for this argument. If no [`default`](#default)\
-              \ property was specified, this will be used for that purpose."
-            type: "string"
-      description:
-        description: "A description of the argument. This will be displayed with `--help`."
-        type: "string"
-      multiple_sep:
-        description: "The delimiter character for providing [`multiple`](#multiple)\
-          \ values. `:` by default."
-        type: "string"
-      multiple:
-        description: "Treat the argument value as an array. Arrays can be passed using\
-          \ the delimiter `--foo=1:2:3` or by providing the same argument multiple\
-          \ times `--foo 1 --foo 2`. You can use a custom delimiter by using the [`multiple_sep`](#multiple_sep)\
-          \ property. `false` by default."
-        type: "boolean"
-      type:
-        description: "A `file` type argument has a string value that points to a file\
-          \ or folder path."
-        const: "file"
-      required:
-        description: "Make the value for this argument required. If set to `true`,\
-          \ an error will be produced if no value was provided. `false` by default."
-        type: "boolean"
-    required:
-    - "name"
-    - "type"
-    additionalProperties: false
-  IntegerArgument:
-    description: "An `integer` type argument has a numeric value without decimal points."
-    type: "object"
-    properties:
-      alternatives:
-        oneOf:
-        - description: "List of alternative format variations for this argument."
-          type: "string"
-        - type: "array"
-          items:
-            description: "List of alternative format variations for this argument."
-            type: "string"
-      name:
-        description: "The name of the argument. Can be in the formats `--foo`, `-f`\
-          \ or `foo`. The number of dashes determines how values can be passed:  \n\
-          \n  - `--foo` is a long option, which can be passed with `executable_name\
-          \ --foo=value` or `executable_name --foo value`\n  - `-f` is a short option,\
-          \ which can be passed with `executable_name -f value`\n  - `foo` is an argument,\
-          \ which can be passed with `executable_name value`  \n"
-        type: "string"
-      choices:
-        description: "Limit the amount of valid values for this argument to those\
-          \ set in this list. When set and a value not present in the list is provided,\
-          \ an error will be produced."
-        type: "array"
-        items:
-          type: "integer"
-      info:
-        description: "Structured information. Can be any shape: a string, vector,\
-          \ map or even nested map."
-        type: "object"
-      max:
-        description: "Maximum allowed value for this argument. If set and the provided\
-          \ value is higher than the maximum, an error will be produced. Can be combined\
-          \ with [`min`](#min) to clamp values."
-        type: "integer"
-      default:
-        oneOf:
-        - description: "The default value when no argument value is provided. This\
-            \ will not work if the [`required`](#required) property is enabled."
-          type: "integer"
-        - type: "array"
-          items:
-            description: "The default value when no argument value is provided. This\
-              \ will not work if the [`required`](#required) property is enabled."
-            type: "integer"
-      example:
-        oneOf:
-        - description: "An example value for this argument. If no [`default`](#default)\
-            \ property was specified, this will be used for that purpose."
-          type: "integer"
-        - type: "array"
-          items:
-            description: "An example value for this argument. If no [`default`](#default)\
-              \ property was specified, this will be used for that purpose."
-            type: "integer"
-      description:
-        description: "A description of the argument. This will be displayed with `--help`."
-        type: "string"
-      multiple_sep:
-        description: "The delimiter character for providing [`multiple`](#multiple)\
-          \ values. `:` by default."
-        type: "string"
-      min:
-        description: "Minimum allowed value for this argument. If set and the provided\
-          \ value is lower than the minimum, an error will be produced. Can be combined\
-          \ with [`max`](#max) to clamp values."
-        type: "integer"
-      direction:
-        $ref: "#/definitions/Direction"
-      multiple:
-        description: "Treat the argument value as an array. Arrays can be passed using\
-          \ the delimiter `--foo=1:2:3` or by providing the same argument multiple\
-          \ times `--foo 1 --foo 2`. You can use a custom delimiter by using the [`multiple_sep`](#multiple_sep)\
-          \ property. `false` by default."
-        type: "boolean"
-      type:
-        description: "An `integer` type argument has a numeric value without decimal\
-          \ points."
-        const: "integer"
-      required:
-        description: "Make the value for this argument required. If set to `true`,\
-          \ an error will be produced if no value was provided. `false` by default."
-        type: "boolean"
-    required:
-    - "name"
-    - "type"
-    additionalProperties: false
-  LongArgument:
-    description: "An `long` type argument has a numeric value without decimal points."
-    type: "object"
-    properties:
-      alternatives:
-        oneOf:
-        - description: "List of alternative format variations for this argument."
-          type: "string"
-        - type: "array"
-          items:
-            description: "List of alternative format variations for this argument."
-            type: "string"
-      name:
-        description: "The name of the argument. Can be in the formats `--foo`, `-f`\
-          \ or `foo`. The number of dashes determines how values can be passed:  \n\
-          \n  - `--foo` is a long option, which can be passed with `executable_name\
-          \ --foo=value` or `executable_name --foo value`\n  - `-f` is a short option,\
-          \ which can be passed with `executable_name -f value`\n  - `foo` is an argument,\
-          \ which can be passed with `executable_name value`  \n"
-        type: "string"
-      choices:
-        description: "Limit the amount of valid values for this argument to those\
-          \ set in this list. When set and a value not present in the list is provided,\
-          \ an error will be produced."
-        type: "array"
-        items:
-          type: "integer"
-      info:
-        description: "Structured information. Can be any shape: a string, vector,\
-          \ map or even nested map."
-        type: "object"
-      max:
-        description: "Maximum allowed value for this argument. If set and the provided\
-          \ value is higher than the maximum, an error will be produced. Can be combined\
-          \ with [`min`](#min) to clamp values."
-        type: "integer"
-      default:
-        oneOf:
-        - description: "The default value when no argument value is provided. This\
-            \ will not work if the [`required`](#required) property is enabled."
-          type: "integer"
-        - type: "array"
-          items:
-            description: "The default value when no argument value is provided. This\
-              \ will not work if the [`required`](#required) property is enabled."
-            type: "integer"
-      example:
-        oneOf:
-        - description: "An example value for this argument. If no [`default`](#default)\
-            \ property was specified, this will be used for that purpose."
-          type: "integer"
-        - type: "array"
-          items:
-            description: "An example value for this argument. If no [`default`](#default)\
-              \ property was specified, this will be used for that purpose."
-            type: "integer"
-      description:
-        description: "A description of the argument. This will be displayed with `--help`."
-        type: "string"
-      multiple_sep:
-        description: "The delimiter character for providing [`multiple`](#multiple)\
-          \ values. `:` by default."
-        type: "string"
-      min:
-        description: "Minimum allowed value for this argument. If set and the provided\
-          \ value is lower than the minimum, an error will be produced. Can be combined\
-          \ with [`max`](#max) to clamp values."
-        type: "integer"
-      direction:
-        $ref: "#/definitions/Direction"
-      multiple:
-        description: "Treat the argument value as an array. Arrays can be passed using\
-          \ the delimiter `--foo=1:2:3` or by providing the same argument multiple\
-          \ times `--foo 1 --foo 2`. You can use a custom delimiter by using the [`multiple_sep`](#multiple_sep)\
-          \ property. `false` by default."
-        type: "boolean"
-      type:
-        description: "An `long` type argument has a numeric value without decimal\
-          \ points."
-        const: "long"
-      required:
-        description: "Make the value for this argument required. If set to `true`,\
-          \ an error will be produced if no value was provided. `false` by default."
-        type: "boolean"
-    required:
-    - "name"
-    - "type"
-    additionalProperties: false
-  StringArgument:
-    description: "A `string` type argument has a value made up of an ordered sequences\
-      \ of characters, like \"Hello\" or \"I'm a string\"."
-    type: "object"
-    properties:
-      alternatives:
-        oneOf:
-        - description: "List of alternative format variations for this argument."
-          type: "string"
-        - type: "array"
-          items:
-            description: "List of alternative format variations for this argument."
-            type: "string"
-      name:
-        description: "The name of the argument. Can be in the formats `--foo`, `-f`\
-          \ or `foo`. The number of dashes determines how values can be passed:  \n\
-          \n  - `--foo` is a long option, which can be passed with `executable_name\
-          \ --foo=value` or `executable_name --foo value`\n  - `-f` is a short option,\
-          \ which can be passed with `executable_name -f value`\n  - `foo` is an argument,\
-          \ which can be passed with `executable_name value`  \n"
-        type: "string"
-      choices:
-        description: "Limit the amount of valid values for this argument to those\
-          \ set in this list. When set and a value not present in the list is provided,\
-          \ an error will be produced."
-        type: "array"
-        items:
-          type: "string"
-      direction:
-        $ref: "#/definitions/Direction"
-      info:
-        description: "Structured information. Can be any shape: a string, vector,\
-          \ map or even nested map."
-        type: "object"
-      default:
-        oneOf:
-        - description: "The default value when no argument value is provided. This\
-            \ will not work if the [`required`](#required) property is enabled."
-          type: "string"
-        - type: "array"
-          items:
-            description: "The default value when no argument value is provided. This\
-              \ will not work if the [`required`](#required) property is enabled."
-            type: "string"
-      example:
-        oneOf:
-        - description: "An example value for this argument. If no [`default`](#default)\
-            \ property was specified, this will be used for that purpose."
-          type: "string"
-        - type: "array"
-          items:
-            description: "An example value for this argument. If no [`default`](#default)\
-              \ property was specified, this will be used for that purpose."
-            type: "string"
-      description:
-        description: "A description of the argument. This will be displayed with `--help`."
-        type: "string"
-      multiple_sep:
-        description: "The delimiter character for providing [`multiple`](#multiple)\
-          \ values. `:` by default."
-        type: "string"
-      multiple:
-        description: "Treat the argument value as an array. Arrays can be passed using\
-          \ the delimiter `--foo=1:2:3` or by providing the same argument multiple\
-          \ times `--foo 1 --foo 2`. You can use a custom delimiter by using the [`multiple_sep`](#multiple_sep)\
-          \ property. `false` by default."
-        type: "boolean"
-      type:
-        description: "A `string` type argument has a value made up of an ordered sequences\
-          \ of characters, like \"Hello\" or \"I'm a string\"."
-        const: "string"
-      required:
-        description: "Make the value for this argument required. If set to `true`,\
-          \ an error will be produced if no value was provided. `false` by default."
-        type: "boolean"
-    required:
-    - "name"
-    - "type"
-    additionalProperties: false
-  Resource:
-    oneOf:
-    - $ref: "#/definitions/BashScript"
-    - $ref: "#/definitions/CSharpScript"
-    - $ref: "#/definitions/Executable"
-    - $ref: "#/definitions/JavaScriptScript"
-    - $ref: "#/definitions/NextflowScript"
-    - $ref: "#/definitions/PlainFile"
-    - $ref: "#/definitions/PythonScript"
-    - $ref: "#/definitions/RScript"
-    - $ref: "#/definitions/ScalaScript"
-  BashScript:
-    description: "An executable Bash script.\nWhen defined in resources, only the\
-      \ first entry will be executed when running the built component or when running\
-      \ `viash run`.\nWhen defined in test_resources, all entries will be executed\
-      \ during `viash test`."
-    type: "object"
-    properties:
-      path:
-        description: "The path of the input file. Can be a relative or an absolute\
-          \ path, or a URI. Mutually exclusive with `text`."
-        type: "string"
-      text:
-        description: "The content of the resulting file specified as a string. Mutually\
-          \ exclusive with `path`."
-        type: "string"
-      is_executable:
-        description: "Whether the resulting resource file should be made executable."
-        type: "boolean"
-      type:
-        description: "An executable Bash script.\nWhen defined in resources, only\
-          \ the first entry will be executed when running the built component or when\
-          \ running `viash run`.\nWhen defined in test_resources, all entries will\
-          \ be executed during `viash test`."
-        const: "bash_script"
-      dest:
-        description: "Resulting filename of the resource. From within a script, the\
-          \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\
-          \ `dest` will be set to the basename of the `path` parameter."
-        type: "string"
-    required:
-    - "type"
-    additionalProperties: false
-  CSharpScript:
-    description: "An executable C# script.\nWhen defined in resources, only the first\
-      \ entry will be executed when running the built component or when running `viash\
-      \ run`.\nWhen defined in test_resources, all entries will be executed during\
-      \ `viash test`."
-    type: "object"
-    properties:
-      path:
-        description: "The path of the input file. Can be a relative or an absolute\
-          \ path, or a URI. Mutually exclusive with `text`."
-        type: "string"
-      text:
-        description: "The content of the resulting file specified as a string. Mutually\
-          \ exclusive with `path`."
-        type: "string"
-      is_executable:
-        description: "Whether the resulting resource file should be made executable."
-        type: "boolean"
-      type:
-        description: "An executable C# script.\nWhen defined in resources, only the\
-          \ first entry will be executed when running the built component or when\
-          \ running `viash run`.\nWhen defined in test_resources, all entries will\
-          \ be executed during `viash test`."
-        const: "csharp_script"
-      dest:
-        description: "Resulting filename of the resource. From within a script, the\
-          \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\
-          \ `dest` will be set to the basename of the `path` parameter."
-        type: "string"
-    required:
-    - "type"
-    additionalProperties: false
-  Executable:
-    description: "An executable file."
-    type: "object"
-    properties:
-      path:
-        description: "The path of the input file. Can be a relative or an absolute\
-          \ path, or a URI. Mutually exclusive with `text`."
-        type: "string"
-      text:
-        description: "The content of the resulting file specified as a string. Mutually\
-          \ exclusive with `path`."
-        type: "string"
-      is_executable:
-        description: "Whether the resulting resource file should be made executable."
-        type: "boolean"
-      type:
-        description: "An executable file."
-        const: "executable"
-      dest:
-        description: "Resulting filename of the resource. From within a script, the\
-          \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\
-          \ `dest` will be set to the basename of the `path` parameter."
-        type: "string"
-    required:
-    - "type"
-    additionalProperties: false
-  JavaScriptScript:
-    description: "An executable JavaScript script.\nWhen defined in resources, only\
-      \ the first entry will be executed when running the built component or when\
-      \ running `viash run`.\nWhen defined in test_resources, all entries will be\
-      \ executed during `viash test`."
-    type: "object"
-    properties:
-      path:
-        description: "The path of the input file. Can be a relative or an absolute\
-          \ path, or a URI. Mutually exclusive with `text`."
-        type: "string"
-      text:
-        description: "The content of the resulting file specified as a string. Mutually\
-          \ exclusive with `path`."
-        type: "string"
-      is_executable:
-        description: "Whether the resulting resource file should be made executable."
-        type: "boolean"
-      type:
-        description: "An executable JavaScript script.\nWhen defined in resources,\
-          \ only the first entry will be executed when running the built component\
-          \ or when running `viash run`.\nWhen defined in test_resources, all entries\
-          \ will be executed during `viash test`."
-        const: "javascript_script"
-      dest:
-        description: "Resulting filename of the resource. From within a script, the\
-          \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\
-          \ `dest` will be set to the basename of the `path` parameter."
-        type: "string"
-    required:
-    - "type"
-    additionalProperties: false
-  NextflowScript:
-    description: "A Nextflow script. Work in progress; added mainly for annotation\
-      \ at the moment."
-    type: "object"
-    properties:
-      path:
-        description: "The path of the input file. Can be a relative or an absolute\
-          \ path, or a URI. Mutually exclusive with `text`."
-        type: "string"
-      text:
-        description: "The content of the resulting file specified as a string. Mutually\
-          \ exclusive with `path`."
-        type: "string"
-      entrypoint:
-        description: "The name of the workflow to be wrapped."
-        type: "string"
-      is_executable:
-        description: "Whether the resulting resource file should be made executable."
-        type: "boolean"
-      type:
-        description: "A Nextflow script. Work in progress; added mainly for annotation\
-          \ at the moment."
-        const: "nextflow_script"
-      dest:
-        description: "Resulting filename of the resource. From within a script, the\
-          \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\
-          \ `dest` will be set to the basename of the `path` parameter."
-        type: "string"
-    required:
-    - "entrypoint"
-    - "type"
-    additionalProperties: false
-  PlainFile:
-    description: "A plain file. This can only be used as a supporting resource for\
-      \ the main script or unit tests."
-    type: "object"
-    properties:
-      path:
-        description: "The path of the input file. Can be a relative or an absolute\
-          \ path, or a URI. Mutually exclusive with `text`."
-        type: "string"
-      text:
-        description: "The content of the resulting file specified as a string. Mutually\
-          \ exclusive with `path`."
-        type: "string"
-      is_executable:
-        description: "Whether the resulting resource file should be made executable."
-        type: "boolean"
-      type:
-        description: "A plain file. This can only be used as a supporting resource\
-          \ for the main script or unit tests."
-        const: "file"
-      dest:
-        description: "Resulting filename of the resource. From within a script, the\
-          \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\
-          \ `dest` will be set to the basename of the `path` parameter."
-        type: "string"
-    required: []
-    additionalProperties: false
-  PythonScript:
-    description: "An executable Python script.\nWhen defined in resources, only the\
-      \ first entry will be executed when running the built component or when running\
-      \ `viash run`.\nWhen defined in test_resources, all entries will be executed\
-      \ during `viash test`."
-    type: "object"
-    properties:
-      path:
-        description: "The path of the input file. Can be a relative or an absolute\
-          \ path, or a URI. Mutually exclusive with `text`."
-        type: "string"
-      text:
-        description: "The content of the resulting file specified as a string. Mutually\
-          \ exclusive with `path`."
-        type: "string"
-      is_executable:
-        description: "Whether the resulting resource file should be made executable."
-        type: "boolean"
-      type:
-        description: "An executable Python script.\nWhen defined in resources, only\
-          \ the first entry will be executed when running the built component or when\
-          \ running `viash run`.\nWhen defined in test_resources, all entries will\
-          \ be executed during `viash test`."
-        const: "python_script"
-      dest:
-        description: "Resulting filename of the resource. From within a script, the\
-          \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\
-          \ `dest` will be set to the basename of the `path` parameter."
-        type: "string"
-    required:
-    - "type"
-    additionalProperties: false
-  RScript:
-    description: "An executable R script.\nWhen defined in resources, only the first\
-      \ entry will be executed when running the built component or when running `viash\
-      \ run`.\nWhen defined in test_resources, all entries will be executed during\
-      \ `viash test`."
-    type: "object"
-    properties:
-      path:
-        description: "The path of the input file. Can be a relative or an absolute\
-          \ path, or a URI. Mutually exclusive with `text`."
-        type: "string"
-      text:
-        description: "The content of the resulting file specified as a string. Mutually\
-          \ exclusive with `path`."
-        type: "string"
-      is_executable:
-        description: "Whether the resulting resource file should be made executable."
-        type: "boolean"
-      type:
-        description: "An executable R script.\nWhen defined in resources, only the\
-          \ first entry will be executed when running the built component or when\
-          \ running `viash run`.\nWhen defined in test_resources, all entries will\
-          \ be executed during `viash test`."
-        const: "r_script"
-      dest:
-        description: "Resulting filename of the resource. From within a script, the\
-          \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\
-          \ `dest` will be set to the basename of the `path` parameter."
-        type: "string"
-    required:
-    - "type"
-    additionalProperties: false
-  ScalaScript:
-    description: "An executable Scala script.\nWhen defined in resources, only the\
-      \ first entry will be executed when running the built component or when running\
-      \ `viash run`.\nWhen defined in test_resources, all entries will be executed\
-      \ during `viash test`."
-    type: "object"
-    properties:
-      path:
-        description: "The path of the input file. Can be a relative or an absolute\
-          \ path, or a URI. Mutually exclusive with `text`."
-        type: "string"
-      text:
-        description: "The content of the resulting file specified as a string. Mutually\
-          \ exclusive with `path`."
-        type: "string"
-      is_executable:
-        description: "Whether the resulting resource file should be made executable."
-        type: "boolean"
-      type:
-        description: "An executable Scala script.\nWhen defined in resources, only\
-          \ the first entry will be executed when running the built component or when\
-          \ running `viash run`.\nWhen defined in test_resources, all entries will\
-          \ be executed during `viash test`."
-        const: "scala_script"
-      dest:
-        description: "Resulting filename of the resource. From within a script, the\
-          \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\
-          \ `dest` will be set to the basename of the `path` parameter."
-        type: "string"
-    required:
-    - "type"
-    additionalProperties: false
-  NextflowDirectives:
-    description: "Directives are optional settings that affect the execution of the\
-      \ process.\n"
-    type: "object"
-    properties:
-      beforeScript:
-        description: "The `beforeScript` directive allows you to execute a custom\
-          \ (Bash) snippet before the main process script is run. This may be useful\
-          \ to initialise the underlying cluster environment or for other custom initialisation.\n\
-          \nSee [`beforeScript`](https://www.nextflow.io/docs/latest/process.html#beforeScript).\n"
-        type: "string"
-      module:
-        oneOf:
-        - description: "Environment Modules is a package manager that allows you to\
-            \ dynamically configure your execution environment and easily switch between\
-            \ multiple versions of the same software tool.\n\nIf it is available in\
-            \ your system you can use it with Nextflow in order to configure the processes\
-            \ execution environment in your pipeline.\n\nIn a process definition you\
-            \ can use the `module` directive to load a specific module version to\
-            \ be used in the process execution environment.\n\nSee [`module`](https://www.nextflow.io/docs/latest/process.html#module).\n"
-          type: "string"
-        - type: "array"
-          items:
-            description: "Environment Modules is a package manager that allows you\
-              \ to dynamically configure your execution environment and easily switch\
-              \ between multiple versions of the same software tool.\n\nIf it is available\
-              \ in your system you can use it with Nextflow in order to configure\
-              \ the processes execution environment in your pipeline.\n\nIn a process\
-              \ definition you can use the `module` directive to load a specific module\
-              \ version to be used in the process execution environment.\n\nSee [`module`](https://www.nextflow.io/docs/latest/process.html#module).\n"
-            type: "string"
-      queue:
-        oneOf:
-        - description: "The `queue` directory allows you to set the queue where jobs\
-            \ are scheduled when using a grid based executor in your pipeline.\n\n\
-            See [`queue`](https://www.nextflow.io/docs/latest/process.html#queue).\n"
-          type: "string"
-        - type: "array"
-          items:
-            description: "The `queue` directory allows you to set the queue where\
-              \ jobs are scheduled when using a grid based executor in your pipeline.\n\
-              \nSee [`queue`](https://www.nextflow.io/docs/latest/process.html#queue).\n"
-            type: "string"
-      label:
-        oneOf:
-        - description: "The `label` directive allows the annotation of processes with\
-            \ mnemonic identifier of your choice.\n\nSee [`label`](https://www.nextflow.io/docs/latest/process.html#label).\n"
-          type: "string"
-        - type: "array"
-          items:
-            description: "The `label` directive allows the annotation of processes\
-              \ with mnemonic identifier of your choice.\n\nSee [`label`](https://www.nextflow.io/docs/latest/process.html#label).\n"
-            type: "string"
-      container:
-        oneOf:
-        - description: "The `container` directive allows you to execute the process\
-            \ script in a Docker container.\n\nIt requires the Docker daemon to be\
-            \ running in machine where the pipeline is executed, i.e. the local machine\
-            \ when using the local executor or the cluster nodes when the pipeline\
-            \ is deployed through a grid executor.\n\nViash implements allows either\
-            \ a string value or a map. In case a map is used, the allowed keys are:\
-            \ `registry`, `image`, and `tag`. The `image` value must be specified.\n\
-            \nSee [`container`](https://www.nextflow.io/docs/latest/process.html#container).\n"
-          type: "object"
-          additionalProperties:
-            type: "string"
-        - description: "The `container` directive allows you to execute the process\
-            \ script in a Docker container.\n\nIt requires the Docker daemon to be\
-            \ running in machine where the pipeline is executed, i.e. the local machine\
-            \ when using the local executor or the cluster nodes when the pipeline\
-            \ is deployed through a grid executor.\n\nViash implements allows either\
-            \ a string value or a map. In case a map is used, the allowed keys are:\
-            \ `registry`, `image`, and `tag`. The `image` value must be specified.\n\
-            \nSee [`container`](https://www.nextflow.io/docs/latest/process.html#container).\n"
-          type: "string"
-      publishDir:
-        oneOf:
-        - oneOf:
-          - description: "The `publishDir` directive allows you to publish the process\
-              \ output files to a specified folder.\n\nViash implements this directive\
-              \ as a plain string or a map. The allowed keywords for the map are:\
-              \ `path`, `mode`, `overwrite`, `pattern`, `saveAs`, `enabled`. The `path`\
-              \ key and value are required.\nThe allowed values for `mode` are: `symlink`,\
-              \ `rellink`, `link`, `copy`, `copyNoFollow`, `move`.\n\nSee [`publishDir`](https://www.nextflow.io/docs/latest/process.html#publishdir).\n"
-            type: "string"
-          - description: "The `publishDir` directive allows you to publish the process\
-              \ output files to a specified folder.\n\nViash implements this directive\
-              \ as a plain string or a map. The allowed keywords for the map are:\
-              \ `path`, `mode`, `overwrite`, `pattern`, `saveAs`, `enabled`. The `path`\
-              \ key and value are required.\nThe allowed values for `mode` are: `symlink`,\
-              \ `rellink`, `link`, `copy`, `copyNoFollow`, `move`.\n\nSee [`publishDir`](https://www.nextflow.io/docs/latest/process.html#publishdir).\n"
-            type: "object"
-            additionalProperties:
-              type: "string"
-        - type: "array"
-          items:
-            oneOf:
-            - description: "The `publishDir` directive allows you to publish the process\
-                \ output files to a specified folder.\n\nViash implements this directive\
-                \ as a plain string or a map. The allowed keywords for the map are:\
-                \ `path`, `mode`, `overwrite`, `pattern`, `saveAs`, `enabled`. The\
-                \ `path` key and value are required.\nThe allowed values for `mode`\
-                \ are: `symlink`, `rellink`, `link`, `copy`, `copyNoFollow`, `move`.\n\
-                \nSee [`publishDir`](https://www.nextflow.io/docs/latest/process.html#publishdir).\n"
-              type: "string"
-            - description: "The `publishDir` directive allows you to publish the process\
-                \ output files to a specified folder.\n\nViash implements this directive\
-                \ as a plain string or a map. The allowed keywords for the map are:\
-                \ `path`, `mode`, `overwrite`, `pattern`, `saveAs`, `enabled`. The\
-                \ `path` key and value are required.\nThe allowed values for `mode`\
-                \ are: `symlink`, `rellink`, `link`, `copy`, `copyNoFollow`, `move`.\n\
-                \nSee [`publishDir`](https://www.nextflow.io/docs/latest/process.html#publishdir).\n"
-              type: "object"
-              additionalProperties:
-                type: "string"
-      maxForks:
-        oneOf:
-        - description: "The `maxForks` directive allows you to define the maximum\
-            \ number of process instances that can be executed in parallel. By default\
-            \ this value is equals to the number of CPU cores available minus 1.\n\
-            \nIf you want to execute a process in a sequential manner, set this directive\
-            \ to one.\n\nSee [`maxForks`](https://www.nextflow.io/docs/latest/process.html#maxforks).\n"
-          type: "string"
-        - description: "The `maxForks` directive allows you to define the maximum\
-            \ number of process instances that can be executed in parallel. By default\
-            \ this value is equals to the number of CPU cores available minus 1.\n\
-            \nIf you want to execute a process in a sequential manner, set this directive\
-            \ to one.\n\nSee [`maxForks`](https://www.nextflow.io/docs/latest/process.html#maxforks).\n"
-          type: "integer"
-      maxErrors:
-        oneOf:
-        - description: "The `maxErrors` directive allows you to specify the maximum\
-            \ number of times a process can fail when using the `retry` error strategy.\
-            \ By default this directive is disabled.\n\nSee [`maxErrors`](https://www.nextflow.io/docs/latest/process.html#maxerrors).\n"
-          type: "string"
-        - description: "The `maxErrors` directive allows you to specify the maximum\
-            \ number of times a process can fail when using the `retry` error strategy.\
-            \ By default this directive is disabled.\n\nSee [`maxErrors`](https://www.nextflow.io/docs/latest/process.html#maxerrors).\n"
-          type: "integer"
-      cpus:
-        oneOf:
-        - description: "The `cpus` directive allows you to define the number of (logical)\
-            \ CPU required by the process' task.\n\nSee [`cpus`](https://www.nextflow.io/docs/latest/process.html#cpus).\n"
-          type: "integer"
-        - description: "The `cpus` directive allows you to define the number of (logical)\
-            \ CPU required by the process' task.\n\nSee [`cpus`](https://www.nextflow.io/docs/latest/process.html#cpus).\n"
-          type: "string"
-      accelerator:
-        description: "The `accelerator` directive allows you to specify the hardware\
-          \ accelerator requirement for the task execution e.g. GPU processor.\n\n\
-          Viash implements this directive as a map with accepted keywords: `type`,\
-          \ `limit`, `request`, and `runtime`.\n\nSee [`accelerator`](https://www.nextflow.io/docs/latest/process.html#accelerator).\n"
-        type: "object"
-        additionalProperties:
-          type: "string"
-      time:
-        description: "The `time` directive allows you to define how long a process\
-          \ is allowed to run.\n\nSee [`time`](https://www.nextflow.io/docs/latest/process.html#time).\n"
-        type: "string"
-      afterScript:
-        description: "The `afterScript` directive allows you to execute a custom (Bash)\
-          \ snippet immediately after the main process has run. This may be useful\
-          \ to clean up your staging area.\n\nSee [`afterScript`](https://www.nextflow.io/docs/latest/process.html#afterscript).\n"
-        type: "string"
-      executor:
-        description: "The `executor` defines the underlying system where processes\
-          \ are executed. By default a process uses the executor defined globally\
-          \ in the nextflow.config file.\n\nThe `executor` directive allows you to\
-          \ configure what executor has to be used by the process, overriding the\
-          \ default configuration. The following values can be used:\n\n| Name | Executor\
-          \ |\n|------|----------|\n| awsbatch | The process is executed using the\
-          \ AWS Batch service. | \n| azurebatch | The process is executed using the\
-          \ Azure Batch service. | \n| condor | The process is executed using the\
-          \ HTCondor job scheduler. | \n| google-lifesciences | The process is executed\
-          \ using the Google Genomics Pipelines service. | \n| ignite | The process\
-          \ is executed using the Apache Ignite cluster. | \n| k8s | The process is\
-          \ executed using the Kubernetes cluster. | \n| local | The process is executed\
-          \ in the computer where Nextflow is launched. | \n| lsf | The process is\
-          \ executed using the Platform LSF job scheduler. | \n| moab | The process\
-          \ is executed using the Moab job scheduler. | \n| nqsii | The process is\
-          \ executed using the NQSII job scheduler. | \n| oge | Alias for the sge\
-          \ executor. | \n| pbs | The process is executed using the PBS/Torque job\
-          \ scheduler. | \n| pbspro | The process is executed using the PBS Pro job\
-          \ scheduler. | \n| sge | The process is executed using the Sun Grid Engine\
-          \ / Open Grid Engine. | \n| slurm | The process is executed using the SLURM\
-          \ job scheduler. | \n| tes | The process is executed using the GA4GH TES\
-          \ service. | \n| uge | Alias for the sge executor. |\n\nSee [`executor`](https://www.nextflow.io/docs/latest/process.html#executor).\n"
-        type: "string"
-      containerOptions:
-        oneOf:
-        - description: "The `containerOptions` directive allows you to specify any\
-            \ container execution option supported by the underlying container engine\
-            \ (ie. Docker, Singularity, etc). This can be useful to provide container\
-            \ settings only for a specific process e.g. mount a custom path.\n\nSee\
-            \ [`containerOptions`](https://www.nextflow.io/docs/latest/process.html#containeroptions).\n"
-          type: "string"
-        - type: "array"
-          items:
-            description: "The `containerOptions` directive allows you to specify any\
-              \ container execution option supported by the underlying container engine\
-              \ (ie. Docker, Singularity, etc). This can be useful to provide container\
-              \ settings only for a specific process e.g. mount a custom path.\n\n\
-              See [`containerOptions`](https://www.nextflow.io/docs/latest/process.html#containeroptions).\n"
-            type: "string"
-      disk:
-        description: "The `disk` directive allows you to define how much local disk\
-          \ storage the process is allowed to use.\n\nSee [`disk`](https://www.nextflow.io/docs/latest/process.html#disk).\n"
-        type: "string"
-      tag:
-        description: "The `tag` directive allows you to associate each process execution\
-          \ with a custom label, so that it will be easier to identify them in the\
-          \ log file or in the trace execution report.\n\nFor ease of use, the default\
-          \ tag is set to \"$id\", which allows tracking the progression of the channel\
-          \ events through the workflow more easily.\n\nSee [`tag`](https://www.nextflow.io/docs/latest/process.html#tag).\n"
-        type: "string"
-      conda:
-        oneOf:
-        - description: "The `conda` directive allows for the definition of the process\
-            \ dependencies using the Conda package manager.\n\nNextflow automatically\
-            \ sets up an environment for the given package names listed by in the\
-            \ `conda` directive.\n\nSee [`conda`](https://www.nextflow.io/docs/latest/process.html#conda).\n"
-          type: "string"
-        - type: "array"
-          items:
-            description: "The `conda` directive allows for the definition of the process\
-              \ dependencies using the Conda package manager.\n\nNextflow automatically\
-              \ sets up an environment for the given package names listed by in the\
-              \ `conda` directive.\n\nSee [`conda`](https://www.nextflow.io/docs/latest/process.html#conda).\n"
-            type: "string"
-      machineType:
-        description: " The `machineType` can be used to specify a predefined Google\
-          \ Compute Platform machine type when running using the Google Life Sciences\
-          \ executor.\n\nSee [`machineType`](https://www.nextflow.io/docs/latest/process.html#machinetype).\n"
-        type: "string"
-      stageInMode:
-        description: "The `stageInMode` directive defines how input files are staged-in\
-          \ to the process work directory. The following values are allowed:\n\n|\
-          \ Value | Description |\n|-------|-------------| \n| copy | Input files\
-          \ are staged in the process work directory by creating a copy. | \n| link\
-          \ | Input files are staged in the process work directory by creating an\
-          \ (hard) link for each of them. | \n| symlink | Input files are staged in\
-          \ the process work directory by creating a symbolic link with an absolute\
-          \ path for each of them (default). | \n| rellink | Input files are staged\
-          \ in the process work directory by creating a symbolic link with a relative\
-          \ path for each of them. | \n\nSee [`stageInMode`](https://www.nextflow.io/docs/latest/process.html#stageinmode).\n"
-        type: "string"
-      cache:
-        oneOf:
-        - description: "The `cache` directive allows you to store the process results\
-            \ to a local cache. When the cache is enabled and the pipeline is launched\
-            \ with the resume option, any following attempt to execute the process,\
-            \ along with the same inputs, will cause the process execution to be skipped,\
-            \ producing the stored data as the actual results.\n\nThe caching feature\
-            \ generates a unique key by indexing the process script and inputs. This\
-            \ key is used to identify univocally the outputs produced by the process\
-            \ execution.\n\nThe `cache` is enabled by default, you can disable it\
-            \ for a specific process by setting the cache directive to `false`.\n\n\
-            Accepted values are: `true`, `false`, \"deep\", and \"lenient\".\n\nSee\
-            \ [`cache`](https://www.nextflow.io/docs/latest/process.html#cache).\n"
-          type: "boolean"
-        - description: "The `cache` directive allows you to store the process results\
-            \ to a local cache. When the cache is enabled and the pipeline is launched\
-            \ with the resume option, any following attempt to execute the process,\
-            \ along with the same inputs, will cause the process execution to be skipped,\
-            \ producing the stored data as the actual results.\n\nThe caching feature\
-            \ generates a unique key by indexing the process script and inputs. This\
-            \ key is used to identify univocally the outputs produced by the process\
-            \ execution.\n\nThe `cache` is enabled by default, you can disable it\
-            \ for a specific process by setting the cache directive to `false`.\n\n\
-            Accepted values are: `true`, `false`, \"deep\", and \"lenient\".\n\nSee\
-            \ [`cache`](https://www.nextflow.io/docs/latest/process.html#cache).\n"
-          type: "string"
-      pod:
-        oneOf:
-        - description: "The `pod` directive allows the definition of pods specific\
-            \ settings, such as environment variables, secrets and config maps when\
-            \ using the Kubernetes executor.\n\nSee [`pod`](https://www.nextflow.io/docs/latest/process.html#pod).\n"
-          type: "object"
-          additionalProperties:
-            type: "string"
-        - type: "array"
-          items:
-            description: "The `pod` directive allows the definition of pods specific\
-              \ settings, such as environment variables, secrets and config maps when\
-              \ using the Kubernetes executor.\n\nSee [`pod`](https://www.nextflow.io/docs/latest/process.html#pod).\n"
-            type: "object"
-            additionalProperties:
-              type: "string"
-      penv:
-        description: "The `penv` directive allows you to define the parallel environment\
-          \ to be used when submitting a parallel task to the SGE resource manager.\n\
-          \nSee [`penv`](https://www.nextflow.io/docs/latest/process.html#penv).\n"
-        type: "string"
-      scratch:
-        oneOf:
-        - description: "The `scratch` directive allows you to execute the process\
-            \ in a temporary folder that is local to the execution node.\n\nSee [`scratch`](https://www.nextflow.io/docs/latest/process.html#scratch).\n"
-          type: "boolean"
-        - description: "The `scratch` directive allows you to execute the process\
-            \ in a temporary folder that is local to the execution node.\n\nSee [`scratch`](https://www.nextflow.io/docs/latest/process.html#scratch).\n"
-          type: "string"
-      storeDir:
-        description: "The `storeDir` directive allows you to define a directory that\
-          \ is used as a permanent cache for your process results.\n\nSee [`storeDir`](https://www.nextflow.io/docs/latest/process.html#storeDir).\n"
-        type: "string"
-      maxRetries:
-        oneOf:
-        - description: "The `maxRetries` directive allows you to define the maximum\
-            \ number of times a process instance can be re-submitted in case of failure.\
-            \ This value is applied only when using the retry error strategy. By default\
-            \ only one retry is allowed.\n\nSee [`maxRetries`](https://www.nextflow.io/docs/latest/process.html#maxretries).\n"
-          type: "string"
-        - description: "The `maxRetries` directive allows you to define the maximum\
-            \ number of times a process instance can be re-submitted in case of failure.\
-            \ This value is applied only when using the retry error strategy. By default\
-            \ only one retry is allowed.\n\nSee [`maxRetries`](https://www.nextflow.io/docs/latest/process.html#maxretries).\n"
-          type: "integer"
-      echo:
-        oneOf:
-        - description: "By default the stdout produced by the commands executed in\
-            \ all processes is ignored. By setting the `echo` directive to true, you\
-            \ can forward the process stdout to the current top running process stdout\
-            \ file, showing it in the shell terminal.\n \nSee [`echo`](https://www.nextflow.io/docs/latest/process.html#echo).\n"
-          type: "boolean"
-        - description: "By default the stdout produced by the commands executed in\
-            \ all processes is ignored. By setting the `echo` directive to true, you\
-            \ can forward the process stdout to the current top running process stdout\
-            \ file, showing it in the shell terminal.\n \nSee [`echo`](https://www.nextflow.io/docs/latest/process.html#echo).\n"
-          type: "string"
-      errorStrategy:
-        description: "The `errorStrategy` directive allows you to define how an error\
-          \ condition is managed by the process. By default when an error status is\
-          \ returned by the executed script, the process stops immediately. This in\
-          \ turn forces the entire pipeline to terminate.\n\nTable of available error\
-          \ strategies:\n| Name | Executor |\n|------|----------|\n| `terminate` |\
-          \ Terminates the execution as soon as an error condition is reported. Pending\
-          \ jobs are killed (default) |\n| `finish` | Initiates an orderly pipeline\
-          \ shutdown when an error condition is raised, waiting the completion of\
-          \ any submitted job. |\n| `ignore` | Ignores processes execution errors.\
-          \ |\n| `retry` | Re-submit for execution a process returning an error condition.\
-          \ |\n\nSee [`errorStrategy`](https://www.nextflow.io/docs/latest/process.html#errorstrategy).\n"
-        type: "string"
-      memory:
-        description: "The `memory` directive allows you to define how much memory\
-          \ the process is allowed to use.\n\nSee [`memory`](https://www.nextflow.io/docs/latest/process.html#memory).\n"
-        type: "string"
-      stageOutMode:
-        description: "The `stageOutMode` directive defines how output files are staged-out\
-          \ from the scratch directory to the process work directory. The following\
-          \ values are allowed:\n\n| Value | Description |\n|-------|-------------|\
-          \ \n| copy | Output files are copied from the scratch directory to the work\
-          \ directory. | \n| move | Output files are moved from the scratch directory\
-          \ to the work directory. | \n| rsync | Output files are copied from the\
-          \ scratch directory to the work directory by using the rsync utility. |\n\
-          \nSee [`stageOutMode`](https://www.nextflow.io/docs/latest/process.html#stageoutmode).\n"
-        type: "string"
-    required: []
-    additionalProperties: false
-  NextflowAuto:
-    description: "Automated processing flags which can be toggled on or off."
-    type: "object"
-    properties:
-      simplifyInput:
-        description: "If `true`, an input tuple only containing only a single File\
-          \ (e.g. `[\"foo\", file(\"in.h5ad\")]`) is automatically transformed to\
-          \ a map (i.e. `[\"foo\", [ input: file(\"in.h5ad\") ] ]`).\n\nDefault: `true`.\n"
-        type: "boolean"
-      simplifyOutput:
-        description: "If `true`, an output tuple containing a map with a File (e.g.\
-          \ `[\"foo\", [ output: file(\"out.h5ad\") ] ]`) is automatically transformed\
-          \ to a map (i.e. `[\"foo\", file(\"out.h5ad\")]`).\n\nDefault: `false`.\n"
-        type: "boolean"
-      publish:
-        oneOf:
-        - description: "If `true`, the module's outputs are automatically published\
-            \ to `params.publishDir`.\nIf equal to \"state\", also a `.state.yaml`\
-            \ file will be published in the publish dir.\nWill throw an error if `params.publishDir`\
-            \ is not defined.\n\nDefault: `false`.\n"
-          type: "boolean"
-        - description: "If `true`, the module's outputs are automatically published\
-            \ to `params.publishDir`.\nIf equal to \"state\", also a `.state.yaml`\
-            \ file will be published in the publish dir.\nWill throw an error if `params.publishDir`\
-            \ is not defined.\n\nDefault: `false`.\n"
-          type: "string"
-      transcript:
-        description: "If `true`, the module's transcripts from `work/` are automatically\
-          \ published to `params.transcriptDir`.\nIf not defined, `params.publishDir\
-          \ + \"/_transcripts\"` will be used.\nWill throw an error if neither are\
-          \ defined.\n\nDefault: `false`.\n"
-        type: "boolean"
-    required: []
-    additionalProperties: false
-  NextflowConfig:
-    description: "Allows tweaking how the Nextflow Config file is generated."
-    type: "object"
-    properties:
-      labels:
-        description: "A series of default labels to specify memory and cpu constraints.\n\
-          \nThe default memory labels are defined as \"mem1gb\", \"mem2gb\", \"mem4gb\"\
-          , ... upto \"mem512tb\" and follows powers of 2.\nThe default cpu labels\
-          \ are defined as \"cpu1\", \"cpu2\", \"cpu5\", \"cpu10\", ... upto \"cpu1000\"\
-          \ and follows a semi logarithmic scale (1, 2, 5 per decade).\n\nConceptually\
-          \ it is possible for a Viash Config to overwrite the full labels parameter,\
-          \ however likely it is more efficient to add additional labels\nin the Viash\
-          \ Package with a config mod.\n"
-        type: "object"
-        additionalProperties:
-          type: "string"
-      script:
-        oneOf:
-        - description: "Includes a single string or list of strings into the nextflow.config\
-            \ file.\nThis can be used to add custom profiles or include an additional\
-            \ config file.\n"
-          type: "string"
-        - type: "array"
-          items:
-            description: "Includes a single string or list of strings into the nextflow.config\
-              \ file.\nThis can be used to add custom profiles or include an additional\
-              \ config file.\n"
-            type: "string"
-    required: []
-    additionalProperties: false
-  Dependency:
-    description: "Specifies a Viash component (script or executable) that should be\
-      \ made available for the code defined in the component.\nThe dependency components\
-      \ are collected and copied to the output folder during the Viash build step.\n"
-    type: "object"
-    properties:
-      name:
-        description: "The full name of the dependency component. This should include\
-          \ the namespace."
-        type: "string"
-      repository:
-        oneOf:
-        - description: "Specifies the repository location where the dependency component\
-            \ can be found.\nThis must either be a full definition of the repository\
-            \ or the name of a repository referenced as it is defined under repositories.\n\
-            Additionally, the full definition can be specified as a single string\
-            \ where all parameters such as repository type, url, branch or tag are\
-            \ specified.\nOmitting the value sets the dependency as a local dependency,\
-            \ ie. the dependency is available in the same namespace as the component.\n"
-          type: "string"
-        - description: "Specifies the repository location where the dependency component\
-            \ can be found.\nThis must either be a full definition of the repository\
-            \ or the name of a repository referenced as it is defined under repositories.\n\
-            Additionally, the full definition can be specified as a single string\
-            \ where all parameters such as repository type, url, branch or tag are\
-            \ specified.\nOmitting the value sets the dependency as a local dependency,\
-            \ ie. the dependency is available in the same namespace as the component.\n"
-          $ref: "#/definitions/Repository"
-      alias:
-        description: "An alternative name for the dependency component. This can include\
-          \ a namespace if so needed."
-        type: "string"
-    required:
-    - "name"
-    additionalProperties: false
-  Repository:
-    oneOf:
-    - $ref: "#/definitions/LocalRepository"
-    - $ref: "#/definitions/GitRepository"
-    - $ref: "#/definitions/GithubRepository"
-    - $ref: "#/definitions/ViashhubRepository"
-  LocalRepository:
-    description: "Defines a locally present and available repository.\nThis can be\
-      \ used to define components from the same code base as the current component.\n\
-      Alternatively, this can be used to refer to a code repository present on the\
-      \ local hard-drive instead of fetchable remotely, for example during development.\n"
-    type: "object"
-    properties:
-      path:
-        description: "Defines a subfolder of the repository to use as base to look\
-          \ for the dependency components."
-        type: "string"
-      tag:
-        description: "Defines which version of the dependency component to use. Typically\
-          \ this can be a specific tag, branch or commit hash."
-        type: "string"
-      type:
-        description: "Defines a locally present and available repository.\nThis can\
-          \ be used to define components from the same code base as the current component.\n\
-          Alternatively, this can be used to refer to a code repository present on\
-          \ the local hard-drive instead of fetchable remotely, for example during\
-          \ development.\n"
-        const: "local"
-    required:
-    - "type"
-    additionalProperties: false
-  GitRepository:
-    description: "A Git repository where remote dependency components can be found."
-    type: "object"
-    properties:
-      path:
-        description: "Defines a subfolder of the repository to use as base to look\
-          \ for the dependency components."
-        type: "string"
-      tag:
-        description: "Defines which version of the dependency component to use. Typically\
-          \ this can be a specific tag, branch or commit hash."
-        type: "string"
-      uri:
-        description: "The URI of the Git repository."
-        type: "string"
-      type:
-        description: "A Git repository where remote dependency components can be found."
-        const: "git"
-    required:
-    - "uri"
-    - "type"
-    additionalProperties: false
-  GithubRepository:
-    description: "A GitHub repository where remote dependency components can be found."
-    type: "object"
-    properties:
-      path:
-        description: "Defines a subfolder of the repository to use as base to look\
-          \ for the dependency components."
-        type: "string"
-      tag:
-        description: "Defines which version of the dependency component to use. Typically\
-          \ this can be a specific tag, branch or commit hash."
-        type: "string"
-      repo:
-        description: "The name of the GitHub repository."
-        type: "string"
-      type:
-        description: "A GitHub repository where remote dependency components can be\
-          \ found."
-        const: "github"
-    required:
-    - "repo"
-    - "type"
-    additionalProperties: false
-  ViashhubRepository:
-    description: "A Viash-Hub repository where remote dependency components can be\
-      \ found."
-    type: "object"
-    properties:
-      path:
-        description: "Defines a subfolder of the repository to use as base to look\
-          \ for the dependency components."
-        type: "string"
-      tag:
-        description: "Defines which version of the dependency component to use. Typically\
-          \ this can be a specific tag, branch or commit hash."
-        type: "string"
-      repo:
-        description: "The name of the Viash-Hub repository."
-        type: "string"
-      type:
-        description: "A Viash-Hub repository where remote dependency components can\
-          \ be found."
-        const: "viashhub"
-    required:
-    - "repo"
-    - "type"
-    additionalProperties: false
-  RepositoryWithName:
-    oneOf:
-    - $ref: "#/definitions/LocalRepositoryWithName"
-    - $ref: "#/definitions/GitRepositoryWithName"
-    - $ref: "#/definitions/GithubRepositoryWithName"
-    - $ref: "#/definitions/ViashhubRepositoryWithName"
-  LocalRepositoryWithName:
-    description: "Defines a locally present and available repository.\nThis can be\
-      \ used to define components from the same code base as the current component.\n\
-      Alternatively, this can be used to refer to a code repository present on the\
-      \ local hard-drive instead of fetchable remotely, for example during development.\n"
-    type: "object"
-    properties:
-      name:
-        description: "The identifier used to refer to this repository from dependencies."
-        type: "string"
-      path:
-        description: "Defines a subfolder of the repository to use as base to look\
-          \ for the dependency components."
-        type: "string"
-      tag:
-        description: "Defines which version of the dependency component to use. Typically\
-          \ this can be a specific tag, branch or commit hash."
-        type: "string"
-      type:
-        description: "Defines a locally present and available repository.\nThis can\
-          \ be used to define components from the same code base as the current component.\n\
-          Alternatively, this can be used to refer to a code repository present on\
-          \ the local hard-drive instead of fetchable remotely, for example during\
-          \ development.\n"
-        const: "localwithname"
-    required:
-    - "name"
-    - "type"
-    additionalProperties: false
-  GitRepositoryWithName:
-    description: "A Git repository where remote dependency components can be found."
-    type: "object"
-    properties:
-      name:
-        description: "The identifier used to refer to this repository from dependencies."
-        type: "string"
-      path:
-        description: "Defines a subfolder of the repository to use as base to look\
-          \ for the dependency components."
-        type: "string"
-      tag:
-        description: "Defines which version of the dependency component to use. Typically\
-          \ this can be a specific tag, branch or commit hash."
-        type: "string"
-      uri:
-        description: "The URI of the Git repository."
-        type: "string"
-      type:
-        description: "A Git repository where remote dependency components can be found."
-        const: "gitwithname"
-    required:
-    - "name"
-    - "uri"
-    - "type"
-    additionalProperties: false
-  GithubRepositoryWithName:
-    description: "A GitHub repository where remote dependency components can be found."
-    type: "object"
-    properties:
-      name:
-        description: "The identifier used to refer to this repository from dependencies."
-        type: "string"
-      path:
-        description: "Defines a subfolder of the repository to use as base to look\
-          \ for the dependency components."
-        type: "string"
-      tag:
-        description: "Defines which version of the dependency component to use. Typically\
-          \ this can be a specific tag, branch or commit hash."
-        type: "string"
-      repo:
-        description: "The name of the GitHub repository."
-        type: "string"
-      type:
-        description: "A GitHub repository where remote dependency components can be\
-          \ found."
-        const: "githubwithname"
-    required:
-    - "name"
-    - "repo"
-    - "type"
-    additionalProperties: false
-  ViashhubRepositoryWithName:
-    description: "A Viash-Hub repository where remote dependency components can be\
-      \ found."
-    type: "object"
-    properties:
-      name:
-        description: "The identifier used to refer to this repository from dependencies."
-        type: "string"
-      path:
-        description: "Defines a subfolder of the repository to use as base to look\
-          \ for the dependency components."
-        type: "string"
-      tag:
-        description: "Defines which version of the dependency component to use. Typically\
-          \ this can be a specific tag, branch or commit hash."
-        type: "string"
-      repo:
-        description: "The name of the Viash-Hub repository."
-        type: "string"
-      type:
-        description: "A Viash-Hub repository where remote dependency components can\
-          \ be found."
-        const: "viashhubwithname"
-    required:
-    - "name"
-    - "repo"
-    - "type"
-    additionalProperties: false
-  DockerSetupStrategy:
-    enum:
-    - "cb"
-    - "ifneedbepullelsecachedbuild"
-    - "donothing"
-    - "gentlepush"
-    - "alwayspullelsebuild"
-    - "build"
-    - "alwayspull"
-    - "alwaysbuild"
-    - "ifneedbebuild"
-    - "pullelsebuild"
-    - "p"
-    - "alwayspullelsecachedbuild"
-    - "pull"
-    - "maybepush"
-    - "ifneedbepullelsebuild"
-    - "cachedbuild"
-    - "pullelsecachedbuild"
-    - "push"
-    - "forcepush"
-    - "alwayspush"
-    - "b"
-    - "pushifnotpresent"
-    - "alwayscachedbuild"
-    - "meh"
-    - "ifneedbepull"
-    - "ifneedbecachedbuild"
-    $comment: "TODO add descriptions to different strategies"
-    description: "The Docker setup strategy to use when building a container."
-  Direction:
-    enum:
-    - "input"
-    - "output"
-    description: "Makes this argument an `input` or an `output`, as in does the file/folder\
-      \ needs to be read or written. `input` by default."
-  Status:
-    enum:
-    - "enabled"
-    - "disabled"
-    - "deprecated"
-    description: "Allows setting a component to active, deprecated or disabled."
-  DockerResolveVolume:
-    enum:
-    - "manual"
-    - "automatic"
-    - "auto"
-    - "Manual"
-    - "Automatic"
-    - "Auto"
-    $comment: "TODO make fully case insensitive"
-    description: "Enables or disables automatic volume mapping. Enabled when set to\
-      \ `Automatic` or disabled when set to `Manual`. Default: `Automatic`"
-  DoubleStrings:
-    enum:
-    - "+.inf"
-    - "+inf"
-    - "+infinity"
-    - "positiveinfinity"
-    - "positiveinf"
-    - "-.inf"
-    - "-inf"
-    - "-infinity"
-    - "negativeinfinity"
-    - "negativeinf"
-    - ".nan"
-    - "nan"
-  DoubleWithInf:
-    oneOf:
-    - type: "number"
-    - $ref: "#/definitions/DoubleStrings"
-oneOf:
-- $ref: "#/definitions/Config"

From f71ed871a8f76a8c4848e1c0efe5bb9904f282eb Mon Sep 17 00:00:00 2001
From: Dorien <41797896+dorien-er@users.noreply.github.com>
Date: Thu, 11 Jul 2024 10:33:33 +0200
Subject: [PATCH 19/23] update multiple separator (#81)

* update multiple separator of the `multiqc` list arguments from `,` to `;`

* update changelog

* Update src/multiqc/config.vsh.yaml

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>

* Update src/multiqc/config.vsh.yaml

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>

* Update src/multiqc/config.vsh.yaml

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>

* Update src/multiqc/config.vsh.yaml

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>

* update IFS in `src/multiqc/script.sh` from `,` to `;`

---------

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>
---
 CHANGELOG.md                |  2 ++
 src/multiqc/config.vsh.yaml | 12 ++++--------
 src/multiqc/script.sh       |  8 ++++----
 3 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3fc960f4..80b8b9f3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,8 @@
 
 * `cutadapt`: demultiplexing is now disabled by default. It can be re-enabled by using `demultiplex_mode`.
 
+* `multiqc`: update multiple separator to `;` (PR #81).
+
 ## MINOR CHANGES
 
 * `busco` components: update BUSCO to `5.7.1`.
diff --git a/src/multiqc/config.vsh.yaml b/src/multiqc/config.vsh.yaml
index 0a3a784b..df5e38e1 100644
--- a/src/multiqc/config.vsh.yaml
+++ b/src/multiqc/config.vsh.yaml
@@ -54,25 +54,21 @@ argument_groups:
       - name: "--include_modules"
         type: string
         multiple: true
-        multiple_sep: ","
-        example: fastqc,cutadapt
+        example: [fastqc, cutadapt]
         description: Use only these module
       - name: "--exclude_modules"
         type: string
         multiple: true
-        multiple_sep: ","
-        example: fastqc,cutadapt
+        example: [fastqc, cutadapt]
         description: Do not use only these modules
       - name: "--ignore_analysis"
         type: string
         multiple: true
-        multiple_sep: ","
-        example: run_one/*,run_two/*
+        example: [run_one/*, run_two/*]
       - name: "--ignore_samples"
         type: string
         multiple: true
-        multiple_sep: ","
-        example: sample_1*,sample_3*
+        example: [sample_1*, sample_3*]
       - name: "--ignore_symlinks"
         type: boolean_true
         description: Ignore symlinked directories and files
diff --git a/src/multiqc/script.sh b/src/multiqc/script.sh
index 6353eb11..ad8c1c0c 100755
--- a/src/multiqc/script.sh
+++ b/src/multiqc/script.sh
@@ -38,7 +38,7 @@ IFS=";" read -ra inputs <<< $par_input
 
 if [[ -n "$par_include_modules" ]]; then
     include_modules=""
-    IFS="," read -ra incl_modules <<< $par_include_modules
+    IFS=";" read -ra incl_modules <<< $par_include_modules
     for i in "${incl_modules[@]}"; do
         include_modules+="--include $i "
     done
@@ -47,7 +47,7 @@ fi
 
 if [[ -n "$par_exclude_modules" ]]; then
     exclude_modules=""
-    IFS="," read -ra excl_modules <<< $par_exclude_modules
+    IFS=";" read -ra excl_modules <<< $par_exclude_modules
     for i in "${excl_modules[@]}"; do
         exclude_modules+="--exclude $i"
     done
@@ -56,7 +56,7 @@ fi
 
 if [[ -n "$par_ignore_analysis" ]]; then
     ignore=""
-    IFS="," read -ra ignore_analysis <<< $par_ignore_analysis
+    IFS=";" read -ra ignore_analysis <<< $par_ignore_analysis
     for i in "${ignore_analysis[@]}"; do
         ignore+="--ignore $i "
     done
@@ -65,7 +65,7 @@ fi
 
 if [[ -n "$par_ignore_samples" ]]; then
     ignore_samples=""
-    IFS="," read -ra ign_samples <<< $par_ignore_samples
+    IFS=";" read -ra ign_samples <<< $par_ignore_samples
     for i in "${ign_samples[@]}"; do
         ignore_samples+="--ignore-samples $i"
     done

From 7d99065ecf66e6bc42b03f8ffcfcfc95ef2d2b72 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Wed, 17 Jul 2024 17:46:44 +0200
Subject: [PATCH 20/23] `bd_rhapsody_make_reference`: Create a reference for
 the BD Rhapsody pipeline (#75)

* `bd_rhapsody/bd_rhapsody_make_reference`: Create a reference for the BD Rhapsody pipeline

* add missing metadata

* remove unicode

* trigger

* process comments

* add authors

* Apply suggestions from code review

Co-authored-by: Dorien <41797896+dorien-er@users.noreply.github.com>

---------

Co-authored-by: Dorien <41797896+dorien-er@users.noreply.github.com>
---
 CHANGELOG.md                                  |   6 +
 src/_authors/robrecht_cannoodt.yaml           |  14 ++
 src/_authors/weiwei_schultz.yaml              |   5 +
 .../config.vsh.yaml                           | 143 ++++++++++++++++
 .../bd_rhapsody_make_reference/help.txt       |  66 +++++++
 .../make_rhap_reference_2.2.1_nodocker.cwl    | 115 +++++++++++++
 .../bd_rhapsody_make_reference/script.py      | 161 ++++++++++++++++++
 .../bd_rhapsody_make_reference/test.sh        |  68 ++++++++
 .../test_data/reference_small.fa              |  27 +++
 .../test_data/reference_small.gtf             |   8 +
 .../test_data/script.sh                       |  47 +++++
 11 files changed, 660 insertions(+)
 create mode 100644 src/_authors/robrecht_cannoodt.yaml
 create mode 100644 src/_authors/weiwei_schultz.yaml
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/help.txt
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/script.py
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/test.sh
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.fa
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.gtf
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/test_data/script.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 80b8b9f3..9cfacdbc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # biobox x.x.x
 
+## NEW FEATURES
+
+* `bd_rhapsody`:
+
+  - `bd_rhapsody/bd_rhapsody_make_reference`: Create a reference for the BD Rhapsody pipeline (PR #75).
+
 ## BUG FIXES
 
 * `pear`: fix component not exiting with the correct exitcode when PEAR fails.
diff --git a/src/_authors/robrecht_cannoodt.yaml b/src/_authors/robrecht_cannoodt.yaml
new file mode 100644
index 00000000..d7c0f283
--- /dev/null
+++ b/src/_authors/robrecht_cannoodt.yaml
@@ -0,0 +1,14 @@
+name: Robrecht Cannoodt
+info:
+  links:
+    email: robrecht@data-intuitive.com
+    github: rcannood
+    orcid: "0000-0003-3641-729X"
+    linkedin: robrechtcannoodt
+  organizations:
+    - name: Data Intuitive
+      href: https://www.data-intuitive.com
+      role: Data Science Engineer
+    - name: Open Problems
+      href: https://openproblems.bio
+      role: Core Member
\ No newline at end of file
diff --git a/src/_authors/weiwei_schultz.yaml b/src/_authors/weiwei_schultz.yaml
new file mode 100644
index 00000000..324f9378
--- /dev/null
+++ b/src/_authors/weiwei_schultz.yaml
@@ -0,0 +1,5 @@
+name: Weiwei Schultz
+info:
+  organizations:
+    - name: Janssen R&D US
+      role: Associate Director Data Sciences
\ No newline at end of file
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml b/src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml
new file mode 100644
index 00000000..e596bf06
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml
@@ -0,0 +1,143 @@
+name: bd_rhapsody_make_reference
+namespace: bd_rhapsody
+description: |
+  The Reference Files Generator creates an archive containing Genome Index
+  and Transcriptome annotation files needed for the BD Rhapsody Sequencing
+  Analysis Pipeline. The app takes as input one or more FASTA and GTF files
+  and produces a compressed archive in the form of a tar.gz file. The 
+  archive contains:
+  
+  - STAR index
+  - Filtered GTF file
+keywords: [genome, reference, index, align]
+links:
+  repository: https://bitbucket.org/CRSwDev/cwl/src/master/v2.2.1/Extra_Utilities/
+  documentation: https://bd-rhapsody-bioinfo-docs.genomics.bd.com/resources/extra_utilities.html#make-rhapsody-reference
+license: Unknown
+authors:
+  - __merge__: /src/_authors/robrecht_cannoodt.yaml
+    roles: [ author, maintainer ]
+  - __merge__: /src/_authors/weiwei_schultz.yaml
+    roles: [ contributor ]
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - type: file
+        name: --genome_fasta
+        required: true
+        description: Reference genome file in FASTA or FASTA.GZ format. The BD Rhapsody Sequencing Analysis Pipeline uses GRCh38 for Human and GRCm39 for Mouse.
+        example: genome_sequence.fa.gz
+        multiple: true
+        info:
+          config_key: Genome_fasta
+      - type: file
+        name: --gtf
+        required: true
+        description: |
+          File path to the transcript annotation files in GTF or GTF.GZ format. The Sequence Analysis Pipeline requires the 'gene_name' or 
+          'gene_id' attribute to be set on each gene and exon feature. Gene and exon feature lines must have the same attribute, and exons
+          must have a corresponding gene with the same value. For TCR/BCR assays, the TCR or BCR gene segments must have the 'gene_type' or
+          'gene_biotype' attribute set, and the value should begin with 'TR' or 'IG', respectively.
+        example: transcriptome_annotation.gtf.gz
+        multiple: true
+        info:
+          config_key: Gtf
+      - type: file
+        name: --extra_sequences
+        description: |
+          File path to additional sequences in FASTA format to use when building the STAR index. (e.g. transgenes or CRISPR guide barcodes).
+          GTF lines for these sequences will be automatically generated and combined with the main GTF.
+        required: false
+        multiple: true
+        info:
+          config_key: Extra_sequences
+  - name: Outputs
+    arguments:
+      - type: file
+        name: --reference_archive
+        direction: output
+        required: true
+        description: |
+          A Compressed archive containing the Reference Genome Index and annotation GTF files. This archive is meant to be used as an
+          input in the BD Rhapsody Sequencing Analysis Pipeline.
+        example: star_index.tar.gz
+  - name: Arguments
+    arguments:
+      - type: string
+        name: --mitochondrial_contigs
+        description: |
+          Names of the Mitochondrial contigs in the provided Reference Genome. Fragments originating from contigs other than these are
+          identified as 'nuclear fragments' in the ATACseq analysis pipeline.
+        required: false
+        multiple: true
+        default: [chrM, chrMT, M, MT]
+        info:
+          config_key: Mitochondrial_Contigs  # must equal the CWL input id exactly (case-sensitive)
+      - type: boolean_true
+        name: --filtering_off
+        description: |
+          By default the input Transcript Annotation files are filtered based on the gene_type/gene_biotype attribute. Only features 
+          having the following attribute values are kept:
+
+            - protein_coding
+            - lncRNA (lincRNA and antisense for Gencode < v31/M22/Ensembl97)
+            - IG_LV_gene
+            - IG_V_gene
+            - IG_V_pseudogene
+            - IG_D_gene
+            - IG_J_gene
+            - IG_J_pseudogene
+            - IG_C_gene
+            - IG_C_pseudogene
+            - TR_V_gene
+            - TR_V_pseudogene
+            - TR_D_gene
+            - TR_J_gene
+            - TR_J_pseudogene
+            - TR_C_gene
+
+          If you have already pre-filtered the input Annotation files and/or wish to turn-off the filtering, please set this option to True.
+        info:
+          config_key: Filtering_off
+      - type: boolean_true
+        name: --wta_only_index
+        description: Build a WTA only index, otherwise builds a WTA + ATAC index.
+        info:
+          config_key: WTA_Only  # must equal the CWL input id exactly (case-sensitive)
+      - type: string
+        name: --extra_star_params
+        description: Additional parameters to pass to STAR when building the genome index. Specify exactly like how you would on the command line.
+        example: --limitGenomeGenerateRAM 48000 --genomeSAindexNbases 11
+        required: false
+        info:
+          config_key: Extra_STAR_params
+
+resources:
+  - type: python_script
+    path: script.py
+  - path: make_rhap_reference_2.2.1_nodocker.cwl
+
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - path: test_data
+
+requirements:
+  commands: [ "cwl-runner" ]
+
+engines:
+  - type: docker
+    image: bdgenomics/rhapsody:2.2.1
+    setup:
+      - type: apt
+        packages: [procps]
+      - type: python
+        packages: [cwlref-runner, cwl-runner]
+      - type: docker
+        run: |
+          echo "bdgenomics/rhapsody: 2.2.1" > /var/software_versions.txt
+
+runners:
+  - type: executable
+  - type: nextflow
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/help.txt b/src/bd_rhapsody/bd_rhapsody_make_reference/help.txt
new file mode 100644
index 00000000..cd038b25
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/help.txt
@@ -0,0 +1,66 @@
+```bash
+cwl-runner src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl --help
+```
+
+usage: src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl
+       [-h] [--Archive_prefix ARCHIVE_PREFIX]
+       [--Extra_STAR_params EXTRA_STAR_PARAMS]
+       [--Extra_sequences EXTRA_SEQUENCES] [--Filtering_off] --Genome_fasta
+       GENOME_FASTA --Gtf GTF [--Maximum_threads MAXIMUM_THREADS]
+       [--Mitochondrial_Contigs MITOCHONDRIAL_CONTIGS] [--WTA_Only]
+       [job_order]
+
+The Reference Files Generator creates an archive containing Genome Index and
+Transcriptome annotation files needed for the BD Rhapsody™ Sequencing
+Analysis Pipeline. The app takes as input one or more FASTA and GTF files and
+produces a compressed archive in the form of a tar.gz file. The archive
+contains:\n - STAR index\n - Filtered GTF file
+
+positional arguments:
+  job_order             Job input json file
+
+options:
+  -h, --help            show this help message and exit
+  --Archive_prefix ARCHIVE_PREFIX
+                        A prefix for naming the compressed archive file
+                        containing the Reference genome index and annotation
+                        files. The default value is constructed based on the
+                        input Reference files.
+  --Extra_STAR_params EXTRA_STAR_PARAMS
+                        Additional parameters to pass to STAR when building
+                        the genome index. Specify exactly like how you would
+                        on the command line. Example: --limitGenomeGenerateRAM
+                        48000 --genomeSAindexNbases 11
+  --Extra_sequences EXTRA_SEQUENCES
+                        Additional sequences in FASTA format to use when
+                        building the STAR index. (E.g. phiX genome)
+  --Filtering_off       By default the input Transcript Annotation files are
+                        filtered based on the gene_type/gene_biotype
+                        attribute. Only features having the following
+                        attribute values are are kept: - protein_coding -
+                        lncRNA (lincRNA and antisense for Gencode <
+                        v31/M22/Ensembl97) - IG_LV_gene - IG_V_gene -
+                        IG_V_pseudogene - IG_D_gene - IG_J_gene -
+                        IG_J_pseudogene - IG_C_gene - IG_C_pseudogene -
+                        TR_V_gene - TR_V_pseudogene - TR_D_gene - TR_J_gene -
+                        TR_J_pseudogene - TR_C_gene If you have already pre-
+                        filtered the input Annotation files and/or wish to
+                        turn-off the filtering, please set this option to
+                        True.
+  --Genome_fasta GENOME_FASTA
+                        Reference genome file in FASTA format. The BD
+                        Rhapsody™ Sequencing Analysis Pipeline uses GRCh38
+                        for Human and GRCm39 for Mouse.
+  --Gtf GTF             Transcript annotation files in GTF format. The BD
+                        Rhapsody™ Sequencing Analysis Pipeline uses Gencode
+                        v42 for Human and M31 for Mouse.
+  --Maximum_threads MAXIMUM_THREADS
+                        The maximum number of threads to use in the pipeline.
+                        By default, all available cores are used.
+  --Mitochondrial_Contigs MITOCHONDRIAL_CONTIGS
+                        Names of the Mitochondrial contigs in the provided
+                        Reference Genome. Fragments originating from contigs
+                        other than these are identified as 'nuclear fragments'
+                        in the ATACseq analysis pipeline.
+  --WTA_Only            Build a WTA only index, otherwise builds a WTA + ATAC
+                        index.
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl b/src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl
new file mode 100644
index 00000000..fead2c02
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl
@@ -0,0 +1,115 @@
+requirements:
+  InlineJavascriptRequirement: {}
+class: CommandLineTool
+label: Reference Files Generator for BD Rhapsody™ Sequencing Analysis Pipeline
+cwlVersion: v1.2
+doc: >- 
+    The Reference Files Generator creates an archive containing Genome Index and Transcriptome annotation files needed for the BD Rhapsody™ Sequencing Analysis Pipeline. The app takes as input one or more FASTA and GTF files and produces a compressed archive in the form of a tar.gz file. The archive contains:\n  - STAR index\n  - Filtered GTF file
+
+
+baseCommand: run_reference_generator.sh 
+inputs: 
+    Genome_fasta:
+        type: File[]
+        label: Reference Genome
+        doc: |-
+            Reference genome file in FASTA format. The BD Rhapsody™ Sequencing Analysis Pipeline uses GRCh38 for Human and GRCm39 for Mouse.
+        inputBinding:
+            prefix: --reference-genome
+            shellQuote: false
+    Gtf:
+        type: File[]
+        label: Transcript Annotations
+        doc: |-
+            Transcript annotation files in GTF format. The BD Rhapsody™ Sequencing Analysis Pipeline uses Gencode v42 for Human and M31 for Mouse.
+        inputBinding:
+            prefix: --gtf
+            shellQuote: false
+    Extra_sequences:
+        type: File[]?
+        label: Extra Sequences
+        doc: |-
+            Additional sequences in FASTA format to use when building the STAR index. (E.g. phiX genome)
+        inputBinding:
+            prefix: --extra-sequences
+            shellQuote: false
+    Mitochondrial_Contigs:
+        type: string[]?
+        default: ["chrM", "chrMT", "M", "MT"]
+        label: Mitochondrial Contig Names
+        doc: |-
+            Names of the Mitochondrial contigs in the provided Reference Genome. Fragments originating from contigs other than these are identified as 'nuclear fragments' in the ATACseq analysis pipeline.
+        inputBinding:
+            prefix: --mitochondrial-contigs
+            shellQuote: false
+    Filtering_off:
+        type: boolean?
+        label: Turn off filtering
+        doc: |-
+            By default the input Transcript Annotation files are filtered based on the gene_type/gene_biotype attribute. Only features having the following attribute values are are kept:
+            - protein_coding
+            - lncRNA (lincRNA and antisense for Gencode < v31/M22/Ensembl97)
+            - IG_LV_gene
+            - IG_V_gene
+            - IG_V_pseudogene
+            - IG_D_gene
+            - IG_J_gene
+            - IG_J_pseudogene
+            - IG_C_gene
+            - IG_C_pseudogene
+            - TR_V_gene
+            - TR_V_pseudogene
+            - TR_D_gene
+            - TR_J_gene
+            - TR_J_pseudogene
+            - TR_C_gene
+            If you have already pre-filtered the input Annotation files and/or wish to turn-off the filtering, please set this option to True.
+        inputBinding: 
+            prefix: --filtering-off
+            shellQuote: false
+    WTA_Only:
+        type: boolean?
+        label: WTA only index
+        doc: Build a WTA only index, otherwise builds a WTA + ATAC index.
+        inputBinding:
+            prefix: --wta-only-index
+            shellQuote: false
+    Archive_prefix:
+        type: string?
+        label: Archive Prefix
+        doc: |-
+            A prefix for naming the compressed archive file containing the Reference genome index and annotation files. The default value is constructed based on the input Reference files.
+        inputBinding:
+            prefix: --archive-prefix
+            shellQuote: false
+    Extra_STAR_params:
+        type: string?
+        label: Extra STAR Params
+        doc: |-
+            Additional parameters to pass to STAR when building the genome index. Specify exactly like how you would on the command line.
+            Example:
+              --limitGenomeGenerateRAM 48000 --genomeSAindexNbases 11
+        inputBinding:
+            prefix: --extra-star-params 
+            shellQuote: true
+  
+    Maximum_threads:
+        type: int?
+        label: Maximum Number of Threads
+        doc: |-
+            The maximum number of threads to use in the pipeline. By default, all available cores are used.
+        inputBinding:
+            prefix: --maximum-threads
+            shellQuote: false
+
+outputs:
+
+    Archive:
+        type: File
+        doc: |- 
+            A Compressed archive containing the Reference Genome Index and annotation GTF files. This archive is meant to be used as an input in the BD Rhapsody™ Sequencing Analysis Pipeline.
+        id: Reference_Archive
+        label: Reference Files Archive
+        outputBinding:
+            glob: '*.tar.gz'
+
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/script.py b/src/bd_rhapsody/bd_rhapsody_make_reference/script.py
new file mode 100644
index 00000000..ca635508
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/script.py
@@ -0,0 +1,161 @@
+import os
+import re
+import subprocess
+import tempfile
+from typing import Any
+import yaml
+import shutil
+
+## VIASH START
+par = {
+    "genome_fasta": [],
+    "gtf": [],
+    "extra_sequences": [],
+    "mitochondrial_contigs": ["chrM", "chrMT", "M", "MT"],
+    "filtering_off": False,
+    "wta_only_index": False,
+    "extra_star_params": None,
+    "reference_archive": "output.tar.gz",
+}
+meta = {  # development placeholders for running the script outside viash; paths point at this component
+    "config": "target/nextflow/bd_rhapsody/bd_rhapsody_make_reference/.config.vsh.yaml",
+    "resources_dir": os.path.abspath("src/bd_rhapsody/bd_rhapsody_make_reference"),
+    "temp_dir": os.getenv("VIASH_TEMP"),
+    "memory_mb": None,
+    "cpus": None
+}
+## VIASH END
+
+def clean_arg(argument):  # adds a "clean_name" entry to the argument dict and returns that same dict
+    argument["clean_name"] = re.sub("^-*", "", argument["name"])  # name without its leading dashes
+    return argument
+
+def read_config(path: str) -> dict[str, Any]:
+    with open(path, "r") as f:  # load the YAML config found at `path`
+        config = yaml.safe_load(f)
+    # flatten the arguments of every argument group into one list; each entry gains a "clean_name"
+    config["all_arguments"] = [
+        clean_arg(arg)
+        for grp in config["argument_groups"]
+        for arg in grp["arguments"]
+    ]
+    
+    return config
+
+def strip_margin(text: str) -> str:  # remove each line's leading indentation up to and including the first '|'
+    return re.sub(r"^[ \t]*\|", "", text, flags=re.MULTILINE)  # anchored: a '|' inside a value is kept
+
+def process_params(par: dict[str, Any], config) -> dict[str, Any]:
+    # validate the required inputs and the output name; `par` is updated in place and returned
+    assert par["genome_fasta"], "Pass at least one set of inputs to --genome_fasta."
+    assert par["gtf"], "Pass at least one set of inputs to --gtf."
+    assert par["reference_archive"].endswith(".tar.gz"), "Output reference_archive must end with .tar.gz."
+
+    # make paths absolute: cwl-runner is later started from a temporary working directory
+    for argument in config["all_arguments"]:
+        if par[argument["clean_name"]] and argument["type"] == "file":
+            if isinstance(par[argument["clean_name"]], list):
+                par[argument["clean_name"]] = [ os.path.abspath(f) for f in par[argument["clean_name"]] ]
+            else:
+                par[argument["clean_name"]] = os.path.abspath(par[argument["clean_name"]])
+
+    return par
+
+def generate_config(par: dict[str, Any], meta, config) -> str:
+    """Build the YAML text of the CWL job file from the parameters that carry an info.config_key."""
+    content_list = [strip_margin(f"""\
+        |#!/usr/bin/env cwl-runner
+        |
+        |""")]
+
+    config_key_value_pairs = []
+    for argument in config["all_arguments"]:
+        config_key = (argument.get("info") or {}).get("config_key")
+        arg_type = argument["type"]
+        par_value = par[argument["clean_name"]]
+        if par_value and config_key:
+            config_key_value_pairs.append((config_key, arg_type, par_value))
+
+    if meta["cpus"]:
+        config_key_value_pairs.append(("Maximum_threads", "integer", meta["cpus"]))
+
+    # render every (key, type, value) triple as one top-level entry of the job file
+
+    for config_key, arg_type, par_value in config_key_value_pairs:
+        if arg_type == "file":
+            entry = strip_margin(f"""\
+                |{config_key}:
+                |""")
+            if isinstance(par_value, list):
+                for path in par_value:
+                    entry += strip_margin(f"""\
+                        | - class: File
+                        |   location: "{path}"
+                        |""")
+            else:
+                entry += strip_margin(f"""\
+                    |   class: File
+                    |   location: "{par_value}"
+                    |""")
+            content_list.append(entry)
+        else:
+            content_list.append(strip_margin(f"""\
+                |{config_key}: {par_value}
+                |"""))
+
+    # return the assembled text; writing it to disk is left to the caller
+    return "".join(content_list)
+
+def get_cwl_file(meta: dict[str, Any]) -> str:
+    # build the path of the CWL tool definition inside the resources directory (nothing is created here)
+    cwl_file=os.path.join(meta["resources_dir"], "make_rhap_reference_2.2.1_nodocker.cwl")
+
+    return cwl_file
+
+def main(par: dict[str, Any], meta: dict[str, Any]):  # run the CWL tool and move its archive to the output path
+    config = read_config(meta["config"])
+        
+    # Validate the parameters and make all file paths absolute
+    par = process_params(par, config)
+
+    # Locate the CWL tool definition in the resources directory
+    cwl_file = get_cwl_file(meta)
+
+    # Create output dir if not exists
+    outdir = os.path.dirname(par["reference_archive"])
+    if not os.path.exists(outdir):
+        os.makedirs(outdir)
+
+    # Run the CWL tool inside a temporary directory that is removed when the block exits
+    with tempfile.TemporaryDirectory(prefix="cwl-bd_rhapsody_wta-", dir=meta["temp_dir"]) as temp_dir:
+        # Write the CWL job (input) file
+        config_file = os.path.join(temp_dir, "config.yml")
+        config_content = generate_config(par, meta, config)
+        with open(config_file, "w") as f:
+            f.write(config_content)
+
+        # cwl-runner is told not to use containers and to write its outputs into temp_dir
+        cmd = [
+            "cwl-runner",
+            "--no-container",
+            "--preserve-entire-environment",
+            "--outdir",
+            temp_dir,
+            cwl_file,
+            config_file
+        ]
+
+        env = dict(os.environ)
+        env["TMPDIR"] = temp_dir  # the subprocess gets TMPDIR pointed at the temporary directory
+
+        print("> " + " ".join(cmd), flush=True)
+        _ = subprocess.check_call(
+            cmd,
+            cwd=os.path.dirname(config_file),
+            env=env
+        )
+        # NOTE(review): assumes the tool always names its archive Rhap_reference.tar.gz - confirm
+        shutil.move(os.path.join(temp_dir, "Rhap_reference.tar.gz"), par["reference_archive"])
+
+if __name__ == "__main__":
+    main(par, meta)
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh b/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh
new file mode 100644
index 00000000..3637160a
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+set -e
+
+#############################################
+# helper functions
+assert_file_exists() {
+  [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; }
+}
+assert_file_doesnt_exist() {
+  [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; }
+}
+assert_file_empty() {
+  # a ( ... ) group would execute in a subshell, so the { ...; } group is used instead
+  [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; }
+}
+assert_file_not_empty() {
+  # [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1)
+  [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; }
+}
+assert_file_contains() {
+  # grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1)
+  grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
+}
+assert_file_not_contains() {
+  # negate grep so the function returns 0 when the pattern is absent; 'grep && {...}' returned 1 and tripped 'set -e'
+  ! grep -q "$2" "$1" || { echo "File '$1' contains '$2' but shouldn't" && exit 1; }
+}
+
+in_fa="$meta_resources_dir/test_data/reference_small.fa"
+in_gtf="$meta_resources_dir/test_data/reference_small.gtf"
+
+echo "#############################################"
+echo "> Simple run"
+
+mkdir simple_run
+cd simple_run
+
+out_tar="myreference.tar.gz"
+
+echo "> Running $meta_name."
+$meta_executable \
+  --genome_fasta "$in_fa" \
+  --gtf "$in_gtf" \
+  --reference_archive "$out_tar" \
+  --extra_star_params "--genomeSAindexNbases 6" \
+  ---cpus 2
+
+exit_code=$?
+[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1
+
+assert_file_exists "$out_tar"
+assert_file_not_empty "$out_tar"
+
+echo ">> Checking whether output contains the expected files"
+tar -xvf "$out_tar" > /dev/null
+assert_file_exists "BD_Rhapsody_Reference_Files/star_index/genomeParameters.txt"
+assert_file_exists "BD_Rhapsody_Reference_Files/bwa-mem2_index/reference_small.ann"
+assert_file_exists "BD_Rhapsody_Reference_Files/reference_small-processed.gtf"
+assert_file_exists "BD_Rhapsody_Reference_Files/mitochondrial_contigs.txt"
+assert_file_contains "BD_Rhapsody_Reference_Files/reference_small-processed.gtf" "chr1.*HAVANA.*ENSG00000243485"
+assert_file_contains "BD_Rhapsody_Reference_Files/mitochondrial_contigs.txt" 'chrMT'
+
+cd ..
+
+echo "#############################################"
+
+echo "> Tests succeeded!"
\ No newline at end of file
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.fa b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.fa
new file mode 100644
index 00000000..386d887c
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.fa
@@ -0,0 +1,27 @@
+>chr1 1
+TGGGGAAGCAAGGCGGAGTTGGGCAGCTCGTGTTCAATGGGTAGAGTTTCAGGCTGGGGT
+GATGGAAGGGTGCTGGAAATGAGTGGTAGTGATGGCGGCACAACAGTGTGAATCTACTTA
+ATCCCACTGAACTGTATGCTGAAAAATGGTTTAGACGGTGAATTTTAGGTTATGTATGTT
+TTACCACAATTTTTAAAAAGCTAGTGAAAAGCTGGTAAAAAGAAAGAAAAGAGGCTTTTT
+TAAAAAGTTAAATATATAAAAAGAGCATCATCAGTCCAAAGTCCAGCAGTTGTCCCTCCT
+GGAATCCGTTGGCTTGCCTCCGGCATTTTTGGCCCTTGCCTTTTAGGGTTGCCAGATTAA
+AAGACAGGATGCCCAGCTAGTTTGAATTTTAGATAAACAACGAATAATTTCGTAGCATAA
+ATATGTCCCAAGCTTAGTTTGGGACATACTTATGCTAAAAAACATTATTGGTTGTTTATC
+TGAGATTCAGAATTAAGCATTTTATATTTTATTTGCTGCCTCTGGCCACCCTACTCTCTT
+CCTAACACTCTCTCCCTCTCCCAGTTTTGTCCGCCTTCCCTGCCTCCTCTTCTGGGGGAG
+TTAGATCGAGTTGTAACAAGAACATGCCACTGTCTCGCTGGCTGCAGCGTGTGGTCCCCT
+TACCAGAGGTAAAGAAGAGATGGATCTCCACTCATGTTGTAGACAGAATGTTTATGTCCT
+CTCCAAATGCTTATGTTGAAACCCTAACCCCTAATGTGATGGTATGTGGAGATGGGCCTT
+TGGTAGGTAATTACGGTTAGATGAGGTCATGGGGTGGGGCCCTCATTATAGATCTGGTAA
+GAAAAGAGAGCATTGTCTCTGTGTCTCCCTCTCTCTCTCTCTCTCTCTCTCTCATTTCTC
+TCTATCTCATTTCTCTCTCTCTCGCTATCTCATTTTTCTCTCTCTCTCTTTCTCTCCTCT
+GTCTTTTCCCACCAAGTGAGGATGCGAAGAGAAGGTGGCTGTCTGCAAACCAGGAAGAGA
+GCCCTCACCGGGAACCCGTCCAGCTGCCACCTTGAACTTGGACTTCCAAGCCTCCAGAAC
+TGTGAGGGATAAATGTATGATTTTAAAGTCGCCCAGTGTGTGGTATTTTGTTTTGACTAA
+TACAACCTGAAAACATTTTCCCCTCACTCCACCTGAGCAATATCTGAGTGGCTTAAGGTA
+CTCAGGACACAACAAAGGAGAAATGTCCCATGCACAAGGTGCACCCATGCCTGGGTAAAG
+CAGCCTGGCACAGAGGGAAGCACACAGGCTCAGGGATCTGCTATTCATTCTTTGTGTGAC
+CCTGGGCAAGCCATGAATGGAGCTTCAGTCACCCCATTTGTAATGGGATTTAATTGTGCT
+TGCCCTGCCTCCTTTTGAGGGCTGTAGAGAAAAGATGTCAAAGTATTTTGTAATCTGGCT
+GGGCGTGGTGGCTCATGCCTGTAATCCTAGCACTTTGGTAGGCTGACGCGAGAGGACTGC
+T
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.gtf b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.gtf
new file mode 100644
index 00000000..7ba83523
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.gtf
@@ -0,0 +1,8 @@
+chr1	HAVANA	exon	565	668	.	+	.	gene_id "ENSG00000243485.5"; transcript_id "ENST00000473358.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-202"; exon_number 2; exon_id "ENSE00001922571.1"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "dotter_confirmed"; tag "basic"; tag "Ensembl_canonical"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002840.1";
+chr1	HAVANA	exon	977	1098	.	+	.	gene_id "ENSG00000243485.5"; transcript_id "ENST00000473358.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-202"; exon_number 3; exon_id "ENSE00001827679.1"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "dotter_confirmed"; tag "basic"; tag "Ensembl_canonical"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002840.1";
+chr1	HAVANA	transcript	268	1110	.	+	.	gene_id "ENSG00000243485.5"; transcript_id "ENST00000469289.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-201"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "basic"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002841.2";
+chr1	HAVANA	exon	268	668	.	+	.	gene_id "ENSG00000243485.5"; transcript_id "ENST00000469289.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-201"; exon_number 1; exon_id "ENSE00001841699.1"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "basic"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002841.2";
+chr1	HAVANA	exon	977	1110	.	+	.	gene_id "ENSG00000243485.5"; transcript_id "ENST00000469289.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-201"; exon_number 2; exon_id "ENSE00001890064.1"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "basic"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002841.2";
+chr1	ENSEMBL	gene	367	504	.	+	.	gene_id "ENSG00000284332.1"; gene_type "miRNA"; gene_name "MIR1302-2"; level 3; hgnc_id "HGNC:35294";
+chr1	ENSEMBL	transcript	367	504	.	+	.	gene_id "ENSG00000284332.1"; transcript_id "ENST00000607096.1"; gene_type "miRNA"; gene_name "MIR1302-2"; transcript_type "miRNA"; transcript_name "MIR1302-2-201"; level 3; transcript_support_level "NA"; hgnc_id "HGNC:35294"; tag "basic"; tag "Ensembl_canonical";
+chr1	ENSEMBL	exon	367	504	.	+	.	gene_id "ENSG00000284332.1"; transcript_id "ENST00000607096.1"; gene_type "miRNA"; gene_name "MIR1302-2"; transcript_type "miRNA"; transcript_name "MIR1302-2-201"; exon_number 1; exon_id "ENSE00003695741.1"; level 3; transcript_support_level "NA"; hgnc_id "HGNC:35294"; tag "basic"; tag "Ensembl_canonical";
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/script.sh b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/script.sh
new file mode 100644
index 00000000..8d468064
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/script.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# abort on the first failing command, including inside pipelines
+set -eo pipefail
+
+TMP_DIR=/tmp/bd_rhapsody_make_reference
+OUT_DIR=src/bd_rhapsody/bd_rhapsody_make_reference/test_data
+
+# check if seqkit is installed
+if ! command -v seqkit &> /dev/null; then
+  echo "seqkit could not be found" >&2
+  exit 1
+fi
+
+# create temporary directory and clean up on exit
+mkdir -p "$TMP_DIR"
+trap 'rm -rf "$TMP_DIR"' EXIT
+
+# fetch reference
+ORIG_FA="$TMP_DIR/reference.fa.gz"
+if [ ! -f "$ORIG_FA" ]; then
+  wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz \
+    -O "$ORIG_FA"
+fi
+
+ORIG_GTF="$TMP_DIR/reference.gtf.gz"
+if [ ! -f "$ORIG_GTF" ]; then
+  wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz \
+    -O "$ORIG_GTF"
+fi
+
+# create small reference
+START=30000
+END=31500
+CHR=chr1
+
+# subset to small region; GTF coordinates are shifted so that START becomes position 1
+seqkit grep -r -p "^$CHR\$" "$ORIG_FA" | \
+  seqkit subseq -r "$START:$END" > "$OUT_DIR/reference_small.fa"
+
+zcat "$ORIG_GTF" | \
+  awk -v FS='\t' -v OFS='\t' -v chr="$CHR" -v first="$START" -v last="$END" '
+    $1 == chr && $4 >= first && $5 <= last {
+      $4 = $4 - first + 1;
+      $5 = $5 - first + 1;
+      print;
+    }' > "$OUT_DIR/reference_small.gtf"

From c2e340d92ea7f153d0c5c9de1cffbc6b88fc4124 Mon Sep 17 00:00:00 2001
From: emmarousseau <emmarou1@icloud.com>
Date: Wed, 17 Jul 2024 18:10:37 +0200
Subject: [PATCH 21/23] Remove multiple_sep (#78)

* initial commit dedup

* Revert "initial commit dedup"

This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2.

* get rid of multiple_sep fields in configs

* Fix coverage argument's format in config
---
 src/gffread/config.vsh.yaml                 |  5 +--
 src/gffread/script.sh                       |  2 ++
 src/gffread/test.sh                         |  2 +-
 src/samtools/samtools_stats/config.vsh.yaml | 40 ++++++++++-----------
 src/samtools/samtools_stats/script.sh       |  3 ++
 src/samtools/samtools_stats/test.sh         |  2 +-
 6 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/src/gffread/config.vsh.yaml b/src/gffread/config.vsh.yaml
index d2c41a87..7477a284 100644
--- a/src/gffread/config.vsh.yaml
+++ b/src/gffread/config.vsh.yaml
@@ -8,8 +8,6 @@ links:
 references: 
   doi: 10.12688/f1000research.23297.2
 license: MIT
-requirements:
-  commands: [ gffread ]
 argument_groups:
   - name: Inputs
     arguments:
@@ -52,7 +50,7 @@ argument_groups:
         required: true
         description: |
           Write the output records into <outfile>.
-        default: output.gff
+        example: output.gff
       - name: --force_exons
         type: boolean_true
         description: |
@@ -154,7 +152,6 @@ argument_groups:
       - name: --table
         type: string
         multiple: true
-        multiple_sep: ","
         description: |
           Output a simple tab delimited format instead of GFF, with columns having the values 
           of GFF attributes given in <attrlist>; special pseudo-attributes (prefixed by @) are 
diff --git a/src/gffread/script.sh b/src/gffread/script.sh
index 9c4a2b8f..cd4abf14 100644
--- a/src/gffread/script.sh
+++ b/src/gffread/script.sh
@@ -50,6 +50,8 @@
 [[ "$par_expose_dups" == "false" ]] && unset par_expose_dups
 [[ "$par_cluster_only" == "false" ]] && unset par_cluster_only
 
+# gffread expects a comma-separated list, but Viash joins multiple values with ";"
+par_table="${par_table//;/,}"
 
 $(which gffread) \
     "$par_input" \
diff --git a/src/gffread/test.sh b/src/gffread/test.sh
index 326fce50..ea23edcb 100755
--- a/src/gffread/test.sh
+++ b/src/gffread/test.sh
@@ -86,7 +86,7 @@ diff "$expected_output_dir/transcripts.fa" "$test_output_dir/transcripts.fa" ||
 echo "> Test 4 - Generate table from GFF annotation file"
 
 "$meta_executable" \
-  --table @id,@chr,@start,@end,@strand,@exons,Name,gene,product \
+  --table "@id;@chr;@start;@end;@strand;@exons;Name;gene;product" \
   --outfile "$test_output_dir/annotation.tbl" \
   --input "$test_dir/sequence.gff3"
 
diff --git a/src/samtools/samtools_stats/config.vsh.yaml b/src/samtools/samtools_stats/config.vsh.yaml
index 0d8f57a4..ca630876 100644
--- a/src/samtools/samtools_stats/config.vsh.yaml
+++ b/src/samtools/samtools_stats/config.vsh.yaml
@@ -30,10 +30,10 @@ argument_groups:
     - name: --coverage
       alternatives: -c
       type: integer
-      description: |
-        Coverage distribution min,max,step [1,1000,1].
       multiple: true
-      multiple_sep: ','
+      description: |
+        Coverage distribution min;max;step. Default: [1, 1000, 1].
+      example: [1, 1000, 1]
     - name: --remove_dups
       alternatives: -d
       type: boolean_true
@@ -48,25 +48,25 @@ argument_groups:
       alternatives: -f
       type: string
       description: |
-        Required flag, 0 for unset. See also `samtools flags`.
-      default: "0"
+        Required flag, 0 for unset. See also `samtools flags`. Default: `0`.
+      example: "0"
     - name: --filtering_flag
       alternatives: -F
       type: string
       description: |
-        Filtering flag, 0 for unset. See also `samtools flags`.
-      default: "0"
+        Filtering flag, 0 for unset. See also `samtools flags`. Default: `0`.
+      example: "0"
     - name: --GC_depth
       type: double
       description: |
-        The size of GC-depth bins (decreasing bin size increases memory requirement).
-      default: 20000.0
+        The size of GC-depth bins (decreasing bin size increases memory requirement). Default: `20000`.
+      example: 20000.0
     - name: --insert_size
       alternatives: -i
       type: integer
       description: |
-        Maximum insert size.
-      default: 8000
+        Maximum insert size. Default: `8000`.
+      example: 8000
     - name: --id
       alternatives: -I
       type: string
@@ -76,14 +76,14 @@ argument_groups:
       alternatives: -l
       type: integer
       description: |
-        Include in the statistics only reads with the given read length.
-      default: -1
+        Include in the statistics only reads with the given read length. Default: `-1`.
+      example: -1
     - name: --most_inserts
       alternatives: -m
       type: double
       description: |
-        Report only the main part of inserts.
-      default: 0.99
+        Report only the main part of inserts. Default: `0.99`.
+      example: 0.99
     - name: --split_prefix
       alternatives: -P
       type: string
@@ -93,8 +93,8 @@ argument_groups:
       alternatives: -q
       type: integer
       description: |
-        The BWA trimming parameter.
-      default: 0
+        The BWA trimming parameter. Default: `0`.
+      example: 0
     - name: --ref_seq
       alternatives: -r
       type: file
@@ -124,8 +124,8 @@ argument_groups:
       alternatives: -g
       type: integer
       description: |
-        Only bases with coverage above this value will be included in the target percentage computation.
-      default: 0
+        Only bases with coverage above this value will be included in the target percentage computation. Default: `0`.
+      example: 0
     - name: --input_fmt_option
       type: string
       description: |
@@ -141,7 +141,7 @@ argument_groups:
       type: file
       description: |
         Output file.
-      default: "out.txt"
+      example: "out.txt"
       required: true
       direction: output
 
diff --git a/src/samtools/samtools_stats/script.sh b/src/samtools/samtools_stats/script.sh
index 6e32e9a5..e3872fc6 100644
--- a/src/samtools/samtools_stats/script.sh
+++ b/src/samtools/samtools_stats/script.sh
@@ -10,6 +10,9 @@ set -e
 [[ "$par_sparse" == "false" ]] && unset par_sparse
 [[ "$par_remove_overlaps" == "false" ]] && unset par_remove_overlaps
 
+# samtools expects "min,max,step", but Viash joins multiple values with ";"
+par_coverage="${par_coverage//;/,}"
+
 samtools stats \
     ${par_coverage:+-c "$par_coverage"} \
     ${par_remove_dups:+-d} \
diff --git a/src/samtools/samtools_stats/test.sh b/src/samtools/samtools_stats/test.sh
index 05d70d30..b515100e 100644
--- a/src/samtools/samtools_stats/test.sh
+++ b/src/samtools/samtools_stats/test.sh
@@ -17,7 +17,7 @@ echo ">>> Checking whether output is non-empty"
 [ ! -s "$test_dir/test.paired_end.sorted.txt" ] && echo "File 'test.paired_end.sorted.txt' is empty!" && exit 1
 
 echo ">>> Checking whether output is correct"
-# compare using diff,  ignoring the line stating the command that was passed.
+# compare using diff, ignoring the line stating the command that was passed.
 diff <(grep -v "^# The command" "$test_dir/test.paired_end.sorted.txt") \
     <(grep -v "^# The command" "$test_dir/ref.paired_end.sorted.txt") || \
     (echo "Output file ref.paired_end.sorted.txt does not match expected output" && exit 1)

From 8e9abad885b27120a56a580ca7d961c64b96ad60 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Wed, 17 Jul 2024 18:14:21 +0200
Subject: [PATCH 22/23] Update CONTRIBUTING.md (#82)

* Update CONTRIBUTING.md

* update ctb

* clean up helper functions

* update changelog

* update changelog
---
 CHANGELOG.md                                  |  28 +++-
 CONTRIBUTING.md                               | 151 +++++++++++-------
 .../bd_rhapsody_make_reference/test.sh        |   5 +-
 src/cutadapt/test.sh                          |  14 +-
 src/star/star_align_reads/test.sh             |  21 ++-
 5 files changed, 130 insertions(+), 89 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9cfacdbc..2aad0cb8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,19 +6,33 @@
 
   - `bd_rhapsody/bd_rhapsody_make_reference`: Create a reference for the BD Rhapsody pipeline (PR #75).
 
-## BUG FIXES
+## MINOR CHANGES
 
-* `pear`: fix component not exiting with the correct exitcode when PEAR fails.
+* `busco` components: update BUSCO to `5.7.1` (PR #72).
 
-* `cutadapt`: fix `--par_quality_cutoff_r2` argument.
+## DOCUMENTATION
 
-* `cutadapt`: demultiplexing is now disabled by default. It can be re-enabled by using `demultiplex_mode`.
+* Extend the contributing guidelines (PR #82):
 
-* `multiqc`: update multiple separator to `;` (PR #81).
+  - Update format to Viash 0.9.
 
-## MINOR CHANGES
+  - Descriptions should be formatted in markdown.
+
+  - Add defaults to descriptions, not as a default of the argument.
+
+  - Explain parameter expansion.
 
-* `busco` components: update BUSCO to `5.7.1`.
+  - Mention that the contents of the output of components in tests should be checked.
+
+## BUG FIXES
+
+* `pear`: fix component not exiting with the correct exitcode when PEAR fails (PR #70).
+
+* `cutadapt`: fix `--par_quality_cutoff_r2` argument (PR #69).
+
+* `cutadapt`: demultiplexing is now disabled by default. It can be re-enabled by using `demultiplex_mode` (PR #69).
+
+* `multiqc`: update multiple separator to `;` (PR #81).
 
 # biobox 0.1.0
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7393bc7e..cee4249a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -65,22 +65,21 @@ runners:
 Fill in the relevant metadata fields in the config. Here is an example of the metadata of an existing component.
 
 ```yaml
-functionality:
-  name: arriba
-  description: Detect gene fusions from RNA-Seq data
-  keywords: [Gene fusion, RNA-Seq]
-  links:
-    homepage: https://arriba.readthedocs.io/en/latest/
-    documentation: https://arriba.readthedocs.io/en/latest/
-    repository: https://github.com/suhrig/arriba
-    issue_tracker: https://github.com/suhrig/arriba/issues
-  references:
-    doi: 10.1101/gr.257246.119
-    bibtex: |
-      @article{
-        ... a bibtex entry in case the doi is not available ...
-      }
-  license: MIT
+name: arriba
+description: Detect gene fusions from RNA-Seq data
+keywords: [Gene fusion, RNA-Seq]
+links:
+  homepage: https://arriba.readthedocs.io/en/latest/
+  documentation: https://arriba.readthedocs.io/en/latest/
+  repository: https://github.com/suhrig/arriba
+  issue_tracker: https://github.com/suhrig/arriba/issues
+references:
+  doi: 10.1101/gr.257246.119
+  bibtex: |
+    @article{
+      ... a bibtex entry in case the doi is not available ...
+    }
+license: MIT
 ```
 
 ### Step 4: Find a suitable container
@@ -162,7 +161,7 @@ argument_groups:
       type: file
       description: |
         File in SAM/BAM/CRAM format with main alignments as generated by STAR
-        (Aligned.out.sam). Arriba extracts candidate reads from this file.
+        (`Aligned.out.sam`). Arriba extracts candidate reads from this file.
       required: true
       example: Aligned.out.bam
 ```
@@ -175,7 +174,7 @@ Several notes:
 
 * Input arguments can have `multiple: true` to allow the user to specify multiple files.
 
-
+* The description should be formatted in markdown.
 
 ### Step 8: Add arguments for the output files
 
@@ -220,7 +219,7 @@ argument_groups:
 
 Note: 
 
-* Preferably, these outputs should not be directores but files. For example, if a tool outputs a directory `foo/` containing files `foo/bar.txt` and `foo/baz.txt`, there should be two output arguments `--bar` and `--baz` (as opposed to one output argument which outputs the whole `foo/` directory).
+* Preferably, these outputs should not be directories but files. For example, if a tool outputs a directory `foo/` containing files `foo/bar.txt` and `foo/baz.txt`, there should be two output arguments `--bar` and `--baz` (as opposed to one output argument which outputs the whole `foo/` directory).
 
 ### Step 9: Add arguments for the other arguments
 
@@ -230,6 +229,8 @@ Finally, add all other arguments to the config file. There are a few exceptions:
 
 * Arguments related to printing the information such as printing the version (`-v`, `--version`) or printing the help (`-h`, `--help`) should not be added to the config file.
 
+* If the help lists defaults, do not add them as defaults but to the description. Example: `description: <Explanation of parameter>. Default: 10.`
+
 
 ### Step 10: Add a Docker engine
 
@@ -275,10 +276,13 @@ Next, we need to write a runner script that runs the tool with the input argumen
 ## VIASH START
 ## VIASH END
 
+# unset flags
+[[ "$par_option" == "false" ]] && unset par_option
+
 xxx \
   --input "$par_input" \
   --output "$par_output" \
-  $([ "$par_option" = "true" ] && echo "--option")
+  ${par_option:+--option}
 ```
 
 When building a Viash component, Viash will automatically replace the `## VIASH START` and `## VIASH END` lines (and anything in between) with environment variables based on the arguments specified in the config.
@@ -291,6 +295,11 @@ As an example, this is what the Bash script for the `arriba` component looks lik
 ## VIASH START
 ## VIASH END
 
+# unset flags
+[[ "$par_skip_duplicate_marking" == "false" ]] && unset par_skip_duplicate_marking
+[[ "$par_extra_information" == "false" ]] && unset par_extra_information
+[[ "$par_fill_gaps" == "false" ]] && unset par_fill_gaps
+
 arriba \
   -x "$par_bam" \
   -a "$par_genome" \
@@ -298,26 +307,30 @@ arriba \
   -o "$par_fusions" \
   ${par_known_fusions:+-k "${par_known_fusions}"} \
   ${par_blacklist:+-b "${par_blacklist}"} \
-  ${par_structural_variants:+-d "${par_structural_variants}"} \
-  $([ "$par_skip_duplicate_marking" = "true" ] && echo "-u") \
-  $([ "$par_extra_information" = "true" ] && echo "-X") \
-  $([ "$par_fill_gaps" = "true" ] && echo "-I")
+  ${par_skip_duplicate_marking:+-u} \
+  ${par_extra_information:+-X} \
+  ${par_fill_gaps:+-I}
 ```
 
+Notes:
 
-### Step 12: Create test script
+* If your arguments can contain special characters (e.g. `$`), you can use the `@Q` operator of Bash [parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html) to quote the value so that it can be reused as input. Example: `-x ${par_bam@Q}`.
 
+* Optional arguments can be passed to the command conditionally using Bash [parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html). For example: `${par_known_fusions:+-k ${par_known_fusions@Q}}`
+
+* If your tool allows for multiple inputs using a separator other than `;` (which is the default Viash multiple separator), you can substitute these values with a command like: `par_disable_filters=$(echo "$par_disable_filters" | tr ';' ',')`.
+
+
+### Step 12: Create test script
 
 If the unit test requires test resources, these should be provided in the `test_resources` section of the component. 
 
 ```yaml
-functionality:
-  # ...
-  test_resources:
-    - type: bash_script
-      path: test.sh
-    - type: file
-      path: test_data
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - type: file
+    path: test_data
 ```
 
 Create a test script at `src/xxx/test.sh` that runs the component with the test data. This script should run the component (available with `$meta_executable`) with the test data and check if the output is as expected. The script should exit with a non-zero exit code if the output is not as expected. For example:
@@ -325,48 +338,64 @@ Create a test script at `src/xxx/test.sh` that runs the component with the test
 ```bash
 #!/bin/bash
 
+set -e
+
 ## VIASH START
 ## VIASH END
 
-echo "> Run xxx with test data"
+#############################################
+# helper functions
+assert_file_exists() {
+  [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; }
+}
+assert_file_doesnt_exist() {
+  [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; }
+}
+assert_file_empty() {
+  [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; }
+}
+assert_file_not_empty() {
+  [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; }
+}
+assert_file_contains() {
+  grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
+}
+assert_file_not_contains() {
+  ! grep -q "$2" "$1" || { echo "File '$1' contains '$2' but shouldn't" && exit 1; }  # '!' keeps the status 0 on no match, so 'set -e' does not abort
+}
+assert_file_contains_regex() {
+  grep -q -E "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
+}
+assert_file_not_contains_regex() {
+  ! grep -q -E "$2" "$1" || { echo "File '$1' contains '$2' but shouldn't" && exit 1; }  # '!' keeps the status 0 on no match, so 'set -e' does not abort
+}
+#############################################
+
+echo "> Run $meta_name with test data"
 "$meta_executable" \
-  --input "$meta_resources_dir/test_data/input.txt" \
+  --input "$meta_resources_dir/test_data/reads_R1.fastq" \
   --output "output.txt" \
   --option
 
-echo ">> Checking output"
-[ ! -f "output.txt" ] && echo "Output file output.txt does not exist" && exit 1
-```
+echo ">> Check if output exists"
+assert_file_exists "output.txt"
 
+echo ">> Check if output is empty"
+assert_file_not_empty "output.txt"
 
-For example, this is what the test script for the `arriba` component looks like:
+echo ">> Check if output is correct"
+assert_file_contains "output.txt" "some expected output"
 
-```bash
-#!/bin/bash
+echo "> All tests succeeded!"
+```
 
-## VIASH START
-## VIASH END
+Notes:
 
-echo "> Run arriba with blacklist"
-"$meta_executable" \
-  --bam "$meta_resources_dir/test_data/A.bam" \
-  --genome "$meta_resources_dir/test_data/genome.fasta" \
-  --gene_annotation "$meta_resources_dir/test_data/annotation.gtf" \
-  --blacklist "$meta_resources_dir/test_data/blacklist.tsv" \
-  --fusions "fusions.tsv" \
-  --fusions_discarded "fusions_discarded.tsv" \
-  --interesting_contigs "1,2"
-
-echo ">> Checking output"
-[ ! -f "fusions.tsv" ] && echo "Output file fusions.tsv does not exist" && exit 1
-[ ! -f "fusions_discarded.tsv" ] && echo "Output file fusions_discarded.tsv does not exist" && exit 1
+* Do always check the contents of the output file. If the output is not deterministic, you can use regular expressions to check the output.
 
-echo ">> Check if output is empty"
-[ ! -s "fusions.tsv" ] && echo "Output file fusions.tsv is empty" && exit 1
-[ ! -s "fusions_discarded.tsv" ] && echo "Output file fusions_discarded.tsv is empty" && exit 1
-```
+* If possible, generate your own test data instead of copying it from an external resource.
 
-### Step 12: Create a `/var/software_versions.txt` file
+### Step 13: Create a `/var/software_versions.txt` file
 
 For the sake of transparency and reproducibility, we require that the versions of the software used in the component are documented.
 
@@ -378,6 +407,8 @@ engines:
     image: quay.io/biocontainers/xxx:0.1.0--py_0
     setup:
       - type: docker
+        # note: /var/software_versions.txt should contain:
+        #   arriba: "2.4.0"
         run: |
           echo "xxx: \"0.1.0\"" > /var/software_versions.txt
 ```
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh b/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh
index 3637160a..845c1739 100644
--- a/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh
@@ -11,21 +11,18 @@ assert_file_doesnt_exist() {
   [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; }
 }
 assert_file_empty() {
-  #  () will execute in a shubshell, could you use {;}?
   [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; }
 }
 assert_file_not_empty() {
-  # [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1)
   [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; }
 }
 assert_file_contains() {
-  # grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1)
   grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
 }
 assert_file_not_contains() {
-  # grep -q "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1)
   grep -q "$2" "$1" && { echo "File '$1' contains '$2' but shouldn't" && exit 1; }
 }
+#############################################
 
 in_fa="$meta_resources_dir/test_data/reference_small.fa"
 in_gtf="$meta_resources_dir/test_data/reference_small.gtf"
diff --git a/src/cutadapt/test.sh b/src/cutadapt/test.sh
index 1d6d9c18..28248742 100644
--- a/src/cutadapt/test.sh
+++ b/src/cutadapt/test.sh
@@ -6,25 +6,25 @@ set -eo pipefail
 #############################################
 # helper functions
 assert_file_exists() {
-  [ -f "$1" ] || (echo "File '$1' does not exist" && exit 1)
+  [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; }
 }
 assert_file_doesnt_exist() {
-  [ ! -f "$1" ] || (echo "File '$1' exists but shouldn't" && exit 1)
+  [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; }
 }
 assert_file_empty() {
-  [ ! -s "$1" ] || (echo "File '$1' is not empty but should be" && exit 1)
+  [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; }
 }
 assert_file_not_empty() {
-  [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1)
+  [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; }
 }
 assert_file_contains() {
-  grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1)
+  grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
 }
 assert_file_not_contains() {
-  grep -q "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1)
+  ! grep -q "$2" "$1" || { echo "File '$1' contains '$2' but shouldn't" && exit 1; }  # '!' keeps the status 0 on no match, so 'set -e' does not abort
 }
-
 #############################################
+
 mkdir test_multiple_output
 cd test_multiple_output
 
diff --git a/src/star/star_align_reads/test.sh b/src/star/star_align_reads/test.sh
index a15ea599..bd78094d 100644
--- a/src/star/star_align_reads/test.sh
+++ b/src/star/star_align_reads/test.sh
@@ -7,35 +7,34 @@ meta_executable="target/docker/star/star_align_reads/star_align_reads"
 meta_resources_dir="src/star/star_align_reads"
 ## VIASH END
 
-#########################################################################################
-
+#############################################
 # helper functions
 assert_file_exists() {
-  [ -f "$1" ] || (echo "File '$1' does not exist" && exit 1)
+  [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; }
 }
 assert_file_doesnt_exist() {
-  [ ! -f "$1" ] || (echo "File '$1' exists but shouldn't" && exit 1)
+  [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; }
 }
 assert_file_empty() {
-  [ ! -s "$1" ] || (echo "File '$1' is not empty but should be" && exit 1)
+  [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; }
 }
 assert_file_not_empty() {
-  [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1)
+  [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; }
 }
 assert_file_contains() {
-  grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1)
+  grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
 }
 assert_file_not_contains() {
-  grep -q "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1)
+  ! grep -q "$2" "$1" || { echo "File '$1' contains '$2' but shouldn't" && exit 1; }  # '!' keeps the status 0 on no match, so 'set -e' does not abort
 }
 assert_file_contains_regex() {
-  grep -q -E "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1)
+  grep -q -E "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
 }
 assert_file_not_contains_regex() {
-  grep -q -E "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1)
+  ! grep -q -E "$2" "$1" || { echo "File '$1' contains '$2' but shouldn't" && exit 1; }  # '!' keeps the status 0 on no match, so 'set -e' does not abort
 }
+#############################################
 
-#########################################################################################
 echo "> Prepare test data"
 
 cat > reads_R1.fastq <<'EOF'

From 13c5439a0c36f8a1bd3889e68d68ca85672daa62 Mon Sep 17 00:00:00 2001
From: Leila011 <leilapaquay@gmail.com>
Date: Wed, 17 Jul 2024 18:15:08 +0200
Subject: [PATCH 23/23] Add agat convertspgff2gtf (#76)

* Fill in the metadata

* add help.txt

* add test data

* update help.txt

* add arguments for input file, output file and other arguments

* add  a Docker engine

* Write a runner script

* correct --gtf_version choices

* update description

* update keywords

* Create test script

* Create a /var/software_versions.txt file

* remove duplicated argument

* update config

* change name to agat_convert_sp_gff2gtf

* update license

* replace module name by $meta_name in test.sh

* Add more info to --gtf_version description

* remove extra \

* add additional test: check if the D column in the first line of the GFF was correctly converted into GTF format

* update changelog

* Markdown: add newline before listing

* add test to check if the header contains the right GTF version

* cleanup

* fix formatting

---------

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>
---
 CHANGELOG.md                                  |   3 +
 .../agat_convert_sp_gff2gtf/config.vsh.yaml   |  90 ++++++++++++++++
 src/agat/agat_convert_sp_gff2gtf/help.txt     | 102 ++++++++++++++++++
 src/agat/agat_convert_sp_gff2gtf/script.sh    |  10 ++
 src/agat/agat_convert_sp_gff2gtf/test.sh      |  37 +++++++
 .../test_data/0_test.gff                      |  36 +++++++
 .../test_data/script.sh                       |   9 ++
 7 files changed, 287 insertions(+)
 create mode 100644 src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml
 create mode 100644 src/agat/agat_convert_sp_gff2gtf/help.txt
 create mode 100644 src/agat/agat_convert_sp_gff2gtf/script.sh
 create mode 100644 src/agat/agat_convert_sp_gff2gtf/test.sh
 create mode 100644 src/agat/agat_convert_sp_gff2gtf/test_data/0_test.gff
 create mode 100755 src/agat/agat_convert_sp_gff2gtf/test_data/script.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2aad0cb8..8f56b22e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -100,6 +100,9 @@
     - `bedtools_getfasta`: extract sequences from a FASTA file for each of the
                            intervals defined in a BED/GFF/VCF file (PR #59).
 
+* `agat`:
+    - `agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76).
+
 ## MINOR CHANGES
 
 * Uniformize component metadata (PR #23).
diff --git a/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml b/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml
new file mode 100644
index 00000000..b788c7c7
--- /dev/null
+++ b/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml
@@ -0,0 +1,90 @@
+name: agat_convert_sp_gff2gtf
+namespace: agat
+description: |
+  The script aims to convert any GTF/GFF file into a proper GTF file. Full
+  information about the format can be found here:
+  https://agat.readthedocs.io/en/latest/gxf.html You can choose among 7
+  different GTF types (1, 2, 2.1, 2.2, 2.5, 3 or relax). Depending the
+  version selected the script will filter out the features that are not
+  accepted. For GTF2.5 and 3, every level1 feature (e.g nc_gene
+  pseudogene) will be converted into gene feature and every level2 feature
+  (e.g mRNA ncRNA) will be converted into transcript feature. Using the
+  "relax" option you will produce a GTF-like output keeping all original
+  feature types (3rd column). No modification will occur e.g. mRNA to
+  transcript.
+
+  To be fully GTF compliant all feature have a gene_id and a transcript_id
+  attribute. The gene_id is unique identifier for the genomic source of
+  the transcript, which is used to group transcripts into genes. The
+  transcript_id is a unique identifier for the predicted transcript, which
+  is used to group features into transcripts.
+keywords: [gene annotations, GTF conversion]
+links:
+  homepage: https://github.com/NBISweden/AGAT
+  documentation: https://agat.readthedocs.io/
+  issue_tracker: https://github.com/NBISweden/AGAT/issues
+  repository: https://github.com/NBISweden/AGAT
+references: 
+  doi: 10.5281/zenodo.3552717
+license: GPL-3.0
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --gff
+        alternatives: [-i]
+        description: Input GFF/GTF file that will be read
+        type: file
+        required: true
+        direction: input
+        example: input.gff
+  - name: Outputs
+    arguments:       
+      - name: --output
+        alternatives: [-o, --out, --outfile, --gtf]
+        description: Output GTF file. If no output file is specified, the output will be written to STDOUT.
+        type: file
+        direction: output
+        required: true
+        example: output.gtf
+  - name: Arguments
+    arguments:
+      - name: --gtf_version
+        description: |
+          Version of the GTF output (1,2,2.1,2.2,2.5,3 or relax). Default value from AGAT config file (relax for the default config). The script option has the higher priority.  
+          
+            * relax: all feature types are accepted.  
+            * GTF3 (9 feature types accepted): gene, transcript, exon, CDS, Selenocysteine, start_codon, stop_codon, three_prime_utr and five_prime_utr.  
+            * GTF2.5 (8 feature types accepted): gene, transcript, exon, CDS, UTR, start_codon, stop_codon, Selenocysteine.  
+            * GTF2.2 (9 feature types accepted): CDS, start_codon, stop_codon, 5UTR, 3UTR, inter, inter_CNS, intron_CNS and exon.  
+            * GTF2.1 (6 feature types accepted): CDS, start_codon, stop_codon, exon, 5UTR, 3UTR.  
+            * GTF2 (4 feature types accepted): CDS, start_codon, stop_codon, exon.  
+            * GTF1 (5 feature types accepted): CDS, start_codon, stop_codon, exon, intron.  
+        type: string
+        choices: [relax, "1", "2", "2.1", "2.2", "2.5", "3"]
+        required: false
+        example: "3"
+      - name: --config
+        alternatives: [-c]
+        description: |
+          Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, otherwise it takes the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently).
+        type: file
+        required: false
+        example: custom_agat_config.yaml
+resources:
+  - type: bash_script
+    path: script.sh
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - type: file
+    path: test_data
+engines:
+  - type: docker
+    image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0
+    setup:
+      - type: docker
+        run: |
+          agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt
+runners:
+  - type: executable
+  - type: nextflow
\ No newline at end of file
diff --git a/src/agat/agat_convert_sp_gff2gtf/help.txt b/src/agat/agat_convert_sp_gff2gtf/help.txt
new file mode 100644
index 00000000..fdd45507
--- /dev/null
+++ b/src/agat/agat_convert_sp_gff2gtf/help.txt
@@ -0,0 +1,102 @@
+```sh
+agat_convert_sp_gff2gtf.pl --help
+```
+ ------------------------------------------------------------------------------
+|   Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0                      |
+|   https://github.com/NBISweden/AGAT                                          |
+|   National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se         |
+ ------------------------------------------------------------------------------
+
+
+Name:
+    agat_convert_sp_gff2gtf.pl
+
+Description:
+    The script aims to convert any GTF/GFF file into a proper GTF file. Full
+    information about the format can be found here:
+    https://agat.readthedocs.io/en/latest/gxf.html You can choose among 7
+    different GTF types (1, 2, 2.1, 2.2, 2.5, 3 or relax). Depending the
+    version selected the script will filter out the features that are not
+    accepted. For GTF2.5 and 3, every level1 feature (e.g nc_gene
+    pseudogene) will be converted into gene feature and every level2 feature
+    (e.g mRNA ncRNA) will be converted into transcript feature. Using the
+    "relax" option you will produce a GTF-like output keeping all original
+    feature types (3rd column). No modification will occur e.g. mRNA to
+    transcript.
+
+    To be fully GTF compliant all feature have a gene_id and a transcript_id
+    attribute. The gene_id is unique identifier for the genomic source of
+    the transcript, which is used to group transcripts into genes. The
+    transcript_id is a unique identifier for the predicted transcript, which
+    is used to group features into transcripts.
+
+Usage:
+        agat_convert_sp_gff2gtf.pl --gff infile.gff [ -o outfile ]
+        agat_convert_sp_gff2gtf -h
+
+Options:
+    --gff, --gtf or -i
+            Input GFF/GTF file that will be read
+
+    --gtf_version version of the GTF output (1,2,2.1,2.2,2.5,3 or relax).
+    Default value from AGAT config file (relax for the default config). The
+    script option has the higher priority.
+            relax: all feature types are accepted.
+
+            GTF3 (9 feature types accepted): gene, transcript, exon, CDS,
+            Selenocysteine, start_codon, stop_codon, three_prime_utr and
+            five_prime_utr
+
+            GTF2.5 (8 feature types accepted): gene, transcript, exon, CDS,
+            UTR, start_codon, stop_codon, Selenocysteine
+
+            GTF2.2 (9 feature types accepted): CDS, start_codon, stop_codon,
+            5UTR, 3UTR, inter, inter_CNS, intron_CNS and exon
+
+            GTF2.1 (6 feature types accepted): CDS, start_codon, stop_codon,
+            exon, 5UTR, 3UTR
+
+            GTF2 (4 feature types accepted): CDS, start_codon, stop_codon,
+            exon
+
+            GTF1 (5 feature types accepted): CDS, start_codon, stop_codon,
+            exon, intron
+
+    -o , --output , --out , --outfile or --gtf
+            Output GTF file. If no output file is specified, the output will
+            be written to STDOUT.
+
+    -c or --config
+            String - Input agat config file. By default AGAT takes as input
+            agat_config.yaml file from the working directory if any,
+            otherwise it takes the orignal agat_config.yaml shipped with
+            AGAT. To get the agat_config.yaml locally type: "agat config
+            --expose". The --config option gives you the possibility to use
+            your own AGAT config file (located elsewhere or named
+            differently).
+
+    -h or --help
+            Display this helpful text.
+
+Feedback:
+  Did you find a bug?:
+    Do not hesitate to report bugs to help us keep track of the bugs and
+    their resolution. Please use the GitHub issue tracking system available
+    at this address:
+
+                https://github.com/NBISweden/AGAT/issues
+
+     Ensure that the bug was not already reported by searching under Issues.
+     If you're unable to find an (open) issue addressing the problem, open a new one.
+     Try as much as possible to include in the issue when relevant:
+     - a clear description,
+     - as much relevant information as possible,
+     - the command used,
+     - a data sample,
+     - an explanation of the expected behaviour that is not occurring.
+
+  Do you want to contribute?:
+    You are very welcome, visit this address for the Contributing
+    guidelines:
+    https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md
+
diff --git a/src/agat/agat_convert_sp_gff2gtf/script.sh b/src/agat/agat_convert_sp_gff2gtf/script.sh
new file mode 100644
index 00000000..69d66739
--- /dev/null
+++ b/src/agat/agat_convert_sp_gff2gtf/script.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+# Convert $par_gff into $par_output; an optional flag is added only when its variable is non-empty.
+cli_args=(-i "$par_gff" -o "$par_output")
+[ -n "$par_gtf_version" ] && cli_args+=(--gtf_version "$par_gtf_version")
+[ -n "$par_config" ] && cli_args+=(--config "$par_config")
+agat_convert_sp_gff2gtf.pl "${cli_args[@]}"
diff --git a/src/agat/agat_convert_sp_gff2gtf/test.sh b/src/agat/agat_convert_sp_gff2gtf/test.sh
new file mode 100644
index 00000000..1e7cc142
--- /dev/null
+++ b/src/agat/agat_convert_sp_gff2gtf/test.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+# Smoke test: default conversion first, then a run with an explicit GTF version.
+test_dir="${meta_resources_dir}/test_data"
+
+echo "> Run $meta_name with test data"
+"$meta_executable" \
+  --gff "$test_dir/0_test.gff" \
+  --output "output.gtf" || { echo "$meta_name exited with an error"; exit 1; }
+
+echo ">> Checking output"
+[ ! -f "output.gtf" ] && echo "Output file output.gtf does not exist" && exit 1
+
+echo ">> Check if output is empty"
+[ ! -s "output.gtf" ] && echo "Output file output.gtf is empty" && exit 1
+
+echo ">> Check if the conversion resulted in the right GTF format"
+idGFF=$(head -n 2 "$test_dir/0_test.gff" | grep -o 'ID=[^;]*' | cut -d '=' -f 2-)
+expectedGTF="gene_id \"$idGFF\"; ID \"$idGFF\";"
+extractedGTF=$(head -n 3 "output.gtf" | grep -o 'gene_id "[^"]*"; ID "[^"]*";')
+[ "$extractedGTF" != "$expectedGTF" ] && echo "Output file output.gtf does not have the right format" && exit 1
+rm output.gtf
+
+echo "> Run $meta_name with test data and GTF version 2.5"
+"$meta_executable" \
+  --gff "$test_dir/0_test.gff" \
+  --output "output.gtf" \
+  --gtf_version "2.5" || { echo "$meta_name exited with an error"; exit 1; }
+
+echo ">> Check if the output file header display the right GTF version"
+# -F matches the version literally, so "." cannot act as a regex wildcard.
+grep -qF "##gtf-version 2.5" "output.gtf" || { echo "Output file output.gtf header does not display the right GTF version"; exit 1; }
+
+echo "> Test successful"
\ No newline at end of file
diff --git a/src/agat/agat_convert_sp_gff2gtf/test_data/0_test.gff b/src/agat/agat_convert_sp_gff2gtf/test_data/0_test.gff
new file mode 100644
index 00000000..fafe86ed
--- /dev/null
+++ b/src/agat/agat_convert_sp_gff2gtf/test_data/0_test.gff
@@ -0,0 +1,36 @@
+##gff-version 3
+scaffold625	maker	gene	337818	343277	.	+	.	ID=CLUHARG00000005458;Name=TUBB3_2
+scaffold625	maker	mRNA	337818	343277	.	+	.	ID=CLUHART00000008717;Parent=CLUHARG00000005458
+scaffold625	maker	exon	337818	337971	.	+	.	ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717
+scaffold625	maker	exon	340733	340841	.	+	.	ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717
+scaffold625	maker	exon	341518	341628	.	+	.	ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717
+scaffold625	maker	exon	341964	343277	.	+	.	ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717
+scaffold625	maker	CDS	337915	337971	.	+	0	ID=CLUHART00000008717:cds;Parent=CLUHART00000008717
+scaffold625	maker	CDS	340733	340841	.	+	0	ID=CLUHART00000008717:cds;Parent=CLUHART00000008717
+scaffold625	maker	CDS	341518	341628	.	+	2	ID=CLUHART00000008717:cds;Parent=CLUHART00000008717
+scaffold625	maker	CDS	341964	343033	.	+	2	ID=CLUHART00000008717:cds;Parent=CLUHART00000008717
+scaffold625	maker	five_prime_UTR	337818	337914	.	+	.	ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717
+scaffold625	maker	three_prime_UTR	343034	343277	.	+	.	ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717
+scaffold789	maker	gene	558184	564780	.	+	.	ID=CLUHARG00000003852;Name=PF11_0240
+scaffold789	maker	mRNA	558184	564780	.	+	.	ID=CLUHART00000006146;Parent=CLUHARG00000003852
+scaffold789	maker	exon	558184	560123	.	+	.	ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146
+scaffold789	maker	exon	561401	561519	.	+	.	ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146
+scaffold789	maker	exon	564171	564235	.	+	.	ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146
+scaffold789	maker	exon	564372	564780	.	+	.	ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146
+scaffold789	maker	CDS	558191	560123	.	+	0	ID=CLUHART00000006146:cds;Parent=CLUHART00000006146
+scaffold789	maker	CDS	561401	561519	.	+	2	ID=CLUHART00000006146:cds;Parent=CLUHART00000006146
+scaffold789	maker	CDS	564171	564235	.	+	0	ID=CLUHART00000006146:cds;Parent=CLUHART00000006146
+scaffold789	maker	CDS	564372	564588	.	+	1	ID=CLUHART00000006146:cds;Parent=CLUHART00000006146
+scaffold789	maker	five_prime_UTR	558184	558190	.	+	.	ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146
+scaffold789	maker	three_prime_UTR	564589	564780	.	+	.	ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146
+scaffold789	maker	mRNA	558184	564780	.	+	.	ID=CLUHART00000006147;Parent=CLUHARG00000003852
+scaffold789	maker	exon	558184	560123	.	+	.	ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147
+scaffold789	maker	exon	561401	561519	.	+	.	ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147
+scaffold789	maker	exon	562057	562121	.	+	.	ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147
+scaffold789	maker	exon	564372	564780	.	+	.	ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147
+scaffold789	maker	CDS	558191	560123	.	+	0	ID=CLUHART00000006147:cds;Parent=CLUHART00000006147
+scaffold789	maker	CDS	561401	561519	.	+	2	ID=CLUHART00000006147:cds;Parent=CLUHART00000006147
+scaffold789	maker	CDS	562057	562121	.	+	0	ID=CLUHART00000006147:cds;Parent=CLUHART00000006147
+scaffold789	maker	CDS	564372	564588	.	+	1	ID=CLUHART00000006147:cds;Parent=CLUHART00000006147
+scaffold789	maker	five_prime_UTR	558184	558190	.	+	.	ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147
+scaffold789	maker	three_prime_UTR	564589	564780	.	+	.	ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147
diff --git a/src/agat/agat_convert_sp_gff2gtf/test_data/script.sh b/src/agat/agat_convert_sp_gff2gtf/test_data/script.sh
new file mode 100755
index 00000000..e453e772
--- /dev/null
+++ b/src/agat/agat_convert_sp_gff2gtf/test_data/script.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# Fetch the AGAT sources once; an existing checkout in /tmp is reused as-is.
+agat_src=/tmp/agat_source
+[ -d "$agat_src" ] || \
+  git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT "$agat_src"
+
+# Destination is relative to the working directory: run from the repository root.
+cp -r "$agat_src/t/gff_syntax/in/0_test.gff" src/agat/agat_convert_sp_gff2gtf/test_data