diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 6e1fc4b3..2591978f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -3,114 +3,7 @@ name: Component Testing on: pull_request: push: - branches: [ '**' ] jobs: - run_ci_check_job: - runs-on: ubuntu-latest - outputs: - run_ci: ${{ steps.github_cli.outputs.check }} - steps: - - name: 'Check if branch has an existing pull request and the trigger was a push' - id: github_cli - run: | - pull_request=$(gh pr list -R ${{ github.repository }} -H ${{ github.ref_name }} --json url --state open --limit 1 | jq '.[0].url') - # If the branch has a PR and this run was triggered by a push event, do not run - if [[ "$pull_request" != "null" && "$GITHUB_REF_NAME" != "main" && "${{ github.event_name == 'push' }}" == "true" && "${{ !contains(github.event.head_commit.message, 'ci force') }}" == "true" ]]; then - echo "check=false" >> $GITHUB_OUTPUT - else - echo "check=true" >> $GITHUB_OUTPUT - fi - env: - GH_TOKEN: ${{ github.token }} - - # phase 1 - list: - needs: run_ci_check_job - runs-on: ubuntu-latest - if: ${{ needs.run_ci_check_job.outputs.run_ci == 'true' }} - - outputs: - matrix: ${{ steps.set_matrix.outputs.matrix }} - - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Get head git commit message - id: get_head_commit_message - run: echo "HEAD_COMMIT_MESSAGE=$(git show -s --format=%s ${{ github.event.pull_request.head.sha || github.sha }})" >> "$GITHUB_OUTPUT" - - - uses: viash-io/viash-actions/setup@v5 - - - name: Check if all config can be parsed if there is no unicode support - run: | - LANG=C viash ns list > /dev/null - - # see https://github.com/viash-io/viash/issues/654 - # and https://github.com/viash-io/viash-actions/pull/27 - # - name: Get changed files - # id: changed-files - # uses: tj-actions/changed-files@v42 - # with: - # separator: ";" - # diff_relative: true - # - id: ns_list - # uses: viash-io/viash-actions/ns-list@v5 - # with: - # platform: docker - # format: json - # query: ^(?!workflows) - # - id: ns_list_filtered - # uses: viash-io/viash-actions/project/detect-changed-components@v5 - # with: - # input_file: "${{ steps.ns_list.outputs.output_file }}" - # - id: set_matrix - # run: | - # echo "matrix=$(jq -c '[ .[] | - # { - # "name": (.functionality.namespace + "/" + .functionality.name), - # "config": .info.config, - # "dir": .info.config | capture("^(?.*\/)").dir - # } - # ]' ${{ contains(steps.get_head_commit_message.outputs.HEAD_COMMIT_MESSAGE, 'ci force') && steps.ns_list.outputs.output_file || steps.ns_list_filtered.outputs.output_file }} )" >> $GITHUB_OUTPUT - - - - id: set_matrix - run: | - viash ns list --format json > ns_list.json - echo "matrix=$(jq -c '[ .[] | - { - "name": (.namespace + "/" + .name), - "config": .build_info.config, - "dir": .build_info.config | capture("^(?.*\/)").dir - } - ]' ns_list.json )" >> $GITHUB_OUTPUT - - # phase 2 - viash_test: - needs: list - if: ${{ needs.list.outputs.matrix != '[]' && needs.list.outputs.matrix != '' }} - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - component: ${{ fromJson(needs.list.outputs.matrix) }} - - steps: - # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.' 
- - uses: data-intuitive/reclaim-the-bytes@v2 - - - uses: actions/checkout@v4 - - - uses: viash-io/viash-actions/setup@v5 - - - name: Run test - timeout-minutes: 30 - run: | - viash test \ - "${{ matrix.component.config }}" \ - --cpus 2 \ - --memory "6gb" \ No newline at end of file + test: + uses: viash-hub/toolbox/.github/workflows/test.yaml@main \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index bac18769..aa5304a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,10 @@ -# base unreleased +# biobox x.x.x + +## BUG FIXES + +* `pear`: fix component not exiting with the correct exitcode when PEAR fails. + +# biobox 0.1.0 ## BREAKING CHANGES @@ -17,6 +23,8 @@ - `busco/busco_list_datasets`: Lists available busco datasets (PR #18). - `busco/busco_download_datasets`: Download busco datasets (PR #19). +* `cutadapt`: Remove adapter sequences from high-throughput sequencing reads (PR #7). + * `featurecounts`: Assign sequence reads to genomic features (PR #11). * `bgzip`: Add bgzip functionality to compress and decompress files (PR #13). @@ -29,7 +37,9 @@ * `multiqc`: Aggregate results from bioinformatics analyses across many samples into a single report (PR #42). -* `star/star_align_reads`: Align reads to a reference genome (PR #22). +* `star`: + - `star/star_align_reads`: Align reads to a reference genome (PR #22). + - `star/star_genome_generate`: Generate a genome index for STAR alignment (PR #58). * `gffread`: Validate, filter, convert and perform other operations on GFF files (PR #29). @@ -52,8 +62,9 @@ * `falco`: A C++ drop-in replacement of FastQC to assess the quality of sequence read data (PR #43). - -## MAJOR CHANGES +* `bedtools`: + - `bedtools_getfasta`: extract sequences from a FASTA file for each of the + intervals defined in a BED/GFF/VCF file (PR #59). ## MINOR CHANGES @@ -63,8 +74,16 @@ * Update to Viash 0.9.0-RC3 (PR #51). +* Update to Viash 0.9.0-RC6 (PR #63). + +* Switch to viash-hub/toolbox actions (PR #64). + ## DOCUMENTATION +* Update README (PR #64). + ## BUG FIXES -* Add escaping character before leading hashtag in the description field of the config file (PR #50). \ No newline at end of file +* Add escaping character before leading hashtag in the description field of the config file (PR #50). + +* Format URL in biobase/bcl_convert description (PR #55). \ No newline at end of file diff --git a/README.md b/README.md index ecf807ca..4b497dcd 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,24 @@ -# Base repository for reusable Viash components -This repository is a collection of reproducible and reusable Viash -components. +# 🌱📦 biobox + +[![ViashHub](https://img.shields.io/badge/ViashHub-biobox-7a4baa.png)](https://web.viash-hub.com/packages/biobox) +[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2Fbiobox-blue.png)](https://github.com/viash-hub/biobox) +[![GitHub +License](https://img.shields.io/github/license/viash-hub/biobox.png)](https://github.com/viash-hub/biobox/blob/main/LICENSE) +[![GitHub +Issues](https://img.shields.io/github/issues/viash-hub/biobox.png)](https://github.com/viash-hub/biobox/issues) +[![Viash +version](https://img.shields.io/badge/Viash-v0.9.0--RC6-blue)](https://viash.io) + +A collection of bioinformatics tools for working with sequence data. ## Objectives - **Reusability**: Facilitating the use of components across various projects and contexts. -- **Reproducibility**: Guaranteeing that bioinformatics analyses can be - reliably replicated. 
+- **Reproducibility**: Ensuring that components are reproducible and can + be easily shared. - **Best Practices**: Adhering to established standards in software development and bioinformatics. @@ -43,18 +52,21 @@ contribute a component to this repository. 12. Create test script 13. Create a `/var/software_versions.txt` file -See the [CONTRIBUTING](CONTRIBUTING.md) file for more details. +See the +[CONTRIBUTING](https://github.com/viash-hub/biobox/blob/main/CONTRIBUTING.md) +file for more details. ## Support and Community For support, questions, or to join our community: - **Issues**: Submit questions or issues via the [GitHub issue - tracker](https://github.com/viash-hub/base/issues). + tracker](https://github.com/viash-hub/biobox/issues). - **Discussions**: Join our discussions via [GitHub - Discussions](https://github.com/viash-hub/base/discussions). + Discussions](https://github.com/viash-hub/biobox/discussions). ## License This repository is licensed under an MIT license. See the -[LICENSE](LICENSE) file for details. +[LICENSE](https://github.com/viash-hub/biobox/blob/main/LICENSE) file +for details. diff --git a/README.qmd b/README.qmd index 656cdac7..7d36430b 100644 --- a/README.qmd +++ b/README.qmd @@ -1,14 +1,25 @@ --- -title: Base repository for reusable Viash components format: gfm --- +```{r setup, include=FALSE} +project <- yaml::read_yaml("_viash.yaml") +license <- paste0(project$links$repository, "/blob/main/LICENSE") +contributing <- paste0(project$links$repository, "/blob/main/CONTRIBUTING.md") +``` +# 🌱📦 `r project$name` + +[![ViashHub](https://img.shields.io/badge/ViashHub-`r project$name`-7a4baa)](https://web.viash-hub.com/packages/`r project$name`) +[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2F`r project$name`-blue)](`r project$links$repository`) +[![GitHub License](https://img.shields.io/github/license/viash-hub/`r project$name`)](`r license`) +[![GitHub Issues](https://img.shields.io/github/issues/viash-hub/`r project$name`)](`r project$links$issue_tracker`) +[![Viash version](https://img.shields.io/badge/Viash-v`r gsub("-", "--", project$viash_version)`-blue)](https://viash.io) -This repository is a collection of reproducible and reusable Viash components. +`r project$description` ## Objectives - **Reusability**: Facilitating the use of components across various projects and contexts. -- **Reproducibility**: Guaranteeing that bioinformatics analyses can be reliably replicated. +- **Reproducibility**: Ensuring that components are reproducible and can be easily shared. - **Best Practices**: Adhering to established standards in software development and bioinformatics. ## Contributing @@ -37,15 +48,15 @@ knitr::asis_output( ) ``` -See the [CONTRIBUTING](CONTRIBUTING.md) file for more details. +See the [CONTRIBUTING](`r contributing`) file for more details. ## Support and Community For support, questions, or to join our community: -- **Issues**: Submit questions or issues via the [GitHub issue tracker](https://github.com/viash-hub/base/issues). -- **Discussions**: Join our discussions via [GitHub Discussions](https://github.com/viash-hub/base/discussions). +- **Issues**: Submit questions or issues via the [GitHub issue tracker](`r project$links$issue_tracker`). +- **Discussions**: Join our discussions via [GitHub Discussions](`r project$links$repository`/discussions). ## License -This repository is licensed under an MIT license. See the [LICENSE](LICENSE) file for details. +This repository is licensed under an MIT license. 
See the [LICENSE](`r license`) file for details. diff --git a/_viash.yaml b/_viash.yaml index 397a8d69..9a240c24 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -1,13 +1,13 @@ -name: biobase +name: biobox description: | A collection of bioinformatics tools for working with sequence data. license: MIT -keywords: [bioinformatics, sequence, alignment, variant calling] +keywords: [bioinformatics, modules, sequencing] links: - issue_tracker: https://github.com/viash-hub/biobase/issues - repository: https://github.com/viash-hub/biobase + issue_tracker: https://github.com/viash-hub/biobox/issues + repository: https://github.com/viash-hub/biobox -viash_version: 0.9.0-RC3 +viash_version: 0.9.0-RC6 config_mods: | .requirements.commands := ['ps'] diff --git a/src/bcl_convert/config.vsh.yaml b/src/bcl_convert/config.vsh.yaml index d0cedfa7..657fb1f0 100644 --- a/src/bcl_convert/config.vsh.yaml +++ b/src/bcl_convert/config.vsh.yaml @@ -2,8 +2,8 @@ name: bcl_convert description: | Convert bcl files to fastq files using bcl-convert. Information about upgrading from bcl2fastq via - https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html - and https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html + [Upgrading from bcl2fastq to BCL Convert](https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html) + and [BCL Convert Compatible Products](https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html) argument_groups: - name: Input arguments arguments: diff --git a/src/bedtools/bedtools_getfasta/config.vsh.yaml b/src/bedtools/bedtools_getfasta/config.vsh.yaml new file mode 100644 index 00000000..f1f49a87 --- /dev/null +++ b/src/bedtools/bedtools_getfasta/config.vsh.yaml @@ -0,0 +1,103 @@ +name: bedtools_getfasta +namespace: bedtools +description: Extract sequences from a FASTA file for each of the intervals defined in a BED/GFF/VCF file. +keywords: [sequencing, fasta, BED, GFF, VCF] +links: + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/getfasta.html + repository: https://github.com/arq5x/bedtools2 +references: + doi: 10.1093/bioinformatics/btq033 +license: GPL-2.0 +requirements: + commands: [bedtools] + +argument_groups: + - name: Input arguments + arguments: + - name: --input_fasta + type: file + description: | + FASTA file containing sequences for each interval specified in the input BED file. + The headers in the input FASTA file must exactly match the chromosome column in the BED file. + - name: "--input_bed" + type: file + description: | + BED file containing intervals to extract from the FASTA file. + BED files containing a single region require a newline character + at the end of the line, otherwise a blank output file is produced. + - name: --rna + type: boolean_true + description: | + The FASTA is RNA not DNA. Reverse complementation handled accordingly. + + - name: Run arguments + arguments: + - name: "--strandedness" + type: boolean_true + alternatives: ["-s"] + description: | + Force strandedness. If the feature occupies the antisense strand, the output sequence will + be reverse complemented. By default strandedness is not taken into account. + + - name: Output arguments + arguments: + - name: --output + alternatives: [-o] + required: true + type: file + direction: output + description: | + Output file where the output from the 'bedtools getfasta' commend will + be written to. 
+      - name: --tab
+        type: boolean_true
+        description: |
+          Report extracted sequences in a tab-delimited format instead of in FASTA format.
+      - name: --bed_out
+        type: boolean_true
+        description: |
+          Report extracted sequences in a tab-delimited BED format instead of in FASTA format.
+      - name: "--name"
+        type: boolean_true
+        description: |
+          Set the FASTA header for each extracted sequence to be the "name" and coordinate columns from the BED feature.
+      - name: "--name_only"
+        type: boolean_true
+        description: |
+          Set the FASTA header for each extracted sequence to be the "name" column from the BED feature.
+      - name: "--split"
+        type: boolean_true
+        description: |
+          When --input_bed is in BED12 format, create a separate fasta entry for each block in a BED12 record,
+          blocks being described in the 11th and 12th column of the BED.
+      - name: "--full_header"
+        type: boolean_true
+        description: |
+          Use full fasta header. By default, only the word before the first space or tab is used.
+
+# Arguments not taken into account:
+#
+# -fo: Specify an output file name. By default, output goes to stdout.
+#
+
+resources:
+  - type: bash_script
+    path: script.sh
+
+test_resources:
+  - type: bash_script
+    path: test.sh
+
+engines:
+  - type: docker
+    image: debian:stable-slim
+    setup:
+      - type: apt
+        packages: [bedtools, procps]
+      - type: docker
+        run: |
+          echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt
+
+runners:
+  - type: executable
+  - type: nextflow
diff --git a/src/bedtools/bedtools_getfasta/script.sh b/src/bedtools/bedtools_getfasta/script.sh
new file mode 100644
index 00000000..8e88b318
--- /dev/null
+++ b/src/bedtools/bedtools_getfasta/script.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+unset_if_false=( par_rna par_strandedness par_tab par_bed_out par_name par_name_only par_split par_full_header )
+
+for par in ${unset_if_false[@]}; do
+  test_val="${!par}"
+  [[ "$test_val" == "false" ]] && unset $par
+done
+
+bedtools getfasta \
+  -fi "$par_input_fasta" \
+  -bed "$par_input_bed" \
+  ${par_rna:+-rna} \
+  ${par_name:+-name} \
+  ${par_name_only:+-nameOnly} \
+  ${par_tab:+-tab} \
+  ${par_bed_out:+-bedOut} \
+  ${par_strandedness:+-s} \
+  ${par_split:+-split} \
+  ${par_full_header:+-fullHeader} > "$par_output"
+
diff --git a/src/bedtools/bedtools_getfasta/test.sh b/src/bedtools/bedtools_getfasta/test.sh
new file mode 100644
index 00000000..a28e3a7e
--- /dev/null
+++ b/src/bedtools/bedtools_getfasta/test.sh
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+TMPDIR=$(mktemp -d)
+function clean_up {
+  [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR"
+}
+trap clean_up EXIT
+
+# Create dummy test fasta file
+cat > "$TMPDIR/test.fa" <<EOF
+>chr1
+AAAAAAAACCCCCCCCCCCCCGCTACTGGGGGGGGGGGGGGGGGG
+EOF
+
+TAB="$(printf '\t')"
+
+# Create dummy bed file
+cat > "$TMPDIR/test.bed" <<EOF
+chr1${TAB}5${TAB}10${TAB}myseq
+EOF
+
+# Create expected fasta file
+cat > "$TMPDIR/expected.fasta" <<EOF
+>chr1:5-10
+AAACC
+EOF
+
+"$meta_executable" \
+  --input_bed "$TMPDIR/test.bed" \
+  --input_fasta "$TMPDIR/test.fa" \
+  --output "$TMPDIR/output.fasta"
+
+cmp --silent "$TMPDIR/output.fasta" "$TMPDIR/expected.fasta" || { echo "files are different:"; exit 1; }
+
+
+# Create expected fasta file for --name
+cat > "$TMPDIR/expected_with_name.fasta" <<EOF
+>myseq::chr1:5-10
+AAACC
+EOF
+
+"$meta_executable" \
+  --input_bed "$TMPDIR/test.bed" \
+  --input_fasta "$TMPDIR/test.fa" \
+  --name \
+  --output "$TMPDIR/output_with_name.fasta"
+
+
+cmp --silent "$TMPDIR/output_with_name.fasta" "$TMPDIR/expected_with_name.fasta" || { echo "Files when using --name are different."; exit 1; }
+
+# Create expected fasta file for --name_only
+cat > "$TMPDIR/expected_with_name_only.fasta" <<EOF
+>myseq
+AAACC
+EOF
+
+"$meta_executable" \
+  --input_bed "$TMPDIR/test.bed" \
+  --input_fasta "$TMPDIR/test.fa" \
+  --name_only \
+  --output "$TMPDIR/output_with_name_only.fasta"
+
+cmp --silent "$TMPDIR/output_with_name_only.fasta" "$TMPDIR/expected_with_name_only.fasta" || { echo "Files when using --name_only are different."; exit 1; }
+
+
+# Create expected tab-delimited file for --tab
+cat > "$TMPDIR/expected_tab.out" <<EOF
+chr1:5-10${TAB}AAACC
+EOF
+
+# Create expected bed file for --bed_out
+cat > "$TMPDIR/expected.bed" <<EOF
+chr1${TAB}5${TAB}10${TAB}myseq${TAB}AAACC
+EOF
+
+# Create dummy bed file for testing strandedness
+cat > "$TMPDIR/test_strandedness.bed" <<EOF
+chr1${TAB}20${TAB}25${TAB}forward${TAB}0${TAB}+
+chr1${TAB}20${TAB}25${TAB}reverse${TAB}0${TAB}-
+EOF
+
+# Create expected fasta file for -s
+cat > "$TMPDIR/expected_strandedness.fasta" <<EOF
+>forward(+)
+CGCTA
+>reverse(-)
+TAGCG
+EOF
+
+"$meta_executable" \
+  --input_bed "$TMPDIR/test_strandedness.bed" \
+  --input_fasta "$TMPDIR/test.fa" \
+  -s \
+  --name_only \
+  --output "$TMPDIR/output_strandedness.fasta"
+
+
+cmp --silent "$TMPDIR/expected_strandedness.fasta" "$TMPDIR/output_strandedness.fasta" || { echo "Files when using -s are different."; exit 1; }
+
diff --git a/src/bgzip/config.vsh.yaml b/src/bgzip/config.vsh.yaml
deleted file mode 100644
index 26e31ae4..00000000
--- a/src/bgzip/config.vsh.yaml
+++ /dev/null
@@ -1,128 +0,0 @@
-name: bgzip
-description: Block compression/decompression utility
-links:
-  homepage: https://www.htslib.org/
-  documentation: https://www.htslib.org/doc/bgzip.html
-  repository: https://github.com/samtools/htslib
-references:
-  doi: 10.1093/gigascience/giab007
-license: MIT
-requirements:
-  commands: [ bgzip ]
-argument_groups:
-  - name: Inputs
-    arguments:
-      - name: --input
-        type: file
-        direction: input
-        description: file to be compressed or decompressed
-        required: true
-  - name: Outputs
-    arguments:
-      - name: --output
-        type: file
-        direction: output
-        description: compressed or decompressed output
-        required: true
-      - name: --index_name
-        alternatives: -I
-        type: file
-        direction: output
-        description: name of BGZF index file [file.gz.gzi]
-  - name: Arguments
-    arguments:
-      - name: --offset
-        alternatives: -b
-        type: integer
-        description: decompress at virtual file pointer (0-based uncompressed offset)
-      - name: --decompress
-        alternatives: -d
-        type: boolean_true
-        description: decompress the input file
-      - name: --rebgzip
-        alternatives: -g
-        type: boolean_true
-        description: use an index file to bgzip a file
-      - name: --index
-        alternatives: -i
-        type: boolean_true
-        description: compress and create BGZF index
-      - name: --compress_level
-        alternatives: -l
-        type: integer
-        description: compression level to use when compressing; 0 to 9, or -1 for default [-1]
-        min: -1
-        max: 9
-      - name: --reindex
-        alternatives: -r
-        type: boolean_true
-        description: (re)index the output file
-      - name: --size
-        alternatives: -s
-        type: integer
-        description: decompress INT bytes (uncompressed size)
-        min: 0
-      - name: --test
-        alternatives: -t
-        type: boolean_true
-        description: test integrity of compressed file
-      - name: --binary
-        type: boolean_true
-        description: Don't align blocks with text lines
-resources:
-  - type: bash_script
-    text: |
-      [[ "$par_decompress" == "false" ]] && unset par_decompress
-      [[ "$par_rebgzip" == "false" ]] && unset par_rebgzip
-      [[ "$par_index" == "false" ]] && unset par_index
-      [[ "$par_reindex" == "false" ]] && unset par_reindex
-      [[ "$par_test" == "false" ]] && unset par_test
-      [[ "$par_binary" == "false" ]] && unset par_binary
-      bgzip -c \
-        ${meta_cpus:+--threads "${meta_cpus}"} \
-        ${par_offset:+-b "${par_offset}"} \
-        ${par_decompress:+-d} \
-        ${par_rebgzip:+-g} \
-        ${par_index:+-i} \
-        ${par_index_name:+-I 
"${par_index_name}"} \ - ${par_compress_level:+-l "${par_compress_level}"} \ - ${par_reindex:+-r} \ - ${par_size:+-s "${par_size}"} \ - ${par_test:+-t} \ - ${par_binary:+--binary} \ - "$par_input" > "$par_output" -test_resources: - - type: bash_script - text: | - set -e - - "$meta_executable" --input "$meta_resources_dir/test_data/test.vcf" --output "test.vcf.gz" - - echo ">> Checking output of compressing" - [ ! -f "test.vcf.gz" ] && echo "Output file test.vcf.gz does not exist" && exit 1 - - "$meta_executable" --input "test.vcf.gz" --output "test.vcf" --decompress - - echo ">> Checking output of decompressing" - [ ! -f "test.vcf" ] && echo "Output file test.vcf does not exist" && exit 1 - - echo ">> Checking original and decompressed files are the same" - set +e - cmp --silent -- "$meta_resources_dir/test_data/test.vcf" "test.vcf" - [ $? -ne 0 ] && echo "files are different" && exit 1 - set -e - - echo "> Test successful" - - type: file - path: test_data - -engines: - - type: docker - image: quay.io/biocontainers/htslib:1.19--h81da01d_0 - setup: - - type: docker - run: | - bgzip -h | grep 'Version:' 2>&1 | sed 's/Version:\s\(.*\)/bgzip: "\1"/' > /var/software_versions.txt -runners: - - type: executable - - type: nextflow \ No newline at end of file diff --git a/src/bgzip/help.txt b/src/bgzip/help.txt deleted file mode 100644 index d4012efd..00000000 --- a/src/bgzip/help.txt +++ /dev/null @@ -1,22 +0,0 @@ -```bash -bgzip -h -``` - -Version: 1.19 -Usage: bgzip [OPTIONS] [FILE] ... -Options: - -b, --offset INT decompress at virtual file pointer (0-based uncompressed offset) - -c, --stdout write on standard output, keep original files unchanged - -d, --decompress decompress - -f, --force overwrite files without asking - -g, --rebgzip use an index file to bgzip a file - -h, --help give this help - -i, --index compress and create BGZF index - -I, --index-name FILE name of BGZF index file [file.gz.gzi] - -k, --keep don't delete input files during operation - -l, --compress-level INT Compression level to use when compressing; 0 to 9, or -1 for default [-1] - -r, --reindex (re)index compressed file - -s, --size INT decompress INT bytes (uncompressed size) - -t, --test test integrity of compressed file - --binary Don't align blocks with text lines - -@, --threads INT number of compression threads to use [1] diff --git a/src/bgzip/test_data/script.sh b/src/bgzip/test_data/script.sh deleted file mode 100644 index c9114473..00000000 --- a/src/bgzip/test_data/script.sh +++ /dev/null @@ -1,10 +0,0 @@ -# bgzip test data - -# Test data was obtained from https://github.com/snakemake/snakemake-wrappers/tree/master/bio/bgzip/test. - -if [ ! 
-d /tmp/snakemake-wrappers ]; then - git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers -fi - -cp -r /tmp/snakemake-wrappers/bio/bgzip/test/* src/bgzip/test_data - diff --git a/src/bgzip/test_data/test.vcf b/src/bgzip/test_data/test.vcf deleted file mode 100644 index 11b5400e..00000000 --- a/src/bgzip/test_data/test.vcf +++ /dev/null @@ -1,23 +0,0 @@ -##fileformat=VCFv4.0 -##fileDate=20090805 -##source=https://www.internationalgenome.org/wiki/Analysis/vcf4.0/ -##reference=1000GenomesPilot-NCBI36 -##phasing=partial -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 -20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. -20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3 -20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4 -20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 -20 1234567 microsat1 GTCT G,GTACT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 diff --git a/src/cutadapt/config.vsh.yaml b/src/cutadapt/config.vsh.yaml new file mode 100644 index 00000000..a62f0aa9 --- /dev/null +++ b/src/cutadapt/config.vsh.yaml @@ -0,0 +1,463 @@ +name: cutadapt +description: | + Cutadapt removes adapter sequences from high-throughput sequencing reads. +keywords: [RNA-seq, scRNA-seq, high-throughput] +links: + homepage: https://cutadapt.readthedocs.io + documentation: https://cutadapt.readthedocs.io + repository: https://github.com/marcelm/cutadapt +references: + doi: 10.14806/ej.17.1.200 +license: MIT +argument_groups: + #################################################################### + - name: Specify Adapters for R1 + arguments: + - name: --adapter + alternatives: [-a] + type: string + multiple: true + description: | + Sequence of an adapter ligated to the 3' end (paired data: + of the first read). The adapter and subsequent bases are + trimmed. If a '$' character is appended ('anchoring'), the + adapter is only found if it is a suffix of the read. + required: false + - name: --front + alternatives: [-g] + type: string + multiple: true + description: | + Sequence of an adapter ligated to the 5' end (paired data: + of the first read). The adapter and any preceding bases + are trimmed. Partial matches at the 5' end are allowed. If + a '^' character is prepended ('anchoring'), the adapter is + only found if it is a prefix of the read. + required: false + - name: --anywhere + alternatives: [-b] + type: string + multiple: true + description: | + Sequence of an adapter that may be ligated to the 5' or 3' + end (paired data: of the first read). Both types of + matches as described under -a and -g are allowed. If the + first base of the read is part of the match, the behavior + is as with -g, otherwise as with -a. This option is mostly + for rescuing failed library preparations - do not use if + you know which end your adapter was ligated to! + required: false + + #################################################################### + - name: Specify Adapters using Fasta files for R1 + arguments: + - name: --adapter_fasta + type: file + multiple: true + description: | + Fasta file containing sequences of an adapter ligated to the 3' end (paired data: + of the first read). 
The adapter and subsequent bases are + trimmed. If a '$' character is appended ('anchoring'), the + adapter is only found if it is a suffix of the read. + required: false + - name: --front_fasta + type: file + description: | + Fasta file containing sequences of an adapter ligated to the 5' end (paired data: + of the first read). The adapter and any preceding bases + are trimmed. Partial matches at the 5' end are allowed. If + a '^' character is prepended ('anchoring'), the adapter is + only found if it is a prefix of the read. + required: false + - name: --anywhere_fasta + type: file + description: | + Fasta file containing sequences of an adapter that may be ligated to the 5' or 3' + end (paired data: of the first read). Both types of + matches as described under -a and -g are allowed. If the + first base of the read is part of the match, the behavior + is as with -g, otherwise as with -a. This option is mostly + for rescuing failed library preparations - do not use if + you know which end your adapter was ligated to! + required: false + + #################################################################### + - name: Specify Adapters for R2 + arguments: + - name: --adapter_r2 + alternatives: [-A] + type: string + multiple: true + description: | + Sequence of an adapter ligated to the 3' end (paired data: + of the first read). The adapter and subsequent bases are + trimmed. If a '$' character is appended ('anchoring'), the + adapter is only found if it is a suffix of the read. + required: false + - name: --front_r2 + alternatives: [-G] + type: string + multiple: true + description: | + Sequence of an adapter ligated to the 5' end (paired data: + of the first read). The adapter and any preceding bases + are trimmed. Partial matches at the 5' end are allowed. If + a '^' character is prepended ('anchoring'), the adapter is + only found if it is a prefix of the read. + required: false + - name: --anywhere_r2 + alternatives: [-B] + type: string + multiple: true + description: | + Sequence of an adapter that may be ligated to the 5' or 3' + end (paired data: of the first read). Both types of + matches as described under -a and -g are allowed. If the + first base of the read is part of the match, the behavior + is as with -g, otherwise as with -a. This option is mostly + for rescuing failed library preparations - do not use if + you know which end your adapter was ligated to! + required: false + + #################################################################### + - name: Specify Adapters using Fasta files for R2 + arguments: + - name: --adapter_r2_fasta + type: file + description: | + Fasta file containing sequences of an adapter ligated to the 3' end (paired data: + of the first read). The adapter and subsequent bases are + trimmed. If a '$' character is appended ('anchoring'), the + adapter is only found if it is a suffix of the read. + required: false + - name: --front_r2_fasta + type: file + description: | + Fasta file containing sequences of an adapter ligated to the 5' end (paired data: + of the first read). The adapter and any preceding bases + are trimmed. Partial matches at the 5' end are allowed. If + a '^' character is prepended ('anchoring'), the adapter is + only found if it is a prefix of the read. + required: false + - name: --anywhere_r2_fasta + type: file + description: | + Fasta file containing sequences of an adapter that may be ligated to the 5' or 3' + end (paired data: of the first read). Both types of + matches as described under -a and -g are allowed. 
If the + first base of the read is part of the match, the behavior + is as with -g, otherwise as with -a. This option is mostly + for rescuing failed library preparations - do not use if + you know which end your adapter was ligated to! + required: false + + #################################################################### + - name: Paired-end options + arguments: + - name: --pair_adapters + type: boolean_true + description: | + Treat adapters given with -a/-A etc. as pairs. Either both + or none are removed from each read pair. + - name: --pair_filter + type: string + choices: [any, both, first] + description: | + Which of the reads in a paired-end read have to match the + filtering criterion in order for the pair to be filtered. + - name: --interleaved + type: boolean_true + description: | + Read and/or write interleaved paired-end reads. + + #################################################################### + - name: Input parameters + arguments: + - name: --input + type: file + required: true + description: | + Input fastq file for single-end reads or R1 for paired-end reads. + - name: --input_r2 + type: file + required: false + description: | + Input fastq file for R2 in the case of paired-end reads. + - name: --error_rate + alternatives: [-E, --errors] + type: double + description: | + Maximum allowed error rate (if 0 <= E < 1), or absolute + number of errors for full-length adapter match (if E is an + integer >= 1). Error rate = no. of errors divided by + length of matching region. Default: 0.1 (10%). + example: 0.1 + - name: --no_indels + type: boolean_false + description: | + Allow only mismatches in alignments. + + - name: --times + type: integer + alternatives: [-n] + description: | + Remove up to COUNT adapters from each read. Default: 1. + example: 1 + - name: --overlap + alternatives: [-O] + type: integer + description: | + Require MINLENGTH overlap between read and adapter for an + adapter to be found. The default is 3. + example: 3 + - name: --match_read_wildcards + type: boolean_true + description: | + Interpret IUPAC wildcards in reads. + - name: --no_match_adapter_wildcards + type: boolean_false + description: | + Do not interpret IUPAC wildcards in adapters. + - name: --action + type: string + choices: + - trim + - retain + - mask + - lowercase + - none + description: | + What to do if a match was found. trim: trim adapter and + up- or downstream sequence; retain: trim, but retain + adapter; mask: replace with 'N' characters; lowercase: + convert to lowercase; none: leave unchanged. + The default is trim. + example: trim + - name: --revcomp + alternatives: [--rc] + type: boolean_true + description: | + Check both the read and its reverse complement for adapter + matches. If match is on reverse-complemented version, + output that one. + + #################################################################### + - name: Read modifications + arguments: + - name: --cut + alternatives: [-u] + type: integer + multiple: true + description: | + Remove LEN bases from each read (or R1 if paired; use --cut_r2 + option for R2). If LEN is positive, remove bases from the + beginning. If LEN is negative, remove bases from the end. + Can be used twice if LENs have different signs. Applied + *before* adapter trimming. + - name: --cut_r2 + type: integer + multiple: true + description: | + Remove LEN bases from each read (for R2). If LEN is positive, remove bases from the + beginning. If LEN is negative, remove bases from the end. + Can be used twice if LENs have different signs. 
Applied + *before* adapter trimming. + - name: --nextseq_trim + type: string + description: | + NextSeq-specific quality trimming (each read). Trims also + dark cycles appearing as high-quality G bases. + - name: --quality_cutoff + alternatives: [-q] + type: string + description: | + Trim low-quality bases from 5' and/or 3' ends of each read + before adapter removal. Applied to both reads if data is + paired. If one value is given, only the 3' end is trimmed. + If two comma-separated cutoffs are given, the 5' end is + trimmed with the first cutoff, the 3' end with the second. + - name: --quality_cutoff_r2 + alternatives: [-Q] + type: string + description: | + Quality-trimming cutoff for R2. Default: same as for R1 + - name: --quality_base + type: integer + description: | + Assume that quality values in FASTQ are encoded as + ascii(quality + N). This needs to be set to 64 for some + old Illumina FASTQ files. The default is 33. + example: 33 + - name: --poly_a + type: boolean_true + description: Trim poly-A tails + - name: --length + alternatives: [-l] + type: integer + description: | + Shorten reads to LENGTH. Positive values remove bases at + the end while negative ones remove bases at the beginning. + This and the following modifications are applied after + adapter trimming. + - name: --trim_n + type: boolean_true + description: Trim N's on ends of reads. + - name: --length_tag + type: string + description: | + Search for TAG followed by a decimal number in the + description field of the read. Replace the decimal number + with the correct length of the trimmed read. For example, + use --length-tag 'length=' to correct fields like + 'length=123'. + example: "length=" + - name: --strip_suffix + type: string + description: | + Remove this suffix from read names if present. Can be + given multiple times. + - name: --prefix + alternatives: [-x] + type: string + description: | + Add this prefix to read names. Use {name} to insert the + name of the matching adapter. + - name: --suffix + alternatives: [-y] + type: string + description: | + Add this suffix to read names; can also include {name} + - name: --rename + type: string + description: | + Rename reads using TEMPLATE containing variables such as + {id}, {adapter_name} etc. (see documentation) + - name: --zero_cap + alternatives: [-z] + type: boolean_true + description: Change negative quality values to zero. + + #################################################################### + - name: Filtering of processed reads + description: | + Filters are applied after above read modifications. Paired-end reads are + always discarded pairwise (see also --pair_filter). + arguments: + - name: --minimum_length + alternatives: [-m] + type: string + description: | + Discard reads shorter than LEN. Default is 0. + When trimming paired-end reads, the minimum lengths for R1 and R2 can be specified separately by separating them with a colon (:). + If the colon syntax is not used, the same minimum length applies to both reads, as discussed above. + Also, one of the values can be omitted to impose no restrictions. + For example, with -m 17:, the length of R1 must be at least 17, but the length of R2 is ignored. + example: "0" + - name: --maximum_length + alternatives: [-M] + type: string + description: | + Discard reads longer than LEN. Default: no limit. + For paired reads, see the remark for --minimum_length + - name: --max_n + type: string + description: | + Discard reads with more than COUNT 'N' bases. 
If COUNT is + a number between 0 and 1, it is interpreted as a fraction + of the read length. + - name: --max_expected_errors + alternatives: [--max_ee] + type: long + description: | + Discard reads whose expected number of errors (computed + from quality values) exceeds ERRORS. + - name: --max_average_error_rate + alternatives: [--max_aer] + type: long + description: | + as --max_expected_errors (see above), but divided by + length to account for reads of varying length. + - name: --discard_trimmed + alternatives: [--discard] + type: boolean_true + description: | + Discard reads that contain an adapter. Use also -O to + avoid discarding too many randomly matching reads. + - name: --discard_untrimmed + alternatives: [--trimmed_only] + type: boolean_true + description: | + Discard reads that do not contain an adapter. + - name: --discard_casava + type: boolean_true + description: | + Discard reads that did not pass CASAVA filtering (header + has :Y:). + + #################################################################### + - name: Output parameters + arguments: + - name: --report + type: string + choices: [full, minimal] + description: | + Which type of report to print: 'full' (default) or 'minimal'. + example: full + - name: --json + type: boolean_true + description: | + Write report in JSON format to this file. + - name: --output + type: file + description: | + Glob pattern for matching the expected output files. + Should include `$output_dir`. + example: "fastq/*_001.fast[a,q]" + direction: output + required: true + must_exist: true + multiple: true + - name: --fasta + type: boolean_true + description: | + Output FASTA to standard output even on FASTQ input. + - name: --info_file + type: boolean_true + description: | + Write information about each read and its adapter matches + into info.txt in the output directory. + See the documentation for the file format. + # - name: -Z + # - name: --rest_file + # - name: --wildcard-file + # - name: --too_short_output + # - name: --too_long_output + # - name: --untrimmed_output + # - name: --untrimmed_paired_output + # - name: too_short_paired_output + # - name: too_long_paired_output + - name: Debug + arguments: + - type: boolean_true + name: --debug + description: Print debug information +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + +engines: + - type: docker + image: python:3.12 + setup: + - type: python + pip: + - cutadapt + - type: docker + run: | + cutadapt --version | sed 's/\(.*\)/cutadapt: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow diff --git a/src/cutadapt/help.txt b/src/cutadapt/help.txt new file mode 100644 index 00000000..2280c3e2 --- /dev/null +++ b/src/cutadapt/help.txt @@ -0,0 +1,218 @@ +cutadapt version 4.6 + +Copyright (C) 2010 Marcel Martin and contributors + +Cutadapt removes adapter sequences from high-throughput sequencing reads. + +Usage: + cutadapt -a ADAPTER [options] [-o output.fastq] input.fastq + +For paired-end reads: + cutadapt -a ADAPT1 -A ADAPT2 [options] -o out1.fastq -p out2.fastq in1.fastq in2.fastq + +Replace "ADAPTER" with the actual sequence of your 3' adapter. IUPAC wildcard +characters are supported. All reads from input.fastq will be written to +output.fastq with the adapter sequence removed. Adapter matching is +error-tolerant. Multiple adapter sequences can be given (use further -a +options), but only the best-matching adapter will be removed. + +Input may also be in FASTA format. 
Compressed input and output is supported and +auto-detected from the file name (.gz, .xz, .bz2). Use the file name '-' for +standard input/output. Without the -o option, output is sent to standard output. + +Citation: + +Marcel Martin. Cutadapt removes adapter sequences from high-throughput +sequencing reads. EMBnet.Journal, 17(1):10-12, May 2011. +http://dx.doi.org/10.14806/ej.17.1.200 + +Run "cutadapt --help" to see all command-line options. +See https://cutadapt.readthedocs.io/ for full documentation. + +Options: + -h, --help Show this help message and exit + --version Show version number and exit + --debug Print debug log. Use twice to also print DP matrices + -j CORES, --cores CORES + Number of CPU cores to use. Use 0 to auto-detect. Default: + 1 + +Finding adapters: + Parameters -a, -g, -b specify adapters to be removed from each read (or from + R1 if data is paired-end. If specified multiple times, only the best matching + adapter is trimmed (but see the --times option). Use notation 'file:FILE' to + read adapter sequences from a FASTA file. + + -a ADAPTER, --adapter ADAPTER + Sequence of an adapter ligated to the 3' end (paired data: + of the first read). The adapter and subsequent bases are + trimmed. If a '$' character is appended ('anchoring'), the + adapter is only found if it is a suffix of the read. + -g ADAPTER, --front ADAPTER + Sequence of an adapter ligated to the 5' end (paired data: + of the first read). The adapter and any preceding bases + are trimmed. Partial matches at the 5' end are allowed. If + a '^' character is prepended ('anchoring'), the adapter is + only found if it is a prefix of the read. + -b ADAPTER, --anywhere ADAPTER + Sequence of an adapter that may be ligated to the 5' or 3' + end (paired data: of the first read). Both types of + matches as described under -a and -g are allowed. If the + first base of the read is part of the match, the behavior + is as with -g, otherwise as with -a. This option is mostly + for rescuing failed library preparations - do not use if + you know which end your adapter was ligated to! + -e E, --error-rate E, --errors E + Maximum allowed error rate (if 0 <= E < 1), or absolute + number of errors for full-length adapter match (if E is an + integer >= 1). Error rate = no. of errors divided by + length of matching region. Default: 0.1 (10%) + --no-indels Allow only mismatches in alignments. Default: allow both + mismatches and indels + -n COUNT, --times COUNT + Remove up to COUNT adapters from each read. Default: 1 + -O MINLENGTH, --overlap MINLENGTH + Require MINLENGTH overlap between read and adapter for an + adapter to be found. Default: 3 + --match-read-wildcards + Interpret IUPAC wildcards in reads. Default: False + -N, --no-match-adapter-wildcards + Do not interpret IUPAC wildcards in adapters. + --action {trim,retain,mask,lowercase,none} + What to do if a match was found. trim: trim adapter and + up- or downstream sequence; retain: trim, but retain + adapter; mask: replace with 'N' characters; lowercase: + convert to lowercase; none: leave unchanged. Default: trim + --rc, --revcomp Check both the read and its reverse complement for adapter + matches. If match is on reverse-complemented version, + output that one. Default: check only read + +Additional read modifications: + -u LEN, --cut LEN Remove LEN bases from each read (or R1 if paired; use -U + option for R2). If LEN is positive, remove bases from the + beginning. If LEN is negative, remove bases from the end. + Can be used twice if LENs have different signs. 
Applied + *before* adapter trimming. + --nextseq-trim 3'CUTOFF + NextSeq-specific quality trimming (each read). Trims also + dark cycles appearing as high-quality G bases. + -q [5'CUTOFF,]3'CUTOFF, --quality-cutoff [5'CUTOFF,]3'CUTOFF + Trim low-quality bases from 5' and/or 3' ends of each read + before adapter removal. Applied to both reads if data is + paired. If one value is given, only the 3' end is trimmed. + If two comma-separated cutoffs are given, the 5' end is + trimmed with the first cutoff, the 3' end with the second. + --quality-base N Assume that quality values in FASTQ are encoded as + ascii(quality + N). This needs to be set to 64 for some + old Illumina FASTQ files. Default: 33 + --poly-a Trim poly-A tails + --length LENGTH, -l LENGTH + Shorten reads to LENGTH. Positive values remove bases at + the end while negative ones remove bases at the beginning. + This and the following modifications are applied after + adapter trimming. + --trim-n Trim N's on ends of reads. + --length-tag TAG Search for TAG followed by a decimal number in the + description field of the read. Replace the decimal number + with the correct length of the trimmed read. For example, + use --length-tag 'length=' to correct fields like + 'length=123'. + --strip-suffix STRIP_SUFFIX + Remove this suffix from read names if present. Can be + given multiple times. + -x PREFIX, --prefix PREFIX + Add this prefix to read names. Use {name} to insert the + name of the matching adapter. + -y SUFFIX, --suffix SUFFIX + Add this suffix to read names; can also include {name} + --rename TEMPLATE Rename reads using TEMPLATE containing variables such as + {id}, {adapter_name} etc. (see documentation) + --zero-cap, -z Change negative quality values to zero. + +Filtering of processed reads: + Filters are applied after above read modifications. Paired-end reads are + always discarded pairwise (see also --pair-filter). + + -m LEN[:LEN2], --minimum-length LEN[:LEN2] + Discard reads shorter than LEN. Default: 0 + -M LEN[:LEN2], --maximum-length LEN[:LEN2] + Discard reads longer than LEN. Default: no limit + --max-n COUNT Discard reads with more than COUNT 'N' bases. If COUNT is + a number between 0 and 1, it is interpreted as a fraction + of the read length. + --max-expected-errors ERRORS, --max-ee ERRORS + Discard reads whose expected number of errors (computed + from quality values) exceeds ERRORS. + --max-average-error-rate ERROR_RATE, --max-aer ERROR_RATE + as --max-expected-errors (see above), but divided by + length to account for reads of varying length. + --discard-trimmed, --discard + Discard reads that contain an adapter. Use also -O to + avoid discarding too many randomly matching reads. + --discard-untrimmed, --trimmed-only + Discard reads that do not contain an adapter. + --discard-casava Discard reads that did not pass CASAVA filtering (header + has :Y:). + +Output: + --quiet Print only error messages. + --report {full,minimal} + Which type of report to print: 'full' or 'minimal'. + Default: full + --json FILE Dump report in JSON format to FILE + -o FILE, --output FILE + Write trimmed reads to FILE. FASTQ or FASTA format is + chosen depending on input. Summary report is sent to + standard output. Use '{name}' for demultiplexing (see + docs). Default: write to standard output + --fasta Output FASTA to standard output even on FASTQ input. + -Z Use compression level 1 for gzipped output files (faster, + but uses more space) + --info-file FILE Write information about each read and its adapter matches + into FILE. 
See the documentation for the file format. + -r FILE, --rest-file FILE + When the adapter matches in the middle of a read, write + the rest (after the adapter) to FILE. + --wildcard-file FILE When the adapter has N wildcard bases, write adapter bases + matching wildcard positions to FILE. (Inaccurate with + indels.) + --too-short-output FILE + Write reads that are too short (according to length + specified by -m) to FILE. Default: discard reads + --too-long-output FILE + Write reads that are too long (according to length + specified by -M) to FILE. Default: discard reads + --untrimmed-output FILE + Write reads that do not contain any adapter to FILE. + Default: output to same file as trimmed reads + +Paired-end options: + The -A/-G/-B/-U/-Q options work like their lowercase counterparts, but are + applied to R2 (second read in pair) + + -A ADAPTER 3' adapter to be removed from R2 + -G ADAPTER 5' adapter to be removed from R2 + -B ADAPTER 5'/3 adapter to be removed from R2 + -U LENGTH Remove LENGTH bases from R2 + -Q [5'CUTOFF,]3'CUTOFF + Quality-trimming cutoff for R2. Default: same as for R1 + -p FILE, --paired-output FILE + Write R2 to FILE. + --pair-adapters Treat adapters given with -a/-A etc. as pairs. Either both + or none are removed from each read pair. + --pair-filter {any,both,first} + Which of the reads in a paired-end read have to match the + filtering criterion in order for the pair to be filtered. + Default: any + --interleaved Read and/or write interleaved paired-end reads. + --untrimmed-paired-output FILE + Write second read in a pair to this FILE when no adapter + was found. Use with --untrimmed-output. Default: output to + same file as trimmed reads + --too-short-paired-output FILE + Write second read in a pair to this file if pair is too + short. + --too-long-paired-output FILE + Write second read in a pair to this file if pair is too + long. + diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh new file mode 100644 index 00000000..5e1f9e30 --- /dev/null +++ b/src/cutadapt/script.sh @@ -0,0 +1,237 @@ +#!/bin/bash + +## VIASH START +par_adapter='AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC;GGATCGGAAGAGCACACGTCTGAACTCCAGTCAC' +par_input='src/cutadapt/test_data/se/a.fastq' +par_report='full' +par_json='false' +par_fasta='false' +par_info_file='false' +par_debug='true' +## VIASH END + +function debug { + [[ "$par_debug" == "true" ]] && echo "DEBUG: $@" +} + +output_dir=$(dirname $par_output) +[[ ! -d $output_dir ]] && mkdir -p $output_dir + +# Init +########################################################### + +echo ">> Paired-end data or not?" 
+
+mode=""
+if [[ -z $par_input_r2 ]]; then
+  mode="se"
+  echo " Single end"
+  input="$par_input"
+else
+  echo " Paired end"
+  mode="pe"
+  input="$par_input $par_input_r2"
+fi
+
+# Adapter arguments
+# - paired and single-end
+# - string and fasta
+###########################################################
+
+function add_flags {
+  local arg=$1
+  local flag=$2
+  local prefix=$3
+  [[ -z $prefix ]] && prefix=""
+
+  # This function should not be called if the input is empty
+  # but check for it just in case
+  if [[ -z $arg ]]; then
+    return
+  fi
+
+  local output=""
+  IFS=';' read -r -a array <<< "$arg"
+  for a in "${array[@]}"; do
+    output="$output $flag $prefix$a"
+  done
+  echo $output
+}
+
+debug ">> Parsing arguments dealing with adapters"
+adapter_args=$(echo \
+  ${par_adapter:+$(add_flags "$par_adapter" "--adapter")} \
+  ${par_adapter_fasta:+$(add_flags "$par_adapter_fasta" "--adapter" "file:")} \
+  ${par_front:+$(add_flags "$par_front" "--front")} \
+  ${par_front_fasta:+$(add_flags "$par_front_fasta" "--front" "file:")} \
+  ${par_anywhere:+$(add_flags "$par_anywhere" "--anywhere")} \
+  ${par_anywhere_fasta:+$(add_flags "$par_anywhere_fasta" "--anywhere" "file:")} \
+  ${par_adapter_r2:+$(add_flags "$par_adapter_r2" "-A")} \
+  ${par_adapter_r2_fasta:+$(add_flags "$par_adapter_r2_fasta" "-A" "file:")} \
+  ${par_front_r2:+$(add_flags "$par_front_r2" "-G")} \
+  ${par_front_r2_fasta:+$(add_flags "$par_front_r2_fasta" "-G" "file:")} \
+  ${par_anywhere_r2:+$(add_flags "$par_anywhere_r2" "-B")} \
+  ${par_anywhere_r2_fasta:+$(add_flags "$par_anywhere_r2_fasta" "-B" "file:")} \
+)
+
+debug "Arguments to cutadapt:"
+debug "$adapter_args"
+debug
+
+# Paired-end options
+###########################################################
+echo ">> Parsing arguments for paired-end reads"
+[[ "$par_pair_adapters" == "false" ]] && unset par_pair_adapters
+[[ "$par_interleaved" == "false" ]] && unset par_interleaved
+
+paired_args=$(echo \
+  ${par_pair_adapters:+--pair-adapters} \
+  ${par_pair_filter:+--pair-filter "${par_pair_filter}"} \
+  ${par_interleaved:+--interleaved}
+)
+debug "Arguments to cutadapt:"
+debug $paired_args
+debug
+
+# Input arguments
+###########################################################
+echo ">> Parsing input arguments"
+[[ "$par_no_indels" == "true" ]] && unset par_no_indels
+[[ "$par_match_read_wildcards" == "false" ]] && unset par_match_read_wildcards
+[[ "$par_no_match_adapter_wildcards" == "true" ]] && unset par_no_match_adapter_wildcards
+[[ "$par_revcomp" == "false" ]] && unset par_revcomp
+
+input_args=$(echo \
+  ${par_error_rate:+--error-rate "${par_error_rate}"} \
+  ${par_no_indels:+--no-indels} \
+  ${par_times:+--times "${par_times}"} \
+  ${par_overlap:+--overlap "${par_overlap}"} \
+  ${par_match_read_wildcards:+--match-read-wildcards} \
+  ${par_no_match_adapter_wildcards:+--no-match-adapter-wildcards} \
+  ${par_action:+--action "${par_action}"} \
+  ${par_revcomp:+--revcomp} \
+)
+debug "Arguments to cutadapt:"
+debug $input_args
+debug
+
+# Read modifications
+###########################################################
+echo ">> Parsing read modification arguments"
+[[ "$par_poly_a" == "false" ]] && unset par_poly_a
+[[ "$par_trim_n" == "false" ]] && unset par_trim_n
+[[ "$par_zero_cap" == "false" ]] && unset par_zero_cap
+
+mod_args=$(echo \
+  ${par_cut:+--cut "${par_cut}"} \
+  ${par_cut_r2:+-U "${par_cut_r2}"} \
+  ${par_nextseq_trim:+--nextseq-trim "${par_nextseq_trim}"} \
+  ${par_quality_cutoff:+--quality-cutoff "${par_quality_cutoff}"} \
+  ${par_quality_cutoff_r2:+-Q "${par_quality_cutoff_r2}"} \
+  ${par_quality_base:+--quality-base "${par_quality_base}"} \
+  ${par_poly_a:+--poly-a} \
+  ${par_length:+--length "${par_length}"} \
+  ${par_trim_n:+--trim-n} \
+  ${par_length_tag:+--length-tag "${par_length_tag}"} \
+  ${par_strip_suffix:+--strip-suffix "${par_strip_suffix}"} \
+  ${par_prefix:+--prefix "${par_prefix}"} \
+  ${par_suffix:+--suffix "${par_suffix}"} \
+  ${par_rename:+--rename "${par_rename}"} \
+  ${par_zero_cap:+--zero-cap} \
+)
+debug "Arguments to cutadapt:"
+debug $mod_args
+debug
+
+# Filtering of processed reads arguments
+###########################################################
+echo ">> Filtering of processed reads arguments"
+[[ "$par_discard_trimmed" == "false" ]] && unset par_discard_trimmed
+[[ "$par_discard_untrimmed" == "false" ]] && unset par_discard_untrimmed
+[[ "$par_discard_casava" == "false" ]] && unset par_discard_casava
+
+filter_args=$(echo \
+  ${par_minimum_length:+--minimum-length "${par_minimum_length}"} \
+  ${par_maximum_length:+--maximum-length "${par_maximum_length}"} \
+  ${par_max_n:+--max-n "${par_max_n}"} \
+  ${par_max_expected_errors:+--max-expected-errors "${par_max_expected_errors}"} \
+  ${par_max_average_error_rate:+--max-average-error-rate "${par_max_average_error_rate}"} \
+  ${par_discard_trimmed:+--discard-trimmed} \
+  ${par_discard_untrimmed:+--discard-untrimmed} \
+  ${par_discard_casava:+--discard-casava} \
+)
+debug "Arguments to cutadapt:"
+debug $filter_args
+debug
+
+# Optional output arguments
+###########################################################
+echo ">> Optional arguments"
+[[ "$par_json" == "false" ]] && unset par_json
+[[ "$par_fasta" == "false" ]] && unset par_fasta
+[[ "$par_info_file" == "false" ]] && unset par_info_file
+
+optional_output_args=$(echo \
+  ${par_report:+--report "${par_report}"} \
+  ${par_json:+--json "report.json"} \
+  ${par_fasta:+--fasta} \
+  ${par_info_file:+--info-file "info.txt"} \
+)
+
+debug "Arguments to cutadapt:"
+debug $optional_output_args
+debug
+
+# Output arguments
+# We write the output to a directory rather than
+# individual files.
+###########################################################
+
+if [[ -z $par_fasta ]]; then
+  ext="fastq"
+else
+  ext="fasta"
+fi
+
+if [ $mode = "se" ]; then
+  output_args=$(echo \
+    --output "$output_dir/{name}_001.$ext" \
+  )
+else
+  output_args=$(echo \
+    --output "$output_dir/{name}_R1_001.$ext" \
+    --paired-output "$output_dir/{name}_R2_001.$ext" \
+  )
+fi
+
+debug "Arguments to cutadapt:"
+debug $output_args
+debug
+
+# Full CLI
+# Set the --cores argument to 0 unless meta_cpus is set
+###########################################################
+echo ">> Running cutadapt"
+par_cpus=0
+[[ ! -z $meta_cpus ]] && par_cpus=$meta_cpus
+
+cli=$(echo \
+  $input \
+  $adapter_args \
+  $paired_args \
+  $input_args \
+  $mod_args \
+  $filter_args \
+  $optional_output_args \
+  $output_args \
+  --cores $par_cpus
+)
+
+debug ">> Full CLI to be run:"
+debug cutadapt $cli | sed -e 's/--/\r\n --/g'
+debug
+
+cutadapt $cli
diff --git a/src/cutadapt/test.sh b/src/cutadapt/test.sh
new file mode 100644
index 00000000..eff997d7
--- /dev/null
+++ b/src/cutadapt/test.sh
@@ -0,0 +1,256 @@
+#!/bin/bash
+
+set -e
+
+#############################################
+# helper functions
+assert_file_exists() {
+  [ -f "$1" ] || (echo "File '$1' does not exist" && exit 1)
+}
+assert_file_doesnt_exist() {
+  [ ! 
-f "$1" ] || (echo "File '$1' exists but shouldn't" && exit 1) +} +assert_file_empty() { + [ ! -s "$1" ] || (echo "File '$1' is not empty but should be" && exit 1) +} +assert_file_not_empty() { + [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1) +} +assert_file_contains() { + grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1) +} +assert_file_not_contains() { + grep -q "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1) +} + +############################################# +mkdir test_multiple_output +cd test_multiple_output + +echo "#############################################" +echo "> Run cutadapt with multiple outputs" + +cat > example.fa <<'EOF' +>read1 +MYSEQUENCEADAPTER +>read2 +MYSEQUENCEADAP +>read3 +MYSEQUENCEADAPTERSOMETHINGELSE +>read4 +MYSEQUENCEADABTER +>read5 +MYSEQUENCEADAPTR +>read6 +MYSEQUENCEADAPPTER +>read7 +ADAPTERMYSEQUENCE +>read8 +PTERMYSEQUENCE +>read9 +SOMETHINGADAPTERMYSEQUENCE +EOF + +"$meta_executable" \ + --report minimal \ + --output "out_test/*.fasta" \ + --adapter ADAPTER \ + --input example.fa \ + --fasta \ + --no_match_adapter_wildcards \ + --json + +echo ">> Checking output" +assert_file_exists "report.json" +assert_file_exists "out_test/1_001.fasta" +assert_file_exists "out_test/unknown_001.fasta" + +cd .. +echo + +############################################# +mkdir test_simple_single_end +cd test_simple_single_end + +echo "#############################################" +echo "> Run cutadapt on single-end data" + +cat > example.fa <<'EOF' +>read1 +MYSEQUENCEADAPTER +>read2 +MYSEQUENCEADAP +>read3 +MYSEQUENCEADAPTERSOMETHINGELSE +>read4 +MYSEQUENCEADABTER +>read5 +MYSEQUENCEADAPTR +>read6 +MYSEQUENCEADAPPTER +>read7 +ADAPTERMYSEQUENCE +>read8 +PTERMYSEQUENCE +>read9 +SOMETHINGADAPTERMYSEQUENCE +EOF + +"$meta_executable" \ + --report minimal \ + --output "out_test1/*.fasta" \ + --adapter ADAPTER \ + --input example.fa \ + --fasta \ + --no_match_adapter_wildcards \ + --json + +echo ">> Checking output" +assert_file_exists "report.json" +assert_file_exists "out_test1/1_001.fasta" +assert_file_exists "out_test1/unknown_001.fasta" + +echo ">> Check if output is empty" +assert_file_not_empty "report.json" +assert_file_not_empty "out_test1/1_001.fasta" +assert_file_not_empty "out_test1/unknown_001.fasta" + +echo ">> Check contents" +for i in 1 2 3 7 9; do + assert_file_contains "out_test1/1_001.fasta" ">read$i" +done +for i in 4 5 6 8; do + assert_file_contains "out_test1/unknown_001.fasta" ">read$i" +done + +cd .. 
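+
+# Note (illustration only): the demultiplexed file names checked above come from
+# cutadapt's {name} output template. Unnamed string adapters are numbered
+# ("1", "2", ...) and reads without a match go to "unknown". A minimal sketch of
+# the raw cutadapt call this test corresponds to (the exact command generated by
+# the component wrapper may differ):
+#
+#   cutadapt \
+#     --adapter ADAPTER \
+#     --fasta \
+#     --json report.json \
+#     --output 'out_test1/{name}_001.fasta' \
+#     --cores 0 \
+#     example.fa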
+echo + +############################################# +mkdir test_multiple_single_end +cd test_multiple_single_end + +echo "#############################################" +echo "> Run with a combination of inputs" + +cat > example.fa <<'EOF' +>read1 +ACGTACGTACGTAAAAA +>read2 +ACGTACGTACGTCCCCC +>read3 +ACGTACGTACGTGGGGG +>read4 +ACGTACGTACGTTTTTT +EOF + +cat > adapters1.fasta <<'EOF' +>adapter1 +CCCCC +EOF + +cat > adapters2.fasta <<'EOF' +>adapter2 +GGGGG +EOF + +"$meta_executable" \ + --report minimal \ + --output "out_test2/*.fasta" \ + --adapter AAAAA \ + --adapter_fasta adapters1.fasta \ + --adapter_fasta adapters2.fasta \ + --input example.fa \ + --fasta \ + --json + +echo ">> Checking output" +assert_file_exists "report.json" +assert_file_exists "out_test2/1_001.fasta" +assert_file_exists "out_test2/adapter1_001.fasta" +assert_file_exists "out_test2/adapter2_001.fasta" +assert_file_exists "out_test2/unknown_001.fasta" + +echo ">> Check if output is empty" +assert_file_not_empty "report.json" +assert_file_not_empty "out_test2/1_001.fasta" +assert_file_not_empty "out_test2/adapter1_001.fasta" +assert_file_not_empty "out_test2/adapter2_001.fasta" +assert_file_not_empty "out_test2/unknown_001.fasta" + +echo ">> Check contents" +assert_file_contains "out_test2/1_001.fasta" ">read1" +assert_file_contains "out_test2/adapter1_001.fasta" ">read2" +assert_file_contains "out_test2/adapter2_001.fasta" ">read3" +assert_file_contains "out_test2/unknown_001.fasta" ">read4" + +cd .. +echo + +############################################# +mkdir test_simple_paired_end +cd test_simple_paired_end + +echo "#############################################" +echo "> Run cutadapt on paired-end data" + +cat > example_R1.fastq <<'EOF' +@read1 +ACGTACGTACGTAAAAA ++ +IIIIIIIIIIIIIIIII +@read2 +ACGTACGTACGTCCCCC ++ +IIIIIIIIIIIIIIIII +EOF + +cat > example_R2.fastq <<'EOF' +@read1 +ACGTACGTACGTGGGGG ++ +IIIIIIIIIIIIIIIII +@read2 +ACGTACGTACGTTTTTT ++ +IIIIIIIIIIIIIIIII +EOF + +"$meta_executable" \ + --report minimal \ + --output "out_test3/*.fastq" \ + --adapter AAAAA \ + --adapter_r2 GGGGG \ + --input example_R1.fastq \ + --input_r2 example_R2.fastq \ + --quality_cutoff 20 \ + --json \ + ---cpus 1 + +echo ">> Checking output" +assert_file_exists "report.json" +assert_file_exists "out_test3/1_R1_001.fastq" +assert_file_exists "out_test3/1_R2_001.fastq" +assert_file_exists "out_test3/unknown_R1_001.fastq" +assert_file_exists "out_test3/unknown_R2_001.fastq" + +echo ">> Check if output is empty" +assert_file_not_empty "report.json" +assert_file_not_empty "out_test3/1_R1_001.fastq" +assert_file_not_empty "out_test3/1_R2_001.fastq" +assert_file_not_empty "out_test3/unknown_R1_001.fastq" + +echo ">> Check contents" +assert_file_contains "out_test3/1_R1_001.fastq" "@read1" +assert_file_contains "out_test3/1_R2_001.fastq" "@read1" +assert_file_contains "out_test3/unknown_R1_001.fastq" "@read2" +assert_file_contains "out_test3/unknown_R2_001.fastq" "@read2" + +cd .. 
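+
+# Note (illustration only): in paired-end mode the component adds R2-specific
+# adapter flags (e.g. -A) and a --paired-output template next to --output. A rough
+# sketch of the underlying cutadapt call for this test, assumed rather than the
+# verbatim generated command:
+#
+#   cutadapt \
+#     --adapter AAAAA -A GGGGG \
+#     --quality-cutoff 20 \
+#     --json report.json \
+#     --output 'out_test3/{name}_R1_001.fastq' \
+#     --paired-output 'out_test3/{name}_R2_001.fastq' \
+#     --cores 1 \
+#     example_R1.fastq example_R2.fastq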
+echo
+
+#############################################
+
+echo "#############################################"
+echo "> Test successful"
+
diff --git a/src/pear/script.sh b/src/pear/script.sh
index f7d6a28f..9eff147b 100644
--- a/src/pear/script.sh
+++ b/src/pear/script.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+set -eo pipefail
+
 ## VIASH START
 ## VIASH END
 
diff --git a/src/salmon/salmon_quant/config.vsh.yaml b/src/salmon/salmon_quant/config.vsh.yaml
index 47d72665..b7e303f4 100644
--- a/src/salmon/salmon_quant/config.vsh.yaml
+++ b/src/salmon/salmon_quant/config.vsh.yaml
@@ -42,7 +42,7 @@ argument_groups:
         type: file
         description: |
           Salmon index.
-        required: true
+        required: false
         example: transcriptome_index
       - name: --unmated_reads
         alternatives: ["-r"]
@@ -320,12 +320,15 @@ argument_groups:
         example: 0.00001
       - name: --write_mappings
         alternatives: ["-z"]
-        type: file
-        direction: output
+        type: boolean_true
         description: |
           If this option is provided, then the selective-alignment results will be written out in SAM-compatible format. By default, output will be directed to stdout, but an alternative file name can be provided instead.
+      - name: --mapping_sam
+        type: file
+        description: Path to a file to which the selective-alignment results will be written in SAM-compatible format. This option must be provided when using --write_mappings.
         required: false
-        example: mappings.sam
+        direction: output
+        example: mappings.sam
       - name: --write_qualities
         type: boolean_true
         description: |
diff --git a/src/salmon/salmon_quant/script.sh b/src/salmon/salmon_quant/script.sh
index ace79711..4c9f69d5 100644
--- a/src/salmon/salmon_quant/script.sh
+++ b/src/salmon/salmon_quant/script.sh
@@ -21,6 +21,7 @@ set -e
 [[ "$par_softclip_overhangs" == "false" ]] && unset par_softclip_overhangs
 [[ "$par_full_length_alignment" == "false" ]] && unset par_full_length_alignment
 [[ "$par_hard_filter" == "false" ]] && unset par_hard_filter
+[[ "$par_write_mappings" == "false" ]] && unset par_write_mappings
 [[ "$par_write_qualities" == "false" ]] && unset par_write_qualities
 [[ "$par_alternative_init_mode" == "false" ]] && unset par_alternative_init_mode
 [[ "$par_skip_quant" == "false" ]] && unset par_skip_quant
@@ -96,7 +97,7 @@ salmon quant \
     ${par_full_length_alignment:+--fullLengthAlignment} \
     ${par_hard_filter:+--hardFilter} \
     ${par_min_aln_prob:+--minAlnProb "${par_min_aln_prob}"} \
-    ${par_write_mappings:+-z "${par_write_mappings}"} \
+    ${par_write_mappings:+--writeMappings="${par_mapping_sam}"} \
     ${par_write_qualities:+--writeQualities} \
     ${par_hit_filter_policy:+--hitFilterPolicy "${par_hit_filter_policy}"} \
     ${par_alternative_init_mode:+--alternativeInitMode} \
diff --git a/src/star/star_align_reads/config.vsh.yaml b/src/star/star_align_reads/config.vsh.yaml
index 8fdd5256..eab65b35 100644
--- a/src/star/star_align_reads/config.vsh.yaml
+++ b/src/star/star_align_reads/config.vsh.yaml
@@ -72,6 +72,12 @@ argument_groups:
         description: The output file containing the splice junctions.
         direction: output
         example: splice_junctions.tsv
+      - type: file
+        name: --reads_aligned_to_transcriptome
+        required: false
+        description: The output file containing the alignments to the transcriptome in BAM format. This file is generated when --quantMode is set to TranscriptomeSAM.
+ direction: output + example: transcriptome_aligned.bam # other arguments are defined in a separate file __merge__: argument_groups.yaml resources: diff --git a/src/star/star_align_reads/script.py b/src/star/star_align_reads/script.py index 2bde8798..f3d64a57 100644 --- a/src/star/star_align_reads/script.py +++ b/src/star/star_align_reads/script.py @@ -58,7 +58,8 @@ "log": "Log.final.out", "splice_junctions": "SJ.out.tab", "unmapped": "Unmapped.out.mate1", - "unmapped_r2": "Unmapped.out.mate2" + "unmapped_r2": "Unmapped.out.mate2", + "reads_aligned_to_transcriptome": "Aligned.toTranscriptome.out.bam" } output_paths = {name: par[name] for name in expected_outputs.keys()} for name in expected_outputs.keys(): diff --git a/src/star/star_align_reads/test.sh b/src/star/star_align_reads/test.sh index 374b9014..a15ea599 100644 --- a/src/star/star_align_reads/test.sh +++ b/src/star/star_align_reads/test.sh @@ -98,6 +98,7 @@ echo "> Run star_align_reads on SE" --reads_per_gene "reads_per_gene.tsv" \ --outSJtype Standard \ --splice_junctions "splice_junctions.tsv" \ + --reads_aligned_to_transcriptome "transcriptome_aligned.bam" \ ${meta_cpus:+---cpus $meta_cpus} # TODO: Test data doesn't contain any chimeric reads yet @@ -111,6 +112,7 @@ assert_file_exists "reads_per_gene.tsv" # assert_file_exists "chimeric_junctions.tsv" assert_file_exists "splice_junctions.tsv" assert_file_exists "unmapped.sam" +assert_file_exists "transcriptome_aligned.bam" echo ">> Check if output contents are not empty" assert_file_not_empty "output.sam" @@ -119,6 +121,7 @@ assert_file_not_empty "reads_per_gene.tsv" # assert_file_not_empty "chimeric_junctions.tsv" # assert_file_not_empty "splice_junctions.tsv" # TODO: test data doesn't contain any splice junctions yet assert_file_not_empty "unmapped.sam" +assert_file_not_empty "transcriptome_aligned.bam" echo ">> Check if output contents are correct" assert_file_contains "log.txt" "Number of input reads \\| 2" diff --git a/src/star/star_genome_generate/config.vsh.yaml b/src/star/star_genome_generate/config.vsh.yaml new file mode 100644 index 00000000..3adaf7a2 --- /dev/null +++ b/src/star/star_genome_generate/config.vsh.yaml @@ -0,0 +1,139 @@ +name: star_genome_generate +namespace: star +description: | + Create index for STAR +keywords: [genome, index, align] +links: + repository: https://github.com/alexdobin/STAR + documentation: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf +references: + doi: 10.1093/bioinformatics/bts635 +license: MIT +requirements: + commands: [ STAR ] + +argument_groups: +- name: "Input" + arguments: + - name: "--genomeFastaFiles" + type: file + description: | + Path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped. + required: true + multiple: yes + multiple_sep: ; + - name: "--sjdbGTFfile" + type: file + description: Path to the GTF file with annotations + - name: --sjdbOverhang + type: integer + description: Length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1) + example: 100 + - name: --sjdbGTFchrPrefix + type: string + description: Prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSMEBL annotations with UCSC genomes) + - name: --sjdbGTFfeatureExon + type: string + description: Feature type in GTF file to be used as exons for building transcripts + example: exon + - name: --sjdbGTFtagExonParentTranscript + type: string + description: GTF attribute name for parent transcript ID (default "transcript_id" works for GTF files) + example: transcript_id + - name: --sjdbGTFtagExonParentGene + type: string + description: GTF attribute name for parent gene ID (default "gene_id" works for GTF files) + example: gene_id + - name: --sjdbGTFtagExonParentGeneName + type: string + description: GTF attribute name for parent gene name + example: gene_name + multiple: yes + multiple_sep: ; + - name: --sjdbGTFtagExonParentGeneType + type: string + description: GTF attribute name for parent gene type + example: + - gene_type + - gene_biotype + multiple: yes + multiple_sep: ; + - name: --limitGenomeGenerateRAM + type: long + description: Maximum available RAM (bytes) for genome generation + example: '31000000000' + - name: --genomeSAindexNbases + type: integer + description: Length (bases) of the SA pre-indexing string. Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, this parameter must be scaled down to min(14, log2(GenomeLength)/2 - 1). + example: 14 + - name: --genomeChrBinNbits + type: integer + description: Defined as log2(chrBin), where chrBin is the size of the bins for genome storage. Each chromosome will occupy an integer number of bins. For a genome with large number of contigs, it is recommended to scale this parameter as min(18, log2[max(GenomeLength/NumberOfReferences,ReadLength)]). + example: 18 + - name: --genomeSAsparseD + type: integer + min: 0 + example: 1 + description: Suffux array sparsity, i.e. distance between indices. Use bigger numbers to decrease needed RAM at the cost of mapping speed reduction. + - name: --genomeSuffixLengthMax + type: integer + description: Maximum length of the suffixes, has to be longer than read length. Use -1 for infinite length. + example: -1 + - name: --genomeTransformType + type: string + description: | + Type of genome transformation + None ... no transformation + Haploid ... replace reference alleles with alternative alleles from VCF file (e.g. consensus allele) + Diploid ... create two haplotypes for each chromosome listed in VCF file, for genotypes 1|2, assumes perfect phasing (e.g. personal genome) + example: None + - name: --genomeTransformVCF + type: file + description: path to VCF file for genome transformation + +- name: "Output" + arguments: + - name: "--index" + type: file + direction: output + description: STAR index directory. 
+ default: STAR_index + required: true + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + +engines: +- type: docker + image: ubuntu:22.04 + setup: + # setup derived from https://github.com/alexdobin/STAR/blob/master/extras/docker/Dockerfile + - type: docker + env: + - STAR_VERSION 2.7.11b + - PACKAGES gcc g++ make wget zlib1g-dev unzip xxd + run: | + apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + cd /tmp && \ + wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \ + unzip ${STAR_VERSION}.zip && \ + cd STAR-${STAR_VERSION}/source && \ + make STARstatic CXXFLAGS_SIMD=-std=c++11 && \ + cp STAR /usr/local/bin && \ + cd / && \ + rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \ + apt-get --purge autoremove -y ${PACKAGES} && \ + apt-get clean + - type: docker + run: | + STAR --version | sed 's#\(.*\)#star: "\1"#' > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow diff --git a/src/star/star_genome_generate/help.txt b/src/star/star_genome_generate/help.txt new file mode 100644 index 00000000..940f639d --- /dev/null +++ b/src/star/star_genome_generate/help.txt @@ -0,0 +1,927 @@ +Usage: STAR [options]... --genomeDir /path/to/genome/index/ --readFilesIn R1.fq R2.fq +Spliced Transcripts Alignment to a Reference (c) Alexander Dobin, 2009-2022 + +STAR version=2.7.11b +STAR compilation time,server,dir=2024-02-11T19:36:26+00:00 :/tmp/STAR-2.7.11b/source +For more details see: + + +### versions +versionGenome 2.7.4a + string: earliest genome index version compatible with this STAR release. Please do not change this value! + +### Parameter Files +parametersFiles - + string: name of a user-defined parameters file, "-": none. Can only be defined on the command line. + +### System +sysShell - + string: path to the shell binary, preferably bash, e.g. /bin/bash. + - ... the default shell is executed, typically /bin/sh. This was reported to fail on some Ubuntu systems - then you need to specify path to bash. + +### Run Parameters +runMode alignReads + string: type of the run. + alignReads ... map reads + genomeGenerate ... generate genome files + inputAlignmentsFromBAM ... input alignments from BAM. Presently only works with --outWigType and --bamRemoveDuplicates options. + liftOver ... lift-over of GTF files (--sjdbGTFfile) between genome assemblies using chain file(s) from --genomeChainFiles. + soloCellFiltering ... STARsolo cell filtering ("calling") without remapping, followed by the path to raw count directory and output (filtered) prefix + +runThreadN 1 + int: number of threads to run STAR + +runDirPerm User_RWX + string: permissions for the directories created at the run-time. + User_RWX ... user-read/write/execute + All_RWX ... all-read/write/execute (same as chmod 777) + +runRNGseed 777 + int: random number generator seed. + + +### Genome Parameters +genomeDir ./GenomeDir/ + string: path to the directory where genome files are stored (for --runMode alignReads) or will be generated (for --runMode generateGenome) + +genomeLoad NoSharedMemory + string: mode of shared memory usage for the genome files. Only used with --runMode alignReads. + LoadAndKeep ... load genome into shared and keep it in memory after run + LoadAndRemove ... load genome into shared but remove it after run + LoadAndExit ... load genome into shared memory and exit, keeping the genome in memory for future runs + Remove ... 
do not map anything, just remove loaded genome from memory + NoSharedMemory ... do not use shared memory, each job will have its own private copy of the genome + +genomeFastaFiles - + string(s): path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped. + Required for the genome generation (--runMode genomeGenerate). Can also be used in the mapping (--runMode alignReads) to add extra (new) sequences to the genome (e.g. spike-ins). + +genomeChainFiles - + string: chain files for genomic liftover. Only used with --runMode liftOver . + +genomeFileSizes 0 + uint(s)>0: genome files exact sizes in bytes. Typically, this should not be defined by the user. + +genomeTransformOutput None + string(s): which output to transform back to original genome + SAM ... SAM/BAM alignments + SJ ... splice junctions (SJ.out.tab) + Quant ... quantifications (from --quantMode option) + None ... no transformation of the output + +genomeChrSetMitochondrial chrM M MT + string(s): names of the mitochondrial chromosomes. Presently only used for STARsolo statistics output/ + +### Genome Indexing Parameters - only used with --runMode genomeGenerate +genomeChrBinNbits 18 + int: =log2(chrBin), where chrBin is the size of the bins for genome storage: each chromosome will occupy an integer number of bins. For a genome with large number of contigs, it is recommended to scale this parameter as min(18, log2[max(GenomeLength/NumberOfReferences,ReadLength)]). + +genomeSAindexNbases 14 + int: length (bases) of the SA pre-indexing string. Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, the parameter --genomeSAindexNbases must be scaled down to min(14, log2(GenomeLength)/2 - 1). + +genomeSAsparseD 1 + int>0: suffux array sparsity, i.e. distance between indices: use bigger numbers to decrease needed RAM at the cost of mapping speed reduction + +genomeSuffixLengthMax -1 + int: maximum length of the suffixes, has to be longer than read length. -1 = infinite. + +genomeTransformType None + string: type of genome transformation + None ... no transformation + Haploid ... replace reference alleles with alternative alleles from VCF file (e.g. consensus allele) + Diploid ... create two haplotypes for each chromosome listed in VCF file, for genotypes 1|2, assumes perfect phasing (e.g. personal genome) + +genomeTransformVCF - + string: path to VCF file for genome transformation + + + +#####UnderDevelopment_begin : not supported - do not use +genomeType Full + string: type of genome to generate + Full ... full (normal) genome + Transcriptome ... genome consists of transcript sequences + SuperTransriptome ... genome consists of superTranscript sequences +#####UnderDevelopment_end + +# DEPRECATED: please use --genomeTransformVCF and --genomeTransformType options instead. +#genomeConsensusFile - +# string: VCF file with consensus SNPs (i.e. alternative allele is the major (AF>0.5) allele) +# DEPRECATED + + + +### Splice Junctions Database +sjdbFileChrStartEnd - + string(s): path to the files with genomic coordinates (chr start end strand) for the splice junction introns. Multiple files can be supplied and will be concatenated. + +sjdbGTFfile - + string: path to the GTF file with annotations + +sjdbGTFchrPrefix - + string: prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSMEBL annotations with UCSC genomes) + +sjdbGTFfeatureExon exon + string: feature type in GTF file to be used as exons for building transcripts + +sjdbGTFtagExonParentTranscript transcript_id + string: GTF attribute name for parent transcript ID (default "transcript_id" works for GTF files) + +sjdbGTFtagExonParentGene gene_id + string: GTF attribute name for parent gene ID (default "gene_id" works for GTF files) + +sjdbGTFtagExonParentGeneName gene_name + string(s): GTF attribute name for parent gene name + +sjdbGTFtagExonParentGeneType gene_type gene_biotype + string(s): GTF attribute name for parent gene type + +sjdbOverhang 100 + int>0: length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1) + +sjdbScore 2 + int: extra alignment score for alignments that cross database junctions + +sjdbInsertSave Basic + string: which files to save when sjdb junctions are inserted on the fly at the mapping step + Basic ... only small junction / transcript files + All ... all files including big Genome, SA and SAindex - this will create a complete genome directory + +### Variation parameters +varVCFfile - + string: path to the VCF file that contains variation data. The 10th column should contain the genotype information, e.g. 0/1 + +### Input Files +inputBAMfile - + string: path to BAM input file, to be used with --runMode inputAlignmentsFromBAM + +### Read Parameters +readFilesType Fastx + string: format of input read files + Fastx ... FASTA or FASTQ + SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand samtools view + SAM PE ... SAM or BAM paired-end reads; for BAM use --readFilesCommand samtools view + +readFilesSAMattrKeep All + string(s): for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL + All ... keep all tags + None ... do not keep any tags + +readFilesIn Read1 Read2 + string(s): paths to files that contain input read1 (and, if needed, read2) + +readFilesManifest - + string: path to the "manifest" file with the names of read files. The manifest file should contain 3 tab-separated columns: + paired-end reads: read1_file_name $tab$ read2_file_name $tab$ read_group_line. + single-end reads: read1_file_name $tab$ - $tab$ read_group_line. + Spaces, but not tabs are allowed in file names. + If read_group_line does not start with ID:, it can only contain one ID field, and ID: will be added to it. + If read_group_line starts with ID:, it can contain several fields separated by $tab$, and all fields will be be copied verbatim into SAM @RG header line. + +readFilesPrefix - + string: prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn + +readFilesCommand - + string(s): command line to execute for each of the input file. This command should generate FASTA or FASTQ text and send it to stdout + For example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc. + +readMapNumber -1 + int: number of reads to map from the beginning of the file + -1: map all reads + +readMatesLengthsIn NotEqual + string: Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations. 
+ +readNameSeparator / + string(s): character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed) + +readQualityScoreBase 33 + int>=0: number to be subtracted from the ASCII code to get Phred quality score + +### Read Clipping + +clipAdapterType Hamming + string: adapter clipping type + Hamming ... adapter clipping based on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp + CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes Opal package by Martin Šošić: https://github.com/Martinsos/opal + None ... no adapter clipping, all other clip* parameters are disregarded + +clip3pNbases 0 + int(s): number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates. + +clip3pAdapterSeq - + string(s): adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates. + polyA ... polyA sequence with the length equal to read length + +clip3pAdapterMMp 0.1 + double(s): max proportion of mismatches for 3p adapter clipping for each mate. If one value is given, it will be assumed the same for both mates. + +clip3pAfterAdapterNbases 0 + int(s): number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates. + +clip5pNbases 0 + int(s): number(s) of bases to clip from 5p of each mate. If one value is given, it will be assumed the same for both mates. + +#####UnderDevelopment_begin : not supported - do not use +clip5pAdapterSeq - + string(s): adapter sequences to clip from 5p of each mate, separated by space. + +clip5pAdapterMMp 0.1 + double(s): max proportion of mismatches for 5p adapter clipping for each mate, separated by space + +clip5pAfterAdapterNbases 0 + int(s): number of bases to clip from 5p of each mate after the adapter clipping, separated by space. +#####UnderDevelopment_end + +### Limits +limitGenomeGenerateRAM 31000000000 + int>0: maximum available RAM (bytes) for genome generation + +limitIObufferSize 30000000 50000000 + int(s)>0: max available buffers size (bytes) for input/output, per thread + +limitOutSAMoneReadBytes 100000 + int>0: max size of the SAM record (bytes) for one read. Recommended value: >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax + +limitOutSJoneRead 1000 + int>0: max number of junctions for one read (including all multi-mappers) + +limitOutSJcollapsed 1000000 + int>0: max number of collapsed junctions + +limitBAMsortRAM 0 + int>=0: maximum available RAM (bytes) for sorting BAM. If =0, it will be set to the genome index size. 0 value can only be used with --genomeLoad NoSharedMemory option. + +limitSjdbInsertNsj 1000000 + int>=0: maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run + +limitNreadsSoft -1 + int: soft limit on the number of reads + +### Output: general +outFileNamePrefix ./ + string: output files name prefix (including full or relative path). Can only be defined on the command line. + +outTmpDir - + string: path to a directory that will be used as temporary by STAR. All contents of this directory will be removed! + - ... the temp directory will default to outFileNamePrefix_STARtmp + +outTmpKeep None + string: whether to keep the temporary files after STAR runs is finished + None ... remove all temporary files + All ... 
keep all files + +outStd Log + string: which output will be directed to stdout (standard out) + Log ... log messages + SAM ... alignments in SAM format (which normally are output to Aligned.out.sam file), normal standard output will go into Log.std.out + BAM_Unsorted ... alignments in BAM format, unsorted. Requires --outSAMtype BAM Unsorted + BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype BAM SortedByCoordinate + BAM_Quant ... alignments to transcriptome in BAM format, unsorted. Requires --quantMode TranscriptomeSAM + +outReadsUnmapped None + string: output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s). + None ... no output + Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2 + +outQSconversionAdd 0 + int: add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31) + +outMultimapperOrder Old_2.4 + string: order of multimapping alignments in the output files + Old_2.4 ... quasi-random order used before 2.5.0 + Random ... random order of alignments for each multi-mapper. Read mates (pairs) are always adjacent, all alignment for each read stay together. This option will become default in the future releases. + +### Output: SAM and BAM +outSAMtype SAM + strings: type of SAM/BAM output + 1st word: + BAM ... output BAM without sorting + SAM ... output SAM without sorting + None ... no SAM/BAM output + 2nd, 3rd: + Unsorted ... standard unsorted + SortedByCoordinate ... sorted by coordinate. This option will allocate extra memory for sorting which can be specified by --limitBAMsortRAM. + +outSAMmode Full + string: mode of SAM output + None ... no SAM output + Full ... full SAM output + NoQS ... full SAM but without quality scores + +outSAMstrandField None + string: Cufflinks-like strand field flag + None ... not used + intronMotif ... strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out. + +outSAMattributes Standard + string(s): a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order. + ***Presets: + None ... no attributes + Standard ... NH HI AS nM + All ... NH HI AS nM NM MD jM jI MC ch + ***Alignment: + NH ... number of loci the reads maps to: =1 for unique mappers, >1 for multimappers. Standard SAM tag. + HI ... multiple alignment index, starts with --outSAMattrIHstart (=1 by default). Standard SAM tag. + AS ... local alignment score, +1/-1 for matches/mismateches, score* penalties for indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag. + nM ... number of mismatches. For PE reads, sum over two mates. + NM ... edit distance to the reference (number of mismatched + inserted + deleted bases) for each mate. Standard SAM tag. + MD ... string encoding mismatched and deleted reference bases (see standard SAM specifications). Standard SAM tag. + jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is annotated, 20 is added to its motif value. + jI ... start and end of introns for all junctions (1-based). + XS ... alignment strand according to --outSAMstrandField. + MC ... mate's CIGAR string. Standard SAM tag. + ch ... marks all segment of all chimeric alingments for --chimOutType WithinBAM output. + cN ... 
number of bases clipped from the read ends: 5' and 3' + ***Variation: + vA ... variant allele + vG ... genomic coordinate of the variant overlapped by the read. + vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode SAMtag. + ha ... haplotype (1/2) when mapping to the diploid genome. Requires genome generated with --genomeTransformType Diploid . + ***STARsolo: + CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing. + GX GN ... gene ID and gene name for unique-gene reads. + gx gn ... gene IDs and gene names for unique- and multi-gene reads. + CB UB ... error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM SortedByCoordinate. + sM ... assessment of CB and UMI. + sS ... sequence of the entire barcode (CB,UMI,adapter). + sQ ... quality of the entire barcode. + sF ... type of feature overlap and number of features for each alignment + ***Unsupported/undocumented: + rB ... alignment block read/genomic coordinates. + vR ... read coordinate of the variant. + +outSAMattrIHstart 1 + int>=0: start value for the IH attribute. 0 may be required by some downstream software, such as Cufflinks or StringTie. + +outSAMunmapped None + string(s): output of unmapped reads in the SAM format + 1st word: + None ... no output + Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam) + 2nd word: + KeepPairs ... record unmapped mate for each alignment, and, in case of unsorted output, keep it adjacent to its mapped mate. Only affects multi-mapping reads. + +outSAMorder Paired + string: type of sorting for the SAM output + Paired: one mate after the other for all paired alignments + PairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files + +outSAMprimaryFlag OneBestScore + string: which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG + OneBestScore ... only one alignment with the best score is primary + AllBestScore ... all alignments with the best score are primary + +outSAMreadID Standard + string: read ID record type + Standard ... first word (until space) from the FASTx read ID line, removing /1,/2 from the end + Number ... read number (index) in the FASTx file + +outSAMmapqUnique 255 + int: 0 to 255: the MAPQ value for unique mappers + +outSAMflagOR 0 + int: 0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e. FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by STAR, and after outSAMflagAND. Can be used to set specific bits that are not set otherwise. + +outSAMflagAND 65535 + int: 0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e. FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set by STAR, but before outSAMflagOR. Can be used to unset specific bits that are not set otherwise. + +outSAMattrRGline - + string(s): SAM/BAM read group line. The first word contains the read group identifier and must start with "ID:", e.g. --outSAMattrRGline ID:xxx CN:yy "DS:z z z". + xxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted. + Comma separated RG lines correspons to different (comma separated) input files in --readFilesIn. Commas have to be surrounded by spaces, e.g. 
+ --outSAMattrRGline ID:xxx , ID:zzz "DS:z z" , ID:yyy DS:yyyy + +outSAMheaderHD - + strings: @HD (header) line of the SAM header + +outSAMheaderPG - + strings: extra @PG (software) line of the SAM header (in addition to STAR) + +outSAMheaderCommentFile - + string: path to the file with @CO (comment) lines of the SAM header + +outSAMfilter None + string(s): filter the output into main SAM/BAM files + KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genomeFastaFiles at the mapping stage. + KeepAllAddedReferences ... keep all alignments to the extra reference sequences added with --genomeFastaFiles at the mapping stage. + + +outSAMmultNmax -1 + int: max number of multiple alignments for a read that will be output to the SAM/BAM files. Note that if this value is not equal to -1, the top scoring alignment will be output first + -1 ... all alignments (up to --outFilterMultimapNmax) will be output + +outSAMtlen 1 + int: calculation method for the TLEN field in the SAM/BAM files + 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate + 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. This is different from 1 for overlapping mates with protruding ends + +outBAMcompression 1 + int: -1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression + +outBAMsortingThreadN 0 + int: >=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN). + +outBAMsortingBinsN 50 + int: >0: number of genome bins for coordinate-sorting + +### BAM processing +bamRemoveDuplicatesType - + string: mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only + - ... no duplicate removal/marking + UniqueIdentical ... mark all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical + UniqueIdenticalNotMulti ... mark duplicate unique mappers but not multimappers. + +bamRemoveDuplicatesMate2basesN 0 + int>0: number of bases from the 5' of mate 2 to use in collapsing (e.g. for RAMPAGE) + +### Output Wiggle +outWigType None + string(s): type of signal output, e.g. "bedGraph" OR "bedGraph read1_5p". Requires sorted BAM: --outSAMtype BAM SortedByCoordinate . + 1st word: + None ... no signal output + bedGraph ... bedGraph format + wiggle ... wiggle format + 2nd word: + read1_5p ... signal from only 5' of the 1st read, useful for CAGE/RAMPAGE etc + read2 ... signal from only 2nd read + +outWigStrand Stranded + string: strandedness of wiggle/bedGraph output + Stranded ... separate strands, str1 and str2 + Unstranded ... collapsed strands + +outWigReferencesPrefix - + string: prefix matching reference names to include in the output wiggle file, e.g. "chr", default "-" - include all references + +outWigNorm RPM + string: type of normalization for the signal + RPM ... reads per million of mapped reads + None ... no normalization, "raw" counts + +### Output Filtering +outFilterType Normal + string: type of filtering + Normal ... standard filtering using only current alignment + BySJout ... keep only those reads that contain junctions that passed filtering into SJ.out.tab + +outFilterMultimapScoreRange 1 + int: the score range below the maximum score for multimapping alignments + +outFilterMultimapNmax 10 + int: maximum number of loci the read is allowed to map to. 
Alignments (all of them) will be output only if the read maps to no more loci than this value. + Otherwise no alignments will be output, and the read will be counted as "mapped to too many loci" in the Log.final.out . + +outFilterMismatchNmax 10 + int: alignment will be output only if it has no more mismatches than this value. + +outFilterMismatchNoverLmax 0.3 + real: alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value. + +outFilterMismatchNoverReadLmax 1.0 + real: alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value. + + +outFilterScoreMin 0 + int: alignment will be output only if its score is higher than or equal to this value. + +outFilterScoreMinOverLread 0.66 + real: same as outFilterScoreMin, but normalized to read length (sum of mates' lengths for paired-end reads) + +outFilterMatchNmin 0 + int: alignment will be output only if the number of matched bases is higher than or equal to this value. + +outFilterMatchNminOverLread 0.66 + real: sam as outFilterMatchNmin, but normalized to the read length (sum of mates' lengths for paired-end reads). + +outFilterIntronMotifs None + string: filter alignment using their motifs + None ... no filtering + RemoveNoncanonical ... filter out alignments that contain non-canonical junctions + RemoveNoncanonicalUnannotated ... filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database. The annotated non-canonical junctions will be kept. + +outFilterIntronStrands RemoveInconsistentStrands + string: filter alignments + RemoveInconsistentStrands ... remove alignments that have junctions with inconsistent strands + None ... no filtering + +### Output splice junctions (SJ.out.tab) +outSJtype Standard + string: type of splice junction output + Standard ... standard SJ.out.tab output + None ... no splice junction output + +### Output Filtering: Splice Junctions +outSJfilterReads All + string: which reads to consider for collapsed splice junctions output + All ... all reads, unique- and multi-mappers + Unique ... uniquely mapping reads only + +outSJfilterOverhangMin 30 12 12 12 + 4 integers: minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif + does not apply to annotated junctions + +outSJfilterCountUniqueMin 3 1 1 1 + 4 integers: minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif + Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied + does not apply to annotated junctions + +outSJfilterCountTotalMin 3 1 1 1 + 4 integers: minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif + Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied + does not apply to annotated junctions + +outSJfilterDistToOtherSJmin 10 0 5 10 + 4 integers>=0: minimum allowed distance to other junctions' donor/acceptor + does not apply to annotated junctions + +outSJfilterIntronMaxVsReadN 50000 100000 200000 + N integers>=0: maximum gap allowed for junctions supported by 1,2,3,,,N reads + i.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap <=alignIntronMax + does not apply to annotated junctions + +### Scoring +scoreGap 0 + int: splice junction penalty (independent on intron motif) + +scoreGapNoncan -8 + int: non-canonical junction penalty (in addition to scoreGap) + +scoreGapGCAG -4 + int: GC/AG and CT/GC junction penalty (in addition to scoreGap) + +scoreGapATAC -8 + int: AT/AC and GT/AT junction penalty (in addition to scoreGap) + +scoreGenomicLengthLog2scale -0.25 + int: extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength) + +scoreDelOpen -2 + int: deletion open penalty + +scoreDelBase -2 + int: deletion extension penalty per base (in addition to scoreDelOpen) + +scoreInsOpen -2 + int: insertion open penalty + +scoreInsBase -2 + int: insertion extension penalty per base (in addition to scoreInsOpen) + +scoreStitchSJshift 1 + int: maximum score reduction while searching for SJ boundaries in the stitching step + + +### Alignments and Seeding + +seedSearchStartLmax 50 + int>0: defines the search start point through the read - the read is split into pieces no longer than this value + +seedSearchStartLmaxOverLread 1.0 + real: seedSearchStartLmax normalized to read length (sum of mates' lengths for paired-end reads) + +seedSearchLmax 0 + int>=0: defines the maximum length of the seeds, if =0 seed length is not limited + +seedMultimapNmax 10000 + int>0: only pieces that map fewer than this value are utilized in the stitching procedure + +seedPerReadNmax 1000 + int>0: max number of seeds per read + +seedPerWindowNmax 50 + int>0: max number of seeds per window + +seedNoneLociPerWindow 10 + int>0: max number of one seed loci per window + +seedSplitMin 12 + int>0: min length of the seed sequences split by Ns or mate gap + +seedMapMin 5 + int>0: min length of seeds to be mapped + +alignIntronMin 21 + int: minimum intron size, genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion + +alignIntronMax 0 + int: maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins + +alignMatesGapMax 0 + int: maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins + +alignSJoverhangMin 5 + int>0: minimum overhang (i.e. block size) for spliced alignments + +alignSJstitchMismatchNmax 0 -1 0 0 + 4*int>=0: maximum number of mismatches for stitching of the splice junctions (-1: no limit). + (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. + +alignSJDBoverhangMin 3 + int>0: minimum overhang (i.e. 
block size) for annotated (sjdb) spliced alignments + +alignSplicedMateMapLmin 0 + int>0: minimum mapped length for a read mate that is spliced + +alignSplicedMateMapLminOverLmate 0.66 + real>0: alignSplicedMateMapLmin normalized to mate length + +alignWindowsPerReadNmax 10000 + int>0: max number of windows per read + +alignTranscriptsPerWindowNmax 100 + int>0: max number of transcripts per window + +alignTranscriptsPerReadNmax 10000 + int>0: max number of different alignments per read to consider + +alignEndsType Local + string: type of read ends alignment + Local ... standard local alignment with soft-clipping allowed + EndToEnd ... force end-to-end read alignment, do not soft-clip + Extend5pOfRead1 ... fully extend only the 5p of the read1, all other ends: local alignment + Extend5pOfReads12 ... fully extend only the 5p of the both read1 and read2, all other ends: local alignment + +alignEndsProtrude 0 ConcordantPair + int, string: allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate + 1st word: int: maximum number of protrusion bases allowed + 2nd word: string: + ConcordantPair ... report alignments with non-zero protrusion as concordant pairs + DiscordantPair ... report alignments with non-zero protrusion as discordant pairs + +alignSoftClipAtReferenceEnds Yes + string: allow the soft-clipping of the alignments past the end of the chromosomes + Yes ... allow + No ... prohibit, useful for compatibility with Cufflinks + +alignInsertionFlush None + string: how to flush ambiguous insertion positions + None ... insertions are not flushed + Right ... insertions are flushed to the right + +### Paired-End reads +peOverlapNbasesMin 0 + int>=0: minimum number of overlapping bases to trigger mates merging and realignment. Specify >0 value to switch on the "merginf of overlapping mates" algorithm. + +peOverlapMMp 0.01 + real, >=0 & <1: maximum proportion of mismatched bases in the overlap area + +### Windows, Anchors, Binning + +winAnchorMultimapNmax 50 + int>0: max number of loci anchors are allowed to map to + +winBinNbits 16 + int>0: =log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins. + +winAnchorDistNbins 9 + int>0: max number of bins between two anchors that allows aggregation of anchors into one window + +winFlankNbins 4 + int>0: log2(winFlank), where win Flank is the size of the left and right flanking regions for each window + +winReadCoverageRelativeMin 0.5 + real>=0: minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only. + +winReadCoverageBasesMin 0 + int>0: minimum number of bases covered by the seeds in a window , for STARlong algorithm only. + +### Chimeric Alignments +chimOutType Junctions + string(s): type of chimeric output + Junctions ... Chimeric.out.junction + SeparateSAMold ... output old SAM into separate Chimeric.out.sam file + WithinBAM ... output into main aligned BAM files (Aligned.*.bam) + WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental chimeric alignments (default if no 2nd word is present) + WithinBAM SoftClip ... 
soft-clipping in the CIGAR for supplemental chimeric alignments + +chimSegmentMin 0 + int>=0: minimum length of chimeric segment length, if ==0, no chimeric output + +chimScoreMin 0 + int>=0: minimum total (summed) score of the chimeric segments + +chimScoreDropMax 20 + int>=0: max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length + +chimScoreSeparation 10 + int>=0: minimum difference (separation) between the best chimeric score and the next one + +chimScoreJunctionNonGTAG -1 + int: penalty for a non-GT/AG chimeric junction + +chimJunctionOverhangMin 20 + int>=0: minimum overhang for a chimeric junction + +chimSegmentReadGapMax 0 + int>=0: maximum gap in the read sequence between chimeric segments + +chimFilter banGenomicN + string(s): different filters for chimeric alignments + None ... no filtering + banGenomicN ... Ns are not allowed in the genome sequence around the chimeric junction + +chimMainSegmentMultNmax 10 + int>=1: maximum number of multi-alignments for the main chimeric segment. =1 will prohibit multimapping main segments. + +chimMultimapNmax 0 + int>=0: maximum number of chimeric multi-alignments + 0 ... use the old scheme for chimeric detection which only considered unique alignments + +chimMultimapScoreRange 1 + int>=0: the score range for multi-mapping chimeras below the best chimeric score. Only works with --chimMultimapNmax > 1 + +chimNonchimScoreDropMin 20 + int>=0: to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value + +chimOutJunctionFormat 0 + int: formatting type for the Chimeric.out.junction file + 0 ... no comment lines/headers + 1 ... comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping + +### Quantification of Annotations +quantMode - + string(s): types of quantification requested + - ... none + TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate file + GeneCounts ... count reads per gene + +quantTranscriptomeBAMcompression 1 + int: -2 to 10 transcriptome BAM compression level + -2 ... no BAM output + -1 ... default compression (6?) + 0 ... no compression + 10 ... maximum compression + +quantTranscriptomeSAMoutput BanSingleEnd_BanIndels_ExtendSoftclip + string: alignment filtering for TranscriptomeSAM output + BanSingleEnd_BanIndels_ExtendSoftclip ... prohibit indels and single-end alignments, extend softclips - compatible with RSEM + BanSingleEnd ... prohibit single-end alignments, allow indels and softclips + BanSingleEnd_ExtendSoftclip ... prohibit single-end alignments, extend softclips, allow indels + + +### 2-pass Mapping +twopassMode None + string: 2-pass mapping mode. + None ... 1-pass mapping + Basic ... basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly + +twopass1readsN -1 + int: number of reads to process for the 1st step. Use very large number (or default -1) to map all reads in the first step. + + +### WASP parameters +waspOutputMode None + string: WASP allele-specific output type. This is re-implementation of the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, 1061–1063 (2015), https://www.nature.com/articles/nmeth.3582 . + SAMtag ... 
add WASP tags to the alignments that pass WASP filtering + +### STARsolo (single cell RNA-seq) parameters +soloType None + string(s): type of single-cell RNA-seq + CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X Chromium. + CB_UMI_Complex ... multiple Cell Barcodes of varying length, one UMI of fixed length and one adapter sequence of fixed length are allowed in read2 only (e.g. inDrop, ddSeq). + CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or SortedByCoordinate] + SmartSeq ... Smart-seq: each cell in a separate FASTQ (paired- or single-end), barcodes are corresponding read-groups, no UMI sequences, alignments deduplicated according to alignment start and end (after extending soft-clipped bases) + +soloCBtype Sequence + string: cell barcode type + Sequence: cell barcode is a sequence (standard option) + String: cell barcode is an arbitrary string + +soloCBwhitelist - + string(s): file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex allows more than one whitelist file. + None ... no whitelist: all cell barcodes are allowed + +soloCBstart 1 + int>0: cell barcode start base + +soloCBlen 16 + int>0: cell barcode length + +soloUMIstart 17 + int>0: UMI start base + +soloUMIlen 10 + int>0: UMI length + +soloBarcodeReadLength 1 + int: length of the barcode read + 1 ... equal to sum of soloCBlen+soloUMIlen + 0 ... not defined, do not check + +soloBarcodeMate 0 + int: identifies which read mate contains the barcode (CB+UMI) sequence + 0 ... barcode sequence is on separate read, which should always be the last file in the --readFilesIn listed + 1 ... barcode sequence is a part of mate 1 + 2 ... barcode sequence is a part of mate 2 + +soloCBposition - + strings(s): position of Cell Barcode(s) on the barcode read. + Presently only works with --soloType CB_UMI_Complex, and barcodes are assumed to be on Read2. + Format for each barcode: startAnchor_startPosition_endAnchor_endPosition + start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter start; 3: adapter end + start(end)Position is the 0-based position with of the CB start(end) with respect to the Anchor Base + String for different barcodes are separated by space. + Example: inDrop (Zilionis et al, Nat. Protocols, 2017): + --soloCBposition 0_0_2_-1 3_1_3_8 + +soloUMIposition - + string: position of the UMI on the barcode read, same as soloCBposition + Example: inDrop (Zilionis et al, Nat. Protocols, 2017): + --soloCBposition 3_9_3_14 + +soloAdapterSequence - + string: adapter sequence to anchor barcodes. Only one adapter sequence is allowed. + +soloAdapterMismatchesNmax 1 + int>0: maximum number of mismatches allowed in adapter sequence. + +soloCBmatchWLtype 1MM_multi + string: matching the Cell Barcodes to the WhiteList + Exact ... only exact matches allowed + 1MM ... only one match in whitelist with 1 mismatched base allowed. Allowed CBs have to have at least one read with exact match. + 1MM_multi ... multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches. + Allowed CBs have to have at least one read with exact match. This option matches best with CellRanger 2.2.0 + 1MM_multi_pseudocounts ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes. + 1MM_multi_Nbase_pseudocounts ... 
same as 1MM_multi_pseudocounts, multimatching to WL is allowed for CBs with N-bases. This option matches best with CellRanger >= 3.0.0 + EditDist_2 ... allow up to edit distance of 3 fpr each of the barcodes. May include one deletion + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline. + +soloInputSAMattrBarcodeSeq - + string(s): when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order). + For instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq CR UR . + This parameter is required when running STARsolo with input from SAM. + +soloInputSAMattrBarcodeQual - + string(s): when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order). + For instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual CY UY . + If this parameter is '-' (default), the quality 'H' will be assigned to all bases. + +soloStrand Forward + string: strandedness of the solo libraries: + Unstranded ... no strand information + Forward ... read strand same as the original RNA molecule + Reverse ... read strand opposite to the original RNA molecule + +soloFeatures Gene + string(s): genomic features for which the UMI counts per Cell Barcode are collected + Gene ... genes: reads match the gene transcript + SJ ... splice junctions: reported in SJ.out.tab + GeneFull ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns + GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns: prioritize 100% overlap with exons + GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads overlapping genes' exons and introns: prioritize >50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction. + +#####UnderDevelopment_begin : not supported - do not use + Transcript3p ... quantification of transcript for 3' protocols +#####UnderDevelopment_end + +soloMultiMappers Unique + string(s): counting method for reads mapping to multiple genes + Unique ... count only reads that map to unique genes + Uniform ... uniformly distribute multi-genic UMIs to all genes + Rescue ... distribute UMIs proportionally to unique+uniform counts (~ first iteration of EM) + PropUnique ... distribute UMIs proportionally to unique mappers, if present, and uniformly if not. + EM ... multi-gene UMIs are distributed using Expectation Maximization algorithm + +soloUMIdedup 1MM_All + string(s): type of UMI deduplication (collapsing) algorithm + 1MM_All ... all UMIs with 1 mismatch distance to each other are collapsed (i.e. counted once). + 1MM_Directional_UMItools ... follows the "directional" method from the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017). + 1MM_Directional ... same as 1MM_Directional_UMItools, but with more stringent criteria for duplicate UMIs + Exact ... only exactly matching UMIs are collapsed. + NoDedup ... no deduplication of UMIs, count all reads. + 1MM_CR ... CellRanger2-4 algorithm for 1MM UMI collapsing. + +soloUMIfiltering - + string(s): type of UMI filtering (for reads uniquely mapping to genes) + - ... basic filtering: remove UMIs with N and homopolymers (similar to CellRanger 2.2.0). + MultiGeneUMI ... basic + remove lower-count UMIs that map to more than one gene. + MultiGeneUMI_All ... basic + remove all UMIs that map to more than one gene. 
+ MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0 . + Only works with --soloUMIdedup 1MM_CR + +soloOutFileNames Solo.out/ features.tsv barcodes.tsv matrix.mtx + string(s): file names for STARsolo output: + file_name_prefix gene_names barcode_sequences cell_feature_count_matrix + +soloCellFilter CellRanger2.2 3000 0.99 10 + string(s): cell filtering type and parameters + None ... do not output filtered cells + TopCells ... only report top cells by UMI count, followed by the exact number of cells + CellRanger2.2 ... simple filtering of CellRanger 2.2. + Can be followed by numbers: number of expected cells, robust maximum percentile for UMI count, maximum to minimum ratio for UMI count + The hardcoded values are from CellRanger: nExpectedCells=3000; maxPercentile=0.99; maxMinRatio=10 + EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y + Can be followed by 10 numeric parameters: nExpectedCells maxPercentile maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN FDR simN + The hardcoded values are from CellRanger: 3000 0.99 10 45000 90000 500 0.01 20000 0.01 10000 + +soloOutFormatFeaturesGeneField3 "Gene Expression" + string(s): field 3 in the Gene features.tsv file. If "-", then no 3rd field is output. + +soloCellReadStats None + string: Output reads statistics for each CB + None ... no output + Standard ... standard output + +#####UnderDevelopment_begin : not supported - do not use +soloClusterCBfile - + string: file containing the cluster information for cell barcodes, two columns: CB cluster_index. Only used with --soloFeatures Transcript3p +#####UnderDevelopment_end diff --git a/src/star/star_genome_generate/script.sh b/src/star/star_genome_generate/script.sh new file mode 100644 index 00000000..cb3b906c --- /dev/null +++ b/src/star/star_genome_generate/script.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +set -e + +## VIASH START +## VIASH END + +mkdir -p $par_index + +STAR \ + --runMode genomeGenerate \ + --genomeDir $par_index \ + --genomeFastaFiles $par_genomeFastaFiles \ + ${meta_cpus:+--runThreadN "${meta_cpus}"} \ + ${par_sjdbGTFfile:+--sjdbGTFfile "${par_sjdbGTFfile}"} \ + ${par_sjdbOverhang:+--sjdbOverhang "${par_sjdbOverhang}"} \ + ${par_genomeSAindexNbases:+--genomeSAindexNbases "${par_genomeSAindexNbases}"} \ + ${par_sjdbGTFchrPrefix:+--sjdbGTFchrPrefix "${par_sjdbGTFchrPrefix}"} \ + ${par_sjdbGTFfeatureExon:+--sjdbGTFfeatureExon "${par_sjdbGTFfeatureExon}"} \ + ${par_sjdbGTFtagExonParentTranscript:+--sjdbGTFtagExonParentTranscript "${par_sjdbGTFtagExonParentTranscript}"} \ + ${par_sjdbGTFtagExonParentGene:+--sjdbGTFtagExonParentGene "${par_sjdbGTFtagExonParentGene}"} \ + ${par_sjdbGTFtagExonParentGeneName:+--sjdbGTFtagExonParentGeneName "${par_sjdbGTFtagExonParentGeneName}"} \ + ${par_sjdbGTFtagExonParentGeneType:+--sjdbGTFtagExonParentGeneType "${par_sjdbGTFtagExonParentGeneType}"} \ + ${par_limitGenomeGenerateRAM:+--limitGenomeGenerateRAM "${par_limitGenomeGenerateRAM}"} \ + ${par_genomeChrBinNbits:+--genomeChrBinNbits "${par_genomeChrBinNbits}"} \ + ${par_genomeSAsparseD:+--genomeSAsparseD "${par_genomeSAsparseD}"} \ + ${par_genomeSuffixLengthMax:+--genomeSuffixLengthMax "${par_genomeSuffixLengthMax}"} \ + ${par_genomeTransformType:+--genomeTransformType "${par_genomeTransformType}"} \ + ${par_genomeTransformVCF:+--genomeTransformVCF "${par_genomeTransformVCF}"} \ diff
--git a/src/star/star_genome_generate/test.sh b/src/star/star_genome_generate/test.sh new file mode 100644 index 00000000..fd0e4775 --- /dev/null +++ b/src/star/star_genome_generate/test.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +set -e + +## VIASH START +## VIASH END + +######################################################################################### + +echo "> Prepare test data" + +cat > genome.fasta <<'EOF' +>chr1 +TGGCATGAGCCAACGAACGCTGCCTCATAAGCCTCACACATCCGCGCCTATGTTGTGACTCTCTGTGAGCGTTCGTGGG +GCTCGTCACCACTATGGTTGGCCGGTTAGTAGTGTGACTCCTGGTTTTCTGGAGCTTCTTTAAACCGTAGTCCAGTCAA +TGCGAATGGCACTTCACGACGGACTGTCCTTAGCTCAGGGGA +EOF + +cat > genes.gtf <<'EOF' +chr1 example_source gene 0 50 . + . gene_id "gene1"; transcript_id "transcript1"; +chr1 example_source exon 20 40 . + . gene_id "gene1"; transcript_id "transcript1"; +EOF + +######################################################################################### + +echo "> Generate index" +"$meta_executable" \ + ${meta_cpus:+---cpus $meta_cpus} \ + --index "star_index/" \ + --genomeFastaFiles "genome.fasta" \ + --sjdbGTFfile "genes.gtf" \ + --genomeSAindexNbases 2 + +files=("Genome" "Log.out" "SA" "SAindex" "chrLength.txt" "chrName.txt" "chrNameLength.txt" "chrStart.txt" "exonGeTrInfo.tab" "exonInfo.tab" "geneInfo.tab" "genomeParameters.txt" "sjdbInfo.txt" "sjdbList.fromGTF.out.tab" "sjdbList.out.tab" "transcriptInfo.tab") + +echo ">> Check if output exists" +[ ! -d "star_index" ] && echo "Directory 'star_index' does not exist!" && exit 1 +for file in "${files[@]}"; do + [ ! -f "star_index/$file" ] && echo "File '$file' does not exist in 'star_index'." && exit 1 +done + +echo ">> Check contents of output files" +grep -q "200" "star_index/chrLength.txt" || (echo "Chromosome length in file 'chrLength.txt' is incorrect! " && exit 1) +grep -q "chr1" "star_index/chrName.txt" || (echo "Chromosome name in file 'chrName.txt' is incorrect! " && exit 1) +grep -q "chr1 200" "star_index/chrNameLength.txt" || (echo "Chromosome name in file 'chrNameLength.txt' is incorrect! " && exit 1) + +echo ">>> Test finished successfully" +exit 0
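
Editor's note on the pattern used in src/star/star_genome_generate/script.sh above: every optional STAR argument is built with bash's ${var:+...} expansion, so a flag is passed only when the corresponding par_* (or meta_*) variable is set and non-empty. The sketch below illustrates that technique only; the parameter values are made up for the example, and echo stands in for the real STAR invocation so the snippet runs without STAR installed.

#!/bin/bash
# Minimal sketch (not part of the component) of the ${var:+...} conditional-flag pattern.
par_sjdbOverhang="100"            # set and non-empty -> "--sjdbOverhang 100" is included
unset par_genomeSAindexNbases     # unset             -> the flag is dropped entirely

# echo replaces the actual STAR call so the resulting command line can be inspected.
echo STAR --runMode genomeGenerate \
  ${par_sjdbOverhang:+--sjdbOverhang "${par_sjdbOverhang}"} \
  ${par_genomeSAindexNbases:+--genomeSAindexNbases "${par_genomeSAindexNbases}"}
# Prints: STAR --runMode genomeGenerate --sjdbOverhang 100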