From 7d99065ecf66e6bc42b03f8ffcfcfc95ef2d2b72 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Wed, 17 Jul 2024 17:46:44 +0200
Subject: [PATCH 1/7] `bd_rhapsody_make_reference`: Create a reference for the
 BD Rhapsody pipeline (#75)

* `bd_rhapsody/bd_rhapsody_make_reference`: Create a reference for the BD Rhapsody pipeline

* add missing metadata

* remove unicode

* trigger

* process comments

* add authors

* Apply suggestions from code review

Co-authored-by: Dorien <41797896+dorien-er@users.noreply.github.com>

---------

Co-authored-by: Dorien <41797896+dorien-er@users.noreply.github.com>
---
 CHANGELOG.md                                  |   6 +
 src/_authors/robrecht_cannoodt.yaml           |  14 ++
 src/_authors/weiwei_schultz.yaml              |   5 +
 .../config.vsh.yaml                           | 143 ++++++++++++++++
 .../bd_rhapsody_make_reference/help.txt       |  66 +++++++
 .../make_rhap_reference_2.2.1_nodocker.cwl    | 115 +++++++++++++
 .../bd_rhapsody_make_reference/script.py      | 161 ++++++++++++++++++
 .../bd_rhapsody_make_reference/test.sh        |  68 ++++++++
 .../test_data/reference_small.fa              |  27 +++
 .../test_data/reference_small.gtf             |   8 +
 .../test_data/script.sh                       |  47 +++++
 11 files changed, 660 insertions(+)
 create mode 100644 src/_authors/robrecht_cannoodt.yaml
 create mode 100644 src/_authors/weiwei_schultz.yaml
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/help.txt
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/script.py
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/test.sh
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.fa
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.gtf
 create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/test_data/script.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 80b8b9f3..9cfacdbc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # biobox x.x.x
 
+## NEW FEATURES
+
+* `bd_rhapsody`:
+
+  - `bd_rhapsody/bd_rhapsody_make_reference`: Create a reference for the BD Rhapsody pipeline (PR #75).
+
 ## BUG FIXES
 
 * `pear`: fix component not exiting with the correct exitcode when PEAR fails.
diff --git a/src/_authors/robrecht_cannoodt.yaml b/src/_authors/robrecht_cannoodt.yaml
new file mode 100644
index 00000000..d7c0f283
--- /dev/null
+++ b/src/_authors/robrecht_cannoodt.yaml
@@ -0,0 +1,14 @@
+name: Robrecht Cannoodt
+info:
+  links:
+    email: robrecht@data-intuitive.com
+    github: rcannood
+    orcid: "0000-0003-3641-729X"
+    linkedin: robrechtcannoodt
+  organizations:
+    - name: Data Intuitive
+      href: https://www.data-intuitive.com
+      role: Data Science Engineer
+    - name: Open Problems
+      href: https://openproblems.bio
+      role: Core Member
\ No newline at end of file
diff --git a/src/_authors/weiwei_schultz.yaml b/src/_authors/weiwei_schultz.yaml
new file mode 100644
index 00000000..324f9378
--- /dev/null
+++ b/src/_authors/weiwei_schultz.yaml
@@ -0,0 +1,5 @@
+name: Weiwei Schultz
+info:
+  organizations:
+    - name: Janssen R&D US
+      role: Associate Director Data Sciences
\ No newline at end of file
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml b/src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml
new file mode 100644
index 00000000..e596bf06
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml
@@ -0,0 +1,143 @@
+name: bd_rhapsody_make_reference
+namespace: bd_rhapsody
+description: |
+  The Reference Files Generator creates an archive containing Genome Index
+  and Transcriptome annotation files needed for the BD Rhapsody Sequencing
+  Analysis Pipeline. The app takes as input one or more FASTA and GTF files
+  and produces a compressed archive in the form of a tar.gz file. The 
+  archive contains:
+  
+  - STAR index
+  - Filtered GTF file
+keywords: [genome, reference, index, align]
+links:
+  repository: https://bitbucket.org/CRSwDev/cwl/src/master/v2.2.1/Extra_Utilities/
+  documentation: https://bd-rhapsody-bioinfo-docs.genomics.bd.com/resources/extra_utilities.html#make-rhapsody-reference
+license: Unknown
+authors:
+  - __merge__: /src/_authors/robrecht_cannoodt.yaml
+    roles: [ author, maintainer ]
+  - __merge__: /src/_authors/weiwei_schultz.yaml
+    roles: [ contributor ]
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - type: file
+        name: --genome_fasta
+        required: true
+        description: Reference genome file in FASTA or FASTA.GZ format. The BD Rhapsody Sequencing Analysis Pipeline uses GRCh38 for Human and GRCm39 for Mouse.
+        example: genome_sequence.fa.gz
+        multiple: true
+        info:
+          config_key: Genome_fasta
+      - type: file
+        name: --gtf
+        required: true
+        description: |
+          File path to the transcript annotation files in GTF or GTF.GZ format. The Sequence Analysis Pipeline requires the 'gene_name' or 
+          'gene_id' attribute to be set on each gene and exon feature. Gene and exon feature lines must have the same attribute, and exons
+          must have a corresponding gene with the same value. For TCR/BCR assays, the TCR or BCR gene segments must have the 'gene_type' or
+          'gene_biotype' attribute set, and the value should begin with 'TR' or 'IG', respectively.
+        example: transcriptome_annotation.gtf.gz
+        multiple: true
+        info:
+          config_key: Gtf
+      - type: file
+        name: --extra_sequences
+        description: |
+          File path to additional sequences in FASTA format to use when building the STAR index. (e.g. transgenes or CRISPR guide barcodes).
+          GTF lines for these sequences will be automatically generated and combined with the main GTF.
+        required: false
+        multiple: true
+        info:
+          config_key: Extra_sequences
+  - name: Outputs
+    arguments:
+      - type: file
+        name: --reference_archive
+        direction: output
+        required: true
+        description: |
+          A Compressed archive containing the Reference Genome Index and annotation GTF files. This archive is meant to be used as an
+          input in the BD Rhapsody Sequencing Analysis Pipeline.
+        example: star_index.tar.gz
+  - name: Arguments
+    arguments:
+      - type: string
+        name: --mitochondrial_contigs
+        description: |
+          Names of the Mitochondrial contigs in the provided Reference Genome. Fragments originating from contigs other than these are
+          identified as 'nuclear fragments' in the ATACseq analysis pipeline.
+        required: false
+        multiple: true
+        default: [chrM, chrMT, M, MT]
+        info:
+          config_key: Mitochondrial_Contigs  # must equal the CWL input id exactly (case-sensitive)
+      - type: boolean_true
+        name: --filtering_off
+        description: |
+          By default the input Transcript Annotation files are filtered based on the gene_type/gene_biotype attribute. Only features 
+          having the following attribute values are kept:
+
+            - protein_coding
+            - lncRNA (lincRNA and antisense for Gencode < v31/M22/Ensembl97)
+            - IG_LV_gene
+            - IG_V_gene
+            - IG_V_pseudogene
+            - IG_D_gene
+            - IG_J_gene
+            - IG_J_pseudogene
+            - IG_C_gene
+            - IG_C_pseudogene
+            - TR_V_gene
+            - TR_V_pseudogene
+            - TR_D_gene
+            - TR_J_gene
+            - TR_J_pseudogene
+            - TR_C_gene
+
+            If you have already pre-filtered the input Annotation files and/or wish to turn-off the filtering, please set this option to True.
+        info:
+          config_key: Filtering_off
+      - type: boolean_true
+        name: --wta_only_index
+        description: Build a WTA only index, otherwise builds a WTA + ATAC index.
+        info:
+          config_key: WTA_Only  # must equal the CWL input id exactly (case-sensitive)
+      - type: string
+        name: --extra_star_params
+        description: Additional parameters to pass to STAR when building the genome index. Specify exactly like how you would on the command line.
+        example: --limitGenomeGenerateRAM 48000 --genomeSAindexNbases 11
+        required: false
+        info:
+          config_key: Extra_STAR_params
+
+resources:
+  - type: python_script
+    path: script.py
+  - path: make_rhap_reference_2.2.1_nodocker.cwl
+
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - path: test_data
+
+requirements:
+  commands: [ "cwl-runner" ]
+
+engines:
+  - type: docker
+    image: bdgenomics/rhapsody:2.2.1
+    setup:
+      - type: apt
+        packages: [procps]
+      - type: python
+        packages: [cwlref-runner, cwl-runner]
+      - type: docker
+        run: |
+          echo "bdgenomics/rhapsody: 2.2.1" > /var/software_versions.txt
+
+runners:
+  - type: executable
+  - type: nextflow
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/help.txt b/src/bd_rhapsody/bd_rhapsody_make_reference/help.txt
new file mode 100644
index 00000000..cd038b25
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/help.txt
@@ -0,0 +1,66 @@
+```bash
+cwl-runner src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl --help
+```
+
+usage: src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl
+       [-h] [--Archive_prefix ARCHIVE_PREFIX]
+       [--Extra_STAR_params EXTRA_STAR_PARAMS]
+       [--Extra_sequences EXTRA_SEQUENCES] [--Filtering_off] --Genome_fasta
+       GENOME_FASTA --Gtf GTF [--Maximum_threads MAXIMUM_THREADS]
+       [--Mitochondrial_Contigs MITOCHONDRIAL_CONTIGS] [--WTA_Only]
+       [job_order]
+
+The Reference Files Generator creates an archive containing Genome Index and
+Transcriptome annotation files needed for the BD Rhapsody™ Sequencing
+Analysis Pipeline. The app takes as input one or more FASTA and GTF files and
+produces a compressed archive in the form of a tar.gz file. The archive
+contains:\n - STAR index\n - Filtered GTF file
+
+positional arguments:
+  job_order             Job input json file
+
+options:
+  -h, --help            show this help message and exit
+  --Archive_prefix ARCHIVE_PREFIX
+                        A prefix for naming the compressed archive file
+                        containing the Reference genome index and annotation
+                        files. The default value is constructed based on the
+                        input Reference files.
+  --Extra_STAR_params EXTRA_STAR_PARAMS
+                        Additional parameters to pass to STAR when building
+                        the genome index. Specify exactly like how you would
+                        on the command line. Example: --limitGenomeGenerateRAM
+                        48000 --genomeSAindexNbases 11
+  --Extra_sequences EXTRA_SEQUENCES
+                        Additional sequences in FASTA format to use when
+                        building the STAR index. (E.g. phiX genome)
+  --Filtering_off       By default the input Transcript Annotation files are
+                        filtered based on the gene_type/gene_biotype
+                        attribute. Only features having the following
+                        attribute values are are kept: - protein_coding -
+                        lncRNA (lincRNA and antisense for Gencode <
+                        v31/M22/Ensembl97) - IG_LV_gene - IG_V_gene -
+                        IG_V_pseudogene - IG_D_gene - IG_J_gene -
+                        IG_J_pseudogene - IG_C_gene - IG_C_pseudogene -
+                        TR_V_gene - TR_V_pseudogene - TR_D_gene - TR_J_gene -
+                        TR_J_pseudogene - TR_C_gene If you have already pre-
+                        filtered the input Annotation files and/or wish to
+                        turn-off the filtering, please set this option to
+                        True.
+  --Genome_fasta GENOME_FASTA
+                        Reference genome file in FASTA format. The BD
+                        Rhapsody™ Sequencing Analysis Pipeline uses GRCh38
+                        for Human and GRCm39 for Mouse.
+  --Gtf GTF             Transcript annotation files in GTF format. The BD
+                        Rhapsody™ Sequencing Analysis Pipeline uses Gencode
+                        v42 for Human and M31 for Mouse.
+  --Maximum_threads MAXIMUM_THREADS
+                        The maximum number of threads to use in the pipeline.
+                        By default, all available cores are used.
+  --Mitochondrial_Contigs MITOCHONDRIAL_CONTIGS
+                        Names of the Mitochondrial contigs in the provided
+                        Reference Genome. Fragments originating from contigs
+                        other than these are identified as 'nuclear fragments'
+                        in the ATACseq analysis pipeline.
+  --WTA_Only            Build a WTA only index, otherwise builds a WTA + ATAC
+                        index.
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl b/src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl
new file mode 100644
index 00000000..fead2c02
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl
@@ -0,0 +1,115 @@
+requirements:
+  InlineJavascriptRequirement: {}
+class: CommandLineTool
+label: Reference Files Generator for BD Rhapsody™ Sequencing Analysis Pipeline
+cwlVersion: v1.2
+doc: >- 
+    The Reference Files Generator creates an archive containing Genome Index and Transcriptome annotation files needed for the BD Rhapsody™ Sequencing Analysis Pipeline. The app takes as input one or more FASTA and GTF files and produces a compressed archive in the form of a tar.gz file. The archive contains:\n  - STAR index\n  - Filtered GTF file
+
+
+baseCommand: run_reference_generator.sh 
+inputs: 
+    Genome_fasta:
+        type: File[]
+        label: Reference Genome
+        doc: |-
+            Reference genome file in FASTA format. The BD Rhapsody™ Sequencing Analysis Pipeline uses GRCh38 for Human and GRCm39 for Mouse.
+        inputBinding:
+            prefix: --reference-genome
+            shellQuote: false
+    Gtf:
+        type: File[]
+        label: Transcript Annotations
+        doc: |-
+            Transcript annotation files in GTF format. The BD Rhapsody™ Sequencing Analysis Pipeline uses Gencode v42 for Human and M31 for Mouse.
+        inputBinding:
+            prefix: --gtf
+            shellQuote: false
+    Extra_sequences:
+        type: File[]?
+        label: Extra Sequences
+        doc: |-
+            Additional sequences in FASTA format to use when building the STAR index. (E.g. phiX genome)
+        inputBinding:
+            prefix: --extra-sequences
+            shellQuote: false
+    Mitochondrial_Contigs:
+        type: string[]?
+        default: ["chrM", "chrMT", "M", "MT"]
+        label: Mitochondrial Contig Names
+        doc: |-
+            Names of the Mitochondrial contigs in the provided Reference Genome. Fragments originating from contigs other than these are identified as 'nuclear fragments' in the ATACseq analysis pipeline.
+        inputBinding:
+            prefix: --mitochondrial-contigs
+            shellQuote: false
+    Filtering_off:
+        type: boolean?
+        label: Turn off filtering
+        doc: |-
+            By default the input Transcript Annotation files are filtered based on the gene_type/gene_biotype attribute. Only features having the following attribute values are are kept:
+            - protein_coding
+            - lncRNA (lincRNA and antisense for Gencode < v31/M22/Ensembl97)
+            - IG_LV_gene
+            - IG_V_gene
+            - IG_V_pseudogene
+            - IG_D_gene
+            - IG_J_gene
+            - IG_J_pseudogene
+            - IG_C_gene
+            - IG_C_pseudogene
+            - TR_V_gene
+            - TR_V_pseudogene
+            - TR_D_gene
+            - TR_J_gene
+            - TR_J_pseudogene
+            - TR_C_gene
+            If you have already pre-filtered the input Annotation files and/or wish to turn-off the filtering, please set this option to True.
+        inputBinding: 
+            prefix: --filtering-off
+            shellQuote: false
+    WTA_Only:
+        type: boolean?
+        label: WTA only index
+        doc: Build a WTA only index, otherwise builds a WTA + ATAC index.
+        inputBinding:
+            prefix: --wta-only-index
+            shellQuote: false
+    Archive_prefix:
+        type: string?
+        label: Archive Prefix
+        doc: |-
+            A prefix for naming the compressed archive file containing the Reference genome index and annotation files. The default value is constructed based on the input Reference files.
+        inputBinding:
+            prefix: --archive-prefix
+            shellQuote: false
+    Extra_STAR_params:
+        type: string?
+        label: Extra STAR Params
+        doc: |-
+            Additional parameters to pass to STAR when building the genome index. Specify exactly like how you would on the command line.
+            Example:
+              --limitGenomeGenerateRAM 48000 --genomeSAindexNbases 11
+        inputBinding:
+            prefix: --extra-star-params 
+            shellQuote: true
+  
+    Maximum_threads:
+        type: int?
+        label: Maximum Number of Threads
+        doc: |-
+            The maximum number of threads to use in the pipeline. By default, all available cores are used.
+        inputBinding:
+            prefix: --maximum-threads
+            shellQuote: false
+
+outputs:
+
+    Archive:
+        type: File
+        doc: |- 
+            A Compressed archive containing the Reference Genome Index and annotation GTF files. This archive is meant to be used as an input in the BD Rhapsody™ Sequencing Analysis Pipeline.
+        id: Reference_Archive
+        label: Reference Files Archive
+        outputBinding:
+            glob: '*.tar.gz'
+
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/script.py b/src/bd_rhapsody/bd_rhapsody_make_reference/script.py
new file mode 100644
index 00000000..ca635508
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/script.py
@@ -0,0 +1,161 @@
+import os
+import re
+import subprocess
+import tempfile
+from typing import Any
+import yaml
+import shutil
+
+## VIASH START
+par = {  # placeholder arguments, only meaningful when running this script directly for debugging
+    "genome_fasta": [],
+    "gtf": [],
+    "extra_sequences": [],
+    "mitochondrial_contigs": ["chrM", "chrMT", "M", "MT"],
+    "filtering_off": False,
+    "wta_only_index": False,
+    "extra_star_params": None,
+    "reference_archive": "output.tar.gz",
+}
+meta = {  # paths below refer to this component (previously copied from another component)
+    "config": "target/nextflow/bd_rhapsody/bd_rhapsody_make_reference/.config.vsh.yaml",
+    "resources_dir": os.path.abspath("src/bd_rhapsody/bd_rhapsody_make_reference"),
+    "temp_dir": os.getenv("VIASH_TEMP"),
+    "memory_mb": None,
+    "cpus": None
+}
+## VIASH END
+
+def clean_arg(argument):  # stores the argument name without its leading dashes under "clean_name"
+    argument["clean_name"] = argument["name"].lstrip("-")
+    return argument
+
+def read_config(path: str) -> dict[str, Any]:
+    """Load the YAML config at ``path`` and flatten all group arguments into ``all_arguments``."""
+    with open(path, "r") as handle:
+        loaded = yaml.safe_load(handle)
+
+    flattened = []
+    for group in loaded["argument_groups"]:
+        flattened.extend(clean_arg(entry) for entry in group["arguments"])
+    loaded["all_arguments"] = flattened
+
+    return loaded
+
+def strip_margin(text: str) -> str:  # per line: drop the indentation up to and including the leading "|" marker
+    return re.sub(r"^[ \t]*\|", "", text, flags=re.MULTILINE)  # raw + anchored: no invalid "\|" escape, inner "|" kept
+
+def process_params(par: dict[str, Any], config) -> dict[str, Any]:
+    """Check the required inputs, then rewrite every file argument in ``par`` as an absolute path."""
+    assert par["genome_fasta"], "Pass at least one set of inputs to --genome_fasta."
+    assert par["gtf"], "Pass at least one set of inputs to --gtf."
+    assert par["reference_archive"].endswith(".tar.gz"), "Output reference_archive must end with .tar.gz."
+
+    # make paths absolute: cwl-runner is started with a temporary directory as its cwd
+    for argument in config["all_arguments"]:
+        key = argument["clean_name"]
+        if par[key] and argument["type"] == "file":
+            if isinstance(par[key], list):
+                par[key] = [os.path.abspath(f) for f in par[key]]
+            else:
+                par[key] = os.path.abspath(par[key])
+    return par
+
+def generate_config(par: dict[str, Any], meta, config) -> str:
+    """Render the cwl-runner job file (YAML) from the parsed arguments.
+
+    Only arguments that carry an ``info.config_key`` and have a truthy value
+    are emitted; ``meta["cpus"]`` is forwarded as ``Maximum_threads``.
+    """
+    content_list = [strip_margin("""\
+        |#!/usr/bin/env cwl-runner
+        |
+        |""")]
+
+    config_key_value_pairs = []
+    for argument in config["all_arguments"]:
+        config_key = (argument.get("info") or {}).get("config_key")
+        arg_type = argument["type"]
+        par_value = par[argument["clean_name"]]
+        if par_value and config_key:
+            config_key_value_pairs.append((config_key, arg_type, par_value))
+
+    if meta["cpus"]:
+        config_key_value_pairs.append(("Maximum_threads", "integer", meta["cpus"]))
+
+    for config_key, arg_type, par_value in config_key_value_pairs:
+        if arg_type == "file":
+            entry = strip_margin(f"""\
+                |{config_key}:
+                |""")
+            if isinstance(par_value, list):
+                for file in par_value:
+                    entry += strip_margin(f"""\
+                        | - class: File
+                        |   location: "{file}"
+                        |""")
+            else:
+                entry += strip_margin(f"""\
+                    |   class: File
+                    |   location: "{par_value}"
+                    |""")
+            content_list.append(entry)
+        else:
+            # safe_dump quotes/escapes scalars, so values containing ': ' or ' #' stay valid YAML
+            content_list.append(yaml.safe_dump({config_key: par_value}))
+
+    return "".join(content_list)
+
+def get_cwl_file(meta: dict[str, Any]) -> str:
+    """Return the path of the CWL tool definition inside the component's resources directory."""
+    cwl_name = "make_rhap_reference_2.2.1_nodocker.cwl"
+    cwl_path = os.path.join(meta["resources_dir"], cwl_name)
+    return cwl_path
+
+def main(par: dict[str, Any], meta: dict[str, Any]):
+    """Build the reference archive by running the bundled CWL tool through cwl-runner."""
+    config = read_config(meta["config"])
+
+    # Preprocess params
+    par = process_params(par, config)
+
+    # fetch cwl file
+    cwl_file = get_cwl_file(meta)
+
+    # Create output dir if not exists (exist_ok avoids the check-then-create race)
+    outdir = os.path.dirname(par["reference_archive"])
+    os.makedirs(outdir, exist_ok=True)
+
+    ## Run pipeline
+    with tempfile.TemporaryDirectory(prefix="cwl-bd_rhapsody_make_reference-", dir=meta["temp_dir"]) as temp_dir:
+        # Create params file
+        config_file = os.path.join(temp_dir, "config.yml")
+        config_content = generate_config(par, meta, config)
+        with open(config_file, "w") as f:
+            f.write(config_content)
+
+        cmd = [
+            "cwl-runner",
+            "--no-container",
+            "--preserve-entire-environment",
+            "--outdir",
+            temp_dir,
+            cwl_file,
+            config_file
+        ]
+
+        # TMPDIR points at temp_dir, presumably so scratch files are removed together with it
+        env = dict(os.environ)
+        env["TMPDIR"] = temp_dir
+
+        print("> " + " ".join(cmd), flush=True)
+        subprocess.check_call(
+            cmd,
+            cwd=os.path.dirname(config_file),
+            env=env
+        )
+
+        shutil.move(os.path.join(temp_dir, "Rhap_reference.tar.gz"), par["reference_archive"])
+
+if __name__ == "__main__":
+    main(par, meta)
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh b/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh
new file mode 100644
index 00000000..3637160a
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+set -e
+
+#############################################
+# helper functions
+assert_file_exists() {  # abort the test script unless $1 is a regular file
+  if [ ! -f "$1" ]; then echo "File '$1' does not exist" && exit 1; fi
+}
+assert_file_doesnt_exist() {  # abort the test script if $1 is a regular file
+  if [ -f "$1" ]; then echo "File '$1' exists but shouldn't" && exit 1; fi
+}
+assert_file_empty() {
+  # Abort unless $1 is missing or zero-sized; exit runs in the current shell, so it ends the whole script.
+  if [ -s "$1" ]; then echo "File '$1' is not empty but should be" && exit 1; fi
+}
+assert_file_not_empty() {
+  # Abort unless $1 exists and has a size greater than zero.
+  if [ ! -s "$1" ]; then echo "File '$1' is empty but shouldn't be" && exit 1; fi
+}
+assert_file_contains() {
+  # Abort unless the grep pattern $2 matches at least one line of file $1.
+  if ! grep -q "$2" "$1"; then echo "File '$1' does not contain '$2'" && exit 1; fi
+}
+assert_file_not_contains() {
+  # Negate grep so the wanted no-match case returns 0; 'grep && {...}' returned 1 there and tripped 'set -e'.
+  ! grep -q "$2" "$1" || { echo "File '$1' contains '$2' but shouldn't" && exit 1; }
+}
+
+in_fa="$meta_resources_dir/test_data/reference_small.fa"
+in_gtf="$meta_resources_dir/test_data/reference_small.gtf"
+
+echo "#############################################"
+echo "> Simple run"
+
+mkdir simple_run
+cd simple_run
+
+out_tar="myreference.tar.gz"
+
+echo "> Running $meta_name."
+$meta_executable \
+  --genome_fasta "$in_fa" \
+  --gtf "$in_gtf" \
+  --reference_archive "$out_tar" \
+  --extra_star_params "--genomeSAindexNbases 6" \
+  ---cpus 2
+
+exit_code=$?
+[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1
+
+assert_file_exists "$out_tar"
+assert_file_not_empty "$out_tar"
+
+echo ">> Checking whether output contains the expected files"
+tar -xvf "$out_tar" > /dev/null
+assert_file_exists "BD_Rhapsody_Reference_Files/star_index/genomeParameters.txt"
+assert_file_exists "BD_Rhapsody_Reference_Files/bwa-mem2_index/reference_small.ann"
+assert_file_exists "BD_Rhapsody_Reference_Files/reference_small-processed.gtf"
+assert_file_exists "BD_Rhapsody_Reference_Files/mitochondrial_contigs.txt"
+assert_file_contains "BD_Rhapsody_Reference_Files/reference_small-processed.gtf" "chr1.*HAVANA.*ENSG00000243485"
+assert_file_contains "BD_Rhapsody_Reference_Files/mitochondrial_contigs.txt" 'chrMT'
+
+cd ..
+
+echo "#############################################"
+
+echo "> Tests succeeded!"
\ No newline at end of file
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.fa b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.fa
new file mode 100644
index 00000000..386d887c
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.fa
@@ -0,0 +1,27 @@
+>chr1 1
+TGGGGAAGCAAGGCGGAGTTGGGCAGCTCGTGTTCAATGGGTAGAGTTTCAGGCTGGGGT
+GATGGAAGGGTGCTGGAAATGAGTGGTAGTGATGGCGGCACAACAGTGTGAATCTACTTA
+ATCCCACTGAACTGTATGCTGAAAAATGGTTTAGACGGTGAATTTTAGGTTATGTATGTT
+TTACCACAATTTTTAAAAAGCTAGTGAAAAGCTGGTAAAAAGAAAGAAAAGAGGCTTTTT
+TAAAAAGTTAAATATATAAAAAGAGCATCATCAGTCCAAAGTCCAGCAGTTGTCCCTCCT
+GGAATCCGTTGGCTTGCCTCCGGCATTTTTGGCCCTTGCCTTTTAGGGTTGCCAGATTAA
+AAGACAGGATGCCCAGCTAGTTTGAATTTTAGATAAACAACGAATAATTTCGTAGCATAA
+ATATGTCCCAAGCTTAGTTTGGGACATACTTATGCTAAAAAACATTATTGGTTGTTTATC
+TGAGATTCAGAATTAAGCATTTTATATTTTATTTGCTGCCTCTGGCCACCCTACTCTCTT
+CCTAACACTCTCTCCCTCTCCCAGTTTTGTCCGCCTTCCCTGCCTCCTCTTCTGGGGGAG
+TTAGATCGAGTTGTAACAAGAACATGCCACTGTCTCGCTGGCTGCAGCGTGTGGTCCCCT
+TACCAGAGGTAAAGAAGAGATGGATCTCCACTCATGTTGTAGACAGAATGTTTATGTCCT
+CTCCAAATGCTTATGTTGAAACCCTAACCCCTAATGTGATGGTATGTGGAGATGGGCCTT
+TGGTAGGTAATTACGGTTAGATGAGGTCATGGGGTGGGGCCCTCATTATAGATCTGGTAA
+GAAAAGAGAGCATTGTCTCTGTGTCTCCCTCTCTCTCTCTCTCTCTCTCTCTCATTTCTC
+TCTATCTCATTTCTCTCTCTCTCGCTATCTCATTTTTCTCTCTCTCTCTTTCTCTCCTCT
+GTCTTTTCCCACCAAGTGAGGATGCGAAGAGAAGGTGGCTGTCTGCAAACCAGGAAGAGA
+GCCCTCACCGGGAACCCGTCCAGCTGCCACCTTGAACTTGGACTTCCAAGCCTCCAGAAC
+TGTGAGGGATAAATGTATGATTTTAAAGTCGCCCAGTGTGTGGTATTTTGTTTTGACTAA
+TACAACCTGAAAACATTTTCCCCTCACTCCACCTGAGCAATATCTGAGTGGCTTAAGGTA
+CTCAGGACACAACAAAGGAGAAATGTCCCATGCACAAGGTGCACCCATGCCTGGGTAAAG
+CAGCCTGGCACAGAGGGAAGCACACAGGCTCAGGGATCTGCTATTCATTCTTTGTGTGAC
+CCTGGGCAAGCCATGAATGGAGCTTCAGTCACCCCATTTGTAATGGGATTTAATTGTGCT
+TGCCCTGCCTCCTTTTGAGGGCTGTAGAGAAAAGATGTCAAAGTATTTTGTAATCTGGCT
+GGGCGTGGTGGCTCATGCCTGTAATCCTAGCACTTTGGTAGGCTGACGCGAGAGGACTGC
+T
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.gtf b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.gtf
new file mode 100644
index 00000000..7ba83523
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.gtf
@@ -0,0 +1,8 @@
+chr1	HAVANA	exon	565	668	.	+	.	gene_id "ENSG00000243485.5"; transcript_id "ENST00000473358.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-202"; exon_number 2; exon_id "ENSE00001922571.1"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "dotter_confirmed"; tag "basic"; tag "Ensembl_canonical"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002840.1";
+chr1	HAVANA	exon	977	1098	.	+	.	gene_id "ENSG00000243485.5"; transcript_id "ENST00000473358.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-202"; exon_number 3; exon_id "ENSE00001827679.1"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "dotter_confirmed"; tag "basic"; tag "Ensembl_canonical"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002840.1";
+chr1	HAVANA	transcript	268	1110	.	+	.	gene_id "ENSG00000243485.5"; transcript_id "ENST00000469289.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-201"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "basic"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002841.2";
+chr1	HAVANA	exon	268	668	.	+	.	gene_id "ENSG00000243485.5"; transcript_id "ENST00000469289.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-201"; exon_number 1; exon_id "ENSE00001841699.1"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "basic"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002841.2";
+chr1	HAVANA	exon	977	1110	.	+	.	gene_id "ENSG00000243485.5"; transcript_id "ENST00000469289.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-201"; exon_number 2; exon_id "ENSE00001890064.1"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "basic"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002841.2";
+chr1	ENSEMBL	gene	367	504	.	+	.	gene_id "ENSG00000284332.1"; gene_type "miRNA"; gene_name "MIR1302-2"; level 3; hgnc_id "HGNC:35294";
+chr1	ENSEMBL	transcript	367	504	.	+	.	gene_id "ENSG00000284332.1"; transcript_id "ENST00000607096.1"; gene_type "miRNA"; gene_name "MIR1302-2"; transcript_type "miRNA"; transcript_name "MIR1302-2-201"; level 3; transcript_support_level "NA"; hgnc_id "HGNC:35294"; tag "basic"; tag "Ensembl_canonical";
+chr1	ENSEMBL	exon	367	504	.	+	.	gene_id "ENSG00000284332.1"; transcript_id "ENST00000607096.1"; gene_type "miRNA"; gene_name "MIR1302-2"; transcript_type "miRNA"; transcript_name "MIR1302-2-201"; exon_number 1; exon_id "ENSE00003695741.1"; level 3; transcript_support_level "NA"; hgnc_id "HGNC:35294"; tag "basic"; tag "Ensembl_canonical";
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/script.sh b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/script.sh
new file mode 100644
index 00000000..8d468064
--- /dev/null
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/script.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Builds the small FASTA/GTF test reference from GENCODE release 41; OUT_DIR is relative to the current directory.
+set -eo pipefail
+
+TMP_DIR=/tmp/bd_rhapsody_make_reference
+OUT_DIR=src/bd_rhapsody/bd_rhapsody_make_reference/test_data
+
+# check if seqkit is installed
+if ! command -v seqkit &> /dev/null; then
+  echo "seqkit could not be found"
+  exit 1
+fi
+
+# create temporary directory and clean up on exit (also runs when a command fails under `set -e`)
+mkdir -p "$TMP_DIR"
+trap 'rm -rf "$TMP_DIR"' EXIT
+
+# fetch reference
+ORIG_FA="$TMP_DIR/reference.fa.gz"
+if [ ! -f "$ORIG_FA" ]; then
+  wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz \
+    -O "$ORIG_FA"
+fi
+
+ORIG_GTF="$TMP_DIR/reference.gtf.gz"
+if [ ! -f "$ORIG_GTF" ]; then
+  wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz \
+    -O "$ORIG_GTF"
+fi
+
+# create small reference
+START=30000
+END=31500
+CHR=chr1
+
+# subset to small region; GTF features fully inside it are shifted so that START becomes position 1
+seqkit grep -r -p "^$CHR\$" "$ORIG_FA" | \
+  seqkit subseq -r "$START:$END" > "$OUT_DIR/reference_small.fa"
+
+zcat "$ORIG_GTF" | \
+  awk -v FS='\t' -v OFS='\t' "
+    \$1 == \"$CHR\" && \$4 >= $START && \$5 <= $END {
+      \$4 = \$4 - $START + 1;
+      \$5 = \$5 - $START + 1;
+      print;
+    }" > "$OUT_DIR/reference_small.gtf"

From c2e340d92ea7f153d0c5c9de1cffbc6b88fc4124 Mon Sep 17 00:00:00 2001
From: emmarousseau <emmarou1@icloud.com>
Date: Wed, 17 Jul 2024 18:10:37 +0200
Subject: [PATCH 2/7] Remove multiple_sep (#78)

* initial commit dedup

* Revert "initial commit dedup"

This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2.

* get rid of multiple_sep fields in configs

* Fix coverage argument's format in config
---
 src/gffread/config.vsh.yaml                 |  5 +--
 src/gffread/script.sh                       |  2 ++
 src/gffread/test.sh                         |  2 +-
 src/samtools/samtools_stats/config.vsh.yaml | 40 ++++++++++-----------
 src/samtools/samtools_stats/script.sh       |  3 ++
 src/samtools/samtools_stats/test.sh         |  2 +-
 6 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/src/gffread/config.vsh.yaml b/src/gffread/config.vsh.yaml
index d2c41a87..7477a284 100644
--- a/src/gffread/config.vsh.yaml
+++ b/src/gffread/config.vsh.yaml
@@ -8,8 +8,6 @@ links:
 references: 
   doi: 10.12688/f1000research.23297.2
 license: MIT
-requirements:
-  commands: [ gffread ]
 argument_groups:
   - name: Inputs
     arguments:
@@ -52,7 +50,7 @@ argument_groups:
         required: true
         description: |
           Write the output records into <outfile>.
-        default: output.gff
+        example: output.gff
       - name: --force_exons
         type: boolean_true
         description: |
@@ -154,7 +152,6 @@ argument_groups:
       - name: --table
         type: string
         multiple: true
-        multiple_sep: ","
         description: |
           Output a simple tab delimited format instead of GFF, with columns having the values 
           of GFF attributes given in <attrlist>; special pseudo-attributes (prefixed by @) are 
diff --git a/src/gffread/script.sh b/src/gffread/script.sh
index 9c4a2b8f..cd4abf14 100644
--- a/src/gffread/script.sh
+++ b/src/gffread/script.sh
@@ -50,6 +50,8 @@
 [[ "$par_expose_dups" == "false" ]] && unset par_expose_dups
 [[ "$par_cluster_only" == "false" ]] && unset par_cluster_only
 
+# viash joins multiple values with ";"; turn them into the comma-separated list used for --table (no-op when empty)
+par_table=$(echo "$par_table" | tr ';' ',')
 
 $(which gffread) \
     "$par_input" \
diff --git a/src/gffread/test.sh b/src/gffread/test.sh
index 326fce50..ea23edcb 100755
--- a/src/gffread/test.sh
+++ b/src/gffread/test.sh
@@ -86,7 +86,7 @@ diff "$expected_output_dir/transcripts.fa" "$test_output_dir/transcripts.fa" ||
 echo "> Test 4 - Generate table from GFF annotation file"
 
 "$meta_executable" \
-  --table @id,@chr,@start,@end,@strand,@exons,Name,gene,product \
+  --table "@id;@chr;@start;@end;@strand;@exons;Name;gene;product" \
   --outfile "$test_output_dir/annotation.tbl" \
   --input "$test_dir/sequence.gff3"
 
diff --git a/src/samtools/samtools_stats/config.vsh.yaml b/src/samtools/samtools_stats/config.vsh.yaml
index 0d8f57a4..ca630876 100644
--- a/src/samtools/samtools_stats/config.vsh.yaml
+++ b/src/samtools/samtools_stats/config.vsh.yaml
@@ -30,10 +30,10 @@ argument_groups:
     - name: --coverage
       alternatives: -c
       type: integer
-      description: |
-        Coverage distribution min,max,step [1,1000,1].
       multiple: true
-      multiple_sep: ','
+      description: |
+        Coverage distribution min;max;step. Default: [1, 1000, 1].
+      example: [1, 1000, 1]
     - name: --remove_dups
       alternatives: -d
       type: boolean_true
@@ -48,25 +48,25 @@ argument_groups:
       alternatives: -f
       type: string
       description: |
-        Required flag, 0 for unset. See also `samtools flags`.
-      default: "0"
+        Required flag, 0 for unset. See also `samtools flags`. Default: `0`.
+      example: "0"
     - name: --filtering_flag
       alternatives: -F
       type: string
       description: |
-        Filtering flag, 0 for unset. See also `samtools flags`.
-      default: "0"
+        Filtering flag, 0 for unset. See also `samtools flags`. Default: `0`.
+      example: "0"
     - name: --GC_depth
       type: double
       description: |
-        The size of GC-depth bins (decreasing bin size increases memory requirement).
-      default: 20000.0
+        The size of GC-depth bins (decreasing bin size increases memory requirement). Default: `20000`.
+      example: 20000.0
     - name: --insert_size
       alternatives: -i
       type: integer
       description: |
-        Maximum insert size.
-      default: 8000
+        Maximum insert size. Default: `8000`.
+      example: 8000
     - name: --id
       alternatives: -I
       type: string
@@ -76,14 +76,14 @@ argument_groups:
       alternatives: -l
       type: integer
       description: |
-        Include in the statistics only reads with the given read length.
-      default: -1
+        Include in the statistics only reads with the given read length. Default: `-1`.
+      example: -1
     - name: --most_inserts
       alternatives: -m
       type: double
       description: |
-        Report only the main part of inserts.
-      default: 0.99
+        Report only the main part of inserts. Default: `0.99`.
+      example: 0.99
     - name: --split_prefix
       alternatives: -P
       type: string
@@ -93,8 +93,8 @@ argument_groups:
       alternatives: -q
       type: integer
       description: |
-        The BWA trimming parameter.
-      default: 0
+        The BWA trimming parameter. Default: `0`.
+      example: 0
     - name: --ref_seq
       alternatives: -r
       type: file
@@ -124,8 +124,8 @@ argument_groups:
       alternatives: -g
       type: integer
       description: |
-        Only bases with coverage above this value will be included in the target percentage computation.
-      default: 0
+        Only bases with coverage above this value will be included in the target percentage computation. Default: `0`.
+      example: 0
     - name: --input_fmt_option
       type: string
       description: |
@@ -141,7 +141,7 @@ argument_groups:
       type: file
       description: |
         Output file.
-      default: "out.txt"
+      example: "out.txt"
       required: true
       direction: output
 
diff --git a/src/samtools/samtools_stats/script.sh b/src/samtools/samtools_stats/script.sh
index 6e32e9a5..e3872fc6 100644
--- a/src/samtools/samtools_stats/script.sh
+++ b/src/samtools/samtools_stats/script.sh
@@ -10,6 +10,9 @@ set -e
 [[ "$par_sparse" == "false" ]] && unset par_sparse
 [[ "$par_remove_overlaps" == "false" ]] && unset par_remove_overlaps
 
+# rewrite the ";"-separated coverage values (min;max;step) as the "min,max,step" string handed to `samtools stats -c`
+par_coverage="${par_coverage//;/,}"
+
 samtools stats \
     ${par_coverage:+-c "$par_coverage"} \
     ${par_remove_dups:+-d} \
diff --git a/src/samtools/samtools_stats/test.sh b/src/samtools/samtools_stats/test.sh
index 05d70d30..b515100e 100644
--- a/src/samtools/samtools_stats/test.sh
+++ b/src/samtools/samtools_stats/test.sh
@@ -17,7 +17,7 @@ echo ">>> Checking whether output is non-empty"
 [ ! -s "$test_dir/test.paired_end.sorted.txt" ] && echo "File 'test.paired_end.sorted.txt' is empty!" && exit 1
 
 echo ">>> Checking whether output is correct"
-# compare using diff,  ignoring the line stating the command that was passed.
+# compare using diff, ignoring the line stating the command that was passed.
 diff <(grep -v "^# The command" "$test_dir/test.paired_end.sorted.txt") \
     <(grep -v "^# The command" "$test_dir/ref.paired_end.sorted.txt") || \
     (echo "Output file ref.paired_end.sorted.txt does not match expected output" && exit 1)

From 8e9abad885b27120a56a580ca7d961c64b96ad60 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Wed, 17 Jul 2024 18:14:21 +0200
Subject: [PATCH 3/7] Update CONTRIBUTING.md (#82)

* Update CONTRIBUTING.md

* update ctb

* clean up helper functions

* update changelog

* update changelog
---
 CHANGELOG.md                                  |  28 +++-
 CONTRIBUTING.md                               | 151 +++++++++++-------
 .../bd_rhapsody_make_reference/test.sh        |   5 +-
 src/cutadapt/test.sh                          |  14 +-
 src/star/star_align_reads/test.sh             |  21 ++-
 5 files changed, 130 insertions(+), 89 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9cfacdbc..2aad0cb8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,19 +6,33 @@
 
   - `bd_rhapsody/bd_rhapsody_make_reference`: Create a reference for the BD Rhapsody pipeline (PR #75).
 
-## BUG FIXES
+## MINOR CHANGES
 
-* `pear`: fix component not exiting with the correct exitcode when PEAR fails.
+* `busco` components: update BUSCO to `5.7.1` (PR #72).
 
-* `cutadapt`: fix `--par_quality_cutoff_r2` argument.
+## DOCUMENTATION
 
-* `cutadapt`: demultiplexing is now disabled by default. It can be re-enabled by using `demultiplex_mode`.
+* Extend the contributing guidelines (PR #82):
 
-* `multiqc`: update multiple separator to `;` (PR #81).
+  - Update format to Viash 0.9.
 
-## MINOR CHANGES
+  - Descriptions should be formatted in markdown.
+
+  - Add defaults to descriptions, not as a default of the argument.
+
+  - Explain parameter expansion.
 
-* `busco` components: update BUSCO to `5.7.1`.
+  - Mention that the contents of the output of components in tests should be checked.
+
+## BUG FIXES
+
+* `pear`: fix component not exiting with the correct exitcode when PEAR fails (PR #70).
+
+* `cutadapt`: fix `--par_quality_cutoff_r2` argument (PR #69).
+
+* `cutadapt`: demultiplexing is now disabled by default. It can be re-enabled by using `demultiplex_mode` (PR #69).
+
+* `multiqc`: update multiple separator to `;` (PR #81).
 
 # biobox 0.1.0
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7393bc7e..cee4249a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -65,22 +65,21 @@ runners:
 Fill in the relevant metadata fields in the config. Here is an example of the metadata of an existing component.
 
 ```yaml
-functionality:
-  name: arriba
-  description: Detect gene fusions from RNA-Seq data
-  keywords: [Gene fusion, RNA-Seq]
-  links:
-    homepage: https://arriba.readthedocs.io/en/latest/
-    documentation: https://arriba.readthedocs.io/en/latest/
-    repository: https://github.com/suhrig/arriba
-    issue_tracker: https://github.com/suhrig/arriba/issues
-  references:
-    doi: 10.1101/gr.257246.119
-    bibtex: |
-      @article{
-        ... a bibtex entry in case the doi is not available ...
-      }
-  license: MIT
+name: arriba
+description: Detect gene fusions from RNA-Seq data
+keywords: [Gene fusion, RNA-Seq]
+links:
+  homepage: https://arriba.readthedocs.io/en/latest/
+  documentation: https://arriba.readthedocs.io/en/latest/
+  repository: https://github.com/suhrig/arriba
+  issue_tracker: https://github.com/suhrig/arriba/issues
+references:
+  doi: 10.1101/gr.257246.119
+  bibtex: |
+    @article{
+      ... a bibtex entry in case the doi is not available ...
+    }
+license: MIT
 ```
 
 ### Step 4: Find a suitable container
@@ -162,7 +161,7 @@ argument_groups:
       type: file
       description: |
         File in SAM/BAM/CRAM format with main alignments as generated by STAR
-        (Aligned.out.sam). Arriba extracts candidate reads from this file.
+        (`Aligned.out.sam`). Arriba extracts candidate reads from this file.
       required: true
       example: Aligned.out.bam
 ```
@@ -175,7 +174,7 @@ Several notes:
 
 * Input arguments can have `multiple: true` to allow the user to specify multiple files.
 
-
+* The description should be formatted in markdown.
 
 ### Step 8: Add arguments for the output files
 
@@ -220,7 +219,7 @@ argument_groups:
 
 Note: 
 
-* Preferably, these outputs should not be directores but files. For example, if a tool outputs a directory `foo/` containing files `foo/bar.txt` and `foo/baz.txt`, there should be two output arguments `--bar` and `--baz` (as opposed to one output argument which outputs the whole `foo/` directory).
+* Preferably, these outputs should not be directories but files. For example, if a tool outputs a directory `foo/` containing files `foo/bar.txt` and `foo/baz.txt`, there should be two output arguments `--bar` and `--baz` (as opposed to one output argument which outputs the whole `foo/` directory).
 
 ### Step 9: Add arguments for the other arguments
 
@@ -230,6 +229,8 @@ Finally, add all other arguments to the config file. There are a few exceptions:
 
 * Arguments related to printing the information such as printing the version (`-v`, `--version`) or printing the help (`-h`, `--help`) should not be added to the config file.
 
+* If the help lists defaults, do not add them as defaults but to the description. Example: `description: <Explanation of parameter>. Default: 10.`
+
 
 ### Step 10: Add a Docker engine
 
@@ -275,10 +276,13 @@ Next, we need to write a runner script that runs the tool with the input argumen
 ## VIASH START
 ## VIASH END
 
+# unset flags
+[[ "$par_option" == "false" ]] && unset par_option
+
 xxx \
   --input "$par_input" \
   --output "$par_output" \
-  $([ "$par_option" = "true" ] && echo "--option")
+  ${par_option:+--option}
 ```
 
 When building a Viash component, Viash will automatically replace the `## VIASH START` and `## VIASH END` lines (and anything in between) with environment variables based on the arguments specified in the config.
@@ -291,6 +295,11 @@ As an example, this is what the Bash script for the `arriba` component looks lik
 ## VIASH START
 ## VIASH END
 
+# unset flags
+[[ "$par_skip_duplicate_marking" == "false" ]] && unset par_skip_duplicate_marking
+[[ "$par_extra_information" == "false" ]] && unset par_extra_information
+[[ "$par_fill_gaps" == "false" ]] && unset par_fill_gaps
+
 arriba \
   -x "$par_bam" \
   -a "$par_genome" \
@@ -298,26 +307,30 @@ arriba \
   -o "$par_fusions" \
   ${par_known_fusions:+-k "${par_known_fusions}"} \
   ${par_blacklist:+-b "${par_blacklist}"} \
-  ${par_structural_variants:+-d "${par_structural_variants}"} \
-  $([ "$par_skip_duplicate_marking" = "true" ] && echo "-u") \
-  $([ "$par_extra_information" = "true" ] && echo "-X") \
-  $([ "$par_fill_gaps" = "true" ] && echo "-I")
+  # ...
+  ${par_extra_information:+-X} \
+  ${par_fill_gaps:+-I}
 ```
 
+Notes:
 
-### Step 12: Create test script
+* If your arguments can contain special characters (e.g. `$`), you can use the `@Q` operator of Bash [parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html) to quote the value so that the string can be used as input. Example: `-x ${par_bam@Q}`.
 
+* Optional arguments can be passed to the command conditionally using Bash [parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html). For example: `${par_known_fusions:+-k ${par_known_fusions@Q}}`
+
+* If your tool allows for multiple inputs using a separator other than `;` (which is the default Viash multiple separator), you can substitute these values with a command like: `par_disable_filters=$(echo "$par_disable_filters" | tr ';' ',')`.
+
+
+### Step 12: Create test script
 
 If the unit test requires test resources, these should be provided in the `test_resources` section of the component. 
 
 ```yaml
-functionality:
-  # ...
-  test_resources:
-    - type: bash_script
-      path: test.sh
-    - type: file
-      path: test_data
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - type: file
+    path: test_data
 ```
 
 Create a test script at `src/xxx/test.sh` that runs the component with the test data. This script should run the component (available with `$meta_executable`) with the test data and check if the output is as expected. The script should exit with a non-zero exit code if the output is not as expected. For example:
@@ -325,48 +338,64 @@ Create a test script at `src/xxx/test.sh` that runs the component with the test
 ```bash
 #!/bin/bash
 
+set -e
+
 ## VIASH START
 ## VIASH END
 
-echo "> Run xxx with test data"
+#############################################
+# helper functions
+assert_file_exists() {
+  [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; }
+}
+assert_file_doesnt_exist() {
+  [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; }
+}
+assert_file_empty() {
+  [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; }
+}
+assert_file_not_empty() {
+  [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; }
+}
+assert_file_contains() {
+  grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
+}
+assert_file_not_contains() { # fails if file $1 contains pattern $2; returns 0 otherwise, so it is safe under set -e
+  ! grep -q "$2" "$1" || { echo "File '$1' contains '$2' but shouldn't" && exit 1; }
+}
+assert_file_contains_regex() {
+  grep -q -E "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
+}
+assert_file_not_contains_regex() { # fails if file $1 matches extended regex $2; returns 0 otherwise, so it is safe under set -e
+  ! grep -q -E "$2" "$1" || { echo "File '$1' contains '$2' but shouldn't" && exit 1; }
+}
+#############################################
+
+echo "> Run $meta_name with test data"
 "$meta_executable" \
-  --input "$meta_resources_dir/test_data/input.txt" \
+  --input "$meta_resources_dir/test_data/reads_R1.fastq" \
   --output "output.txt" \
   --option
 
-echo ">> Checking output"
-[ ! -f "output.txt" ] && echo "Output file output.txt does not exist" && exit 1
-```
+echo ">> Check if output exists"
+assert_file_exists "output.txt"
 
+echo ">> Check if output is empty"
+assert_file_not_empty "output.txt"
 
-For example, this is what the test script for the `arriba` component looks like:
+echo ">> Check if output is correct"
+assert_file_contains "output.txt" "some expected output"
 
-```bash
-#!/bin/bash
+echo "> All tests succeeded!"
+```
 
-## VIASH START
-## VIASH END
+Notes:
 
-echo "> Run arriba with blacklist"
-"$meta_executable" \
-  --bam "$meta_resources_dir/test_data/A.bam" \
-  --genome "$meta_resources_dir/test_data/genome.fasta" \
-  --gene_annotation "$meta_resources_dir/test_data/annotation.gtf" \
-  --blacklist "$meta_resources_dir/test_data/blacklist.tsv" \
-  --fusions "fusions.tsv" \
-  --fusions_discarded "fusions_discarded.tsv" \
-  --interesting_contigs "1,2"
-
-echo ">> Checking output"
-[ ! -f "fusions.tsv" ] && echo "Output file fusions.tsv does not exist" && exit 1
-[ ! -f "fusions_discarded.tsv" ] && echo "Output file fusions_discarded.tsv does not exist" && exit 1
+* Do always check the contents of the output file. If the output is not deterministic, you can use regular expressions to check the output.
 
-echo ">> Check if output is empty"
-[ ! -s "fusions.tsv" ] && echo "Output file fusions.tsv is empty" && exit 1
-[ ! -s "fusions_discarded.tsv" ] && echo "Output file fusions_discarded.tsv is empty" && exit 1
-```
+* If possible, generate your own test data instead of copying it from an external resource.
 
-### Step 12: Create a `/var/software_versions.txt` file
+### Step 13: Create a `/var/software_versions.txt` file
 
 For the sake of transparency and reproducibility, we require that the versions of the software used in the component are documented.
 
@@ -378,6 +407,8 @@ engines:
     image: quay.io/biocontainers/xxx:0.1.0--py_0
     setup:
       - type: docker
+        # note: /var/software_versions.txt should contain:
+        #   xxx: "0.1.0"
         run: |
           echo "xxx: \"0.1.0\"" > /var/software_versions.txt
 ```
diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh b/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh
index 3637160a..845c1739 100644
--- a/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh
+++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh
@@ -11,21 +11,18 @@ assert_file_doesnt_exist() {
   [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; }
 }
 assert_file_empty() {
-  #  () will execute in a shubshell, could you use {;}?
   [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; }
 }
 assert_file_not_empty() {
-  # [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1)
   [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; }
 }
 assert_file_contains() {
-  # grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1)
   grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
 }
 assert_file_not_contains() {
-  # grep -q "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1)
   grep -q "$2" "$1" && { echo "File '$1' contains '$2' but shouldn't" && exit 1; }
 }
+#############################################
 
 in_fa="$meta_resources_dir/test_data/reference_small.fa"
 in_gtf="$meta_resources_dir/test_data/reference_small.gtf"
diff --git a/src/cutadapt/test.sh b/src/cutadapt/test.sh
index 1d6d9c18..28248742 100644
--- a/src/cutadapt/test.sh
+++ b/src/cutadapt/test.sh
@@ -6,25 +6,25 @@ set -eo pipefail
 #############################################
 # helper functions
 assert_file_exists() {
-  [ -f "$1" ] || (echo "File '$1' does not exist" && exit 1)
+  [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; }
 }
 assert_file_doesnt_exist() {
-  [ ! -f "$1" ] || (echo "File '$1' exists but shouldn't" && exit 1)
+  [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; }
 }
 assert_file_empty() {
-  [ ! -s "$1" ] || (echo "File '$1' is not empty but should be" && exit 1)
+  [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; }
 }
 assert_file_not_empty() {
-  [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1)
+  [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; }
 }
 assert_file_contains() {
-  grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1)
+  grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
 }
 assert_file_not_contains() {
-  grep -q "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1)
+  ! grep -q "$2" "$1" || { echo "File '$1' contains '$2' but shouldn't" && exit 1; }  # negated grep: returns 0 when absent, so set -e does not abort
 }
-
 #############################################
+
 mkdir test_multiple_output
 cd test_multiple_output
 
diff --git a/src/star/star_align_reads/test.sh b/src/star/star_align_reads/test.sh
index a15ea599..bd78094d 100644
--- a/src/star/star_align_reads/test.sh
+++ b/src/star/star_align_reads/test.sh
@@ -7,35 +7,34 @@ meta_executable="target/docker/star/star_align_reads/star_align_reads"
 meta_resources_dir="src/star/star_align_reads"
 ## VIASH END
 
-#########################################################################################
-
+#############################################
 # helper functions
 assert_file_exists() {
-  [ -f "$1" ] || (echo "File '$1' does not exist" && exit 1)
+  [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; }
 }
 assert_file_doesnt_exist() {
-  [ ! -f "$1" ] || (echo "File '$1' exists but shouldn't" && exit 1)
+  [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; }
 }
 assert_file_empty() {
-  [ ! -s "$1" ] || (echo "File '$1' is not empty but should be" && exit 1)
+  [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; }
 }
 assert_file_not_empty() {
-  [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1)
+  [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; }
 }
 assert_file_contains() {
-  grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1)
+  grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
 }
 assert_file_not_contains() {
-  grep -q "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1)
+  ! grep -q "$2" "$1" || { echo "File '$1' contains '$2' but shouldn't" && exit 1; }  # negated grep: returns 0 when absent, so set -e does not abort
 }
 assert_file_contains_regex() {
-  grep -q -E "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1)
+  grep -q -E "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
 }
 assert_file_not_contains_regex() {
-  grep -q -E "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1)
+  ! grep -q -E "$2" "$1" || { echo "File '$1' contains '$2' but shouldn't" && exit 1; }  # negated grep: returns 0 when absent, so set -e does not abort
 }
+#############################################
 
-#########################################################################################
 echo "> Prepare test data"
 
 cat > reads_R1.fastq <<'EOF'

From 13c5439a0c36f8a1bd3889e68d68ca85672daa62 Mon Sep 17 00:00:00 2001
From: Leila011 <leilapaquay@gmail.com>
Date: Wed, 17 Jul 2024 18:15:08 +0200
Subject: [PATCH 4/7] Add agat convertspgff2gtf (#76)

* Fill in the metadata

* add help.txt

* add test data

* update help.txt

* add arguments for input file, output file and other arguments

* add  a Docker engine

* Write a runner script

* correct --gtf_version choices

* update description

* update keywords

* Create test script

* Create a /var/software_versions.txt file

* remove duplicated argument

* update config

* change name to agat_convert_sp_gff2gtf

* update license

* replace module name by $meta_name in test.sh

* Add more info to --gtf_version description

* remove extra \

* add additional test: check if the D column in the first line of the GFF was correctly converted into GTF format

* update changelog

* Markdown: add newline before listing

* add test to check if the header contains the right GTF version

* cleanup

* fix formatting

---------

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>
---
 CHANGELOG.md                                  |   3 +
 .../agat_convert_sp_gff2gtf/config.vsh.yaml   |  90 ++++++++++++++++
 src/agat/agat_convert_sp_gff2gtf/help.txt     | 102 ++++++++++++++++++
 src/agat/agat_convert_sp_gff2gtf/script.sh    |  10 ++
 src/agat/agat_convert_sp_gff2gtf/test.sh      |  37 +++++++
 .../test_data/0_test.gff                      |  36 +++++++
 .../test_data/script.sh                       |   9 ++
 7 files changed, 287 insertions(+)
 create mode 100644 src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml
 create mode 100644 src/agat/agat_convert_sp_gff2gtf/help.txt
 create mode 100644 src/agat/agat_convert_sp_gff2gtf/script.sh
 create mode 100644 src/agat/agat_convert_sp_gff2gtf/test.sh
 create mode 100644 src/agat/agat_convert_sp_gff2gtf/test_data/0_test.gff
 create mode 100755 src/agat/agat_convert_sp_gff2gtf/test_data/script.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2aad0cb8..8f56b22e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -100,6 +100,9 @@
     - `bedtools_getfasta`: extract sequences from a FASTA file for each of the
                            intervals defined in a BED/GFF/VCF file (PR #59).
 
+* `agat`:
+    - `agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76).
+
 ## MINOR CHANGES
 
 * Uniformize component metadata (PR #23).
diff --git a/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml b/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml
new file mode 100644
index 00000000..b788c7c7
--- /dev/null
+++ b/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml
@@ -0,0 +1,90 @@
+name: agat_convert_sp_gff2gtf
+namespace: agat
+description: |
+  The script aims to convert any GTF/GFF file into a proper GTF file. Full
+  information about the format can be found here:
+  https://agat.readthedocs.io/en/latest/gxf.html You can choose among 7
+  different GTF types (1, 2, 2.1, 2.2, 2.5, 3 or relax). Depending the
+  version selected the script will filter out the features that are not
+  accepted. For GTF2.5 and 3, every level1 feature (e.g nc_gene
+  pseudogene) will be converted into gene feature and every level2 feature
+  (e.g mRNA ncRNA) will be converted into transcript feature. Using the
+  "relax" option you will produce a GTF-like output keeping all original
+  feature types (3rd column). No modification will occur e.g. mRNA to
+  transcript.
+
+  To be fully GTF compliant all feature have a gene_id and a transcript_id
+  attribute. The gene_id is unique identifier for the genomic source of
+  the transcript, which is used to group transcripts into genes. The
+  transcript_id is a unique identifier for the predicted transcript, which
+  is used to group features into transcripts.
+keywords: [gene annotations, GTF conversion]
+links:
+  homepage: https://github.com/NBISweden/AGAT
+  documentation: https://agat.readthedocs.io/
+  issue_tracker: https://github.com/NBISweden/AGAT/issues
+  repository: https://github.com/NBISweden/AGAT
+references: 
+  doi: 10.5281/zenodo.3552717
+license: GPL-3.0
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --gff
+        alternatives: [-i]
+        description: Input GFF/GTF file that will be read
+        type: file
+        required: true
+        direction: input
+        example: input.gff
+  - name: Outputs
+    arguments:       
+      - name: --output
+        alternatives: [-o, --out, --outfile, --gtf]
+        description: Output GTF file.
+        type: file
+        direction: output
+        required: true
+        example: output.gtf
+  - name: Arguments
+    arguments:
+      - name: --gtf_version
+        description: |
+          Version of the GTF output (1,2,2.1,2.2,2.5,3 or relax). Default value from AGAT config file (relax for the default config). The script option has the higher priority.  
+          
+            * relax: all feature types are accepted.  
+            * GTF3 (9 feature types accepted): gene, transcript, exon, CDS, Selenocysteine, start_codon, stop_codon, three_prime_utr and five_prime_utr.  
+            * GTF2.5 (8 feature types accepted): gene, transcript, exon, CDS, UTR, start_codon, stop_codon, Selenocysteine.  
+            * GTF2.2 (9 feature types accepted): CDS, start_codon, stop_codon, 5UTR, 3UTR, inter, inter_CNS, intron_CNS and exon.  
+            * GTF2.1 (6 feature types accepted): CDS, start_codon, stop_codon, exon, 5UTR, 3UTR.  
+            * GTF2 (4 feature types accepted): CDS, start_codon, stop_codon, exon.  
+            * GTF1 (5 feature types accepted): CDS, start_codon, stop_codon, exon, intron.  
+        type: string
+        choices: [relax, "1", "2", "2.1", "2.2", "2.5", "3"]
+        required: false
+        example: "3"
+      - name: --config
+        alternatives: [-c]
+        description: |
+          Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, otherwise it takes the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently).
+        type: file
+        required: false
+        example: custom_agat_config.yaml
+resources:
+  - type: bash_script
+    path: script.sh
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - type: file
+    path: test_data
+engines:
+  - type: docker
+    image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0
+    setup:
+      - type: docker
+        run: |
+          agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt
+runners:
+  - type: executable
+  - type: nextflow
\ No newline at end of file
diff --git a/src/agat/agat_convert_sp_gff2gtf/help.txt b/src/agat/agat_convert_sp_gff2gtf/help.txt
new file mode 100644
index 00000000..fdd45507
--- /dev/null
+++ b/src/agat/agat_convert_sp_gff2gtf/help.txt
@@ -0,0 +1,102 @@
+```sh
+agat_convert_sp_gff2gtf.pl --help
+```
+ ------------------------------------------------------------------------------
+|   Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0                      |
+|   https://github.com/NBISweden/AGAT                                          |
+|   National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se         |
+ ------------------------------------------------------------------------------
+
+
+Name:
+    agat_convert_sp_gff2gtf.pl
+
+Description:
+    The script aims to convert any GTF/GFF file into a proper GTF file. Full
+    information about the format can be found here:
+    https://agat.readthedocs.io/en/latest/gxf.html You can choose among 7
+    different GTF types (1, 2, 2.1, 2.2, 2.5, 3 or relax). Depending the
+    version selected the script will filter out the features that are not
+    accepted. For GTF2.5 and 3, every level1 feature (e.g nc_gene
+    pseudogene) will be converted into gene feature and every level2 feature
+    (e.g mRNA ncRNA) will be converted into transcript feature. Using the
+    "relax" option you will produce a GTF-like output keeping all original
+    feature types (3rd column). No modification will occur e.g. mRNA to
+    transcript.
+
+    To be fully GTF compliant all feature have a gene_id and a transcript_id
+    attribute. The gene_id is unique identifier for the genomic source of
+    the transcript, which is used to group transcripts into genes. The
+    transcript_id is a unique identifier for the predicted transcript, which
+    is used to group features into transcripts.
+
+Usage:
+        agat_convert_sp_gff2gtf.pl --gff infile.gff [ -o outfile ]
+        agat_convert_sp_gff2gtf -h
+
+Options:
+    --gff, --gtf or -i
+            Input GFF/GTF file that will be read
+
+    --gtf_version version of the GTF output (1,2,2.1,2.2,2.5,3 or relax).
+    Default value from AGAT config file (relax for the default config). The
+    script option has the higher priority.
+            relax: all feature types are accepted.
+
+            GTF3 (9 feature types accepted): gene, transcript, exon, CDS,
+            Selenocysteine, start_codon, stop_codon, three_prime_utr and
+            five_prime_utr
+
+            GTF2.5 (8 feature types accepted): gene, transcript, exon, CDS,
+            UTR, start_codon, stop_codon, Selenocysteine
+
+            GTF2.2 (9 feature types accepted): CDS, start_codon, stop_codon,
+            5UTR, 3UTR, inter, inter_CNS, intron_CNS and exon
+
+            GTF2.1 (6 feature types accepted): CDS, start_codon, stop_codon,
+            exon, 5UTR, 3UTR
+
+            GTF2 (4 feature types accepted): CDS, start_codon, stop_codon,
+            exon
+
+            GTF1 (5 feature types accepted): CDS, start_codon, stop_codon,
+            exon, intron
+
+    -o , --output , --out , --outfile or --gtf
+            Output GTF file. If no output file is specified, the output will
+            be written to STDOUT.
+
+    -c or --config
+            String - Input agat config file. By default AGAT takes as input
+            agat_config.yaml file from the working directory if any,
+            otherwise it takes the orignal agat_config.yaml shipped with
+            AGAT. To get the agat_config.yaml locally type: "agat config
+            --expose". The --config option gives you the possibility to use
+            your own AGAT config file (located elsewhere or named
+            differently).
+
+    -h or --help
+            Display this helpful text.
+
+Feedback:
+  Did you find a bug?:
+    Do not hesitate to report bugs to help us keep track of the bugs and
+    their resolution. Please use the GitHub issue tracking system available
+    at this address:
+
+                https://github.com/NBISweden/AGAT/issues
+
+     Ensure that the bug was not already reported by searching under Issues.
+     If you're unable to find an (open) issue addressing the problem, open a new one.
+     Try as much as possible to include in the issue when relevant:
+     - a clear description,
+     - as much relevant information as possible,
+     - the command used,
+     - a data sample,
+     - an explanation of the expected behaviour that is not occurring.
+
+  Do you want to contribute?:
+    You are very welcome, visit this address for the Contributing
+    guidelines:
+    https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md
+
diff --git a/src/agat/agat_convert_sp_gff2gtf/script.sh b/src/agat/agat_convert_sp_gff2gtf/script.sh
new file mode 100644
index 00000000..69d66739
--- /dev/null
+++ b/src/agat/agat_convert_sp_gff2gtf/script.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+# Convert $par_gff to GTF at $par_output; each optional flag is passed only when its variable is set and non-empty.
+agat_convert_sp_gff2gtf.pl \
+  -i "$par_gff" \
+  -o "$par_output" \
+  ${par_gtf_version:+--gtf_version "${par_gtf_version}"} \
+  ${par_config:+--config "${par_config}"}
diff --git a/src/agat/agat_convert_sp_gff2gtf/test.sh b/src/agat/agat_convert_sp_gff2gtf/test.sh
new file mode 100644
index 00000000..1e7cc142
--- /dev/null
+++ b/src/agat/agat_convert_sp_gff2gtf/test.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+test_dir="${meta_resources_dir}/test_data"
+
+echo "> Run $meta_name with test data"
+"$meta_executable" \
+  --gff "$test_dir/0_test.gff" \
+  --output "output.gtf" 
+
+echo ">> Checking output"
+[ ! -f "output.gtf" ] && echo "Output file output.gtf does not exist" && exit 1
+
+echo ">> Check if output is empty"
+[ ! -s "output.gtf" ] && echo "Output file output.gtf is empty" && exit 1
+
+echo ">> Check if the conversion resulted in the right GTF format"
+idGFF=$(head -n 2 "$test_dir/0_test.gff" | grep -o 'ID=[^;]*' | cut -d '=' -f 2-)
+expectedGTF="gene_id \"$idGFF\"; ID \"$idGFF\";"
+extractedGTF=$(head -n 3 "output.gtf" | grep -o 'gene_id "[^"]*"; ID "[^"]*";')
+[ "$extractedGTF" != "$expectedGTF" ] && echo "Output file output.gtf does not have the right format" && exit 1
+
+rm output.gtf
+
+echo "> Run $meta_name with test data and GTF version 2.5"
+"$meta_executable" \
+  --gff "$test_dir/0_test.gff" \
+  --output "output.gtf" \
+  --gtf_version "2.5"
+
+echo ">> Check if the output file header display the right GTF version"
+grep -q "##gtf-version 2.5" "output.gtf"
+[ $? -ne 0 ] && echo "Output file output.gtf header does not display the right GTF version" && exit 1
+
+echo "> Test successful"
\ No newline at end of file
diff --git a/src/agat/agat_convert_sp_gff2gtf/test_data/0_test.gff b/src/agat/agat_convert_sp_gff2gtf/test_data/0_test.gff
new file mode 100644
index 00000000..fafe86ed
--- /dev/null
+++ b/src/agat/agat_convert_sp_gff2gtf/test_data/0_test.gff
@@ -0,0 +1,36 @@
+##gff-version 3
+scaffold625	maker	gene	337818	343277	.	+	.	ID=CLUHARG00000005458;Name=TUBB3_2
+scaffold625	maker	mRNA	337818	343277	.	+	.	ID=CLUHART00000008717;Parent=CLUHARG00000005458
+scaffold625	maker	exon	337818	337971	.	+	.	ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717
+scaffold625	maker	exon	340733	340841	.	+	.	ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717
+scaffold625	maker	exon	341518	341628	.	+	.	ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717
+scaffold625	maker	exon	341964	343277	.	+	.	ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717
+scaffold625	maker	CDS	337915	337971	.	+	0	ID=CLUHART00000008717:cds;Parent=CLUHART00000008717
+scaffold625	maker	CDS	340733	340841	.	+	0	ID=CLUHART00000008717:cds;Parent=CLUHART00000008717
+scaffold625	maker	CDS	341518	341628	.	+	2	ID=CLUHART00000008717:cds;Parent=CLUHART00000008717
+scaffold625	maker	CDS	341964	343033	.	+	2	ID=CLUHART00000008717:cds;Parent=CLUHART00000008717
+scaffold625	maker	five_prime_UTR	337818	337914	.	+	.	ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717
+scaffold625	maker	three_prime_UTR	343034	343277	.	+	.	ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717
+scaffold789	maker	gene	558184	564780	.	+	.	ID=CLUHARG00000003852;Name=PF11_0240
+scaffold789	maker	mRNA	558184	564780	.	+	.	ID=CLUHART00000006146;Parent=CLUHARG00000003852
+scaffold789	maker	exon	558184	560123	.	+	.	ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146
+scaffold789	maker	exon	561401	561519	.	+	.	ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146
+scaffold789	maker	exon	564171	564235	.	+	.	ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146
+scaffold789	maker	exon	564372	564780	.	+	.	ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146
+scaffold789	maker	CDS	558191	560123	.	+	0	ID=CLUHART00000006146:cds;Parent=CLUHART00000006146
+scaffold789	maker	CDS	561401	561519	.	+	2	ID=CLUHART00000006146:cds;Parent=CLUHART00000006146
+scaffold789	maker	CDS	564171	564235	.	+	0	ID=CLUHART00000006146:cds;Parent=CLUHART00000006146
+scaffold789	maker	CDS	564372	564588	.	+	1	ID=CLUHART00000006146:cds;Parent=CLUHART00000006146
+scaffold789	maker	five_prime_UTR	558184	558190	.	+	.	ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146
+scaffold789	maker	three_prime_UTR	564589	564780	.	+	.	ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146
+scaffold789	maker	mRNA	558184	564780	.	+	.	ID=CLUHART00000006147;Parent=CLUHARG00000003852
+scaffold789	maker	exon	558184	560123	.	+	.	ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147
+scaffold789	maker	exon	561401	561519	.	+	.	ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147
+scaffold789	maker	exon	562057	562121	.	+	.	ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147
+scaffold789	maker	exon	564372	564780	.	+	.	ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147
+scaffold789	maker	CDS	558191	560123	.	+	0	ID=CLUHART00000006147:cds;Parent=CLUHART00000006147
+scaffold789	maker	CDS	561401	561519	.	+	2	ID=CLUHART00000006147:cds;Parent=CLUHART00000006147
+scaffold789	maker	CDS	562057	562121	.	+	0	ID=CLUHART00000006147:cds;Parent=CLUHART00000006147
+scaffold789	maker	CDS	564372	564588	.	+	1	ID=CLUHART00000006147:cds;Parent=CLUHART00000006147
+scaffold789	maker	five_prime_UTR	558184	558190	.	+	.	ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147
+scaffold789	maker	three_prime_UTR	564589	564780	.	+	.	ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147
diff --git a/src/agat/agat_convert_sp_gff2gtf/test_data/script.sh b/src/agat/agat_convert_sp_gff2gtf/test_data/script.sh
new file mode 100755
index 00000000..e453e772
--- /dev/null
+++ b/src/agat/agat_convert_sp_gff2gtf/test_data/script.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# clone repo
+if [ ! -d /tmp/agat_source ]; then
+  git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source
+fi
+
+# copy test data
+cp -r /tmp/agat_source/t/gff_syntax/in/0_test.gff src/agat/agat_convert_sp_gff2gtf/test_data

From e615d2abb92e56cfc1e1ace9baa308ce10656f9f Mon Sep 17 00:00:00 2001
From: Jakub Majercik <57993790+jakubmajercik@users.noreply.github.com>
Date: Wed, 17 Jul 2024 19:44:21 +0200
Subject: [PATCH 5/7] Seqtk sample (#68)

* tests added

* tests extended

* changelog entry added

* reorganized seqtk namespace + added seqtk subseq config and script

* added subseq help.txt

* revert to seqtk sample only

* remove subseq

* updated tests, added tags

* Update two_pass_mode

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>

* author added to config

---------

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>
---
 CHANGELOG.md                           |   2 +
 src/_authors/jakub_majercik.yaml       |  10 +++
 src/seqtk/seqtk_sample/config.vsh.yaml |  57 ++++++++++++++
 src/seqtk/seqtk_sample/help.txt        |   9 +++
 src/seqtk/seqtk_sample/script.sh       |  11 +++
 src/seqtk/seqtk_sample/test.sh         | 104 +++++++++++++++++++++++++
 src/seqtk/test_data/reads/a.1.fastq.gz | Bin 0 -> 100 bytes
 src/seqtk/test_data/reads/a.2.fastq.gz | Bin 0 -> 100 bytes
 src/seqtk/test_data/reads/a.fastq      |   4 +
 src/seqtk/test_data/reads/a.fastq.gz   | Bin 0 -> 44 bytes
 src/seqtk/test_data/reads/id.list      |   1 +
 src/seqtk/test_data/script.sh          |   9 +++
 12 files changed, 207 insertions(+)
 create mode 100644 src/_authors/jakub_majercik.yaml
 create mode 100644 src/seqtk/seqtk_sample/config.vsh.yaml
 create mode 100644 src/seqtk/seqtk_sample/help.txt
 create mode 100644 src/seqtk/seqtk_sample/script.sh
 create mode 100644 src/seqtk/seqtk_sample/test.sh
 create mode 100644 src/seqtk/test_data/reads/a.1.fastq.gz
 create mode 100644 src/seqtk/test_data/reads/a.2.fastq.gz
 create mode 100644 src/seqtk/test_data/reads/a.fastq
 create mode 100644 src/seqtk/test_data/reads/a.fastq.gz
 create mode 100644 src/seqtk/test_data/reads/id.list
 create mode 100755 src/seqtk/test_data/script.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8f56b22e..f6a8676f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -93,6 +93,8 @@
 
 * `falco`: A C++ drop-in replacement of FastQC to assess the quality of sequence read data (PR #43).
 
+* `seqtk/seqtk_sample`: Sample sequences from FASTA/Q(.gz) files to FASTA/Q (PR #68).
+
 * `umitools`:
     - `umitools_dedup`: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read (PR #54).
 
diff --git a/src/_authors/jakub_majercik.yaml b/src/_authors/jakub_majercik.yaml
new file mode 100644
index 00000000..3b75fffe
--- /dev/null
+++ b/src/_authors/jakub_majercik.yaml
@@ -0,0 +1,10 @@
+name: Jakub Majercik
+info:
+  links:
+    email: jakub@data-intuitive.com
+    github: jakubmajercik
+    linkedin: jakubmajercik
+  organizations:
+    - name: Data Intuitive
+      href: https://www.data-intuitive.com
+      role: Bioinformatics Engineer
\ No newline at end of file
diff --git a/src/seqtk/seqtk_sample/config.vsh.yaml b/src/seqtk/seqtk_sample/config.vsh.yaml
new file mode 100644
index 00000000..0cd369e7
--- /dev/null
+++ b/src/seqtk/seqtk_sample/config.vsh.yaml
@@ -0,0 +1,57 @@
+name: seqtk_sample
+namespace: seqtk
+description: Subsamples sequences from FASTA/Q files.
+keywords: [sample, FASTA, FASTQ]
+links:
+  repository: https://github.com/lh3/seqtk/tree/v1.4
+license: MIT
+authors:
+  - __merge__: /src/_authors/jakub_majercik.yaml
+    roles: [ author, maintainer ]
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --input
+        type: file
+        description: The input FASTA/Q file.
+        required: true
+
+  - name: Outputs
+    arguments:
+      - name: --output
+        type: file
+        description: The output FASTA/Q file.
+        required: true
+        direction: output
+
+  - name: Options
+    arguments:
+      - name: --seed
+        type: integer
+        description: Seed for random generator.
+        example: 42
+      - name: --fraction_number
+        type: double
+        description: Fraction or number of sequences to sample.
+        required: true
+        example: 0.1
+      - name: --two_pass_mode
+        type: boolean_true
+        description: Twice as slow but with much reduced memory
+
+resources:
+  - type: bash_script
+    path: script.sh
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - type: file
+    path: ../test_data
+
+engines:
+  - type: docker
+    image: quay.io/biocontainers/seqtk:1.4--he4a0461_2
+runners:
+  - type: executable
+  - type: nextflow
\ No newline at end of file
diff --git a/src/seqtk/seqtk_sample/help.txt b/src/seqtk/seqtk_sample/help.txt
new file mode 100644
index 00000000..49f8001b
--- /dev/null
+++ b/src/seqtk/seqtk_sample/help.txt
@@ -0,0 +1,9 @@
+```
+seqtk sample
+```
+
+Usage:   seqtk sample [-2] [-s seed=11] <in.fa> <frac>|<number>
+
+Options: -s INT       RNG seed [11]
+         -2           2-pass mode: twice as slow but with much reduced memory
+
diff --git a/src/seqtk/seqtk_sample/script.sh b/src/seqtk/seqtk_sample/script.sh
new file mode 100644
index 00000000..01d981b3
--- /dev/null
+++ b/src/seqtk/seqtk_sample/script.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Subsample sequences with `seqtk sample`; the result is redirected to $par_output.
+## VIASH START
+## VIASH END
+[[ "$par_two_pass_mode" == "false" ]] && unset par_two_pass_mode # an absent boolean_true flag arrives as "false"; unset so ${var:+...} omits -2
+seqtk sample \
+    ${par_two_pass_mode:+-2} \
+    ${par_seed:+-s "$par_seed"} \
+    "$par_input" \
+    "$par_fraction_number" \
+    > "$par_output"
\ No newline at end of file
diff --git a/src/seqtk/seqtk_sample/test.sh b/src/seqtk/seqtk_sample/test.sh
new file mode 100644
index 00000000..cba5f613
--- /dev/null
+++ b/src/seqtk/seqtk_sample/test.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+set -e
+
+## VIASH START
+meta_executable="target/executable/seqtk/seqtk_sample"
+meta_resources_dir="src/seqtk"
+## VIASH END
+
+#########################################################################################
+mkdir seqtk_sample_se
+cd seqtk_sample_se
+
+echo "> Run seqtk_sample on fastq SE"
+"$meta_executable" \
+  --input "$meta_resources_dir/test_data/reads/a.1.fastq.gz" \
+  --seed 42 \
+  --fraction_number 3 \
+  --output "sampled.fastq"
+
+echo ">> Check if output exists"
+if [ ! -f "sampled.fastq" ]; then
+    echo ">> sampled.fastq does not exist"
+    exit 1
+fi
+
+echo ">> Count number of samples"
+num_samples=$(awk 'NR % 4 == 1' sampled.fastq | wc -l) # one header per 4-line record; quality lines may also start with '@'
+if [ "$num_samples" -ne 3 ]; then
+    echo ">> sampled.fastq does not contain 3 samples"
+    exit 1
+fi
+
+#########################################################################################
+cd ..
+mkdir seqtk_sample_pe_number
+cd seqtk_sample_pe_number
+
+echo ">> Run seqtk_sample on fastq.gz PE with number of reads"
+"$meta_executable" \
+  --input "$meta_resources_dir/test_data/reads/a.1.fastq.gz" \
+  --seed 42 \
+  --fraction_number 3 \
+  --output "sampled_1.fastq"
+
+"$meta_executable" \
+  --input "$meta_resources_dir/test_data/reads/a.2.fastq.gz" \
+  --seed 42 \
+  --fraction_number 3 \
+  --output "sampled_2.fastq"
+
+echo ">> Check if output exists"
+if [ ! -f "sampled_1.fastq" ] || [ ! -f "sampled_2.fastq" ]; then
+    echo ">> One or both output files do not exist"
+    exit 1
+fi
+
+echo ">> Compare reads"
+# Extract headers: first line of every 4-line record (quality lines may also start with '@'), mate suffix stripped
+headers1=$(awk 'NR % 4 == 1' sampled_1.fastq | sed -e 's/ 1$//' | sort)
+headers2=$(awk 'NR % 4 == 1' sampled_2.fastq | sed -e 's/ 2$//' | sort)
+
+# Compare headers: the same seed must select the same reads from both mates
+diff <(echo "$headers1") <(echo "$headers2") || { echo "Mismatch detected"; exit 1; }
+
+echo ">> Count number of samples"
+num_headers=$(echo "$headers1" | wc -l)
+if [ "$num_headers" -ne 3 ]; then
+    echo ">> sampled_1.fastq does not contain 3 headers"
+    exit 1
+fi
+
+#########################################################################################
+cd ..
+mkdir seqtk_sample_pe_fraction
+cd seqtk_sample_pe_fraction
+
+echo ">> Run seqtk_sample on fastq.gz PE with fraction of reads"
+"$meta_executable" \
+  --input "$meta_resources_dir/test_data/reads/a.1.fastq.gz" \
+  --seed 42 \
+  --fraction_number 0.5 \
+  --output "sampled_1.fastq"
+
+"$meta_executable" \
+  --input "$meta_resources_dir/test_data/reads/a.2.fastq.gz" \
+  --seed 42 \
+  --fraction_number 0.5 \
+  --output "sampled_2.fastq"
+
+echo ">> Check if output exists"
+if [ ! -f "sampled_1.fastq" ] || [ ! -f "sampled_2.fastq" ]; then
+    echo ">> One or both output files do not exist"
+    exit 1
+fi
+
+echo ">> Compare reads"
+# Extract headers: first line of every 4-line record (quality lines may also start with '@'), mate suffix stripped
+headers1=$(awk 'NR % 4 == 1' sampled_1.fastq | sed -e 's/ 1$//' | sort)
+headers2=$(awk 'NR % 4 == 1' sampled_2.fastq | sed -e 's/ 2$//' | sort)
+
+# Compare headers: the same seed must select the same reads from both mates
+diff <(echo "$headers1") <(echo "$headers2") || { echo "Mismatch detected"; exit 1; }
+
diff --git a/src/seqtk/test_data/reads/a.1.fastq.gz b/src/seqtk/test_data/reads/a.1.fastq.gz
new file mode 100644
index 0000000000000000000000000000000000000000..97a72ce5d48317556a145f93c32c87f0e9e5500f
GIT binary patch
literal 100
zcmV-q0Gt0GiwFRnrn+7N10Bw(6~jOf1wpPTJlJGMx7b%C&OZykT2i1<r$4>C{p;n6
zLRM|nP{^ij8VcF9T|*&<c4#Q%)J_eBT-v3fkXySo6!K_~hC+JG=`pXz+~yxM4VyvC
G0001=oh>&2

literal 0
HcmV?d00001

diff --git a/src/seqtk/test_data/reads/a.2.fastq.gz b/src/seqtk/test_data/reads/a.2.fastq.gz
new file mode 100644
index 0000000000000000000000000000000000000000..038bc976ac32e8f26be16949bf5632c7090e635b
GIT binary patch
literal 100
zcmV-q0Gt0GiwFRnrn+7N10Bw*5yUVM1wrm8Zt)RG{<G&uoPQRs7HKeO(tl%l`a70C
z3K`naP{^vS8VcF8O+z8OwreQl&<+iSoZ6|OkW0HX6mn~~hC+Iq(`{b2xy=tv0gOV-
G0001ax+}K;

literal 0
HcmV?d00001

diff --git a/src/seqtk/test_data/reads/a.fastq b/src/seqtk/test_data/reads/a.fastq
new file mode 100644
index 00000000..42735560
--- /dev/null
+++ b/src/seqtk/test_data/reads/a.fastq
@@ -0,0 +1,4 @@
+@1
+ACGGCAT
++
+!!!!!!!
diff --git a/src/seqtk/test_data/reads/a.fastq.gz b/src/seqtk/test_data/reads/a.fastq.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0ae3f084025880e6f03bec5c0ef2126f858e37fb
GIT binary patch
literal 44
zcmb2|=HU4Hg)xPJIZ-byvACp=q1fO_QAt^Oc^S{cOV5H@81xvLXT<V~GB7Xz0BJT4
AL;wH)

literal 0
HcmV?d00001

diff --git a/src/seqtk/test_data/reads/id.list b/src/seqtk/test_data/reads/id.list
new file mode 100644
index 00000000..d00491fd
--- /dev/null
+++ b/src/seqtk/test_data/reads/id.list
@@ -0,0 +1 @@
+1
diff --git a/src/seqtk/test_data/script.sh b/src/seqtk/test_data/script.sh
new file mode 100755
index 00000000..049093cd
--- /dev/null
+++ b/src/seqtk/test_data/script.sh
@@ -0,0 +1,9 @@
+# clone repo
+if [ ! -d /tmp/snakemake-wrappers ]; then
+  git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers
+fi
+
+# copy test data
+cp -r /tmp/snakemake-wrappers/bio/seqtk/test/* src/seqtk/test_data
+
+rm src/seqtk/test_data/Snakefile

From 40dc314a7cad4f6038a9b9904bb1e2e712f65f5c Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Wed, 17 Jul 2024 23:23:51 +0200
Subject: [PATCH 6/7] switch to viash actions for ci (#86)

* switch to viash actions for ci

* add changelog entry

* ci force
---
 .github/workflows/test.yaml | 6 ++++--
 CHANGELOG.md                | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 2591978f..30f98b03 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,9 +1,11 @@
-name: Component Testing
+name: Test components
 
 on:
   pull_request:
   push:
+    branches:
+      - main
 
 jobs:
   test:
-    uses: viash-hub/toolbox/.github/workflows/test.yaml@main
\ No newline at end of file
+    uses: viash-io/viash-actions/.github/workflows/test.yaml@v6
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f6a8676f..c9f8b222 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,8 @@
 
 * `busco` components: update BUSCO to `5.7.1` (PR #72).
 
+* Update CI to reusable workflow in `viash-io/viash-actions` (PR #86).
+
 ## DOCUMENTATION
 
 * Extend the contributing guidelines (PR #82):

From e8b82b5d968524f495e80afa8092098408d66d1d Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Wed, 17 Jul 2024 23:25:07 +0200
Subject: [PATCH 7/7] fix authorship (#88)

* fix authorship

* add author

* add missing newlines

* update changelog

* update changelog
---
 CHANGELOG.md                                      |  2 ++
 src/_authors/angela_o_pisco.yaml                  | 14 ++++++++++++++
 src/_authors/dorien_roosen.yaml                   | 10 ++++++++++
 src/_authors/dries_schaumont.yaml                 | 11 +++++++++++
 src/_authors/emma_rousseau.yaml                   | 10 ++++++++++
 src/_authors/jakub_majercik.yaml                  |  2 +-
 src/_authors/kai_waldrant.yaml                    | 14 ++++++++++++++
 src/_authors/leila_paquay.yaml                    | 10 ++++++++++
 src/_authors/robrecht_cannoodt.yaml               |  2 +-
 src/_authors/sai_nirmayi_yasa.yaml                | 10 ++++++++++
 src/_authors/toni_verbeiren.yaml                  |  9 +++++++++
 src/_authors/weiwei_schultz.yaml                  |  2 +-
 src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml  |  4 ++++
 src/arriba/config.vsh.yaml                        |  3 +++
 src/bcl_convert/config.vsh.yaml                   | 11 +++++++++++
 src/bedtools/bedtools_getfasta/config.vsh.yaml    |  3 +++
 src/busco/busco_download_datasets/config.vsh.yaml |  3 +++
 src/busco/busco_list_datasets/config.vsh.yaml     |  3 +++
 src/busco/busco_run/config.vsh.yaml               |  3 +++
 src/cutadapt/config.vsh.yaml                      |  3 +++
 src/falco/config.vsh.yaml                         |  3 +++
 src/fastp/config.vsh.yaml                         |  3 +++
 src/featurecounts/config.vsh.yaml                 |  4 +++-
 src/gffread/config.vsh.yaml                       |  3 +++
 src/lofreq/call/config.vsh.yaml                   |  3 +++
 src/lofreq/indelqual/config.vsh.yaml              |  3 +++
 src/multiqc/config.vsh.yaml                       |  4 +++-
 src/pear/config.vsh.yaml                          |  5 ++++-
 src/salmon/salmon_index/config.vsh.yaml           |  4 +++-
 src/salmon/salmon_quant/config.vsh.yaml           |  4 +++-
 src/samtools/samtools_collate/config.vsh.yaml     |  4 +++-
 src/samtools/samtools_faidx/config.vsh.yaml       |  4 +++-
 src/samtools/samtools_fasta/config.vsh.yaml       |  4 +++-
 src/samtools/samtools_fastq/config.vsh.yaml       |  4 +++-
 src/samtools/samtools_flagstat/config.vsh.yaml    |  4 +++-
 src/samtools/samtools_idxstats/config.vsh.yaml    |  4 +++-
 src/samtools/samtools_index/config.vsh.yaml       |  4 +++-
 src/samtools/samtools_sort/config.vsh.yaml        |  4 +++-
 src/samtools/samtools_stats/config.vsh.yaml       |  4 +++-
 src/samtools/samtools_view/config.vsh.yaml        |  4 +++-
 src/star/star_align_reads/config.vsh.yaml         |  5 +++++
 src/star/star_genome_generate/config.vsh.yaml     |  4 +++-
 src/umi_tools/umi_tools_dedup/config.vsh.yaml     |  4 +++-
 43 files changed, 198 insertions(+), 20 deletions(-)
 create mode 100644 src/_authors/angela_o_pisco.yaml
 create mode 100644 src/_authors/dorien_roosen.yaml
 create mode 100644 src/_authors/dries_schaumont.yaml
 create mode 100644 src/_authors/emma_rousseau.yaml
 create mode 100644 src/_authors/kai_waldrant.yaml
 create mode 100644 src/_authors/leila_paquay.yaml
 create mode 100644 src/_authors/sai_nirmayi_yasa.yaml
 create mode 100644 src/_authors/toni_verbeiren.yaml

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c9f8b222..4e6a0369 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,8 @@
 
   - Mention that the contents of the output of components in tests should be checked.
 
+* Add authorship to existing components (PR #88).
+
 ## BUG FIXES
 
 * `pear`: fix component not exiting with the correct exitcode when PEAR fails (PR #70).
diff --git a/src/_authors/angela_o_pisco.yaml b/src/_authors/angela_o_pisco.yaml
new file mode 100644
index 00000000..1f0bf58f
--- /dev/null
+++ b/src/_authors/angela_o_pisco.yaml
@@ -0,0 +1,14 @@
+name: Angela Oliveira Pisco
+info:
+  role: Contributor
+  links:
+    github: aopisco
+    orcid: "0000-0003-0142-2355"
+    linkedin: aopisco
+  organizations:
+    - name: Insitro
+      href: https://insitro.com
+      role: Director of Computational Biology
+    - name: Open Problems
+      href: https://openproblems.bio
+      role: Core Member
diff --git a/src/_authors/dorien_roosen.yaml b/src/_authors/dorien_roosen.yaml
new file mode 100644
index 00000000..d67448d8
--- /dev/null
+++ b/src/_authors/dorien_roosen.yaml
@@ -0,0 +1,10 @@
+name: Dorien Roosen
+info:
+  links:
+    email: dorien@data-intuitive.com
+    github: dorien-er
+    linkedin: dorien-roosen
+  organizations:
+    - name: Data Intuitive
+      href: https://www.data-intuitive.com
+      role: Data Scientist
diff --git a/src/_authors/dries_schaumont.yaml b/src/_authors/dries_schaumont.yaml
new file mode 100644
index 00000000..b2678081
--- /dev/null
+++ b/src/_authors/dries_schaumont.yaml
@@ -0,0 +1,11 @@
+name: Dries Schaumont
+info:
+  links:
+    email: dries@data-intuitive.com
+    github: DriesSchaumont
+    orcid: "0000-0002-4389-0440"
+    linkedin: dries-schaumont
+  organizations:
+    - name: Data Intuitive
+      href: https://www.data-intuitive.com
+      role: Data Scientist
diff --git a/src/_authors/emma_rousseau.yaml b/src/_authors/emma_rousseau.yaml
new file mode 100644
index 00000000..1a9ac456
--- /dev/null
+++ b/src/_authors/emma_rousseau.yaml
@@ -0,0 +1,10 @@
+name: Emma Rousseau
+info:
+  links:
+    email: emma@data-intuitive.com
+    github: emmarousseau
+    linkedin: emmarousseau1
+  organizations:
+    - name: Data Intuitive
+      href: https://www.data-intuitive.com
+      role: Bioinformatician
diff --git a/src/_authors/jakub_majercik.yaml b/src/_authors/jakub_majercik.yaml
index 3b75fffe..c2a7867d 100644
--- a/src/_authors/jakub_majercik.yaml
+++ b/src/_authors/jakub_majercik.yaml
@@ -7,4 +7,4 @@ info:
   organizations:
     - name: Data Intuitive
       href: https://www.data-intuitive.com
-      role: Bioinformatics Engineer
\ No newline at end of file
+      role: Bioinformatics Engineer
diff --git a/src/_authors/kai_waldrant.yaml b/src/_authors/kai_waldrant.yaml
new file mode 100644
index 00000000..a132c528
--- /dev/null
+++ b/src/_authors/kai_waldrant.yaml
@@ -0,0 +1,14 @@
+name: Kai Waldrant
+info:
+  links:
+    email: kai@data-intuitive.com
+    github: KaiWaldrant
+    orcid: "0009-0003-8555-1361"
+    linkedin: kaiwaldrant
+  organizations:
+    - name: Data Intuitive
+      href: https://www.data-intuitive.com
+      role: Bioinformatician
+    - name: Open Problems
+      href: https://openproblems.bio
+      role: Contributor
diff --git a/src/_authors/leila_paquay.yaml b/src/_authors/leila_paquay.yaml
new file mode 100644
index 00000000..21aa532d
--- /dev/null
+++ b/src/_authors/leila_paquay.yaml
@@ -0,0 +1,10 @@
+name: Leïla Paquay
+info:
+  links:
+    email: leila@data-intuitive.com
+    github: Leila011
+    linkedin: leilapaquay
+  organizations:
+    - name: Data Intuitive
+      href: https://www.data-intuitive.com
+      role: Software Developer
diff --git a/src/_authors/robrecht_cannoodt.yaml b/src/_authors/robrecht_cannoodt.yaml
index d7c0f283..c4c1bdec 100644
--- a/src/_authors/robrecht_cannoodt.yaml
+++ b/src/_authors/robrecht_cannoodt.yaml
@@ -11,4 +11,4 @@ info:
       role: Data Science Engineer
     - name: Open Problems
       href: https://openproblems.bio
-      role: Core Member
\ No newline at end of file
+      role: Core Member
diff --git a/src/_authors/sai_nirmayi_yasa.yaml b/src/_authors/sai_nirmayi_yasa.yaml
new file mode 100644
index 00000000..9f560c58
--- /dev/null
+++ b/src/_authors/sai_nirmayi_yasa.yaml
@@ -0,0 +1,10 @@
+name: Sai Nirmayi Yasa
+info:
+  links:
+    email: nirmayi@data-intuitive.com
+    github: sainirmayi
+    linkedin: sai-nirmayi-yasa
+  organizations:
+    - name: Data Intuitive
+      href: https://www.data-intuitive.com
+      role: Junior Bioinformatics Researcher
diff --git a/src/_authors/toni_verbeiren.yaml b/src/_authors/toni_verbeiren.yaml
new file mode 100644
index 00000000..2f2f851f
--- /dev/null
+++ b/src/_authors/toni_verbeiren.yaml
@@ -0,0 +1,9 @@
+name: Toni Verbeiren
+info:
+  links:
+    github: tverbeiren
+    linkedin: verbeiren
+  organizations:
+  - name: Data Intuitive
+    href: https://www.data-intuitive.com
+    role: Data Scientist and CEO
diff --git a/src/_authors/weiwei_schultz.yaml b/src/_authors/weiwei_schultz.yaml
index 324f9378..e4945078 100644
--- a/src/_authors/weiwei_schultz.yaml
+++ b/src/_authors/weiwei_schultz.yaml
@@ -2,4 +2,4 @@ name: Weiwei Schultz
 info:
   organizations:
     - name: Janssen R&D US
-      role: Associate Director Data Sciences
\ No newline at end of file
+      role: Associate Director Data Sciences
diff --git a/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml b/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml
index b788c7c7..757cbd85 100644
--- a/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml
+++ b/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml
@@ -27,6 +27,10 @@ links:
 references: 
   doi: 10.5281/zenodo.3552717
 license: GPL-3.0
+authors:
+  - __merge__: /src/_authors/leila_paquay.yaml
+    roles: [ author, maintainer ]
+
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/arriba/config.vsh.yaml b/src/arriba/config.vsh.yaml
index 8d72d7eb..db5960cf 100644
--- a/src/arriba/config.vsh.yaml
+++ b/src/arriba/config.vsh.yaml
@@ -11,6 +11,9 @@ license: MIT
 requirements:
   cpus: 1
   commands: [ arriba ]
+authors:
+  - __merge__: /src/_authors/robrecht_cannoodt.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/bcl_convert/config.vsh.yaml b/src/bcl_convert/config.vsh.yaml
index 657fb1f0..81103776 100644
--- a/src/bcl_convert/config.vsh.yaml
+++ b/src/bcl_convert/config.vsh.yaml
@@ -4,6 +4,17 @@ description: |
   Information about upgrading from bcl2fastq via
   [Upgrading from bcl2fastq to BCL Convert](https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html)
   and [BCL Convert Compatible Products](https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html)
+keywords: [demultiplex, fastq, bcl, illumina]
+links:
+  homepage: https://support.illumina.com/sequencing/sequencing_software/bcl-convert.html
+  documentation: https://support.illumina.com/downloads/bcl-convert-user-guide.html
+license: Proprietary
+authors:
+  - __merge__: /src/_authors/toni_verbeiren.yaml
+    roles: [ author, maintainer ]
+  - __merge__: /src/_authors/dorien_roosen.yaml
+    roles: [ author ]
+
 argument_groups:
   - name: Input arguments
     arguments:
diff --git a/src/bedtools/bedtools_getfasta/config.vsh.yaml b/src/bedtools/bedtools_getfasta/config.vsh.yaml
index f1f49a87..fe160b20 100644
--- a/src/bedtools/bedtools_getfasta/config.vsh.yaml
+++ b/src/bedtools/bedtools_getfasta/config.vsh.yaml
@@ -10,6 +10,9 @@ references:
 license: GPL-2.0
 requirements:
   commands: [bedtools]
+authors:
+  - __merge__: /src/_authors/dries_schaumont.yaml
+    roles: [ author, maintainer ]
 
 argument_groups:
   - name: Input arguments
diff --git a/src/busco/busco_download_datasets/config.vsh.yaml b/src/busco/busco_download_datasets/config.vsh.yaml
index 5297af2e..cce3faa0 100644
--- a/src/busco/busco_download_datasets/config.vsh.yaml
+++ b/src/busco/busco_download_datasets/config.vsh.yaml
@@ -9,6 +9,9 @@ links:
 references:
   doi: 10.1007/978-1-4939-9173-0_14
 license: MIT
+authors:
+  - __merge__: /src/_authors/dorien_roosen.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/busco/busco_list_datasets/config.vsh.yaml b/src/busco/busco_list_datasets/config.vsh.yaml
index cac34cc6..93fd0559 100644
--- a/src/busco/busco_list_datasets/config.vsh.yaml
+++ b/src/busco/busco_list_datasets/config.vsh.yaml
@@ -9,6 +9,9 @@ links:
 references:
   doi: 10.1007/978-1-4939-9173-0_14
 license: MIT
+authors:
+  - __merge__: /src/_authors/dorien_roosen.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Outputs
     arguments:
diff --git a/src/busco/busco_run/config.vsh.yaml b/src/busco/busco_run/config.vsh.yaml
index 23ee95fb..435e9d2a 100644
--- a/src/busco/busco_run/config.vsh.yaml
+++ b/src/busco/busco_run/config.vsh.yaml
@@ -9,6 +9,9 @@ links:
 references:
   doi: 10.1007/978-1-4939-9173-0_14
 license: MIT
+authors:
+  - __merge__: /src/_authors/dorien_roosen.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/cutadapt/config.vsh.yaml b/src/cutadapt/config.vsh.yaml
index b315d0ce..7e36a8e0 100644
--- a/src/cutadapt/config.vsh.yaml
+++ b/src/cutadapt/config.vsh.yaml
@@ -9,6 +9,9 @@ links:
 references:
   doi: 10.14806/ej.17.1.200
 license: MIT
+authors:
+  - __merge__: /src/_authors/toni_verbeiren.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   ####################################################################
   - name: Specify Adapters for R1
diff --git a/src/falco/config.vsh.yaml b/src/falco/config.vsh.yaml
index 4d9cf656..de9906ef 100644
--- a/src/falco/config.vsh.yaml
+++ b/src/falco/config.vsh.yaml
@@ -9,6 +9,9 @@ references:
 license: GPL-3.0
 requirements:
   commands: [falco]
+authors:
+  - __merge__: /src/_authors/toni_verbeiren.yaml
+    roles: [ author, maintainer ]
 
 # Notes:
 # - falco as arguments similar to -subsample and we update those to --subsample
diff --git a/src/fastp/config.vsh.yaml b/src/fastp/config.vsh.yaml
index b7d9062a..f1f8f1ed 100644
--- a/src/fastp/config.vsh.yaml
+++ b/src/fastp/config.vsh.yaml
@@ -26,6 +26,9 @@ links:
 references:
   doi: "10.1093/bioinformatics/bty560"
 license: MIT
+authors:
+  - __merge__: /src/_authors/robrecht_cannoodt.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     description: |
diff --git a/src/featurecounts/config.vsh.yaml b/src/featurecounts/config.vsh.yaml
index 8697b1fe..e17d9ac0 100644
--- a/src/featurecounts/config.vsh.yaml
+++ b/src/featurecounts/config.vsh.yaml
@@ -11,7 +11,9 @@ references:
 license: GPL-3.0 
 requirements:
   commands: [ featureCounts ]
-
+authors:
+  - __merge__: /src/_authors/sai_nirmayi_yasa.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/gffread/config.vsh.yaml b/src/gffread/config.vsh.yaml
index 7477a284..bd985ffb 100644
--- a/src/gffread/config.vsh.yaml
+++ b/src/gffread/config.vsh.yaml
@@ -8,6 +8,9 @@ links:
 references: 
   doi: 10.12688/f1000research.23297.2
 license: MIT
+authors:
+  - __merge__: /src/_authors/emma_rousseau.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/lofreq/call/config.vsh.yaml b/src/lofreq/call/config.vsh.yaml
index c547de9d..286a040a 100644
--- a/src/lofreq/call/config.vsh.yaml
+++ b/src/lofreq/call/config.vsh.yaml
@@ -17,6 +17,9 @@ references:
 license: "MIT"
 requirements:
   commands: [ lofreq ]
+authors:
+  - __merge__: /src/_authors/kai_waldrant.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/lofreq/indelqual/config.vsh.yaml b/src/lofreq/indelqual/config.vsh.yaml
index 0524458e..29696c81 100644
--- a/src/lofreq/indelqual/config.vsh.yaml
+++ b/src/lofreq/indelqual/config.vsh.yaml
@@ -18,6 +18,9 @@ references:
 license: "MIT"
 requirements:
   commands: [ lofreq ]
+authors:
+  - __merge__: /src/_authors/kai_waldrant.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/multiqc/config.vsh.yaml b/src/multiqc/config.vsh.yaml
index df5e38e1..ba305025 100644
--- a/src/multiqc/config.vsh.yaml
+++ b/src/multiqc/config.vsh.yaml
@@ -11,7 +11,9 @@ info:
   references:
     doi: 10.1093/bioinformatics/btw354
   licence: GPL v3 or later
-
+authors:
+  - __merge__: /src/_authors/dorien_roosen.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: "Input"
     arguments:
diff --git a/src/pear/config.vsh.yaml b/src/pear/config.vsh.yaml
index d6dbe6c9..acae10cc 100644
--- a/src/pear/config.vsh.yaml
+++ b/src/pear/config.vsh.yaml
@@ -12,7 +12,10 @@ references:
   doi: 10.1093/bioinformatics/btt593
 license: "CC-BY-NC-SA-3.0"
 requirements:
-  commands: [ pear , gzip ]
+  commands: [ pear, gzip ]
+authors:
+  - __merge__: /src/_authors/kai_waldrant.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/salmon/salmon_index/config.vsh.yaml b/src/salmon/salmon_index/config.vsh.yaml
index 41c1e05b..925c3000 100644
--- a/src/salmon/salmon_index/config.vsh.yaml
+++ b/src/salmon/salmon_index/config.vsh.yaml
@@ -12,7 +12,9 @@ references:
 license: GPL-3.0 
 requirements:
   commands: [ salmon ]
-
+authors:
+  - __merge__: /src/_authors/sai_nirmayi_yasa.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/salmon/salmon_quant/config.vsh.yaml b/src/salmon/salmon_quant/config.vsh.yaml
index b7e303f4..1f96f0c9 100644
--- a/src/salmon/salmon_quant/config.vsh.yaml
+++ b/src/salmon/salmon_quant/config.vsh.yaml
@@ -12,7 +12,9 @@ references:
 license: GPL-3.0 
 requirements:
   commands: [ salmon ]
-
+authors:
+  - __merge__: /src/_authors/sai_nirmayi_yasa.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Common input options
     arguments:
diff --git a/src/samtools/samtools_collate/config.vsh.yaml b/src/samtools/samtools_collate/config.vsh.yaml
index 669f4cdf..84a3195c 100644
--- a/src/samtools/samtools_collate/config.vsh.yaml
+++ b/src/samtools/samtools_collate/config.vsh.yaml
@@ -9,7 +9,9 @@ links:
 references: 
   doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
 license: MIT/Expat
-
+authors:
+  - __merge__: /src/_authors/emma_rousseau.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/samtools/samtools_faidx/config.vsh.yaml b/src/samtools/samtools_faidx/config.vsh.yaml
index c1c9325d..937b0804 100644
--- a/src/samtools/samtools_faidx/config.vsh.yaml
+++ b/src/samtools/samtools_faidx/config.vsh.yaml
@@ -9,7 +9,9 @@ links:
 references: 
   doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
 license: MIT/Expat
-
+authors:
+  - __merge__: /src/_authors/emma_rousseau.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/samtools/samtools_fasta/config.vsh.yaml b/src/samtools/samtools_fasta/config.vsh.yaml
index 23517f6c..70ba72b9 100644
--- a/src/samtools/samtools_fasta/config.vsh.yaml
+++ b/src/samtools/samtools_fasta/config.vsh.yaml
@@ -9,7 +9,9 @@ links:
 references: 
   doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
 license: MIT/Expat
-
+authors:
+  - __merge__: /src/_authors/emma_rousseau.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/samtools/samtools_fastq/config.vsh.yaml b/src/samtools/samtools_fastq/config.vsh.yaml
index cac7653b..09014ced 100644
--- a/src/samtools/samtools_fastq/config.vsh.yaml
+++ b/src/samtools/samtools_fastq/config.vsh.yaml
@@ -9,7 +9,9 @@ links:
 references: 
   doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
 license: MIT/Expat
-
+authors:
+  - __merge__: /src/_authors/emma_rousseau.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/samtools/samtools_flagstat/config.vsh.yaml b/src/samtools/samtools_flagstat/config.vsh.yaml
index 9b4dfbe1..b30f1867 100644
--- a/src/samtools/samtools_flagstat/config.vsh.yaml
+++ b/src/samtools/samtools_flagstat/config.vsh.yaml
@@ -9,7 +9,9 @@ links:
 references: 
   doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
 license: MIT/Expat
-
+authors:
+  - __merge__: /src/_authors/emma_rousseau.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/samtools/samtools_idxstats/config.vsh.yaml b/src/samtools/samtools_idxstats/config.vsh.yaml
index 30f21348..16e901d7 100644
--- a/src/samtools/samtools_idxstats/config.vsh.yaml
+++ b/src/samtools/samtools_idxstats/config.vsh.yaml
@@ -9,7 +9,9 @@ links:
 references: 
   doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
 license: MIT/Expat
-
+authors:
+  - __merge__: /src/_authors/emma_rousseau.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/samtools/samtools_index/config.vsh.yaml b/src/samtools/samtools_index/config.vsh.yaml
index 8c59a20e..4220c691 100644
--- a/src/samtools/samtools_index/config.vsh.yaml
+++ b/src/samtools/samtools_index/config.vsh.yaml
@@ -9,7 +9,9 @@ links:
 references: 
   doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
 license: MIT/Expat
-
+authors:
+  - __merge__: /src/_authors/emma_rousseau.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/samtools/samtools_sort/config.vsh.yaml b/src/samtools/samtools_sort/config.vsh.yaml
index a78800da..e0776c2d 100644
--- a/src/samtools/samtools_sort/config.vsh.yaml
+++ b/src/samtools/samtools_sort/config.vsh.yaml
@@ -9,7 +9,9 @@ links:
 references: 
   doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
 license: MIT/Expat
-
+authors:
+  - __merge__: /src/_authors/emma_rousseau.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/samtools/samtools_stats/config.vsh.yaml b/src/samtools/samtools_stats/config.vsh.yaml
index ca630876..b115b4df 100644
--- a/src/samtools/samtools_stats/config.vsh.yaml
+++ b/src/samtools/samtools_stats/config.vsh.yaml
@@ -9,7 +9,9 @@ links:
 references: 
   doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
 license: MIT/Expat
-
+authors:
+  - __merge__: /src/_authors/emma_rousseau.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/samtools/samtools_view/config.vsh.yaml b/src/samtools/samtools_view/config.vsh.yaml
index 206b87ac..86dde146 100644
--- a/src/samtools/samtools_view/config.vsh.yaml
+++ b/src/samtools/samtools_view/config.vsh.yaml
@@ -9,7 +9,9 @@ links:
 references: 
   doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
 license: MIT/Expat
-
+authors:
+  - __merge__: /src/_authors/emma_rousseau.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments:
diff --git a/src/star/star_align_reads/config.vsh.yaml b/src/star/star_align_reads/config.vsh.yaml
index eab65b35..bdc956d3 100644
--- a/src/star/star_align_reads/config.vsh.yaml
+++ b/src/star/star_align_reads/config.vsh.yaml
@@ -11,6 +11,11 @@ references:
 license: MIT
 requirements:
   commands: [ STAR, python, ps, zcat, bzcat ]
+authors:
+  - __merge__: /src/_authors/angela_o_pisco.yaml
+    roles: [ author ]
+  - __merge__: /src/_authors/robrecht_cannoodt.yaml
+    roles: [ author, maintainer ]
 # manually taking care of the main input and output arguments
 argument_groups:
   - name: Inputs
diff --git a/src/star/star_genome_generate/config.vsh.yaml b/src/star/star_genome_generate/config.vsh.yaml
index 3adaf7a2..60fa3839 100644
--- a/src/star/star_genome_generate/config.vsh.yaml
+++ b/src/star/star_genome_generate/config.vsh.yaml
@@ -11,7 +11,9 @@ references:
 license: MIT
 requirements:
   commands: [ STAR ]
-
+authors:
+  - __merge__: /src/_authors/sai_nirmayi_yasa.yaml
+    roles: [ author, maintainer ]
 argument_groups:
 - name: "Input"
   arguments: 
diff --git a/src/umi_tools/umi_tools_dedup/config.vsh.yaml b/src/umi_tools/umi_tools_dedup/config.vsh.yaml
index a02e70a1..e6953e6e 100644
--- a/src/umi_tools/umi_tools_dedup/config.vsh.yaml
+++ b/src/umi_tools/umi_tools_dedup/config.vsh.yaml
@@ -10,7 +10,9 @@ links:
 references: 
   doi: 10.1101/gr.209601.116
 license: MIT
-
+authors:
+  - __merge__: /src/_authors/emma_rousseau.yaml
+    roles: [ author, maintainer ]
 argument_groups:
   - name: Inputs
     arguments: