diff --git a/CHANGELOG.md b/CHANGELOG.md index 45a2a111..bcf6aa71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,8 +31,21 @@ * `bedtools`: - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). + - `bedtools/bedtools_groupby`: Summarizes a dataset column based upon common column groupings. Akin to the SQL "group by" command (PR #123). + - `bedtools/bedtools_merge`: Merges overlapping BED/GFF/VCF entries into a single interval (PR #118). - `bedtools/bedtools_bamtofastq`: Convert BAM alignments to FASTQ files (PR #101). - `bedtools/bedtools_bedtobam`: Converts genomic feature records (bed/gff/vcf) to BAM format (PR #111). + - `bedtools/bedtools_bed12tobed6`: Converts BED12 files to BED6 files (PR #140). + - `bedtools/bedtools_links`: Creates an HTML file with links to an instance of the UCSC Genome Browser for all features / intervals in a (bed/gff/vcf) file (PR #137). + +* `qualimap/qualimap_rnaseq`: RNA-seq QC analysis using qualimap (PR #74). + +* `rsem/rsem_prepare_reference`: Prepare transcript references for RSEM (PR #89). + +* `bcftools`: + - `bcftools/bcftools_sort`: Sorts BCF/VCF files by position and other criteria (PR #141). + +* `fastqc`: High throughput sequence quality control analysis tool (PR #92). ## MINOR CHANGES diff --git a/src/bcftools/bcftools_sort/config.vsh.yaml b/src/bcftools/bcftools_sort/config.vsh.yaml new file mode 100644 index 00000000..71a15309 --- /dev/null +++ b/src/bcftools/bcftools_sort/config.vsh.yaml @@ -0,0 +1,73 @@ +name: bcftools_sort +namespace: bcftools +description: | + Sorts VCF/BCF files. +keywords: [Sort, VCF, BCF] +links: + homepage: https://samtools.github.io/bcftools/ + documentation: https://samtools.github.io/bcftools/bcftools.html#sort + repository: https://github.com/samtools/bcftools + issue_tracker: https://github.com/samtools/bcftools/issues +references: + doi: https://doi.org/10.1093/gigascience/giab008 +license: MIT/Expat, GNU +requirements: + commands: [bcftools] +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input + alternatives: -i + type: file + description: Input VCF/BCF file. + required: true + + - name: Outputs + arguments: + - name: --output + alternatives: -o + direction: output + type: file + description: Output sorted VCF/BCF file. + required: true + + - name: Options + arguments: + - name: --output_type + alternatives: -O + type: string + choices: [b, u, z, v] + description: | + Compresses or uncompresses the output. + The options are: + b: compressed BCF, + u: uncompressed BCF, + z: compressed VCF, + v: uncompressed VCF. + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: test_data + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: [bcftools, procps] + - type: docker + run: | + echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftools //p')\"" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow diff --git a/src/bcftools/bcftools_sort/help.txt b/src/bcftools/bcftools_sort/help.txt new file mode 100644 index 00000000..3b5fa80b --- /dev/null +++ b/src/bcftools/bcftools_sort/help.txt @@ -0,0 +1,14 @@ +``` +bcftools sort +``` + +About: Sort VCF/BCF file. 
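As a quick illustration of the options documented below (a sketch only; the input and output file names are hypothetical):

```bash
# Sort a VCF by position, capping memory at 1G, and write compressed BCF:
bcftools sort -m 1G -O b -o sorted.bcf unsorted.vcf
```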
+Usage: bcftools sort [OPTIONS] + +Options: + -m, --max-mem FLOAT[kMG] maximum memory to use [768M] + -o, --output FILE output file name [stdout] + -O, --output-type b|u|z|v b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v] + -O, --output-type u|b|v|z[0-9] u/b: un/compressed BCF, v/z: un/compressed VCF, 0-9: compression level [v] + -T, --temp-dir DIR temporary files [/tmp/bcftools.XXXXXX] + diff --git a/src/bcftools/bcftools_sort/script.sh b/src/bcftools/bcftools_sort/script.sh new file mode 100644 index 00000000..e9afb223 --- /dev/null +++ b/src/bcftools/bcftools_sort/script.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +# Exit on error +set -eo pipefail + +# Execute bedtools bamtofastq with the provided arguments +bcftools sort \ + -o "$par_output" \ + ${par_output_type:+-O "$par_output_type"} \ + ${meta_memory_mb:+-m "${meta_memory_mb}M"} \ + ${meta_temp_dir:+-T "$meta_temp_dir"} \ + $par_input \ + diff --git a/src/bcftools/bcftools_sort/test.sh b/src/bcftools/bcftools_sort/test.sh new file mode 100644 index 00000000..f406b8e2 --- /dev/null +++ b/src/bcftools/bcftools_sort/test.sh @@ -0,0 +1,185 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +# Exit on error +set -eo pipefail + +test_data="$meta_resources_dir/test_data" + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_identical_content() { + diff -a "$2" "$1" \ + || (echo "Files are not identical!" && exit 1) +} +############################################# + +# Create directories for tests +echo "Creating Test Data..." +TMPDIR=$(mktemp -d "$meta_temp_dir/XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT + +# Create test data +cat < "$TMPDIR/example.vcf" +##fileformat=VCFv4.0 +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=1000GenomesPilot-NCBI36 +##contig= +##contig= +##phasing=partial +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##ALT= +##ALT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +19 112 . A G 10 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 +19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 +20 1235237 . T . . . . GT 0/0 0|0 ./. +20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:.,. +20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:.,. +20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:.:56,60 0|0:48:4:51,51 0/0:61:2:.,. 
+20 1234567 microsat1 G GA,GAC 50 PASS NS=3;DP=9;AA=G;AN=6;AC=3,1 GT:GQ:DP 0/1:.:4 0/2:17:2 1/1:40:3 +EOF + +# Create expected output +cat < "$TMPDIR/expected_output.vcf" +##fileformat=VCFv4.0 +##FILTER= +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=1000GenomesPilot-NCBI36 +##contig= +##contig= +##phasing=partial +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##ALT= +##ALT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 +19 112 . A G 10 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 +20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:.,. +20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:.,. +20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:.:56,60 0|0:48:4:51,51 0/0:61:2:.,. +20 1234567 microsat1 G GA,GAC 50 PASS NS=3;DP=9;AA=G;AN=6;AC=3,1 GT:GQ:DP 0/1:.:4 0/2:17:2 1/1:40:3 +20 1235237 . T . . . . GT 0/0 0|0 ./. +EOF + +cat < "$TMPDIR/expected_bcf.vcf" +##fileformat=VCFv4.0 +##FILTER= +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=1000GenomesPilot-NCBI36 +##contig= +##contig= +##phasing=partial +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##ALT= +##ALT= +##bcftools_viewVersion=1.16+htslib-1.16 +##bcftools_viewCommand=view -O b -o example.bcf example.vcf.gz; Date=Mon Aug 26 13:00:22 2024 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 +19 112 . A G 10 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 +20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:.,. +20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:.,. +20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:.:56,60 0|0:48:4:51,51 0/0:61:2:.,. +20 1234567 microsat1 G GA,GAC 50 PASS NS=3;DP=9;AA=G;AN=6;AC=3,1 GT:GQ:DP 0/1:.:4 0/2:17:2 1/1:40:3 +20 1235237 . T . . . . GT 0/0 0|0 ./. +EOF + + +# Test 1: Default Use +mkdir "$TMPDIR/test1" && pushd "$TMPDIR/test1" > /dev/null + +echo "> Run bcftools_sort on VCF file" +"$meta_executable" \ + --input "../example.vcf" \ + --output "output.vcf" \ + --output_type "v" \ + &> /dev/null + +# checks +assert_file_exists "output.vcf" +assert_file_not_empty "output.vcf" +assert_identical_content "output.vcf" "../expected_output.vcf" +echo "- test1 succeeded -" + +popd > /dev/null + +# Test 2: BCF file input +mkdir "$TMPDIR/test2" && pushd "$TMPDIR/test2" > /dev/null + +echo "> Run bcftools_sort on BCF file" +"$meta_executable" \ + --input "${test_data}/example.bcf" \ + --output "output.vcf" \ + --output_type "v" \ + &> /dev/null + +# checks +assert_file_exists "output.vcf" +assert_file_not_empty "output.vcf" +assert_identical_content "output.vcf" "../expected_bcf.vcf" +echo "- test2 succeeded -" + +popd > /dev/null + +echo "---- All tests succeeded! 
----" +exit 0 diff --git a/src/bcftools/bcftools_sort/test_data/example.bcf b/src/bcftools/bcftools_sort/test_data/example.bcf new file mode 100644 index 00000000..d78ae010 Binary files /dev/null and b/src/bcftools/bcftools_sort/test_data/example.bcf differ diff --git a/src/bedtools/bedtools_bed12tobed6/config.vsh.yaml b/src/bedtools/bedtools_bed12tobed6/config.vsh.yaml new file mode 100644 index 00000000..8dd6328c --- /dev/null +++ b/src/bedtools/bedtools_bed12tobed6/config.vsh.yaml @@ -0,0 +1,67 @@ +name: bedtools_bed12tobed6 +namespace: bedtools +description: | + Converts BED features in BED12 (a.k.a. “blocked” BED features such as genes) to discrete BED6 features. + For example, in the case of a gene with six exons, bed12ToBed6 would create six separate BED6 features (i.e., one for each exon). +keywords: [Converts, BED12, BED6] +links: + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/bed12tobed6.html + repository: https://github.com/arq5x/bedtools2 + homepage: https://bedtools.readthedocs.io/en/latest/# + issue_tracker: https://github.com/arq5x/bedtools2/issues +references: + doi: 10.1093/bioinformatics/btq033 +license: MIT +requirements: + commands: [bedtools] +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + + - name: Inputs + arguments: + - name: --input + alternatives: -i + type: file + description: Input BED12 file. + required: true + + - name: Outputs + arguments: + - name: --output + alternatives: -o + type: file + direction: output + description: Output BED6 file to be written. + + - name: Options + arguments: + - name: --n_score + alternatives: -n + type: boolean_true + description: | + Force the score to be the (1-based) block number from the BED12. + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: [bedtools, procps] + - type: docker + run: | + echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow diff --git a/src/bedtools/bedtools_bed12tobed6/help.txt b/src/bedtools/bedtools_bed12tobed6/help.txt new file mode 100644 index 00000000..17af6983 --- /dev/null +++ b/src/bedtools/bedtools_bed12tobed6/help.txt @@ -0,0 +1,13 @@ +``` +bedtools bed12tobed6 -h +``` + +Tool: bedtools bed12tobed6 (aka bed12ToBed6) +Version: v2.30.0 +Summary: Splits BED12 features into discrete BED6 features. + +Usage: bedtools bed12tobed6 [OPTIONS] -i + +Options: + -n Force the score to be the (1-based) block number from the BED12. 
+ diff --git a/src/bedtools/bedtools_bed12tobed6/script.sh b/src/bedtools/bedtools_bed12tobed6/script.sh new file mode 100644 index 00000000..bbfaddc6 --- /dev/null +++ b/src/bedtools/bedtools_bed12tobed6/script.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +set -eo pipefail + +# Unset parameters +[[ "$par_n_score" == "false" ]] && unset par_n_score + +# Execute bedtools bed12tobed6 conversion +bedtools bed12tobed6 \ + ${par_n_score:+-n} \ + -i "$par_input" \ + > "$par_output" diff --git a/src/bedtools/bedtools_bed12tobed6/test.sh b/src/bedtools/bedtools_bed12tobed6/test.sh new file mode 100644 index 00000000..2ef596d9 --- /dev/null +++ b/src/bedtools/bedtools_bed12tobed6/test.sh @@ -0,0 +1,85 @@ +#!/bin/bash + +# exit on error +set -eo pipefail + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_identical_content() { + diff -a "$2" "$1" \ + || (echo "Files are not identical!" && exit 1) +} +############################################# + +# Create directories for tests +echo "Creating Test Data..." +TMPDIR=$(mktemp -d "$meta_temp_dir/XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT + +# Create example BED12 file +cat < "$TMPDIR/example.bed12" +chr21 10079666 10120808 uc002yiv.1 0 - 10081686 1 0 1 2 0 6 0 8 0 4 528,91,101,215, 0,1930,39750,40927, +chr21 10080031 10081687 uc002yiw.1 0 - 10080031 1 0 0 8 0 0 3 1 0 2 200,91, 0,1565, +chr21 10081660 10120796 uc002yix.2 0 - 10081660 1 0 0 8 1 6 6 0 0 3 27,101,223, 0,37756,38913, +EOF + +# Expected output bed6 file +cat < "$TMPDIR/expected.bed6" +chr21 10079666 10120808 uc002yiv.1 0 - +chr21 10080031 10081687 uc002yiw.1 0 - +chr21 10081660 10120796 uc002yix.2 0 - +EOF +# Expected output bed6 file with -n option +cat < "$TMPDIR/expected_n.bed6" +chr21 10079666 10120808 uc002yiv.1 1 - +chr21 10080031 10081687 uc002yiw.1 1 - +chr21 10081660 10120796 uc002yix.2 1 - +EOF + +# Test 1: Default conversion BED12 to BED6 +mkdir "$TMPDIR/test1" && pushd "$TMPDIR/test1" > /dev/null + +echo "> Run bedtools_bed12tobed6 on BED12 file" +"$meta_executable" \ + --input "../example.bed12" \ + --output "output.bed6" + +# checks +assert_file_exists "output.bed6" +assert_file_not_empty "output.bed6" +assert_identical_content "output.bed6" "../expected.bed6" +echo "- test1 succeeded -" + +popd > /dev/null + +# Test 2: Conversion BED12 to BED6 with -n option +mkdir "$TMPDIR/test2" && pushd "$TMPDIR/test2" > /dev/null + +echo "> Run bedtools_bed12tobed6 on BED12 file with -n option" +"$meta_executable" \ + --input "../example.bed12" \ + --output "output.bed6" \ + --n_score + +# checks +assert_file_exists "output.bed6" +assert_file_not_empty "output.bed6" +assert_identical_content "output.bed6" "../expected_n.bed6" +echo "- test2 succeeded -" + +popd > /dev/null + +echo "---- All tests succeeded! ----" +exit 0 diff --git a/src/bedtools/bedtools_groupby/config.vsh.yaml b/src/bedtools/bedtools_groupby/config.vsh.yaml new file mode 100644 index 00000000..89c4845b --- /dev/null +++ b/src/bedtools/bedtools_groupby/config.vsh.yaml @@ -0,0 +1,155 @@ +name: bedtools_groupby +namespace: bedtools +description: | + Summarizes a dataset column based upon common column groupings. 
+ Akin to the SQL "group by" command. +keywords: [groupby, BED] +links: + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/groupby.html + repository: https://github.com/arq5x/bedtools2 + homepage: https://bedtools.readthedocs.io/en/latest/# + issue_tracker: https://github.com/arq5x/bedtools2/issues +references: + doi: 10.1093/bioinformatics/btq033 +license: MIT +requirements: + commands: [bedtools] +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input + alternatives: -i + type: file + direction: input + description: | + The input BED file to be used. + required: true + example: input_a.bed + + - name: Outputs + arguments: + - name: --output + type: file + direction: output + description: | + The output groupby BED file. + required: true + example: output.bed + + - name: Options + arguments: + - name: --groupby + alternatives: [-g, -grp] + type: string + description: | + Specify the columns (1-based) for the grouping. + The columns must be comma separated. + - Default: 1,2,3 + required: true + + - name: --column + alternatives: [-c, -opCols] + type: integer + description: | + Specify the column (1-based) that should be summarized. + required: true + + - name: --operation + alternatives: [-o, -ops] + type: string + description: | + Specify the operation that should be applied to opCol. + Valid operations: + sum, count, count_distinct, min, max, + mean, median, mode, antimode, + stdev, sstdev (sample standard dev.), + collapse (i.e., print a comma separated list (duplicates allowed)), + distinct (i.e., print a comma separated list (NO duplicates allowed)), + distinct_sort_num (as distinct, but sorted numerically, ascending), + distinct_sort_num_desc (as distinct, but sorted numerically, descending), + concat (i.e., merge values into a single, non-delimited string), + freqdesc (i.e., print desc. list of values:freq) + freqasc (i.e., print asc. list of values:freq) + first (i.e., print first value) + last (i.e., print last value) + + Default value: sum + + If there is only column, but multiple operations, all operations will be + applied on that column. Likewise, if there is only one operation, but + multiple columns, that operation will be applied to all columns. + Otherwise, the number of columns must match the the number of operations, + and will be applied in respective order. + E.g., "-c 5,4,6 -o sum,mean,count" will give the sum of column 5, + the mean of column 4, and the count of column 6. + The order of output columns will match the ordering given in the command. + + - name: --full + type: boolean_true + description: | + Print all columns from input file. The first line in the group is used. + Default: print only grouped columns. + + - name: --inheader + type: boolean_true + description: | + Input file has a header line - the first line will be ignored. + + - name: --outheader + type: boolean_true + description: | + Print header line in the output, detailing the column names. + If the input file has headers (-inheader), the output file + will use the input's column names. + If the input file has no headers, the output file + will use "col_1", "col_2", etc. as the column names. + + - name: --header + type: boolean_true + description: same as '-inheader -outheader'. + + - name: --ignorecase + type: boolean_true + description: | + Group values regardless of upper/lower case. 
+ + - name: --precision + alternatives: -prec + type: integer + description: | + Sets the decimal precision for output. + default: 5 + + - name: --delimiter + alternatives: -delim + type: string + description: | + Specify a custom delimiter for the collapse operations. + example: "|" + default: "," + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: [bedtools, procps] + - type: docker + run: | + echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow diff --git a/src/bedtools/bedtools_groupby/help.txt b/src/bedtools/bedtools_groupby/help.txt new file mode 100644 index 00000000..a631b4b1 --- /dev/null +++ b/src/bedtools/bedtools_groupby/help.txt @@ -0,0 +1,93 @@ +```bash +bedtools groupby +``` + +Tool: bedtools groupby +Version: v2.30.0 +Summary: Summarizes a dataset column based upon + common column groupings. Akin to the SQL "group by" command. + +Usage: bedtools groupby -g [group_column(s)] -c [op_column(s)] -o [ops] + cat [FILE] | bedtools groupby -g [group_column(s)] -c [op_column(s)] -o [ops] + +Options: + -i Input file. Assumes "stdin" if omitted. + + -g -grp Specify the columns (1-based) for the grouping. + The columns must be comma separated. + - Default: 1,2,3 + + -c -opCols Specify the column (1-based) that should be summarized. + - Required. + + -o -ops Specify the operation that should be applied to opCol. + Valid operations: + sum, count, count_distinct, min, max, + mean, median, mode, antimode, + stdev, sstdev (sample standard dev.), + collapse (i.e., print a comma separated list (duplicates allowed)), + distinct (i.e., print a comma separated list (NO duplicates allowed)), + distinct_sort_num (as distinct, but sorted numerically, ascending), + distinct_sort_num_desc (as distinct, but sorted numerically, descending), + concat (i.e., merge values into a single, non-delimited string), + freqdesc (i.e., print desc. list of values:freq) + freqasc (i.e., print asc. list of values:freq) + first (i.e., print first value) + last (i.e., print last value) + - Default: sum + + If there is only column, but multiple operations, all operations will be + applied on that column. Likewise, if there is only one operation, but + multiple columns, that operation will be applied to all columns. + Otherwise, the number of columns must match the the number of operations, + and will be applied in respective order. + E.g., "-c 5,4,6 -o sum,mean,count" will give the sum of column 5, + the mean of column 4, and the count of column 6. + The order of output columns will match the ordering given in the command. + + + -full Print all columns from input file. The first line in the group is used. + Default: print only grouped columns. + + -inheader Input file has a header line - the first line will be ignored. + + -outheader Print header line in the output, detailing the column names. + If the input file has headers (-inheader), the output file + will use the input's column names. + If the input file has no headers, the output file + will use "col_1", "col_2", etc. as the column names. + + -header same as '-inheader -outheader' + + -ignorecase Group values regardless of upper/lower case. + + -prec Sets the decimal precision for output (Default: 5) + + -delim Specify a custom delimiter for the collapse operations. + - Example: -delim "|" + - Default: ",". 
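For comparison, an invocation of the Viash component defined above might look like this (a sketch; the flag names come from the config, the file names are hypothetical):

```bash
# Group ex1.bed on columns 1-4 and report the maximum of column 9 per group:
bedtools_groupby \
  --input ex1.bed \
  --groupby 1,2,3,4 \
  --column 9 \
  --operation max \
  --output grouped.bed
```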
+ +Examples: + $ cat ex1.out + chr1 10 20 A chr1 15 25 B.1 1000 ATAT + chr1 10 20 A chr1 25 35 B.2 10000 CGCG + + $ groupBy -i ex1.out -g 1,2,3,4 -c 9 -o sum + chr1 10 20 A 11000 + + $ groupBy -i ex1.out -grp 1,2,3,4 -opCols 9,9 -ops sum,max + chr1 10 20 A 11000 10000 + + $ groupBy -i ex1.out -g 1,2,3,4 -c 8,9 -o collapse,mean + chr1 10 20 A B.1,B.2, 5500 + + $ cat ex1.out | groupBy -g 1,2,3,4 -c 8,9 -o collapse,mean + chr1 10 20 A B.1,B.2, 5500 + + $ cat ex1.out | groupBy -g 1,2,3,4 -c 10 -o concat + chr1 10 20 A ATATCGCG + +Notes: + (1) The input file/stream should be sorted/grouped by the -grp. columns + (2) If -i is unspecified, input is assumed to come from stdin. + diff --git a/src/bedtools/bedtools_groupby/script.sh b/src/bedtools/bedtools_groupby/script.sh new file mode 100644 index 00000000..b8a40cdc --- /dev/null +++ b/src/bedtools/bedtools_groupby/script.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +# Exit on error +set -eo pipefail + +# Unset parameters +unset_if_false=( + par_full + par_inheader + par_outheader + par_header + par_ignorecase +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done + +bedtools groupby \ + ${par_full:+-full} \ + ${par_inheader:+-inheader} \ + ${par_outheader:+-outheader} \ + ${par_header:+-header} \ + ${par_ignorecase:+-ignorecase} \ + ${par_precision:+-prec "$par_precision"} \ + ${par_delimiter:+-delim "$par_delimiter"} \ + -i "$par_input" \ + -g "$par_groupby" \ + -c "$par_column" \ + ${par_operation:+-o "$par_operation"} \ + > "$par_output" + \ No newline at end of file diff --git a/src/bedtools/bedtools_groupby/test.sh b/src/bedtools/bedtools_groupby/test.sh new file mode 100644 index 00000000..ce99a1ec --- /dev/null +++ b/src/bedtools/bedtools_groupby/test.sh @@ -0,0 +1,198 @@ +#!/bin/bash + +# exit on error +set -eo pipefail + +## VIASH START +meta_executable="target/executable/bedtools/bedtools_groupby/bedtools_groupby" +meta_resources_dir="src/bedtools/bedtools_groupby" +## VIASH END + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_identical_content() { + diff -a "$2" "$1" \ + || (echo "Files are not identical!" && exit 1) +} +############################################# + +# Create directories for tests +echo "Creating Test Data..." 
+TMPDIR=$(mktemp -d "$meta_temp_dir/XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT + +# Create and populate example.bed +cat << EOF > $TMPDIR/example.bed +# Header +chr21 9719758 9729320 variant1 chr21 9719768 9721892 ALR/Alpha 1004 + +chr21 9719758 9729320 variant1 chr21 9721905 9725582 ALR/Alpha 1010 + +chr21 9719758 9729320 variant1 chr21 9725582 9725977 L1PA3 3288 + +chr21 9719758 9729320 variant1 chr21 9726021 9729309 ALR/Alpha 1051 + +chr21 9729310 9757478 variant2 chr21 9729320 9729809 L1PA3 3897 - +chr21 9729310 9757478 variant2 chr21 9729809 9730866 L1P1 8367 + +chr21 9729310 9757478 variant2 chr21 9730866 9734026 ALR/Alpha 1036 - +chr21 9729310 9757478 variant2 chr21 9734037 9757471 ALR/Alpha 1182 - +chr21 9795588 9796685 variant3 chr21 9795589 9795713 (GAATG)n 308 + +chr21 9795588 9796685 variant3 chr21 9795736 9795894 (GAATG)n 683 + +chr21 9795588 9796685 variant3 chr21 9795911 9796007 (GAATG)n 345 + +chr21 9795588 9796685 variant3 chr21 9796028 9796187 (GAATG)n 756 + +chr21 9795588 9796685 variant3 chr21 9796202 9796615 (GAATG)n 891 + +chr21 9795588 9796685 variant3 chr21 9796637 9796824 (GAATG)n 621 + +EOF + +# Create and populate expected output files for different tests +cat << EOF > $TMPDIR/expected.bed +chr21 9719758 9729320 6353 +chr21 9729310 9757478 14482 +chr21 9795588 9796685 3604 +EOF +cat << EOF > $TMPDIR/expected_max.bed +chr21 9719758 9729320 variant1 3288 +chr21 9729310 9757478 variant2 8367 +chr21 9795588 9796685 variant3 891 +EOF +cat << EOF > $TMPDIR/expected_full.bed +chr21 9719758 9729320 variant1 chr21 9719768 9721892 ALR/Alpha 1004 + 6353 +chr21 9729310 9757478 variant2 chr21 9729320 9729809 L1PA3 3897 - 14482 +chr21 9795588 9796685 variant3 chr21 9795589 9795713 (GAATG)n 308 + 3604 +EOF +cat << EOF > $TMPDIR/expected_delimited.bed +chr21 9719758 9729320 variant1 1004;1010;3288;1051 +chr21 9729310 9757478 variant2 3897;8367;1036;1182 +chr21 9795588 9796685 variant3 308;683;345;756;891;621 +EOF +cat << EOF > $TMPDIR/expected_precision.bed +chr21 9719758 9729320 variant1 1.6e+03 +chr21 9729310 9757478 variant2 3.6e+03 +chr21 9795588 9796685 variant3 6e+02 +EOF + +# Test 1: without operation option, default operation is sum +mkdir "$TMPDIR/test1" && pushd "$TMPDIR/test1" > /dev/null + +echo "> Run bedtools groupby on BED file" +"$meta_executable" \ + --input "../example.bed" \ + --groupby "1,2,3" \ + --column "9" \ + --output "output.bed" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected.bed" +echo "- test1 succeeded -" + +popd > /dev/null + +# Test 2: with operation max option +mkdir "$TMPDIR/test2" && pushd "$TMPDIR/test2" > /dev/null + +echo "> Run bedtools groupby on BED file with max operation" +"$meta_executable" \ + --input "../example.bed" \ + --groupby "1-4" \ + --column "9" \ + --operation "max" \ + --output "output.bed" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected_max.bed" +echo "- test2 succeeded -" + +popd > /dev/null + +# Test 3: full option +mkdir "$TMPDIR/test3" && pushd "$TMPDIR/test3" > /dev/null + +echo "> Run bedtools groupby on BED file with full option" +"$meta_executable" \ + --input "../example.bed" \ + --groupby "1-4" \ + --column "9" \ + --full \ + --output "output.bed" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected_full.bed" +echo "- 
test3 succeeded -" + +popd > /dev/null + +# Test 4: header option +mkdir "$TMPDIR/test4" && pushd "$TMPDIR/test4" > /dev/null + +echo "> Run bedtools groupby on BED file with header option" +"$meta_executable" \ + --input "../example.bed" \ + --groupby "1-4" \ + --column "9" \ + --header \ + --output "output.bed" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_file_contains "output.bed" "# Header" +echo "- test4 succeeded -" + +popd > /dev/null + +# Test 5: Delimiter and collapse +mkdir "$TMPDIR/test5" && pushd "$TMPDIR/test5" > /dev/null + +echo "> Run bedtools groupby on BED file with delimiter and collapse options" +"$meta_executable" \ + --input "../example.bed" \ + --groupby "1-4" \ + --column "9" \ + --operation "collapse" \ + --delimiter ";" \ + --output "output.bed" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected_delimited.bed" +echo "- test5 succeeded -" + +popd > /dev/null + +# Test 6: precision option +mkdir "$TMPDIR/test6" && pushd "$TMPDIR/test6" > /dev/null + +echo "> Run bedtools groupby on BED file with precision option" +"$meta_executable" \ + --input "../example.bed" \ + --groupby "1-4" \ + --column "9" \ + --operation "mean" \ + --precision 2 \ + --output "output.bed" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected_precision.bed" +echo "- test6 succeeded -" + +popd > /dev/null + +echo "---- All tests succeeded! ----" +exit 0 diff --git a/src/bedtools/bedtools_links/config.vsh.yaml b/src/bedtools/bedtools_links/config.vsh.yaml new file mode 100644 index 00000000..b4e43cd3 --- /dev/null +++ b/src/bedtools/bedtools_links/config.vsh.yaml @@ -0,0 +1,91 @@ +name: bedtools_links +namespace: bedtools +description: | + Creates an HTML file with links to an instance of the UCSC Genome Browser for all features / intervals in a file. + This is useful for cases when one wants to manually inspect through a large set of annotations or features. +keywords: [Links, BED, GFF, VCF] +links: + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/links.html + repository: https://github.com/arq5x/bedtools2 + homepage: https://bedtools.readthedocs.io/en/latest/# + issue_tracker: https://github.com/arq5x/bedtools2/issues +references: + doi: 10.1093/bioinformatics/btq033 +license: MIT +requirements: + commands: [bedtools] +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input + alternatives: -i + type: file + description: Input file (bed/gff/vcf). + required: true + + - name: Outputs + arguments: + - name: --output + alternatives: -o + type: file + direction: output + description: Output HTML file to be written. + + - name: Options + description: | + By default, the links created will point to human (hg18) UCSC browser. + If you have a local mirror, you can override this behavior by supplying + the -base, -org, and -db options. + + For example, if the URL of your local mirror for mouse MM9 is called: + http://mymirror.myuniversity.edu, then you would use the following: + --base_url http://mymirror.myuniversity.edu + --organism mouse + --database mm9 + arguments: + - name: --base_url + alternatives: -base + type: string + description: | + The “basename” for the UCSC browser. 
+ default: http://genome.ucsc.edu + + - name: --organism + alternatives: -org + type: string + description: | + The organism (e.g. mouse, human). + default: human + + - name: --database + alternatives: -db + type: string + description: | + The genome build. + default: hg18 + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: [bedtools, procps] + - type: docker + run: | + echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow diff --git a/src/bedtools/bedtools_links/help.txt b/src/bedtools/bedtools_links/help.txt new file mode 100644 index 00000000..d848d989 --- /dev/null +++ b/src/bedtools/bedtools_links/help.txt @@ -0,0 +1,25 @@ +``` +bedtools links -h +``` + +Tool: bedtools links (aka linksBed) +Version: v2.30.0 +Summary: Creates HTML links to an UCSC Genome Browser from a feature file. + +Usage: bedtools links [OPTIONS] -i > out.html + +Options: + -base The browser basename. Default: http://genome.ucsc.edu + -org The organism. Default: human + -db The build. Default: hg18 + +Example: + By default, the links created will point to human (hg18) UCSC browser. + If you have a local mirror, you can override this behavior by supplying + the -base, -org, and -db options. + + For example, if the URL of your local mirror for mouse MM9 is called: + http://mymirror.myuniversity.edu, then you would use the following: + -base http://mymirror.myuniversity.edu + -org mouse + -db mm9 diff --git a/src/bedtools/bedtools_links/script.sh b/src/bedtools/bedtools_links/script.sh new file mode 100644 index 00000000..b8ee9a56 --- /dev/null +++ b/src/bedtools/bedtools_links/script.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +set -eo pipefail + +# Execute bedtools links +bedtools links \ + ${par_base_url:+-base "$par_base_url"} \ + ${par_organism:+-org "$par_organism"} \ + ${par_database:+-db "$par_database"} \ + -i "$par_input" \ + > "$par_output" diff --git a/src/bedtools/bedtools_links/test.sh b/src/bedtools/bedtools_links/test.sh new file mode 100644 index 00000000..d79cbd6c --- /dev/null +++ b/src/bedtools/bedtools_links/test.sh @@ -0,0 +1,98 @@ +#!/bin/bash + +# exit on error +set -eo pipefail + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_identical_content() { + diff -a "$2" "$1" \ + || (echo "Files are not identical!" && exit 1) +} +############################################# + +# Create directories for tests +echo "Creating Test Data..." 
+TMPDIR=$(mktemp -d "$meta_temp_dir/XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT + +# Create test data +cat < "$TMPDIR/genes.bed" +chr21 9928613 10012791 uc002yip.1 0 - +chr21 9928613 10012791 uc002yiq.1 0 - +chr21 9928613 10012791 uc002yir.1 0 - +chr21 9928613 10012791 uc010gkv.1 0 - +chr21 9928613 10061300 uc002yis.1 0 - +chr21 10042683 10120796 uc002yit.1 0 - +chr21 10042683 10120808 uc002yiu.1 0 - +chr21 10079666 10120808 uc002yiv.1 0 - +chr21 10080031 10081687 uc002yiw.1 0 - +chr21 10081660 10120796 uc002yix.2 0 - +EOF + +# Test 1: Default Use +mkdir "$TMPDIR/test1" && pushd "$TMPDIR/test1" > /dev/null + +echo "> Run bedtools_links on BED file" +"$meta_executable" \ + --input "../genes.bed" \ + --output "genes.html" + +# checks +assert_file_exists "genes.html" +assert_file_not_empty "genes.html" +assert_file_contains "genes.html" "uc002yip.1" +echo "- test1 succeeded -" + +popd > /dev/null + +# Test 2: Base URL +mkdir "$TMPDIR/test2" && pushd "$TMPDIR/test2" > /dev/null + +echo "> Run bedtools_links with base option" +"$meta_executable" \ + --input "../genes.bed" \ + --output "genes.html" \ + --base_url "http://genome.ucsc.edu" + +# checks +assert_file_exists "genes.html" +assert_file_not_empty "genes.html" +assert_file_contains "genes.html" "uc002yip.1" +echo "- test2 succeeded -" + +popd > /dev/null + +# Test 3: Organism and Genome Database Build +mkdir "$TMPDIR/test3" && pushd "$TMPDIR/test3" > /dev/null + +echo "> Run bedtools_links with organism option and genome database build" +"$meta_executable" \ + --input "../genes.bed" \ + --output "genes.html" \ + --base_url "http://genome.ucsc.edu" \ + --organism "mouse" \ + --database "mm9" + +# checks +assert_file_exists "genes.html" +assert_file_not_empty "genes.html" +assert_file_contains "genes.html" "uc002yip.1" +echo "- test3 succeeded -" + +popd > /dev/null + +echo "---- All tests succeeded! ----" +exit 0 diff --git a/src/bedtools/bedtools_merge/config.vsh.yaml b/src/bedtools/bedtools_merge/config.vsh.yaml new file mode 100644 index 00000000..45e4a01d --- /dev/null +++ b/src/bedtools/bedtools_merge/config.vsh.yaml @@ -0,0 +1,160 @@ +name: bedtools_merge +namespace: bedtools +description: | + Merges overlapping BED/GFF/VCF entries into a single interval. +links: + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/merge.html + repository: https://github.com/arq5x/bedtools2 + homepage: https://bedtools.readthedocs.io/en/latest/# + issue_tracker: https://github.com/arq5x/bedtools2/issues +references: + doi: 10.1093/bioinformatics/btq033 +license: MIT +requirements: + commands: [bedtools] +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input + alternatives: -i + type: file + description: Input file (BED/GFF/VCF) to be merged. + required: true + + - name: Outputs + arguments: + - name: --output + type: file + direction: output + description: Output merged file BED to be written. + required: true + + - name: Options + arguments: + - name: --strand + alternatives: -s + type: boolean_true + description: | + Force strandedness. That is, only merge features + that are on the same strand. + - By default, merging is done without respect to strand. + + - name: --specific_strand + alternatives: -S + type: string + choices: ["+", "-"] + description: | + Force merge for one specific strand only. 
+ Follow with + or - to force merge from only + the forward or reverse strand, respectively. + - By default, merging is done without respect to strand. + + - name: --distance + alternatives: -d + type: integer + description: | + Maximum distance between features allowed for features + to be merged. + - Def. 0. That is, overlapping & book-ended features are merged. + - (INTEGER) + - Note: negative values enforce the number of b.p. required for overlap. + + - name: --columns + alternatives: -c + type: integer + description: | + Specify columns from the B file to map onto intervals in A. + Default: 5. + Multiple columns can be specified in a comma-delimited list. + + - name: --operation + alternatives: -o + type: string + description: | + Specify the operation that should be applied to -c. + Valid operations: + sum, min, max, absmin, absmax, + mean, median, mode, antimode + stdev, sstdev + collapse (i.e., print a delimited list (duplicates allowed)), + distinct (i.e., print a delimited list (NO duplicates allowed)), + distinct_sort_num (as distinct, sorted numerically, ascending), + distinct_sort_num_desc (as distinct, sorted numerically, desscending), + distinct_only (delimited list of only unique values), + count + count_distinct (i.e., a count of the unique values in the column), + first (i.e., just the first value in the column), + last (i.e., just the last value in the column), + Default: sum + Multiple operations can be specified in a comma-delimited list. + + If there is only column, but multiple operations, all operations will be + applied on that column. Likewise, if there is only one operation, but + multiple columns, that operation will be applied to all columns. + Otherwise, the number of columns must match the the number of operations, + and will be applied in respective order. + E.g., "-c 5,4,6 -o sum,mean,count" will give the sum of column 5, + the mean of column 4, and the count of column 6. + The order of output columns will match the ordering given in the command. + + - name: --delimiter + alternatives: -delim + type: string + description: | + Specify a custom delimiter for the collapse operations. + example: "|" + default: "," + + - name: --precision + alternatives: -prec + type: integer + description: | + Sets the decimal precision for output (Default: 5). + + - name: --bed + type: boolean_true + description: | + If using BAM input, write output as BED. + + - name: --header + type: boolean_true + description: | + Print the header from the A file prior to results. + + - name: --no_buffer + alternatives: -nobuf + type: boolean_true + description: | + Disable buffered output. Using this option will cause each line + of output to be printed as it is generated, rather than saved + in a buffer. This will make printing large output files + noticeably slower, but can be useful in conjunction with + other software tools and scripts that need to process one + line of bedtools output at a time. 
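For illustration, a minimal call to the resulting component could look like the following (a sketch; flag names come from the arguments above, file names are hypothetical):

```bash
# Merge features that are within 100 bp of each other and collapse the
# name column (column 4) into a "|"-delimited list per merged interval:
bedtools_merge \
  --input sorted.bed \
  --distance 100 \
  --columns 4 \
  --operation collapse \
  --delimiter "|" \
  --output merged.bed
```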
+ +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: test_data + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: [bedtools, procps] + - type: docker + run: | + echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/bedtools/bedtools_merge/help.txt b/src/bedtools/bedtools_merge/help.txt new file mode 100644 index 00000000..bc78fc67 --- /dev/null +++ b/src/bedtools/bedtools_merge/help.txt @@ -0,0 +1,85 @@ +```bash +bedtools merge +``` + +Tool: bedtools merge (aka mergeBed) +Version: v2.30.0 +Summary: Merges overlapping BED/GFF/VCF entries into a single interval. + +Usage: bedtools merge [OPTIONS] -i + +Options: + -s Force strandedness. That is, only merge features + that are on the same strand. + - By default, merging is done without respect to strand. + + -S Force merge for one specific strand only. + Follow with + or - to force merge from only + the forward or reverse strand, respectively. + - By default, merging is done without respect to strand. + + -d Maximum distance between features allowed for features + to be merged. + - Def. 0. That is, overlapping & book-ended features are merged. + - (INTEGER) + - Note: negative values enforce the number of b.p. required for overlap. + + -c Specify columns from the B file to map onto intervals in A. + Default: 5. + Multiple columns can be specified in a comma-delimited list. + + -o Specify the operation that should be applied to -c. + Valid operations: + sum, min, max, absmin, absmax, + mean, median, mode, antimode + stdev, sstdev + collapse (i.e., print a delimited list (duplicates allowed)), + distinct (i.e., print a delimited list (NO duplicates allowed)), + distinct_sort_num (as distinct, sorted numerically, ascending), + distinct_sort_num_desc (as distinct, sorted numerically, desscending), + distinct_only (delimited list of only unique values), + count + count_distinct (i.e., a count of the unique values in the column), + first (i.e., just the first value in the column), + last (i.e., just the last value in the column), + Default: sum + Multiple operations can be specified in a comma-delimited list. + + If there is only column, but multiple operations, all operations will be + applied on that column. Likewise, if there is only one operation, but + multiple columns, that operation will be applied to all columns. + Otherwise, the number of columns must match the the number of operations, + and will be applied in respective order. + E.g., "-c 5,4,6 -o sum,mean,count" will give the sum of column 5, + the mean of column 4, and the count of column 6. + The order of output columns will match the ordering given in the command. + + + -delim Specify a custom delimiter for the collapse operations. + - Example: -delim "|" + - Default: ",". + + -prec Sets the decimal precision for output (Default: 5) + + -bed If using BAM input, write output as BED. + + -header Print the header from the A file prior to results. + + -nobuf Disable buffered output. Using this option will cause each line + of output to be printed as it is generated, rather than saved + in a buffer. This will make printing large output files + noticeably slower, but can be useful in conjunction with + other software tools and scripts that need to process one + line of bedtools output at a time. 
+ + -iobuf Specify amount of memory to use for input buffer. + Takes an integer argument. Optional suffixes K/M/G supported. + Note: currently has no effect with compressed files. + +Notes: + (1) The input file (-i) file must be sorted by chrom, then start. + + + + +***** ERROR: No input file given. Exiting. ***** diff --git a/src/bedtools/bedtools_merge/script.sh b/src/bedtools/bedtools_merge/script.sh new file mode 100644 index 00000000..db50dd83 --- /dev/null +++ b/src/bedtools/bedtools_merge/script.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +# Exit on error +set -eo pipefail + +# Unset parameters +unset_if_false=( + par_strand + par_bed + par_header + par_no_buffer +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done + +# Execute bedtools merge with the provided arguments +bedtools merge \ + ${par_strand:+-s} \ + ${par_specific_strand:+-S "$par_specific_strand"} \ + ${par_bed:+-bed} \ + ${par_header:+-header} \ + ${par_no_buffer:+-nobuf} \ + ${par_distance:+-d "$par_distance"} \ + ${par_columns:+-c "$par_columns"} \ + ${par_operation:+-o "$par_operation"} \ + ${par_delimiter:+-delim "$par_delimiter"} \ + ${par_precision:+-prec "$par_precision"} \ + -i "$par_input" \ + > "$par_output" diff --git a/src/bedtools/bedtools_merge/test.sh b/src/bedtools/bedtools_merge/test.sh new file mode 100644 index 00000000..e2b46c15 --- /dev/null +++ b/src/bedtools/bedtools_merge/test.sh @@ -0,0 +1,222 @@ +#!/bin/bash + +# exit on error +set -eo pipefail + +## VIASH START +meta_executable="target/executable/bedtools/bedtools_sort/bedtools_merge" +meta_resources_dir="src/bedtools/bedtools_merge" +## VIASH END + +# directory of the bam file +test_data="$meta_resources_dir/test_data" + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_identical_content() { + diff -a "$2" "$1" \ + || (echo "Files are not identical!" && exit 1) +} +############################################# + +# Create directories for tests +echo "Creating Test Data..." 
+TMPDIR=$(mktemp -d "$meta_temp_dir/XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT + +# Create and populate example files +printf "chr1\t100\t200\nchr1\t150\t250\nchr1\t300\t400\n" > "$TMPDIR/featureA.bed" +printf "chr1\t100\t200\ta1\t1\t+\nchr1\t180\t250\ta2\t2\t+\nchr1\t250\t500\ta3\t3\t-\nchr1\t501\t1000\ta4\t4\t+\n" > "$TMPDIR/featureB.bed" +printf "chr1\t100\t200\ta1\t1.9\t+\nchr1\t180\t250\ta2\t2.5\t+\nchr1\t250\t500\ta3\t3.3\t-\nchr1\t501\t1000\ta4\t4\t+\n" > "$TMPDIR/feature_precision.bed" + +# Create and populate feature.gff file +printf "##gff-version 3\n" > "$TMPDIR/feature.gff" +printf "chr1\t.\tgene\t1000\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n" >> "$TMPDIR/feature.gff" +printf "chr1\t.\texon\t1000\t1200\t.\t+\t.\tID=exon1;Parent=transcript1\n" >> "$TMPDIR/feature.gff" +printf "chr1\t.\tCDS\t1000\t1200\t.\t+\t0\tID=cds1;Parent=transcript1\n" >> "$TMPDIR/feature.gff" +printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n" >> "$TMPDIR/feature.gff" +printf "chr2\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n" >> "$TMPDIR/feature.gff" +printf "chr3\t.\tmRNA\t1000\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n" >> "$TMPDIR/feature.gff" + +# Create expected output files +printf "chr1\t100\t250\nchr1\t300\t400\n" > "$TMPDIR/expected.bed" +printf "chr1\t100\t250\nchr1\t250\t500\nchr1\t501\t1000\n" > "$TMPDIR/expected_strand.bed" +printf "chr1\t100\t250\nchr1\t501\t1000\n" > "$TMPDIR/expected_specific_strand.bed" +printf "chr1\t128\t228\nchr1\t428\t528\n" > "$TMPDIR/expected_bam.bed" +printf "chr1\t100\t400\n" > "$TMPDIR/expected_distance.bed" +printf "chr1\t100\t500\t2\t1\t3\nchr1\t501\t1000\t4\t4\t4\n" > "$TMPDIR/expected_operation.bed" +printf "chr1\t100\t500\ta1|a2|a3\nchr1\t501\t1000\ta4\n" > "$TMPDIR/expected_delim.bed" +printf "chr1\t100\t500\t2.567\nchr1\t501\t1000\t4\n" > "$TMPDIR/expected_precision.bed" +printf "##gff-version 3\nchr1\t999\t2000\nchr2\t1499\t1700\nchr3\t999\t2000\n" > "$TMPDIR/expected_header.bed" + +# Test 1: Default sort on BED file +mkdir "$TMPDIR/test1" && pushd "$TMPDIR/test1" > /dev/null + +echo "> Run bedtools_merge on BED file" +"$meta_executable" \ + --input "../featureA.bed" \ + --output "output.bed" + +# # checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected.bed" +echo "- test1 succeeded -" + +popd > /dev/null + +# Test 2: strand option +mkdir "$TMPDIR/test2" && pushd "$TMPDIR/test2" > /dev/null + +echo "> Run bedtools_merge on BED file with strand option" +"$meta_executable" \ + --input "../featureB.bed" \ + --output "output.bed" \ + --strand + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected_strand.bed" +echo "- test2 succeeded -" + +popd > /dev/null + +# Test 3: specific strand option +mkdir "$TMPDIR/test3" && pushd "$TMPDIR/test3" > /dev/null + +echo "> Run bedtools_merge on BED file with specific strand option" +"$meta_executable" \ + --input "../featureB.bed" \ + --output "output.bed" \ + --specific_strand "+" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected_specific_strand.bed" +echo "- test3 succeeded -" + +popd > /dev/null + +# Test 4: BED option +mkdir "$TMPDIR/test4" && pushd "$TMPDIR/test4" > /dev/null + +echo "> Run bedtools_merge on BAM file with BED option" +"$meta_executable" \ + --input "$test_data/feature.bam" \ + 
--output "output.bed" \ + --bed + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected_bam.bed" +echo "- test4 succeeded -" + +popd > /dev/null + +# Test 5: distance option +mkdir "$TMPDIR/test5" && pushd "$TMPDIR/test5" > /dev/null + +echo "> Run bedtools_merge on BED file with distance option" +"$meta_executable" \ + --input "../featureA.bed" \ + --output "output.bed" \ + --distance -5 + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected.bed" +echo "- test5 succeeded -" + +popd > /dev/null + +# Test 6: columns option & operation option +mkdir "$TMPDIR/test6" && pushd "$TMPDIR/test6" > /dev/null + +echo "> Run bedtools_merge on BED file with columns & operation options" +"$meta_executable" \ + --input "../featureB.bed" \ + --output "output.bed" \ + --columns 5 \ + --operation "mean,min,max" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected_operation.bed" +echo "- test6 succeeded -" + +popd > /dev/null + +# Test 7: delimeter option +mkdir "$TMPDIR/test7" && pushd "$TMPDIR/test7" > /dev/null + +echo "> Run bedtools_merge on BED file with delimeter option" +"$meta_executable" \ + --input "../featureB.bed" \ + --output "output.bed" \ + --columns 4 \ + --operation "collapse" \ + --delimiter "|" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected_delim.bed" +echo "- test7 succeeded -" + +popd > /dev/null + +# Test 8: precision option +mkdir "$TMPDIR/test8" && pushd "$TMPDIR/test8" > /dev/null + +echo "> Run bedtools_merge on BED file with precision option" +"$meta_executable" \ + --input "../feature_precision.bed" \ + --output "output.bed" \ + --columns 5 \ + --operation "mean" \ + --precision 4 + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected_precision.bed" +echo "- test8 succeeded -" + +popd > /dev/null + +# Test 9: header option +mkdir "$TMPDIR/test9" && pushd "$TMPDIR/test9" > /dev/null + +echo "> Run bedtools_merge on GFF file with header option" +"$meta_executable" \ + --input "../feature.gff" \ + --output "output.gff" \ + --header + +# checks +assert_file_exists "output.gff" +assert_file_not_empty "output.gff" +assert_identical_content "output.gff" "../expected_header.bed" +echo "- test9 succeeded -" + +popd > /dev/null + +echo "---- All tests succeeded! ----" +exit 0 diff --git a/src/bedtools/bedtools_merge/test_data/feature.bam b/src/bedtools/bedtools_merge/test_data/feature.bam new file mode 100644 index 00000000..3d56a631 Binary files /dev/null and b/src/bedtools/bedtools_merge/test_data/feature.bam differ diff --git a/src/fastqc/config.vsh.yaml b/src/fastqc/config.vsh.yaml new file mode 100644 index 00000000..75b16f36 --- /dev/null +++ b/src/fastqc/config.vsh.yaml @@ -0,0 +1,209 @@ +name: fastqc +description: FastQC - A high throughput sequence QC analysis tool. 
+keywords: [Quality control, BAM, SAM, FASTQ] +links: + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + repository: https://github.com/s-andrews/FastQC + issue_tracker: https://github.com/s-andrews/FastQC/issues +license: GPL-3.0, Apache-2.0 +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + direction: input + multiple: true + description: | + FASTQ file(s) to be analyzed. + required: true + example: input.fq + + - name: Outputs + description: | + At least one of the output options (--html, --zip, --summary, --data) must be used. + arguments: + + - name: --html + type: file + direction: output + multiple: true + description: | + Create the HTML report of the results. + '*' wild card must be provided in the output file name. + Wild card will be replaced by the input file basename. + e.g. + --input "sample_1.fq" + --html "*.html" + would create an output html file named sample_1.html + example: "*.html" + + - name: --zip + type: file + direction: output + multiple: true + description: | + Create the zip file(s) containing: html report, data, images, icons, summary, etc. + '*' wild card must be provided in the output file name. + Wild card will be replaced by the input basename. + e.g. + --input "sample_1.fq" + --html "*.zip" + would create an output zip file named sample_1.zip + example: "*.zip" + + - name: --summary + type: file + direction: output + multiple: true + description: | + Create the summary file(s). + '*' wild card must be provided in the output file name. + Wild card will be replaced by the input basename. + e.g. + --input "sample_1.fq" + --summary "*_summary.txt" + would create an output summary.txt file named sample_1_summary.txt + example: "*_summary.txt" + + - name: --data + type: file + direction: output + multiple: true + description: | + Create the data file(s). + '*' wild card must be provided in the output file name. + Wild card will be replaced by the input basename. + e.g. + --input "sample_1.fq" + --summary "*_data.txt" + would create an output data.txt file named sample_1_data.txt + example: "*_data.txt" + + - name: Options + arguments: + - name: --casava + type: boolean_true + description: | + Files come from raw casava output. Files in the same sample + group (differing only by the group number) will be analysed + as a set rather than individually. Sequences with the filter + flag set in the header will be excluded from the analysis. + Files must have the same names given to them by casava + (including being gzipped and ending with .gz) otherwise they + won't be grouped together correctly. + + - name: --nano + type: boolean_true + description: | + Files come from nanopore sequences and are in fast5 format. In + this mode you can pass in directories to process and the program + will take in all fast5 files within those directories and produce + a single output file from the sequences found in all files. + + - name: --nofilter + type: boolean_true + description: | + If running with --casava then don't remove read flagged by + casava as poor quality when performing the QC analysis. + + - name: --nogroup + type: boolean_true + description: | + Disable grouping of bases for reads >50bp. + All reports will show data for every base in the read. 
+ WARNING: Using this option will cause fastqc to crash + and burn if you use it on really long reads, and your + plots may end up a ridiculous size. You have been warned! + + - name: --min_length + type: integer + description: | + Sets an artificial lower limit on the length of the + sequence to be shown in the report. As long as you + set this to a value greater or equal to your longest + read length then this will be the sequence length used + to create your read groups. This can be useful for making + directly comparable statistics from datasets with somewhat + variable read lengths. + example: 0 + + - name: --format + alternatives: -f + type: string + description: | + Bypasses the normal sequence file format detection and + forces the program to use the specified format. + Valid formats are bam, sam, bam_mapped, sam_mapped, and fastq. + example: bam + + - name: --contaminants + alternatives: -c + type: file + description: | + Specifies a non-default file which contains the list + of contaminants to screen overrepresented sequences against. + The file must contain sets of named contaminants in the form + name[tab]sequence. Lines prefixed with a hash will be ignored. + example: contaminants.txt + + - name: --adapters + alternatives: -a + type: file + description: | + Specifies a non-default file which contains the list of + adapter sequences which will be explicitly searched against + the library. The file must contain sets of named adapters + in the form name[tab]sequence. Lines prefixed with a hash will be ignored. + example: adapters.txt + + - name: --limits + alternatives: -l + type: file + description: | + Specifies a non-default file which contains + a set of criteria which will be used to determine + the warn/error limits for the various modules. + This file can also be used to selectively remove + some modules from the output altogether. The format + needs to mirror the default limits.txt file found in + the Configuration folder. + example: limits.txt + + - name: --kmers + alternatives: -k + type: integer + description: | + Specifies the length of Kmer to look for in the Kmer + content module. Specified Kmer length must be between + 2 and 10. Default length is 7 if not specified. + example: 7 + + - name: --quiet + alternatives: -q + type: boolean_true + description: | + Suppress all progress messages on stdout and only report errors. + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + +engines: + - type: docker + image: biocontainers/fastqc:v0.11.9_cv8 + setup: + - type: docker + run: | + echo "fastqc: $(fastqc --version | sed -n 's/^FastQC //p')" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow diff --git a/src/fastqc/help.txt b/src/fastqc/help.txt new file mode 100644 index 00000000..502aebc0 --- /dev/null +++ b/src/fastqc/help.txt @@ -0,0 +1,125 @@ +```bash +fastqc --help +``` + + FastQC - A high throughput sequence QC analysis tool + +SYNOPSIS + + fastqc seqfile1 seqfile2 .. seqfileN + + fastqc [-o output dir] [--(no)extract] [-f fastq|bam|sam] + [-c contaminant file] seqfile1 .. seqfileN + +DESCRIPTION + + FastQC reads a set of sequence files and produces from each one a quality + control report consisting of a number of different modules, each one of + which will help to identify a different potential type of problem in your + data. + + If no files to process are specified on the command line then the program + will start as an interactive graphical application. 
If files are provided + on the command line then the program will run with no user interaction + required. In this mode it is suitable for inclusion into a standardised + analysis pipeline. + + The options for the program as as follows: + + -h --help Print this help file and exit + + -v --version Print the version of the program and exit + + -o --outdir Create all output files in the specified output directory. + Please note that this directory must exist as the program + will not create it. If this option is not set then the + output file for each sequence file is created in the same + directory as the sequence file which was processed. + + --casava Files come from raw casava output. Files in the same sample + group (differing only by the group number) will be analysed + as a set rather than individually. Sequences with the filter + flag set in the header will be excluded from the analysis. + Files must have the same names given to them by casava + (including being gzipped and ending with .gz) otherwise they + won't be grouped together correctly. + + --nano Files come from nanopore sequences and are in fast5 format. In + this mode you can pass in directories to process and the program + will take in all fast5 files within those directories and produce + a single output file from the sequences found in all files. + + --nofilter If running with --casava then don't remove read flagged by + casava as poor quality when performing the QC analysis. + + --extract If set then the zipped output file will be uncompressed in + the same directory after it has been created. By default + this option will be set if fastqc is run in non-interactive + mode. + + -j --java Provides the full path to the java binary you want to use to + launch fastqc. If not supplied then java is assumed to be in + your path. + + --noextract Do not uncompress the output file after creating it. You + should set this option if you do not wish to uncompress + the output when running in non-interactive mode. + + --nogroup Disable grouping of bases for reads >50bp. All reports will + show data for every base in the read. WARNING: Using this + option will cause fastqc to crash and burn if you use it on + really long reads, and your plots may end up a ridiculous size. + You have been warned! + + --min_length Sets an artificial lower limit on the length of the sequence + to be shown in the report. As long as you set this to a value + greater or equal to your longest read length then this will be + the sequence length used to create your read groups. This can + be useful for making directly comaparable statistics from + datasets with somewhat variable read lengths. + + -f --format Bypasses the normal sequence file format detection and + forces the program to use the specified format. Valid + formats are bam,sam,bam_mapped,sam_mapped and fastq + + -t --threads Specifies the number of files which can be processed + simultaneously. Each thread will be allocated 250MB of + memory so you shouldn't run more threads than your + available memory will cope with, and not more than + 6 threads on a 32 bit machine + + -c Specifies a non-default file which contains the list of + --contaminants contaminants to screen overrepresented sequences against. + The file must contain sets of named contaminants in the + form name[tab]sequence. Lines prefixed with a hash will + be ignored. + + -a Specifies a non-default file which contains the list of + --adapters adapter sequences which will be explicity searched against + the library. 
The file must contain sets of named adapters + in the form name[tab]sequence. Lines prefixed with a hash + will be ignored. + + -l Specifies a non-default file which contains a set of criteria + --limits which will be used to determine the warn/error limits for the + various modules. This file can also be used to selectively + remove some modules from the output all together. The format + needs to mirror the default limits.txt file found in the + Configuration folder. + + -k --kmers Specifies the length of Kmer to look for in the Kmer content + module. Specified Kmer length must be between 2 and 10. Default + length is 7 if not specified. + + -q --quiet Supress all progress messages on stdout and only report errors. + + -d --dir Selects a directory to be used for temporary files written when + generating report images. Defaults to system temp directory if + not specified. + +BUGS + + Any bugs in fastqc should be reported either to simon.andrews@babraham.ac.uk + or in www.bioinformatics.babraham.ac.uk/bugzilla/ + + diff --git a/src/fastqc/script.sh b/src/fastqc/script.sh new file mode 100644 index 00000000..5cf55868 --- /dev/null +++ b/src/fastqc/script.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +# exit on error +set -eo pipefail + +# Check if both outputs are empty, at least one must be passed. +if [[ -z "$par_html" ]] && [[ -z "$par_zip" ]] && [[ -z "$par_summary" ]] && [[ -z "$par_data" ]]; then + echo "Error: At least one of the output arguments (--html, --zip, --summary, and --data) must be passed." + exit 1 +fi + +# unset flags +unset_if_false=( + par_casava + par_nano + par_nofilter + par_extract + par_noextract + par_nogroup + par_quiet +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done + +tmpdir=$(mktemp -d "${meta_temp_dir}/${meta_name}-XXXXXXXX") +function clean_up { + rm -rf "$tmpdir" +} +trap clean_up EXIT + +# Create input array +IFS=";" read -ra input <<< $par_input + +# Run fastqc +fastqc \ + --extract \ + ${par_casava:+--casava} \ + ${par_nano:+--nano} \ + ${par_nofilter:+--nofilter} \ + ${par_nogroup:+--nogroup} \ + ${par_min_length:+--min_length "$par_min_length"} \ + ${par_format:+--format "$par_format"} \ + ${par_contaminants:+--contaminants "$par_contaminants"} \ + ${par_adapters:+--adapters "$par_adapters"} \ + ${par_limits:+--limits "$par_limits"} \ + ${par_kmers:+--kmers "$par_kmers"} \ + ${par_quiet:+--quiet} \ + ${meta_cpus:+--threads "$meta_cpus"} \ + ${meta_temp_dir:+--dir "$meta_temp_dir"} \ + --outdir "${tmpdir}" \ + "${input[@]}" + +# Move output files +for file in "${input[@]}"; do + # Removes everthing after the first dot of the basename + sample_name=$(basename "${file}" | sed 's/\..*$//') + if [[ -n "$par_html" ]]; then + input_html="${tmpdir}/${sample_name}_fastqc.html" + html_file="${par_html//\*/$sample_name}" + mv "$input_html" "$html_file" + fi + if [[ -n "$par_zip" ]]; then + input_zip="${tmpdir}/${sample_name}_fastqc.zip" + zip_file="${par_zip//\*/$sample_name}" + mv "$input_zip" "$zip_file" + fi + if [[ -n "$par_summary" ]]; then + summary_file="${tmpdir}/${sample_name}_fastqc/summary.txt" + new_summary="${par_summary//\*/$sample_name}" + mv "$summary_file" "$new_summary" + fi + if [[ -n "$par_data" ]]; then + data_file="${tmpdir}/${sample_name}_fastqc/fastqc_data.txt" + new_data="${par_data//\*/$sample_name}" + mv "$data_file" "$new_data" + fi + # Remove the extracted directory + rm -r "${tmpdir}/${sample_name}_fastqc" +done + diff --git a/src/fastqc/test.sh 
b/src/fastqc/test.sh new file mode 100644 index 00000000..8c581ac8 --- /dev/null +++ b/src/fastqc/test.sh @@ -0,0 +1,235 @@ +#!/bin/bash + +# exit on error +set -eo pipefail + +## VIASH START +# meta_executable="target/executable/fastqc" +# meta_resources_dir="src/fastqc" +## VIASH END + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_identical_content() { + diff -a "$2" "$1" \ + || (echo "Files are not identical!" && exit 1) +} +############################################# + +# Create directories for tests +echo "Creating Test Data..." +TMPDIR=$(mktemp -d "$meta_temp_dir/XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT + +# Create and populate input.fasta +cat > "$TMPDIR/input_1.fq" < "$TMPDIR/input_2.fq" < "$TMPDIR/contaminants.txt" +printf "contaminant_sequence2\tGATCTTGG\n" >> "$TMPDIR/contaminants.txt" + +# Create and populate SAM file +printf "@HD\tVN:1.0\tSO:unsorted\n" > "$TMPDIR/example.sam" +printf "@SQ\tSN:chr1\tLN:248956422\n" >> "$TMPDIR/example.sam" +printf "@SQ\tSN:chr2\tLN:242193529\n" >> "$TMPDIR/example.sam" +printf "@PG\tID:bowtie2\tPN:bowtie2\tVN:2.3.4.1\tCL:\"/usr/bin/bowtie2-align-s --wrapper basic-0 -x genome -U reads.fq -S output.sam\"\n" >> "$TMPDIR/example.sam" +printf "read1\t0\tchr1\t100\t255\t50M\t*\t0\t0\tACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-10\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:50\tYT:Z:UU\n" >> "$TMPDIR/example.sam" +printf "read2\t0\tchr2\t150\t255\t50M\t*\t0\t0\tTGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-8\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:50\tYT:Z:UU\n" >> "$TMPDIR/example.sam" +printf "read3\t16\tchr1\t200\t255\t50M\t*\t0\t0\tGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-12\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:50\tYT:Z:UU" >> "$TMPDIR/example.sam" + +cat > "$TMPDIR/expected_summary.txt" < "$TMPDIR/expected_summary2.txt" < "$TMPDIR/expected_summary_sam.txt" < /dev/null + +echo "-> Run Test1: one input" +"$meta_executable" \ + --input "../input_1.fq" \ + --html "*_fastqc.html" \ + --zip "*_fastqc.zip" \ + --summary "*_summary.txt" \ + --data "*_data.txt" \ + --quiet \ + +assert_file_exists "input_1_fastqc.html" +assert_file_exists "input_1_fastqc.zip" +assert_file_exists "input_1_summary.txt" +assert_file_not_empty "input_1_fastqc.html" +assert_file_not_empty "input_1_fastqc.zip" +assert_identical_content "input_1_summary.txt" "../expected_summary.txt" +echo "- test succeeded -" + +popd > /dev/null + + +# Test 2: Run fastqc with multiple inputs +mkdir "$TMPDIR/test2" && pushd "$TMPDIR/test2" > /dev/null + +echo "-> Run Test2: two inputs" +"$meta_executable" \ + --input "../input_1.fq" \ + --input "../input_2.fq" \ + --html "*_fastqc.html" \ + --zip "*_fastqc.zip" \ + --summary "*_summary.txt" \ + --data "*_data.txt" \ + --quiet \ + +# File 1 +assert_file_exists "input_1_fastqc.html" +assert_file_exists "input_1_fastqc.zip" +assert_file_exists "input_1_summary.txt" +assert_file_not_empty "input_1_fastqc.html" +assert_file_not_empty 
"input_1_fastqc.zip" +assert_identical_content "input_1_summary.txt" "../expected_summary.txt" +# File 2 +assert_file_exists "input_2_fastqc.html" +assert_file_exists "input_2_fastqc.zip" +assert_file_exists "input_2_summary.txt" +assert_file_not_empty "input_2_fastqc.html" +assert_file_not_empty "input_2_fastqc.zip" +assert_identical_content "input_2_summary.txt" "../expected_summary2.txt" +echo "- test succeeded -" + +popd > /dev/null + +# Test 3: Run fastqc with contaminants +mkdir "$TMPDIR/test3" && pushd "$TMPDIR/test3" > /dev/null + +echo "-> Run Test3: contaminants" +"$meta_executable" \ + --input "../input_1.fq" \ + --contaminants "../contaminants.txt" \ + --html "*_fastqc.html" \ + --zip "*_fastqc.zip" \ + --summary "*_summary.txt" \ + --data "*_data.txt" \ + --quiet \ + +assert_file_exists "input_1_fastqc.html" +assert_file_exists "input_1_fastqc.zip" +assert_file_exists "input_1_summary.txt" +assert_file_not_empty "input_1_fastqc.html" +assert_file_not_empty "input_1_fastqc.zip" +assert_identical_content "input_1_summary.txt" "../expected_summary.txt" +assert_file_contains "input_1_data.txt" "contaminant" +echo "- test succeeded -" + +popd > /dev/null + +# Test 4: Run fastqc with sam file +mkdir "$TMPDIR/test4" && pushd "$TMPDIR/test4" > /dev/null + +echo "-> Run Test4: sam file" +"$meta_executable" \ + --input "../example.sam" \ + --format "sam" \ + --html "*_fastqc.html" \ + --zip "*_fastqc.zip" \ + --summary "*_summary.txt" \ + --data "*_data.txt" \ + --quiet \ + +assert_file_exists "example_fastqc.html" +assert_file_exists "example_fastqc.zip" +assert_file_exists "example_summary.txt" +assert_file_not_empty "example_fastqc.html" +assert_file_not_empty "example_fastqc.zip" +assert_identical_content "example_summary.txt" "../expected_summary_sam.txt" +echo "- test succeeded -" + +popd > /dev/null + +# Test 5: Run fastqc with multiple options +mkdir "$TMPDIR/test5" && pushd "$TMPDIR/test5" > /dev/null + +echo "-> Run Test5: multiple options" +"$meta_executable" \ + --input "../input_1.fq" \ + --contaminants "../contaminants.txt" \ + --format "fastq" \ + --nofilter \ + --nogroup \ + --min_length 10 \ + --kmers 5 \ + --html "*_fastqc.html" \ + --zip "*_fastqc.zip" \ + --summary "*_summary.txt" \ + --data "*_data.txt" \ + --quiet \ +# --casava \ + +assert_file_exists "input_1_fastqc.html" +assert_file_exists "input_1_fastqc.zip" +assert_file_exists "input_1_summary.txt" +assert_file_not_empty "input_1_fastqc.html" +assert_file_not_empty "input_1_fastqc.zip" +assert_identical_content "input_1_summary.txt" "../expected_summary.txt" +assert_file_contains "input_1_data.txt" "contaminant" +echo "- test succeeded -" + +popd > /dev/null + +echo "All tests succeeded!" +exit 0 diff --git a/src/qualimap/qualimap_rnaseq/config.vsh.yaml b/src/qualimap/qualimap_rnaseq/config.vsh.yaml new file mode 100644 index 00000000..ffc807ab --- /dev/null +++ b/src/qualimap/qualimap_rnaseq/config.vsh.yaml @@ -0,0 +1,103 @@ +name: qualimap_rnaseq +namespace: qualimap +keywords: [RNA-seq, quality control, QC Report] +description: | + Qualimap RNA-seq QC reports quality control metrics and bias estimations + which are specific for whole transcriptome sequencing, including reads genomic + origin, junction analysis, transcript coverage and 5’-3’ bias computation. 
+links: + homepage: http://qualimap.conesalab.org/ + documentation: http://qualimap.conesalab.org/doc_html/analysis.html#rna-seq-qc + issue_tracker: https://bitbucket.org/kokonech/qualimap/issues?status=new&status=open + repository: https://bitbucket.org/kokonech/qualimap/commits/branch/master +references: + doi: 10.1093/bioinformatics/btv566 +license: GPL-2.0 +authors: + - __merge__: /src/_authors/dorien_roosen.yaml + roles: [ author, maintainer ] +argument_groups: + - name: "Input" + arguments: + - name: "--bam" + type: file + required: true + example: alignment.bam + description: Path to the sequence alignment file in BAM format, produced by a splicing-aware aligner. + - name: "--gtf" + type: file + required: true + example: annotations.gtf + description: Path to genomic annotations in Ensembl GTF format. + + - name: "Output" + arguments: + - name: "--qc_results" + direction: output + type: file + required: true + example: rnaseq_qc_results.txt + description: Text file containing the RNAseq QC results. + - name: "--counts" + type: file + required: false + direction: output + description: Output file for computed counts. + - name: "--report" + type: file + direction: output + required: false + example: report.html + description: Report output file. Supported formats are PDF or HTML. + + - name: "Optional" + arguments: + - name: "--num_pr_bases" + type: integer + required: false + min: 1 + description: Number of upstream/downstream nucleotide bases to compute 5'-3' bias (default = 100). + - name: "--num_tr_bias" + type: integer + required: false + min: 1 + description: Number of top highly expressed transcripts to compute 5'-3' bias (default = 1000). + - name: "--algorithm" + type: string + required: false + choices: ["uniquely-mapped-reads", "proportional"] + description: Counting algorithm (uniquely-mapped-reads (default) or proportional). + - name: "--sequencing_protocol" + type: string + required: false + choices: ["non-strand-specific", "strand-specific-reverse", "strand-specific-forward"] + description: Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)). + - name: "--paired" + type: boolean_true + description: Setting this flag for paired-end experiments will result in counting fragments instead of reads. + - name: "--sorted" + type: boolean_true + description: Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed. Only requiredfor paired-end analysis. + - name: "--java_memory_size" + type: string + required: false + description: maximum Java heap memory size, default = 4G. + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - path: test_data/ + +engines: + - type: docker + image: quay.io/biocontainers/qualimap:2.3--hdfd78af_0 + setup: + - type: docker + run: | + echo QualiMap: $(qualimap 2>&1 | grep QualiMap | sed 's/^.*QualiMap//') > /var/software_versions.txt +runners: + - type: executable + - type: nextflow diff --git a/src/qualimap/qualimap_rnaseq/help.txt b/src/qualimap/qualimap_rnaseq/help.txt new file mode 100644 index 00000000..c6493ed9 --- /dev/null +++ b/src/qualimap/qualimap_rnaseq/help.txt @@ -0,0 +1,52 @@ +QualiMap v.2.3 +Built on 2023-05-19 16:57 + +usage: qualimap [options] + +To launch GUI leave empty. 
+ +Available tools: + + bamqc Evaluate NGS mapping to a reference genome + rnaseq Evaluate RNA-seq alignment data + counts Counts data analysis (further RNA-seq data evaluation) + multi-bamqc Compare QC reports from multiple NGS mappings + clustering Cluster epigenomic signals + comp-counts Compute feature counts + +Special arguments: + + --java-mem-size Use this argument to set Java memory heap size. Example: + qualimap bamqc -bam very_large_alignment.bam --java-mem-size=4G + +usage: qualimap rnaseq [-a ] -bam -gtf [-npb ] [-ntb + ] [-oc ] [-outdir ] [-outfile ] [-outformat ] + [-p ] [-pe] [-s] + -a,--algorithm Counting algorithm: + uniquely-mapped-reads(default) or + proportional. + -bam Input mapping file in BAM format. + -gtf Annotations file in Ensembl GTF format. + -npb,--num-pr-bases Number of upstream/downstream nucleotide bases + to compute 5'-3' bias (default is 100). + -ntb,--num-tr-bias Number of top highly expressed transcripts to + compute 5'-3' bias (default is 1000). + -oc Output file for computed counts. If only name + of the file is provided, then the file will be + saved in the output folder. + -outdir Output folder for HTML report and raw data. + -outfile Output file for PDF report (default value is + report.pdf). + -outformat Format of the output report (PDF, HTML or both + PDF:HTML, default is HTML). + -p,--sequencing-protocol Sequencing library protocol: + strand-specific-forward, + strand-specific-reverse or non-strand-specific + (default) + -pe,--paired Setting this flag for paired-end experiments + will result in counting fragments instead of + reads + -s,--sorted This flag indicates that the input file is + already sorted by name. If not set, additional + sorting by name will be performed. Only + required for paired-end analysis. 
\ No newline at end of file diff --git a/src/qualimap/qualimap_rnaseq/script.sh b/src/qualimap/qualimap_rnaseq/script.sh new file mode 100644 index 00000000..351e5159 --- /dev/null +++ b/src/qualimap/qualimap_rnaseq/script.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +set -eo pipefail + +tmp_dir=$(mktemp -d -p "$meta_temp_dir" qualimap_XXXXXXXXX) + +# Handle output parameters +if [ -n "$par_report" ]; then + outfile=$(basename "$par_report") + report_extension="${outfile##*.}" +fi + +if [ -n "$par_counts" ]; then + counts=$(basename "$par_counts") +fi + +# disable flags +[[ "$par_paired" == "false" ]] && unset par_paired +[[ "$par_sorted" == "false" ]] && unset par_sorted + +# Run qualimap +qualimap rnaseq \ + ${meta_memory_mb:+--java-mem-size=${meta_memory_mb}M} \ + ${par_algorithm:+--algorithm $par_algorithm} \ + ${par_sequencing_protocol:+--sequencing-protocol $par_sequencing_protocol} \ + -bam $par_bam \ + -gtf $par_gtf \ + -outdir "$tmp_dir" \ + ${par_num_pr_bases:+--num-pr-bases $par_num_pr_bases} \ + ${par_num_tr_bias:+--num-tr-bias $par_num_tr_bias} \ + ${par_report:+-outformat $report_extension} \ + ${par_paired:+--paired} \ + ${par_sorted:+--sorted} \ + ${par_report:+-outfile "$outfile"} \ + ${par_counts:+-oc "$counts"} + +# Move output files +mv "$tmp_dir/rnaseq_qc_results.txt" "$par_qc_results" + +if [ -n "$par_report" ] && [ $report_extension = "html" ]; then + mv "$tmp_dir/qualimapReport.html" "$par_report" +fi + +if [ -n "$par_report" ] && [ $report_extension = "pdf" ]; then + mv "$tmp_dir/$outfile" "$par_report" +fi + +if [ -n "$par_counts" ]; then + mv "$tmp_dir/$counts" "$par_counts" +fi diff --git a/src/qualimap/qualimap_rnaseq/test.sh b/src/qualimap/qualimap_rnaseq/test.sh new file mode 100755 index 00000000..2e1b647b --- /dev/null +++ b/src/qualimap/qualimap_rnaseq/test.sh @@ -0,0 +1,112 @@ +set -e + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_doesnt_exist() { + [ ! 
-f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +############################################# + + +test_dir="$meta_resources_dir/test_data" + +mkdir "run_qualimap_rnaseq_html" +cd "run_qualimap_rnaseq_html" + +echo "> Running qualimap with html output report" + +"$meta_executable" \ + --bam $test_dir/a.bam \ + --gtf $test_dir/annotation.gtf \ + --report report.html \ + --counts counts.txt \ + --qc_results output.txt + +echo ">> Checking output" +assert_file_exists "report.html" +assert_file_exists "counts.txt" +assert_file_exists "output.txt" +assert_file_doesnt_exist "report.pdf" + +echo ">> Checking if output is empty" +assert_file_not_empty "report.html" +assert_file_not_empty "counts.txt" +assert_file_not_empty "output.txt" + +echo ">> Checking output contents" +assert_file_contains "output.txt" ">>>>>>> Input" +assert_file_contains "output.txt" ">>>>>>> Reads alignment" +assert_file_contains "output.txt" ">>>>>>> Reads genomic origin" +assert_file_contains "output.txt" ">>>>>>> Transcript coverage profile" +assert_file_contains "output.txt" ">>>>>>> Junction analysis" +assert_file_contains "output.txt" ">>>>>>> Transcript coverage profile" + +assert_file_contains "counts.txt" "ENSG00000125841.12" + +assert_file_contains "report.html" "Qualimap report: RNA Seq QC" +assert_file_contains "report.html" "

Input" +assert_file_contains "report.html" "Reads alignment" +assert_file_contains "report.html" "Reads genomic origin" +assert_file_contains "report.html" "Transcript coverage profile" +assert_file_contains "report.html" "Junction analysis
" + + +cd .. +rm -r run_qualimap_rnaseq_html + +mkdir "run_qualimap_rnaseq_pdf" +cd "run_qualimap_rnaseq_pdf" + +echo "> Running qualimap with pdf output report" + +"$meta_executable" \ + --bam $test_dir/a.bam \ + --gtf $test_dir/annotation.gtf \ + --report report.pdf \ + --counts counts.txt \ + --qc_results output.txt + +echo ">> Checking output" +assert_file_exists "report.pdf" +assert_file_exists "counts.txt" +assert_file_exists "output.txt" +assert_file_doesnt_exist "report.html" + +echo ">> Checking if output is empty" +assert_file_not_empty "report.pdf" +assert_file_not_empty "counts.txt" +assert_file_not_empty "output.txt" + +cd .. +rm -r run_qualimap_rnaseq_pdf + +mkdir "run_qualimap_rnaseq" +cd "run_qualimap_rnaseq" + +echo "> Running qualimap without report and counts output" + +"$meta_executable" \ + --bam $test_dir/a.bam \ + --gtf $test_dir/annotation.gtf \ + --qc_results output.txt + +echo ">> Checking output" +assert_file_doesnt_exist "report.pdf" +assert_file_doesnt_exist "report.html" +assert_file_doesnt_exist "counts.txt" +assert_file_exists "output.txt" + +echo ">> Checking if output is empty" +assert_file_not_empty "output.txt" + +cd .. +rm -r run_qualimap_rnaseq \ No newline at end of file diff --git a/src/qualimap/qualimap_rnaseq/test_data/a.bam b/src/qualimap/qualimap_rnaseq/test_data/a.bam new file mode 100644 index 00000000..c8ea1065 Binary files /dev/null and b/src/qualimap/qualimap_rnaseq/test_data/a.bam differ diff --git a/src/qualimap/qualimap_rnaseq/test_data/annotation.gtf b/src/qualimap/qualimap_rnaseq/test_data/annotation.gtf new file mode 100644 index 00000000..976de753 --- /dev/null +++ b/src/qualimap/qualimap_rnaseq/test_data/annotation.gtf @@ -0,0 +1,10 @@ +chr20 HAVANA transcript 347024 354868 . + . gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA exon 347024 347142 . + . gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 1; exon_id "ENSE00001831391.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA exon 349249 349363 . + . gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 2; exon_id "ENSE00001491647.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA exon 349638 349832 . + . 
gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 3; exon_id "ENSE00003710328.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA CDS 349644 349832 . + 0 gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 3; exon_id "ENSE00003710328.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA start_codon 349644 349646 . + 0 gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 3; exon_id "ENSE00003710328.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA exon 353210 354868 . + . gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 4; exon_id "ENSE00001822456.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA CDS 353210 353632 . + 0 gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 4; exon_id "ENSE00001822456.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA stop_codon 353633 353635 . + 0 gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 4; exon_id "ENSE00001822456.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA UTR 347024 347142 . + . 
gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 1; exon_id "ENSE00001831391.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; diff --git a/src/qualimap/qualimap_rnaseq/test_data/script.sh b/src/qualimap/qualimap_rnaseq/test_data/script.sh new file mode 100755 index 00000000..801fe405 --- /dev/null +++ b/src/qualimap/qualimap_rnaseq/test_data/script.sh @@ -0,0 +1,10 @@ +# qualimap test data + +# Test data was obtained from https://github.com/snakemake/snakemake-wrappers/raw/master/bio/qualimap/rnaseq/test + +if [ ! -d /tmp/snakemake-wrappers ]; then + git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers +fi + +cp -r /tmp/snakemake-wrappers/bio/qualimap/rnaseq/test/mapped/a.bam src/qualimap/qualimap_rnaseq/test_data +cp -r /tmp/snakemake-wrappers/bio/qualimap/rnaseq/test/annotation.gtf src/qualimap/qualimap_rnaseq/test_data diff --git a/src/rsem/rsem_prepare_reference/config.vsh.yaml b/src/rsem/rsem_prepare_reference/config.vsh.yaml new file mode 100644 index 00000000..44915a2f --- /dev/null +++ b/src/rsem/rsem_prepare_reference/config.vsh.yaml @@ -0,0 +1,196 @@ +name: rsem_prepare_reference +namespace: rsem +description: | + RSEM is a software package for estimating gene and isoform expression levels from RNA-Seq data. This component prepares transcript references for RSEM. +keywords: ["Transcriptome", "Index"] +links: + homepage: http://deweylab.github.io/RSEM + documentation: https://deweylab.github.io/RSEM/rsem-prepare-reference.html + repository: https://github.com/deweylab/RSEM +references: + doi: 10.1186/1471-2105-12-323 +license: GPL-3.0 +requirements: + commands: [ rsem-prepare-reference ] +authors: + - __merge__: /src/_authors/sai_nirmayi_yasa.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --reference_fasta_files + type: file + description: | + Semi-colon separated list of Multi-FASTA formatted files OR a directory name. If a directory name is specified, RSEM will read all files with suffix ".fa" or ".fasta" in this directory. The files should contain either the sequences of transcripts or an entire genome, depending on whether the '--gtf' option is used. + required: true + multiple: true + example: read1.fasta + - name: --reference_name + type: string + description: | + The name of the reference used. RSEM will generate several reference-related files that are prefixed by this name. This name can contain path information (e.g. '/ref/mm9'). + required: true + example: /ref/mm9 + + - name: Outputs + arguments: + - name: --output + type: file + description: Directory containing reference files generated by RSEM. + required: true + direction: output + + - name: Other options + arguments: + - name: --gtf + type: file + description: Assume that 'reference_fasta_files' contains the sequence of a genome, and extract transcript reference sequences using the gene annotations specified in the GTF file. If this and '--gff3' options are not provided, RSEM will assume 'reference_fasta_files' contains the reference transcripts. In this case, RSEM assumes that name of each sequence in the Multi-FASTA files is its transcript_id. 
+ example: annotations.gtf + - name: --gff3 + type: file + description: GFF3 annotation file. Converted to GTF format with the file name 'reference_name.gtf'. Please make sure that 'reference_name.gtf' does not exist. + example: annotations.gff + - name: --gff3_rna_patterns + type: string + description: List of transcript categories (separated by semi-colon). Only transcripts that match the string will be extracted. + multiple: true + example: mRNA;rRNA + - name: --gff3_genes_as_transcripts + type: boolean_true + description: This option is designed for untypical organisms, such as viruses, whose GFF3 files only contain genes. RSEM will assume each gene as a unique transcript when it converts the GFF3 file into GTF format. + - name: --trusted_sources + type: string + description: List of trusted sources (separated by semi-colon). Only transcripts coming from these sources will be extracted. If this option is off, all sources are accepted. + multiple: true + example: ENSEMBL;HAVANA + - name: --transcript_to_gene_map + type: file + description: | + Use information from this file to map from transcript (isoform) ids to gene ids. Each line of this file should be of the form: + gene_id transcript_id + with the two fields separated by a tab character. + If you are using a GTF file for the "UCSC Genes" gene set from the UCSC Genome Browser, then the "knownIsoforms.txt" file (obtained from the "Downloads" section of the UCSC Genome Browser site) is of this format. + If this option is off, then the mapping of isoforms to genes depends on whether the '--gtf' option is specified. If '--gtf' is specified, then RSEM uses the "gene_id" and "transcript_id" attributes in the GTF file. Otherwise, RSEM assumes that each sequence in the reference sequence files is a separate gene. + example: isoforms.txt + - name: --allele_to_gene_map + type: file + description: | + Use information from to provide gene_id and transcript_id information for each allele-specific transcript. Each line of should be of the form: + gene_id transcript_id allele_id + with the fields separated by a tab character. + This option is designed for quantifying allele-specific expression. It is only valid if '--gtf' option is not specified. allele_id should be the sequence names presented in the Multi-FASTA-formatted files. + - name: --polyA + type: boolean_true + description: Add poly(A) tails to the end of all reference isoforms. The length of poly(A) tail added is specified by '--polyA-length' option. STAR aligner users may not want to use this option. + - name: --polyA_length + type: integer + description: The length of the poly(A) tails to be added. + example: 125 + - name: --no_polyA_subset + type: file + description: Only meaningful if '--polyA' is specified. Do not add poly(A) tails to those transcripts listed in this file containing a list of transcript_ids. + example: transcript_ids.txt + - name: --bowtie + type: boolean_true + description: Build Bowtie indices. + - name: --bowtie2 + type: boolean_true + description: Build Bowtie 2 indices. + - name: --star + type: boolean_true + description: Build STAR indices. + - name: --star_sjdboverhang + type: integer + description: Length of the genomic sequence around annotated junction. It is only used for STAR to build splice junctions database and not needed for Bowtie or Bowtie2. It will be passed as the --sjdbOverhang option to STAR. According to STAR's manual, its ideal value is max(ReadLength)-1, e.g. for 2x101 paired-end reads, the ideal value is 101-1=100. 
In most cases, the default value of 100 will work as well as the ideal value. (Default is 100) + example: 100 + - name: --hisat2_hca + type: boolean_true + description: Build HISAT2 indices on the transcriptome according to Human Cell Atlas (HCA) SMART-Seq2 pipeline. + - name: --quiet + alternatives: -q + type: boolean_true + description: Suppress the output of logging information. + + - name: Prior-enhanced RSEM options + arguments: + - name: --prep_pRSEM + type: boolean_true + description: A Boolean indicating whether to prepare reference files for pRSEM, including building Bowtie indices for a genome and selecting training set isoforms. The index files will be used for aligning ChIP-seq reads in prior-enhanced RSEM and the training set isoforms will be used for learning prior. A path to Bowtie executables and a mappability file in bigWig format are required when this option is on. Currently, Bowtie2 is not supported for prior-enhanced RSEM. + - name: --mappability_bigwig_file + type: file + description: Full path to a whole-genome mappability file in bigWig format. This file is required for running prior-enhanced RSEM. It is used for selecting a training set of isoforms for prior-learning. This file can be either downloaded from UCSC Genome Browser or generated by GEM (Derrien et al., 2012, PLoS One). + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: apt + packages: + - build-essential + - gcc + - g++ + - make + - wget + - zlib1g-dev + - unzip xxd + - perl + - r-base + - bowtie2 + - pip + - git + - type: python + packages: bowtie + - type: docker + env: + - STAR_VERSION=2.7.11b + - RSEM_VERSION=1.3.3 + - BOWTIE_VERSION=1.3.1 + - TZ=Europe/Brussels + run: | + ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \ + cd /tmp && \ + wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \ + unzip ${STAR_VERSION}.zip && \ + cd STAR-${STAR_VERSION}/source && \ + make STARstatic CXXFLAGS_SIMD=-std=c++11 && \ + cp STAR /usr/local/bin && \ + cd /tmp && \ + wget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip && \ + unzip v${RSEM_VERSION}.zip && \ + cd RSEM-${RSEM_VERSION} && \ + make && \ + make install && \ + cd /tmp && \ + wget --no-check-certificate -O bowtie-${BOWTIE_VERSION}-linux-x86_64.zip https://sourceforge.net/projects/bowtie-bio/files/bowtie/${BOWTIE_VERSION}/bowtie-${BOWTIE_VERSION}-linux-x86_64.zip/download && \ + unzip bowtie-${BOWTIE_VERSION}-linux-x86_64.zip && \ + cp bowtie-${BOWTIE_VERSION}-linux-x86_64/bowtie* /usr/local/bin && \ + cd /tmp && \ + git clone https://github.com/DaehwanKimLab/hisat2.git /tmp/hisat2 && \ + cd /tmp/hisat2 && \ + make && \ + cp -r hisat2* /usr/local/bin && \ + cd && \ + rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip /tmp/bowtie-${BOWTIE_VERSION}-linux-x86_64 /tmp/hisat2 && \ + apt-get --purge autoremove -y ${PACKAGES} && \ + apt-get clean + + - type: docker + run: | + echo "RSEM: `rsem-calculate-expression --version | sed -e 's/Current version: RSEM v//g'`" > /var/software_versions.txt && \ + echo "STAR: `STAR --version`" >> /var/software_versions.txt && \ + echo "bowtie2: `bowtie2 --version | grep -oP '\d+\.\d+\.\d+'`" >> /var/software_versions.txt && \ + echo "bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \K\d+\.\d+\.\d+'`" >> /var/software_versions.txt && \ + echo "HISAT2: 
`hisat2 --version | grep -oP 'hisat2-align-s version \K\d+\.\d+\.\d+'`" >> /var/software_versions.txt + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/rsem/rsem_prepare_reference/help.txt b/src/rsem/rsem_prepare_reference/help.txt new file mode 100644 index 00000000..c69899ec --- /dev/null +++ b/src/rsem/rsem_prepare_reference/help.txt @@ -0,0 +1,207 @@ +```bash +rsem-prepare-reference --help +``` + +NAME +rsem-prepare-reference - Prepare transcript references for RSEM and optionally build BOWTIE/BOWTIE2/STAR/HISAT2(transcriptome) indices. + +SYNOPSIS + rsem-prepare-reference [options] reference_fasta_file(s) reference_name +ARGUMENTS +reference_fasta_file(s) +Either a comma-separated list of Multi-FASTA formatted files OR a directory name. If a directory name is specified, RSEM will read all files with suffix ".fa" or ".fasta" in this directory. The files should contain either the sequences of transcripts or an entire genome, depending on whether the '--gtf' option is used. + +reference name +The name of the reference used. RSEM will generate several reference-related files that are prefixed by this name. This name can contain path information (e.g. '/ref/mm9'). + +OPTIONS +--gtf +If this option is on, RSEM assumes that 'reference_fasta_file(s)' contains the sequence of a genome, and will extract transcript reference sequences using the gene annotations specified in , which should be in GTF format. + +If this and '--gff3' options are off, RSEM will assume 'reference_fasta_file(s)' contains the reference transcripts. In this case, RSEM assumes that name of each sequence in the Multi-FASTA files is its transcript_id. + +(Default: off) + +--gff3 +The annotation file is in GFF3 format instead of GTF format. RSEM will first convert it to GTF format with the file name 'reference_name.gtf'. Please make sure that 'reference_name.gtf' does not exist. (Default: off) + +--gff3-RNA-patterns + is a comma-separated list of transcript categories, e.g. "mRNA,rRNA". Only transcripts that match the will be extracted. (Default: "mRNA") + +--gff3-genes-as-transcripts +This option is designed for untypical organisms, such as viruses, whose GFF3 files only contain genes. RSEM will assume each gene as a unique transcript when it converts the GFF3 file into GTF format. + +--trusted-sources + is a comma-separated list of trusted sources, e.g. "ENSEMBL,HAVANA". Only transcripts coming from these sources will be extracted. If this option is off, all sources are accepted. (Default: off) + +--transcript-to-gene-map +Use information from to map from transcript (isoform) ids to gene ids. Each line of should be of the form: + +gene_id transcript_id + +with the two fields separated by a tab character. + +If you are using a GTF file for the "UCSC Genes" gene set from the UCSC Genome Browser, then the "knownIsoforms.txt" file (obtained from the "Downloads" section of the UCSC Genome Browser site) is of this format. + +If this option is off, then the mapping of isoforms to genes depends on whether the '--gtf' option is specified. If '--gtf' is specified, then RSEM uses the "gene_id" and "transcript_id" attributes in the GTF file. Otherwise, RSEM assumes that each sequence in the reference sequence files is a separate gene. + +(Default: off) + +--allele-to-gene-map +Use information from to provide gene_id and transcript_id information for each allele-specific transcript. 
Each line of should be of the form: + +gene_id transcript_id allele_id + +with the fields separated by a tab character. + +This option is designed for quantifying allele-specific expression. It is only valid if '--gtf' option is not specified. allele_id should be the sequence names presented in the Multi-FASTA-formatted files. + +(Default: off) + +--polyA +Add poly(A) tails to the end of all reference isoforms. The length of poly(A) tail added is specified by '--polyA-length' option. STAR aligner users may not want to use this option. (Default: do not add poly(A) tail to any of the isoforms) + +--polyA-length +The length of the poly(A) tails to be added. (Default: 125) + +--no-polyA-subset +Only meaningful if '--polyA' is specified. Do not add poly(A) tails to those transcripts listed in . is a file containing a list of transcript_ids. (Default: off) + +--bowtie +Build Bowtie indices. (Default: off) + +--bowtie-path +The path to the Bowtie executables. (Default: the path to Bowtie executables is assumed to be in the user's PATH environment variable) + +--bowtie2 +Build Bowtie 2 indices. (Default: off) + +--bowtie2-path +The path to the Bowtie 2 executables. (Default: the path to Bowtie 2 executables is assumed to be in the user's PATH environment variable) + +--star +Build STAR indices. (Default: off) + +--star-path +The path to STAR's executable. (Default: the path to STAR executable is assumed to be in user's PATH environment variable) + +--star-sjdboverhang +Length of the genomic sequence around annotated junction. It is only used for STAR to build splice junctions database and not needed for Bowtie or Bowtie2. It will be passed as the --sjdbOverhang option to STAR. According to STAR's manual, its ideal value is max(ReadLength)-1, e.g. for 2x101 paired-end reads, the ideal value is 101-1=100. In most cases, the default value of 100 will work as well as the ideal value. (Default: 100) + +--hisat2-hca +Build HISAT2 indices on the transcriptome according to Human Cell Atlas (HCA) SMART-Seq2 pipeline. (Default: off) + +--hisat2-path +The path to the HISAT2 executables. (Default: the path to HISAT2 executables is assumed to be in the user's PATH environment variable) + +-p/--num-threads +Number of threads to use for building STAR's genome indices. (Default: 1) + +-q/--quiet +Suppress the output of logging information. (Default: off) + +-h/--help +Show help information. + +PRIOR-ENHANCED RSEM OPTIONS +--prep-pRSEM +A Boolean indicating whether to prepare reference files for pRSEM, including building Bowtie indices for a genome and selecting training set isoforms. The index files will be used for aligning ChIP-seq reads in prior-enhanced RSEM and the training set isoforms will be used for learning prior. A path to Bowtie executables and a mappability file in bigWig format are required when this option is on. Currently, Bowtie2 is not supported for prior-enhanced RSEM. (Default: off) + +--mappability-bigwig-file +Full path to a whole-genome mappability file in bigWig format. This file is required for running prior-enhanced RSEM. It is used for selecting a training set of isoforms for prior-learning. This file can be either downloaded from UCSC Genome Browser or generated by GEM (Derrien et al., 2012, PLoS One). (Default: "") + +DESCRIPTION +This program extracts/preprocesses the reference sequences for RSEM and prior-enhanced RSEM. It can optionally build Bowtie indices (with '--bowtie' option) and/or Bowtie 2 indices (with '--bowtie2' option) using their default parameters. 
It can also optionally build STAR indices (with '--star' option) using parameters from ENCODE3's STAR-RSEM pipeline. For prior-enhanced RSEM, it can build Bowtie genomic indices and select training set isoforms (with options '--prep-pRSEM' and '--mappability-bigwig-file '). If an alternative aligner is to be used, indices for that particular aligner can be built from either 'reference_name.idx.fa' or 'reference_name.n2g.idx.fa' (see OUTPUT for details). This program is used in conjunction with the 'rsem-calculate-expression' program. + +OUTPUT +This program will generate 'reference_name.grp', 'reference_name.ti', 'reference_name.transcripts.fa', 'reference_name.seq', 'reference_name.chrlist' (if '--gtf' is on), 'reference_name.idx.fa', 'reference_name.n2g.idx.fa', optional Bowtie/Bowtie 2 index files, and optional STAR index files. + +'reference_name.grp', 'reference_name.ti', 'reference_name.seq', and 'reference_name.chrlist' are used by RSEM internally. + +'reference_name.transcripts.fa' contains the extracted reference transcripts in Multi-FASTA format. Poly(A) tails are not added and it may contain lower case bases in its sequences if the corresponding genomic regions are soft-masked. + +'reference_name.idx.fa' and 'reference_name.n2g.idx.fa' are used by aligners to build their own indices. In these two files, all sequence bases are converted into upper case. In addition, poly(A) tails are added if '--polyA' option is set. The only difference between 'reference_name.idx.fa' and 'reference_name.n2g.idx.fa' is that 'reference_name.n2g.idx.fa' in addition converts all 'N' characters to 'G' characters. This conversion is in particular desired for aligners (e.g. Bowtie) that do not allow reads to overlap with 'N' characters in the reference sequences. Otherwise, 'reference_name.idx.fa' should be used to build the aligner's index files. RSEM uses 'reference_name.idx.fa' to build Bowtie 2 indices and 'reference_name.n2g.idx.fa' to build Bowtie indices. For visualizing the transcript-coordinate-based BAM files generated by RSEM in IGV, 'reference_name.idx.fa' should be imported as a "genome" (see Visualization section in README.md for details). + +If the whole genome is indexed for prior-enhanced RSEM, all the index files will be generated with prefix as 'reference_name_prsem'. Selected isoforms for training set are listed in the file 'reference_name_prsem.training_tr_crd' + +EXAMPLES +1) Suppose we have mouse RNA-Seq data and want to use the UCSC mm9 version of the mouse genome. We have downloaded the UCSC Genes transcript annotations in GTF format (as mm9.gtf) using the Table Browser and the knownIsoforms.txt file for mm9 from the UCSC Downloads. We also have all chromosome files for mm9 in the directory '/data/mm9'. We want to put the generated reference files under '/ref' with name 'mouse_0'. We do not add any poly(A) tails. Please note that GTF files generated from UCSC's Table Browser do not contain isoform-gene relationship information. For the UCSC Genes annotation, this information can be obtained from the knownIsoforms.txt file. Suppose we want to build Bowtie indices and Bowtie executables are found in '/sw/bowtie'. 
+ +There are two ways to write the command: + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --bowtie \ + --bowtie-path /sw/bowtie \ + /data/mm9/chr1.fa,/data/mm9/chr2.fa,...,/data/mm9/chrM.fa \ + /ref/mouse_0 +OR + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --bowtie \ + --bowtie-path /sw/bowtie \ + /data/mm9 \ + /ref/mouse_0 +2) Suppose we also want to build Bowtie 2 indices in the above example and Bowtie 2 executables are found in '/sw/bowtie2', the command will be: + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --bowtie \ + --bowtie-path /sw/bowtie \ + --bowtie2 \ + --bowtie2-path /sw/bowtie2 \ + /data/mm9 \ + /ref/mouse_0 +3) Suppose we want to build STAR indices in the above example and save index files under '/ref' with name 'mouse_0'. Assuming STAR executable is '/sw/STAR', the command will be: + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --star \ + --star-path /sw/STAR \ + -p 8 \ + /data/mm9/chr1.fa,/data/mm9/chr2.fa,...,/data/mm9/chrM.fa \ + /ref/mouse_0 +OR + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --star \ + --star-path /sw/STAR \ + -p 8 \ + /data/mm9 + /ref/mouse_0 +STAR genome index files will be saved under '/ref/'. + +4) Suppose we want to prepare references for prior-enhanced RSEM in the above example. In this scenario, both STAR and Bowtie are required to build genomic indices - STAR for RNA-seq reads and Bowtie for ChIP-seq reads. Assuming their executables are under '/sw/STAR' and '/sw/Bowtie', respectively. Also, assuming the mappability file for mouse genome is '/data/mm9.bigWig'. The command will be: + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --star \ + --star-path /sw/STAR \ + -p 8 \ + --prep-pRSEM \ + --bowtie-path /sw/Bowtie \ + --mappability-bigwig-file /data/mm9.bigWig \ + /data/mm9/chr1.fa,/data/mm9/chr2.fa,...,/data/mm9/chrM.fa \ + /ref/mouse_0 +OR + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --star \ + --star-path /sw/STAR \ + -p 8 \ + --prep-pRSEM \ + --bowtie-path /sw/Bowtie \ + --mappability-bigwig-file /data/mm9.bigWig \ + /data/mm9 + /ref/mouse_0 +Both STAR and Bowtie's index files will be saved under '/ref/'. Bowtie files will have name prefix 'mouse_0_prsem' + +5) Suppose we only have transcripts from EST tags stored in 'mm9.fasta' and isoform-gene information stored in 'mapping.txt'. We want to add 125bp long poly(A) tails to all transcripts. The reference_name is set as 'mouse_125'. 
In addition, we do not want to build Bowtie/Bowtie 2 indices, and will use an alternative aligner to align reads against either 'mouse_125.idx.fa' or 'mouse_125.idx.n2g.fa': + + rsem-prepare-reference --transcript-to-gene-map mapping.txt \ + --polyA + mm9.fasta \ + mouse_125 \ No newline at end of file diff --git a/src/rsem/rsem_prepare_reference/script.sh b/src/rsem/rsem_prepare_reference/script.sh new file mode 100644 index 00000000..806804d8 --- /dev/null +++ b/src/rsem/rsem_prepare_reference/script.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +set -eo pipefail + +unset_if_false=( par_gff3_genes_as_transcripts par_polyA par_bowtie par_bowtie2 par_star par_hisat2_hca par_quiet par_prep_pRSEM ) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done + +# replace ';' with ',' +par_reference_fasta_files=$(echo $par_reference_fasta_files | tr ';' ',') +par_gff3_rna_patterns=$(echo $par_gff3_rna_patterns | tr ';' ',') +par_trusted_sources=$(echo $par_trusted_sources | tr ';' ',') + +echo "$par_reference_fasta_files" +rsem-prepare-reference \ + ${par_gtf:+--gtf "${par_gtf}"} \ + ${par_gff3:+--gff3 "${par_gff3}"} \ + ${par_gff3_rna_patterns:+--gff3-RNA-patterns "${par_gff3_rna_patterns}"} \ + ${par_gff3_genes_as_transcripts:+--gff3-genes-as-transcripts "${par_gff3_genes_as_transcripts}"} \ + ${par_trusted_sources:+--trusted-sources "${par_trusted_sources}"} \ + ${par_transcript_to_gene_map:+--transcript-to-gene-map "${par_transcript_to_gene_map}"} \ + ${par_allele_to_gene_map:+--allele-to-gene-map "${par_allele_to_gene_map}"} \ + ${par_polyA:+--polyA} \ + ${par_polyA_length:+--polyA-length "${par_polyA_length}"} \ + ${par_no_polyA_subset:+--no-polyA-subset "${par_no_polyA_subset}"} \ + ${par_bowtie:+--bowtie} \ + ${par_bowtie2:+--bowtie2} \ + ${par_star:+--star} \ + ${par_star_sjdboverhang:+--star-sjdboverhang "${par_star_sjdboverhang}"} \ + ${par_hisat2_hca:+--hisat2-hca} \ + ${par_quiet:+--quiet} \ + ${par_prep_pRSEM:+--prep-pRSEM} \ + ${par_mappability_bigwig_file:+--mappability-bigwig-file "${par_mappability_bigwig_file}"} \ + ${meta_cpus:+--num-threads "${meta_cpus}"} \ + "${par_reference_fasta_files}" \ + "${par_reference_name}" + +mkdir -p "${par_output}" +mv ${par_reference_name}.* "${par_output}/" diff --git a/src/rsem/rsem_prepare_reference/test.sh b/src/rsem/rsem_prepare_reference/test.sh new file mode 100644 index 00000000..b38dd0a9 --- /dev/null +++ b/src/rsem/rsem_prepare_reference/test.sh @@ -0,0 +1,37 @@ + +#!/bin/bash + +set -e pipefail + +echo ">>> Testing $meta_functionality_name" + +cat > genome.fasta <<'EOF' +>Sheila +GCTAGCTCAGAAAAaaaNNN +EOF + +echo ">>> Prepare RSEM reference without gene annotations" +"$meta_executable" \ + --reference_fasta_files genome.fasta \ + --reference_name test \ + --output RSEM_index + +echo ">>> Checking whether output files exist" +[ ! -d "RSEM_index" ] && echo "RSEM index does not exist!" && exit 1 +[ ! -f "RSEM_index/test.grp" ] && echo "test.grp does not exist!" && exit 1 +[ ! -f "RSEM_index/test.n2g.idx.fa" ] && echo "test.n2g.idx.fa does not exist!" && exit 1 +[ ! -f "RSEM_index/test.ti" ] && echo "test.ti does not exist!" && exit 1 +[ ! -f "RSEM_index/test.idx.fa" ] && echo "test.idx.fa does not exist!" && exit 1 +[ ! -f "RSEM_index/test.seq" ] && echo "test.seq does not exist!" && exit 1 +[ ! -f "RSEM_index/test.transcripts.fa" ] && echo "test.transcripts.fa does not exist!" && exit 1 + +echo ">>> Checking whether output is correct" +[ ! 
-s "RSEM_index/test.grp" ] && echo "test.grp is empty!" && exit 1 +[ ! -s "RSEM_index/test.ti" ] && echo "test.ti is empty!" && exit 1 +[ ! -s "RSEM_index/test.seq" ] && echo "test.seq is empty!" && exit 1 +grep -q "GCTAGCTCAGAAAAaaaNNN" "RSEM_index/test.transcripts.fa" || { echo "The content of file 'test.transcripts.fa' seems to be incorrect." && exit 1; } +grep -q "GCTAGCTCAGAAAAAAANNN" "RSEM_index/test.idx.fa" || { echo "The content of file 'test.idx.fa' seems to be incorrect." && exit 1; } +grep -q "GCTAGCTCAGAAAAAAAGGG" "RSEM_index/test.n2g.idx.fa" || { echo "The content of file 'test.n2g.idx.fa' seems to be incorrect." && exit 1; } + +echo "All tests succeeded!" +exit 0