From 8fe9d66b0c689776846dcb0ecb01a30f3ef1b66b Mon Sep 17 00:00:00 2001
From: Theodoro Gasperin Terra Camargo
 <98555209+tgaspe@users.noreply.github.com>
Date: Tue, 10 Sep 2024 15:51:12 +0200
Subject: [PATCH] Bcftools stats (#142)

* Initial Commit

* Adding options to config

* Update on script

* update

* Adding test 2 and 3

* Update on config and test

* adding more tests

* debugging and adding tests

* Adding last tests

* removing test_data dir

* Update CHANGELOG.md

* small changes

* small change in help file

* Requested changes

---------

Co-authored-by: Jakub Majercik <57993790+jakubmajercik@users.noreply.github.com>
---
 CHANGELOG.md                                |   1 +
 src/bcftools/bcftools_stats/config.vsh.yaml | 240 +++++++++++++++
 src/bcftools/bcftools_stats/help.txt        |  35 +++
 src/bcftools/bcftools_stats/script.sh       |  56 ++++
 src/bcftools/bcftools_stats/test.sh         | 306 ++++++++++++++++++++
 5 files changed, 638 insertions(+)
 create mode 100644 src/bcftools/bcftools_stats/config.vsh.yaml
 create mode 100644 src/bcftools/bcftools_stats/help.txt
 create mode 100644 src/bcftools/bcftools_stats/script.sh
 create mode 100644 src/bcftools/bcftools_stats/test.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5041f082..2dd152bb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -42,6 +42,7 @@
 * `rsem/rsem_prepare_reference`: Prepare transcript references for RSEM (PR #89).
 
 * `bcftools`:
+  - `bcftools/bcftools_stats`: Parses VCF or BCF and produces a txt stats file which can be plotted using plot-vcfstats (PR #142).
   - `bcftools/bcftools_sort`: Sorts BCF/VCF files by position and other criteria (PR #141).
 
 * `fastqc`: High throughput sequence quality control analysis tool (PR #92).
diff --git a/src/bcftools/bcftools_stats/config.vsh.yaml b/src/bcftools/bcftools_stats/config.vsh.yaml
new file mode 100644
index 00000000..8fb57f7a
--- /dev/null
+++ b/src/bcftools/bcftools_stats/config.vsh.yaml
@@ -0,0 +1,240 @@
+name: bcftools_stats
+namespace: bcftools
+description: | 
+  Parses VCF or BCF and produces a txt stats file which can be plotted using plot-vcfstats.
+  When two files are given, the program generates separate stats for intersection
+  and the complements. By default only sites are compared, -s/-S must given to include
+  also sample columns.
+keywords: [Stats, VCF, BCF]
+links:
+  homepage: https://samtools.github.io/bcftools/
+  documentation: https://samtools.github.io/bcftools/bcftools.html#stats
+  repository: https://github.com/samtools/bcftools
+  issue_tracker: https://github.com/samtools/bcftools/issues
+references:
+  doi: https://doi.org/10.1093/gigascience/giab008
+license: MIT/Expat, GNU
+requirements:
+  commands: [bcftools]
+authors:
+  - __merge__: /src/_authors/theodoro_gasperin.yaml
+    roles: [ author ]
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --input
+        alternatives: -i
+        type: file
+        multiple: true
+        description: Input VCF/BCF file. Maximum of two files.
+        required: true
+    
+  - name: Outputs
+    arguments:
+      - name: --output
+        alternatives: -o
+        direction: output
+        type: file
+        description: Output txt statistics file.
+        required: true
+         
+  - name: Options
+    arguments:
+      
+      - name: --allele_frequency_bins
+        alternatives: --af_bins
+        type: string
+        description: | 
+          Allele frequency bins, a list of bin values (0.1,0.5,1).
+        example: 0.1,0.5,1
+
+      - name: --allele_frequency_bins_file
+        alternatives: --af_bins_file
+        type: file
+        description: | 
+          Same as allele_frequency_bins, but in a file.
+          Format of file is one value per line. 
+          e.g. 
+            0.1
+            0.5
+            1
+
+      - name: --allele_frequency_tag
+        alternatives: --af_tag
+        type: string
+        description: | 
+          Allele frequency tag to use, by default estimated from AN,AC or GT.
+
+      - name: --first_allele_only
+        alternatives: --first_only
+        type: boolean_true
+        description: | 
+          Include only 1st allele at multiallelic sites.
+
+      - name: --collapse
+        alternatives: --c
+        type: string
+        choices: [ snps, indels, both, all, some, none ]
+        description: | 
+          Treat as identical records with <snps|indels|both|all|some|none>.
+          See https://samtools.github.io/bcftools/bcftools.html#common_options for details.
+
+      - name: --depth
+        alternatives: --d
+        type: string
+        description: | 
+          Depth distribution: min,max,bin size.
+        example: 0,500,1
+
+      - name: --exclude
+        alternatives: --e
+        type: string
+        description: | 
+          Exclude sites for which the expression is true.
+          See https://samtools.github.io/bcftools/bcftools.html#expressions for details.
+        example: 'QUAL < 30 && DP < 10'
+
+      - name: --exons
+        alternatives: --E
+        type: file
+        description: | 
+          tab-delimited file with exons for indel frameshifts statistics. 
+          The columns of the file are CHR, FROM, TO, with 1-based, inclusive, positions. 
+          The file is BGZF-compressed and indexed with tabix (e.g. tabix -s1 -b2 -e3 file.gz).
+
+      - name: --apply_filters
+        alternatives: --f
+        type: string
+        description: | 
+          Require at least one of the listed FILTER strings (e.g. "PASS,.").
+
+      - name: --fasta_reference
+        alternatives: --F
+        type: file
+        description: | 
+          Faidx indexed reference sequence file to determine INDEL context.
+
+      - name: --include
+        alternatives: --i
+        type: string
+        description: | 
+          Select sites for which the expression is true.
+          See https://samtools.github.io/bcftools/bcftools.html#expressions for details.
+        example: 'QUAL >= 30 && DP >= 10'
+      
+      - name: --split_by_ID
+        alternatives: --I
+        type: boolean_true
+        description: | 
+          Collect stats for sites with ID separately (known vs novel).
+
+      - name: --regions
+        alternatives: --r
+        type: string
+        description: | 
+          Restrict to comma-separated list of regions. 
+          Following formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​].
+        example: '20:1000000-2000000'
+
+      - name: --regions_file
+        alternatives: --R
+        type: file
+        description: | 
+          Restrict to regions listed in a file. 
+          Regions can be specified either on a VCF, BED, or tab-delimited file (the default). 
+          For more information check manual.
+
+      - name: --regions_overlap
+        type: string
+        choices: ['pos', 'record', 'variant', '0', '1', '2']
+        description: | 
+          This option controls how overlapping records are determined: 
+          set to 'pos' or '0' if the VCF record has to have POS inside a region (this corresponds to the default behavior of -t/-T); 
+          set to 'record' or '1' if also overlapping records with POS outside a region should be included (this is the default behavior of -r/-R, 
+          and includes indels with POS at the end of a region, which are technically outside the region); 
+          or set to 'variant' or '2' to include only true overlapping variation (compare the full VCF representation "TA>T-" vs the true sequence variation "A>-").
+
+      - name: --samples
+        alternatives: --s
+        type: string
+        description: | 
+          List of samples for sample stats, "-" to include all samples.
+
+      - name: --samples_file
+        alternatives: --S
+        type: file
+        description: | 
+          File of samples to include.
+          e.g. 
+            sample1    1
+            sample2    2
+            sample3    2
+
+      - name: --targets
+        alternatives: --t
+        type: string
+        description: | 
+          Similar as -r, --regions, but the next position is accessed by streaming the whole VCF/BCF 
+          rather than using the tbi/csi index. Both -r and -t options can be applied simultaneously: -r uses the 
+          index to jump to a region and -t discards positions which are not in the targets. Unlike -r, targets 
+          can be prefixed with "^" to request logical complement. For example, "^X,Y,MT" indicates that 
+          sequences X, Y and MT should be skipped. Yet another difference between the -t/-T and -r/-R is 
+          that -r/-R checks for proper overlaps and considers both POS and the end position of an indel, 
+          while -t/-T considers the POS coordinate only (by default; see also --regions-overlap and --targets-overlap). 
+          Note that -t cannot be used in combination with -T.
+          Following formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​].
+        example: '20:1000000-2000000'
+      
+      - name: --targets_file
+        alternatives: --T
+        type: file
+        description: | 
+          Similar to --regions_file option but streams rather than index-jumps.
+
+      - name: --targets_overlaps
+        type: string
+        choices: ['pos', 'record', 'variant', '0', '1', '2']
+        description: | 
+          Include if POS in the region (0), record overlaps (1), variant overlaps (2).
+
+      - name: --user_tstv
+        alternatives: --u
+        type: string
+        description: | 
+          Collect Ts/Tv stats for any tag using the given binning [0:1:100].
+          Format is <TAG[:min:max:n]>.
+          A subfield can be selected as e.g. 'PV4[0]', here the first value of the PV4 tag.
+          
+      
+      - name: --verbose 
+        alternatives: --v
+        type: boolean_true
+        description: | 
+          Produce verbose per-site and per-sample output.
+
+resources:
+  - type: bash_script
+    path: script.sh
+
+test_resources:
+  - type: bash_script
+    path: test.sh
+
+engines:
+  - type: docker
+    image: debian:stable-slim
+    setup:
+      - type: apt
+        packages: [bcftools, procps]
+      - type: docker
+        run: |
+          echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftools //p')\"" > /var/software_versions.txt
+    test_setup:  
+      - type: apt  
+        packages: [tabix]
+
+runners:
+  - type: executable
+  - type: nextflow
+
diff --git a/src/bcftools/bcftools_stats/help.txt b/src/bcftools/bcftools_stats/help.txt
new file mode 100644
index 00000000..e702e838
--- /dev/null
+++ b/src/bcftools/bcftools_stats/help.txt
@@ -0,0 +1,35 @@
+```
+bcftools stats -h
+```
+
+About:   Parses VCF or BCF and produces stats which can be plotted using plot-vcfstats.
+         When two files are given, the program generates separate stats for intersection
+         and the complements. By default only sites are compared, -s/-S must given to include
+         also sample columns.
+Usage:   bcftools stats [options] <A.vcf.gz> [<B.vcf.gz>]
+
+Options:
+        --af-bins LIST               Allele frequency bins, a list (0.1,0.5,1) or a file (0.1\n0.5\n1)
+        --af-tag STRING              Allele frequency tag to use, by default estimated from AN,AC or GT
+    -1, --1st-allele-only            Include only 1st allele at multiallelic sites
+    -c, --collapse STRING            Treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]
+    -d, --depth INT,INT,INT          Depth distribution: min,max,bin size [0,500,1]
+    -e, --exclude EXPR               Exclude sites for which the expression is true (see man page for details)
+    -E, --exons FILE.gz              Tab-delimited file with exons for indel frameshifts (chr,beg,end; 1-based, inclusive, bgzip compressed)
+    -f, --apply-filters LIST         Require at least one of the listed FILTER strings (e.g. "PASS,.")
+    -F, --fasta-ref FILE             Faidx indexed reference sequence file to determine INDEL context
+    -i, --include EXPR               Select sites for which the expression is true (see man page for details)
+    -I, --split-by-ID                Collect stats for sites with ID separately (known vs novel)
+    -r, --regions REGION             Restrict to comma-separated list of regions
+    -R, --regions-file FILE          Restrict to regions listed in a file
+        --regions-overlap 0|1|2      Include if POS in the region (0), record overlaps (1), variant overlaps (2) [1]
+    -s, --samples LIST               List of samples for sample stats, "-" to include all samples
+    -S, --samples-file FILE          File of samples to include
+    -t, --targets REGION             Similar to -r but streams rather than index-jumps
+    -T, --targets-file FILE          Similar to -R but streams rather than index-jumps
+        --targets-overlap 0|1|2      Include if POS in the region (0), record overlaps (1), variant overlaps (2) [0]
+    -u, --user-tstv TAG[:min:max:n]  Collect Ts/Tv stats for any tag using the given binning [0:1:100]
+                                       A subfield can be selected as e.g. 'PV4[0]', here the first value of the PV4 tag
+        --threads INT                Use multithreading with <int> worker threads [0]
+    -v, --verbose                    Produce verbose per-site and per-sample output
+
diff --git a/src/bcftools/bcftools_stats/script.sh b/src/bcftools/bcftools_stats/script.sh
new file mode 100644
index 00000000..119502fd
--- /dev/null
+++ b/src/bcftools/bcftools_stats/script.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+# Exit on error
+set -eo pipefail
+
+# Unset parameters
+unset_if_false=(
+    par_first_allele_only
+    par_split_by_ID
+    par_verbose 
+)
+
+for par in ${unset_if_false[@]}; do
+    test_val="${!par}"
+    [[ "$test_val" == "false" ]] && unset $par
+done
+
+# Create input array 
+IFS=";" read -ra input <<< $par_input
+
+# Check the size of the input array
+if [[ ${#input[@]} -gt 2 ]]; then
+    echo "Error: --input only takes a max of two files!"
+    exit 1
+fi
+
+# Execute bcftools stats with the provided arguments
+bcftools stats \
+    ${par_first_allele_only:+--1st-allele-only} \
+    ${par_split_by_ID:+--split-by-ID} \
+    ${par_verbose:+--verbose} \
+    ${par_allele_frequency_bins:+--af-bins "${par_allele_frequency_bins}"} \
+    ${par_allele_frequency_bins_file:+--af-bins "${par_allele_frequency_bins_file}"} \
+    ${par_allele_frequency_tag:+--af-tag "${par_allele_frequency_tag}"} \
+    ${par_collapse:+-c "${par_collapse}"} \
+    ${par_depth:+-d "${par_depth}"} \
+    ${par_exclude:+-e "${par_exclude}"} \
+    ${par_exons:+-E "${par_exons}"} \
+    ${par_apply_filters:+-f "${par_apply_filters}"} \
+    ${par_fasta_reference:+-F "${par_fasta_reference}"} \
+    ${par_include:+-i "${par_include}"} \
+    ${par_regions:+-r "${par_regions}"} \
+    ${par_regions_file:+-R "${par_regions_file}"} \
+    ${par_regions_overlap:+--regions-overlap "${par_regions_overlap}"} \
+    ${par_samples:+-s "${par_samples}"} \
+    ${par_samples_file:+-S "${par_samples_file}"} \
+    ${par_targets:+-t "${par_targets}"} \
+    ${par_targets_file:+-T "${par_targets_file}"} \
+    ${par_targets_overlaps:+--targets-overlap "${par_targets_overlaps}"} \
+    ${par_user_tstv:+-u "${par_user_tstv}"} \
+    "${input[@]}" \
+    > $par_output
+
diff --git a/src/bcftools/bcftools_stats/test.sh b/src/bcftools/bcftools_stats/test.sh
new file mode 100644
index 00000000..18f0256b
--- /dev/null
+++ b/src/bcftools/bcftools_stats/test.sh
@@ -0,0 +1,306 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+# Exit on error
+set -eo pipefail
+
+#test_data="$meta_resources_dir/test_data"
+
+#############################################
+# helper functions
+assert_file_exists() {
+  [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; }
+}
+assert_file_not_empty() {
+  [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; }
+}
+assert_file_contains() {
+  grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
+}
+assert_identical_content() {
+  diff -a "$2" "$1" \
+    || (echo "Files are not identical!" && exit 1)
+}
+#############################################
+
+# Create directories for tests
+echo "Creating Test Data..."
+TMPDIR=$(mktemp -d "$meta_temp_dir/XXXXXX")
+function clean_up {
+  [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR"
+}
+trap clean_up EXIT
+
+# Create test data
+cat <<EOF > "$TMPDIR/example.vcf"
+##fileformat=VCFv4.0
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=1000GenomesPilot-NCBI36
+##contig=<ID=19,length=58617616>
+##contig=<ID=20,length=58617616>
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+##ALT=<ID=DEL:ME:ALU,Description="Deletion of ALU element">
+##ALT=<ID=CNV,Description="Copy number variable region">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+19	111	.	A	C	9.6	.	.	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
+19	112	.	A	G	10	.	.	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
+20	14370	rs6054257	G	A	29	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	1/1:43:5:.,.
+20	17330	.	T	A	3	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3:.,.
+20	1110696	rs6040355	A	G,T	67	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4:.,.
+20	1230237	.	T	.	47	PASS	NS=3;DP=13;AA=T	GT:GQ:DP:HQ	0|0:54:.:56,60	0|0:48:4:51,51	0/0:61:2:.,.
+20	1234567	microsat1	G	GA,GAC	50	PASS	NS=3;DP=9;AA=G;AN=6;AC=3,1	GT:GQ:DP	0/1:.:4	0/2:17:2	1/1:40:3
+20	1235237	.	T	.	.	.	.	GT	0/0	0|0	./.
+EOF
+
+bgzip -c $TMPDIR/example.vcf > $TMPDIR/example.vcf.gz
+tabix -p vcf $TMPDIR/example.vcf.gz
+
+cat <<EOF > "$TMPDIR/exons.bed"
+chr19	12345	12567
+chr20	23456	23789
+EOF
+
+# Compressing and indexing the exons file
+bgzip -c $TMPDIR/exons.bed > $TMPDIR/exons.bed.gz
+tabix -s1 -b2 -e3 $TMPDIR/exons.bed.gz
+
+# Create fai test file
+# cat <<EOF > "$TMPDIR/reference.fasta.fai"
+# 19	100	895464957	60	61
+# 20	10000	1083893029	60	61
+# EOF
+
+# Create allele frequency bins file
+cat <<EOF > "$TMPDIR/allele_frequency_bins.txt"
+0.1
+0.2
+0.3
+0.4
+0.5
+0.6
+0.7
+0.8
+0.9
+EOF
+
+# Test 1: Default Use
+mkdir "$TMPDIR/test1" && pushd "$TMPDIR/test1" > /dev/null
+
+echo "> Run bcftools_stats on VCF file"
+"$meta_executable" \
+  --input "../example.vcf" \
+  --output "stats.txt" \
+
+# checks
+assert_file_exists "stats.txt"
+assert_file_not_empty "stats.txt"
+assert_file_contains "stats.txt" "bcftools stats  ../example.vcf"
+echo "- test1 succeeded -"
+
+popd > /dev/null
+
+# Test 2: First allele only
+mkdir "$TMPDIR/test2" && pushd "$TMPDIR/test2" > /dev/null
+
+echo "> Run bcftools_stats on VCF file with first allele only"
+"$meta_executable" \
+  --input "../example.vcf" \
+  --output "stats.txt" \
+  --first_allele_only \
+  --allele_frequency_bins "0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9" \
+  --allele_frequency_tag "AF" \
+
+# checks
+assert_file_exists "stats.txt"
+assert_file_not_empty "stats.txt"
+assert_file_contains "stats.txt" "bcftools stats  --1st-allele-only --af-bins 0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9 --af-tag AF ../example.vcf"
+echo "- test2 succeeded -"
+
+popd > /dev/null
+
+# Test 3: Split by ID
+mkdir "$TMPDIR/test3" && pushd "$TMPDIR/test3" > /dev/null
+
+echo "> Run bcftools_stats on VCF file with split by ID"
+"$meta_executable" \
+  --input "../example.vcf" \
+  --output "stats.txt" \
+  --split_by_ID \
+
+# checks
+assert_file_exists "stats.txt"
+assert_file_not_empty "stats.txt"
+assert_file_contains "stats.txt" "bcftools stats  --split-by-ID ../example.vcf"
+echo "- test3 succeeded -"
+
+popd > /dev/null
+
+# Test 4: Collapse, Depth, Exclude
+mkdir "$TMPDIR/test4" && pushd "$TMPDIR/test4" > /dev/null
+
+echo "> Run bcftools_stats on VCF file with collapse, depth, and exclude"
+"$meta_executable" \
+  --input "../example.vcf" \
+  --output "stats.txt" \
+  --depth "0,500,1" \
+  --exclude "GT='mis'" \
+  --collapse "snps" \
+
+# checks
+assert_file_exists "stats.txt"
+assert_file_not_empty "stats.txt"
+assert_file_contains "stats.txt" "bcftools stats  -c snps -d 0,500,1 -e GT='mis' ../example.vcf"
+echo "- test4 succeeded -"
+
+popd > /dev/null
+
+# Test 5: Exons, Apply Filters
+mkdir "$TMPDIR/test5" && pushd "$TMPDIR/test5" > /dev/null
+
+echo "> Run bcftools_stats on VCF file with exons, apply filters, and fasta reference"
+"$meta_executable" \
+  --input "../example.vcf" \
+  --output "stats.txt" \
+  --exons "../exons.bed.gz" \
+  --apply_filters "PASS" \
+#  --fasta_reference "../reference.fasta.fai" \
+
+# NOTE: fasta_reference option not included in testing because of error from bcftools stats.
+
+# checks
+assert_file_exists "stats.txt"
+assert_file_not_empty "stats.txt"
+assert_file_contains "stats.txt" "bcftools stats  -E ../exons.bed.gz -f PASS ../example.vcf"
+#assert_file_contains "stats.txt" "bcftools stats  -E ../exons.bed.gz -f PASS -F ../reference.fasta.fai ../example.vcf"
+echo "- test5 succeeded -"
+
+popd > /dev/null
+
+# Test 6: Include, Regions
+mkdir "$TMPDIR/test6" && pushd "$TMPDIR/test6" > /dev/null
+
+echo "> Run bcftools_stats on VCF file with include and regions options"
+"$meta_executable" \
+  --input "../example.vcf.gz" \
+  --output "stats.txt" \
+  --include "GT='mis'" \
+  --regions "20:1000000-2000000" \
+
+# checks
+assert_file_exists "stats.txt"
+assert_file_not_empty "stats.txt"
+assert_file_contains "stats.txt" "bcftools stats  -i GT='mis' -r 20:1000000-2000000 ../example.vcf.gz"
+echo "- test6 succeeded -"
+
+popd > /dev/null
+
+# Test 7: Regions Overlap, Samples
+mkdir "$TMPDIR/test7" && pushd "$TMPDIR/test7" > /dev/null
+
+echo "> Run bcftools_stats on VCF file with regions overlap, and samples options"
+"$meta_executable" \
+  --input "../example.vcf" \
+  --output "stats.txt" \
+  --regions_overlap "record" \
+  --samples "NA00001,NA00002" \
+
+# checks
+assert_file_exists "stats.txt"
+assert_file_not_empty "stats.txt"
+assert_file_contains "stats.txt" "bcftools stats  --regions-overlap record -s NA00001,NA00002 ../example.vcf"
+echo "- test7 succeeded -"
+
+popd > /dev/null
+
+# Test 8: Targets, Targets File, Targets Overlaps
+mkdir "$TMPDIR/test8" && pushd "$TMPDIR/test8" > /dev/null
+
+echo "> Run bcftools_stats on VCF file with targets, targets file, and targets overlaps"
+"$meta_executable" \
+  --input "../example.vcf" \
+  --output "stats.txt" \
+  --targets "20:1000000-2000000" \
+  --targets_overlaps "pos" \
+
+# checks
+assert_file_exists "stats.txt"
+assert_file_not_empty "stats.txt"
+assert_file_contains "stats.txt" "bcftools stats  -t 20:1000000-2000000 --targets-overlap pos ../example.vcf"
+echo "- test8 succeeded -"
+
+popd > /dev/null
+
+# Test 9: User TSTV and Verbose
+mkdir "$TMPDIR/test9" && pushd "$TMPDIR/test9" > /dev/null
+
+echo "> Run bcftools_stats on VCF file with user TSTV and verbose"
+"$meta_executable" \
+  --input "../example.vcf" \
+  --output "stats.txt" \
+  --user_tstv "DP" \
+  --verbose \
+
+# checks
+assert_file_exists "stats.txt"
+assert_file_not_empty "stats.txt"
+assert_file_contains "stats.txt" "bcftools stats  --verbose -u DP ../example.vcf"
+echo "- test9 succeeded -"
+
+popd > /dev/null
+
+# Test 10: Two vcf files
+mkdir "$TMPDIR/test10" && pushd "$TMPDIR/test10" > /dev/null
+
+echo "> Run bcftools_stats on two VCF files"
+"$meta_executable" \
+  --input "../example.vcf.gz" \
+  --input "../example.vcf.gz" \
+  --output "stats.txt" \
+
+# checks
+assert_file_exists "stats.txt"
+assert_file_not_empty "stats.txt"
+assert_file_contains "stats.txt" "bcftools stats  ../example.vcf.gz ../example.vcf.gz"
+echo "- test10 succeeded -"
+
+popd > /dev/null
+
+# Test 11: with allele frequency bins file option
+mkdir "$TMPDIR/test11" && pushd "$TMPDIR/test11" > /dev/null
+
+echo "> Run bcftools_stats on VCF file with allele frequency bins file option"
+"$meta_executable" \
+  --input "../example.vcf" \
+  --output "stats.txt" \
+  --allele_frequency_bins "../allele_frequency_bins.txt" \
+
+# checks
+assert_file_exists "stats.txt"
+assert_file_not_empty "stats.txt"
+assert_file_contains "stats.txt" "bcftools stats  --af-bins ../allele_frequency_bins.txt ../example.vcf"
+echo "- test11 succeeded -"
+
+popd > /dev/null
+
+
+echo "---- All tests succeeded! ----"
+exit 0
+
+