From 86333c1a465db45facd936695f1f33b186ccf0fc Mon Sep 17 00:00:00 2001
From: Suman Muralidharan <104161349+sumanm99@users.noreply.github.com>
Date: Tue, 15 Oct 2024 23:46:17 +0530
Subject: [PATCH] SnpEff (#153)

* Help file

* config file

* config file

* runners script

* config file

* test script

* test

* test

* runners script

* snake case

* snake case

* output parameters

* modify argument formatting, container setup

* fix buf with mv command

* avoid boolean_false and fix bug with output files

---------

Co-authored-by: Emma Rousseau <emmarou1@icloud.com>
---
 src/snpeff/config.vsh.yaml              | 297 ++++++++++++++++++++++++
 src/snpeff/help.txt                     |  79 +++++++
 src/snpeff/script.sh                    | 148 ++++++++++++
 src/snpeff/test.sh                      | 129 ++++++++++
 src/snpeff/test_data/cancer.vcf         |   2 +
 src/snpeff/test_data/my_annotations.bed |   1 +
 src/snpeff/test_data/script.sh          |  15 ++
 src/snpeff/test_data/test.vcf           |   1 +
 8 files changed, 672 insertions(+)
 create mode 100644 src/snpeff/config.vsh.yaml
 create mode 100644 src/snpeff/help.txt
 create mode 100644 src/snpeff/script.sh
 create mode 100644 src/snpeff/test.sh
 create mode 100644 src/snpeff/test_data/cancer.vcf
 create mode 100644 src/snpeff/test_data/my_annotations.bed
 create mode 100644 src/snpeff/test_data/script.sh
 create mode 100644 src/snpeff/test_data/test.vcf

diff --git a/src/snpeff/config.vsh.yaml b/src/snpeff/config.vsh.yaml
new file mode 100644
index 00000000..5fb8622d
--- /dev/null
+++ b/src/snpeff/config.vsh.yaml
@@ -0,0 +1,297 @@
+name: snpeff
+description: |
+  Genetic variant annotation, and functional effect prediction toolbox. 
+  It annotates and predicts the effects of genetic variants on genes and 
+  proteins (such as amino acid changes).
+keywords: [ "annotation", "effect prediction", "snp", "variant", "vcf"]
+
+links:
+  repository: https://github.com/pcingola/SnpEff
+  homepage: https://pcingola.github.io/SnpEff/
+  documentation: https://pcingola.github.io/SnpEff/
+references:
+  doi: 10.3389/fgene.2012.00035
+license: MIT
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --input
+        type: file
+        description: Input variants file.
+        example: test.vcf
+        required: true
+      - name: --genome_version
+        type: string
+        description: Reference genome version.
+        example: GRCh37.75
+        required: true
+  - name: Outputs
+    arguments:
+      - name: --output
+        type: file
+        description: The output file.
+        example: out.vcf
+        direction: output
+        required: true
+      - name: --summary
+        type: file
+        description: Summary file directory.
+        example: summary_dir
+        direction: output
+      - name: --genes
+        type: file
+        description: Txt file directory.
+        example: genes_dir
+        direction: output
+  - name: Options
+    arguments:
+      - name: --chr
+        type: string
+        description: |
+          Prepend 'string' to chromosome name (e.g. 'chr1' instead of '1'). Only on TXT output.
+      - name: --classic
+        type: boolean_true
+        description: Use old style annotations instead of Sequence Ontology and Hgvs.
+      - name: --csv_stats
+        type: file
+        description: Create CSV summary file.
+      - name: --download
+        type: boolean_true
+        description: Download reference genome if not available.
+      - name: --input_format
+        alternatives: [-i]
+        type: string
+        description: |
+          Input format [ vcf, bed ]. Default: VCF.
+          example: "VCF"
+      - name: --file_list
+        type: boolean_true
+        description: Input actually contains a list of files to process.
+      - name: --output_format
+        alternatives: [-o]
+        type: string
+        description: |
+          Output format [ vcf, gatk, bed, bedAnn ]. Default: VCF.
+        example: "VCF"
+      - name: --stats
+        alternatives: [-s, --htmlStats]
+        type: boolean_true
+        description: Create HTML summary file.
+      - name: --no_stats
+        type: boolean_true
+        description: Do not create stats (summary) file.
+  - name: Results filter options
+    arguments:
+      - name: --fi
+        alternatives: [--filterInterval]
+        type: file
+        description: |
+          Only analyze changes that intersect with the intervals 
+          specified in this file. This option can be used several times.
+      - name: --no_downstream 
+        type: boolean_true
+        description: Do not show DOWNSTREAM changes
+      - name: --no_intergenic
+        type: boolean_true
+        description: Do not show INTERGENIC changes.
+      - name: --no_intron
+        type: boolean_true
+        description: Do not show INTRON changes.
+      - name: --no_upstream
+        type: boolean_true
+        description: Do not show UPSTREAM changes.
+      - name: --no_utr
+        type: boolean_true
+        description: Do not show 5_PRIME_UTR or 3_PRIME_UTR changes.
+      - name: --no
+        type: string
+        description: |
+          Do not show 'EffectType'. This option can be used several times.
+  - name: Annotations options
+    arguments:
+      - name: --cancer 
+        type: boolean_true
+        description: Perform 'cancer' comparisons (Somatic vs Germline).
+      - name: --cancer_samples
+        type: file
+        description: Two column TXT file defining 'original \t derived' samples.
+      - name: --fastaprot
+        type: file
+        description: |
+          Create an output file containing the resulting protein sequences.
+      - name: --format_eff
+        type: boolean_true
+        description: |
+          Use 'EFF' field compatible with older versions (instead of 'ANN').
+      - name: --gene_id
+        type: boolean_true
+        description: Use gene ID instead of gene name (VCF output).
+      - name: --hgvs
+        type: boolean_true
+        description: Use HGVS annotations for amino acid sub-field.
+      - name: --hgvs_old
+        type: boolean_true
+        description: Use old HGVS notation.
+      - name: --hgvs1_letter_aa 
+        type: boolean_true
+        description: Use one letter Amino acid codes in HGVS notation.
+      - name: --hgvs_tr_id
+        type: boolean_true
+        description: Use transcript ID in HGVS notation.
+      - name: --lof
+        type: boolean_true
+        description: |
+          Add loss of function (LOF) and Nonsense mediated decay (NMD) tags.
+      - name: -no_hgvs
+        type: boolean_true
+        description: Do not add HGVS annotations.
+      - name: --no_lof
+        type: boolean_true
+        description: Do not add LOF and NMD annotations.
+      - name: --no_shift_hgvs
+        type: boolean_true
+        description: |
+          Do not shift variants according to HGVS notation (most 3prime end).
+      - name: --oicr
+        type: boolean_true
+        description: Add OICR tag in VCF file.
+      - name: --sequence_ontology
+        type: boolean_true
+        description: Use Sequence Ontology terms.
+  - name: Generic options
+    arguments:
+      - name: --config
+        alternatives: [-c]
+        type: file
+        description: Specify config file
+      - name: --config_option
+        type: string
+        description: Override a config file option (name=value).
+      - name: --debug
+        alternatives: [-d]
+        type: boolean_true
+        description: Debug mode (very verbose).
+      - name: --data_dir
+        type: file
+        description: Override data_dir parameter from config file.
+      - name: --no_download
+        type: boolean_true
+        description: Do not download a SnpEff database, if not available locally.
+      - name: --no_log
+        type: boolean_true
+        description: Do not report usage statistics to server.
+      - name: --quiet
+        alternatives: [-q]
+        type: boolean_true
+        description: Quiet mode (do not show any messages or errors)
+      - name: --verbose
+        alternatives: [-v]
+        type: boolean_true
+        description: Verbose mode.
+  - name: Database options
+    arguments:
+      - name: --canon 
+        type: boolean_true
+        description: Only use canonical transcripts.
+      - name: --canon_list
+        type: file
+        description: |
+          Only use canonical transcripts, replace some transcripts using the 'gene_id         
+          transcript_id' entries in <file>.
+      - name: --tag
+        type: string
+        description: |
+          Only use transcript having a tag 'tagName'. This option can be used multiple times.
+      - name: --no_tag
+        type: boolean_true
+        description: |
+          Filter out transcript having a tag 'tagName'. This option can be used multiple times.
+      - name: --interaction
+        type: boolean_true
+        description: Annotate using interactions (requires interaction database).
+      - name: --interval
+        type: file
+        description: |
+          Use a custom intervals in TXT/BED/BigBed/VCF/GFF file (you may use this option many times).
+      - name: --max_tsl
+        type: integer
+        description: Only use transcripts having Transcript Support Level lower than <TSL_number>.
+      - name: --motif 
+        type: boolean_true
+        description: Annotate using motifs (requires Motif database).
+      - name: --nextprot
+        type: boolean_true
+        description: Annotate using NextProt (requires NextProt database).
+      - name: --no_genome
+        type: boolean_true
+        description: Do not load any genomic database (e.g. annotate using custom files).
+      - name: --no_expand_iub
+        type: boolean_true
+        description: Disable IUB code expansion in input variants.
+      - name: --no_interaction
+        type: boolean_true
+        description: Disable inteaction annotations.
+      - name: --no_motif
+        type: boolean_true
+        description: Disable motif annotations.
+      - name: --no_nextprot
+        type: boolean_true
+        description: Disable NextProt annotations.
+      - name: --only_reg
+        type: boolean_true
+        description: Only use regulation tracks.
+      - name: --only_protein
+        type: boolean_true
+        description: Only use protein coding transcripts.
+      - name: --only_tr
+        type: file
+        description: |
+          Only use the transcripts in this file. Format: One transcript ID per line.
+        example: file.txt
+      - name: --reg
+        type: string
+        description: Regulation track to use (this option can be used add several times).
+      - name: --ss
+        alternatives: [--spliceSiteSize]
+        type: integer
+        description: |
+          Set size for splice sites (donor and acceptor) in bases. Default: 2.
+      - name: --splice_region_exon_size
+        type: integer
+        description: |
+          Set size for splice site region within exons. Default: 3 bases.
+      - name: --splice_region_intron_min
+        type: integer
+        description: |
+          Set minimum number of bases for splice site region within intron. Default: 3 bases.
+      - name: --splice_region_intron_max
+        type: integer
+        description: |
+          Set maximum number of bases for splice site region within intron. Default: 8 bases.
+      - name: --strict
+        type: boolean_true
+        description: Only use 'validated' transcripts (i.e. sequence has been checked).
+      - name: --ud
+        alternatives: [--upDownStreamLen]
+        type: integer
+        description: Set upstream downstream interval length (in bases).
+resources:
+  - type: bash_script
+    path: script.sh
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - type: file
+    path: test_data
+engines:
+  - type: docker
+    image: quay.io/staphb/snpeff:5.2a
+    setup:
+      - type: docker
+        run: |
+          version=$(snpEff -version) && \
+          version_trimmed=$(echo "$version" | awk '{print $1, $2}') && \
+          echo "$version_trimmed" > /var/software_versions.txt
+runners:
+  - type: executable
+  - type: nextflow
\ No newline at end of file
diff --git a/src/snpeff/help.txt b/src/snpeff/help.txt
new file mode 100644
index 00000000..d1950220
--- /dev/null
+++ b/src/snpeff/help.txt
@@ -0,0 +1,79 @@
+Usage: snpEff [eff] [options] genome_version [input_file]
+
+        variants_file                   : Default is STDIN
+
+Options:
+        -chr <string>                   : Prepend 'string' to chromosome name (e.g. 'chr1' instead of '1'). Only on TXT output.
+        -classic                        : Use old style annotations instead of Sequence Ontology and Hgvs.
+        -csvStats <file>                : Create CSV summary file.
+        -download                       : Download reference genome if not available. Default: true
+        -i <format>                     : Input format [ vcf, bed ]. Default: VCF.
+        -fileList                       : Input actually contains a list of files to process.
+        -o <format>                     : Ouput format [ vcf, gatk, bed, bedAnn ]. Default: VCF.
+        -s , -stats, -htmlStats         : Create HTML summary file.  Default is 'snpEff_summary.html'
+        -noStats                        : Do not create stats (summary) file      
+
+Results filter options:
+        -fi , -filterInterval  <file>   : Only analyze changes that intersect with the intervals specified in this file (you may use this option many times)        
+        -no-downstream                  : Do not show DOWNSTREAM changes
+        -no-intergenic                  : Do not show INTERGENIC changes
+        -no-intron                      : Do not show INTRON changes
+        -no-upstream                    : Do not show UPSTREAM changes
+        -no-utr                         : Do not show 5_PRIME_UTR or 3_PRIME_UTR changes
+        -no <effectType>                : Do not show 'EffectType'. This option can be used several times.
+
+Annotations options:
+        -cancer                         : Perform 'cancer' comparisons (Somatic vs Germline). Default: false
+        -cancerSamples <file>           : Two column TXT file defining 'oringinal \t derived' samples.
+        -fastaProt <file>               : Create an output file containing the resulting protein sequences.
+        -formatEff                      : Use 'EFF' field compatible with older versions (instead of 'ANN').
+        -geneId                         : Use gene ID instead of gene name (VCF output). Default: false
+        -hgvs                           : Use HGVS annotations for amino acid sub-field. Default: true
+        -hgvsOld                        : Use old HGVS notation. Default: false   
+        -hgvs1LetterAa                  : Use one letter Amino acid codes in HGVS notation. Default: false
+        -hgvsTrId                       : Use transcript ID in HGVS notation. Default: false
+        -lof                            : Add loss of function (LOF) and Nonsense mediated decay (NMD) tags.
+        -noHgvs                         : Do not add HGVS annotations.
+        -noLof                          : Do not add LOF and NMD annotations.     
+        -noShiftHgvs                    : Do not shift variants according to HGVS notation (most 3prime end).
+        -oicr                           : Add OICR tag in VCF file. Default: false
+        -sequenceOntology               : Use Sequence Ontology terms. Default: true
+
+Generic options:
+        -c , -config                 : Specify config file
+        -configOption name=value     : Override a config file option
+        -d , -debug                  : Debug mode (very verbose).
+        -dataDir <path>              : Override data_dir parameter from config file.
+        -download                    : Download a SnpEff database, if not available locally. Default: true
+        -nodownload                  : Do not download a SnpEff database, if not available locally.
+        -h , -help                   : Show this help and exit
+        -noLog                       : Do not report usage statistics to server   
+        -q , -quiet                  : Quiet mode (do not show any messages or errors)
+        -v , -verbose                : Verbose mode
+        -version                     : Show version number and exit
+
+Database options:
+        -canon                       : Only use canonical transcripts.
+        -canonList <file>            : Only use canonical transcripts, replace some transcripts using the 'gene_id         transcript_id' entries in <file>.        
+        -tag <tagName>               : Only use transcript having a tag 'tagName'. This option can be used multiple times.
+        -notag <tagName>             : Filter out transcript having a tag 'tagName'. This option can be used multiple times.
+        -interaction                 : Annotate using interactions (requires interaction database). Default: true
+        -interval <file>             : Use a custom intervals in TXT/BED/BigBed/VCF/GFF file (you may use this option many times)
+        -maxTSL <TSL_number>         : Only use transcripts having Transcript Support Level lower than <TSL_number>.
+        -motif                       : Annotate using motifs (requires Motif database). Default: true
+        -nextProt                    : Annotate using NextProt (requires NextProt database).
+        -noGenome                    : Do not load any genomic database (e.g. annotate using custom files).
+        -noExpandIUB                 : Disable IUB code expansion in input variants
+        -noInteraction               : Disable inteaction annotations
+        -noMotif                     : Disable motif annotations.
+        -noNextProt                  : Disable NextProt annotations.
+        -onlyReg                     : Only use regulation tracks.
+        -onlyProtein                 : Only use protein coding transcripts. Default: false
+        -onlyTr <file.txt>           : Only use the transcripts in this file. Format: One transcript ID per line.
+        -reg <name>                  : Regulation track to use (this option can be used add several times).
+        -ss , -spliceSiteSize <int>  : Set size for splice sites (donor and acceptor) in bases. Default: 2
+        -spliceRegionExonSize <int>  : Set size for splice site region within exons. Default: 3 bases
+        -spliceRegionIntronMin <int> : Set minimum number of bases for splice site region within intron. Default: 3 bases
+        -spliceRegionIntronMax <int> : Set maximum number of bases for splice site region within intron. Default: 8 bases
+        -strict                      : Only use 'validated' transcripts (i.e. sequence has been checked). Default: false
+        -ud , -upDownStreamLen <int> : Set upstream downstream interval length (in bases)
\ No newline at end of file
diff --git a/src/snpeff/script.sh b/src/snpeff/script.sh
new file mode 100644
index 00000000..bf3914bb
--- /dev/null
+++ b/src/snpeff/script.sh
@@ -0,0 +1,148 @@
+#!/bin/bash
+
+set -eo pipefail
+
+## VIASH START
+## VIASH END
+
+# Unset flags if 'false'
+unset_if_false=(
+    par_classic
+    par_download
+    par_file_list
+    par_stats
+    par_cancer
+    par_format_eff
+    par_gene_id
+    par_hgvs
+    par_hgvs_old
+    par_hgvs1_letter_aa
+    par_hgvs_tr_id
+    par_lof
+    par_oicr
+    par_sequence_ontology
+    par_debug
+    par_quiet
+    par_verbose
+    par_canon
+    par_interaction
+    par_motif
+    par_nextprot
+    par_only_reg
+    par_only_protein
+    par_strict
+    par_no_stats
+    par_no_downstream
+    par_no_intergenic
+    par_no_intron
+    par_no_upstream
+    par_no_utr
+    par_no_hgvs
+    par_no_lof
+    par_no_shift_hgvs
+    par_no_download
+    par_no_log
+    par_no_tag
+    par_no_genome
+    par_no_expand_iub
+    par_no_interaction
+    par_no_motif
+    par_no_nextprot
+)
+for par in ${unset_if_false[@]}; do
+    test_val="${!par}" # contains the value of the 'par'
+    [[ "$test_val" == "false" ]] && unset $par
+done
+
+
+# Run SnpEff
+snpEff \
+    ${par_chr:+-chr "$par_chr"} \
+    ${par_classic:+-classic} \
+    ${par_csv_stats:+-csvStats "$par_csv_stats"} \
+    ${par_download:+-download} \
+    ${par_input_format:+-i "$par_input_format"} \
+    ${par_file_list:+-fileList} \
+    ${par_output_format:+-o "$par_output_format"} \
+    ${par_stats:+-stats} \
+    ${par_no_stats:+-noStats} \
+    ${par_fi:+-fi "$par_fi"} \
+    ${par_no_downstream:+-no-downstream} \
+    ${par_no_intergenic:+-no-intergenic} \
+    ${par_no_intron:+-no-intron} \
+    ${par_no_upstream:+-no-upstream} \
+    ${par_no_utr:+-no-utr} \
+    ${par_no:+-no "$par_no"} \
+    ${par_cancer:+-cancer} \
+    ${par_cancer_samples:+-cancerSamples "$par_cancer_samples]"} \
+    ${par_fastaprot:+-fastaProt "$par_fastaprot]"} \
+    ${par_format_eff:+-formatEff} \
+    ${par_gene_id:+-geneId} \
+    ${par_hgvs:+-hgvs} \
+    ${par_hgvs_old:+-hgvsOld} \
+    ${par_hgvs1_letter_aa:+-hgvs1LetterAa} \
+    ${par_hgvs_tr_id:+-hgvsTrId} \
+    ${par_lof:+-lof} \
+    ${par_no_hgvs:+-noHgvs} \
+    ${par_no_lof:+-noLof} \
+    ${par_no_shift_hgvs:+-noShiftHgvs} \
+    ${par_oicr:+-oicr} \
+    ${par_sequence_ontology:+-sequenceOntology} \
+    ${par_config:+-config "$par_config"} \
+    ${par_config_option:+-configOption "$par_config_option"} \
+    ${par_debug:+-debug} \
+    ${par_data_dir:+-dataDir "$par_data_dir"} \
+    ${par_no_download:+-nodownload} \
+    ${par_no_log:+-noLog} \
+    ${par_quiet:+-quiet} \
+    ${par_verbose:+-verbose} \
+    ${par_canon:+-canon} \
+    ${par_canon_list:+-canonList "$par_canon_list"} \
+    ${par_tag:+-tag "$par_tag"} \
+    ${par_no_tag:+-notag} \
+    ${par_interaction:+-interaction} \
+    ${par_interval:+-interval "$par_interval"} \
+    ${par_max_tsl:+-maxTSL "$par_max_tsl"} \
+    ${par_motif:+-motif} \
+    ${par_nextprot:+-nextProt} \
+    ${par_no_genome:+-noGenome} \
+    ${par_no_expand_iub:+-noExpandIUB} \
+    ${par_no_interaction:+-noInteraction} \
+    ${par_no_motif:+-noMotif} \
+    ${par_no_nextprot:+-noNextProt} \
+    ${par_only_reg:+-onlyReg} \
+    ${par_only_protein:+-onlyProtein} \
+    ${par_only_tr:+-onlyTr "$par_onlyTr"} \
+    ${par_reg:+-reg "$par_reg"} \
+    ${par_ss:+-ss "$par_ss"} \
+    ${par_splice_region_exon_size:+-spliceRegionExonSize "$par_splice_region_exon_size"} \
+    ${par_splice_region_intron_min:+-spliceRegionIntronMin "$par_splice_region_intron_min"} \
+    ${par_splice_region_intron_max:+-spliceRegionIntronMax "$par_splice_region_intron_max"} \
+    ${par_strict:+-strict} \
+    ${par_ud:+-ud "$par_ud"} \
+    "$par_genome_version" \
+    "$par_input" \
+    > "$par_output"
+
+# Path of the output file (par_output)
+absolute_path=$(realpath "$par_output")
+directory_path=$(dirname "$absolute_path")
+
+# Move the automatically generated outputs to their locations
+if [ -z "$par_no_stats" ]; then
+    if [ ! -z "$par_summary" ]; then
+        mv -n snpEff_summary.html "$par_summary"
+    else
+        mv -n snpEff_summary.html "$directory_path"
+    fi
+fi 
+
+if [ -z "$par_no_stats" ]; then
+    if [ ! -z "$par_genes" ]; then
+        mv -n snpEff_genes.txt "$par_genes"
+    else
+        mv -n snpEff_genes.txt "$directory_path"
+    fi
+fi
+
+exit 0
diff --git a/src/snpeff/test.sh b/src/snpeff/test.sh
new file mode 100644
index 00000000..d8c72c20
--- /dev/null
+++ b/src/snpeff/test.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+
+set -eo pipefail
+
+## VIASH START
+## VIASH END
+
+###########################################################################
+
+# Test 1: Run SnpEff with only required parameters
+
+mkdir test1
+pushd test1 > /dev/null # cd test1 (stack)
+
+echo "> Run Test 1: required parameters"
+"$meta_executable" \
+  --genome_version GRCh37.75 \
+  --input "$meta_resources_dir/test_data/cancer.vcf" \
+  --output out.vcf
+
+# Check if output files are generated
+output_files=("out.vcf" "snpEff_genes.txt" "snpEff_summary.html")
+
+# Check if any of the files do not exist
+for file in "${output_files[@]}"; do
+    if [ ! -e "$file" ]; then
+        echo "File $file does not exist."
+    fi
+done
+
+# Check if files are empty
+for file in "${output_files[@]}"; do
+    if [ ! -s "$file" ]; then
+        echo "File $file is empty."
+    fi
+done
+
+popd > /dev/null # Remove directory from stack (LIFO)
+
+echo "Test 1 succeeded."
+
+###########################################################################
+
+# Test 2: Run SnpEff with a different input + options
+
+mkdir test2
+pushd test2 > /dev/null
+
+echo "> Run Test 2: different input + options"
+"$meta_executable" \
+  --genome_version GRCh37.75 \
+  --input "$meta_resources_dir/test_data/test.vcf" \
+  --interval "$meta_resources_dir/test_data/my_annotations.bed" \
+  --no_stats \
+  --output output.vcf
+
+# Check if output.vcf exists
+if [ ! -e "output.vcf" ]; then
+    echo "File output.vcf does not exist."
+fi
+
+# These files should not exist
+files=("snpEff_genes.txt" "snpEff_summary.html")
+for file in "${files[@]}"; do
+    if [ -e "$file" ]; then
+        echo "Error: File $file exists."
+    fi
+done
+
+# Check if output.vcf is empty
+if [ ! -s "output.vcf" ]; then
+    echo "File output.vcf is empty."
+fi
+
+popd > /dev/null
+
+echo "Test 2 succeeded."
+
+###########################################################################
+
+# Test 3: Move the output files to other locations
+
+mkdir test3
+pushd test3 > /dev/null
+
+mkdir temp
+
+echo "> Run Test 3: move output files"
+"$meta_executable" \
+  --genome_version GRCh37.75 \
+  --input "$meta_resources_dir/test_data/test.vcf" \
+  --output output.vcf \
+  --summary temp \
+  --genes temp
+
+# Check if output.vcf exists
+if [ ! -e "output.vcf" ]; then
+    echo "File output.vcf does not exist."
+fi
+
+# Check if the other output files have been moved to temp folder
+output_files=("snpEff_genes.txt" "snpEff_summary.html")
+
+# Check if any of the files do not exist
+for file in "${output_files[@]}"; do
+    if [ ! -e "temp/$file" ]; then
+        echo "File $file does not exist in 'temp' folder."
+    fi
+done
+
+# Check if output.vcf is empty
+if [ ! -s "output.vcf" ]; then
+    echo "File output.vcf is empty."
+fi
+
+# Check if the other output files in temp folder are empty
+for file in "${output_files[@]}"; do
+    if [ ! -s "temp/$file" ]; then
+        echo "File $file is empty."
+    fi
+done
+
+popd > /dev/null
+
+echo "Test 3 succeeded."
+
+###########################################################################
+
+echo "All tests successfully completed!"
\ No newline at end of file
diff --git a/src/snpeff/test_data/cancer.vcf b/src/snpeff/test_data/cancer.vcf
new file mode 100644
index 00000000..f37ad8c3
--- /dev/null
+++ b/src/snpeff/test_data/cancer.vcf
@@ -0,0 +1,2 @@
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	Patient_01_Germline	Patient_01_Somatic
+1	69091	.	A	C,G	.	PASS	AC=1	GT	1/0	2/1
diff --git a/src/snpeff/test_data/my_annotations.bed b/src/snpeff/test_data/my_annotations.bed
new file mode 100644
index 00000000..a5247f97
--- /dev/null
+++ b/src/snpeff/test_data/my_annotations.bed
@@ -0,0 +1 @@
+1	10000	20000	MY_ANNOTATION
diff --git a/src/snpeff/test_data/script.sh b/src/snpeff/test_data/script.sh
new file mode 100644
index 00000000..a47ec136
--- /dev/null
+++ b/src/snpeff/test_data/script.sh
@@ -0,0 +1,15 @@
+# Test files from SnpEff examples
+if [ ! -f snpEff_latest_core.zip ]; then
+    wget https://snpeff.blob.core.windows.net/versions/snpEff_latest_core.zip
+fi
+
+if [ ! -d snpEff ]; then
+    unzip snpEff_latest_core.zip
+fi
+
+mv snpEff/examples/test.vcf src/snpeff/test_data/
+mv snpEff/examples/cancer.vcf src/snpeff/test_data/
+mv snpEff/examples/my_annotations.bed src/snpeff/test_data/
+
+rm -rf snpEff_latest_core.zip
+rm -rf snpEff
\ No newline at end of file
diff --git a/src/snpeff/test_data/test.vcf b/src/snpeff/test_data/test.vcf
new file mode 100644
index 00000000..d552ef18
--- /dev/null
+++ b/src/snpeff/test_data/test.vcf
@@ -0,0 +1 @@
+1	10469	.	C	G	365.78	PASS	AC=30;AF=0.0732