#!/bin/bash
#
# FJDpipeline.sh
#
# Usage: bash FJDpipeline.sh SAMPLE [SAMPLE ...]
# Every argument is a sample prefix; the script looks for the paired reads
# $FQ/<SAMPLE>*1.fastq.gz and $FQ/<SAMPLE>*2.fastq.gz.
#
# The shebang has to be the very first line of the file to take effect, and
# the script name must live in a comment: as a bare first line it was executed
# as a command ("FJDpipeline.sh: command not found").

	#DATE
	#Current date (YYYY-MM-DD); used further down to name the annotated VCF output.
DATE=$(date +%Y-%m-%d)

	#······································

	#Set the paths below to match your system.
	#Create each directory beforehand, or point the variable at one that already exists.

	#······································

	#Main working directory: every output folder of the pipeline is created inside it.
	#MDAP = path to that top-level directory.
#MDAP='/home/marius/testFJDRP/final_test'
MDAP="/mnt/genetica3/Ionut/PipelineM/pruebas"
	#·······································

	#Directory that is searched for the input .fastq.gz files.
#FQ='/home/marius/genetica3/marius/fastq/NIST70'
FQ="/mnt/genetica3/Ionut/PipelineM/fastqs"

	#·······································

	#Reference genome directory, used as a mini bundle.
	#HG19: holds the reference fasta, its index files and the bundle data from UCSC.
#HG19='/home/marius/genetica3/marius/hg19bundle'
HG19="/mnt/genetica3/marius/pipeline_practicas_marius/hg19bundle"
	#·······································

	#Software directory: all the tools called by the pipeline are stored under it.
#SFT='/home/marius/software'
SFT="/mnt/genetica3/marius/pipeline_practicas_marius/software"

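	#Path of the separate genotype helper script. The joint-genotyping step (more than one sample) runs it with "bash $GENOTYPE_SCRIPT", so uncomment and set it before analysing several samples.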
#GENOTYPE_SCRIPT="${MDAP}/genotype_script.sh"

	#Specify the PEDIGREE PATH FOR GENOTYPEGVCFS
#PEDIGREE='home/marius/genetica3/marius/fastq/RP-1773/family_info.txt'


	#·······································


	#Files generated by the script for each part of the analysis.
	#Can change their name or add/remove but then outputs would need redirects.


	#IMPORTANT:
	#When a path is built from another variable, e.g. ANY=${ANYPATH}/anypath, use DOUBLE quotes
	#or no quotes at all; single quotes would keep ${ANYPATH} from being expanded.
	#·······································
	#One output folder per pipeline stage, all located inside ${MDAP}.
	#·······································
	#mapped_data: SAM files produced by the BWA alignment.
MD=${MDAP}/mapped_data
	#·······································
	#sorted_data: sorted files written by Picard SortSam.
SD=${MDAP}/sorted_data
	#·······································
	#dedupped_data: output of Picard MarkDuplicates (BAM files plus duplication metrics).
DD=${MDAP}/dedupped_data
	#·······································
	#recalibrated_bqsr_data: recalibration tables from GATK BaseRecalibrator, based on the
	#covariates read_group, reported_quality_score, machine_cycle and nucleotide_context.
RBQSRD=${MDAP}/recalibrated_bqsr_data
	#·······································
	#applied_bqsr_data: BAM files after the recalibration table has been applied (GATK ApplyBQSR).
ABQSRD=${MDAP}/applied_bqsr_data
	#·······································
	#plot_recalibration_data: second-pass tables (BaseRecalibrator run again on the ApplyBQSR BAM)
	#and the AnalyzeCovariates plots comparing the first and the second pass.
PRD=${MDAP}/plot_recalibration_data
	#·······································
	#haplotype_caller_gvcf_data: SNP and indel calls made by GATK HaplotypeCaller
	#through local re-assembly of haplotypes.
HCGVCFD=${MDAP}/haplotype_caller_gvcf_data
	#·······································
	#combined_gvcf_data: several GVCFs (more than one sample, or a family trio) merged into a
	#single g.vcf with CombineGVCFs. For a large number of samples consider GenomicsDBImport instead.
CGVCFD=${MDAP}/combined_gvcf_data
	#·······································
	#genotyped_vcf_data: joint genotyping of samples pre-called with HaplotypeCaller.
	#Input is a single-sample or combined multi-sample GVCF; output is a VCF.
GVCFD=${MDAP}/genotyped_vcf_data
	#·······································
	#variant_filtration_vcf_data: hard filtering of SNPs and INDELs based on the INFO and
	#FORMAT annotations (QD, MQ0, FS, MQ, ReadPosRankSum).
VFVCFD=${MDAP}/variant_filtration_vcf_data
	#·······································
	#·······································
	#Vep_vcf_annotated_data: output folder of the VEP annotation step.
	#With --vcf the annotations are added to the CSQ tag of the INFO column (VCF output, name set in VCF_OUT);
	#with --tab the output is a TSV with one tab-separated column per annotation.

VEPVCFAD="${MDAP}/vep_vcf_annotated_data"
	#VEP variables for the Variant Effect Predictor, which annotates the VCF file.
	#VEP executable.
VEP="${SFT}/variant_effect_predictor/ensembl-vep/vep"
	#VEP directory for the downloaded files (HUMAN DATABASE FILES, PLUGINS).
	#The old /home location is kept commented out like the other alternative paths of this
	#script; it used to be assigned and then immediately overwritten by the line below.
#VEP_CACHE='/home/marius/.vep'
VEP_CACHE='/mnt/genetica3/marius/pipeline_practicas_marius/software/variant_effect_predictor/.vep'
	#VEP_FASTA, reference fasta used by VEP (GRCh37 version).
VEP_FASTA="${VEP_CACHE}/homo_sapiens/93_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa"
	#PLUGINS directory for VEP.
PLUGIN_DIR="${VEP_CACHE}/Plugins"
	#DATABASE directory for the VEP plugins.
PLUGIN_DBS="${VEP_CACHE}/dbs"
	#Plugin paths, pointing at the exact folders and files.
	#Loss of Function plugin (LOFTEE): human ancestor fasta.
LOFTEE="${PLUGIN_DBS}/human_ancestor.fa.gz"
	#LOFTEE folder (requirements of the LoF plugin).
LOFTEE_PATH="${VEP_CACHE}/loftee"
	#·······································
	#·······································
	#dbNSFP plugin: a functional prediction and annotation database of all potential
	#non-synonymous single-nucleotide variants (nsSNVs) in the human genome.
DBNSFP="${PLUGIN_DBS}/dbNSFP_hg19.gz"
	#·······································
	#·······································
	#VCF INPUT
VCF_IN="${VFVCFD}/pre_filtered_ready_to_annotate.vcf"
	#·······································
	#·······································
	#OUTPUT
	#Here you can choose from different outputs.
	#·······································
	#.1:VCF by selecting the --vcf ARGUMENT
	#.2:TSV by selecting the --tab ARGUMENT
	#.3:DEFAULT --> .TXT + .HTML (just set the name to -o annotated_vcf) and will obtain --> annotated_vcf.txt and annotated_vcf.html
	#DATE is expanded inside the quotes so the whole path is a single quoted word.
VCF_OUT="${VEPVCFAD}/vep_${DATE}_annotated.vcf"
	#·······································
	#·······································
	#Also install and set the paths correctly to PERL5LIB by exporting all the commands in libreriasVEP.sh

	#selectVariantsData_vcfs, selecting the qualities and the desired FILTERING options.
	#SVDVCF='/home/marius/testFJDRP/trioGDBImportJointCallGenotype/selecVariants_data_vcf'
	#VTTVCF='/home/marius/testFJDRP/trioGDBImportJointCallGenotype/variantstotable_vcf'
	#Last two are still in progress.


	#Start pipeline processing.


echo -e '\n\n\n'
echo -e " _____   _ ____    ____ ___ ____  _____ _     ___ _   _ _____ "
echo -e "|  ___| | |  _ \  |  _ \_ _|  _ \| ____| |   |_ _| \ | | ____|"
echo -e "| |_ _  | | | | | | |_) | || |_) |  _| | |    | ||  \| |  _|  "
echo -e "|  _| |_| | |_| | |  __/| ||  __/| |___| |___ | || |\  | |___ "
echo -e "|_|  \___/|____/  |_|  |___|_|   |_____|_____|___|_| \_|_____|"
echo -e "                                                              "
echo -e " ____ _____  _    ____ _____ ____  "
echo -e "/ ___|_   _|/ \  |  _ \_   _/ ___| "
echo -e "\___ \ | | / _ \ | |_) || | \___ \ "
echo -e " ___) || |/ ___ \|  _ < | |  ___) |"
echo -e "|____/ |_/_/   \_\_| \_\|_| |____/ "
echo -e "                                   "
echo -e '\n\n\n\n\n\n'
echo ············································································································
echo -e " ___ _   _ ____  _______  _____ _   _  ____ "
echo -e "|_ _| \ | |  _ \| ____\ \/ /_ _| \ | |/ ___|"
echo -e " | ||  \| | | | |  _|  \  / | ||  \| | |  _ "
echo -e " | || |\  | |_| | |___ /  \ | || |\  | |_| |"
echo -e "|___|_| \_|____/|_____/_/\_\___|_| \_|\____|"
echo -e "                                            "
echo -e " ____  _____ _____ _____ ____  _____ _   _  ____ _____ "
echo -e "|  _ \| ____|  ___| ____|  _ \| ____| \ | |/ ___| ____|"
echo -e "| |_) |  _| | |_  |  _| | |_) |  _| |  \| | |   |  _|  "
echo -e "|  _ <| |___|  _| | |___|  _ <| |___| |\  | |___| |___ "
echo -e "|_| \_\_____|_|   |_____|_| \_\_____|_| \_|\____|_____|"
echo -e "                                                       "
# "FILES BWA" banner. The rows are single-quoted and printed with printf '%s\n'
# so every backslash is emitted literally. Inside double quotes the "\\" of the
# third row collapsed to a single backslash, which shifted that row by one column.
printf '%s\n' \
	' _____ ___ _     _____ ____    ______        ___     ' \
	'|  ___|_ _| |   | ____/ ___|  | __ ) \      / / \    ' \
	'| |_   | || |   |  _| \___ \  |  _ \\ \ /\ / / _ \   ' \
	'|  _|  | || |___| |___ ___) | | |_) |\ V  V / ___ \  ' \
	'|_|   |___|_____|_____|____/  |____/  \_/\_/_/   \_\ '

echo -e                                           '\n \tINDEXING REFERENCE FILES (BWA)\n'
echo ··············································································································


	#Start BWA INDEX.
	#Builds the BWA index of the reference, then the .fai and .dict companions.
	#Stores .dict and .fai files in HG19 directory.
echo 'Starts BWA INDEX'
echo 'Starts BWA INDEX' >>  registerFile
"$SFT/bwa/bwa" index "$HG19/ucsc.hg19.fasta"

echo -e '\nBWA INDEX DONE' ; paplay /usr/share/sounds/freedesktop/stereo/complete.oga
echo -e '\nBWA INDEX DONE' >> registerFile

	#REMOVE PREVIOUS UCSC.HG19.FASTA.FAI and UCSC.HG19.DICT file
	#-f: on a first run there is nothing to delete and that is not an error.
rm -f -- "$HG19/ucsc.hg19.fasta.fai" "$HG19/ucsc.hg19.dict"

	#Creating .FAI in HG19.
	#samtools faidx writes the index next to the reference as <reference>.fai, i.e.
	#ucsc.hg19.fasta.fai, the same file that is removed above and announced below.
	#The former "-o $HG19/ucsc.hg19.fai" is dropped: -o does not name the index file.
echo 'Create .FAI file, using samtools faidx'
echo 'Create .FAI file, using samtools faidx' >> registerFile
"$SFT/samtools/samtools" faidx "$HG19/ucsc.hg19.fasta"

echo -e '\nucsc.hg19.fasta.FAI DONE'
echo -e '\nucsc.hg19.fasta.FAI DONE' >> registerFile


	#Creating .DICT in HG19.
echo 'Create .DICT file, using picardtools CreateSequnceDictionary'
echo 'Create .DICT file, using picardtools CreateSequnceDictionary' >> registerFile
java -jar "$SFT/picard/build/libs/picard.jar" CreateSequenceDictionary \
	R="$HG19/ucsc.hg19.fasta" \
	O="$HG19/ucsc.hg19.dict"

echo -e '\nucsc.hg19.DICT DONE'
echo -e '\nucsc.hg19.DICT DONE'>> registerFile


echo ············································································································
echo -e " __  __    _    ____  ____ ___ _   _  ____  __        _____ _____ _   _ "
echo -e "|  \/  |  / \  |  _ \|  _ \_ _| \ | |/ ___| \ \      / /_ _|_   _| | | |"
echo -e "| |\/| | / _ \ | |_) | |_) | ||  \| | |  _   \ \ /\ / / | |  | | | |_| |"
echo -e "| |  | |/ ___ \|  __/|  __/| || |\  | |_| |   \ V  V /  | |  | | |  _  |"
echo -e "|_|  |_/_/   \_\_|   |_|  |___|_| \_|\____|    \_/\_/  |___| |_| |_| |_|"
echo -e "                                                                        "
# "BWA" banner. The rows are single-quoted and printed with printf '%s\n' so
# every backslash is emitted literally. Inside double quotes the "\\" of the
# third row collapsed to a single backslash, which shifted that row by one column.
printf '%s\n' \
	' ______        ___     ' \
	'| __ ) \      / / \    ' \
	'|  _ \\ \ /\ / / _ \   ' \
	'| |_) |\ V  V / ___ \  ' \
	'|____/  \_/\_/_/   \_\ '


echo -e                                              '\n \tMAPPING (BWA)\n'
echo ············································································································


	#Mapping the fastq files to the reference genome after BWA INDEX.
	#Reference genome is: UCSC.HG19.FASTA
	#Every script argument is a sample prefix: $FQ/<sample>*1.fastq.gz and
	#$FQ/<sample>*2.fastq.gz are aligned together into $MD/mapped_<sample>.sam.

	#-p: a re-run must not fail because the folder already exists.
mkdir -p "$MD"
echo 'mkdir mapped_data' >> registerFile

	#Run BWA mem -t 12
	#-t threads
	#-R read group header line, built per sample from the first read name
	#PREVIOUS HEADER $SFT/bwa/./bwa mem -t 12 -R @RG\tID:\tPL:illumina\tSM:Analysis_$i $HG19/ucsc.hg19.fasta \

for i in "$@"
do
	echo -e "\nBuilding the header for $i ongoing...\n"
	#Build header: take the first line (read name) of the first-mate fastq.
	#The glob stays outside the quotes so it still expands.
	header=$(zcat "$FQ/$i"*1.fastq.gz | head -n 1)
	echo "$header"
	#ID = first four ':' separated fields of the read name, without '@', joined by '_'.
	id=$(printf '%s\n' "$header" | cut -f 1-4 -d':' | sed -e 's/@//' -e 's/:/_/g')
	echo "$id"
	#Trailing run of A/T/G/C/N at the end of the read name; used in the LB field.
	sm=$(printf '%s\n' "$header" | grep -Eo '[ATGCN]+$')
	echo "$sm"
	#The read group is built once so that the preview printed here is exactly the
	#string handed to bwa. The old preview used an undefined $m and a second SM field.
	#The \t stay literal on purpose: bwa turns them into tabs itself.
	rg='@RG\tID:'"$id"'\tSM:'"$i"'\tLB:'"${id}_${sm}"'\tPL:ILLUMINA'
	echo -e "\nThis is how the new header looks\n"
	printf '%s\n' "$rg"
	echo -e '\nHEADER --> DONE\n'
	echo -e "\n\nStart BWA MEM for $i sample"
	echo "Start BWA MEM for $i sample">> registerFile
	echo ············································································································
	"$SFT/bwa/bwa" mem -v 3 -t 12 -R "$rg" \
		"$HG19/ucsc.hg19.fasta" \
		"$FQ/$i"*1.fastq.gz \
		"$FQ/$i"*2.fastq.gz > "$MD/mapped_$i.sam"
	echo -e "\nBWA MEM $i  DONE" ; paplay /usr/share/sounds/freedesktop/stereo/complete.oga
	echo -e "\nBWA MEM $i  DONE"  >> registerFile
	#No gunzip step: bwa writes an uncompressed mapped_<sample>.sam above, so a
	#mapped_<sample>.sam.gz never exists here and "gunzip -k" on it could only fail
	#(or ask to overwrite the fresh .sam if a stale archive was lying around).
done
#
#
echo ···········································································································
echo -e " ____   ___  ____ _____ ___ _   _  ____   ____    _    __  __ "
echo -e "/ ___| / _ \|  _ \_   _|_ _| \ | |/ ___| / ___|  / \  |  \/  |"
echo -e "\___ \| | | | |_) || |  | ||  \| | |  _  \___ \ / _ \ | |\/| |"
echo -e " ___) | |_| |  _ < | |  | || |\  | |_| |  ___) / ___ \| |  | |"
echo -e "|____/ \___/|_| \_\|_| |___|_| \_|\____| |____/_/   \_\_|  |_|"
echo -e                                         '\n \tSORTING SAM (PICARD)\n'
echo ···········································································································


mkdir -p "$SD"
echo 'mkdir sorted_data'>> registerFile
	#Sorting the mapped data.
	#One Picard SortSam run per sample argument: $MD/mapped_<sample>.sam is sorted by
	#coordinate into $SD/sorted<sample>.bam. "$@" and every path are quoted so names
	#with spaces or glob characters stay one word; mkdir -p tolerates a re-run.
for i in "$@"
do
	#SORTING THE SAM FILE.
	echo "Run picard SortSam $i"
	echo "Run picard SortSam $i">> registerFile
	java -jar "$SFT/picard/build/libs/picard.jar" SortSam \
		I="$MD/mapped_$i.sam" \
		O="$SD/sorted$i.bam" \
		SORT_ORDER=coordinate
	echo -e "\nPicard SortSam $i  DONE" ; paplay /usr/share/sounds/freedesktop/stereo/complete.oga
	echo -e "\nPicard SortSam $i  DONE" >> registerFile
done


echo ············································································································
echo -e " __  __    _    ____  _  _____ _   _  ____ "
echo -e "|  \/  |  / \  |  _ \| |/ /_ _| \ | |/ ___|"
echo -e "| |\/| | / _ \ | |_) | ' / | ||  \| | |  _ "
echo -e "| |  | |/ ___ \|  _ <| . \ | || |\  | |_| |"
echo -e "|_|  |_/_/   \_\_| \_\_|\_\___|_| \_|\____|"
echo -e "                                           "
echo -e " ____  _   _ ____  _     ___ ____    _  _____ _____ ____  "
echo -e "|  _ \| | | |  _ \| |   |_ _/ ___|  / \|_   _| ____/ ___| "
echo -e "| | | | | | | |_) | |    | | |     / _ \ | | |  _| \___ \ "
echo -e "| |_| | |_| |  __/| |___ | | |___ / ___ \| | | |___ ___) |"
echo -e "|____/ \___/|_|   |_____|___\____/_/   \_\_| |_____|____/ "
echo -e "                                                          "
echo -e " ____ ___ ____    _    ____  ____  "
echo -e "|  _ \_ _/ ___|  / \  |  _ \|  _ \ "
echo -e "| |_) | | |     / _ \ | |_) | | | |"
echo -e "|  __/| | |___ / ___ \|  _ <| |_| |"
echo -e "|_|  |___\____/_/   \_\_| \_\____/ "
echo -e "                                   "
echo -e                                         '\n \tMARKING DUPLICATES (PICARD)\n'
echo ············································································································


	#Selecting the duplicate reads from the mapped and sorted reads.
	#One Picard MarkDuplicates run per sample argument; REMOVE_DUPLICATES=true writes
	#the output BAM without the duplicates. The metrics go to marked_dup_metrics_<sample>.txt.
mkdir -p "$DD"
echo 'mkdir dedupped_data'>> registerFile

for i in "$@"
do
	#Mark duplicates PICARD
	echo "Start picard MarkDuplicates $i "
	echo "Start picard MarkDuplicates $i ">>registerFile
	#ASSUME_SORT_ORDER is the option that takes a sort order, and the input was
	#written with SORT_ORDER=coordinate. The former "AS=SortOrder" handed the name
	#of the option's type to the boolean ASSUME_SORTED switch.
	java -jar "$SFT/picard/build/libs/picard.jar" MarkDuplicates \
		I="$SD/sorted$i.bam" \
		O="$DD/dedupped_$i.bam" \
		M="$DD/marked_dup_metrics_$i.txt" \
		REMOVE_DUPLICATES=true \
		ASSUME_SORT_ORDER=coordinate
	echo -e "\n PICARD MarkDuplicates $i DONE" ;paplay /usr/share/sounds/freedesktop/stereo/complete.oga
	echo -e "\n PICARD MarkDuplicates $i DONE"  >> registerFile
done


echo ············································································································
echo -e " ____  _____ ____  _   _ ____  ____  _____ ____    ____    _    __  __ "
echo -e "|  _ \| ____|  _ \| | | |  _ \|  _ \| ____|  _ \  | __ )  / \  |  \/  |"
echo -e "| | | |  _| | | | | | | | |_) | |_) |  _| | | | | |  _ \ / _ \ | |\/| |"
echo -e "| |_| | |___| |_| | |_| |  __/|  __/| |___| |_| | | |_) / ___ \| |  | |"
echo -e "|____/|_____|____/ \___/|_|   |_|   |_____|____/  |____/_/   \_\_|  |_|"
echo -e "                                                                       "
echo -e " ___ _   _ ____  _______  __  ____    _    ___   _____ ___ _     _____   "
echo -e "|_ _| \ | |  _ \| ____\ \/ / | __ )  / \  |_ _| |  ___|_ _| |   | ____|  "
echo -e " | ||  \| | | | |  _|  \  /  |  _ \ / _ \  | |  | |_   | || |   |  _|    "
echo -e " | || |\  | |_| | |___ /  \  | |_) / ___ \ | |  |  _|  | || |___| |___ _ "
echo -e "|___|_| \_|____/|_____/_/\_\ |____/_/   \_\___| |_|   |___|_____|_____(_)"
echo -e "                                                                         "
echo -e                                         '\n \t Dedupped BAM index (BAI) file. \n'
echo ············································································································


	#Create a .BAI index for every dedupped BAM file.
	#"$@" and the paths are quoted so each sample name stays a single word.
for i in "$@"
do
	#Indexing the BAM files.
	#Generates $DD/dedupped_<sample>.bai from the MarkDuplicates output.
	echo "Indexing $i BAM files"
	echo "Indexing $i BAM files" >> registerFile
	java -jar "$SFT/picard/build/libs/picard.jar" BuildBamIndex \
		I="$DD/dedupped_$i.bam" \
		O="$DD/dedupped_$i.bai"
	echo -e "\n PICARD BuildBamIndex $i DONE" ;paplay /usr/share/sounds/freedesktop/stereo/complete.oga
	echo -e "\n PICARD BuildBamIndex $i DONE" >> registerFile
done


echo ············································································································
echo -e " ____   ___  ____  ____   "
echo -e "| __ ) / _ \/ ___||  _ \  "
echo -e "|  _ \| | | \___ \| |_) | "
echo -e "| |_) | |_| |___) |  _ <  "
echo -e "|____/ \__\_\____/|_| \_\ "
echo -e "                          "
echo -e " ____  _____ ____    _    _     ___ ____  ____      _  _____ ___ ___  _   _  "
echo -e "|  _ \| ____/ ___|  / \  | |   |_ _| __ )|  _ \    / \|_   _|_ _/ _ \| \ | | "
echo -e "| |_) |  _|| |     / _ \ | |    | ||  _ \| |_) |  / _ \ | |  | | | | |  \| | "
echo -e "|  _ <| |__| |___ / ___ \| |___ | || |_) |  _ <  / ___ \| |  | | |_| | |\  | "
echo -e "|_| \_\_____\____/_/   \_\_____|___|____/|_| \_\/_/   \_\_| |___\___/|_| \_| "
echo -e "                                                                             "
echo -e " ____    _  _____  _     "
echo -e "|  _ \  / \|_   _|/ \    "
echo -e "| | | |/ _ \ | | / _ \   "
echo -e "| |_| / ___ \| |/ ___ \  "
echo -e "|____/_/   \_\_/_/   \_\ "
echo -e                                     '\n \tBQSR (GATK)\n'
echo -e                                      '\t -.1 Recalibration data table\n'
echo -e                                      '\t -.2 Recalibration data table \n'
echo ············································································································


	#Recalibrating the reads: first-pass base quality score recalibration table.
	#One GATK BaseRecalibrator run per sample argument, using the dbSNP, 1000G indels
	#and Mills indels files of the HG19 bundle as known sites.
mkdir -p "$RBQSRD"
echo 'mkdir recalibrated_bqsr_data' >> registerFile
for i in "$@"
do
	#GATK BaseRecalibration first table
	#The messages name the current sample ($i); they used to print $1, i.e. always
	#the first sample, whatever the loop was processing.
	echo "Starts GATK $i Recalibrator"
	echo "Starts GATK $i Recalibrator" >> registerFile
	java -jar "$SFT/gatk/build/libs/gatk-package-4.0.6.0-22-g9d9484f-SNAPSHOT-local.jar" BaseRecalibrator \
		-R "$HG19/ucsc.hg19.fasta" \
		-I "$DD/dedupped_$i.bam" \
		--known-sites "$HG19/dbsnp_138.hg19.vcf.gz" \
		--known-sites "$HG19/1000G_phase1.indels.hg19.sites.vcf.gz" \
		--known-sites "$HG19/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf.gz" \
		-O "$RBQSRD/before_recalibrated_bqsr_data_$i.recal.table"
	#		--bqsr 1st_racalibration.table
	echo -e "\n GATK BaseRecalibrator $i DONE" ;paplay /usr/share/sounds/freedesktop/stereo/complete.oga
	echo -e "\n GATK BaseRecalibrator $i DONE" >>registerFile
done


echo ············································································································
echo -e "    _    ____  ____  _  __   _____ _   _  ____   ____   ___  ____  ____   "
echo -e "   / \  |  _ \|  _ \| | \ \ / /_ _| \ | |/ ___| | __ ) / _ \/ ___||  _ \  "
echo -e "  / _ \ | |_) | |_) | |  \ V / | ||  \| | |  _  |  _ \| | | \___ \| |_) | "
echo -e " / ___ \|  __/|  __/| |___| |  | || |\  | |_| | | |_) | |_| |___) |  _ <  "
echo -e "/_/   \_\_|   |_|   |_____|_| |___|_| \_|\____| |____/ \__\_\____/|_| \_\ "
echo -e "                                                                          "
echo -e "__        _____ _____ _   _    ____    _  _____ _  __ "
echo -e "\ \      / /_ _|_   _| | | |  / ___|  / \|_   _| |/ / "
echo -e " \ \ /\ / / | |  | | | |_| | | |  _  / _ \ | | | ' /  "
echo -e "  \ V  V /  | |  | | |  _  | | |_| |/ ___ \| | | . \  "
echo -e "   \_/\_/  |___| |_| |_| |_|  \____/_/   \_\_| |_|\_\ "
echo -e "                                                     "
echo -e                                       '\n \tApplying BQSR GATK\n'
echo ············································································································


	#Applying the recalibration table to the BAM file to continue the analysis.
	#One GATK ApplyBQSR run per sample argument: dedupped_<sample>.bam plus its first-pass
	#table give $ABQSRD/applied_bqsr_data_<sample>.bam.
mkdir -p "$ABQSRD"
echo 'mkdir applied_bqsr_data' >> registerFile
for i in "$@"
do
	#ApplyBQSR
	#The start message names GATK, the tool that is actually run (it used to say
	#picard), which also matches the DONE message below.
	echo "Starts GATK  $i ApplyBQSR"
	echo "Starts GATK  $i ApplyBQSR" >> registerFile
	java -jar "$SFT/gatk/build/libs/gatk-package-4.0.6.0-22-g9d9484f-SNAPSHOT-local.jar" ApplyBQSR \
		-R "$HG19/ucsc.hg19.fasta" \
		-I "$DD/dedupped_$i.bam" \
		--bqsr "$RBQSRD/before_recalibrated_bqsr_data_$i.recal.table" \
		-O "$ABQSRD/applied_bqsr_data_$i.bam"
	echo -e "\nGATK ApplyBQSR $i DONE" ;paplay /usr/share/sounds/freedesktop/stereo/complete.oga
	echo -e "\nGATK ApplyBQSR $i DONE" >>registerFile
done


echo ············································································································
echo -e "    _    _   _    _    _  __   ____________ "
echo -e "   / \  | \ | |  / \  | | \ \ / /__  / ____|"
echo -e "  / _ \ |  \| | / _ \ | |  \ V /  / /|  _|  "
echo -e " / ___ \| |\  |/ ___ \| |___| |  / /_| |___ "
echo -e "/_/   \_\_| \_/_/   \_\_____|_| /____|_____|"
echo -e "  ____ _____     ___    ____  ___    _  _____ _____ ____    _____ ___  ____    "
echo -e " / ___/ _ \ \   / / \  |  _ \|_ _|  / \|_   _| ____/ ___|  |  ___/ _ \|  _ \   "
echo -e "| |  | | | \ \ / / _ \ | |_) || |  / _ \ | | |  _| \___ \  | |_ | | | | |_) |  "
echo -e "| |__| |_| |\ V / ___ \|  _ < | | / ___ \| | | |___ ___) | |  _|| |_| |  _ <   "
echo -e " \____\___/  \_/_/   \_\_| \_\___/_/   \_\_| |_____|____/  |_|   \___/|_| \_\  "
echo -e "                                                                               "
echo -e " ____  ____  _____   ____  _     ___ _____ ____    "
echo -e "|  _ \|  _ \|  ___| |  _ \| |   / _ \_   _/ ___|   "
echo -e "| |_) | | | | |_    | |_) | |  | | | || | \___ \   "
echo -e "|  __/| |_| |  _|   |  __/| |__| |_| || |  ___) |  "
echo -e "|_|   |____/|_|     |_|   |_____\___/ |_| |____/   "
echo -e "                                                   "
echo -e "  ____ ___  __  __ ____   _    ____      _  _____ ___ ___  _   _    ___  _____  "
echo -e " / ___/ _ \|  \/  |  _ \ / \  |  _ \    / \|_   _|_ _/ _ \| \ | |  / _ \|  ___| "
echo -e "| |  | | | | |\/| | |_) / _ \ | |_) |  / _ \ | |  | | | | |  \| | | | | | |_    "
echo -e "| |__| |_| | |  | |  __/ ___ \|  _ <  / ___ \| |  | | |_| | |\  | | |_| |  _|   "
echo -e " \____\___/|_|  |_|_| /_/   \_\_| \_\/_/   \_\_| |___\___/|_| \_|  \___/|_|     "
echo -e "                                                                                "
echo -e " _____ _   _ _____  "
echo -e "|_   _| | | | ____| "
echo -e "  | | | |_| |  _|   "
echo -e "  | | |  _  | |___  "
echo -e "  |_| |_| |_|_____| "
echo -e "                    "
echo -e " ____  _____ ____    _    _     ___ ____  ____      _  _____ ___ ___  _   _  "
echo -e "|  _ \| ____/ ___|  / \  | |   |_ _| __ )|  _ \    / \|_   _|_ _/ _ \| \ | | "
echo -e "| |_) |  _|| |     / _ \ | |    | ||  _ \| |_) |  / _ \ | |  | | | | |  \| | "
echo -e "|  _ <| |__| |___ / ___ \| |___ | || |_) |  _ <  / ___ \| |  | | |_| | |\  | "
echo -e "|_| \_\_____\____/_/   \_\_____|___|____/|_| \_\/_/   \_\_| |___\___/|_| \_| "
echo -e "                                                                             "
echo -e " ____    _  _____  _       ____    _  _____ _  __ "
echo -e "|  _ \  / \|_   _|/ \     / ___|  / \|_   _| |/ / "
echo -e "| | | |/ _ \ | | / _ \   | |  _  / _ \ | | | ' /  "
echo -e "| |_| / ___ \| |/ ___ \  | |_| |/ ___ \| | | . \  "
echo -e "|____/_/   \_\_/_/   \_\  \____/_/   \_\_| |_|\_\ "
echo -e "                                                  "
echo -e             '\n \tAnalyze Covariates for PDF plots comparation of the Recalibration GATK\n'
echo -e 		"Comparisson among the bias of the sequencer errors"
echo ············································································································


mkdir -p "$PRD"
echo 'mkdir plot_recalibration_data' >> registerFile
echo 'Starts GATK Second Recalibration'
echo 'Starts GATK Second Recalibration' >> registerFile

	#GATK BaseRecalibration second table for the next step, AnalyzeCovariates.
	#Generates the second-pass table: BaseRecalibrator is run again, this time on the
	#BAM created by ApplyBQSR, so that both passes can be plotted against each other.

for i in "$@"
do
	java -jar "$SFT/gatk/build/libs/gatk-package-4.0.6.0-22-g9d9484f-SNAPSHOT-local.jar" BaseRecalibrator \
		-I "$ABQSRD/applied_bqsr_data_$i.bam" \
		-R "$HG19/ucsc.hg19.fasta" \
		--known-sites "$HG19/dbsnp_138.hg19.vcf.gz" \
		--known-sites "$HG19/1000G_phase1.indels.hg19.sites.vcf.gz" \
		--known-sites "$HG19/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf.gz" \
		-O "$PRD/after_recalibrated_bqsr_data_$i.recal.table"
	#second bqsr table for comparison.
done

echo 'Second recalibration GATK table --> DONE'

	#Generating the plots of the recalibration tables with AnalyzeCovariates and saving a csv copy.
	#-before is the first-pass table, -after the second-pass table written above.

echo  -e '\n  Generating Plots, pdf and csv files'

for i in "$@"
do
	echo -e "\nGenerating the files for $i starting..."
	java -jar "$SFT/gatk/build/libs/gatk-package-4.0.6.0-22-g9d9484f-SNAPSHOT-local.jar" AnalyzeCovariates \
		-before "$RBQSRD/before_recalibrated_bqsr_data_$i.recal.table" \
		-after "$PRD/after_recalibrated_bqsr_data_$i.recal.table" \
		-csv "$PRD/BQSR_$i.csv" \
		-plots "$PRD/AnalyzeCovariates_bqsr_$i.pdf"
	#-e added: without it the leading \n was printed as two literal characters.
	echo -e "\nPlot and CSV file for $i is DONE!"
done

echo -e '\n Plots files generated --> DONE'
	#The CSV and PDF files hold the comparison between the first and the second pass of the recalibration for each BAM file.

echo ············································································································

echo -e " _   _    _    ____  _     ___ _______   ______  _____ "
echo -e "| | | |  / \  |  _ \| |   / _ \_   _\ \ / /  _ \| ____|"
echo -e "| |_| | / _ \ | |_) | |  | | | || |  \ V /| |_) |  _|  "
echo -e "|  _  |/ ___ \|  __/| |__| |_| || |   | | |  __/| |___ "
echo -e "|_| |_/_/   \_\_|   |_____\___/ |_|   |_| |_|   |_____|"
echo -e "                                                       "
echo -e "  ____    _    _     _     _____ ____   "
echo -e " / ___|  / \  | |   | |   | ____|  _ \  "
echo -e "| |     / _ \ | |   | |   |  _| | |_) | "
echo -e "| |___ / ___ \| |___| |___| |___|  _ <  "
echo -e " \____/_/   \_\_____|_____|_____|_| \_\ "
echo -e "                                        "
echo -e                                       '\n \tHAPLOTYPE CALLER GATK\n'

echo ············································································································


	#Ready to call for Variants.
	#One GATK HaplotypeCaller run per sample argument in GVCF mode (-ERC GVCF), so the
	#samples can be joint-genotyped later. Each run writes HC_data_<sample>.g.vcf and the
	#-bamout file HC_bamout_<sample>.bam into $HCGVCFD.
mkdir -p "$HCGVCFD"
echo 'mkdir haplotype_caller_gvcf_data' >> registerFile
for i in "$@"
do
	#HaplotypeCaller for each sample for later joint genotyping.
	echo -e "\nGATK HaplotypeCallerGVCF for $i STARTS"
	echo -e "\nGATK HaplotypeCallerGVCF for $i STARTS">> registerFile
	java -jar "$SFT/gatk/build/libs/gatk-package-4.0.6.0-22-g9d9484f-SNAPSHOT-local.jar" HaplotypeCaller \
		-R "$HG19/ucsc.hg19.fasta" \
		-I "$ABQSRD/applied_bqsr_data_$i.bam" \
		-ERC GVCF \
		-bamout "$HCGVCFD/HC_bamout_$i.bam" \
		-O "$HCGVCFD/HC_data_$i.g.vcf" \
		-G StandardAnnotation \
		-G AS_StandardAnnotation \
		-G StandardHCAnnotation \
		-A FisherStrand -A StrandOddsRatio -A RMSMappingQuality -A MappingQualityRankSumTest -A ReadPosRankSumTest -A DepthPerSampleHC -A BaseQualityRankSumTest -A ExcessHet -A StrandArtifact \
		--annotate-with-num-discovered-alleles=true
	echo -e "\nGATK HaplotypeCallerGVCF ERC GVCF for $i DONE" ;paplay /usr/share/sounds/freedesktop/stereo/complete.oga
	echo -e "\nGATK HaplotypeCallerGVCF ERC GVCF for $i DONE" >> registerFile
done


	#If more than one input the pipeline will continue the Joint analysis.

if [ $# -gt 1 ]
	then
       mkdir $MDAP/combined_gvcf_data
        echo 'mkdir combined_gvcf_data'>> registerFile

#	#Obtenido en GCVF pasamos al Joint Genotyping on one or more samples called with HC.
#
	echo -e '\nUsing GATK COMBINEGVCFs for merging GVCFs'
	echo -e '\nUsing GATK COMBINEGVCFs for merging GVCFs'>> registerFile
	echo -e "   ____ ___  __  __ ____ ___ _   _ _____    ______     ______ _____ ____   "
	echo -e "  / ___/ _ \|  \/  | __ )_ _| \ | | ____|  / ___\ \   / / ___|  ___/ ___|  "
	echo -e " | |  | | | | |\/| |  _ \| ||  \| |  _|   | |  _ \ \ / / |   | |_  \___ \  "
	echo -e " | |__| |_| | |  | | |_) | || |\  | |___  | |_| | \ V /| |___|  _|  ___) | "
	echo -e "  \____\___/|_|  |_|____/___|_| \_|_____|  \____|  \_/  \____|_|   |____/  "

#	#Switch to the directory containing the Haplotypecaller generated gvcfs and obtaining a list use CombineGVCFS
#	#only works like this or by separated.
       cd $HCGVCFD ; ls *.g.vcf > my_list_of_gvcfs_files_to_combine.list
#	#List obtained
	java -jar $SFT/gatk/build/libs/gatk-package-4.0.6.0-22-g9d9484f-SNAPSHOT-local.jar CombineGVCFs \
	-R $HG19/ucsc.hg19.fasta \
	--variant $HCGVCFD/my_list_of_gvcfs_files_to_combine.list \
	-O $CGVCFD/combined.g.vcf

	# Report that CombineGVCFs has finished and create the directory that will
	# receive the genotyped VCF.
	echo ············································································································
	echo -e "     _  ___ ___ _   _ _____ "
	echo -e "    | |/ _ \_ _| \ | |_   _|"
	echo -e " _  | | | | | ||  \| | | |  "
	echo -e "| |_| | |_| | || |\  | | |  "
	echo -e " \___/ \___/___|_| \_| |_|  "
	echo -e "                            "
	echo -e "  ____ _   _  ___ _______   ______ ___ _   _  ____ "
	echo -e " / ___| \ | |/ _ \_   _\ \ / /  _ \_ _| \ | |/ ___|"
	echo -e "| |  _|  \| | | | || |  \ V /| |_) | ||  \| | |  _ "
	echo -e "| |_| | |\  | |_| || |   | | |  __/| || |\  | |_| |"
	echo -e " \____|_| \_|\___/ |_|   |_| |_|  |___|_| \_|\____|"
	echo -e "                                                   "
	echo -e '\n \tJOINT GENOTYPING (GATK)\n'
	echo ············································································································
	echo -e '\nGATK COMBINEGVCFs DONE'
	# The completion chime is optional: skip it on hosts without paplay
	# rather than printing "command not found".
	if command -v paplay >/dev/null 2>&1; then
		paplay /usr/share/sounds/freedesktop/stereo/complete.oga
	fi
	echo -e '\nGATK COMBINEGVCFs DONE'  >> registerFile
	# -p keeps a re-run from erroring when the directory already exists.
	mkdir -p "${MDAP}/genotyped_vcf_data"
	echo 'mkdir genotyped_data_vcf'>> registerFile
        # Joint-genotype the combined GVCF ($CGVCFD/combined.g.vcf) into the
	# final VCF ($GVCFD/genotyped_data.vcf) with GATK GenotypeGVCFs.
	echo -e '\nUsing GATK GenotypeGVCFs for final VCF'
	echo -e "   ____ _____ _   _  ___ _______   ______  _____ ____   "
	echo -e "  / ___| ____| \ | |/ _ \_   _\ \ / /  _ \| ____|  _ \  "
	echo -e " | |  _|  _| |  \| | | | || |  \ V /| |_) |  _| | | | | "
	echo -e " | |_| | |___| |\  | |_| || |   | | |  __/| |___| |_| | "
	echo -e "  \____|_____|_| \_|\___/ |_|   |_| |_|   |_____|____/  "
	echo -e "                                                        "
	echo -e "   ____ ___  __  __ ____ ___ _   _ _____ ____     ______     ______ _____ ____   "
	echo -e "  / ___/ _ \|  \/  | __ )_ _| \ | | ____|  _ \   / ___\ \   / / ___|  ___/ ___|  "
	echo -e " | |  | | | | |\/| |  _ \| ||  \| |  _| | | | | | |  _ \ \ / / |   | |_  \___ \  "
	echo -e " | |__| |_| | |  | | |_) | || |\  | |___| |_| | | |_| | \ V /| |___|  _|  ___) | "
	echo -e "  \____\___/|_|  |_|____/___|_| \_|_____|____/   \____|  \_/  \____|_|   |____/  "
	echo -e "                                                                                 "
	echo -e "  __  __ _   _ _   _____ ___  "
	echo -e " |  \/  | | | | | |_   _|_ _| "
	echo -e " | |\/| | | | | |   | |  | |  "
	echo -e " | |  | | |_| | |___| |  | |  "
	echo -e " |_|  |_|\___/|_____|_| |___| "
	echo -e "                              "

	java -jar "${SFT}/gatk/build/libs/gatk-package-4.0.6.0-22-g9d9484f-SNAPSHOT-local.jar" GenotypeGVCFs \
	-R "${HG19}/ucsc.hg19.fasta" \
	-V "${CGVCFD}/combined.g.vcf" \
	-O "${GVCFD}/genotyped_data.vcf"

	# Run the external genotype script only when GENOTYPE_SCRIPT is set.
	# An empty, unquoted $GENOTYPE_SCRIPT would leave a bare `bash`, which
	# reads its commands from stdin and can stall the whole pipeline.
	if [[ -n "${GENOTYPE_SCRIPT:-}" ]]; then
		bash "${GENOTYPE_SCRIPT}"
	else
		echo 'GENOTYPE_SCRIPT is not set; skipping the external genotype script' >&2
	fi

	echo -e '\nGATK GenotypeGVCFs DONE'
	# Optional completion chime; skipped when paplay is not installed.
	if command -v paplay >/dev/null 2>&1; then
		paplay /usr/share/sounds/freedesktop/stereo/complete.oga
	fi
	echo -e '\nGATK GenotypeGVCFs DONE' >> registerFile
	#if only one INPUT continue to joint analysis.
else
	# Single-sample path: no CombineGVCFs step, the HaplotypeCaller GVCF of
	# the sample named by the first script argument ($1) is genotyped directly.
	# -p keeps a re-run from erroring when the directory already exists.
	mkdir -p "${MDAP}/genotyped_vcf_data"
	echo 'mkdir genotyped_data_vcf'>> registerFile
	echo -e "   ____ _____ _   _  ___ _______   ______  _____ ____   "
	echo -e "  / ___| ____| \ | |/ _ \_   _\ \ / /  _ \| ____|  _ \  "
	echo -e " | |  _|  _| |  \| | | | || |  \ V /| |_) |  _| | | | | "
	echo -e " | |_| | |___| |\  | |_| || |   | | |  __/| |___| |_| | "
	echo -e "  \____|_____|_| \_|\___/ |_|   |_| |_|   |_____|____/  "
	echo -e "                                                        "
	echo -e "   ____ ___  __  __ ____ ___ _   _ _____ ____     ______     ______ _____ ____   "
	echo -e "  / ___/ _ \|  \/  | __ )_ _| \ | | ____|  _ \   / ___\ \   / / ___|  ___/ ___|  "
	echo -e " | |  | | | | |\/| |  _ \| ||  \| |  _| | | | | | |  _ \ \ / / |   | |_  \___ \  "
	echo -e " | |__| |_| | |  | | |_) | || |\  | |___| |_| | | |_| | \ V /| |___|  _|  ___) | "
	echo -e "  \____\___/|_|  |_|____/___|_| \_|_____|____/   \____|  \_/  \____|_|   |____/  "
	echo -e "                                                                                 "
	echo -e "  ____   ___  _     ___   "
	echo -e " / ___| / _ \| |   / _ \  "
	echo -e " \___ \| | | | |  | | | | "
	echo -e "  ___) | |_| | |__| |_| | "
	echo -e " |____/ \___/|_____\___/  "
	echo -e "                          "

	# GenotypeGVCFs into the final VCF ($GVCFD/genotyped_data.vcf).
	echo -e '\nUsing GATK GenotypeGVCFs for final VCF'
	java -jar "${SFT}/gatk/build/libs/gatk-package-4.0.6.0-22-g9d9484f-SNAPSHOT-local.jar" GenotypeGVCFs \
	-R "${HG19}/ucsc.hg19.fasta" \
	-V "${HCGVCFD}/HC_data_${1}.g.vcf" \
	-G StandardAnnotation \
	-O "${GVCFD}/genotyped_data.vcf"
	echo -e '\nGATK GenotypeGVCFs DONE'
	# Optional completion chime; skipped when paplay is not installed.
	if command -v paplay >/dev/null 2>&1; then
		paplay /usr/share/sounds/freedesktop/stereo/complete.oga
	fi
	echo -e '\nGATK GenotypeGVCFs DONE' >> registerFile
fi


# Announce the hard-filtering stage and create its output directory.
echo ············································································································
echo -e " _   _    _    ____  ____    _____ ___ _   _____ _____ ____  ___ _   _  ____ "
echo -e "| | | |  / \  |  _ \|  _ \  |  ___|_ _| | |_   _| ____|  _ \|_ _| \ | |/ ___|"
echo -e "| |_| | / _ \ | |_) | | | | | |_   | || |   | | |  _| | |_) || ||  \| | |  _ "
echo -e "|  _  |/ ___ \|  _ <| |_| | |  _|  | || |___| | | |___|  _ < | || |\  | |_| |"
echo -e "|_| |_/_/   \_\_| \_\____/  |_|   |___|_____|_| |_____|_| \_\___|_| \_|\____|"
echo -e "                                                                             "
echo -e  "\n \tHard filtering if less than 30 samples and doing it in the classical way, selecting variants\n"
echo ············································································································


	#HARD FILTERING
	#First step: extracting the SNPs
	#Second step: extracting the INDELs


# -p keeps a re-run from erroring when the directory already exists.
mkdir -p "${MDAP}/variant_filtration_vcf_data"
echo "variantfiltration_data_vcf ">> registerFile

	# Hard filtering of the genotyped call set ($GVCFD/genotyped_data.vcf):
	#   1. select the SNPs,   2. tag SNPs matching the SNP filter expression,
	#   3. select the INDELs, 4. tag INDELs matching the INDEL filter expression,
	#   5. merge both tagged sets into one VCF, ready for annotation.
	# All intermediate and final files are written under $VFVCFD.
	# Each step reports on stderr when its GATK tool exits non-zero.
gatk_jar="${SFT}/gatk/build/libs/gatk-package-4.0.6.0-22-g9d9484f-SNAPSHOT-local.jar"
ref_fasta="${HG19}/ucsc.hg19.fasta"

	# 1. Extract the SNPs from the call set.
echo "Extract the SNP's from the call set."

java -jar "${gatk_jar}" SelectVariants \
-R "${ref_fasta}" \
-V "${GVCFD}/genotyped_data.vcf" \
--select-type-to-include SNP \
-O "${VFVCFD}/selected_raw_snp.vcf" \
|| echo "ERROR: SelectVariants (SNP) failed" >&2
	# selected_raw_snp.vcf now holds just the SNPs of the original call set.


	# 2. Apply the filters to the SNP call set.

java -jar "${gatk_jar}" VariantFiltration \
-R "${ref_fasta}" \
-V "${VFVCFD}/selected_raw_snp.vcf" \
--filter-expression "QD < 2.0 || FS > 60.0 || MQ < 40.0 || MQRankSum < -12.5 || ReadPosRankSum < -8.0" \
--filter-name "my_SNP_filter" \
-O "${VFVCFD}/filtered_SNP_data.vcf" \
|| echo "ERROR: VariantFiltration (SNP) failed" >&2


	# 3. Extract the INDELs from the ORIGINAL call set.
java -jar "${gatk_jar}" SelectVariants \
-R "${ref_fasta}" \
-V "${GVCFD}/genotyped_data.vcf" \
--select-type-to-include INDEL \
-O "${VFVCFD}/selected_raw_indels.vcf" \
|| echo "ERROR: SelectVariants (INDEL) failed" >&2
	# selected_raw_indels.vcf now holds just the INDELs of the original call set.


	# 4. Apply the filters to the INDEL call set.
java -jar "${gatk_jar}" VariantFiltration \
-R "${ref_fasta}" \
-V "${VFVCFD}/selected_raw_indels.vcf" \
--filter-expression "QD < 2.0 || FS > 200.0 || ReadPosRankSum < -20.0" \
--filter-name "my_INDEL_filter" \
-O "${VFVCFD}/filtered_INDEL_data.vcf" \
|| echo "ERROR: VariantFiltration (INDEL) failed" >&2

	# 5. Merge the filtered SNP and INDEL files back into a single VCF so it
	#    is ready for annotation.

java -jar "${gatk_jar}" MergeVcfs \
-R "${ref_fasta}" \
-I "${VFVCFD}/filtered_SNP_data.vcf" \
-I "${VFVCFD}/filtered_INDEL_data.vcf" \
-O "${VFVCFD}/filtered_INDEL_SNP_data.vcf" \
|| echo "ERROR: MergeVcfs failed" >&2


#######################################
# Remove from a VCF every record that must not reach annotation.
# Header lines (first field starting with '#') are always kept. A record is
# dropped when its chromosome starts with chrM or chrUn, when its FILTER
# column (7) contains my_SNP_filter or my_INDEL_filter, or when its QUAL
# column (6) compares below 100.
# Done in one awk pass, so no intermediate scratch file is left behind.
# Arguments: $1 - input VCF, $2 - output VCF
# Returns:   exit status of awk
#######################################
drop_flagged_variants() {
	awk 'BEGIN { FS = OFS = "\t" }
		$1 ~ /^#/ { print; next }
		$1 ~ /^chrM/ || $1 ~ /^chrUn/ { next }
		$7 ~ /my_SNP_filter/ || $7 ~ /my_INDEL_filter/ { next }
		$6 < 100 { next }
		{ print }' "$1" > "$2"
}

echo ············································································································
echo -e "Filtering QUALITY,chrM,chrUn,my_SNP_filter and my_INDEL_filter out of the VCF file"
echo ············································································································

drop_flagged_variants "${VFVCFD}/filtered_INDEL_SNP_data.vcf" "${VFVCFD}/pre_filtered_ready_to_annotate.vcf"

# Variant annotation stage: print the banner, create the output directory and
# run Ensembl VEP on $VCF_IN, writing $VCF_OUT. $VEP, $PLUGIN_DIR, $VEP_CACHE,
# $VEP_FASTA, $LOFTEE, $LOFTEE_PATH, $DBNSFP, $VCF_IN and $VCF_OUT are
# expected to be defined outside this section.
echo ············································································································
echo -e "__     ___    ____  ___    _    _   _ _____ "
echo -e "\ \   / / \  |  _ \|_ _|  / \  | \ | |_   _|"
echo -e " \ \ / / _ \ | |_) || |  / _ \ |  \| | | |  "
echo -e "  \ V / ___ \|  _ < | | / ___ \| |\  | | |  "
echo -e "   \_/_/   \_\_| \_\___/_/   \_\_| \_| |_|  "
echo -e "                                            "
echo -e "    _    _   _ _   _  ___ _____  _  _____ ___ ___  _   _ "
echo -e "   / \  | \ | | \ | |/ _ \_   _|/ \|_   _|_ _/ _ \| \ | |"
echo -e "  / _ \ |  \| |  \| | | | || | / _ \ | |  | | | | |  \| |"
echo -e " / ___ \| |\  | |\  | |_| || |/ ___ \| |  | | |_| | |\  |"
echo -e "/_/   \_\_| \_|_| \_|\___/ |_/_/   \_\_| |___\___/|_| \_|"
echo -e "                                                         "
echo -e "\n \tVARIANT ANNOTATION (VEP ENSEMBL)\n"

echo ············································································································

# -p keeps a re-run from erroring when the directory already exists.
mkdir -p "${MDAP}/vep_vcf_annotated_data"
echo ············································································································
echo -e '\nFilter by popuplation frequencies\n'
echo ············································································································
	# Run VEP offline against the local cache (GRCh37) with the LoF and dbNSFP
	# plugins; output is VCF-formatted (--vcf) with the listed --fields.
	# --force_overwrite is passed once (it was previously given twice).
	#················································································································
perl "${VEP}" --everything --allele_number --cache --offline --dir_plugins "${PLUGIN_DIR}" \
--dir "${VEP_CACHE}" --v --assembly GRCh37 --fork 12 --fasta "${VEP_FASTA}" \
--force_overwrite --symbol --canonical --sift b --polyphen b --af_1kg --af_gnomad --af_esp --af  \
--ccds --protein --uniprot --hgvs --pubmed --biotype --regulatory --numbers --domains \
--gene_phenotype --max_af --variant_class --filter_common \
--plugin "LoF,human_ancestor_fa:${LOFTEE},${LOFTEE_PATH}" \
--plugin "dbNSFP,${DBNSFP},ALL" \
--vcf --fields "VARIANT_CLASS,Location,Uploaded_variation,Allele,Gene,Feature,Feature_type,BIOTYPE,SYMBOL,Consequence,CANONICAL,LoF_flags,LoF_filter,LoF,CDS_position,HGVSc,HGVSp,HGVSg,SIFT,PolyPhen,CADD_raw,FATHMM_pred,GERP++_RS,GERP++_NR,SWISSPROT,PUBMED,CLIN_SIG,FILTER,cDNA_position,EUR_AF,gnomAD_exomes_AF,gnomAD_genomes_AF,1000Gp3_AF,1000Gp3_EUR_AF,ExAC_EAS_AF" \
-i "${VCF_IN}" -o "${VCF_OUT}"


#######################################
# Keep only the rows of a tab-separated file whose columns 30-35 all compare
# below 0.0100 (or equal a single space). Lines whose first field starts
# with '#' are always kept.
# NOTE(review): columns 30-35 line up with the six allele-frequency entries
# of the VEP --fields list; if the input is VCF-formatted (annotations packed
# into the INFO column) those tab columns may not hold these values, so this
# filter would keep everything — confirm the expected input layout.
# Arguments: $1 - input file, $2 - output file
# Returns:   non-zero when the input cannot be opened or awk fails
#######################################
keep_rare_variants() {
	# The input is redirected rather than passed as an operand: an empty
	# path then fails immediately instead of leaving awk reading stdin.
	awk 'BEGIN { FS = OFS = "\t" }
		$1 ~ /^#/ { print; next }
		{
			for (col = 30; col <= 35; col++)
				if (!($col < 0.0100 || $col == " "))
					next
			print
		}' < "$1" > "$2"
}

echo "Filtering population frequencies and Consequences out of the VCF/TSV file"
# Write into $MDAP explicitly so the file is where the message below says it
# is, whatever the current working directory happens to be.
keep_rare_variants "${VCF_OUT}" "${MDAP}/pre-filtered.vcf"

echo -e '\n VEP prefiltered VCF file ready in the MDAP directory'
echo -e 'DONE'