# 04-VCFFilter-Rename.sh
############################################# Head of all Scripts ####################################
# Shared configuration block for the SNP-calling scripts. Adjust the paths for your system.
# Not every variable is used by every step: the filter/rename step in this file
# reads only outDir, met and LOG.
refDir=/work2/02786/taslima/stampede2/dbs/PV/VS16_HAP1_V1/Panicum_virgatum_var_VS16_HAP1_V1_release/Panicum_virgatum_var_VS16/sequences # directory that holds the reference genome file
ref=Panicum_virgatum_var_VS16.mainGenome.fasta # name of the reference genome file
outDir=/scratch/02786/taslima/data/PV_Reseq # output directory; it must be created before running the script
met=/work2/02786/taslima/stampede2/dbs/PV/Pvirg_48_midwest_metadata_mod.csv # sample metadata; the first comma-separated field of each line is used as the sample name
TMP=/scratch/02786/taslima/data/phalli/Temp # temporary directory (not referenced by the step in this file)
LOG="logs" # log directory, relative to the directory the script is run from
# Load the required modules on TACC.
ml intel/17.0.4
ml picard
ml samtools
# Export the locale: a plain assignment is only visible to child processes when
# LC_ALL was already exported by the calling environment.
export LC_ALL=C
#################################### Step 4: filter and rename VarScan VCFs ############################################
# Generates (does not run) one shell command per sample and appends it to $PARAM.
# Each generated command:
#   1. decompresses ${outDir}/VarScan/<sample>.vcf.gz,
#   2. rewrites it with awk: the #CHROM header gets the sample name as its 10th
#      column, other header lines pass through unchanged, and for every record
#      column 10 is cut down to its first ':'-separated sub-field, column 9 is
#      set to "GT", and the record is kept only when sub-fields 2 + 3 sum to >= 8,
#   3. bgzips the result to ${outDir}/VarScan_Filter/<sample>.d8.vcf.gz, with
#      bgzip's stderr going to ${LOG}/VCFFil_<sample>.log.
# NOTE(review): sub-fields 2 and 3 are presumably read depths (the ".d8" suffix
# suggests a depth >= 8 filter) - confirm against the FORMAT layout of the input VCFs.
#
# Reads:  outDir, met, LOG (assigned in the configuration header of this file).
# Writes: $PARAM in the current directory; creates $LOG and the output directory if missing.
# Exits 1 when the input directory or the metadata file is missing.

PARAM=filvcf2.param
IN_DIR="${outDir}/VarScan"
OUT_DIR="${outDir}/VarScan_Filter"

# Must stay on a single line (each line of $PARAM is one command) and must not
# contain a single quote (it is wrapped in single quotes in the generated command).
# The sample name is supplied through "awk -v A=..." instead of being spliced
# into the program text.
AWK_PROG='BEGIN { FS = OFS = "\t" } /^#CHROM/ { print "#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", A; next } /^#/ { print; next } { split($10, a, ":"); $10 = a[1]; $9 = "GT"; if ((a[2] + a[3]) >= 8) print }'

# Start from a clean command list. The file appended to below is $PARAM; the
# legacy name filvcf.param is still removed so leftovers from older runs go away.
rm -f -- "$PARAM" filvcf.param

if [ ! -d "$LOG" ]; then
  echo "Log directory doesn't exist. Making $LOG"
  mkdir -p -- "$LOG" || exit 1
fi

if [ ! -d "${IN_DIR}/" ]; then
  echo "Input directory doesn't exist! Exiting ..."
  exit 1
fi

if [ ! -r "$met" ]; then
  echo "Metadata file '$met' is missing or unreadable! Exiting ..." >&2
  exit 1
fi

# The generated commands redirect into this directory, so it has to exist.
mkdir -p -- "$OUT_DIR" || exit 1

# NOTE(review): every non-blank line is treated as a sample; if the metadata
# file has a header row, its first field will be emitted as a sample too - confirm.
# "|| [ -n "$line" ]" keeps a final line that lacks a trailing newline.
while read -r line || [ -n "$line" ]; do
  SAMP=${line%%,*} # first comma-separated field
  [ -n "$SAMP" ] || continue # skip blank lines / empty sample names

  INF="${IN_DIR}/${SAMP}.vcf.gz"
  OFIL="${OUT_DIR}/${SAMP}.d8.vcf.gz"
  OLOG="${LOG}/VCFFil_${SAMP}.log"

  # '\'' emits a literal single quote around the awk program in the output line.
  printf 'gzip -dc "%s" | awk -v A="%s" '\''%s'\'' | bgzip -c > "%s" 2> "%s"\n' \
    "$INF" "$SAMP" "$AWK_PROG" "$OFIL" "$OLOG" >> "$PARAM"
done < "$met"