From f7478a9c1977004c2885a1df267ebb5450817e2b Mon Sep 17 00:00:00 2001
From: Ahsan <umairahsan57@gmail.com>
Date: Thu, 20 Jul 2023 17:22:22 -0400
Subject: [PATCH] allelic depth with strand info, short ONT read mode, SNP
 quality filter for phasing

---
 NanoCaller                             | 13 ++++++---
 nanocaller_src/generate_SNP_pileups.py | 38 ++++++++++++++++++++++--
 nanocaller_src/indelCaller.py          | 10 +++++--
 nanocaller_src/snpCaller.py            | 40 ++++++++++++++++++--------
 4 files changed, 80 insertions(+), 21 deletions(-)

diff --git a/NanoCaller b/NanoCaller
index 7c13c51..b461b1e 100755
--- a/NanoCaller
+++ b/NanoCaller
@@ -30,7 +30,8 @@ def run(args):
              'threshold':threshold, 'snp_model':args.snp_model, 'cpu':args.cpu,  \
                  'vcf_path':args.output, 'prefix':args.prefix, 'sample':args.sample, \
                  'seq':args.sequencing, 'supplementary':args.supplementary, \
-                 'exclude_bed':args.exclude_bed, 'suppress_progress':args.suppress_progress_bar}
+                 'exclude_bed':args.exclude_bed, 'suppress_progress':args.suppress_progress_bar, 
+                 'phase_qual_score':args.phase_qual_score}
         
         snp_vcf=snpCaller.call_manager(in_dict)
         print('\n%s: SNP calling completed. Time taken= %.4f\n' %(str(datetime.datetime.now()), time.time()-snp_time),flush=True)
@@ -47,7 +48,8 @@ def run(args):
                  'seq':args.sequencing, 'del_t':args.del_threshold, 'ins_t':args.ins_threshold,\
                  'impute_indel_phase':args.impute_indel_phase, 'supplementary':args.supplementary,\
                  'exclude_bed':args.exclude_bed, 'win_size':args.win_size, 'small_win_size':args.small_win_size,
-                 'enable_whatshap':args.enable_whatshap, 'suppress_progress':args.suppress_progress_bar}
+                 'enable_whatshap':args.enable_whatshap, 'suppress_progress':args.suppress_progress_bar,
+                 'phase_qual_score':args.phase_qual_score}
         
         files=indelCaller.call_manager(in_dict)
         print('\n%s: %s completed. Time taken= %.4f\n' %(str(datetime.datetime.now()), 'Phasing' if args.mode=='snps' else 'Indel calling', time.time()-indel_phase_time),flush=True)
@@ -62,6 +64,8 @@ if __name__ == '__main__':
     
     preset_dict={'ont':{'sequencing':'ont', 'snp_model':'ONT-HG002', 'indel_model':'ONT-HG002', 'neighbor_threshold':'0.4,0.6', 'ins_threshold':0.4,'del_threshold':0.6, 'enable_whatshap':False, 'impute_indel_phase':False},
                  
+                 'short_ont':{'sequencing':'short_ont', 'snp_model':'ONT-HG002', 'indel_model':'ONT-HG002', 'neighbor_threshold':'0.3,0.7', 'ins_threshold':0.4,'del_threshold':0.6, 'enable_whatshap':False, 'impute_indel_phase':False},
+                 
                 'ul_ont': {'sequencing':'ul_ont', 'snp_model':'ONT-HG002', 'indel_model':'ONT-HG002', 'neighbor_threshold':'0.4,0.6', 'ins_threshold':0.4,'del_threshold':0.6, 'enable_whatshap':False, 'impute_indel_phase':False},
                  
                 'ul_ont_extreme':{'sequencing':'ul_ont_extreme', 'snp_model':'ONT-HG002', 'indel_model':'ONT-HG002', 'neighbor_threshold':'0.4,0.6', 'ins_threshold':0.4,'del_threshold':0.6, 'enable_whatshap':False, 'impute_indel_phase':False},
@@ -89,7 +93,7 @@ if __name__ == '__main__':
     phase_group=parser.add_argument_group("Phasing")
     
     config_group.add_argument("--mode",  help="NanoCaller mode to run. 'snps' mode quits NanoCaller without using WhatsHap for phasing. In this mode, if you want NanoCaller to phase SNPs and BAM files, use --phase argument additionally.", type=str, default='all', choices=['snps', 'indels', 'all'])
-    config_group.add_argument("--sequencing",  help="Sequencing type.  'ont' works well for any type of ONT sequencing datasets. However, use 'ul_ont' if you have several ultra-long ONT reads up to 100kbp long, and 'ul_ont_extreme' if you have several ultra-long ONT reads up to 300kbp long. For PacBio CCS (HiFi) and CLR reads, use 'pacbio'.", type=str, default='ont', choices=['ont', 'ul_ont', 'ul_ont_extreme', 'pacbio'])
+    config_group.add_argument("--sequencing",  help="Sequencing type.  'ont' works well for any type of ONT sequencing datasets. However, use 'ul_ont' if you have several ultra-long ONT reads up to 100kbp long, and 'ul_ont_extreme' if you have several ultra-long ONT reads up to 300kbp long. If you have short ONT reads, typically within 2-10kbp, use 'short_ont'. For PacBio CCS (HiFi) and CLR reads, use 'pacbio'.", type=str, default='ont', choices=['short_ont','ont', 'ul_ont', 'ul_ont_extreme', 'pacbio'])
 
     config_group.add_argument("--cpu",  help="Number of CPUs to use.", type=int, default=1)
     
@@ -141,8 +145,9 @@ if __name__ == '__main__':
     
     #phasing
     phase_group.add_argument('--phase', help='Phase SNPs and BAM files if snps mode is selected.', default=False, action='store_true')
+    phase_group.add_argument('--phase_qual_score', help='SNP QUAL score cutoff for phasing via Whatshap. SNPS with QUAL score lower than this will not be used for infering haplotype and will also not be phased.', type=float, default=10)
     
-    phase_group.add_argument("--enable_whatshap",  help="Allow WhatsHap to change SNP genotypes when phasing using --distrust-genotypes and --include-homozygous flags (this is not the same as regenotyping), increasing the time needed for phasing. It has a negligible effect on SNP calling accuracy for Nanopore reads, but may make a small improvement for PacBio reads. By default WhatsHap will only phase SNP calls produced by NanoCaller, but not change their genotypes.",  default=False, action='store_true')
+    phase_group.add_argument("--enable_whatshap",  help="Allow WhatsHap to change SNP genotypes when phasing using --distrust-genotypes and --include-homozygous flags (this is not the same as regenotyping), increasing the time needed for phasing. It has a negligible effect on SNP calling accuracy for Nanopore reads, but may make a small improvement for PacBio reads or low coverage ONT samples. By default WhatsHap will only phase SNP calls produced by NanoCaller, but not change their genotypes.",  default=False, action='store_true')
     
     args = parser.parse_args()
     
diff --git a/nanocaller_src/generate_SNP_pileups.py b/nanocaller_src/generate_SNP_pileups.py
index c757671..0420270 100644
--- a/nanocaller_src/generate_SNP_pileups.py
+++ b/nanocaller_src/generate_SNP_pileups.py
@@ -21,7 +21,23 @@ def get_cnd_pos(v_pos,cnd_pos, seq='ont'):
 
         ls_total_1=sorted(ls1_0+ls1_1+ls1_2+ls1_3+ls1_4)
         ls_total_2=sorted(ls2_0+ls2_1+ls2_2+ls2_3+ls2_4)
+    
+    elif seq=='short_ont':
+        ls=cnd_pos[abs(cnd_pos-v_pos)<50000] 
+
+        ls1_0= [p for p in ls if (p>=v_pos-2000) &  (p<v_pos)][-5:]
+        ls1_1= [p for p in ls if (p>=v_pos-5000) &  (p<v_pos-2000)][-10:]
+        ls1_2= [p for p in ls if                    (p<v_pos-5000)][-5:]
+        
 
+        ls2_0= [p for p in ls if (p>v_pos) & (p<=v_pos+2000)][:5]
+        ls2_1= [p for p in ls if (p>v_pos+2000) & (p<=v_pos+5000)][:10]
+        ls2_2= [p for p in ls if (p>v_pos+5000)][:5]
+        
+
+        ls_total_1=sorted(ls1_0+ls1_1+ls1_2)
+        ls_total_2=sorted(ls2_0+ls2_1+ls2_2)
+        
     elif seq=='ul_ont':
         ls=cnd_pos[abs(cnd_pos-v_pos)<100000] 
 
@@ -120,6 +136,12 @@ def ex_bed(tree, pos):
 
     ref_dict={j:s.upper() if s in 'AGTC' else '*' for j,s in zip(range(max(1,start-50000),end+50000+1),fastafile.fetch(chrom,max(1,start-50000)-1,end+50000)) }
     
+    strand_dict={}
+    
+    for pread in samfile.fetch(chrom, max(0, start-10), end + 10):
+        if pread.flag & 0x900==0:
+            strand_dict[pread.qname]=(pread.flag & 2320)//16
+            
     pileup_dict={}
     
     nbr_sites=[]
@@ -170,7 +192,9 @@ def ex_bed(tree, pos):
     pos_list=output.keys()
     current_depth=[]
     depth=0
-    output_pos,output_ref,output_mat,output_dp,output_freq=[],[],[],[],[]
+    
+    
+    output_pos,output_ref,output_mat,output_dp,output_freq,output_fwd_dp, output_rev_dp=[],[],[],[],[],[],[]
     if pos_list:
 
         for v_pos in pos_list:
@@ -183,6 +207,11 @@ def ex_bed(tree, pos):
             
             sample=list(pileup_dict[v_pos].keys())
             
+            bases=np.eye(5)[np.array(list(pileup_dict[v_pos].values()))][:,:4]
+            strand_info=np.array([strand_dict[rname] for rname in pileup_dict[v_pos].keys()]).astype(bool)
+            fwd_bases=np.sum(bases[~strand_info],axis=0)
+            rev_bases=np.sum(bases[strand_info],axis=0)
+
             if len(sample) > dct['maxcov']:
                 sample=random.sample(sample,min(len(sample), dct['maxcov']))
 
@@ -229,6 +258,8 @@ def ex_bed(tree, pos):
             output_mat.append(data)
             output_dp.append(output[v_pos][0])
             output_freq.append(output[v_pos][1])
+            output_fwd_dp.append(fwd_bases)
+            output_rev_dp.append(rev_bases)
             current_depth.append(len(sample))
             
         if len(output_pos)>0:
@@ -241,5 +272,8 @@ def ex_bed(tree, pos):
             output_dp=np.array(output_dp)
             output_freq=np.array(output_freq)
             depth=np.mean(current_depth)
+            
+            output_fwd_dp=np.array(output_fwd_dp)
+            output_rev_dp=np.array(output_rev_dp)
     
-    return (output_pos,output_ref,output_mat,output_dp,output_freq,depth)
\ No newline at end of file
+    return (output_pos,output_ref,output_mat,output_dp,output_freq,depth, output_fwd_dp, output_rev_dp)
\ No newline at end of file
diff --git a/nanocaller_src/indelCaller.py b/nanocaller_src/indelCaller.py
index a4fd654..570cb8b 100644
--- a/nanocaller_src/indelCaller.py
+++ b/nanocaller_src/indelCaller.py
@@ -215,17 +215,21 @@ def phase_run(contig_dict, params, indel_dict, job_Q, counter_Q, phased_snp_file
         start, end = contig_dict['start'], contig_dict['end']
         phase_dir = params['intermediate_phase_files_dir']
         input_snp_vcf = params['snp_vcf']
+        phase_qual_score=params['phase_qual_score']
         input_contig_vcf = os.path.join(phase_dir, '%s.snps.unphased.vcf' %contig)
+        lowq_input_contig_vcf= os.path.join(phase_dir, '%s.snps.lowq.unphased.vcf.gz' %contig)
         raw_output_contig_vcf = os.path.join(phase_dir, '%s.snps.phased.raw.vcf' %contig)
         output_contig_vcf = os.path.join(phase_dir, '%s.snps.phased.vcf.gz' %contig)
         phased_bam = os.path.join(phase_dir, '%s.phased.bam' %contig)
 
         phased_snp_files_list.append(output_contig_vcf)
+        phased_snp_files_list.append(lowq_input_contig_vcf)
 
         enable_whatshap = '--distrust-genotypes --include-homozygous' if params['enable_whatshap'] else ''
-
+        
         # extract region from VCF
-        run_cmd('bcftools view %s -r %s -o %s' %(input_snp_vcf, contig, input_contig_vcf))
+        run_cmd('bcftools view %s -r %s -i "QUAL>=%.4f"  -o %s' %(input_snp_vcf, contig, phase_qual_score,input_contig_vcf))
+        run_cmd('bcftools view %s -r %s -i "QUAL<%.4f"|bgziptabix  %s' %(input_snp_vcf, contig, phase_qual_score,lowq_input_contig_vcf))
 
         #phase VCF
         run_cmd("whatshap phase %s %s -o %s -r %s --ignore-read-groups --chromosome %s %s" %(input_contig_vcf, params['sam_path'], raw_output_contig_vcf, params['fasta_path'], contig , enable_whatshap))
@@ -346,7 +350,7 @@ def call_manager(params):
         output_files['snps']=os.path.join(params['vcf_path'],'%s.snps.phased.vcf.gz' %params['prefix'])
         
         phased_snps_files='\n'.join(phased_snp_files_list)
-        cmd='bcftools concat -f - |bgziptabix %s' %(output_files['snps'])
+        cmd='bcftools concat -a -f - |bgziptabix %s' %(output_files['snps'])
         stream=Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE)
         stdout, stderr = stream.communicate(input=phased_snps_files.encode())
         
diff --git a/nanocaller_src/snpCaller.py b/nanocaller_src/snpCaller.py
index ec94c9d..6851181 100644
--- a/nanocaller_src/snpCaller.py
+++ b/nanocaller_src/snpCaller.py
@@ -83,7 +83,7 @@ def caller(params, chunks_Q, counter_Q, snp_files):
         while not chunks_Q.empty():
             chunk = chunks_Q.get(block=False)
             chrom=chunk['chrom']
-            pos, test_ref, x_test, dp, freq, coverage = get_snp_testing_candidates(params, chunk)
+            pos, test_ref, x_test, dp, freq, coverage, fwd_dp, rev_dp = get_snp_testing_candidates(params, chunk)
             
             if len(pos)!=0:                
                 if chunk['ploidy']=='diploid':
@@ -95,6 +95,9 @@ def caller(params, chunks_Q, counter_Q, snp_files):
                     for batch in range(int(np.ceil(len(x_test)/batch_size))):
                         batch_freq=freq[batch*batch_size:min((batch+1)*batch_size,len(freq))]
                         batch_dp=dp[batch*batch_size:min((batch+1)*batch_size,len(dp))]
+                        batch_fwd_dp=fwd_dp[batch*batch_size:min((batch+1)*batch_size,len(fwd_dp))]
+                        batch_rev_dp=rev_dp[batch*batch_size:min((batch+1)*batch_size,len(rev_dp))]
+                        
                         batch_pos = pos[batch*batch_size:min((batch+1)*batch_size,len(pos))]
                         batch_x = x_test[batch*batch_size:min((batch+1)*batch_size,len(x_test))]
 
@@ -118,34 +121,42 @@ def caller(params, chunks_Q, counter_Q, snp_files):
                         sort_probs=np.sort(batch_probs,axis=1)
 
                         for j in range(len(batch_pred_GT)):
-                            info_field='PR='+','.join("{:.4f}".format(x) for x in batch_probs[j,[0,3,1,2]])
+                            info_field='PR='+','.join("{:.4f}".format(x) for x in batch_probs[j,[0,3,1,2]])+ ";FQ={:.4f}".format(batch_freq[j])
+                            ref_dp=(batch_fwd_dp[j][batch_ref[j]], batch_rev_dp[j][batch_ref[j]])
+                            
                             if batch_pred_GT[j]>=2: # if het
                                     pred1,pred2=batch_pred[j,-1],batch_pred[j,-2]
                                     if pred1==batch_ref[j]:
-                                                s='%s\t%d\t.\t%s\t%s\t%.3f\t%s\t%s\tGT:DP:FQ\t%s:%d:%.4f\n' %(chrom, batch_pos[j], num_to_base_map[batch_ref[j]], num_to_base_map[pred2], min(99,-10*np.log10(1e-10+ 1-batch_probs[j,pred2])),'PASS',info_field, '0/1', batch_dp[j], batch_freq[j])
-                                                f.write(s)
+                                        alt_dp=(batch_fwd_dp[j][pred2], batch_rev_dp[j][pred2])
+                                        
+                                        s='%s\t%d\t.\t%s\t%s\t%.3f\t%s\t%s\tGT:DP:VF:AD:ADF:ADR\t%s:%d:%.4f:%d,%d:%d,%d:%d,%d\n' %(chrom, batch_pos[j], num_to_base_map[batch_ref[j]], num_to_base_map[pred2], min(99,-10*np.log10(1e-10+ 1-batch_probs[j,pred2])),'PASS',info_field, '0/1', batch_dp[j], sum(alt_dp)/batch_dp[j], sum(ref_dp), sum(alt_dp), ref_dp[0], alt_dp[0], ref_dp[1], alt_dp[1])
+                                        f.write(s)
 
                                     elif pred2==batch_ref[j] and batch_probs[j,pred2]>=0.5:
-                                        s='%s\t%d\t.\t%s\t%s\t%.3f\t%s\t%s\tGT:DP:FQ\t%s:%d:%.4f\n' %(chrom,batch_pos[j], num_to_base_map[batch_ref[j]], num_to_base_map[pred1], min(99,-10*np.log10(1e-10+ 1-batch_probs[j,pred2])),'PASS',info_field, '1/0', batch_dp[j], batch_freq[j])
+                                        alt_dp=(batch_fwd_dp[j][pred1], batch_rev_dp[j][pred1])
+                                        s='%s\t%d\t.\t%s\t%s\t%.3f\t%s\t%s\tGT:DP:VF:AD:ADF:ADR\t%s:%d:%.4f:%d,%d:%d,%d:%d,%d\n' %(chrom,batch_pos[j], num_to_base_map[batch_ref[j]], num_to_base_map[pred1], min(99,-10*np.log10(1e-10+ 1-batch_probs[j,pred2])),'PASS',info_field, '0/1', batch_dp[j], sum(alt_dp)/batch_dp[j], sum(ref_dp), sum(alt_dp), ref_dp[0], alt_dp[0], ref_dp[1], alt_dp[1])
                                         f.write(s)
 
                                     elif pred2!=batch_ref[j] and pred1!=batch_ref[j] and batch_probs[j,pred2]>=0.5:
-                                        s='%s\t%d\t.\t%s\t%s,%s\t%.3f\t%s\t%s\tGT:DP:FQ\t%s:%d:%.4f\n' %\
-                            (chrom,batch_pos[j],num_to_base_map[batch_ref[j]],num_to_base_map[pred1],num_to_base_map[pred2],min(99,-10*np.log10(1e-10+ 1-batch_probs[j,pred2])),'PASS',info_field,'1/2', batch_dp[j], batch_freq[j])
+                                        alt1_dp=(batch_fwd_dp[j][pred1], batch_rev_dp[j][pred1])
+                                        alt2_dp=(batch_fwd_dp[j][pred2], batch_rev_dp[j][pred2])
+                                        s='%s\t%d\t.\t%s\t%s,%s\t%.3f\t%s\t%s\tGT:DP:VF:AD:ADF:ADR\t%s:%d:%.4f,%.4f:%d,%d,%d:%d,%d,%d:%d,%d,%d\n' %\
+                            (chrom,batch_pos[j],num_to_base_map[batch_ref[j]],num_to_base_map[pred1],num_to_base_map[pred2],min(99,-10*np.log10(1e-10+ 1-batch_probs[j,pred2])),'PASS',info_field,'1/2', batch_dp[j], sum(alt1_dp)/batch_dp[j],sum(alt2_dp)/batch_dp[j], sum(ref_dp), sum(alt1_dp), sum(alt2_dp), ref_dp[0], alt1_dp[0], alt2_dp[0], ref_dp[1], alt1_dp[1], alt2_dp[1])
                                         f.write(s)
 
                             elif batch_pred_GT[j]==1 and batch_ref[j]!=batch_pred[j,-1] and batch_probs[j,batch_pred[j,-1]]>=0.5:
                                 pred1=batch_pred[j,-1]
-                                s='%s\t%d\t.\t%s\t%s\t%.3f\t%s\t%s\tGT:DP:FQ\t%s:%d:%.4f\n' %(chrom, batch_pos[j], num_to_base_map[batch_ref[j]], num_to_base_map[pred1], min(99,-10*np.log10(1e-10+ 1-batch_probs[j,pred1])),'PASS',info_field,'1/1', batch_dp[j], batch_freq[j])
+                                alt_dp=(batch_fwd_dp[j][pred1], batch_rev_dp[j][pred1])
+                                s='%s\t%d\t.\t%s\t%s\t%.3f\t%s\t%s\tGT:DP:VF:AD:ADF:ADR\t%s:%d:%.4f:%d,%d:%d,%d:%d,%d\n' %(chrom, batch_pos[j], num_to_base_map[batch_ref[j]], num_to_base_map[pred1], min(99,-10*np.log10(1e-10+ 1-batch_probs[j,pred1])),'PASS',info_field,'1/1', batch_dp[j], sum(alt_dp)/batch_dp[j], sum(ref_dp), sum(alt_dp), ref_dp[0], alt_dp[0], ref_dp[1], alt_dp[1])
                                 f.write(s)
                                 
                             else:
                                 if batch_pred_GT[j]==1 and batch_ref[j]==batch_pred[j,-1]:
                                     pred1=batch_pred[j,-1]
-                                    s='%s\t%d\t.\t%s\t%s\t%.3f\t%s\t%s\tGT:DP:FQ\t%s:%d:%.4f\n' %(chrom, batch_pos[j], num_to_base_map[batch_ref[j]], '.', min(99,-10*np.log10(1e-10+ 1-batch_probs[j,pred1])),'REF',info_field,'./.', batch_dp[j], batch_freq[j])
+                                    s='%s\t%d\t.\t%s\t%s\t%.3f\t%s\t%s\tGT:DP:VF:AD:ADF:ADR\t%s:%d:.:.:.:.\n' %(chrom, batch_pos[j], num_to_base_map[batch_ref[j]], '.', min(99,-10*np.log10(1e-10+ 1-batch_probs[j,pred1])),'REF',info_field,'./.', batch_dp[j])
                                     f.write(s)
                                 else:
-                                    s='%s\t%d\t.\t%s\t%s\t%.3f\t%s\t%s\tGT:DP:FQ\t%s:%d:%.4f\n' %(chrom, batch_pos[j], num_to_base_map[batch_ref[j]], '.', 0,'LOW',info_field,'./.', batch_dp[j], batch_freq[j])
+                                    s='%s\t%d\t.\t%s\t%s\t%.3f\t%s\t%s\tGT:DP:VF:AD:ADF:ADR\t%s:%d:.:.:.:.\n' %(chrom, batch_pos[j], num_to_base_map[batch_ref[j]], '.', 0,'LOW',info_field,'./.', batch_dp[j])
                                     f.write(s)
                                     
 
@@ -170,7 +181,8 @@ def caller(params, chunks_Q, counter_Q, snp_files):
                         for j in range(len(batch_pred)):
                             pred=batch_pred[j]
                             if pred!=batch_ref[j]:
-                                s='%s\t%d\t.\t%s\t%s\t%.3f\t%s\t.\tGT:DP:FQ\t%s:%d:%.4f\n' %(chrom, batch_pos[j], num_to_base_map[batch_ref[j]], num_to_base_map[pred], min(999,-100*np.log10(1e-10+ 1-batch_probs[j,pred])),'PASS','1/1', batch_dp[j], batch_freq[j])
+                                info_field='PR='+','.join("{:.4f}".format(x) for x in batch_probs[j,[0,3,1,2]])+ ";FQ={:.4f}".format(batch_freq[j])
+                                s='%s\t%d\t.\t%s\t%s\t%.3f\t%s\t%s\tGT:DP:VF:AD:ADF:ADR\t%s:%d:%.4f:.:.:.\n' %(chrom, batch_pos[j], num_to_base_map[batch_ref[j]], num_to_base_map[pred], min(999,-100*np.log10(1e-10+ 1-batch_probs[j,pred])),'PASS',info_field,'1/1', batch_dp[j], batch_freq[j])
                                 f.write(s)
 
                 f.flush()
@@ -242,8 +254,12 @@ def call_manager(params):
 
         outfile.write(b'##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n')
         outfile.write(b'##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">\n')
-        outfile.write(b'##FORMAT=<ID=FQ,Number=1,Type=Float,Description="Alternative Allele Frequency">\n')
+        outfile.write(b'##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n')
+        outfile.write(b'##FORMAT=<ID=ADF,Number=R,Type=Integer,Description="Allelic depths on forward strand for the ref and alt alleles in the order listed">\n')
+        outfile.write(b'##FORMAT=<ID=ADR,Number=R,Type=Integer,Description="Allelic depths on reverse strand for the ref and alt alleles in the order listed">\n')
+        outfile.write(b'##FORMAT=<ID=VF,Number=A,Type=Float,Description="Alternative allele frequency in the order listed">\n')
         outfile.write(b'##INFO=<ID=PR,Number=4,Type=Float,Description="Probability of presence of alleles A, C, G and T, in the given order. Probability of each base is out of 1, independent of each other.">\n')
+        outfile.write(b'##INFO=<ID=FQ,Number=1,Type=Float,Description="Maximum frequency of non-reference base.">\n')
         
         outfile.write(b'#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n' %bytes(params['sample'].encode('utf-8')))