Merge pull request #65 from martinghunt/call_filtering

Call filtering
iqbal-lab-org · Mar 18, 2019 · 76d6a48
2 parents 19296b5 + 4376e4a
commit 76d6a48
Show file tree

Hide file tree

Showing 4 changed files with 15 additions and 10 deletions.
diff --git a/minos/adjudicator.py b/minos/adjudicator.py
@@ -217,6 +217,8 @@ def _run_gramtools_not_split_vcf(self):
             filtered_outfile=self.final_vcf
         )
 
+        logging.info(f'Adding GT_CONF_PERCENTLE to debug VCF file {self.unfiltered_vcf_file}, using mean depth {mean_depth}, depth variance {depth_variance}, error rate {self.read_error_rate}, and {self.genotype_simulation_iterations} simulation iterations')
+        Adjudicator._add_gt_conf_percentile_to_vcf_file(self.unfiltered_vcf_file, mean_depth, depth_variance, self.read_error_rate, self.genotype_simulation_iterations)
         logging.info(f'Adding GT_CONF_PERCENTLE to final VCF file {self.final_vcf}, using mean depth {mean_depth}, depth variance {depth_variance}, error rate {self.read_error_rate}, and {self.genotype_simulation_iterations} simulation iterations')
         Adjudicator._add_gt_conf_percentile_to_vcf_file(self.final_vcf, mean_depth, depth_variance, self.read_error_rate, self.genotype_simulation_iterations)
 
@@ -332,6 +334,8 @@ def _run_gramtools_with_split_vcf(self):
 
         mean_depth = statistics.mean(mean_depths)
         depth_variance = statistics.mean(depth_variances)
+        logging.info(f'Adding GT_CONF_PERCENTLE to debug VCF file {self.unfiltered_vcf_file}, using mean depth {mean_depth}, depth variance {depth_variance}, error rate {self.read_error_rate}, and {self.genotype_simulation_iterations} simulation iterations')
+        Adjudicator._add_gt_conf_percentile_to_vcf_file(self.unfiltered_vcf_file, mean_depth, depth_variance, self.read_error_rate, self.genotype_simulation_iterations)
         logging.info(f'Adding GT_CONF_PERCENTLE to final VCF file {self.final_vcf}, using mean depth {mean_depth}, depth variance {depth_variance}, error rate {self.read_error_rate}, and {self.genotype_simulation_iterations} simulation iterations')
         Adjudicator._add_gt_conf_percentile_to_vcf_file(self.final_vcf, mean_depth, depth_variance, self.read_error_rate, self.genotype_simulation_iterations)
 

diff --git a/minos/tests/data/multi_sample_pipeline/run.out.vcf b/minos/tests/data/multi_sample_pipeline/run.out.vcf
@@ -1,15 +1,16 @@
 ##fileformat=VCFv4.2
-##source=minos, version 0.4.1
-##fileDate=2018-05-18
+##source=minos, version 0.5.1
+##fileDate=2019-03-18
 ##FORMAT=<ID=COV,Number=R,Type=Integer,Description="Number of reads on ref and alt alleles">
 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="total kmer depth from gramtools",Source="minos">
 ##FORMAT=<ID=GT_CONF,Number=1,Type=Float,Description="Genotype confidence. Difference in log likelihood of most likely and next most likely genotype">
+##FORMAT=<ID=GT_CONF_PERCENTILE,Number=1,Type=Float,Description="Percentile of GT_CONF"
 ##INFO=<ID=KMER,Number=1,Type=Integer,Description="Kmer size at which variant was discovered (kmer-size used by gramtools build)">
 ##minos_max_read_length=100
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	sample.1	sample.2
-ref.0	75	.	A	G	.	.	KMER=5	DP:GT:COV:GT_CONF	9:0/0:9,0:118.13	10:1/1:0,10:116.09
-ref.0	150	.	G	A,T	.	.	KMER=5	DP:GT:COV:GT_CONF	17:1/1:0,17,0:195.37	19:2/2:0,0,19:198.0
-ref.0	450	.	T	C	.	.	KMER=5	DP:GT:COV:GT_CONF	26:1/1:0,26:277.8	24:0/0:24,0:241.53
-ref.0	610	.	A	G	.	.	KMER=5	DP:GT:COV:GT_CONF	31:1/1:0,31:335.16	0:./.:0,0:0.0
-ref.0	800	.	C	CA	.	.	KMER=5	DP:GT:COV:GT_CONF	22:1/1:0,22:252.45	23:0/0:23,0:254.85
+ref.0	75	.	A	G	.	.	KMER=5	DP:GT:COV:GT_CONF:GT_CONF_PERCENTILE	9:0/0:9,0:118.49:5.49	10:1/1:0,10:119.41:35.47
+ref.0	150	.	G	A,T	.	.	KMER=5	DP:GT:COV:GT_CONF:GT_CONF_PERCENTILE	17:1/1:0,17,0:195.83:35.48	19:2/2:0,0,19:202.56:71.03
+ref.0	450	.	T	C	.	.	KMER=5	DP:GT:COV:GT_CONF:GT_CONF_PERCENTILE	26:1/1:0,26:278.36:75.24	24:0/0:24,0:246.77:83.28
+ref.0	610	.	A	G	.	.	KMER=5	DP:GT:COV:GT_CONF:GT_CONF_PERCENTILE	31:1/1:0,31:322.81:87.93	0:./.:0,0:0.0
+ref.0	800	.	C	CA	.	.	KMER=5	DP:GT:COV:GT_CONF:GT_CONF_PERCENTILE	22:1/1:0,22:242.14:59.44	23:0/0:23,0:238.02:81.18
diff --git a/minos/tests/multi_sample_pipeline_test.py b/minos/tests/multi_sample_pipeline_test.py
@@ -172,7 +172,7 @@ def test_run_no_small_var_vcf_chunking(self):
 
 
     def test_run_with_small_var_vcf_chunking_vars_per_split(self):
-        '''test run with chunking small variatn VCF file using variants_per_split option'''
+        '''test run with chunking small variant VCF file using variants_per_split option'''
         input_tsv = 'tmp.multi_sample_pipeline.run.in.tsv'
         ref_fasta = os.path.join(data_dir, 'run.ref.0.fa')
         with open(input_tsv, 'w') as f:
@@ -205,7 +205,7 @@ def test_run_with_small_var_vcf_chunking_vars_per_split(self):
 
 
     def test_run_with_small_var_vcf_chunking_total_splits(self):
-        '''test run with chunking small variatn VCF file using total_splits option'''
+        '''test run with chunking small variant VCF file using total_splits option'''
         input_tsv = 'tmp.multi_sample_pipeline.run.in.tsv'
         ref_fasta = os.path.join(data_dir, 'run.ref.0.fa')
         with open(input_tsv, 'w') as f:

diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
 
 setup(
     name='bio-minos',
-    version='0.5.0',
+    version='0.5.1',
     description='Variant call adjudication',
     packages = find_packages(),
     author='Martin Hunt',