From 87535aeeffbf09397342548fcc1f4dd0177f18d5 Mon Sep 17 00:00:00 2001 From: David Jones Date: Tue, 1 Mar 2022 09:58:26 +0000 Subject: [PATCH 1/3] Add confidence calls to merged VCF --- python/casmsmartphase/MNVMerge.py | 55 ++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/python/casmsmartphase/MNVMerge.py b/python/casmsmartphase/MNVMerge.py index c7c2968..f64e7e9 100644 --- a/python/casmsmartphase/MNVMerge.py +++ b/python/casmsmartphase/MNVMerge.py @@ -58,7 +58,6 @@ def parse_homs_bed_to_dict(bed_file: str) -> Dict: while line: line = line.rstrip() split_line = line.split("\t") - print(split_line) hom = False if len(split_line) == 3 else True if hom: bed_entry = (int(split_line[1]) + 1, int(split_line[2]), hom) @@ -116,8 +115,8 @@ def parse_sphase_output( line = line.rstrip() try: (mnv_id, pair1, pair2, flag, confidence) = re.split(r"\s+", line, 5) - (_id_contig, id_start_region, _id_stop) = mnv_id.split("-") - print(_id_contig, id_start_region, _id_stop) + # (_id_contig, id_start_region, _id_stop) = mnv_id.split("-") + # print(_id_contig, id_start_region, _id_stop) if float(confidence) < cutoff or int(flag) & exclude_flags: continue (contig, startpos, _tmp) = pair1.split("-", maxsplit=2) @@ -134,18 +133,18 @@ def parse_sphase_output( # Check if startpos in mnv_id is already stored and see if these are adjacent to an already recorded MNV # Find key for mnv that adjoins this one key = None - for k, v in mnvs[contig].items(): - if v == startpos: + for k, (end_pos, conf_score_list) in mnvs[contig].items(): + if end_pos == startpos: key = k break # Check if current end_pos is adjacent, if so, extend this MNV - mnvs[contig][key] = endpos + mnvs[contig][key] = (endpos, conf_score_list.append(confidence)) mnv_len = (endpos - key) + 1 if max_len < mnv_len: max_len = mnv_len else: # Otherwise this is a new MNV - mnvs[contig][startpos] = endpos + mnvs[contig][startpos] = (endpos, [confidence]) mnv_len = (endpos - startpos) + 1 if max_len < mnv_len: max_len = mnv_len @@ -168,8 +167,8 @@ def parse_sphase_output( for store in hom_bed_parsed[contig]: # (start, stop, hom) (start, stop, _hom) = store - mnvs[contig][start] = stop mnv_len = (stop - start) + 1 + mnvs[contig][start] = (stop, [1.0] * mnv_len) if max_len < mnv_len: max_len = mnv_len mnvs[contig] = { @@ -242,7 +241,6 @@ def generate_new_increment_header( line with the key and description updates to include said incremental int """ - print(f"headerline {n}") new_line = existing_line.copy() new_line.mapping["ID"] = new_line.mapping["ID"] + f"_{n}" new_line.mapping["Description"] = ( @@ -271,12 +269,31 @@ def parse_header_add_merge_and_process( for i in range(1, max_len + 1): lines_to_add.append(self.generate_new_increment_header(form_line, i)) + lines_to_add.append(vcfpy.InfoHeaderLine("SPCONF")) for new_head_line in lines_to_add: writer_header.add_line(new_head_line) + # Add confidence score info headerline + ##INFO= + writer_header.add_info_line( + vcfpy.OrderedDict( + [ + ("ID", "SPCONF"), + ("Number", "."), + ("Type", "String"), + ( + "Description", + "Smart-Phase confidence scores of the form score_MNV_base_1,score_MNV_base_2 etc.", + ), + ] + ) + ) + return writer_header - def merge_snv_to_mnv(self, snv_list: List[vcfpy.Record]) -> vcfpy.Record: + def merge_snv_to_mnv( + self, snv_list: List[vcfpy.Record], conf_calls: List + ) -> vcfpy.Record: """ Merge snvs from list into a single variant and output to VCF """ @@ -305,7 +322,7 @@ def merge_snv_to_mnv(self, snv_list: List[vcfpy.Record]) -> vcfpy.Record: id = [] # list of SNV IDs? ref = "" alt_str = "" - info = {} + info = {"SPCONF": ",".join(map(str, conf_calls))} format = [] # Setup new call objects calls_dict = {} @@ -393,24 +410,30 @@ def perform_mnv_merge_to_vcf(self): start_pos_mnv = 0 start_contig_mnv = "" in_mnv = False + mnv_end_pos = 0 + mnv_conf_calls = None for variant in reader: if variant.CHROM in mnvs: # Start position in an mnv if int(variant.POS) in mnvs[variant.CHROM]: in_mnv = True + (mnv_end_pos, mnv_conf_calls) = mnvs[variant.CHROM][ + int(variant.POS) + ] start_pos_mnv = int(variant.POS) start_contig_mnv = variant.CHROM snvs.append(variant) # In an MNV and waiting for finish - elif ( - in_mnv and int(variant.POS) <= mnvs[start_contig_mnv][start_pos_mnv] - ): + elif in_mnv and int(variant.POS) <= mnv_end_pos: snvs.append(variant) - if int(variant.POS) == mnvs[start_contig_mnv][start_pos_mnv]: - mnv_rec = self.merge_snv_to_mnv(snvs) + if int(variant.POS) == mnv_end_pos: + mnv_rec = self.merge_snv_to_mnv(snvs, mnv_conf_calls) writer.write_record(mnv_rec) snvs.clear() in_mnv = False + mnv_end_pos = 0 + mnv_conf_calls = None + else: writer.write_record(variant) else: From 419790ca5484de543f5b01911ebf51352a446525 Mon Sep 17 00:00:00 2001 From: David Jones Date: Tue, 1 Mar 2022 13:51:58 +0000 Subject: [PATCH 2/3] Update tests initial changes --- python/casmsmartphase/MNVMerge.py | 8 ++- python/tests/01_MNVMerge_test.py | 94 +++++++++++++++++++++++++++++-- 2 files changed, 93 insertions(+), 9 deletions(-) diff --git a/python/casmsmartphase/MNVMerge.py b/python/casmsmartphase/MNVMerge.py index f64e7e9..41e035d 100644 --- a/python/casmsmartphase/MNVMerge.py +++ b/python/casmsmartphase/MNVMerge.py @@ -138,7 +138,9 @@ def parse_sphase_output( key = k break # Check if current end_pos is adjacent, if so, extend this MNV + del (mnvs[contig])[key] mnvs[contig][key] = (endpos, conf_score_list.append(confidence)) + print(mnvs) mnv_len = (endpos - key) + 1 if max_len < mnv_len: max_len = mnv_len @@ -168,7 +170,7 @@ def parse_sphase_output( # (start, stop, hom) (start, stop, _hom) = store mnv_len = (stop - start) + 1 - mnvs[contig][start] = (stop, [1.0] * mnv_len) + mnvs[contig][start] = (stop, ["1.0"] * mnv_len) if max_len < mnv_len: max_len = mnv_len mnvs[contig] = { @@ -274,7 +276,7 @@ def parse_header_add_merge_and_process( writer_header.add_line(new_head_line) # Add confidence score info headerline - ##INFO= + ##INFO= writer_header.add_info_line( vcfpy.OrderedDict( [ @@ -283,7 +285,7 @@ def parse_header_add_merge_and_process( ("Type", "String"), ( "Description", - "Smart-Phase confidence scores of the form score_MNV_base_1,score_MNV_base_2 etc.", + "Smart-Phase confidence scores of the form score_MNV_base_1to2,score_MNV_base_2to3 etc.", ), ] ) diff --git a/python/tests/01_MNVMerge_test.py b/python/tests/01_MNVMerge_test.py index a1d22cd..0a71d56 100644 --- a/python/tests/01_MNVMerge_test.py +++ b/python/tests/01_MNVMerge_test.py @@ -157,24 +157,101 @@ def test_get_last_vcf_process_index(in_head, key_prefix, exp_idx): @pytest.mark.parametrize( "sphaseout,cutoff,exclude_flags,hom_dict,exp_result", + # SPOUT = "test_data/sample.phased.output" + # SPOUT_TRINUC = "test_data/sample.phased.trinuc.output" + # BAD_SPOUT = "test_data/bad_sample.phased.output" + # SPOUT_EXCEPT = "test_data/sample.phased.except.output" + # BED_INPUT_HOM = "test_data/expected_output_hethom.bed" + # BED_INPUT_NOHOM = "test_data/expected_output.bed" + # SPOUT_TRINUC_2 = "test_data/test_phase_triplet.out" [ - (SPOUT, 0.0, 2, {}, ({"chr1": {1627262: 1627263}}, 2)), + ( + SPOUT, + 0.0, + 2, + {}, + ( + {"chr1": {1627262: (1627263, ["0.1999724805754193"])}}, + 2, + ), + ), (SPOUT, 0.1, 1, {}, ({}, 1)), - (SPOUT_EXCEPT, 0.0, 2, {}, ({"chr1": {1627262: 1627263}}, 2)), - (SPOUT_TRINUC, 0.0, 2, {}, ({"chr12": {9420710: 9420713}}, 4)), + ( + SPOUT_EXCEPT, + 0.0, + 2, + {}, + ({"chr1": {1627262: (1627263, ["0.1999724805754193"])}}, 2), + ), + ( + SPOUT_TRINUC, + 0.0, + 2, + {}, + ( + { + "chr12": { + 9420710: ( + 9420713, + [ + "0.39992018950740127", + "0.15993615797563573", + "0.15993615797563573", + ], + ) + } + }, + 4, + ), + ), ( SPOUT_TRINUC, 0.0, 2, {"chr1": [(1627262, 1627263, "hom")]}, - ({"chr1": {1627262: 1627263}, "chr12": {9420710: 9420713}}, 4), + ( + { + "chr1": { + 1627262: ( + 1627263, + ["1.0", "1.0", "1.0", "1.0", "1.0", "1.0", "1.0", "1.0"], + ) + }, + "chr12": { + 9420710: ( + 9420713, + [ + "0.39992018950740127", + "0.15993615797563573", + "0.15993615797563573", + ], + ) + }, + }, + 4, + ), ), ( SPOUT_TRINUC, 0.0, 2, {"chr1": [(1627262, 1627269, "hom")]}, - ({"chr1": {1627262: 1627269}, "chr12": {9420710: 9420713}}, 8), + ( + { + "chr1": {1627262: 1627269}, + "chr12": { + 9420710: ( + 9420713, + [ + "0.39992018950740127", + "0.15993615797563573", + "0.15993615797563573", + ], + ) + }, + }, + 8, + ), ), ( SPOUT_TRINUC_2, @@ -183,7 +260,12 @@ def test_get_last_vcf_process_index(in_head, key_prefix, exp_idx): {}, ( { - "chr17": {42760364: 42760366}, + "chr17": { + 42760364: ( + 42760366, + ["0.27489430479697324", "0.06933530763570786"], + ) + }, }, 3, ), From 2724de1d1c8368326a1598c256d8cf011f03df19 Mon Sep 17 00:00:00 2001 From: David Jones Date: Thu, 3 Mar 2022 11:10:25 +0000 Subject: [PATCH 3/3] Update tests inline with MNV confidence score addition --- python/casmsmartphase/MNVMerge.py | 23 +++++++++---------- python/test_data/test_exp_result.vcf | 3 ++- python/test_data/test_exp_result_trinuc.vcf | 3 ++- .../test_data/test_filt_qual_exp_result.vcf | 3 ++- python/tests/01_MNVMerge_test.py | 9 ++++++-- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/python/casmsmartphase/MNVMerge.py b/python/casmsmartphase/MNVMerge.py index 41e035d..43d4ac5 100644 --- a/python/casmsmartphase/MNVMerge.py +++ b/python/casmsmartphase/MNVMerge.py @@ -115,8 +115,6 @@ def parse_sphase_output( line = line.rstrip() try: (mnv_id, pair1, pair2, flag, confidence) = re.split(r"\s+", line, 5) - # (_id_contig, id_start_region, _id_stop) = mnv_id.split("-") - # print(_id_contig, id_start_region, _id_stop) if float(confidence) < cutoff or int(flag) & exclude_flags: continue (contig, startpos, _tmp) = pair1.split("-", maxsplit=2) @@ -129,7 +127,7 @@ def parse_sphase_output( if not contig in mnvs: mnvs[contig] = {} # Check for adjacent MNV - if startpos in mnvs[contig].values(): + if startpos in [t[0] for t in mnvs[contig].values()]: # Check if startpos in mnv_id is already stored and see if these are adjacent to an already recorded MNV # Find key for mnv that adjoins this one key = None @@ -138,15 +136,17 @@ def parse_sphase_output( key = k break # Check if current end_pos is adjacent, if so, extend this MNV - del (mnvs[contig])[key] - mnvs[contig][key] = (endpos, conf_score_list.append(confidence)) - print(mnvs) + (end_pos, conf_score_list) = mnvs[contig].pop(key) + conf_score_list.append(confidence) + mnvs[contig][key] = (endpos, conf_score_list) mnv_len = (endpos - key) + 1 if max_len < mnv_len: max_len = mnv_len else: # Otherwise this is a new MNV - mnvs[contig][startpos] = (endpos, [confidence]) + conf_list = list() + conf_list.append(confidence) + mnvs[contig][startpos] = (endpos, conf_list) mnv_len = (endpos - startpos) + 1 if max_len < mnv_len: max_len = mnv_len @@ -170,7 +170,7 @@ def parse_sphase_output( # (start, stop, hom) (start, stop, _hom) = store mnv_len = (stop - start) + 1 - mnvs[contig][start] = (stop, ["1.0"] * mnv_len) + mnvs[contig][start] = (stop, ["1.0"] * (mnv_len - 1)) if max_len < mnv_len: max_len = mnv_len mnvs[contig] = { @@ -271,7 +271,6 @@ def parse_header_add_merge_and_process( for i in range(1, max_len + 1): lines_to_add.append(self.generate_new_increment_header(form_line, i)) - lines_to_add.append(vcfpy.InfoHeaderLine("SPCONF")) for new_head_line in lines_to_add: writer_header.add_line(new_head_line) @@ -281,7 +280,7 @@ def parse_header_add_merge_and_process( vcfpy.OrderedDict( [ ("ID", "SPCONF"), - ("Number", "."), + ("Number", "1"), ("Type", "String"), ( "Description", @@ -294,7 +293,7 @@ def parse_header_add_merge_and_process( return writer_header def merge_snv_to_mnv( - self, snv_list: List[vcfpy.Record], conf_calls: List + self, snv_list: List[vcfpy.Record], conf_calls: List[str] ) -> vcfpy.Record: """ Merge snvs from list into a single variant and output to VCF @@ -324,7 +323,7 @@ def merge_snv_to_mnv( id = [] # list of SNV IDs? ref = "" alt_str = "" - info = {"SPCONF": ",".join(map(str, conf_calls))} + info = {"SPCONF": str(",".join(conf_calls))} format = [] # Setup new call objects calls_dict = {} diff --git a/python/test_data/test_exp_result.vcf b/python/test_data/test_exp_result.vcf index f29084b..2324b5f 100644 --- a/python/test_data/test_exp_result.vcf +++ b/python/test_data/test_exp_result.vcf @@ -3426,11 +3426,12 @@ ##FORMAT= ##FORMAT= ##FORMAT= +##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOUR chr1 1291220 8adb6b0a-a8ac-11eb-a08f-58fa0906d930 G A . . DP=105;MP=1.0;GP=2.6e-18;TG=GG/AGGGG;TP=0.99;SG=GG/AAGGG;SP=0.0071 GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:32:0:0:0:32:0:0.0 0|1:3:0:23:0:3:0:12:0:0.15 chr1 1321114 8adb6fb0-a8ac-11eb-a08f-58fa0906d930 G A . . DP=201;MP=1.0;GP=3.7e-39;TG=GG/AGGGG;TP=1.0;SG=GG/AAGGG;SP=0.00062 GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:6:1:0:0:129:0:0.0 0|1:1:0:5:0:9:0:50:0:0.15 chr1 1324702 8adb7104-a8ac-11eb-a08f-58fa0906d930 G A . . DP=179;MP=1.0;GP=7.9e-29;TG=GG/AGGGG;TP=0.99;SG=GG/AAGGG;SP=0.0089 GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:19:0:2:0:96:0:0.017 0|1:4:0:13:0:8:0:37:0:0.19 chr1 1341593 8adb7244-a8ac-11eb-a08f-58fa0906d930 G C . . DP=304;MP=1.0;GP=6.4e-17;TG=GG/CGGGG;TP=1.0;SG=GG/CCGGG;SP=1.2e-06 GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:5:157:0:0:7:42:0:0.057 0|1:0:6:57:0:0:5:25:0:0.12 chr1 1341778 8adb737a-a8ac-11eb-a08f-58fa0906d930 C T . . DP=143;MP=1.0;GP=2.6e-27;TG=CC/CCCCT;TP=1.0;SG=CC/CCCTT;SP=0.003 GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:17:0:0:0:78:0:0:0.0 0|1:0:8:0:2:0:33:0:5:0.15 -chr1 1627262 8adb74a6-a8ac-11eb-a08f-58fa0906d930;8adb75c8-a8ac-11eb-a08f-58fa0906d930 GG AA . . DP_1=266;MP_1=1.0;GP_1=4.6e-51;TG_1=GG/AGGGG;TP_1=1.0;SG_1=GG/AAGGG;SP_1=9.4e-07;DP_2=271;MP_2=1.0;GP_2=7.1e-49;TG_2=GG/AGGGG;TP_2=1.0;SG_2=GG/AAGGG;SP_2=1.6e-06 GT_1:FAZ_1:FCZ_1:FGZ_1:FTZ_1:RAZ_1:RCZ_1:RGZ_1:RTZ_1:PM_1:GT_2:FAZ_2:FCZ_2:FGZ_2:FTZ_2:RAZ_2:RCZ_2:RGZ_2:RTZ_2:PM_2 0|0:0:0:88:0:0:0:88:0:0.0:0|0:0:1:88:0:1:0:88:0:0.0056 0|1:6:0:40:0:4:0:40:0:0.11:0|1:6:0:41:0:5:1:40:0:0.12 +chr1 1627262 8adb74a6-a8ac-11eb-a08f-58fa0906d930;8adb75c8-a8ac-11eb-a08f-58fa0906d930 GG AA . . SPCONF=0.1999724805754193;DP_1=266;MP_1=1.0;GP_1=4.6e-51;TG_1=GG/AGGGG;TP_1=1.0;SG_1=GG/AAGGG;SP_1=9.4e-07;DP_2=271;MP_2=1.0;GP_2=7.1e-49;TG_2=GG/AGGGG;TP_2=1.0;SG_2=GG/AAGGG;SP_2=1.6e-06 GT_1:FAZ_1:FCZ_1:FGZ_1:FTZ_1:RAZ_1:RCZ_1:RGZ_1:RTZ_1:PM_1:GT_2:FAZ_2:FCZ_2:FGZ_2:FTZ_2:RAZ_2:RCZ_2:RGZ_2:RTZ_2:PM_2 0|0:0:0:88:0:0:0:88:0:0.0:0|0:0:1:88:0:1:0:88:0:0.0056 0|1:6:0:40:0:4:0:40:0:0.11:0|1:6:0:41:0:5:1:40:0:0.12 chr1 1866692 8adb76ea-a8ac-11eb-a08f-58fa0906d930 C T . . DP=25;MP=1.0;GP=0.00015;TG=CC/CCTTT;TP=0.44;SG=CC/CCCTT;SP=0.36 GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:9:0:0:0:8:0:0:0.0 0|1:0:2:0:0:0:2:0:4:0.5 diff --git a/python/test_data/test_exp_result_trinuc.vcf b/python/test_data/test_exp_result_trinuc.vcf index 6a57881..bcc4185 100644 --- a/python/test_data/test_exp_result_trinuc.vcf +++ b/python/test_data/test_exp_result_trinuc.vcf @@ -3462,5 +3462,6 @@ ##FORMAT= ##FORMAT= ##FORMAT= +##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOUR -chr12 9420710 8bfa48aa-37d4-11ec-b209-ac832c775300;8bfa48fa-37d4-11ec-b209-ac832c775300;8bfa495e-37d4-11ec-b209-ac832c775300;8bfa49a4-37d4-11ec-b209-ac832c775300 GGAG CCCC . . DP_1=60;MP_1=1;GP_1=5e-15;TG_1=GG/CCGGG;TP_1=0.44;SG_1=GG/CCCGG;SP_1=0.34;DP_2=62;MP_2=1;GP_2=1.3e-15;TG_2=GG/CCGGG;TP_2=0.45;SG_2=GG/CCCGG;SP_2=0.32;DP_3=62;MP_3=1;GP_3=1.3e-15;TG_3=AA/AAACC;TP_3=0.45;SG_3=AA/AACCC;SP_3=0.32;DP_4=63;MP_4=1;GP_4=6.8e-16;TG_4=GG/CCGGG;TP_4=0.45;SG_4=GG/CCCGG;SP_4=0.32 GT_1:FAZ_1:FCZ_1:FGZ_1:FTZ_1:RAZ_1:RCZ_1:RGZ_1:RTZ_1:PM_1:GT_2:FAZ_2:FCZ_2:FGZ_2:FTZ_2:RAZ_2:RCZ_2:RGZ_2:RTZ_2:PM_2:GT_3:FAZ_3:FCZ_3:FGZ_3:FTZ_3:RAZ_3:RCZ_3:RGZ_3:RTZ_3:PM_3:GT_4:FAZ_4:FCZ_4:FGZ_4:FTZ_4:RAZ_4:RCZ_4:RGZ_4:RTZ_4:PM_4 0|0:0:0:29:0:0:0:24:0:0:0|0:0:0:31:0:0:0:24:0:0:0|0:31:0:0:0:24:0:0:0:0:0|0:0:0:32:0:0:0:24:0:0 0|1:0:0:2:0:1:3:1:0:0.43:0|1:0:0:2:0:0:3:2:0:0.43:0|1:2:0:0:0:2:3:0:0:0.43:0|1:0:0:2:0:0:3:2:0:0.43 +chr12 9420710 8bfa48aa-37d4-11ec-b209-ac832c775300;8bfa48fa-37d4-11ec-b209-ac832c775300;8bfa495e-37d4-11ec-b209-ac832c775300;8bfa49a4-37d4-11ec-b209-ac832c775300 GGAG CCCC . . SPCONF=0.39992018950740127,0.15993615797563573,0.15993615797563573;DP_1=60;MP_1=1;GP_1=5e-15;TG_1=GG/CCGGG;TP_1=0.44;SG_1=GG/CCCGG;SP_1=0.34;DP_2=62;MP_2=1;GP_2=1.3e-15;TG_2=GG/CCGGG;TP_2=0.45;SG_2=GG/CCCGG;SP_2=0.32;DP_3=62;MP_3=1;GP_3=1.3e-15;TG_3=AA/AAACC;TP_3=0.45;SG_3=AA/AACCC;SP_3=0.32;DP_4=63;MP_4=1;GP_4=6.8e-16;TG_4=GG/CCGGG;TP_4=0.45;SG_4=GG/CCCGG;SP_4=0.32 GT_1:FAZ_1:FCZ_1:FGZ_1:FTZ_1:RAZ_1:RCZ_1:RGZ_1:RTZ_1:PM_1:GT_2:FAZ_2:FCZ_2:FGZ_2:FTZ_2:RAZ_2:RCZ_2:RGZ_2:RTZ_2:PM_2:GT_3:FAZ_3:FCZ_3:FGZ_3:FTZ_3:RAZ_3:RCZ_3:RGZ_3:RTZ_3:PM_3:GT_4:FAZ_4:FCZ_4:FGZ_4:FTZ_4:RAZ_4:RCZ_4:RGZ_4:RTZ_4:PM_4 0|0:0:0:29:0:0:0:24:0:0:0|0:0:0:31:0:0:0:24:0:0:0|0:31:0:0:0:24:0:0:0:0:0|0:0:0:32:0:0:0:24:0:0 0|1:0:0:2:0:1:3:1:0:0.43:0|1:0:0:2:0:0:3:2:0:0.43:0|1:2:0:0:0:2:3:0:0:0.43:0|1:0:0:2:0:0:3:2:0:0.43 diff --git a/python/test_data/test_filt_qual_exp_result.vcf b/python/test_data/test_filt_qual_exp_result.vcf index 6923c40..f42a3fe 100644 --- a/python/test_data/test_filt_qual_exp_result.vcf +++ b/python/test_data/test_filt_qual_exp_result.vcf @@ -3427,11 +3427,12 @@ ##FORMAT= ##FORMAT= ##FORMAT= +##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOUR chr1 1291220 8adb6b0a-a8ac-11eb-a08f-58fa0906d930 G A . . DP=105;MP=1.0;GP=2.6e-18;TG=GG/AGGGG;TP=0.99;SG=GG/AAGGG;SP=0.0071 GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:32:0:0:0:32:0:0.0 0|1:3:0:23:0:3:0:12:0:0.15 chr1 1321114 8adb6fb0-a8ac-11eb-a08f-58fa0906d930 G A . . DP=201;MP=1.0;GP=3.7e-39;TG=GG/AGGGG;TP=1.0;SG=GG/AAGGG;SP=0.00062 GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:6:1:0:0:129:0:0.0 0|1:1:0:5:0:9:0:50:0:0.15 chr1 1324702 8adb7104-a8ac-11eb-a08f-58fa0906d930 G A . . DP=179;MP=1.0;GP=7.9e-29;TG=GG/AGGGG;TP=0.99;SG=GG/AAGGG;SP=0.0089 GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:19:0:2:0:96:0:0.017 0|1:4:0:13:0:8:0:37:0:0.19 chr1 1341593 8adb7244-a8ac-11eb-a08f-58fa0906d930 G C . . DP=304;MP=1.0;GP=6.4e-17;TG=GG/CGGGG;TP=1.0;SG=GG/CCGGG;SP=1.2e-06 GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:5:157:0:0:7:42:0:0.057 0|1:0:6:57:0:0:5:25:0:0.12 chr1 1341778 8adb737a-a8ac-11eb-a08f-58fa0906d930 C T . . DP=143;MP=1.0;GP=2.6e-27;TG=CC/CCCCT;TP=1.0;SG=CC/CCCTT;SP=0.003 GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:17:0:0:0:78:0:0:0.0 0|1:0:8:0:2:0:33:0:5:0.15 -chr1 1627262 8adb74a6-a8ac-11eb-a08f-58fa0906d930;8adb75c8-a8ac-11eb-a08f-58fa0906d930 GG AA 5 MNP DP_1=266;MP_1=1.0;GP_1=4.6e-51;TG_1=GG/AGGGG;TP_1=1.0;SG_1=GG/AAGGG;SP_1=9.4e-07;DP_2=271;MP_2=1.0;GP_2=7.1e-49;TG_2=GG/AGGGG;TP_2=1.0;SG_2=GG/AAGGG;SP_2=1.6e-06 GT_1:FAZ_1:FCZ_1:FGZ_1:FTZ_1:RAZ_1:RCZ_1:RGZ_1:RTZ_1:PM_1:GT_2:FAZ_2:FCZ_2:FGZ_2:FTZ_2:RAZ_2:RCZ_2:RGZ_2:RTZ_2:PM_2 0|0:0:0:88:0:0:0:88:0:0.0:0|0:0:1:88:0:1:0:88:0:0.0056 0|1:6:0:40:0:4:0:40:0:0.11:0|1:6:0:41:0:5:1:40:0:0.12 +chr1 1627262 8adb74a6-a8ac-11eb-a08f-58fa0906d930;8adb75c8-a8ac-11eb-a08f-58fa0906d930 GG AA 5 MNP SPCONF=0.1999724805754193;DP_1=266;MP_1=1.0;GP_1=4.6e-51;TG_1=GG/AGGGG;TP_1=1.0;SG_1=GG/AAGGG;SP_1=9.4e-07;DP_2=271;MP_2=1.0;GP_2=7.1e-49;TG_2=GG/AGGGG;TP_2=1.0;SG_2=GG/AAGGG;SP_2=1.6e-06 GT_1:FAZ_1:FCZ_1:FGZ_1:FTZ_1:RAZ_1:RCZ_1:RGZ_1:RTZ_1:PM_1:GT_2:FAZ_2:FCZ_2:FGZ_2:FTZ_2:RAZ_2:RCZ_2:RGZ_2:RTZ_2:PM_2 0|0:0:0:88:0:0:0:88:0:0.0:0|0:0:1:88:0:1:0:88:0:0.0056 0|1:6:0:40:0:4:0:40:0:0.11:0|1:6:0:41:0:5:1:40:0:0.12 chr1 1866692 8adb76ea-a8ac-11eb-a08f-58fa0906d930 C T . . DP=25;MP=1.0;GP=0.00015;TG=CC/CCTTT;TP=0.44;SG=CC/CCCTT;SP=0.36 GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:9:0:0:0:8:0:0:0.0 0|1:0:2:0:0:0:2:0:4:0.5 diff --git a/python/tests/01_MNVMerge_test.py b/python/tests/01_MNVMerge_test.py index 0a71d56..f8480d9 100644 --- a/python/tests/01_MNVMerge_test.py +++ b/python/tests/01_MNVMerge_test.py @@ -214,7 +214,7 @@ def test_get_last_vcf_process_index(in_head, key_prefix, exp_idx): "chr1": { 1627262: ( 1627263, - ["1.0", "1.0", "1.0", "1.0", "1.0", "1.0", "1.0", "1.0"], + ["1.0"], ) }, "chr12": { @@ -238,7 +238,12 @@ def test_get_last_vcf_process_index(in_head, key_prefix, exp_idx): {"chr1": [(1627262, 1627269, "hom")]}, ( { - "chr1": {1627262: 1627269}, + "chr1": { + 1627262: ( + 1627269, + ["1.0", "1.0", "1.0", "1.0", "1.0", "1.0", "1.0"], + ) + }, "chr12": { 9420710: ( 9420713,