diff --git a/intact/intact.py b/intact/intact.py index d95268a..b8df09a 100644 --- a/intact/intact.py +++ b/intact/intact.py @@ -110,6 +110,9 @@ class HolisticInfo: blast_matched_qlen: int = dataclasses.field(default=None) # number of query nucleotides matched to a known reference sequence blast_sseq_coverage: float = dataclasses.field(default=None) # percentage of reference sequence covered by the query sequence blast_qseq_coverage: float = dataclasses.field(default=None) # percentage of the query sequence covered by reference sequence + blast_sseq_orfs_coverage: float = dataclasses.field(default=None) # percentage of the reference sequence's ORFs region (orfs_start to orfs_end) covered by the query sequence + orfs_start: int = dataclasses.field(default=None) # start position of the region used for orfs coverage + orfs_end: int = dataclasses.field(default=None) # end position of the region used for orfs coverage blast_n_conseqs: int = dataclasses.field(default=None) # number of blast conseqs in the resulting match @@ -867,6 +870,13 @@ def intact( working_dir, blast_matched_slen = blast_rows[0].slen if blast_rows else 1 holistic.blast_sseq_coverage = aligned_reference_length / blast_matched_slen + holistic.orfs_start = min(forward_orfs, key=lambda e: e.start).start + holistic.orfs_end = max(forward_orfs, key=lambda e: e.end).end + clamp = lambda p: max(holistic.orfs_start, min(holistic.orfs_end, p)) + aligned_reference_orfs_length = sum(abs(clamp(x.send + 1) - clamp(x.sstart)) for x in blast_rows) + blast_matched_orfs_slen = holistic.orfs_end - holistic.orfs_start + holistic.blast_sseq_orfs_coverage = aligned_reference_orfs_length / blast_matched_orfs_slen + reverse_sequence = SeqRecord.SeqRecord(Seq.reverse_complement(sequence.seq), id = sequence.id + " [REVERSED]", name = sequence.name diff --git a/tests/expected-results-large-csv/holistic.csv b/tests/expected-results-large-csv/holistic.csv index 383c944..fc0954a 100644 --- a/tests/expected-results-large-csv/holistic.csv +++ 
b/tests/expected-results-large-csv/holistic.csv @@ -1,42 +1,42 @@ -seqid,qlen,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_n_conseqs -KX505501.1,1997,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,1997,0.2498199403230785,1.2158237356034052,4 -MN691959,9493,0.19593905853945925,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9493,1.0816956477003807,1.1086063415148004,3 -MN692074,4178,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,4178,0.5041670953801831,1.1728099569171853,4 -MN692145,9689,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1304660973351168,1.1271545051088863,3 -MN090335,9069,0.1754017863888554,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9069,0.9842576396748637,1.0603153600176425,3 -MN090376,8985,0.026007919521734202,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,8985,0.9784957300133759,1.0604340567612687,3 -MK115581.1,9495,0.6897199265079494,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9495,0.9870357032616525,1.0046340179041602,2 -MK115690.1,9689,0.05065930954004094,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,0.988167506945159,0.9949427185468056,2 -MK115571.1,9394,0.8012585672082311,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9394,0.9866241382858318,1.0113902490951672,2 -MK115514.1,9382,0.6458974386368621,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9382,0.9864183557979216,1.0173736943082499,2 -MK115488.1,9623,0.6511896911074662,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9623,1.0255170285008746,1.0325262392185388,6 -MK115030.1,9126,0.031598631869680704,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9126,0.9940323078506019,1.0655270655270654,3 -MK115498.1,9461,0.8339748776671196,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9461,0.9866241382858318,1.0080329774865235,2 -MK115211.1,9032,0.11689558806708,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9032,0.9950612202901533,1.0598981399468557,3 -MK115158.1,9143,0.002572269807584293,Ref.47_BF.ES.08.P1942.GQ372987,9143,0.9921577414295317,0.9699223449633599,1 
-MK114705.1,9411,0.14449377496074622,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9411,1.0980553554892478,1.122622463075125,6 -MK114856.1,9477,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9477,1.0485646671468258,1.0812493405085997,4 -MK115009.1,9207,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9207,0.9965016977055252,1.0590854784403172,3 -MK115387.1,9136,0.5412311092694289,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9136,0.9796275336968824,1.040936952714536,2 -MK115491.1,9422,0.8951015182445495,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9422,1.003704084782385,1.0299299511780937,2 -MK116110.1,8967,0.07021438897893317,Ref.B.TH.90.BK132.AY173951,8967,0.9957759004001778,0.9972119995539199,3 -MK115527.1,9481,0.7689834393883834,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9481,0.986727029529787,1.0056956017297753,2 -MK114997.1,9055,0.054959132555391754,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9055,0.9784957300133759,1.0516841524019878,2 -MK115518.1,9537,0.6385326595592609,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9537,0.9847720958946393,0.9996854356715948,3 -MK115065.1,9214,0.033517722768753644,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9214,1.0080255170285009,1.069459518124593,6 -MK115464.1,9663,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9663,0.9838460746990431,0.9893407844354756,2 -MK115530.1,9544,0.5789377103398377,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9544,0.9866241382858318,0.9992665549036044,2 -MK115520.1,9589,0.5200353682902832,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9589,0.9787015125012861,0.987902805297737,3 -MK115503.1,9617,0.4263025132504157,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9617,0.9870357032616525,0.9953207861079338,2 -MK115570.1,9485,0.738578434638724,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9485,0.9866241382858318,1.0057986294148655,2 -MK115509.1,9353,0.7866198309713798,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9353,0.9866241382858318,1.0197797498128942,2 -MK115702.1,9098,0.14401391767451666,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9098,0.987447268237473,1.0596834469114091,4 
-MK115095.1,9137,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9137,0.9907397880440375,1.060085367188355,2 -MK115490.1,9347,0.8863248655310947,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9347,0.9848749871385946,1.0204343639670483,3 -MK115576.1,9266,0.818189227062389,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9266,0.9917687004835889,1.0342110943233327,3 -OQ092466,9686,0.3876036547663967,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9686,1.1015536577837226,1.1192442700805285,3 -OQ092463,9605,0.21628713708846803,Ref.B.TH.90.BK132.AY173951,9605,1.0529124055135617,0.9884435190005205,2 -OQ092465,9659,0.02412789935966586,Ref.28_BF.BR.99.BREPM12817.DQ085874,9659,1.0429696287964005,0.9620043482762191,2 -OQ092462,9714,0.10777665573070194,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9714,1.130671879823027,1.1301214741610048,3 -OQ092464,9556,0.006887768010151674,Ref.28_BF.BR.99.BREPM12817.DQ085874,9556,1.0427446569178853,0.9678735872750105,2 -OQ092467,9936,0.6416537859942263,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9936,1.1308776623109373,1.0962157809983897,3 +seqid,qlen,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_sseq_orfs_coverage,orfs_start,orfs_end,blast_n_conseqs +KX505501.1,1997,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,1997,0.2498199403230785,1.2158237356034052,0.17661753684736448,789,8795,4 +MN691959,9493,0.19593905853945925,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9493,1.0816956477003807,1.1086063415148004,1.0,789,8795,3 +MN692074,4178,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,4178,0.5041670953801831,1.1728099569171853,0.41144141893579816,789,8795,4 +MN692145,9689,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1304660973351168,1.1271545051088863,1.0,789,8795,3 +MN090335,9069,0.1754017863888554,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9069,0.9842576396748637,1.0603153600176425,1.0003747189607795,789,8795,3 
+MN090376,8985,0.026007919521734202,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,8985,0.9784957300133759,1.0604340567612687,0.9943792155883088,789,8795,3 +MK115581.1,9495,0.6897199265079494,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9495,0.9870357032616525,1.0046340179041602,1.0,789,8795,2 +MK115690.1,9689,0.05065930954004094,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,0.988167506945159,0.9949427185468056,1.0,789,8795,2 +MK115571.1,9394,0.8012585672082311,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9394,0.9866241382858318,1.0113902490951672,1.0,789,8795,2 +MK115514.1,9382,0.6458974386368621,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9382,0.9864183557979216,1.0173736943082499,1.0,789,8795,2 +MK115488.1,9623,0.6511896911074662,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9623,1.0255170285008746,1.0325262392185388,1.0,789,8795,6 +MK115030.1,9126,0.031598631869680704,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9126,0.9940323078506019,1.0655270655270654,1.0,789,8795,3 +MK115498.1,9461,0.8339748776671196,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9461,0.9866241382858318,1.0080329774865235,1.0,789,8795,2 +MK115211.1,9032,0.11689558806708,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9032,0.9950612202901533,1.0598981399468557,1.0,789,8795,3 +MK115158.1,9143,0.002572269807584293,Ref.47_BF.ES.08.P1942.GQ372987,9143,0.9921577414295317,0.9699223449633599,1.0,234,8212,1 +MK114705.1,9411,0.14449377496074622,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9411,1.0980553554892478,1.122622463075125,1.0,789,8795,6 +MK114856.1,9477,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9477,1.0485646671468258,1.0812493405085997,1.0,789,8795,4 +MK115009.1,9207,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9207,0.9965016977055252,1.0590854784403172,1.0,789,8795,3 +MK115387.1,9136,0.5412311092694289,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9136,0.9796275336968824,1.040936952714536,1.0,789,8795,2 +MK115491.1,9422,0.8951015182445495,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9422,1.003704084782385,1.0299299511780937,1.0,789,8795,2 
+MK116110.1,8967,0.07021438897893317,Ref.B.TH.90.BK132.AY173951,8967,0.9957759004001778,0.9972119995539199,0.998622244488978,140,8124,3 +MK115527.1,9481,0.7689834393883834,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9481,0.986727029529787,1.0056956017297753,1.0,789,8795,2 +MK114997.1,9055,0.054959132555391754,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9055,0.9784957300133759,1.0516841524019878,1.0,789,8795,2 +MK115518.1,9537,0.6385326595592609,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9537,0.9847720958946393,0.9996854356715948,1.0,789,8795,3 +MK115065.1,9214,0.033517722768753644,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9214,1.0080255170285009,1.069459518124593,1.0,789,8795,6 +MK115464.1,9663,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9663,0.9838460746990431,0.9893407844354756,1.0,789,8795,2 +MK115530.1,9544,0.5789377103398377,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9544,0.9866241382858318,0.9992665549036044,1.0,789,8795,2 +MK115520.1,9589,0.5200353682902832,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9589,0.9787015125012861,0.987902805297737,1.012115913065201,789,8795,3 +MK115503.1,9617,0.4263025132504157,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9617,0.9870357032616525,0.9953207861079338,1.0,789,8795,2 +MK115570.1,9485,0.738578434638724,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9485,0.9866241382858318,1.0057986294148655,1.0,789,8795,2 +MK115509.1,9353,0.7866198309713798,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9353,0.9866241382858318,1.0197797498128942,1.0,789,8795,2 +MK115702.1,9098,0.14401391767451666,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9098,0.987447268237473,1.0596834469114091,1.019860104921309,789,8795,4 +MK115095.1,9137,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9137,0.9907397880440375,1.060085367188355,1.0,789,8795,2 +MK115490.1,9347,0.8863248655310947,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9347,0.9848749871385946,1.0204343639670483,1.0,789,8795,3 +MK115576.1,9266,0.818189227062389,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9266,0.9917687004835889,1.0342110943233327,1.0,789,8795,3 
+OQ092466,9686,0.3876036547663967,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9686,1.1015536577837226,1.1192442700805285,1.0,789,8795,3 +OQ092463,9605,0.21628713708846803,Ref.B.TH.90.BK132.AY173951,9605,1.0529124055135617,0.9884435190005205,1.0,140,8124,2 +OQ092465,9659,0.02412789935966586,Ref.28_BF.BR.99.BREPM12817.DQ085874,9659,1.0429696287964005,0.9620043482762191,1.0,200,8208,2 +OQ092462,9714,0.10777665573070194,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9714,1.130671879823027,1.1301214741610048,1.0,789,8795,3 +OQ092464,9556,0.006887768010151674,Ref.28_BF.BR.99.BREPM12817.DQ085874,9556,1.0427446569178853,0.9678735872750105,1.0,200,8208,2 +OQ092467,9936,0.6416537859942263,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9936,1.1308776623109373,1.0962157809983897,1.0,789,8795,3 diff --git a/tests/expected-results-large/holistic.json b/tests/expected-results-large/holistic.json index f98371a..a84c4dc 100644 --- a/tests/expected-results-large/holistic.json +++ b/tests/expected-results-large/holistic.json @@ -6,6 +6,9 @@ "blast_matched_qlen": 1997, "blast_sseq_coverage": 0.2498199403230785, "blast_qseq_coverage": 1.2158237356034052, + "blast_sseq_orfs_coverage": 0.17661753684736448, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 4 }, "MN691959": { @@ -15,6 +18,9 @@ "blast_matched_qlen": 9493, "blast_sseq_coverage": 1.0816956477003807, "blast_qseq_coverage": 1.1086063415148004, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MN692074": { @@ -24,6 +30,9 @@ "blast_matched_qlen": 4178, "blast_sseq_coverage": 0.5041670953801831, "blast_qseq_coverage": 1.1728099569171853, + "blast_sseq_orfs_coverage": 0.41144141893579816, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 4 }, "MN692145": { @@ -33,6 +42,9 @@ "blast_matched_qlen": 9689, "blast_sseq_coverage": 1.1304660973351168, "blast_qseq_coverage": 1.1271545051088863, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, 
"MN090335": { @@ -42,6 +54,9 @@ "blast_matched_qlen": 9069, "blast_sseq_coverage": 0.9842576396748637, "blast_qseq_coverage": 1.0603153600176425, + "blast_sseq_orfs_coverage": 1.0003747189607795, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MN090376": { @@ -51,6 +66,9 @@ "blast_matched_qlen": 8985, "blast_sseq_coverage": 0.9784957300133759, "blast_qseq_coverage": 1.0604340567612687, + "blast_sseq_orfs_coverage": 0.9943792155883088, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MK115581.1": { @@ -60,6 +78,9 @@ "blast_matched_qlen": 9495, "blast_sseq_coverage": 0.9870357032616525, "blast_qseq_coverage": 1.0046340179041602, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115690.1": { @@ -69,6 +90,9 @@ "blast_matched_qlen": 9689, "blast_sseq_coverage": 0.988167506945159, "blast_qseq_coverage": 0.9949427185468056, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115571.1": { @@ -78,6 +102,9 @@ "blast_matched_qlen": 9394, "blast_sseq_coverage": 0.9866241382858318, "blast_qseq_coverage": 1.0113902490951672, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115514.1": { @@ -87,6 +114,9 @@ "blast_matched_qlen": 9382, "blast_sseq_coverage": 0.9864183557979216, "blast_qseq_coverage": 1.0173736943082499, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115488.1": { @@ -96,6 +126,9 @@ "blast_matched_qlen": 9623, "blast_sseq_coverage": 1.0255170285008746, "blast_qseq_coverage": 1.0325262392185388, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 6 }, "MK115030.1": { @@ -105,6 +138,9 @@ "blast_matched_qlen": 9126, "blast_sseq_coverage": 0.9940323078506019, "blast_qseq_coverage": 1.0655270655270654, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, 
"blast_n_conseqs": 3 }, "MK115498.1": { @@ -114,6 +150,9 @@ "blast_matched_qlen": 9461, "blast_sseq_coverage": 0.9866241382858318, "blast_qseq_coverage": 1.0080329774865235, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115211.1": { @@ -123,6 +162,9 @@ "blast_matched_qlen": 9032, "blast_sseq_coverage": 0.9950612202901533, "blast_qseq_coverage": 1.0598981399468557, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MK115158.1": { @@ -132,6 +174,9 @@ "blast_matched_qlen": 9143, "blast_sseq_coverage": 0.9921577414295317, "blast_qseq_coverage": 0.9699223449633599, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 234, + "orfs_end": 8212, "blast_n_conseqs": 1 }, "MK114705.1": { @@ -141,6 +186,9 @@ "blast_matched_qlen": 9411, "blast_sseq_coverage": 1.0980553554892478, "blast_qseq_coverage": 1.122622463075125, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 6 }, "MK114856.1": { @@ -150,6 +198,9 @@ "blast_matched_qlen": 9477, "blast_sseq_coverage": 1.0485646671468258, "blast_qseq_coverage": 1.0812493405085997, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 4 }, "MK115009.1": { @@ -159,6 +210,9 @@ "blast_matched_qlen": 9207, "blast_sseq_coverage": 0.9965016977055252, "blast_qseq_coverage": 1.0590854784403172, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MK115387.1": { @@ -168,6 +222,9 @@ "blast_matched_qlen": 9136, "blast_sseq_coverage": 0.9796275336968824, "blast_qseq_coverage": 1.040936952714536, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115491.1": { @@ -177,6 +234,9 @@ "blast_matched_qlen": 9422, "blast_sseq_coverage": 1.003704084782385, "blast_qseq_coverage": 1.0299299511780937, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, 
"blast_n_conseqs": 2 }, "MK116110.1": { @@ -186,6 +246,9 @@ "blast_matched_qlen": 8967, "blast_sseq_coverage": 0.9957759004001778, "blast_qseq_coverage": 0.9972119995539199, + "blast_sseq_orfs_coverage": 0.998622244488978, + "orfs_start": 140, + "orfs_end": 8124, "blast_n_conseqs": 3 }, "MK115527.1": { @@ -195,6 +258,9 @@ "blast_matched_qlen": 9481, "blast_sseq_coverage": 0.986727029529787, "blast_qseq_coverage": 1.0056956017297753, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK114997.1": { @@ -204,6 +270,9 @@ "blast_matched_qlen": 9055, "blast_sseq_coverage": 0.9784957300133759, "blast_qseq_coverage": 1.0516841524019878, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115518.1": { @@ -213,6 +282,9 @@ "blast_matched_qlen": 9537, "blast_sseq_coverage": 0.9847720958946393, "blast_qseq_coverage": 0.9996854356715948, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MK115065.1": { @@ -222,6 +294,9 @@ "blast_matched_qlen": 9214, "blast_sseq_coverage": 1.0080255170285009, "blast_qseq_coverage": 1.069459518124593, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 6 }, "MK115464.1": { @@ -231,6 +306,9 @@ "blast_matched_qlen": 9663, "blast_sseq_coverage": 0.9838460746990431, "blast_qseq_coverage": 0.9893407844354756, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115530.1": { @@ -240,6 +318,9 @@ "blast_matched_qlen": 9544, "blast_sseq_coverage": 0.9866241382858318, "blast_qseq_coverage": 0.9992665549036044, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115520.1": { @@ -249,6 +330,9 @@ "blast_matched_qlen": 9589, "blast_sseq_coverage": 0.9787015125012861, "blast_qseq_coverage": 0.987902805297737, + "blast_sseq_orfs_coverage": 1.012115913065201, + "orfs_start": 
789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MK115503.1": { @@ -258,6 +342,9 @@ "blast_matched_qlen": 9617, "blast_sseq_coverage": 0.9870357032616525, "blast_qseq_coverage": 0.9953207861079338, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115570.1": { @@ -267,6 +354,9 @@ "blast_matched_qlen": 9485, "blast_sseq_coverage": 0.9866241382858318, "blast_qseq_coverage": 1.0057986294148655, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115509.1": { @@ -276,6 +366,9 @@ "blast_matched_qlen": 9353, "blast_sseq_coverage": 0.9866241382858318, "blast_qseq_coverage": 1.0197797498128942, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115702.1": { @@ -285,6 +378,9 @@ "blast_matched_qlen": 9098, "blast_sseq_coverage": 0.987447268237473, "blast_qseq_coverage": 1.0596834469114091, + "blast_sseq_orfs_coverage": 1.019860104921309, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 4 }, "MK115095.1": { @@ -294,6 +390,9 @@ "blast_matched_qlen": 9137, "blast_sseq_coverage": 0.9907397880440375, "blast_qseq_coverage": 1.060085367188355, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115490.1": { @@ -303,6 +402,9 @@ "blast_matched_qlen": 9347, "blast_sseq_coverage": 0.9848749871385946, "blast_qseq_coverage": 1.0204343639670483, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MK115576.1": { @@ -312,6 +414,9 @@ "blast_matched_qlen": 9266, "blast_sseq_coverage": 0.9917687004835889, "blast_qseq_coverage": 1.0342110943233327, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "OQ092466": { @@ -321,6 +426,9 @@ "blast_matched_qlen": 9686, "blast_sseq_coverage": 1.1015536577837226, "blast_qseq_coverage": 1.1192442700805285, + "blast_sseq_orfs_coverage": 1.0, + 
"orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "OQ092463": { @@ -330,6 +438,9 @@ "blast_matched_qlen": 9605, "blast_sseq_coverage": 1.0529124055135617, "blast_qseq_coverage": 0.9884435190005205, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 140, + "orfs_end": 8124, "blast_n_conseqs": 2 }, "OQ092465": { @@ -339,6 +450,9 @@ "blast_matched_qlen": 9659, "blast_sseq_coverage": 1.0429696287964005, "blast_qseq_coverage": 0.9620043482762191, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 200, + "orfs_end": 8208, "blast_n_conseqs": 2 }, "OQ092462": { @@ -348,6 +462,9 @@ "blast_matched_qlen": 9714, "blast_sseq_coverage": 1.130671879823027, "blast_qseq_coverage": 1.1301214741610048, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "OQ092464": { @@ -357,6 +474,9 @@ "blast_matched_qlen": 9556, "blast_sseq_coverage": 1.0427446569178853, "blast_qseq_coverage": 0.9678735872750105, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 200, + "orfs_end": 8208, "blast_n_conseqs": 2 }, "OQ092467": { @@ -366,6 +486,9 @@ "blast_matched_qlen": 9936, "blast_sseq_coverage": 1.1308776623109373, "blast_qseq_coverage": 1.0962157809983897, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 } } \ No newline at end of file diff --git a/tests/expected-results-single-csv/holistic.csv b/tests/expected-results-single-csv/holistic.csv index f1840a4..d3d8de7 100644 --- a/tests/expected-results-single-csv/holistic.csv +++ b/tests/expected-results-single-csv/holistic.csv @@ -1,2 +1,2 @@ -seqid,qlen,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_n_conseqs -MN692145,9689,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1304660973351168,1.1271545051088863,3 
+seqid,qlen,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_sseq_orfs_coverage,orfs_start,orfs_end,blast_n_conseqs +MN692145,9689,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1304660973351168,1.1271545051088863,1.0,789,8795,3 diff --git a/tests/expected-results-single/holistic.json b/tests/expected-results-single/holistic.json index 3cd930d..060aa5a 100644 --- a/tests/expected-results-single/holistic.json +++ b/tests/expected-results-single/holistic.json @@ -6,6 +6,9 @@ "blast_matched_qlen": 9689, "blast_sseq_coverage": 1.1304660973351168, "blast_qseq_coverage": 1.1271545051088863, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 } } \ No newline at end of file diff --git a/tests/expected-results-small-csv/holistic.csv b/tests/expected-results-small-csv/holistic.csv index 8da1aa4..0dd3a51 100644 --- a/tests/expected-results-small-csv/holistic.csv +++ b/tests/expected-results-small-csv/holistic.csv @@ -1,6 +1,6 @@ -seqid,qlen,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_n_conseqs -KX505501.1,1997,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,1997,0.2498199403230785,1.2158237356034052,4 -MN691959,9493,0.19593905853945925,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9493,1.0816956477003807,1.1086063415148004,3 -MN692074,4178,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,4178,0.5041670953801831,1.1728099569171853,4 -MN692145,9689,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1304660973351168,1.1271545051088863,3 -MN090335,9069,0.1754017863888554,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9069,0.9842576396748637,1.0603153600176425,3 +seqid,qlen,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_sseq_orfs_coverage,orfs_start,orfs_end,blast_n_conseqs 
+KX505501.1,1997,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,1997,0.2498199403230785,1.2158237356034052,0.17661753684736448,789,8795,4 +MN691959,9493,0.19593905853945925,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9493,1.0816956477003807,1.1086063415148004,1.0,789,8795,3 +MN692074,4178,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,4178,0.5041670953801831,1.1728099569171853,0.41144141893579816,789,8795,4 +MN692145,9689,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1304660973351168,1.1271545051088863,1.0,789,8795,3 +MN090335,9069,0.1754017863888554,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9069,0.9842576396748637,1.0603153600176425,1.0003747189607795,789,8795,3 diff --git a/tests/expected-results-small/holistic.json b/tests/expected-results-small/holistic.json index 2bbd7d0..f80f6bf 100644 --- a/tests/expected-results-small/holistic.json +++ b/tests/expected-results-small/holistic.json @@ -6,6 +6,9 @@ "blast_matched_qlen": 1997, "blast_sseq_coverage": 0.2498199403230785, "blast_qseq_coverage": 1.2158237356034052, + "blast_sseq_orfs_coverage": 0.17661753684736448, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 4 }, "MN691959": { @@ -15,6 +18,9 @@ "blast_matched_qlen": 9493, "blast_sseq_coverage": 1.0816956477003807, "blast_qseq_coverage": 1.1086063415148004, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MN692074": { @@ -24,6 +30,9 @@ "blast_matched_qlen": 4178, "blast_sseq_coverage": 0.5041670953801831, "blast_qseq_coverage": 1.1728099569171853, + "blast_sseq_orfs_coverage": 0.41144141893579816, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 4 }, "MN692145": { @@ -33,6 +42,9 @@ "blast_matched_qlen": 9689, "blast_sseq_coverage": 1.1304660973351168, "blast_qseq_coverage": 1.1271545051088863, + "blast_sseq_orfs_coverage": 1.0, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MN090335": { @@ -42,6 +54,9 @@ "blast_matched_qlen": 9069, 
"blast_sseq_coverage": 0.9842576396748637, "blast_qseq_coverage": 1.0603153600176425, + "blast_sseq_orfs_coverage": 1.0003747189607795, + "orfs_start": 789, + "orfs_end": 8795, "blast_n_conseqs": 3 } } \ No newline at end of file