Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
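Fix calculation of blast_sseq_coverage: compute it from the subject (reference) fields sstart/send/slen instead of the query fields qstart/qend/qlen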
Browse files: browse the repository at this point in the history
  • Loading branch information
Donaim committed Sep 18, 2023
1 parent 9a36671 commit ca49f71
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 98 deletions.
8 changes: 4 additions & 4 deletions intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,6 @@ def is_nonhiv(holistic, seqid, seqlen, blast_rows):
holistic.blast_matched_qlen = blast_rows[0].qlen if blast_rows else 1
holistic.blast_qseq_coverage = aligned_length / holistic.blast_matched_qlen

aligned_reference_length = sum(abs(x.qend - x.qstart) + 1 for x in blast_rows)
blast_matched_slen = blast_rows[0].qlen if blast_rows else 1
holistic.blast_sseq_coverage = aligned_reference_length / blast_matched_slen

if holistic.blast_qseq_coverage < 0.8 and seqlen > holistic.blast_matched_qlen * 0.6:
return IntactnessError(seqid, NONHIV_ERROR,
"Sequence contains unrecognized parts. "
Expand Down Expand Up @@ -861,6 +857,10 @@ def intact( working_dir,
holistic.inferred_subtype = reference_name
holistic.blast_n_conseqs = len(blast_rows)

aligned_reference_length = sum(abs(x.send - x.sstart) + 1 for x in blast_rows)
blast_matched_slen = blast_rows[0].slen if blast_rows else 1
holistic.blast_sseq_coverage = aligned_reference_length / blast_matched_slen

reverse_sequence = SeqRecord.SeqRecord(Seq.reverse_complement(sequence.seq),
id = sequence.id + " [REVERSED]",
name = sequence.name
Expand Down
82 changes: 41 additions & 41 deletions tests/expected-results-large-csv/holistic.csv
Original file line number Diff line number Diff line change
@@ -1,42 +1,42 @@
seqid,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_n_conseqs
KX505501.1,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,1997,1.2158237356034052,1.2158237356034052,4
MN691959,0.19593905853945925,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9493,1.1086063415148004,1.1086063415148004,3
MN692074,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,4178,1.1728099569171853,1.1728099569171853,4
MN692145,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1271545051088863,1.1271545051088863,3
MN090335,0.1754017863888554,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9069,1.0603153600176425,1.0603153600176425,3
MN090376,0.026007919521734202,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,8985,1.0604340567612687,1.0604340567612687,3
MK115581.1,0.6897199265079494,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9495,1.0046340179041602,1.0046340179041602,2
MK115690.1,0.05065930954004094,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,0.9949427185468056,0.9949427185468056,2
MK115571.1,0.8012585672082311,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9394,1.0113902490951672,1.0113902490951672,2
MK115514.1,0.6458974386368621,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9382,1.0173736943082499,1.0173736943082499,2
MK115488.1,0.6511896911074662,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9623,1.0325262392185388,1.0325262392185388,6
MK115030.1,0.031598631869680704,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9126,1.0655270655270654,1.0655270655270654,3
MK115498.1,0.8339748776671196,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9461,1.0080329774865235,1.0080329774865235,2
MK115211.1,0.11689558806708,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9032,1.0598981399468557,1.0598981399468557,3
MK115158.1,0.002572269807584293,Ref.47_BF.ES.08.P1942.GQ372987,9143,0.9699223449633599,0.9699223449633599,1
MK114705.1,0.14449377496074622,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9411,1.122622463075125,1.122622463075125,6
MK114856.1,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9477,1.0812493405085997,1.0812493405085997,4
MK115009.1,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9207,1.0590854784403172,1.0590854784403172,3
MK115387.1,0.5412311092694289,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9136,1.040936952714536,1.040936952714536,2
MK115491.1,0.8951015182445495,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9422,1.0299299511780937,1.0299299511780937,2
MK116110.1,0.07021438897893317,Ref.B.TH.90.BK132.AY173951,8967,0.9972119995539199,0.9972119995539199,3
MK115527.1,0.7689834393883834,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9481,1.0056956017297753,1.0056956017297753,2
MK114997.1,0.054959132555391754,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9055,1.0516841524019878,1.0516841524019878,2
MK115518.1,0.6385326595592609,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9537,0.9996854356715948,0.9996854356715948,3
MK115065.1,0.033517722768753644,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9214,1.069459518124593,1.069459518124593,6
MK115464.1,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9663,0.9893407844354756,0.9893407844354756,2
MK115530.1,0.5789377103398377,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9544,0.9992665549036044,0.9992665549036044,2
MK115520.1,0.5200353682902832,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9589,0.987902805297737,0.987902805297737,3
MK115503.1,0.4263025132504157,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9617,0.9953207861079338,0.9953207861079338,2
MK115570.1,0.738578434638724,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9485,1.0057986294148655,1.0057986294148655,2
MK115509.1,0.7866198309713798,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9353,1.0197797498128942,1.0197797498128942,2
MK115702.1,0.14401391767451666,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9098,1.0596834469114091,1.0596834469114091,4
MK115095.1,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9137,1.060085367188355,1.060085367188355,2
MK115490.1,0.8863248655310947,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9347,1.0204343639670483,1.0204343639670483,3
MK115576.1,0.818189227062389,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9266,1.0342110943233327,1.0342110943233327,3
OQ092466,0.3876036547663967,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9686,1.1192442700805285,1.1192442700805285,3
OQ092463,0.21628713708846803,Ref.B.TH.90.BK132.AY173951,9605,0.9884435190005205,0.9884435190005205,2
OQ092465,0.02412789935966586,Ref.28_BF.BR.99.BREPM12817.DQ085874,9659,0.9620043482762191,0.9620043482762191,2
OQ092462,0.10777665573070194,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9714,1.1301214741610048,1.1301214741610048,3
OQ092464,0.006887768010151674,Ref.28_BF.BR.99.BREPM12817.DQ085874,9556,0.9678735872750105,0.9678735872750105,2
OQ092467,0.6416537859942263,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9936,1.0962157809983897,1.0962157809983897,3
KX505501.1,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,1997,0.2498199403230785,1.2158237356034052,4
MN691959,0.19593905853945925,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9493,1.0816956477003807,1.1086063415148004,3
MN692074,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,4178,0.5041670953801831,1.1728099569171853,4
MN692145,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1304660973351168,1.1271545051088863,3
MN090335,0.1754017863888554,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9069,0.9842576396748637,1.0603153600176425,3
MN090376,0.026007919521734202,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,8985,0.9784957300133759,1.0604340567612687,3
MK115581.1,0.6897199265079494,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9495,0.9870357032616525,1.0046340179041602,2
MK115690.1,0.05065930954004094,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,0.988167506945159,0.9949427185468056,2
MK115571.1,0.8012585672082311,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9394,0.9866241382858318,1.0113902490951672,2
MK115514.1,0.6458974386368621,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9382,0.9864183557979216,1.0173736943082499,2
MK115488.1,0.6511896911074662,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9623,1.0255170285008746,1.0325262392185388,6
MK115030.1,0.031598631869680704,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9126,0.9940323078506019,1.0655270655270654,3
MK115498.1,0.8339748776671196,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9461,0.9866241382858318,1.0080329774865235,2
MK115211.1,0.11689558806708,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9032,0.9950612202901533,1.0598981399468557,3
MK115158.1,0.002572269807584293,Ref.47_BF.ES.08.P1942.GQ372987,9143,0.9921577414295317,0.9699223449633599,1
MK114705.1,0.14449377496074622,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9411,1.0980553554892478,1.122622463075125,6
MK114856.1,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9477,1.0485646671468258,1.0812493405085997,4
MK115009.1,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9207,0.9965016977055252,1.0590854784403172,3
MK115387.1,0.5412311092694289,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9136,0.9796275336968824,1.040936952714536,2
MK115491.1,0.8951015182445495,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9422,1.003704084782385,1.0299299511780937,2
MK116110.1,0.07021438897893317,Ref.B.TH.90.BK132.AY173951,8967,0.9957759004001778,0.9972119995539199,3
MK115527.1,0.7689834393883834,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9481,0.986727029529787,1.0056956017297753,2
MK114997.1,0.054959132555391754,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9055,0.9784957300133759,1.0516841524019878,2
MK115518.1,0.6385326595592609,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9537,0.9847720958946393,0.9996854356715948,3
MK115065.1,0.033517722768753644,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9214,1.0080255170285009,1.069459518124593,6
MK115464.1,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9663,0.9838460746990431,0.9893407844354756,2
MK115530.1,0.5789377103398377,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9544,0.9866241382858318,0.9992665549036044,2
MK115520.1,0.5200353682902832,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9589,0.9787015125012861,0.987902805297737,3
MK115503.1,0.4263025132504157,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9617,0.9870357032616525,0.9953207861079338,2
MK115570.1,0.738578434638724,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9485,0.9866241382858318,1.0057986294148655,2
MK115509.1,0.7866198309713798,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9353,0.9866241382858318,1.0197797498128942,2
MK115702.1,0.14401391767451666,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9098,0.987447268237473,1.0596834469114091,4
MK115095.1,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9137,0.9907397880440375,1.060085367188355,2
MK115490.1,0.8863248655310947,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9347,0.9848749871385946,1.0204343639670483,3
MK115576.1,0.818189227062389,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9266,0.9917687004835889,1.0342110943233327,3
OQ092466,0.3876036547663967,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9686,1.1015536577837226,1.1192442700805285,3
OQ092463,0.21628713708846803,Ref.B.TH.90.BK132.AY173951,9605,1.0529124055135617,0.9884435190005205,2
OQ092465,0.02412789935966586,Ref.28_BF.BR.99.BREPM12817.DQ085874,9659,1.0429696287964005,0.9620043482762191,2
OQ092462,0.10777665573070194,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9714,1.130671879823027,1.1301214741610048,3
OQ092464,0.006887768010151674,Ref.28_BF.BR.99.BREPM12817.DQ085874,9556,1.0427446569178853,0.9678735872750105,2
OQ092467,0.6416537859942263,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9936,1.1308776623109373,1.0962157809983897,3
Loading

0 comments on commit ca49f71

Please sign in to comment.