Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
include final decision into holistic.csv
Browse files Browse the repository at this point in the history
  • Loading branch information
Donaim committed Feb 7, 2024
1 parent 4c77509 commit 975a58d
Show file tree
Hide file tree
Showing 10 changed files with 157 additions and 53 deletions.
6 changes: 3 additions & 3 deletions intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class FoundORF:

@dataclass
class HolisticInfo:
intact: bool = dataclasses.field(default=None)
qlen: int = dataclasses.field(default=None)
hypermutation_probablility: float = dataclasses.field(default=None)
inferred_subtype: str = dataclasses.field(default=None)
Expand Down Expand Up @@ -884,17 +885,16 @@ def analyse_single_sequence(writer, sequence, blast_rows):
if error:
sequence_errors.append(error)

is_intact = len(sequence_errors) == 0
holistic.intact = len(sequence_errors) == 0

# add the small orf errors after the intactness check if not included
if not include_small_orfs:
sequence_errors.extend(small_orf_errors)

orfs = [x.__dict__ for x in hxb2_found_orfs]
errors = [x.__dict__ for x in sequence_errors]
holistic = holistic.__dict__
subtype = aligned_sequence.reference
writer.write(sequence, subtype, is_intact, orfs, errors, holistic)
writer.write(sequence, subtype, holistic.intact, orfs, errors, holistic.__dict__)

with OutputWriter(working_dir, "csv" if output_csv else "json") as writer:

Expand Down
15 changes: 15 additions & 0 deletions tests/expected-results-edgy/holistic.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455": {
"intact": true,
"qlen": 9718,
"hypermutation_probablility": 0.0,
"inferred_subtype": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455",
Expand All @@ -12,6 +13,7 @@
"blast_n_conseqs": 3
},
"small-sequence[REVERSE_COMPLEMENT]": {
"intact": false,
"qlen": 19,
"hypermutation_probablility": 0.0,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -24,6 +26,7 @@
"blast_n_conseqs": 0
},
"singleton-sequence": {
"intact": false,
"qlen": 1,
"hypermutation_probablility": 0.0,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -36,6 +39,7 @@
"blast_n_conseqs": 0
},
"empty-sequence": {
"intact": false,
"qlen": 0,
"hypermutation_probablility": 0.0,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -48,6 +52,7 @@
"blast_n_conseqs": 0
},
"empty-sequence2": {
"intact": false,
"qlen": 0,
"hypermutation_probablility": 0.0,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -60,6 +65,7 @@
"blast_n_conseqs": 0
},
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH.SOME.LOWERCASES": {
"intact": false,
"qlen": 9714,
"hypermutation_probablility": 0.1679778416620883,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -72,6 +78,7 @@
"blast_n_conseqs": 0
},
"small-sequence-with-lowercases": {
"intact": false,
"qlen": 16,
"hypermutation_probablility": 0.0,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -84,6 +91,7 @@
"blast_n_conseqs": 0
},
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSECOMPLEMENTED[REVERSE_COMPLEMENT]": {
"intact": false,
"qlen": 9718,
"hypermutation_probablility": 0.13527282947774355,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -96,6 +104,7 @@
"blast_n_conseqs": 0
},
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED": {
"intact": false,
"qlen": 9718,
"hypermutation_probablility": 7.126591271466864e-05,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -108,6 +117,7 @@
"blast_n_conseqs": 0
},
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS[REVERSE_COMPLEMENT]": {
"intact": false,
"qlen": 9711,
"hypermutation_probablility": 0.13527282947774355,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -120,6 +130,7 @@
"blast_n_conseqs": 0
},
"small-sequence-with-xs": {
"intact": false,
"qlen": 12,
"hypermutation_probablility": 0.0,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -132,6 +143,7 @@
"blast_n_conseqs": 0
},
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-BAD-SYMBOLS[REVERSE_COMPLEMENT]": {
"intact": false,
"qlen": 9711,
"hypermutation_probablility": 0.13527282947774355,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -144,6 +156,7 @@
"blast_n_conseqs": 0
},
"small-sequence-with-bad-symbols": {
"intact": false,
"qlen": 12,
"hypermutation_probablility": 0.0,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -156,6 +169,7 @@
"blast_n_conseqs": 0
},
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-DASHES[REVERSE_COMPLEMENT]": {
"intact": false,
"qlen": 9711,
"hypermutation_probablility": 0.13527282947774355,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -168,6 +182,7 @@
"blast_n_conseqs": 0
},
"small-sequence-with-dashes": {
"intact": false,
"qlen": 12,
"hypermutation_probablility": 0.0,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand Down
84 changes: 42 additions & 42 deletions tests/expected-results-large-csv/holistic.csv
Original file line number Diff line number Diff line change
@@ -1,42 +1,42 @@
seqid,qlen,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_sseq_orfs_coverage,orfs_start,orfs_end,blast_n_conseqs
KX505501.1,1997,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,1997,0.2498456472525211,1.2158237356034052,0.17666166916541728,789,8793,4
MN691959,9493,0.19667690182893238,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9493,1.0817040543321672,1.1086063415148004,1.0,789,8793,3
MN692074,4178,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,4178,0.5042189750977567,1.1728099569171853,0.411544227886057,789,8793,4
MN692145,9689,0.1672411051048176,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.130479522535501,1.1271545051088863,1.0,789,8793,3
MN090335,9069,0.1771850809736527,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9069,0.9842560197571517,1.0603153600176425,1.000374812593703,789,8793,3
MN090376,8985,0.026415767987601813,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,8985,0.9784935171846059,1.0604340567612687,0.9943778110944528,789,8793,3
MK115581.1,9495,0.6919440876652894,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9495,0.987034369211772,1.0046340179041602,1.0,789,8793,2
MK115690.1,9689,0.051230576250981485,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.004630582424367,1.0113530808132933,1.0,789,8793,3
MK115571.1,9394,0.8029570594372466,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9394,0.9866227618851615,1.0113902490951672,1.0,789,8793,2
MK115514.1,9382,0.6482462132632603,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9382,0.9864169582218564,1.0173736943082499,1.0,789,8793,2
MK115488.1,9623,0.6534999185838631,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9623,1.0255196542498457,1.0325262392185388,1.0,789,8793,6
MK115030.1,9126,0.032014462397289556,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9126,0.994031693764149,1.0655270655270654,1.0,789,8793,3
MK115498.1,9461,0.83547963060225,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9461,0.9866227618851615,1.0080329774865235,1.0,789,8793,2
MK115211.1,9032,0.11818291879607423,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9032,0.995060712080675,1.0598981399468557,1.0,789,8793,3
MK115158.1,9143,0.002572269807584293,Ref.47_BF.ES.08.P1942.GQ372987,9143,0.9921577414295317,0.9699223449633599,1.0,234,8211,1
MK114705.1,9411,0.14584270737492833,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9411,1.098065445564931,1.122622463075125,1.0,789,8793,6
MK114856.1,9477,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9477,1.0485696645400289,1.0812493405085997,1.0,789,8793,4
MK115009.1,9207,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9207,0.9965013377238114,1.0590854784403172,1.0,789,8793,3
MK115387.1,9136,0.5436355526687852,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9136,0.9796254373327845,1.040936952714536,1.0,789,8793,2
MK115491.1,9422,0.8961809048805741,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9422,1.0037044659394938,1.0299299511780937,1.0,789,8793,2
MK116110.1,8967,0.07021438897893317,Ref.B.TH.90.BK132.AY173951,8967,0.9957759004001778,0.9972119995539199,0.9986220719027934,140,8123,3
MK115527.1,9481,0.770862998910788,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9481,0.9867256637168141,1.0056956017297753,1.0,789,8793,2
MK114997.1,9055,0.05560625344150194,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9055,0.9784935171846059,1.0516841524019878,1.0,789,8793,2
MK115518.1,9537,0.6408152618300496,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9537,0.9847705289154147,0.9996854356715948,1.0,789,8793,3
MK115065.1,9214,0.033954952452739495,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9214,1.0080263428689031,1.069459518124593,1.0,789,8793,6
MK115464.1,9663,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9663,0.9838444124305412,0.9893407844354756,1.0,789,8793,2
MK115530.1,9544,0.5812621948015355,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9544,0.9866227618851615,0.9992665549036044,1.0,789,8793,2
MK115520.1,9589,0.5225247969864292,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9589,0.9786993208479111,0.987902805297737,1.012118940529735,789,8793,3
MK115503.1,9617,0.42870398270204335,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9617,0.987034369211772,0.9953207861079338,1.0,789,8793,2
MK115570.1,9485,0.7406166892211931,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9485,0.9866227618851615,1.0057986294148655,1.0,789,8793,2
MK115509.1,9353,0.7883768413537747,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9353,0.9866227618851615,1.0197797498128942,1.0,789,8793,2
MK115702.1,9098,0.1454272422215308,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9098,0.9874459765383824,1.0596834469114091,1.0198650674662668,789,8793,4
MK115095.1,9137,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9137,0.9907388351512657,1.060085367188355,1.0,789,8793,2
MK115490.1,9347,0.8875203448314265,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9347,0.9848734307470673,1.0204343639670483,1.0,789,8793,3
MK115576.1,9266,0.8197662757563093,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9266,0.9917678534677917,1.0342110943233327,1.0,789,8793,3
OQ092466,9686,0.3858261890626,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9686,1.1015641078411196,1.1192442700805285,1.0,789,8793,3
OQ092463,9605,0.21628713708846803,Ref.B.TH.90.BK132.AY173951,9605,1.0529124055135617,0.9884435190005205,1.0,140,8123,2
OQ092465,9659,0.02412789935966586,Ref.28_BF.BR.99.BREPM12817.DQ085874,9659,1.0429696287964005,0.9620043482762191,1.0,200,8207,2
OQ092462,9714,0.10883643311676816,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9714,1.1306853261988064,1.1301214741610048,1.0,789,8793,3
OQ092464,9556,0.006887768010151674,Ref.28_BF.BR.99.BREPM12817.DQ085874,9556,1.0427446569178853,0.9678735872750105,1.0,200,8207,2
OQ092467,9936,0.6438715160567257,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9936,1.1308911298621116,1.0962157809983897,1.0,789,8793,3
seqid,intact,qlen,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_sseq_orfs_coverage,orfs_start,orfs_end,blast_n_conseqs
KX505501.1,False,1997,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,1997,0.2498456472525211,1.2158237356034052,0.17666166916541728,789,8793,4
MN691959,False,9493,0.19667690182893238,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9493,1.0817040543321672,1.1086063415148004,1.0,789,8793,3
MN692074,False,4178,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,4178,0.5042189750977567,1.1728099569171853,0.411544227886057,789,8793,4
MN692145,True,9689,0.1672411051048176,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.130479522535501,1.1271545051088863,1.0,789,8793,3
MN090335,False,9069,0.1771850809736527,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9069,0.9842560197571517,1.0603153600176425,1.000374812593703,789,8793,3
MN090376,False,8985,0.026415767987601813,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,8985,0.9784935171846059,1.0604340567612687,0.9943778110944528,789,8793,3
MK115581.1,True,9495,0.6919440876652894,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9495,0.987034369211772,1.0046340179041602,1.0,789,8793,2
MK115690.1,False,9689,0.051230576250981485,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.004630582424367,1.0113530808132933,1.0,789,8793,3
MK115571.1,False,9394,0.8029570594372466,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9394,0.9866227618851615,1.0113902490951672,1.0,789,8793,2
MK115514.1,False,9382,0.6482462132632603,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9382,0.9864169582218564,1.0173736943082499,1.0,789,8793,2
MK115488.1,False,9623,0.6534999185838631,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9623,1.0255196542498457,1.0325262392185388,1.0,789,8793,6
MK115030.1,False,9126,0.032014462397289556,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9126,0.994031693764149,1.0655270655270654,1.0,789,8793,3
MK115498.1,True,9461,0.83547963060225,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9461,0.9866227618851615,1.0080329774865235,1.0,789,8793,2
MK115211.1,False,9032,0.11818291879607423,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9032,0.995060712080675,1.0598981399468557,1.0,789,8793,3
MK115158.1,False,9143,0.002572269807584293,Ref.47_BF.ES.08.P1942.GQ372987,9143,0.9921577414295317,0.9699223449633599,1.0,234,8211,1
MK114705.1,False,9411,0.14584270737492833,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9411,1.098065445564931,1.122622463075125,1.0,789,8793,6
MK114856.1,False,9477,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9477,1.0485696645400289,1.0812493405085997,1.0,789,8793,4
MK115009.1,False,9207,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9207,0.9965013377238114,1.0590854784403172,1.0,789,8793,3
MK115387.1,False,9136,0.5436355526687852,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9136,0.9796254373327845,1.040936952714536,1.0,789,8793,2
MK115491.1,True,9422,0.8961809048805741,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9422,1.0037044659394938,1.0299299511780937,1.0,789,8793,2
MK116110.1,False,8967,0.07021438897893317,Ref.B.TH.90.BK132.AY173951,8967,0.9957759004001778,0.9972119995539199,0.9986220719027934,140,8123,3
MK115527.1,True,9481,0.770862998910788,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9481,0.9867256637168141,1.0056956017297753,1.0,789,8793,2
MK114997.1,False,9055,0.05560625344150194,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9055,0.9784935171846059,1.0516841524019878,1.0,789,8793,2
MK115518.1,True,9537,0.6408152618300496,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9537,0.9847705289154147,0.9996854356715948,1.0,789,8793,3
MK115065.1,False,9214,0.033954952452739495,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9214,1.0080263428689031,1.069459518124593,1.0,789,8793,6
MK115464.1,False,9663,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9663,0.9838444124305412,0.9893407844354756,1.0,789,8793,2
MK115530.1,True,9544,0.5812621948015355,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9544,0.9866227618851615,0.9992665549036044,1.0,789,8793,2
MK115520.1,False,9589,0.5225247969864292,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9589,0.9786993208479111,0.987902805297737,1.012118940529735,789,8793,3
MK115503.1,True,9617,0.42870398270204335,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9617,0.987034369211772,0.9953207861079338,1.0,789,8793,2
MK115570.1,False,9485,0.7406166892211931,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9485,0.9866227618851615,1.0057986294148655,1.0,789,8793,2
MK115509.1,True,9353,0.7883768413537747,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9353,0.9866227618851615,1.0197797498128942,1.0,789,8793,2
MK115702.1,False,9098,0.1454272422215308,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9098,0.9874459765383824,1.0596834469114091,1.0198650674662668,789,8793,4
MK115095.1,False,9137,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9137,0.9907388351512657,1.060085367188355,1.0,789,8793,2
MK115490.1,True,9347,0.8875203448314265,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9347,0.9848734307470673,1.0204343639670483,1.0,789,8793,3
MK115576.1,True,9266,0.8197662757563093,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9266,0.9917678534677917,1.0342110943233327,1.0,789,8793,3
OQ092466,False,9686,0.3858261890626,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9686,1.1015641078411196,1.1192442700805285,1.0,789,8793,3
OQ092463,False,9605,0.21628713708846803,Ref.B.TH.90.BK132.AY173951,9605,1.0529124055135617,0.9884435190005205,1.0,140,8123,2
OQ092465,False,9659,0.02412789935966586,Ref.28_BF.BR.99.BREPM12817.DQ085874,9659,1.0429696287964005,0.9620043482762191,1.0,200,8207,2
OQ092462,False,9714,0.10883643311676816,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9714,1.1306853261988064,1.1301214741610048,1.0,789,8793,3
OQ092464,False,9556,0.006887768010151674,Ref.28_BF.BR.99.BREPM12817.DQ085874,9556,1.0427446569178853,0.9678735872750105,1.0,200,8207,2
OQ092467,False,9936,0.6438715160567257,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9936,1.1308911298621116,1.0962157809983897,1.0,789,8793,3
Loading

0 comments on commit 975a58d

Please sign in to comment.