Skip to content

Commit

Permalink
revert to protein-based deletions count
Browse files: browse the repository at this point in the history
  • Loading branch information
Donaim committed Jun 23, 2023
1 parent 273b950 commit fbda016
Show file tree
Hide file tree
Showing 7 changed files with 695 additions and 528 deletions.
14 changes: 9 additions & 5 deletions intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,11 +656,9 @@ def find_real_correspondence(e):
for e in expected:
best_match = find_real_correspondence(e)

aligned_start = query_aligned_mapping[best_match.start]
aligned_end = query_aligned_mapping[best_match.end - 1] + 1

insertions = len(re.findall(r"-", str(alignment[0].seq[aligned_start:aligned_end])))
deletions = len(re.findall(r"-", str(alignment[1].seq[aligned_start:aligned_end])))
got_protein = best_match.aminoseq.split("*")[0]
exp_protein = best_match.expectedaminoseq.split("*")[0]
deletions = (len(exp_protein) - len(got_protein)) * 3

# Max deletion allowed in ORF exceeded
if deletions > e.deletion_tolerence:
Expand All @@ -682,6 +680,12 @@ def find_real_correspondence(e):
+ str(deletions)
))

aligned_start = query_aligned_mapping[best_match.start]
aligned_end = query_aligned_mapping[best_match.end - 1] + 1

insertions = len(re.findall(r"-", str(alignment[0].seq[aligned_start:aligned_end])))
deletions = len(re.findall(r"-", str(alignment[1].seq[aligned_start:aligned_end])))

# Check for frameshift in ORF
if (deletions - insertions) % 3 != 0:

Expand Down
192 changes: 167 additions & 25 deletions tests/expected-results-large/errors.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,36 @@
"error": "FrameshiftInOrf",
"message": "Smaller ORF vif at 5040-5619 contains an out of frame indel: insertions 1 deletions 7182."
},
{
"sequence_name": "KX505501.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF vpr at 5558-5849 contains an internal stop codon"
},
{
"sequence_name": "KX505501.1",
"error": "DeletionInOrf",
"message": "Smaller ORF tat_exon1 at 5829-6044 can have maximum deletions 30, got 213"
},
{
"sequence_name": "KX505501.1",
"error": "DeletionInOrf",
"message": "Smaller ORF rev_exon1 at 5968-6044 can have maximum deletions 30, got 75"
},
{
"sequence_name": "KX505501.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon"
},
{
"sequence_name": "KX505501.1",
"error": "FrameshiftInOrf",
"message": "Smaller ORF tat_exon2 at 8375-8468 contains an out of frame indel: insertions 1 deletions 0."
},
{
"sequence_name": "KX505501.1",
"error": "DeletionInOrf",
"message": "Smaller ORF rev_exon2 at 8376-8652 can have maximum deletions 30, got 96"
},
{
"sequence_name": "KX505501.1",
"error": "FrameshiftInOrf",
Expand Down Expand Up @@ -51,7 +76,18 @@
"message": "Sequence is plus-scrambled."
}
],
"MN691959": [],
"MN691959": [
{
"sequence_name": "MN691959",
"error": "InternalStopInOrf",
"message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon"
},
{
"sequence_name": "MN691959",
"error": "InternalStopInOrf",
"message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon"
}
],
"MN692074": [
{
"sequence_name": "MN692074",
Expand All @@ -61,12 +97,7 @@
{
"sequence_name": "MN692074",
"error": "DeletionInOrf",
"message": "Smaller ORF vif at 5040-5619 can have maximum deletions 30, got 5400"
},
{
"sequence_name": "MN692074",
"error": "DeletionInOrf",
"message": "Smaller ORF vpr at 5558-5849 can have maximum deletions 30, got 5400"
"message": "Smaller ORF vif at 5040-5619 can have maximum deletions 30, got 81"
},
{
"sequence_name": "MN692074",
Expand All @@ -78,11 +109,6 @@
"error": "FrameshiftInOrf",
"message": "Smaller ORF tat_exon1 at 5829-6044 contains an out of frame indel: insertions 0 deletions 5236."
},
{
"sequence_name": "MN692074",
"error": "InternalStopInOrf",
"message": "Smaller ORF rev_exon1 at 5968-6044 contains an internal stop codon"
},
{
"sequence_name": "MN692074",
"error": "FrameshiftInOrf",
Expand All @@ -91,22 +117,12 @@
{
"sequence_name": "MN692074",
"error": "DeletionInOrf",
"message": "Smaller ORF vpu at 6060-6309 can have maximum deletions 30, got 5400"
},
{
"sequence_name": "MN692074",
"error": "DeletionInOrf",
"message": "Smaller ORF tat_exon2 at 8375-8468 can have maximum deletions 30, got 5400"
},
{
"sequence_name": "MN692074",
"error": "DeletionInOrf",
"message": "Smaller ORF rev_exon2 at 8376-8652 can have maximum deletions 30, got 5400"
"message": "Smaller ORF rev_exon2 at 8376-8652 can have maximum deletions 30, got 192"
},
{
"sequence_name": "MN692074",
"error": "DeletionInOrf",
"message": "Smaller ORF nef at 8795-9416 can have maximum deletions 30, got 5400"
"message": "Smaller ORF nef at 8795-9416 can have maximum deletions 30, got 123"
},
{
"sequence_name": "MN692074",
Expand Down Expand Up @@ -136,6 +152,11 @@
"error": "FrameshiftInOrf",
"message": "ORF gag at 1175-2291 contains an out of frame indel, deletions 91 insertions 33."
},
{
"sequence_name": "MN090335",
"error": "InternalStopInOrf",
"message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon"
},
{
"sequence_name": "MN090335",
"error": "PackagingSignalDeletion",
Expand Down Expand Up @@ -323,6 +344,41 @@
"error": "WrongORFNumber",
"message": "Expected 3 forward ORFs, got 0"
},
{
"sequence_name": "MK114856.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF vif at 5040-5619 contains an internal stop codon"
},
{
"sequence_name": "MK114856.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF vpr at 5558-5849 contains an internal stop codon"
},
{
"sequence_name": "MK114856.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF tat_exon1 at 5829-6044 contains an internal stop codon"
},
{
"sequence_name": "MK114856.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon"
},
{
"sequence_name": "MK114856.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon"
},
{
"sequence_name": "MK114856.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF rev_exon2 at 8376-8652 contains an internal stop codon"
},
{
"sequence_name": "MK114856.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF nef at 8795-9416 contains an internal stop codon"
},
{
"sequence_name": "MK114856.1",
"error": "APOBECHypermutationDetected",
Expand All @@ -335,6 +391,31 @@
"error": "WrongORFNumber",
"message": "Expected 3 forward ORFs, got 0"
},
{
"sequence_name": "MK115009.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF vif at 5040-5619 contains an internal stop codon"
},
{
"sequence_name": "MK115009.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF vpr at 5558-5849 contains an internal stop codon"
},
{
"sequence_name": "MK115009.1",
"error": "DeletionInOrf",
"message": "Smaller ORF tat_exon1 at 5829-6044 can have maximum deletions 30, got 54"
},
{
"sequence_name": "MK115009.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF rev_exon2 at 8376-8652 contains an internal stop codon"
},
{
"sequence_name": "MK115009.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF nef at 8795-9416 contains an internal stop codon"
},
{
"sequence_name": "MK115009.1",
"error": "APOBECHypermutationDetected",
Expand All @@ -351,7 +432,13 @@
"message": "Sequence contains an internal inversion."
}
],
"MK115387.1": [],
"MK115387.1": [
{
"sequence_name": "MK115387.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon"
}
],
"MK115491.1": [],
"MK116110.1": [
{
Expand Down Expand Up @@ -422,6 +509,36 @@
"error": "WrongORFNumber",
"message": "Expected 3 forward ORFs, got 0"
},
{
"sequence_name": "MK115464.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF vif at 5040-5619 contains an internal stop codon"
},
{
"sequence_name": "MK115464.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF tat_exon1 at 5829-6044 contains an internal stop codon"
},
{
"sequence_name": "MK115464.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon"
},
{
"sequence_name": "MK115464.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon"
},
{
"sequence_name": "MK115464.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF rev_exon2 at 8376-8652 contains an internal stop codon"
},
{
"sequence_name": "MK115464.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF nef at 8795-9416 contains an internal stop codon"
},
{
"sequence_name": "MK115464.1",
"error": "FrameshiftInOrf",
Expand Down Expand Up @@ -508,6 +625,31 @@
"error": "WrongORFNumber",
"message": "Expected 3 forward ORFs, got 0"
},
{
"sequence_name": "MK115095.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF vif at 5040-5619 contains an internal stop codon"
},
{
"sequence_name": "MK115095.1",
"error": "DeletionInOrf",
"message": "Smaller ORF tat_exon1 at 5829-6044 can have maximum deletions 30, got 54"
},
{
"sequence_name": "MK115095.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon"
},
{
"sequence_name": "MK115095.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF rev_exon2 at 8376-8652 contains an internal stop codon"
},
{
"sequence_name": "MK115095.1",
"error": "DeletionInOrf",
"message": "Smaller ORF nef at 8795-9416 can have maximum deletions 30, got 36"
},
{
"sequence_name": "MK115095.1",
"error": "APOBECHypermutationDetected",
Expand Down
Loading

0 comments on commit fbda016

Please sign in to comment.