From 4c11e3a1bd391afbc5b94ae4e4d8c46bc93d9fc2 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 15 Sep 2023 19:07:08 -0700 Subject: [PATCH] do not assume that expected orf is nonempty --- intact/intact.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/intact/intact.py b/intact/intact.py index f470331..41cfb95 100644 --- a/intact/intact.py +++ b/intact/intact.py @@ -592,12 +592,12 @@ def get_indel_impact(alignment): deletions = max(0, len(exp_protein) - len(got_protein)) * 3 insertions = max(0, len(got_protein) - len(exp_protein)) * 3 - if got_protein: + if got_protein and exp_protein: orf_alignment = aligner.align(exp_protein, got_protein)[0] best_match.distance = aligner.match_score - (orf_alignment.score / len(exp_protein)) else: orf_alignment = (exp_protein, "-" * len(exp_protein)) - best_match.distance = aligner.match_score - ((aligner.open_gap_score + len(exp_protein) * aligner.extend_gap_score) / len(exp_protein)) + best_match.distance = aligner.match_score - ((aligner.open_gap_score + len(exp_protein) * aligner.extend_gap_score) / max(1, len(exp_protein))) # Max deletion allowed in ORF exceeded if deletions > e.deletion_tolerence: @@ -642,8 +642,8 @@ def get_indel_impact(alignment): continue got_nucleotides = sequence.seq[best_match.start:best_match.start + len(got_protein) * 3].upper() - if got_nucleotides: - exp_nucleotides = reference.seq[e.start:e.end].upper() + exp_nucleotides = reference.seq[e.start:e.end].upper() + if got_nucleotides and exp_nucleotides: orf_alignment = aligner.align(exp_nucleotides, got_nucleotides)[0] impacted_by_indels = get_indel_impact(orf_alignment)