tolerate invalid codons

cfe-lab · Sep 22, 2023 · e9e84e6 · e9e84e6
1 parent 47fde59
commit e9e84e6
Show file tree

Hide file tree

Showing 2 changed files with 36 additions and 35 deletions.
diff --git a/intact/intact.py b/intact/intact.py
@@ -20,7 +20,7 @@
 from util.blastrow import BlastRow
 from util.candidate_orf import CandidateORF
 from util.expected_orf import ExpectedORF
-from util.translate_to_aminoacids import translate_to_aminoacids
+from util.get_query_aminoacids_table import get_query_aminoacids_table
 from util.get_biggest_protein import get_biggest_protein
 from util.find_orf import find_orf
 
@@ -682,6 +682,14 @@ def intact( working_dir,
     def analyse_single_sequence(holistic, sequence, blast_rows):
         sequence_errors = []
 
+        e = get_query_aminoacids_table(sequence)
+        if isinstance(e, Exception):
+            log.error(e)
+            err = IntactnessError(sequence.id, INVALID_CODON, e.args[0])
+            sequence_errors.append(err)
+            sequence = SeqRecord.SeqRecord(Seq.Seq(''.join(x for x in sequence.seq if x in "ACTGN")),
+                                           id = sequence.id, name = sequence.name)
+
         holistic.qlen = len(sequence)
         holistic.blast_n_conseqs = len(blast_rows)
 
@@ -742,36 +750,29 @@ def analyse_single_sequence(holistic, sequence, blast_rows):
 
         alignment = aligned_sequence.get_alignment()
 
-        try:
-            sequence_orfs, orf_errors = has_reading_frames(
-                aligned_sequence, False,
-                forward_orfs, error_bar)
-            sequence_errors.extend(orf_errors)
-
-            sequence_small_orfs, small_orf_errors = has_reading_frames(
-                aligned_sequence, True,
-                small_orfs, error_bar, reverse = False)
-            if include_small_orfs:
-                sequence_errors.extend(small_orf_errors)
-
-            hxb2_found_orfs = [FoundORF(
-                o.name,
-                o.start,
-                o.end,
-                o.subtype_start,
-                o.subtype_end,
-                o.orientation,
-                o.distance,
-                str(o.protein),
-                str(o.aminoacids),
-                str(sequence[o.start:o.end].seq),
-            ) for o in sorted(sequence_orfs + sequence_small_orfs, key=lambda o: o.start)]
-
-        except Bio.Data.CodonTable.TranslationError as e:
-            log.error(e)
-            err = IntactnessError(sequence.id, INVALID_CODON, e.args[0])
-            sequence_errors.append(err)
-            hxb2_found_orfs = []
+        sequence_orfs, orf_errors = has_reading_frames(
+            aligned_sequence, False,
+            forward_orfs, error_bar)
+        sequence_errors.extend(orf_errors)
+
+        sequence_small_orfs, small_orf_errors = has_reading_frames(
+            aligned_sequence, True,
+            small_orfs, error_bar, reverse = False)
+        if include_small_orfs:
+            sequence_errors.extend(small_orf_errors)
+
+        hxb2_found_orfs = [FoundORF(
+            o.name,
+            o.start,
+            o.end,
+            o.subtype_start,
+            o.subtype_end,
+            o.orientation,
+            o.distance,
+            str(o.protein),
+            str(o.aminoacids),
+            str(sequence[o.start:o.end].seq),
+        ) for o in sorted(sequence_orfs + sequence_small_orfs, key=lambda o: o.start)]
 
         if include_packaging_signal:
             missing_psi_locus = has_packaging_signal(alignment,

diff --git a/util/get_query_aminoacids_table.py b/util/get_query_aminoacids_table.py
@@ -2,10 +2,10 @@
 from util.translate_to_aminoacids import translate_to_aminoacids
 
 def get_query_aminoacids_table(sequence):
-    if not hasattr(sequence, "query_aminoacids_table"):
+    if not hasattr(sequence.seq, "query_aminoacids_table"):
         try:
-            sequence.query_aminoacids_table = [translate_to_aminoacids(sequence.seq, i) for i in range(3)]
+            sequence.seq.query_aminoacids_table = [translate_to_aminoacids(sequence.seq, i) for i in range(3)]
         except Bio.Data.CodonTable.TranslationError as e:
-            sequence.query_aminoacids_table = e
+            sequence.seq.query_aminoacids_table = e
 
-    return sequence.query_aminoacids_table
+    return sequence.seq.query_aminoacids_table