From 37927021f70f31a1cec0480f6f8ffbc9fbf11101 Mon Sep 17 00:00:00 2001
From: Vitaliy Mysak
Date: Fri, 15 Sep 2023 19:11:12 -0700
Subject: [PATCH] catch alignment error

---
Review notes (text between the three-dash line and the first file diff is
not part of the commit and is skipped when the patch is applied):

* Line breaks and indentation were restored by hand from a copy whose
  whitespace had been collapsed.  Blank context lines were placed so that
  every hunk matches the line counts in its "@@" header, but the exact
  indentation of context lines and the padding in front of "=" in the
  constants block are assumptions.  Run "git apply --check" against the
  target tree before relying on this file.
* util/wrappers.py: AlignmentFailure gets a docstring instead of a bare
  "pass", and is raised with "from err" plus the exit status of mafft so the
  original CalledProcessError stays attached as the cause.  Line counts are
  unchanged, so the diffstat and hunk headers remain valid; the post-image
  blob id on the "index" line of util/wrappers.py no longer matches.
* NOTE(review): intact/intact.py appends the error to sequence_errors and
  then executes "continue".  The rest of the loop body is not visible in
  this patch; confirm that sequence_errors is still reported for a sequence
  that is skipped this way.

 intact/intact.py | 38 ++++++++++++++++++++++----------------
 util/wrappers.py | 10 +++++++++-
 2 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/intact/intact.py b/intact/intact.py
index e599e61..42ac6cc 100644
--- a/intact/intact.py
+++ b/intact/intact.py
@@ -18,22 +18,23 @@ from util.blastrow import BlastRow
 
 
 
-WRONGORFNUMBER_ERROR = "WrongORFNumber"
-MISPLACEDORF_ERROR = "MisplacedORF"
-LONGDELETION_ERROR = "LongDeletion"
-DELETIONINORF_ERROR = "DeletionInOrf"
-INSERTIONINORF_ERROR = "InsertionInOrf"
-INTERNALSTOP_ERROR = "InternalStopInOrf"
-SCRAMBLE_ERROR = "Scramble"
-NONHIV_ERROR = "NonHIV"
+WRONGORFNUMBER_ERROR    = "WrongORFNumber"
+MISPLACEDORF_ERROR      = "MisplacedORF"
+LONGDELETION_ERROR      = "LongDeletion"
+DELETIONINORF_ERROR     = "DeletionInOrf"
+INSERTIONINORF_ERROR    = "InsertionInOrf"
+INTERNALSTOP_ERROR      = "InternalStopInOrf"
+SCRAMBLE_ERROR          = "Scramble"
+NONHIV_ERROR            = "NonHIV"
 INTERNALINVERSION_ERROR = "InternalInversion"
+ALIGNMENT_FAILED        = "AlignmentFailed" # Happens when mafft process fails
 
-FRAMESHIFTINORF_ERROR = "FrameshiftInOrf"
-MSDMUTATED_ERROR = "MajorSpliceDonorSiteMutated"
-PSIDELETION_ERROR = "PackagingSignalDeletion"
-PSINOTFOUND_ERROR = "PackagingSignalNotComplete"
-RREDELETION_ERROR = "RevResponseElementDeletion"
-HYPERMUTATION_ERROR = "APOBECHypermutationDetected"
+FRAMESHIFTINORF_ERROR   = "FrameshiftInOrf"
+MSDMUTATED_ERROR        = "MajorSpliceDonorSiteMutated"
+PSIDELETION_ERROR       = "PackagingSignalDeletion"
+PSINOTFOUND_ERROR       = "PackagingSignalNotComplete"
+RREDELETION_ERROR       = "RevResponseElementDeletion"
+HYPERMUTATION_ERROR     = "APOBECHypermutationDetected"
 
 
 @dataclass
@@ -832,8 +833,13 @@ def intact( working_dir,
                                                name = sequence.name
                                                )
 
-        alignment = wrappers.mafft([reference, sequence])
-        reverse_alignment = wrappers.mafft([reference, reverse_sequence])
+        try:
+            alignment = wrappers.mafft([reference, sequence])
+            reverse_alignment = wrappers.mafft([reference, reverse_sequence])
+        except wrappers.AlignmentFailure:
+            err = IntactnessError(sequence.id, ALIGNMENT_FAILED, "Alignment failed for this sequence. It probably contains unallowed symbols.")
+            sequence_errors.append(err)
+            continue
 
         forward_score = alignment_score(alignment)
         reverse_score = alignment_score(reverse_alignment)
diff --git a/util/wrappers.py b/util/wrappers.py
index 09c3362..1000902 100644
--- a/util/wrappers.py
+++ b/util/wrappers.py
@@ -6,6 +6,9 @@ from util.blastrow import BlastRow
 
 
 
+class AlignmentFailure(Exception):
+    """Raised by mafft() when the mafft process exits with a non-zero status."""
+
 def mafft(sequences):
     '''
     Call mafft on a set of sequences and return the resulting alignment.
@@ -19,7 +22,12 @@ def mafft(sequences):
 
     with tempfile.NamedTemporaryFile() as alignment_input, tempfile.NamedTemporaryFile() as alignment_output:
         SeqIO.write(sequences, alignment_input.name, "fasta")
-        subprocess.call(["mafft", "--quiet", alignment_input.name], shell=False, stdout=alignment_output)
+
+        try:
+            subprocess.run(["mafft", "--quiet", alignment_input.name], shell=False, stdout=alignment_output, check=True)
+        except subprocess.CalledProcessError as err:
+            raise AlignmentFailure(f"mafft exited with status {err.returncode}") from err
+
         alignment = AlignIO.read(alignment_output.name, "fasta")
 
     return alignment