Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
hide all mafft uses behind AlignedSequence
Browse files Browse the repository at this point in the history
  • Loading branch information
Donaim committed Sep 21, 2023
1 parent d30c6cb commit f950e4e
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 22 deletions.
28 changes: 10 additions & 18 deletions intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,15 +475,6 @@ def has_rev_response_element(alignment, rre_locus, rre_tolerance):
#/end def has_rev_response_element


def alignment_score(alignment):
"""
Simple score for an alignment (just to find out if it's forward or
reverse - absolutely not a true genetic distance)
"""

return sum([a==b for a, b in zip(alignment[0].seq, alignment[1].seq)])


def get_biggest_protein(has_start_codon, aminoacids):
def skip_to_startcodon(x):
index = x.find("M")
Expand Down Expand Up @@ -845,9 +836,6 @@ def intact( working_dir,
reference = subtype_choices[reference_name]
aligned_subtype = AlignedSequence(this=reference, reference=hxb2_reference)

# hxb2_alignment = wrappers.mafft([hxb2_reference, reference])
# pos_mapping = coords.map_positions(hxb2_alignment[0], hxb2_alignment[1])

# convert ORF positions to appropriate subtype
forward_orfs, reverse_orfs, small_orfs = [
[
Expand Down Expand Up @@ -884,8 +872,8 @@ def intact( working_dir,
)

try:
alignment = wrappers.mafft([reference, sequence])
reverse_alignment = wrappers.mafft([reference, reverse_sequence])
forward_aligned_sequence = AlignedSequence(this=sequence, reference=aligned_subtype.this)
reverse_aligned_sequence = forward_aligned_sequence.reverse()
except wrappers.AlignmentFailure:
err = IntactnessError(sequence.id, ALIGNMENT_FAILED, "Alignment failed for this sequence. It probably contains invalid symbols.")
sequence_errors.append(err)
Expand All @@ -894,14 +882,18 @@ def intact( working_dir,
writer.write(sequence, is_intact=False, orfs=[], errors=errors, holistic=holistic)
continue

forward_score = alignment_score(alignment)
reverse_score = alignment_score(reverse_alignment)
if alignment_score(reverse_alignment) > alignment_score(alignment):
forward_score = forward_aligned_sequence.alignment_score()
reverse_score = reverse_aligned_sequence.alignment_score()
if forward_score >= reverse_score:
aligned_sequence = forward_aligned_sequence
else:
print("Reversing sequence " + sequence.id + "; forward score "
+ str(forward_score) + "; reverse score " + str(reverse_score))
alignment = reverse_alignment
aligned_sequence = reverse_aligned_sequence
sequence = reverse_sequence

alignment = aligned_sequence.get_alignment()

sequence_orfs, orf_errors = has_reading_frames(
alignment, sequence, reference, False,
forward_orfs, error_bar)
Expand Down
12 changes: 8 additions & 4 deletions util/aligned_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,13 @@ def slice(self, first, last):


def reverse(self):
newthis = SeqRecord.SeqRecord(Seq.reverse_complement(this.seq),
id = this.id + " [REVERSED]",
name = this.name
newthis = SeqRecord.SeqRecord(Seq.reverse_complement(self.this.seq),
id = self.this.id + " [REVERSED]",
name = self.this.name
)

return Sequence(this=newthis, reference=self.reference)
return AlignedSequence(this=newthis, reference=self.reference)


def alignment_score(self):
return sum([a==b for a, b in zip(self.get_alignment()[0].seq, self.get_alignment()[1].seq)])

0 comments on commit f950e4e

Please sign in to comment.