Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
factor out reading of HXB2 ORFs
Browse files Browse the repository at this point in the history
  • Loading branch information
Donaim committed Sep 22, 2023
1 parent c399add commit 3b24f43
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 18 deletions.
26 changes: 14 additions & 12 deletions intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,10 +638,16 @@ def write(self, sequence, is_intact, orfs, errors, holistic):
for error in errors:
self.errors_writer.writerow([error[key] for key in self.errors_header])

def strip_sequence_dashes(seq):
    """Return a new record holding *seq*'s sequence without dashes or newlines.

    Only the ``id`` and ``name`` of the input record are carried over to the
    returned ``SeqRecord``.
    """
    ungapped = str(seq.seq).replace("-", "").replace("\n", "")
    return SeqRecord.SeqRecord(Seq.Seq(ungapped), id=seq.id, name=seq.name)
def read_hxb2_orfs(aligned_subtype, orfs):
    """Yield the result of ``ExpectedORF.subtyped`` for each entry of *orfs*.

    Each item of *orfs* is unpacked as ``(name, start, end, delta)``.  Before
    being passed on, the coordinates are adjusted:

    * ``start`` and ``end`` values at or beyond
      ``vpr_defective_insertion_pos`` (5772) are decremented by one;
    * ``start`` is then decremented once more, because the original
      coordinates are 1-based.

    ``aligned_subtype``, ``name`` and ``delta`` are passed through unchanged.
    Nothing is yielded when *orfs* is empty.
    """
    # Loop-invariant, so bound once instead of on every iteration.
    vpr_defective_insertion_pos = 5772

    for (name, start, end, delta) in orfs:
        if start >= vpr_defective_insertion_pos:
            start -= 1
        if end >= vpr_defective_insertion_pos:
            end -= 1

        # decrement is needed because original coordinates are 1-based.
        start -= 1

        yield ExpectedORF.subtyped(aligned_subtype, name, start, end, delta)

def intact( working_dir,
input_file,
Expand Down Expand Up @@ -677,7 +683,7 @@ def intact( working_dir,
subtype_choices = {}
with open(st.alignment_file(subtype), 'r') as in_handle:
for sequence in SeqIO.parse(in_handle, "fasta"):
subtype_choices[sequence.id] = strip_sequence_dashes(sequence)
subtype_choices[sequence.id] = sequence

def analyse_single_sequence(holistic, sequence, blast_rows):
sequence_errors = []
Expand Down Expand Up @@ -724,13 +730,9 @@ def analyse_single_sequence(holistic, sequence, blast_rows):
sequence = aligned_sequence.this

# convert ORF positions to appropriate subtype
forward_orfs, reverse_orfs, small_orfs = [
[
ExpectedORF.subtyped(aligned_subtype, n, s, e, delta) \
for (n, s, e, delta) in orfs
] \
for orfs in [hxb2_forward_orfs, hxb2_reverse_orfs, hxb2_small_orfs]
]
forward_orfs, reverse_orfs, small_orfs = \
[list(read_hxb2_orfs(aligned_subtype, orfs)) \
for orfs in [hxb2_forward_orfs, hxb2_reverse_orfs, hxb2_small_orfs]]

holistic.orfs_start = min(forward_orfs, key=lambda e: e.start).start
holistic.orfs_end = max(forward_orfs, key=lambda e: e.end).end
Expand Down
8 changes: 2 additions & 6 deletions util/expected_orf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,12 @@ class ExpectedORF:

@staticmethod
def subtyped(aligned_sequence, name, start, end, deletion_tolerence):
vpr_defective_insertion_pos = 5772
start = start if start < vpr_defective_insertion_pos else start - 1
end = end if end < vpr_defective_insertion_pos else end - 1

start_s = ReferenceIndex(start - 1).mapto(aligned_sequence) # decrement is needed because original "start" is 1-based.
start_s = ReferenceIndex(start).mapto(aligned_sequence)
end_s = ReferenceIndex(end).mapto(aligned_sequence)

nucleotides = str(aligned_sequence.this.seq[start_s:end_s])
aminoacids = translate_to_aminoacids(nucleotides)
has_start_codon = translate_to_aminoacids(aligned_sequence.this.seq[(start - 1):end]).startswith("M")
has_start_codon = translate_to_aminoacids(aligned_sequence.this.seq[start:end]).startswith("M")
protein = get_biggest_protein(has_start_codon, aminoacids)

return ExpectedORF(name=name,
Expand Down

0 comments on commit 3b24f43

Please sign in to comment.