Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
factor out reading of HXB2 ORF coordinates
Browse files Browse the repository at this point in the history
  • Loading branch information
Donaim committed Sep 22, 2023
1 parent cef7223 commit 7c4eede
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
19 changes: 12 additions & 7 deletions intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,15 @@ def strip_sequence_dashes(seq):
Seq.Seq(str(seq.seq).replace("-","").replace("\n", "")),
id = seq.id, name = seq.name)

def read_hxb2_orfs(aligned_subtype, orfs):
    """Yield one ``ExpectedORF`` per HXB2 ORF definition.

    Each entry of *orfs* is a ``(name, start, end, delta)`` tuple.  Before
    the coordinates are handed to ``ExpectedORF.subtyped`` they are adjusted
    in two steps:

    1. Any coordinate at or past ``vpr_defective_insertion_pos`` is moved
       back by one position.
    2. ``start`` is converted from 1-based to 0-based.  ``end`` is passed
       through without that conversion.

    Both comparisons in step 1 are made on the coordinates as supplied
    (i.e. before ``start`` is made 0-based), so ``start`` and ``end`` are
    judged against the same threshold.

    Args:
        aligned_subtype: alignment object forwarded unchanged to
            ``ExpectedORF.subtyped``.
        orfs: iterable of ``(name, start, end, delta)`` tuples.

    Yields:
        The value returned by ``ExpectedORF.subtyped`` for each entry, in
        input order.
    """
    # Threshold expressed in the same (1-based) numbering as the incoming
    # coordinates.  NOTE(review): presumably the position of the extra base
    # in HXB2's defective vpr -- confirm against the reference in use.
    vpr_defective_insertion_pos = 5772

    for (name, start, end, delta) in orfs:
        # Shift first, while both values are still in the input numbering.
        # Comparing a 0-based start and an unconverted end against a single
        # 0-based threshold would move an `end` that sits exactly one
        # position before the threshold.
        if start >= vpr_defective_insertion_pos:
            start -= 1
        if end >= vpr_defective_insertion_pos:
            end -= 1

        # Decrement is needed because the original "start" is 1-based.
        start -= 1

        yield ExpectedORF.subtyped(aligned_subtype, name, start, end, delta)

def intact( working_dir,
input_file,
subtype,
Expand Down Expand Up @@ -724,13 +733,9 @@ def analyse_single_sequence(holistic, sequence, blast_rows):
sequence = aligned_sequence.this

# convert ORF positions to appropriate subtype
forward_orfs, reverse_orfs, small_orfs = [
[
ExpectedORF.subtyped(aligned_subtype, n, s, e, delta) \
for (n, s, e, delta) in orfs
] \
for orfs in [hxb2_forward_orfs, hxb2_reverse_orfs, hxb2_small_orfs]
]
forward_orfs, reverse_orfs, small_orfs = \
[list(read_hxb2_orfs(aligned_subtype, orfs)) \
for orfs in [hxb2_forward_orfs, hxb2_reverse_orfs, hxb2_small_orfs]]

holistic.orfs_start = min(forward_orfs, key=lambda e: e.start).start
holistic.orfs_end = max(forward_orfs, key=lambda e: e.end).end
Expand Down
6 changes: 1 addition & 5 deletions util/expected_orf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,7 @@ class ExpectedORF:

@staticmethod
def subtyped(aligned_sequence, name, start, end, deletion_tolerence):
vpr_defective_insertion_pos = 5772
start = start if start < vpr_defective_insertion_pos else start - 1
end = end if end < vpr_defective_insertion_pos else end - 1

start_s = ReferenceIndex(start - 1).mapto(aligned_sequence) # decrement is needed because original "start" is 1-based.
start_s = ReferenceIndex(start).mapto(aligned_sequence)
end_s = ReferenceIndex(end).mapto(aligned_sequence)

nucleotides = str(aligned_sequence.this.seq[start_s:end_s])
Expand Down

0 comments on commit 7c4eede

Please sign in to comment.