This repository has been archived by the owner on Mar 19, 2024. It is now read-only.
forked from ramics/HIVIntact
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
move shared classes to their own source files
- Loading branch information
Showing 7 changed files with 99 additions and 79 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from dataclasses import dataclass | ||
|
||
@dataclass
class CandidateORF:
    """Plain data record describing one candidate ORF.

    The class carries no behaviour of its own beyond what ``@dataclass``
    generates (``__init__``, ``__repr__``, ``__eq__``).  Field order is part
    of the constructor signature and must not be rearranged.

    NOTE(review): the precise meaning and units of each field are not
    defined in this file; confirm against the code that builds these records.
    """

    # Identification and coordinates.
    name: str
    start: int
    end: int

    # A second pair of coordinates -- presumably in the subtype reference's
    # coordinate system (TODO confirm).
    subtype_start: int
    subtype_end: int

    # Remaining attributes, all stored verbatim.
    orientation: str
    distance: float
    protein: str
    aminoacids: str
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from dataclasses import dataclass | ||
from util.reference_index import ReferenceIndex | ||
from util.translate_to_aminoacids import translate_to_aminoacids | ||
from util.get_biggest_protein import get_biggest_protein | ||
|
||
|
||
@dataclass
class ExpectedORF:
    """Data record for an expected ORF, with a factory that derives one
    from an aligned sequence (see ``subtyped``).

    Field order is part of the generated constructor signature.  The
    spelling ``deletion_tolerence`` is kept as-is because callers use it
    as a keyword.
    """

    name: str
    start: int
    end: int
    deletion_tolerence: int
    nucleotides: str
    aminoacids: str
    protein: str

    @staticmethod
    def subtyped(aligned_sequence, name, start, end, deletion_tolerence):
        """Build an ``ExpectedORF`` whose coordinates are mapped through
        ``aligned_sequence``.

        Args:
            aligned_sequence: object accepted by ``ReferenceIndex.mapto``
                and exposing ``.this.seq`` (a sliceable sequence).
            name: stored unchanged on the result.
            start: 1-based start position (per the original inline comment).
            end: end position, used as the exclusive slice bound.
            deletion_tolerence: stored unchanged on the result.

        Returns:
            An ``ExpectedORF`` whose ``start``/``end`` are the mapped
            positions and whose ``nucleotides``, ``aminoacids`` and
            ``protein`` are derived from the mapped slice of
            ``aligned_sequence.this.seq``.
        """
        # Positions at or beyond this coordinate are shifted down by one.
        # NOTE(review): going by the name, this presumably compensates for a
        # one-base insertion in vpr -- confirm.
        vpr_defective_insertion_pos = 5772

        def shifted(position):
            if position < vpr_defective_insertion_pos:
                return position
            return position - 1

        ref_start = shifted(start)
        ref_end = shifted(end)

        # The incoming "start" is 1-based, hence the decrement before mapping.
        mapped_start = ReferenceIndex(ref_start - 1).mapto(aligned_sequence)
        mapped_end = ReferenceIndex(ref_end).mapto(aligned_sequence)

        orf_nucleotides = str(aligned_sequence.this.seq[mapped_start:mapped_end])
        orf_aminoacids = translate_to_aminoacids(orf_nucleotides)

        # NOTE(review): the start-codon check slices ``this.seq`` with the
        # shifted but *unmapped* coordinates, unlike the slice above --
        # confirm that this is intended.
        unmapped_translation = translate_to_aminoacids(
            aligned_sequence.this.seq[(ref_start - 1):ref_end]
        )
        begins_with_start_codon = unmapped_translation.startswith("M")
        longest_protein = get_biggest_protein(begins_with_start_codon, orf_aminoacids)

        return ExpectedORF(
            name=name,
            start=mapped_start,
            end=mapped_end,
            deletion_tolerence=deletion_tolerence,
            nucleotides=orf_nucleotides,
            aminoacids=orf_aminoacids,
            protein=longest_protein,
        )
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
|
||
def get_biggest_protein(has_start_codon, aminoacids):
    """Return the longest ``*``-delimited fragment of ``aminoacids``.

    When ``has_start_codon`` is truthy, each fragment is first trimmed to
    begin at its first ``"M"``; a fragment without any ``"M"`` becomes the
    empty string.  When it is falsy, fragments are compared untrimmed.
    If several fragments share the maximum length, the earliest one wins.
    """
    fragments = aminoacids.split("*")

    if has_start_codon:
        # Drop everything before the first "M" in each fragment.
        fragments = [
            piece[piece.find("M"):] if "M" in piece else ""
            for piece in fragments
        ]

    return max(fragments, key=len)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from dataclasses import dataclass | ||
|
||
@dataclass
class ReferenceIndex:
    """Wrapper around a single integer index, held in ``value``."""

    value: int

    def mapto(self, aligned):
        """Return ``aligned.map_index(self.value)``.

        ``aligned`` may be any object providing a ``map_index`` method; the
        translation itself is delegated entirely to that method.
        """
        position = self.value
        return aligned.map_index(position)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from Bio import Seq | ||
|
||
def translate_to_aminoacids(seq, frame=0, to_stop=False):
    """Translate ``seq`` with ``Bio.Seq.translate``, starting at ``frame``.

    The first ``frame`` items of ``seq`` are skipped.  The remainder is
    padded on the right with ``'N'`` until its length is a multiple of
    three, so the final codon is always complete.  ``to_stop`` is passed
    through to ``Seq.translate`` unchanged.
    """
    codons = seq[frame:]
    # Number of 'N' characters needed to reach the next multiple of three.
    codons += 'N' * ((-len(codons)) % 3)
    return Seq.translate(codons, to_stop=to_stop)