-
Notifications
You must be signed in to change notification settings - Fork 0
/
protein_seq.py
152 lines (122 loc) · 4.62 KB
/
protein_seq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
from amino_acid import AminoAcid
from Bio import PDB
class Protein:
    r"""
    Class name: Protein
    Description: A Protein object is a list of AminoAcid objects. It is convenient for mass operations on AminoAcids.
    Variables:
    self._pdb_id: PDB ID of the protein; also used as the filename stem of every file read or written
    self._seqdict: A dictionary with keys being chain ids and values being lists of AminoAcids.
    """

    # DSSP secondary-structure codes that are stored verbatim by check_dssp;
    # any other value (including a blank column) is stored as "n".
    _DSSP_SS_CODES = "HBEGITS"

    def __init__(self, pdb_id: str):
        r"""
        Object constructor. Reads "<pdb_id>.pdb" and creates one AminoAcid per residue.

        Only ATOM records are used, and reading stops at the first ENDMDL record so
        that only the first model of a multi-model file is loaded. Fields are taken
        from the fixed PDB columns rather than by whitespace splitting, because
        adjacent fields may touch (e.g. chain id followed by a 4-digit residue number).
        :param pdb_id: PDB ID of the protein
        :raises ValueError: if the file contains no ATOM record
        """
        self._pdb_id = pdb_id
        self._seqdict = {}
        previous_residue = None
        with open(f"{self._pdb_id}.pdb", "r") as file:
            for line in file:
                if line.startswith("ENDMDL"):
                    break
                if not line.startswith("ATOM"):
                    continue
                chain_id = line[21:22]
                # Chain id + residue number + insertion code identify a residue;
                # every further atom of the same residue is skipped.
                residue_key = (chain_id, line[22:27])
                if residue_key == previous_residue:
                    continue
                previous_residue = residue_key
                aa = AminoAcid(num=int(line[22:26]), chain_id=chain_id, aa=line[17:20].strip())
                # setdefault keeps residues already collected if a chain id shows up again.
                self._seqdict.setdefault(chain_id, []).append(aa)
        if not self._seqdict:
            raise ValueError(f"no ATOM records found in {self._pdb_id}.pdb")

    def get_seq(self, chainID: str) -> str:
        r"""
        Get the sequence of one chain as a single string.
        :param chainID: chain identifier
        :return: concatenation of get_aa() of every AminoAcid in the chain, in stored order
        """
        return "".join(residue.get_aa() for residue in self._seqdict[chainID])

    def get_seq_fasta(self, chainID: str):
        r"""
        Write the sequence of one chain to "<pdb_id>_<chainID>_SEQ.fasta".

        The file consists of a ">" header line carrying the PDB ID followed by the sequence.
        :param chainID: chain identifier
        :return: N/A
        """
        seq = self.get_seq(chainID)
        with open(f"{self._pdb_id}_{chainID}_SEQ.fasta", "w") as f:
            f.write(f">{self._pdb_id}\n")
            f.write(seq)

    def display(self):
        r"""
        Print the Protein object
        :return: N/A
        """
        print("ORDER | CHAINID | NAME | MEM | SOLEX | CONS | SECSTRUCT")
        for chain in self._seqdict.values():
            for residue in chain:
                residue.aa_display()

    def get_length(self) -> int:
        r"""
        Get the number of residues stored in the Protein object.
        :return: the number of residues stored in the protein object
        """
        return sum(len(chain) for chain in self._seqdict.values())

    def get_chain_length(self, chainID: str) -> int:
        r"""
        Get the number of residues stored in a particular chain.
        :param chainID: chain identifier
        :return: the number of residues stored in the chain
        """
        return len(self._seqdict[chainID])

    def check_dssp(self):
        r"""
        Reads the .dssp file and sets secondary structures for each AminoAcid.

        Also sets the solvent accessibility (solex) of each AminoAcid. Residue rows
        are matched to the stored AminoAcids by their position within each chain, so
        the DSSP file is expected to list the same residues in the same order.
        :return: N/A
        :raises ValueError: if the residue table header is not found in the file
        """
        consumed = {}  # chain id -> number of residue rows already applied
        with open(f"{self._pdb_id}.dssp", "r") as file:
            # Skip the preamble; compare tokens so the exact spacing of the header does not matter.
            for line in file:
                if line.split()[:4] == ["#", "RESIDUE", "AA", "STRUCTURE"]:
                    break
            else:
                raise ValueError(f"residue table header not found in {self._pdb_id}.dssp")
            for line in file:
                # Rows too short to hold the accessibility column, and break markers
                # ("!" in the amino-acid column), carry no residue data.
                if len(line) < 38 or line[13] == "!":
                    continue
                chain_id = line[11]
                idx = consumed.get(chain_id, 0)
                residue = self._seqdict[chain_id][idx]
                residue.set_solex(int(line[34:38]))
                ss = line[16]
                residue.set_secstruct(ss if ss in self._DSSP_SS_CODES else "n")
                # The counter is per chain, so a chain that resumes after a break
                # continues where it left off instead of restarting at 0.
                consumed[chain_id] = idx + 1

    def check_mem(self, chainID: str):
        r"""
        TO BE COMPLETED
        Sets membrane affiliation for each AminoAcid

        Reads "<pdb_id>_<chainID>_MEM.txt", takes the line following the one that
        starts with "TOPCONS predicted topology", and assigns its characters one by
        one to the AminoAcids of the chain.
        :param chainID: chain identifier
        :return: N/A
        :raises ValueError: if the topology header line is not found
        :raises IndexError: if the topology is longer than the chain
        """
        path = f"{self._pdb_id}_{chainID}_MEM.txt"
        with open(path, "r") as file:
            for line in file:
                if line.startswith("TOPCONS predicted topology"):
                    break
            else:
                raise ValueError(f"TOPCONS predicted topology not found in {path}")
            topology = next(file, "").strip()
        residues = self._seqdict[chainID]
        # Fail before touching any residue rather than half-way through.
        if len(topology) > len(residues):
            raise IndexError(
                f"topology has {len(topology)} positions but chain {chainID} has {len(residues)} residues"
            )
        for residue, state in zip(residues, topology):
            residue.set_mem(state)

    def check_cons(self, chianID: str):
        r"""
        TO BE COMPLETED
        Sets conservation scores for each AminoAcid

        Not implemented yet: the argument is ignored and nothing is modified.
        NOTE(review): the parameter name "chianID" looks like a typo for "chainID";
        it is kept so that existing keyword callers keep working.
        :param chianID: chain identifier (currently unused)
        :return: always 0
        """
        return 0

    def result(self) -> str:
        r"""
        Display all information in the Protein in a text file.

        TO BE COMPLETED: the summary content is not implemented yet, so
        "<pdb_id>_SUMMARY.txt" is created (or truncated) as an empty file.
        :return: the path to which the file is stored
        """
        path = f"{self._pdb_id}_SUMMARY.txt"
        with open(path, "w") as out:
            out.write("")
        return path