-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpdb_format.py
executable file
·152 lines (120 loc) · 4.61 KB
/
pdb_format.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python
# PDB Helix extracting stuff
global atom_defs_by_id
global sheet_defs
global helix_defs
global res_atoms_by_resid
global sorted_residues
# Atoms look like: ('symbol' is chemical, eg 'C', 'N', 'O' etc
# (atomid, resid, (x, y, z), symbol)
atom_defs_by_id = {}
sheet_defs = []
helix_defs = []
# atom looks the same here:
# (atomid, resid, (x, y, z), symbol)
res_atoms_by_resid = {}
# sorted_residues look like: (resid, (x,y,z))
sorted_residues = []
# 1-based, to make easier to match to spec
def _text_at(line, start, end):
text = line[start - 1:end - 1 + 1]
return text
# 1-based, to make easier to match to spec
def _tok_at(line, start, end):
# print "line: %s, (%s)%d - (%s)%d" % (line, type(start), start, type(end), end)
return line[start - 1: end - 1 + 1].strip()
# 1-based, to make easier to match to spec
def _float_at(line, start, end):
# print "line:\n%s, %d - %d" % (line, start, end)
return float(_tok_at(line, start, end))
# 1-based, to make easier to match to spec
def _int_at(line, start, end):
return int(_tok_at(line, start, end))
def residue_index(res0, res1):
(first, last) = (-1, -1)
# print "res0, res1: ", res0, res1
# print "r[0]: ", sorted_residues[0][0]
# find index of res0, res1
first = map(lambda r: r[0], sorted_residues).index(res0)
last = map(lambda r: r[0], sorted_residues).index(res1)
# print "fst, lst: ", first, last
# for x in range(first, last + 1):
# print "res[", x, "]", sorted_residues[x][0]
return (first, last)
# Keep chainno separate from resno
def _make_resid(chainno, resno):
# print "chainno: ", chainno, "resno:", resno
return "%3.3s:%5.5s" % (chainno, resno)
#SHEET 2 A 5 ILE 1 184 ARG 1 190 -1 N LEU 1 186 O ILE 1 223
def _load_helix_def(line):
chain = _tok_at(line, 20, 20)
start_res_no = _tok_at(line, 23, 26)
start_res_id = _make_resid(chain, start_res_no)
end_res_no = _tok_at(line, 34, 37)
end_res_id = _make_resid(chain, end_res_no)
return (start_res_id, end_res_id)
def _load_sheet_def(line):
chain = _tok_at(line, 22, 22)
start_res_no = _tok_at(line, 23, 26)
start_res_id = _make_resid(chain, start_res_no)
end_res_no = _tok_at(line, 34, 37)
end_res_id = _make_resid(chain, end_res_no)
return (start_res_id, end_res_id)
# (atomid, resid, (x, y, z), symbol)
def _load_atom_def(line):
atomid = _tok_at(line, 7, 11)
# chain no, residue within that chain
chain = _tok_at(line, 22, 22)
resno = _tok_at(line, 23, 26)
# ... not sure this id is unique!
resid = _make_resid(chain, resno)
x = _float_at(line, 31, 38)
y = _float_at(line, 39, 46)
z = _float_at(line, 47, 54)
# where the atomic symbol is supposed to be
symbol = _text_at(line, 77, 78).strip()
# digits instead of characters
if (symbol[0]).isdigit():
# secondary place to get atomic symbol
symbol = _text_at(line, 14, 14)
return (atomid, resid, (x, y, z), symbol)
# manipulates globals
def load_pdb(file):
nHelixLines = 0
nSheetLines = 0
nAtoms = 0
for line in file:
if line.startswith("HELIX"):
nHelixLines += 1
tup = _load_helix_def(line)
helix_defs.append(tup)
elif line.startswith("SHEET"):
nSheetLines += 1
tup = _load_sheet_def(line)
sheet_defs.append(tup)
elif line.startswith("ATOM"):
nAtoms += 1
tup = _load_atom_def(line)
# atom id, straight 1 - N index (in tup[0])
atom_defs_by_id[tup[0]] = tup
# tup[1] is a non-unique residue id
res_atoms_by_resid.setdefault(tup[1], []).append(tup)
# print >> sys.stderr, "pdb: %d atoms, %d helix lines, %d sheet lines" % (nAtoms, nHelixLines, nSheetLines)
_process_residues()
# looks like it just finds center of mass
def _process_residues():
(xmin,ymin,zmin,xmax,ymax,zmax) = (1e38,1e38,1e38,-1e38,-1e38,-1e38)
global sorted_residues
for resid, atoms in res_atoms_by_resid.items():
num_atoms = len(atoms)
# x,y,z = center of mass it looks like
(x, y, z) = (0.0, 0.0, 0.0)
for a in atoms:
x += a[2][0]; y += a[2][1]; z += a[2][2]
x /= num_atoms; y /= num_atoms; z /= num_atoms
xmax = max(x, xmax) ; ymax = max(y, ymax) ; zmax = max(z, zmax)
xmin = min(x, xmin) ; ymin = min(y, ymin) ; zmin = min(z, zmin)
sorted_residues.append((resid, (x, y, z)))
# sort by id ([0])
sorted_residues.sort(key=lambda r: r[0])
# print >> sys.stderr, "min res: %f, %f, %f; max res: %f, %f, %f" % (xmin, ymin, zmin, xmax, ymax, zmax)