forked from rlabduke/reduce
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupdate_het_dict.py
169 lines (149 loc) · 6.08 KB
/
update_het_dict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
from __future__ import division
import os, sys
#This script updates the het dictionary used by Reduce to add hydrogens to het groups
#It obsoletes and replaces update_het_dict.sh
#Outline:
#Download the lasted version of the het_dictionary from the PDB
#Reformat certain columns of the het dictionary
#Edit certain functional groups
#Regularize endlines for Windows
#Set filenames and clen up
#----- Download het dict from PDB -----
import urllib
import time
print >> sys.stderr, "Downloading most recent het dict"
#Download the lasted version of the het_dictionary from the PDB
urllib.urlretrieve('ftp://ftp.wwpdb.org/pub/pdb/data/monomers/het_dictionary.txt', 'reduce_wwPDB_het_dict_download.txt')
#Store time of download for versioning
fetchtime = time.strftime("%Y%m%d_%H%M%S")
#----- End download -----
#----- Adjust columns in het dict -----
def make_pdb3_format_atom(atomstring):
#put atoms into a PDB3-like format
#Note that this reformatting does not distinguish between calcium 'CA ' and
# alpha carbons ' CA '. Both will be rendered as ' CA '.
#No issues are known to result, but the possibility remains.
atomstring = atomstring.strip()
l = len(atomstring)
if l == 4: return atomstring
elif l == 3: return ' '+atomstring
elif l == 2: return ' '+atomstring+' '
elif l == 1: return ' '+atomstring+' '
else:
sys.stdout.write("ERROR: Strange atom name |"+atomstring+"|")
sys.exit()
infile = open("reduce_wwPDB_het_dict_download.txt")
outfile = open("reduce_wwPDB_het_dict_adjusted.txt","w")
print >> sys.stderr, "Adjusting atom columns"
for line in infile:
if line.startswith("CONECT"):
x = line.split()
numconnections = '%5s' % x[2]
connections = x[3:]
atomlist = []
for atom in connections:
atomlist.append(make_pdb3_format_atom(atom))
outlist = [x[0]," ",make_pdb3_format_atom(x[1]),numconnections,' '.join(atomlist).rstrip()]
print >> outfile, ''.join(outlist).strip()
elif line.startswith("RESIDUE"):
x = line.split()
resname = x[1].rjust(3)
members = x[2].strip().rjust(7)
print >> outfile, x[0]+" "+resname+ members
elif line.startswith("END"):
print >> outfile, line.strip()," "
elif line.startswith("HETNAM") or line.startswith("HETSYN") or line.startswith("FORMUL"):
outfile.write(line)
else:
print >> outfile, line.strip()
infile.close()
outfile.close()
#----- End adjustment -----
#----- Remove H from Phosphate OH -----
#The default het dict contains some hydrogens that are not present at
# physiological conditions. These hydrogens are usually present to make the
# ligands charge-neutral.
#The most prevalent of these is the H on phosphate OH.
#Here we comment out those hydrogens so Reduce will not add them.
import re
class record():
def __init__(self):
self.lines = []
self.has_O = False
self.has_P = False
def addline(self, line):
self.lines.append(line)
def output_record(self,outfile):
for line in self.lines:
outfile.write(line)
outfile.write("\n")
def remove_OH_on_P(self):
line_index = 0
hydrogens_to_remove = []
while line_index < len(self.lines):
line = self.lines[line_index]
if line.startswith("CONECT"):
parent_atom = line[11:15].strip()
number_atoms = line[16:20].strip()
connected_atoms = line[20:].split()
if parent_atom in hydrogens_to_remove:
self.lines[line_index] = "xx"+line #assumes hydrogen always comes after oxygen
elif parent_atom.startswith("O") and number_atoms=="2": #need 2 connected atoms
atom_1 = connected_atoms[0].strip()
atom_2 = connected_atoms[1].strip()
if atom_1.startswith("P") and atom_2.startswith("H"):
hydrogens_to_remove.append(atom_2)
#matches pattern for a phosphate oxygen, need to comment out the attached hydrogen
line_index += 1
def process(self, formul_line, outfile):
phosphate_pattern = re.compile('(FORMUL)(.........)(.*H[1-9])(.*O[1-9])(.*P[1-9])')
if phosphate_pattern.match(formul_line):
self.remove_OH_on_P()
self.output_record(outfile)
infile = open("reduce_wwPDB_het_dict_adjusted.txt")
outfile = open("reduce_wwPDB_het_dict_no_poh.txt","w")
print >> sys.stderr, "Removing phosphate hydorgens"
residue_line_pattern = re.compile('(RESIDUE)(...)(?P<resname>...)')
formula_line_pattern = re.compile('FORMUL')
for line in infile:
if residue_line_pattern.match(line):
#resname = residue_line_pattern.match(line).group('resname')
rec = record()
rec.addline(line)
elif formula_line_pattern.match(line):
rec.addline(line)
rec.process(line, outfile)
else:
rec.addline(line)
infile.close()
outfile.close()
#----- End remove H from POH -----
#----- Regularized endlines for Windows -----
#Swap newline characters for Windows-compatible ones to ensure usability on Win
infile = open("reduce_wwPDB_het_dict_no_poh.txt")
outfile = open("reduce_wwPDB_het_dict_endlines.txt","w")
print >> sys.stderr, "Switching to Windows endlines"
lines = infile.readlines()
for line in lines:
line=line.replace("\n","")
outfile.write("%s\r\n" % line)
infile.close()
outfile.close()
#----- End regularize endlines -----
#----- Rename and remove files -----
#The default path Reduce uses to find the het dict file can be checked by
# invoking reduce -h and looking under the -DB flag.
#Generally, Reduce is looking for a file named reduce_wwPDB_het_dict.txt.
#Renaming the updated file is renamed to match that name allows it to be used
# automatically in most circumstances.
#The previous file is renamed with a datestamp.
print >> sys.stderr, "File cleanup"
existing_files = os.listdir(".")
if "reduce_wwPDB_het_dict.txt" in existing_files:
print >> sys.stderr, " The previous het dict was renamed to reduce_wwPDB_het_dict_obsoleted_"+fetchtime+".txt"
os.rename("reduce_wwPDB_het_dict.txt","reduce_wwPDB_het_dict_obsoleted_"+fetchtime+".txt")
os.rename("reduce_wwPDB_het_dict_endlines.txt","reduce_wwPDB_het_dict.txt")
os.remove("reduce_wwPDB_het_dict_download.txt")
os.remove("reduce_wwPDB_het_dict_adjusted.txt")
os.remove("reduce_wwPDB_het_dict_no_poh.txt")
#----- End rename and remove -----