forked from haddocking/pdb-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pdb_join.py
executable file
·103 lines (81 loc) · 2.97 KB
/
pdb_join.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python
"""
Joins several conformations of the same structure in one ensemble file.
usage: python pdb_join.py <pdb file> <pdb file> ...
example: python pdb_join.py 1CTF_01.pdb 1CTF_02.pdb
Author: {0} ({1})
This program is part of the PDB tools distributed with HADDOCK
or with the HADDOCK tutorial. The utilities in this package
can be used to quickly manipulate PDB files, with the benefit
of 'piping' several different commands. This is a rewrite of old
FORTRAN77 code that was taking too much effort to compile. RIP.
"""
import os
import re
import sys
__author__ = "Joao Rodrigues"
__email__ = "[email protected]"
USAGE = __doc__.format(__author__, __email__)
def check_input(args):
"""Checks whether to read from stdin/file and validates user input/options."""
if len(args) < 2:
sys.stderr.write(USAGE)
sys.exit(1)
for pdb_f in args:
if not os.path.isfile(pdb_f):
sys.stderr.write('File could not be read: ' + pdb_f + '\n')
sys.exit(1)
return args
def _build_atom_unique_id(atom_line):
"""Returns a unique identifying tuple from an ATOM line"""
# unique_id: (name, altloc, resi, insert, chain, segid)
unique_id = (atom_line[12:16], atom_line[16], int(atom_line[22:26]), atom_line[26], atom_line[21], atom_line[72:76].strip())
return unique_id
def _join_pdb(pdb_list):
"""Enclosing logic in a function"""
auid = _build_atom_unique_id
coord_re = re.compile('^(ATOM|HETATM)')
pdb_list = pdb_list
atom_set = set()
pdb_data, remarks = [], []
for i_model, pdb_f in enumerate(pdb_list, start=1):
model_auids = set()
remarks.append('REMARK {0}'.format(os.path.basename(pdb_f)))
pdb_data.append('MODEL {0:>4d}'.format(i_model))
with open(pdb_f, 'r') as handle:
for line in handle:
line = line.strip('\n')
if line.startswith('TER'):
pdb_data.append('TER')
continue
if coord_re.match(line):
pdb_data.append(line)
model_auids.add(auid(line))
pdb_data.append('ENDMDL')
if atom_set:
diff = atom_set - model_auids
if diff:
emsg = 'Different atoms in model {0}\n'.format(i_model)
sys.stderr.write(emsg)
sys.exit(1)
else:
atom_set = model_auids
pdb_data.append('END')
pdb_data = remarks + pdb_data
return pdb_data
if __name__ == '__main__':
# Check Input
pdb_list = check_input(sys.argv[1:])
# Do the job
ensemble = _join_pdb(pdb_list)
try:
sys.stdout.write('\n'.join(ensemble))
sys.stdout.flush()
except IOError:
# This is here to catch Broken Pipes
# for example to use 'head' or 'tail' without
# the error message showing up
pass
# last line of the script
# We can close it even if it is sys.stdin
sys.exit(0)