-
Notifications
You must be signed in to change notification settings - Fork 20
/
waters.py
82 lines (71 loc) · 2.36 KB
/
waters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
'''
Calculates the average number of water contacts for each residue across the trajectory.
For each residue the waters are selected in a specified cutoff distance and then the counts are averaged
for that residue.
Output:
a .csv file with indexed by resid including residue names and the average water contacts
e.g.
resid | resname | waters
1 LEU | 2.6
Running the script
python waters.py MDAnalysis_supported_topolgy MDAnalysis_supported_trajectory csv_output_name.csv
e.g.
python contacts.py FXN_R165N_complex.prmtop FXN_R165N_complex1.dcd waters1.csv
'''
import sys
import MDAnalysis
import pandas as pd
from pandas import DataFrame
import numpy as np # numpy 1.16.6
from MDAnalysis.analysis import distances
import multiprocessing
'''
Load trajectory into curr_traj
'''
def getTraj():
global dcd
global curr_traj # must be global to work with multiprocessing
top = str(sys.argv[1])
dcd = str(sys.argv[2])
traj = MDAnalysis.Universe(top, dcd)
curr_traj = traj
def get_res_waters(traj,cutoff=2):
'''Given an mdanalysis trajectory and contact distance cutoff,
count the number of water molecules within cutoff of each residue'''
protein = traj.select_atoms('protein')
resids = np.unique(protein.resids)
ret = []
for rid in resids:
wat = traj.select_atoms('resname WAT and around %f resid %d'%(cutoff,rid))
ret.append((rid,len(wat)))
return pd.DataFrame(ret,columns=('resid','waters'))
'''
calls get_res_contacts and returns numpy area of contacts for that frame
'''
def traj_frame_waters(i):
curr_traj.trajectory[i]
rc = get_res_waters(curr_traj)
return rc.waters.to_numpy()
'''
Creates and returns the final dataframe with water contacts averaged for each residue over the
entire trajectory
'''
def waters_df():
waters = pd.DataFrame()
protein = curr_traj.select_atoms('protein')
waters['resid'] = protein.residues.resids
waters['resname'] = protein.residues.resnames
print("Calculating waters...")
pool = multiprocessing.Pool()
traj_waters = pool.map(traj_frame_waters,range(curr_traj.trajectory.n_frames))
traj_waters = np.array(traj_waters)
waters['waters'] = (traj_waters.mean(axis=0))
return waters.set_index('resid')
if __name__ == '__main__' :
print('Loading trajectories...')
getTraj()
print('Calculating waters...')
waters = waters_df()
print('Outputing csv...')
waters.to_csv(sys.argv[3])
print('Done.')