forked from prody/ProDy
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request prody#1903 from jamesmkrieger/streamline_fix_filter
Streamline fixer and water filter for multiple structures
- Loading branch information
Showing
3 changed files
with
163 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,12 +19,16 @@ | |
__email__ = ['[email protected]', '[email protected]'] | ||
|
||
from prody import LOGGER | ||
from numbers import Integral, Number | ||
|
||
__all__ = ['addMissingAtoms'] | ||
__all__ = ['addMissingAtoms', 'fixStructuresMissingAtoms'] | ||
|
||
def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): | ||
"""Function will add hydrogens to the protein and ligand structure using Openbabel [NO11]_ | ||
or PDBFixer with OpenMM. | ||
"""This function will add hydrogens to the protein and ligand structure using Openbabel [NO11]_ | ||
or PDBFixer with OpenMM. | ||
There are also options whether to *model_residues* (default False), *remove_heterogens* | ||
(default False), *keep_waters* (default True), *overwrite* (default False). | ||
:arg infile: PDB file name | ||
:type infile: str | ||
|
@@ -53,11 +57,39 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): | |
model_residues = kwargs.get("model_residues", False) | ||
remove_heterogens = kwargs.get("remove_heterogens", False) | ||
keep_water = kwargs.get("keep_water", True) | ||
overwrite = kwargs.get("overwrite", False) | ||
|
||
import os | ||
|
||
if not isinstance(model_residues, bool): | ||
raise TypeError('model_residues should be True or False') | ||
|
||
if not isinstance(remove_heterogens, bool): | ||
raise TypeError('remove_heterogens should be True or False') | ||
|
||
if not isinstance(keep_water, bool): | ||
raise TypeError('keep_water should be True or False') | ||
|
||
if not isinstance(overwrite, bool): | ||
raise TypeError('overwrite should be True or False') | ||
|
||
if not isinstance(infile, str): | ||
raise TypeError('infile should be a string pointing to a file') | ||
|
||
if not os.path.exists(infile): | ||
raise ValueError('infile {0} does not exist'.format(infile)) | ||
|
||
if not isinstance(pH, Number): | ||
raise TypeError('pH should be a number') | ||
|
||
if outfile == None: | ||
outfile = os.path.join(os.path.split(infile)[0], "addH_" + os.path.split(infile)[1]) | ||
outfile = os.path.join(os.path.split(infile)[0], | ||
"addH_" + os.path.split(infile)[1]) | ||
|
||
if os.path.exists(outfile) and not overwrite: | ||
LOGGER.warn('outfile {0} already exists, so returning it. \ | ||
Set overwrite=True to overwrite it'.format(outfile)) | ||
return outfile | ||
|
||
if outfile == infile: | ||
raise ValueError('outfile cannot be the same as infile') | ||
|
@@ -70,17 +102,18 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): | |
raise ValueError('Openbabel cannot handle cif files') | ||
|
||
try: | ||
#import openbabel | ||
from openbabel import openbabel | ||
obconversion = openbabel.OBConversion() | ||
obconversion.SetInFormat("pdb") | ||
mol = openbabel.OBMol() | ||
obconversion.ReadFile(mol, infile) | ||
mol.AddHydrogens() | ||
obconversion.WriteFile(mol, outfile) | ||
LOGGER.info("Hydrogens were added to the structure. Structure {0} is saved in the local directry.".format(outfile)) | ||
except ImportError: | ||
raise ImportError("Install Openbabel to add hydrogens to the structure or use PDBFixer/OpenMM.") | ||
|
||
obconversion = openbabel.OBConversion() | ||
obconversion.SetInFormat("pdb") | ||
mol = openbabel.OBMol() | ||
obconversion.ReadFile(mol, infile) | ||
mol.AddHydrogens() | ||
obconversion.WriteFile(mol, outfile) | ||
LOGGER.info("Hydrogens were added to the structure. Structure {0} is saved in the local directry.".format(outfile)) | ||
|
||
|
||
elif method == 'pdbfixer': | ||
try: | ||
|
@@ -115,3 +148,50 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): | |
return outfile | ||
|
||
|
||
def fixStructuresMissingAtoms(infiles, method='openbabel', pH=7.0, outfiles=None, **kwargs):
    """This function will add hydrogens to the protein and ligand structures from a set of files
    using Openbabel [NO11]_ or PDBFixer with OpenMM. Each file is processed by
    :func:`addMissingAtoms` with the same *method*, *pH* and keyword arguments.

    There are also options whether to *model_residues* (default False), *remove_heterogens*
    (default False), *keep_water* (default True) and *overwrite* (default False).
    They are passed on unchanged to :func:`addMissingAtoms`.

    :arg infiles: a list of PDB file names
    :type infiles: list

    :arg method: Name of the program which will be used to fix the protein structure.
        Two alternative options are available: 'openbabel' and 'pdbfixer'.
        For either option additional software needs to be installed:
        'openbabel': OpenBabel
        'pdbfixer': PDBFixer and OpenMM
        default is 'openbabel'
    :type method: str

    :arg pH: pH value applied only for PDBFixer.
    :type pH: int, float

    :arg outfiles: output file names, one per entry of *infiles*.
        Default is **None**, in which case :func:`addMissingAtoms` chooses
        each name itself (a new file in the same directory with 'addH_' prefix).
    :type outfiles: list, None

    :returns: list with the value returned by :func:`addMissingAtoms`
        (the output file name) for each input file, in the order of *infiles*

    :raises TypeError: if *infiles* is not a list, or *outfiles* is neither
        **None** nor a list
    :raises ValueError: if *outfiles* and *infiles* differ in length

    Installation of Openbabel:
    conda install -c conda-forge openbabel

    Find more information here: https://anaconda.org/conda-forge/openbabel
                                https://github.com/openmm/pdbfixer

    .. [NO11] O'Boyle, N. M., Banck M., James C. A., Morley C., Vandermeersch T., Hutchison G. R.
       Open Babel: An open chemical toolbox *Journal of cheminformatics* **2011** 3:1-14. """

    if not isinstance(infiles, list):
        raise TypeError('infiles should be a list')

    if outfiles is None:
        # One placeholder per input so that every call gets outfile=None.
        outfiles = [None] * len(infiles)

    if not isinstance(outfiles, list):
        raise TypeError('outfiles should be None or a list')

    if len(outfiles) != len(infiles):
        raise ValueError('outfiles should have the same length as infiles')

    # Lengths were checked above, so zip pairs every input with its output.
    return [addMissingAtoms(infile, method, pH, outfile, **kwargs)
            for infile, outfile in zip(infiles, outfiles)]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
__email__ = ['[email protected]', '[email protected]'] | ||
|
||
import numpy as np | ||
import os | ||
|
||
from itertools import combinations | ||
from collections import deque | ||
|
@@ -28,7 +29,8 @@ | |
'calcWaterBridgesStatistics', 'getWaterBridgeStatInfo', 'calcWaterBridgeMatrix', 'showWaterBridgeMatrix', | ||
'calcBridgingResiduesHistogram', 'calcWaterBridgesDistribution', | ||
'savePDBWaterBridges', 'savePDBWaterBridgesTrajectory', | ||
'saveWaterBridges', 'parseWaterBridges', 'findClusterCenters'] | ||
'saveWaterBridges', 'parseWaterBridges', 'findClusterCenters', | ||
'filterStructuresWithoutWater'] | ||
|
||
|
||
class ResType(Enum): | ||
|
@@ -1122,8 +1124,8 @@ def findClusterCenters(file_pattern, **kwargs): | |
removeCoords = [] | ||
for ii in range(len(coords_all)): | ||
sel = coords_all.select('water within '+str(distC)+' of center', | ||
center=coords_all.getCoords()[ii]) | ||
if len(sel) <= int(numC): | ||
center=coords_all.getCoords()[ii]) | ||
if sel is not None and len(sel) <= int(numC): | ||
removeResid.append(coords_all.getResnums()[ii]) | ||
removeCoords.append(list(coords_all.getCoords()[ii])) | ||
|
||
|
@@ -1149,3 +1151,67 @@ def findClusterCenters(file_pattern, **kwargs): | |
filename = 'clusters.pdb' | ||
writePDB(filename, selectedWaters) | ||
LOGGER.info("Results are saved in {0}.".format(filename)) | ||
|
||
def filterStructuresWithoutWater(structures, min_water=0, filenames=None):
    """This function will filter out structures from *structures* that have no water
    or fewer water molecules than *min_water*.

    :arg structures: list of :class:`.Atomic` structures to be filtered
    :type structures: list

    :arg min_water: minimum number of water O atoms,
        default is 0
    :type min_water: int

    :arg filenames: an optional list of filenames to filter too.
        This is an output argument: entries belonging to rejected structures
        are removed from the list in place. It must be empty or have one
        existing file name per structure.
    :type filenames: list

    :returns: a new list with the structures that passed the filter,
        in their original order

    :raises TypeError: if *structures* or *filenames* is not a list, if
        *min_water* is not an integer, or if *filenames* is non-empty and
        differs in length from *structures*
    :raises ValueError: if an element of *structures* is not an
        :class:`.Atomic` object, an element of *filenames* is not a string,
        or a file name does not exist
    """
    # Local import: accepts NumPy integer scalars as well as builtin ints.
    from numbers import Integral

    if not isinstance(structures, list):
        raise TypeError('structures should be a list')

    # Builtin all() is used because np.alltrue was removed in NumPy 2.0.
    if not all(isinstance(struct, Atomic) for struct in structures):
        raise ValueError('elements of structures should be Atomic objects')

    if not isinstance(min_water, Integral):
        raise TypeError('min_water should be an integer')

    if filenames is None:
        filenames = []

    if not isinstance(filenames, list):
        raise TypeError('filenames should be None or a list')

    # TypeError (not ValueError) is kept here for backward compatibility.
    if len(filenames) not in (0, len(structures)):
        raise TypeError('filenames should have the same length as structures')

    if not all(isinstance(filename, str) for filename in filenames):
        raise ValueError('elements of filenames should be strings')

    if not all(os.path.exists(filename) for filename in filenames):
        raise ValueError('at least one of the filenames does not exist')

    have_filenames = len(filenames) > 0

    kept = []
    # Walk backwards so that popping an entry from *filenames* never shifts
    # the index of an entry that has not been visited yet.
    for index in range(len(structures) - 1, -1, -1):
        struct = structures[index]
        waters = struct.select('water and name O')

        # The selection is None when no atom matches; test identity rather
        # than equality so no custom __eq__ is involved.
        if waters is None:
            problem = " doesn't contain water molecules"
        else:
            num_waters = waters.numAtoms()
            if num_waters >= min_water:
                kept.append(struct)
                continue
            problem = " doesn't contain enough water molecules ({0})".format(num_waters)

        LOGGER.warn(struct.getTitle() + problem)
        if have_filenames:
            filenames.pop(index)

    # Structures were collected back to front; restore the input order.
    kept.reverse()
    return kept
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters