diff --git a/prody/proteins/fixer.py b/prody/proteins/fixer.py index 26114bcbb..ad30aa731 100644 --- a/prody/proteins/fixer.py +++ b/prody/proteins/fixer.py @@ -19,12 +19,16 @@ __email__ = ['karolamik@fizyka.umk.pl', 'jamesmkrieger@gmail.com'] from prody import LOGGER +from numbers import Integral, Number -__all__ = ['addMissingAtoms'] +__all__ = ['addMissingAtoms', 'fixStructuresMissingAtoms'] def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): - """Function will add hydrogens to the protein and ligand structure using Openbabel [NO11]_ - or PDBFixer with OpenMM. + """This function will add hydrogens to the protein and ligand structure using Openbabel [NO11]_ + or PDBFixer with OpenMM. + + There are also options whether to *model_residues* (default False), *remove_heterogens* + (default False), *keep_waters* (default True), *overwrite* (default False). :arg infile: PDB file name :type infile: str @@ -53,11 +57,39 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): model_residues = kwargs.get("model_residues", False) remove_heterogens = kwargs.get("remove_heterogens", False) keep_water = kwargs.get("keep_water", True) + overwrite = kwargs.get("overwrite", False) import os + if not isinstance(model_residues, bool): + raise TypeError('model_residues should be True or False') + + if not isinstance(remove_heterogens, bool): + raise TypeError('remove_heterogens should be True or False') + + if not isinstance(keep_water, bool): + raise TypeError('keep_water should be True or False') + + if not isinstance(overwrite, bool): + raise TypeError('overwrite should be True or False') + + if not isinstance(infile, str): + raise TypeError('infile should be a string pointing to a file') + + if not os.path.exists(infile): + raise ValueError('infile {0} does not exist'.format(infile)) + + if not isinstance(pH, Number): + raise TypeError('pH should be a number') + if outfile == None: - outfile = 
def fixStructuresMissingAtoms(infiles, method='openbabel', pH=7.0, outfiles=None, **kwargs):
    """Add missing atoms (hydrogens) to each structure file in *infiles*.

    This is a batch wrapper that calls :func:`addMissingAtoms` once per input
    file, using Openbabel [NO11]_ or PDBFixer with OpenMM.

    Any additional keyword arguments are passed on unchanged to
    :func:`addMissingAtoms`. These include *model_residues* (default False),
    *remove_heterogens* (default False), *keep_water* (default True) and
    *overwrite* (default False).

    :arg infiles: a list of PDB file names
    :type infiles: list

    :arg method: Name of the program which will be used to fix the protein
        structure. Two alternative options are available: 'openbabel' and
        'pdbfixer'. For either option additional software needs to be
        installed:
        'openbabel': OpenBabel
        'pdbfixer': PDBFixer and OpenMM
        default is 'openbabel'
    :type method: str

    :arg pH: pH value applied only for PDBFixer.
    :type pH: int, float

    :arg outfiles: output file names, one per entry of *infiles*.
        Default is None, in which case each output name is chosen by
        :func:`addMissingAtoms` (the input file name with an 'addH_' prefix,
        in the same directory as the input file).
    :type outfiles: list

    :returns: the values returned by :func:`addMissingAtoms` (output file
        names), in the same order as *infiles*
    :rtype: list

    :raises TypeError: if *infiles* is not a list, or *outfiles* is neither
        None nor a list
    :raises ValueError: if *outfiles* and *infiles* differ in length

    Installation of Openbabel:
    conda install -c conda-forge openbabel

    Find more information here: https://anaconda.org/conda-forge/openbabel
                                https://github.com/openmm/pdbfixer

    .. [NO11] O'Boyle, N. M., Banck M., James C. A., Morley C., Vandermeersch T., Hutchison G. R.
       Open Babel: An open chemical toolbox *Journal of cheminformatics* **2011** 3:1-14. """

    if not isinstance(infiles, list):
        raise TypeError('infiles should be a list')

    if outfiles is None:
        # One placeholder per input lets addMissingAtoms pick each default name.
        outfiles = [None] * len(infiles)

    if not isinstance(outfiles, list):
        raise TypeError('outfiles should be None or a list')

    if len(outfiles) != len(infiles):
        raise ValueError('outfiles should have the same length as infiles')

    # Lengths were checked above, so zip pairs every input with its output.
    return [addMissingAtoms(infile, method, pH, outfile, **kwargs)
            for infile, outfile in zip(infiles, outfiles)]
def filterStructuresWithoutWater(structures, min_water=0, filenames=None):
    """Return the structures from *structures* that contain enough water.

    Water is counted as the number of atoms matched by the selection
    ``'water and name O'``. Structures for which that selection is empty, or
    matches fewer atoms than *min_water*, are left out and a warning is
    logged for each of them.

    :arg structures: list of :class:`.Atomic` structures to be filtered
    :type structures: list

    :arg min_water: minimum number of water O atoms,
        default is 0
    :type min_water: int

    :arg filenames: an optional list of filenames to filter too, one per
        structure. This is an output argument: entries belonging to the
        structures that are filtered out are removed from it in place.
    :type filenames: list

    :returns: the retained structures, in their original order
    :rtype: list

    :raises TypeError: if *structures* is not a list, *min_water* is not an
        integer, *filenames* is neither None nor a list, or *filenames* is
        non-empty and differs in length from *structures*
    :raises ValueError: if an element of *structures* is not an
        :class:`.Atomic` object, or an element of *filenames* is not a
        string or does not point to an existing path
    """
    # Local import: Integral also accepts NumPy integer scalars, unlike int.
    from numbers import Integral

    if not isinstance(structures, list):
        raise TypeError('structures should be a list')

    # Builtin all() replaces np.alltrue, which NumPy 2.0 removed.
    if not all(isinstance(struct, Atomic) for struct in structures):
        raise ValueError('elements of structures should be Atomic objects')

    if not isinstance(min_water, Integral):
        raise TypeError('min_water should be an integer')

    if filenames is None:
        filenames = []

    if not isinstance(filenames, list):
        raise TypeError('filenames should be None or a list')

    # TypeError (not ValueError) is kept here so existing callers that catch
    # it continue to work.
    if len(filenames) not in [0, len(structures)]:
        raise TypeError('filenames should have the same length as structures')

    if not all(isinstance(filename, str) for filename in filenames):
        raise ValueError('elements of filenames should be strings')

    if not all(os.path.exists(filename) for filename in filenames):
        raise ValueError('at least one of the filenames does not exist')

    have_filenames = len(filenames) > 0

    kept_structures = []
    kept_filenames = []
    for i, struct in enumerate(structures):
        title = struct.getTitle()
        waters = struct.select('water and name O')

        # An empty selection is compared against None by identity.
        if waters is None:
            LOGGER.warn(title+" doesn't contain water molecules")
            continue

        numWaters = waters.numAtoms()
        if numWaters < min_water:
            LOGGER.warn(title+" doesn't contain enough water molecules ({0})".format(numWaters))
            continue

        kept_structures.append(struct)
        if have_filenames:
            kept_filenames.append(filenames[i])

    if have_filenames:
        # Slice assignment mutates the caller's list, keeping *filenames*
        # usable as an output argument.
        filenames[:] = kept_filenames

    return kept_structures
a/prody/tests/database/test_pfam.py +++ b/prody/tests/database/test_pfam.py @@ -34,7 +34,7 @@ def testUniprotAccMulti(self): 'searchPfam failed to return a dict instance') self.assertEqual(sorted(list(a.keys())), - ['PF00060', 'PF00497', 'PF01094', 'PF10613'], + ['PF00060', 'PF01094', 'PF10613'], 'searchPfam failed to return the right domain family IDs') def testPdbIdChMulti(self): @@ -46,7 +46,7 @@ def testPdbIdChMulti(self): self.assertIsInstance(a, dict, 'searchPfam failed to return a dict instance') - self.assertEqual(sorted(list(a.keys())), ['PF00060', 'PF00497', 'PF01094', 'PF10613'], + self.assertEqual(sorted(list(a.keys())), ['PF00060', 'PF01094', 'PF10613'], 'searchPfam failed to return the right domain family IDs for AMPAR') def testPdbIdChSingle(self):