diff --git a/docs/about/people.rst b/docs/about/people.rst index e72e7225b..a46c0516a 100644 --- a/docs/about/people.rst +++ b/docs/about/people.rst @@ -29,8 +29,15 @@ including *SignDy* and Adaptive ANM. the *cryo-EM* module, :mod:`.protein.emdmap`. `Burak Kaynak`_ contributed significantly to the development of -:mod:`.domain_decomposition` and :mod:`.dynamics.essa`, -and is in the process of adding other modules too. +:mod:`.domain_decomposition`, :mod:`.dynamics.essa`, and +:mod:`.dynamics.clustenm`. + +`Karolina Mikulska-Ruminska`_ contributed significantly to the development of +:mod:`.protein.interactions` (*InSty*), :mod:`.protein.waterbridges` +(*WatFinder*), and :mod:`.dynamics.mechstiff` (*MechStiff*). + +`Anthony Bogetti`_ is overseeing the overall development of *ProDy* since +2024. `Anindita Dutta`_ contributed to the development of *Evol*, :mod:`.database` and :mod:`.sequence` modules. @@ -52,15 +59,18 @@ contributions and feedback from the following individuals: `Ying Liu`_ provided the code for Perturbation Response Scanning method. +`Frane Doljanin`_ provided the code for the water bridge detection. + `Kian Ho`_ contributed with bug fixes and unit tests for DSSP functions. `Gökçen Eraslan`_ contributed with bug fixes and development and maintenance insights. + .. _Ahmet Bakan: https://scholar.google.com/citations?user=-QAYVgMAAAAJ&hl=en .. _Cihan Kaya: https://www.linkedin.com/in/cihan-kaya/ -.. _Bahar Lab: http://www.ccbb.pitt.edu/faculty/bahar/ +.. _Bahar Lab: http://www.bahargroup.org/Faculty/bahar/ .. _University of Pittsburgh: http://www.pitt.edu/ .. _Anindita Dutta: http://www.linkedin.com/pub/anindita-dutta/5a/568/a90 .. _Wenzhi Mao: http://www.linkedin.com/pub/wenzhi-mao/2a/29a/29 @@ -68,10 +78,13 @@ insights. .. _Ying Liu: http://www.linkedin.com/pub/ying-liu/15/48b/5a9 .. _Kian Ho: https://github.com/kianho .. _Gökçen Eraslan: http://blog.yeredusuncedernegi.com/ -.. _Tim Lezon: http://www.csb.pitt.edu/Faculty/Lezon/ +.. _Tim Lezon: https://scholar.google.pl/citations?user=1MwNI3EAAAAJ&hl=pl&oi=ao .. _Chakra Chennubhotla: http://www.csb.pitt.edu/Faculty/Chakra/ .. _She (John) Zhang: https://www.linkedin.com/in/she-zhang-49164399/ .. _Hongchun Li: http://www.pitt.edu/~hongchun/ -.. _James Krieger: http://www.csb.pitt.edu/Faculty/bahar/lab.html -.. _Yan Zhang: https://www.csb.pitt.edu/Faculty/bahar/lab.html -.. _Burak Kaynak: https://www.csb.pitt.edu/Faculty/bahar/lab.html \ No newline at end of file +.. _James Krieger: https://scholar.google.pl/citations?user=DoiCjkUAAAAJ&hl=pl +.. _Yan Zhang: https://scholar.google.pl/citations?user=VxwU0pgAAAAJ&hl=pl&oi=sra +.. _Burak Kaynak: https://scholar.google.pl/citations?user=gP8RokwAAAAJ&hl=pl&oi=ao +.. _Karolina Mikulska-Ruminska: https://scholar.google.pl/citations?user=IpyPHRwAAAAJ&hl=pl +.. _Anthony Bogetti: https://scholar.google.pl/citations?hl=pl&user=9qQClIcAAAAJ +.. 
_Frane Doljanin: https://github.com/fdoljanin \ No newline at end of file diff --git a/docs/release/index.rst b/docs/release/index.rst index 2007f5566..cfe0dfaff 100644 --- a/docs/release/index.rst +++ b/docs/release/index.rst @@ -10,6 +10,10 @@ Release Notes :maxdepth: 2 :glob: + v2.4_series + v2.3_series + v2.2_series + v2.1_series v2.0_series v1.11_series v1.10_series diff --git a/prody/apps/prody_apps/prody_catdcd.py b/prody/apps/prody_apps/prody_catdcd.py index 8ce4e378c..a71c2bbdb 100644 --- a/prody/apps/prody_apps/prody_catdcd.py +++ b/prody/apps/prody_apps/prody_catdcd.py @@ -80,8 +80,9 @@ def prody_catdcd(*dcd, **kwargs): out = prody.DCDFile(output, 'w') count = 0 stride = kwargs.get('stride', 1) - goto = stride != 1 - slc = slice(kwargs.get('first', 0), kwargs.get('last', -1), + first = kwargs.get('first', 0) + goto = stride != 1 or first != 0 + slc = slice(first, kwargs.get('last', -1), stride).indices(len(traj)+1) for i in range(*slc): if goto: diff --git a/prody/database/dali.py b/prody/database/dali.py index 5a4925e0e..6f7c73b2f 100644 --- a/prody/database/dali.py +++ b/prody/database/dali.py @@ -154,8 +154,8 @@ def __init__(self, url, pdbId, chain, subset='fullPDB', localFile=False, **kwarg """ self._url = url - self._pdbId = pdbId - self._chain = chain + self._pdbId = pdbId.lower() + self._chain = chain.upper() subset = subset.upper() if subset == "FULLPDB" or subset not in ["PDB25", "PDB50", "PDB90"]: self._subset = "" @@ -163,7 +163,7 @@ def __init__(self, url, pdbId, chain, subset='fullPDB', localFile=False, **kwarg self._subset = "-"+subset[3:] timeout = kwargs.pop('timeout', 120) - self._title = pdbId + '-' + chain + self._title = self._pdbId + '-' + self._chain self._alignPDB = None self._filterDict = None self._max_index = None diff --git a/prody/dynamics/clustenm.py b/prody/dynamics/clustenm.py index 8284c505c..9bac6ea92 100644 --- a/prody/dynamics/clustenm.py +++ b/prody/dynamics/clustenm.py @@ -111,6 +111,8 @@ def __init__(self, title=None): self._targeted = False self._tmdk = 10. 
+ self._cc = None + super(ClustENM, self).__init__('Unknown') # dummy title; will be replaced in the next line self._title = title diff --git a/prody/dynamics/plotting.py b/prody/dynamics/plotting.py index e6c7a546d..3a87ee4dc 100644 --- a/prody/dynamics/plotting.py +++ b/prody/dynamics/plotting.py @@ -1997,7 +1997,7 @@ def func_ticklabels(val, pos): last += len(resnums) else: x.extend(resnums + last) - last = resnums[-1] + last += resnums[-1] if gap: if overlay: @@ -2355,7 +2355,7 @@ def showTree_networkx(tree, node_size=20, node_color='red', node_shape='o', networkx.draw_networkx_edges(G, pos=layout) if np.isscalar(node_shape): - networkx.draw_networkx_nodes(G, pos=layout, withlabels=False, node_size=sizes, + networkx.draw_networkx_nodes(G, pos=layout, label=None, node_size=sizes, node_shape=node_shape, node_color=colors) else: for shape in shape_groups: @@ -2363,7 +2363,7 @@ def showTree_networkx(tree, node_size=20, node_color='red', node_shape='o', nodesizes = [sizes[i] for i in shape_groups[shape]] nodecolors = [colors[i] for i in shape_groups[shape]] if not nodelist: continue - networkx.draw_networkx_nodes(G, pos=layout, withlabels=False, node_size=nodesizes, + networkx.draw_networkx_nodes(G, pos=layout, label=None, node_size=nodesizes, node_shape=shape, node_color=nodecolors, nodelist=nodelist) diff --git a/prody/ensemble/functions.py b/prody/ensemble/functions.py index 884191168..c09cd844e 100644 --- a/prody/ensemble/functions.py +++ b/prody/ensemble/functions.py @@ -9,12 +9,18 @@ from prody.proteins import alignChains from prody.utilities import openFile, showFigure, copy, isListLike, pystr, DTYPE from prody import LOGGER, SETTINGS -from prody.atomic import Atomic, AtomMap, Chain, AtomGroup, Selection, Segment, Select, AtomSubset +from prody.atomic import Atomic, AtomGroup from prody.sequence import buildSeqidMatrix -from .ensemble import * -from .pdbensemble import * -from .conformation import * +have_openmm = True +try: + import openmm +except ImportError: + have_openmm = False + +from .ensemble import Ensemble +from .pdbensemble import PDBEnsemble +from .conformation import PDBConformation __all__ = ['saveEnsemble', 'loadEnsemble', 'trimPDBEnsemble', 'calcOccupancies', 'showOccupancies', @@ -46,7 +52,9 @@ def saveEnsemble(ensemble, filename=None, **kwargs): '_padding', '_ionicStrength', '_force_field', '_tolerance', '_maxIterations', '_sim', '_temp', '_t_steps', '_outlier', '_mzscore', '_v1', '_parallel', '_idx_cg', '_n_cg', '_cycle', - '_time', '_targeted', '_tmdk', '_topology', '_positions', '_cc']) + '_time', '_targeted', '_tmdk', '_cc']) + if have_openmm: + attr_list.extend(['_topology', '_positions']) if filename is None: filename = ensemble.getTitle().replace(' ', '_') @@ -158,7 +166,10 @@ def loadEnsemble(filename, **kwargs): '_rmsd', '_n_gens', '_maxclust', '_threshold', '_sol', '_sim', '_temp', '_t_steps', '_outlier', '_mzscore', '_v1', '_parallel', '_idx_ca', '_n_ca', '_cycle', '_time', '_targeted', - '_tmdk', '_topology', '_positions', '_cc'] + '_tmdk', '_cc'] + if have_openmm: + attrs.extend(['_topology', '_position']) + for attr in attrs: if attr in attr_dict.files: diff --git a/prody/measure/measure.py b/prody/measure/measure.py index 2fa4a12be..4d8678fd2 100644 --- a/prody/measure/measure.py +++ b/prody/measure/measure.py @@ -811,7 +811,7 @@ def buildADPMatrix(atoms): element[0, 1] = element[1, 0] = anisou[3] element[0, 2] = element[2, 0] = anisou[4] element[1, 2] = element[2, 1] = anisou[5] - adp[i*3:i*3, i*3:i*3] = element + adp[i*3:i*3+3, i*3:i*3+3] = element 
return adp diff --git a/prody/proteins/__init__.py b/prody/proteins/__init__.py index 193bc12b6..b0abfef53 100644 --- a/prody/proteins/__init__.py +++ b/prody/proteins/__init__.py @@ -260,7 +260,7 @@ except SyntaxError: import logging logger = logging.getLogger() - logger.warn("Cannot import waterbridges in python 2") + logger.warn("Cannot import waterbridges") else: __all__.extend(waterbridges.__all__) diff --git a/prody/proteins/fixer.py b/prody/proteins/fixer.py index 26114bcbb..ad30aa731 100644 --- a/prody/proteins/fixer.py +++ b/prody/proteins/fixer.py @@ -19,12 +19,16 @@ __email__ = ['karolamik@fizyka.umk.pl', 'jamesmkrieger@gmail.com'] from prody import LOGGER +from numbers import Integral, Number -__all__ = ['addMissingAtoms'] +__all__ = ['addMissingAtoms', 'fixStructuresMissingAtoms'] def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): - """Function will add hydrogens to the protein and ligand structure using Openbabel [NO11]_ - or PDBFixer with OpenMM. + """This function will add hydrogens to the protein and ligand structure using Openbabel [NO11]_ + or PDBFixer with OpenMM. + + There are also options whether to *model_residues* (default False), *remove_heterogens* + (default False), *keep_waters* (default True), *overwrite* (default False). :arg infile: PDB file name :type infile: str @@ -53,11 +57,39 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): model_residues = kwargs.get("model_residues", False) remove_heterogens = kwargs.get("remove_heterogens", False) keep_water = kwargs.get("keep_water", True) + overwrite = kwargs.get("overwrite", False) import os + if not isinstance(model_residues, bool): + raise TypeError('model_residues should be True or False') + + if not isinstance(remove_heterogens, bool): + raise TypeError('remove_heterogens should be True or False') + + if not isinstance(keep_water, bool): + raise TypeError('keep_water should be True or False') + + if not isinstance(overwrite, bool): + raise TypeError('overwrite should be True or False') + + if not isinstance(infile, str): + raise TypeError('infile should be a string pointing to a file') + + if not os.path.exists(infile): + raise ValueError('infile {0} does not exist'.format(infile)) + + if not isinstance(pH, Number): + raise TypeError('pH should be a number') + if outfile == None: - outfile = os.path.join(os.path.split(infile)[0], "addH_" + os.path.split(infile)[1]) + outfile = os.path.join(os.path.split(infile)[0], + "addH_" + os.path.split(infile)[1]) + + if os.path.exists(outfile) and not overwrite: + LOGGER.warn('outfile {0} already exists, so returning it. \ +Set overwrite=True to overwrite it'.format(outfile)) + return outfile if outfile == infile: raise ValueError('outfile cannot be the same as infile') @@ -70,17 +102,18 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): raise ValueError('Openbabel cannot handle cif files') try: - #import openbabel from openbabel import openbabel - obconversion = openbabel.OBConversion() - obconversion.SetInFormat("pdb") - mol = openbabel.OBMol() - obconversion.ReadFile(mol, infile) - mol.AddHydrogens() - obconversion.WriteFile(mol, outfile) - LOGGER.info("Hydrogens were added to the structure. 
Structure {0} is saved in the local directry.".format(outfile)) except ImportError: raise ImportError("Install Openbabel to add hydrogens to the structure or use PDBFixer/OpenMM.") + + obconversion = openbabel.OBConversion() + obconversion.SetInFormat("pdb") + mol = openbabel.OBMol() + obconversion.ReadFile(mol, infile) + mol.AddHydrogens() + obconversion.WriteFile(mol, outfile) + LOGGER.info("Hydrogens were added to the structure. Structure {0} is saved in the local directry.".format(outfile)) + elif method == 'pdbfixer': try: @@ -115,3 +148,50 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): return outfile +def fixStructuresMissingAtoms(infiles, method='openbabel', pH=7.0, outfiles=None, **kwargs): + """This function will add hydrogens to the protein and ligand structure from a set of files + using Openbabel [NO11]_ or PDBFixer with OpenMM. + + There are also options whether to *model_residues* (default False), *remove_heterogens* + (default False) and *keep_waters* (default True). + + :arg infiles: a list of PDB file names + :type infile: list + + :arg method: Name of program which will be use to fix protein structure. + Two alternative options are available: 'openbabel' and 'pdbfixer'. + For either option additional software need to be installed: + 'openbabel': OpenBabel + 'pdbfixer': PDBFixer and OpenMM + default is 'openbabel' + :type method: str + + :arg pH: pH value applyed only for PDBfixer. + :type pH: int, float + + Instalation of Openbabel: + conda install -c conda-forge openbabel + + Find more information here: https://anaconda.org/conda-forge/openbabel + https://github.com/openmm/pdbfixer + Program will create new file in the same directory with 'addH_' prefix. + + .. [NO11] O'Boyle, N. M., Banck M., James C. A., Morley C., Vandermeersch T., Hutchison G. R. + Open Babel: An open chemical toolbox *Journal of cheminformatics* **2011** 3:1-14. 
""" + + if not isinstance(infiles, list): + raise TypeError('infiles should be a list') + + if outfiles is None: + outfiles = [None for infile in infiles] + + if not isinstance(outfiles, list): + raise TypeError('outfiles should be None or a list') + if len(outfiles) != len(infiles): + raise ValueError('outfiles should have the same length as infiles') + + results = [] + for i, infile in enumerate(infiles): + results.append(addMissingAtoms(infile, method, pH, + outfiles[i], **kwargs)) + return results diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index a01c9f114..500d85ce9 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -7,7 +7,11 @@ __credits__ = ['Frane Doljanin', 'Karolina Mikulska-Ruminska'] __email__ = ['karolamik@fizyka.umk.pl', 'fdoljanin@pmfst.hr'] +import multiprocessing as mp + +from numbers import Number import numpy as np +import os from itertools import combinations from collections import deque @@ -25,10 +29,12 @@ __all__ = ['calcWaterBridges', 'calcWaterBridgesTrajectory', 'getWaterBridgesInfoOutput', - 'calcWaterBridgesStatistics', 'getWaterBridgeStatInfo', 'calcWaterBridgeMatrix', 'showWaterBridgeMatrix', + 'calcWaterBridgesStatistics', 'getWaterBridgeStatInfo', + 'calcWaterBridgeMatrix', 'showWaterBridgeMatrix', 'calcBridgingResiduesHistogram', 'calcWaterBridgesDistribution', 'savePDBWaterBridges', 'savePDBWaterBridgesTrajectory', - 'saveWaterBridges', 'parseWaterBridges', 'findClusterCenters'] + 'saveWaterBridges', 'parseWaterBridges', 'findClusterCenters', + 'filterStructuresWithoutWater', 'selectSurroundingsBox'] class ResType(Enum): @@ -364,6 +370,15 @@ def calcWaterBridges(atoms, **kwargs): :arg isInfoLog: should log information default is True :type output: bool + + :arg selstr: selection string for focusing analysis + default of **None** focuses on everything + :type selstr: str + + :arg expand_selection: whether to expand the selection with + :func:`.selectSurroundingsBox`, selecting a box surrounding it. 
+ Default is **False** + :type expand_selection: bool """ method = kwargs.pop('method', 'chain') @@ -379,12 +394,23 @@ def calcWaterBridges(atoms, **kwargs): outputType = kwargs.pop('output', 'atomic') isInfoLog = kwargs.pop('isInfoLog', True) DIST_COVALENT_H = 1.4 + prefix = kwargs.pop('prefix', '') if method not in ['chain', 'cluster']: raise TypeError('Method should be chain or cluster.') if outputType not in ['info', 'atomic', 'indices']: raise TypeError('Output can be info, atomic or indices.') + selstr = kwargs.pop('selstr', None) + if selstr is not None: + selection = atoms.select(selstr).copy() + + expand_selection = kwargs.pop('expand_selection', False) + if expand_selection: + atoms = selectSurroundingsBox(atoms, selection).copy() + else: + atoms = selection.copy() + water = atoms.select('water') if water is None: raise ValueError('atoms has no water so cannot be analysed with WatFinder') @@ -400,7 +426,7 @@ def calcWaterBridges(atoms, **kwargs): waterHydrogens = consideredAtoms.select('water and hydrogen') or [] waterOxygens = consideredAtoms.select('water and oxygen') waterHydroOxyPairs = findNeighbors( - waterOxygens, DIST_COVALENT_H, waterHydrogens) + waterOxygens, DIST_COVALENT_H, waterHydrogens) if waterHydrogens else [] for oxygen in waterOxygens: relations.addNode(oxygen, ResType.WATER) for pair in waterHydroOxyPairs: @@ -412,7 +438,7 @@ def calcWaterBridges(atoms, **kwargs): proteinHydrogens = consideredAtoms.select(f'protein and hydrogen') or [] proteinHydroPairs = findNeighbors( - proteinHydrophilic, DIST_COVALENT_H, proteinHydrogens) + proteinHydrophilic, DIST_COVALENT_H, proteinHydrogens) if proteinHydrogens else [] for hydrophilic in proteinHydrophilic: relations.addNode(hydrophilic, ResType.PROTEIN) for pair in proteinHydroPairs: @@ -448,8 +474,11 @@ def calcWaterBridges(atoms, **kwargs): waterBridgesWithIndices = getUniqueElements( waterBridgesWithIndices, getChainBridgeTuple) - LOGGER.info( - f'{len(waterBridgesWithIndices)} water bridges detected using method {method}.') + log_string = f'{len(waterBridgesWithIndices)} water bridges detected using method {method}' + if prefix != '': + log_string += ' for ' + prefix + LOGGER.info(log_string) + if method == 'atomic': LOGGER.info('Call getInfoOutput to convert atomic to info output.') @@ -475,7 +504,7 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): :arg atoms: Atomic object from which atoms are considered :type atoms: :class:`.Atomic` - :arg trajectory: Trajectory data coming from a DCD or multi-model PDB file. + :arg trajectory: Trajectory data coming from a DCD, ensemble or multi-model PDB file. :type trajectory: :class:`.Trajectory', :class:`.Ensemble`, :class:`.Atomic` :arg start_frame: frame to start from @@ -483,11 +512,34 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): :arg stop_frame: frame to stop :type stop_frame: int - """ - interactions_all = [] + :arg max_proc: maximum number of processes to use + default is half of the number of CPUs + :type max_proc: int + + :arg selstr: selection string for focusing analysis + default of **None** focuses on everything + :type selstr: str + + :arg expand_selection: whether to expand the selection with + :func:`.selectSurroundingsBox`, selecting a box surrounding it. + Default is **False** + :type expand_selection: bool + + If selstr is provided, a common selection will be found across all frames + combining selections satifying the criteria in each. 
+ + :arg return_selection: whether to return the combined common selection + Default is **False** to keep expected behaviour. + However, this output is required when using selstr. + :type return_selection: bool + """ start_frame = kwargs.pop('start_frame', 0) stop_frame = kwargs.pop('stop_frame', -1) + max_proc = kwargs.pop('max_proc', mp.cpu_count()//2) + selstr = kwargs.pop('selstr', None) + expand_selection = kwargs.pop('expand_selection', False) + return_selection = kwargs.pop('return_selection', False) if trajectory is not None: if isinstance(trajectory, Atomic): @@ -502,30 +554,134 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): else: traj = trajectory[start_frame:stop_frame+1] - atoms_copy = atoms.copy() - for j0, frame0 in enumerate(traj, start=start_frame): + indices = None + selection = atoms + if selstr is not None: + LOGGER.info('Finding common selection') + indices = [] + for frame0 in traj: + atoms_copy = atoms.copy() + atoms_copy.setCoords(frame0.getCoords()) + selection = atoms_copy.select(selstr) + + if expand_selection: + selection = selectSurroundingsBox(atoms_copy, selection) + + indices.extend(list(selection.getIndices())) + + indices = np.unique(indices) + selection = atoms_copy[indices] + + LOGGER.info('Common selection found with {0} atoms and {1} protein chains'.format(selection.numAtoms(), + len(list(selection.protein.getHierView())))) + + def analyseFrame(j0, start_frame, frame0, interactions_all): LOGGER.info('Frame: {0}'.format(j0)) + atoms_copy = atoms.copy() atoms_copy.setCoords(frame0.getCoords()) + + if indices is not None: + atoms_copy = atoms_copy[indices] + kwargs['selstr'] = atoms_copy.getSelstr() + interactions = calcWaterBridges( - atoms_copy, isInfoLog=False, **kwargs) - interactions_all.append(interactions) + atoms_copy, isInfoLog=False, + prefix='frame {0}'.format(j0), + **kwargs) + interactions_all[j0-start_frame] = interactions + + if max_proc == 1: + interactions_all = [] + for j0, frame0 in enumerate(traj, start=start_frame): + interactions_all.append([]) + analyseFrame(j0, start_frame, frame0, interactions_all) + else: + with mp.Manager() as manager: + interactions_all = manager.list() + for j0, frame0 in enumerate(traj, start=start_frame): + interactions_all.append([]) + + j0 = start_frame + while j0 < traj.numConfs()+start_frame: + + processes = [] + for _ in range(max_proc): + frame0 = traj[j0-start_frame] + + p = mp.Process(target=analyseFrame, args=(j0, start_frame, + frame0, + interactions_all)) + p.start() + processes.append(p) + + j0 += 1 + if j0 >= traj.numConfs()+start_frame: + break + + for p in processes: + p.join() + + interactions_all = interactions_all[:] + # trajectory._nfi = nfi else: if atoms.numCoordsets() > 1: - for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): - LOGGER.info('Model: {0}'.format(i+start_frame)) + def analyseFrame(i, interactions_all): + frameNum = i+start_frame + LOGGER.info('Model: {0}'.format(frameNum)) atoms.setACSIndex(i+start_frame) - interactions = calcWaterBridges(atoms, **kwargs) - interactions_all.append(interactions) + interactions = calcWaterBridges( + atoms, isInfoLog=False, prefix='frame {0}'.format(frameNum), + **kwargs) + interactions_all[i] = interactions + + if max_proc == 1: + interactions_all = [] + for j0, frame0 in enumerate(traj, start=start_frame): + interactions_all.append([]) + analyseFrame(j0, start_frame, frame0, interactions_all) + else: + with mp.Manager() as manager: + interactions_all = manager.list() + for i in 
range(len(atoms.getCoordsets()[start_frame:stop_frame])): + interactions_all.append([]) + + i = start_frame + while i < len(atoms.getCoordsets()[start_frame:stop_frame]): + processes = [] + for _ in range(max_proc): + p = mp.Process(target=analyseFrame, args=(i, interactions_all)) + p.start() + processes.append(p) + + i += 1 + if i >= len(atoms.getCoordsets()[start_frame:stop_frame]): + break + + for p in processes: + p.join() + + interactions_all = interactions_all[:] else: LOGGER.info('Include trajectory or use multi-model PDB file.') + if return_selection: + if indices is not None: + atoms_copy = atoms_copy[indices] + kwargs['selstr'] = atoms_copy.getSelstr() + + return interactions_all, atoms_copy + return interactions_all -def getResidueName(atom): - return f'{atom.getResname()}{atom.getResnum()}{atom.getChid()}' +def getResidueName(atom, use_segname=False): + result = f'{atom.getResname()}{atom.getResnum()}{atom.getChid()}' + if use_segname: + result += f'{atom.getSegname()}' + + return result class DictionaryList: @@ -744,9 +900,14 @@ def calcBridgingResiduesHistogram(frames, **kwargs): :arg clip: maximal number of residues on graph; to represent all set None default is 20 :type clip: int + + :arg use_segname: whether to use segname to label residues + default is False, because then the labels get long + :type use_segname: bool """ show_plot = kwargs.pop('show_plot', False) + use_segname = kwargs.get('use_segname', False) clip = kwargs.pop('clip', 20) if clip == None: @@ -755,7 +916,8 @@ def calcBridgingResiduesHistogram(frames, **kwargs): residuesWithCount = {} for frame in frames: frameResidues = set(reduceTo1D( - frame, getResidueName, lambda wb: wb.proteins)) + frame, lambda x: getResidueName(x, use_segname=use_segname), + lambda wb: wb.proteins)) for res in frameResidues: residuesWithCount[res] = residuesWithCount.get(res, 0) + 1 @@ -845,7 +1007,7 @@ def getDistanceDistribution(frames, res_a, res_b, trajectory): return distances -def getResidueLocationDistrubtion(frames, res_a, res_b): +def getResidueLocationDistribution(frames, res_a, res_b): locationInfo = {"backbone": 0, "side": 0} result = {res_a: locationInfo.copy(), res_b: locationInfo.copy()} @@ -903,7 +1065,7 @@ def calcWaterBridgesDistribution(frames, res_a, res_b=None, **kwargs): 'residues': lambda: getBridgingResidues(frames, res_a), 'waters': lambda: getWaterCountDistribution(frames, res_a, res_b), 'distance': lambda: getDistanceDistribution(frames, res_a, res_b, trajectory), - 'location': lambda: getResidueLocationDistrubtion(frames, res_a, res_b) + 'location': lambda: getResidueLocationDistribution(frames, res_a, res_b) } result = methods[metric]() @@ -952,7 +1114,7 @@ def savePDBWaterBridges(bridges, atoms, filename): return writePDB(filename, atomsToSave) -def savePDBWaterBridgesTrajectory(bridgeFrames, atoms, filename, trajectory=None): +def savePDBWaterBridgesTrajectory(bridgeFrames, atoms, filename, trajectory=None, max_proc=1): """Saves one PDB per frame with occupancy and beta on protein atoms and waters forming bridges in frame. 
:arg bridgeFrames: atomic output from calcWaterBridgesTrajectory @@ -964,7 +1126,8 @@ def savePDBWaterBridgesTrajectory(bridgeFrames, atoms, filename, trajectory=None :arg filename: name of file to be saved; must end in .pdb :type filename: string - :arg trajectory: DCD trajectory (not needed for multimodal PDB) + :arg trajectory: trajectory data (not needed for multi-model PDB) + :type trajectory: :class:`.Trajectory', :class:`.Ensemble`, :class:`.Atomic` """ if not trajectory and atoms.numCoordsets() < len(bridgeFrames): raise TypeError('Provide parsed trajectory!') @@ -974,7 +1137,8 @@ def savePDBWaterBridgesTrajectory(bridgeFrames, atoms, filename, trajectory=None atoms = atoms.copy() mofifyBeta(bridgeFrames, atoms) - for frameIndex, frame in enumerate(bridgeFrames): + def saveBridgesFrame(trajectory, atoms, frameIndex, frame): + LOGGER.info('Frame: {0}'.format(frameIndex)) if trajectory: coords = trajectory[frameIndex].getCoords() atoms.setCoords(coords) @@ -1000,6 +1164,26 @@ def savePDBWaterBridgesTrajectory(bridgeFrames, atoms, filename, trajectory=None writePDB(f'{filename}_{frameIndex}.pdb', atomsToSave, csets=frameIndex) + if max_proc == 1: + for frameIndex, frame in enumerate(bridgeFrames): + saveBridgesFrame(trajectory, atoms, frameIndex, frame) + else: + frameIndex = 0 + numFrames = len(bridgeFrames) + while frameIndex < numFrames: + processes = [] + for _ in range(max_proc): + p = mp.Process(target=saveBridgesFrame, args=(trajectory, atoms, frameIndex, + bridgeFrames[frameIndex])) + p.start() + processes.append(p) + + frameIndex += 1 + if frameIndex >= numFrames: + break + + for p in processes: + p.join() def getBridgeIndicesString(bridge): return ' '.join(map(lambda a: str(a.getIndex()), bridge.proteins))\ @@ -1101,6 +1285,11 @@ def findClusterCenters(file_pattern, **kwargs): :arg numC: min number of molecules in a cluster default is 3 :type numC: int + + :arg filename: filename for output pdb file with clusters + Default of **None** leads to + 'clusters_'+file_pattern.split("*")[0]+'.pdb' + :type filename: str """ import glob @@ -1109,24 +1298,28 @@ def findClusterCenters(file_pattern, **kwargs): selection = kwargs.pop('selection', 'water and name OH2') distC = kwargs.pop('distC', 0.3) numC = kwargs.pop('numC', 3) + filename = kwargs.pop('filename', None) matching_files = glob.glob(file_pattern) matching_files.sort() coords_all = parsePDB(matching_files[0]).select(selection).toAtomGroup() for i in matching_files[1:]: - coords = parsePDB(i).select('water').toAtomGroup() + coords = parsePDB(i).select(selection).toAtomGroup() coords_all += coords removeResid = [] removeCoords = [] for ii in range(len(coords_all)): sel = coords_all.select('water within '+str(distC)+' of center', - center=coords_all.getCoords()[ii]) - if len(sel) <= int(numC): + center=coords_all.getCoords()[ii]) + if sel is not None and len(sel) <= int(numC): removeResid.append(coords_all.getResnums()[ii]) removeCoords.append(list(coords_all.getCoords()[ii])) + if len(removeCoords) == coords_all.numAtoms(): + raise ValueError('No waters were selected. You may need to align your trajectory') + selectedWaters = AtomGroup() sel_waters = [] @@ -1135,17 +1328,113 @@ def findClusterCenters(file_pattern, **kwargs): sel_waters.append(j) coords_wat = np.array([sel_waters], dtype=float) - if coords_wat.shape[0] == 0: - raise ValueError('No waters were selected. 
You may need to align your trajectory') - selectedWaters.setCoords(coords_wat) selectedWaters.setNames(['DUM']*len(selectedWaters)) selectedWaters.setResnums(range(1, len(selectedWaters)+1)) selectedWaters.setResnames(['DUM']*len(selectedWaters)) - try: - filename = 'clusters_'+file_pattern.split("*")[0]+'.pdb' - except: - filename = 'clusters.pdb' + if filename is None: + try: + filename = 'clusters_'+file_pattern.split("*")[0]+'.pdb' + except: + filename = 'clusters.pdb' + writePDB(filename, selectedWaters) LOGGER.info("Results are saved in {0}.".format(filename)) + +def filterStructuresWithoutWater(structures, min_water=0, filenames=None): + """This function will filter out structures from *structures* that have no water + or fewer water molecules than *min_water*. + + :arg structures: list of :class:`.Atomic` structures to be filtered + :type structures: list + + :arg min_water: minimum number of water O atoms, + default is 0 + :type min_water: int + + :arg filenames: an optional list of filenames to filter too + This is an output argument + :type filenames: list + """ + + if not isinstance(structures, list): + raise TypeError('structures should be a list') + + if not np.alltrue([isinstance(struct, Atomic) for struct in structures]): + raise ValueError('elements of structures should be Atomic objects') + + if not isinstance(min_water, int): + raise TypeError('min_water should be an integer') + + if filenames is None: filenames = [] + + if not isinstance(filenames, list): + raise TypeError('filenames should be None or a list') + + if len(filenames) not in [0, len(structures)]: + raise TypeError('filenames should have the same length as structures') + + if not np.alltrue([isinstance(filename, str) for filename in filenames]): + raise ValueError('elements of filenames should be strings') + + if not np.alltrue([os.path.exists(filename) for filename in filenames]): + raise ValueError('at least one of the filenames does not exist') + + have_filenames = len(filenames)>0 + + new_structures = [] + numStructures = len(structures) + for i, struct in enumerate(reversed(structures)): + title = struct.getTitle() + waters = struct.select('water and name O') + + if waters == None: + LOGGER.warn(title+" doesn't contain water molecules") + if have_filenames: + filenames.pop(numStructures-i-1) + continue + + numWaters = waters.numAtoms() + if numWaters < min_water: + LOGGER.warn(title+" doesn't contain enough water molecules ({0})".format(numWaters)) + if have_filenames: + filenames.pop(numStructures-i-1) + continue + + new_structures.append(struct) + + return list(reversed(new_structures)) + + +def selectSurroundingsBox(atoms, select, padding=0, return_selstr=False): + """Select the surroundings of *select* within *atoms* using + a bounding box with optional *padding*.""" + + if not isinstance(atoms, Atomic): + raise TypeError('atoms should be an Atomic object') + + if isinstance(select, str): + select = atoms.select(select) + + if not isinstance(select, Atomic): + raise TypeError('select should be a valid selection or selection string') + + if not isinstance(padding, Number): + raise TypeError('padding should be a number') + if padding < 0: + raise ValueError('padding should be a positive number') + + minCoords = select.getCoords().min(axis=0) + maxCoords = select.getCoords().max(axis=0) + + if padding > 0: + minCoords -= padding + maxCoords += padding + + selstr = 'same residue as ((x `{0} to {1}`) and (y `{2} to {3}`) and (z `{4} to {5}`))'.format( + minCoords[0], maxCoords[0], minCoords[1], 
maxCoords[1], minCoords[2], maxCoords[2]) + + if return_selstr: + return selstr + return atoms.select(selstr) diff --git a/prody/tests/database/test_pfam.py b/prody/tests/database/test_pfam.py index f041cb76b..385d8b372 100644 --- a/prody/tests/database/test_pfam.py +++ b/prody/tests/database/test_pfam.py @@ -34,7 +34,7 @@ def testUniprotAccMulti(self): 'searchPfam failed to return a dict instance') self.assertEqual(sorted(list(a.keys())), - ['PF00060', 'PF00497', 'PF01094', 'PF10613'], + ['PF00060', 'PF01094', 'PF10613'], 'searchPfam failed to return the right domain family IDs') def testPdbIdChMulti(self): @@ -46,7 +46,7 @@ def testPdbIdChMulti(self): self.assertIsInstance(a, dict, 'searchPfam failed to return a dict instance') - self.assertEqual(sorted(list(a.keys())), ['PF00060', 'PF00497', 'PF01094', 'PF10613'], + self.assertEqual(sorted(list(a.keys())), ['PF00060', 'PF01094', 'PF10613'], 'searchPfam failed to return the right domain family IDs for AMPAR') def testPdbIdChSingle(self): diff --git a/pyproject.toml b/pyproject.toml index 097169ce0..40aa5e374 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,2 @@ [build-system] -requires = ["setuptools", "wheel", "numpy>=1.10,<1.25", "pyparsing<=3.1.1", "scipy"] +requires = ["setuptools", "wheel", "numpy>=1.10,<1.25", "pyparsing<=3.1.1", "scipy<=1.13.1"] diff --git a/setup.py b/setup.py index c9080278c..33980ad1c 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ if sys.version_info[:2] == (2, 7) or sys.version_info[:2] <= (3, 5): INSTALL_REQUIRES=['numpy>=1.10,<1.25', 'biopython<=1.76', 'pyparsing', 'scipy'] else: - INSTALL_REQUIRES=['numpy>=1.10,<1.24', 'biopython', 'pyparsing<=3.1.1', 'scipy', 'setuptools'] + INSTALL_REQUIRES=['numpy>=1.10,<1.24', 'biopython', 'pyparsing<=3.1.1', 'scipy<=1.13.1', 'setuptools'] if sys.version_info[0] == 3 and sys.version_info[1] < 6: sys.stderr.write('Python 3.5 and older is not supported\n')
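A minimal usage sketch of the structure-preparation helpers touched by this patch (filterStructuresWithoutWater, addMissingAtoms, fixStructuresMissingAtoms), assuming hypothetical input files frame1.pdb and frame2.pdb and an OpenBabel or PDBFixer/OpenMM installation as noted in the docstrings:

    from prody import parsePDB
    from prody.proteins.fixer import addMissingAtoms, fixStructuresMissingAtoms
    from prody.proteins.waterbridges import filterStructuresWithoutWater

    pdb_files = ['frame1.pdb', 'frame2.pdb']      # hypothetical inputs
    structures = [parsePDB(f) for f in pdb_files]

    # Drop structures (and, in place, their file names) that have no water O atoms.
    structures = filterStructuresWithoutWater(structures, min_water=1,
                                              filenames=pdb_files)

    # Protonate one structure with PDBFixer/OpenMM; an existing addH_ output file
    # is returned as-is unless overwrite=True.
    fixed = addMissingAtoms(pdb_files[0], method='pdbfixer', pH=7.0,
                            model_residues=True, overwrite=True)

    # Or protonate the whole filtered set in one call with the OpenBabel backend.
    fixed_all = fixStructuresMissingAtoms(pdb_files, method='openbabel')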
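A sketch of the extended WatFinder trajectory interface (selstr, expand_selection, max_proc, return_selection), assuming a hypothetical solvated topology system.pdb with a matching DCD trajectory system.dcd and an illustrative chain selection:

    from prody import parsePDB, Trajectory
    from prody.proteins.waterbridges import (calcWaterBridgesTrajectory,
                                             calcBridgingResiduesHistogram)

    atoms = parsePDB('system.pdb')      # hypothetical solvated topology
    traj = Trajectory('system.dcd')     # hypothetical DCD trajectory
    traj.link(atoms)

    # Focus on a box around chain A, analyse frames in parallel (4 processes) and
    # also get back the common selection that was used for every frame.
    bridges, sel = calcWaterBridgesTrajectory(atoms, traj,
                                              selstr='protein and chain A',
                                              expand_selection=True,
                                              max_proc=4,
                                              return_selection=True)

    # Per-residue bridging frequency over the analysed frames.
    counts = calcBridgingResiduesHistogram(bridges, clip=20)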
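A sketch of the two new standalone helpers, selectSurroundingsBox and findClusterCenters, assuming a hypothetical structure complex.pdb containing a ligand named LIG and a hypothetical set of aligned per-frame PDB files matching wat_frames_*.pdb:

    from prody import parsePDB
    from prody.proteins.waterbridges import (selectSurroundingsBox,
                                             findClusterCenters)

    atoms = parsePDB('complex.pdb')     # hypothetical structure with waters

    # Everything in a 5 A-padded box around the (hypothetical) ligand residue LIG.
    box = selectSurroundingsBox(atoms, 'resname LIG', padding=5)

    # The same helper can hand back the selection string instead of the atoms.
    selstr = selectSurroundingsBox(atoms, 'resname LIG', padding=5,
                                   return_selstr=True)

    # Cluster water oxygens from the per-frame PDB files into representative
    # centers and write them to a chosen output file.
    findClusterCenters('wat_frames_*.pdb', selection='water and name OH2',
                       distC=0.3, numC=3, filename='water_clusters.pdb')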