From 06e57046679ee5166ef393274e8982d6a223a2f0 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 20:10:55 +0200 Subject: [PATCH 01/12] getResidueName use_segname --- prody/proteins/waterbridges.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 5b519113c..e479729f7 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -29,7 +29,8 @@ __all__ = ['calcWaterBridges', 'calcWaterBridgesTrajectory', 'getWaterBridgesInfoOutput', - 'calcWaterBridgesStatistics', 'getWaterBridgeStatInfo', 'calcWaterBridgeMatrix', 'showWaterBridgeMatrix', + 'calcWaterBridgesStatistics', 'getWaterBridgeStatInfo', + 'calcWaterBridgeMatrix', 'showWaterBridgeMatrix', 'calcBridgingResiduesHistogram', 'calcWaterBridgesDistribution', 'savePDBWaterBridges', 'savePDBWaterBridgesTrajectory', 'saveWaterBridges', 'parseWaterBridges', 'findClusterCenters', @@ -623,8 +624,12 @@ def analyseFrame(i, interactions_all): return interactions_all -def getResidueName(atom): - return f'{atom.getResname()}{atom.getResnum()}{atom.getChid()}' +def getResidueName(atom, use_segname=False): + result = f'{atom.getResname()}{atom.getResnum()}{atom.getChid()}' + if use_segname: + result += f'{atom.getSegname()}' + + return result class DictionaryList: @@ -843,9 +848,14 @@ def calcBridgingResiduesHistogram(frames, **kwargs): :arg clip: maximal number of residues on graph; to represent all set None default is 20 :type clip: int + + :arg use_segname: whether to use segname to label residues + default is False, because then the labels get long + :type use_segname: bool """ show_plot = kwargs.pop('show_plot', False) + use_segname = kwargs.get('use_segname', False) clip = kwargs.pop('clip', 20) if clip == None: @@ -854,7 +864,7 @@ def calcBridgingResiduesHistogram(frames, **kwargs): residuesWithCount = {} for frame in frames: frameResidues = set(reduceTo1D( - frame, getResidueName, lambda wb: wb.proteins)) + frame, lambda x: getResidueName(x, use_segname=use_segname), lambda wb: wb.proteins)) for res in frameResidues: residuesWithCount[res] = residuesWithCount.get(res, 0) + 1 From cf5f2f30e7ac79b7b6de67b097118b9641ecba6d Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 20:11:21 +0200 Subject: [PATCH 02/12] rename typo internal function --- prody/proteins/waterbridges.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index e479729f7..5059fb6dc 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -954,7 +954,7 @@ def getDistanceDistribution(frames, res_a, res_b, trajectory): return distances -def getResidueLocationDistrubtion(frames, res_a, res_b): +def getResidueLocationDistribution(frames, res_a, res_b): locationInfo = {"backbone": 0, "side": 0} result = {res_a: locationInfo.copy(), res_b: locationInfo.copy()} @@ -1012,7 +1012,7 @@ def calcWaterBridgesDistribution(frames, res_a, res_b=None, **kwargs): 'residues': lambda: getBridgingResidues(frames, res_a), 'waters': lambda: getWaterCountDistribution(frames, res_a, res_b), 'distance': lambda: getDistanceDistribution(frames, res_a, res_b, trajectory), - 'location': lambda: getResidueLocationDistrubtion(frames, res_a, res_b) + 'location': lambda: getResidueLocationDistribution(frames, res_a, res_b) } result = methods[metric]() From 9efc3876203ad53f7127afc2eb5213d81d975870 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 21:09:52 +0200 Subject: [PATCH 03/12] formatting fix --- prody/proteins/waterbridges.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 5059fb6dc..081a50820 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -564,8 +564,8 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): frame0 = traj[j0] p = mp.Process(target=analyseFrame, args=(j0, start_frame, - frame0, - interactions_all)) + frame0, + interactions_all)) p.start() processes.append(p) @@ -864,7 +864,8 @@ def calcBridgingResiduesHistogram(frames, **kwargs): residuesWithCount = {} for frame in frames: frameResidues = set(reduceTo1D( - frame, lambda x: getResidueName(x, use_segname=use_segname), lambda wb: wb.proteins)) + frame, lambda x: getResidueName(x, use_segname=use_segname), + lambda wb: wb.proteins)) for res in frameResidues: residuesWithCount[res] = residuesWithCount.get(res, 0) + 1 From 893a886582d61393933c3c84142d768b56728f38 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 21:10:09 +0200 Subject: [PATCH 04/12] selectSurrounding whole residues --- prody/proteins/waterbridges.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 081a50820..b67041cda 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -1353,7 +1353,7 @@ def selectSurroundingsBox(atoms, select, padding=0, return_selstr=False): minCoords -= padding maxCoords += padding - selstr = '(x `{0} to {1}`) and (y `{2} to {3}`) and (z `{4} to {5}`)'.format( + selstr = 'same residue as ((x `{0} to {1}`) and (y `{2} to {3}`) and (z `{4} to {5}`))'.format( minCoords[0], maxCoords[0], minCoords[1], maxCoords[1], minCoords[2], maxCoords[2]) if return_selstr: From 94b8eec1142d98b3c6621c0b703ae704d8bd652e Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 21:46:58 +0200 Subject: [PATCH 05/12] super important fix on frames analysed --- prody/proteins/waterbridges.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index b67041cda..0594911af 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -557,11 +557,11 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): interactions_all.append([]) j0 = start_frame - while j0 < traj.numConfs(): + while j0 < traj.numConfs()+start_frame: processes = [] for i in range(max_proc): - frame0 = traj[j0] + frame0 = traj[j0-start_frame] p = mp.Process(target=analyseFrame, args=(j0, start_frame, frame0, @@ -570,7 +570,7 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): processes.append(p) j0 += 1 - if j0 >= traj.numConfs(): + if j0 >= traj.numConfs()+start_frame: break for p in processes: From 35ec0d928d92da55f8daa1dfe5d297190eee0fc1 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 21:49:09 +0200 Subject: [PATCH 06/12] fix on pdb coordsets analysed --- prody/proteins/waterbridges.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 0594911af..c517d532b 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -603,7 +603,7 @@ def analyseFrame(i, interactions_all): interactions_all.append([]) i = start_frame - while i < stop_frame: + while i < len(atoms.getCoordsets()[start_frame:stop_frame]): processes = [] for i in range(max_proc): p = mp.Process(target=analyseFrame, args=(i, interactions_all)) @@ -611,7 +611,7 @@ def analyseFrame(i, interactions_all): processes.append(p) i += 1 - if i >= stop_frame: + if i >= len(atoms.getCoordsets()[start_frame:stop_frame]): break for p in processes: From d17e539a4a72f102df476e7be1e4fc876dc2c80e Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 22:43:55 +0200 Subject: [PATCH 07/12] common selection --- prody/proteins/waterbridges.py | 47 +++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index c517d532b..c31c432e6 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -504,7 +504,7 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): :arg atoms: Atomic object from which atoms are considered :type atoms: :class:`.Atomic` - :arg trajectory: Trajectory data coming from a DCD or multi-model PDB file. + :arg trajectory: Trajectory data coming from a DCD, ensemble or multi-model PDB file. :type trajectory: :class:`.Trajectory', :class:`.Ensemble`, :class:`.Atomic` :arg start_frame: frame to start from @@ -516,10 +516,29 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): :arg max_proc: maximum number of processes to use default is half of the number of CPUs :type max_proc: int + + :arg selstr: selection string for focusing analysis + default of **None** focuses on everything + :type selstr: str + + :arg expand_selection: whether to expand the selection with + :func:`.selectSurroundingsBox`, selecting a box surrounding it. + Default is **False** + :type expand_selection: bool + + If selstr is provided, a common selection will be found across all frames + combining selections satifying the criteria in each. + + :arg return_selection: whether to return the combined common selection + Default is **False** + :type return_selection: bool """ start_frame = kwargs.pop('start_frame', 0) stop_frame = kwargs.pop('stop_frame', -1) max_proc = kwargs.pop('max_proc', mp.cpu_count()//2) + selstr = kwargs.pop('selstr', None) + expand_selection = kwargs.pop('expand_selection', False) + return_selection = kwargs.pop('return_selection', False) if trajectory is not None: if isinstance(trajectory, Atomic): @@ -533,11 +552,29 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): traj = trajectory[start_frame:] else: traj = trajectory[start_frame:stop_frame+1] + + if selstr is not None: + indices = [] + for frame0 in traj: + atoms_copy = atoms.copy() + atoms_copy.setCoords(frame0.getCoords()) + selection = atoms_copy.select(selstr) + + if expand_selection: + selection = selectSurroundingsBox(atoms_copy, selection) + + indices.extend(list(selection.getIndices())) + + indices = np.unique(indices) + - atoms_copy = atoms.copy() def analyseFrame(j0, start_frame, frame0, interactions_all): LOGGER.info('Frame: {0}'.format(j0)) + atoms_copy = atoms.copy() atoms_copy.setCoords(frame0.getCoords()) + atoms_copy = atoms_copy[indices] + + kwargs['selstr'] = atoms_copy.getSelstr() interactions = calcWaterBridges( atoms_copy, isInfoLog=False, @@ -621,6 +658,9 @@ def analyseFrame(i, interactions_all): else: LOGGER.info('Include trajectory or use multi-model PDB file.') + if return_selection: + return interactions_all, atoms_copy + return interactions_all @@ -1074,7 +1114,8 @@ def savePDBWaterBridgesTrajectory(bridgeFrames, atoms, filename, trajectory=None :arg filename: name of file to be saved; must end in .pdb :type filename: string - :arg trajectory: DCD trajectory (not needed for multimodal PDB) + :arg trajectory: trajectory data (not needed for multi-model PDB) + :type trajectory: :class:`.Trajectory', :class:`.Ensemble`, :class:`.Atomic` """ if not trajectory and atoms.numCoordsets() < len(bridgeFrames): raise TypeError('Provide parsed trajectory!') From c1d0f3575bda193f72143f21bd2db43df11c8c34 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 4 Aug 2024 00:09:44 +0200 Subject: [PATCH 08/12] Define indices --- prody/proteins/waterbridges.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index c31c432e6..c30a942dc 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -552,7 +552,8 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): traj = trajectory[start_frame:] else: traj = trajectory[start_frame:stop_frame+1] - + + indices = None if selstr is not None: indices = [] for frame0 in traj: From 4d580f0d34166ab012b8117536a35d2371b81ae6 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 4 Aug 2024 00:34:19 +0200 Subject: [PATCH 09/12] Only use indices if not none --- prody/proteins/waterbridges.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index c30a942dc..8836b2f75 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -573,7 +573,9 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): LOGGER.info('Frame: {0}'.format(j0)) atoms_copy = atoms.copy() atoms_copy.setCoords(frame0.getCoords()) - atoms_copy = atoms_copy[indices] + + if indices is not None: + atoms_copy = atoms_copy[indices] kwargs['selstr'] = atoms_copy.getSelstr() From a78f3fd3d3661d3dfab10fc1738a6e377e8a7235 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 4 Aug 2024 00:40:05 +0200 Subject: [PATCH 10/12] Another fix --- prody/proteins/waterbridges.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 8836b2f75..141457711 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -576,8 +576,7 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): if indices is not None: atoms_copy = atoms_copy[indices] - - kwargs['selstr'] = atoms_copy.getSelstr() + kwargs['selstr'] = atoms_copy.getSelstr() interactions = calcWaterBridges( atoms_copy, isInfoLog=False, From 2e46facdf7bde51c52a55fe073cf726f2f46790f Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 4 Aug 2024 02:53:07 +0200 Subject: [PATCH 11/12] apply indices outside analyseFrame for output --- prody/proteins/waterbridges.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 141457711..ef8497731 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -661,6 +661,10 @@ def analyseFrame(i, interactions_all): LOGGER.info('Include trajectory or use multi-model PDB file.') if return_selection: + if indices is not None: + atoms_copy = atoms_copy[indices] + kwargs['selstr'] = atoms_copy.getSelstr() + return interactions_all, atoms_copy return interactions_all From e346e509492a995ffe1a08a169f938d34e9728ec Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 4 Aug 2024 03:44:34 +0200 Subject: [PATCH 12/12] doc findClusterCenters filename kwarg --- prody/proteins/waterbridges.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index ef8497731..aa62e8990 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -1258,6 +1258,11 @@ def findClusterCenters(file_pattern, **kwargs): :arg numC: min number of molecules in a cluster default is 3 :type numC: int + + :arg filename: filename for output pdb file with clusters + Default of **None** leads to + 'clusters_'+file_pattern.split("*")[0]+'.pdb' + :type filename: str """ import glob