From ae9355eff7f2c1b1a4810abf194418c3b566d875 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 7 Jul 2024 16:02:17 +0100 Subject: [PATCH 01/30] fix showAtomicLines gaps --- prody/dynamics/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/dynamics/plotting.py b/prody/dynamics/plotting.py index e8bc603e4..3a87ee4dc 100644 --- a/prody/dynamics/plotting.py +++ b/prody/dynamics/plotting.py @@ -1997,7 +1997,7 @@ def func_ticklabels(val, pos): last += len(resnums) else: x.extend(resnums + last) - last = resnums[-1] + last += resnums[-1] if gap: if overlay: From dfb615daf3df1e601f1bfac773eb46cdfa77d005 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 1 Aug 2024 17:15:52 +0200 Subject: [PATCH 02/30] fix multiprocessing output --- prody/proteins/waterbridges.py | 42 ++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index f0edcb60b..cff468742 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -508,8 +508,6 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): :arg stop_frame: frame to stop :type stop_frame: int """ - - interactions_all = [] start_frame = kwargs.pop('start_frame', 0) stop_frame = kwargs.pop('stop_frame', -1) @@ -535,15 +533,21 @@ def analyseFrame(j0, frame0, interactions_all): atoms_copy, isInfoLog=False, **kwargs) interactions_all.append(interactions) - jobs = [] - for j0, frame0 in enumerate(traj, start=start_frame): - p = mp.Process(target=analyseFrame, args=(j0, frame0, - interactions_all)) - p.start() - jobs.append(p) + with mp.Manager() as manager: + interactions_all = manager.list() + processes = [] + for j0, frame0 in enumerate(traj, start=start_frame): + p = mp.Process(target=analyseFrame, args=(j0, + frame0, + interactions_all)) + p.start() + processes.append(p) + + for p in processes: + p.join() + + interactions_all = interactions_all[:] - for proc in jobs: - proc.join() # trajectory._nfi = nfi else: @@ -555,14 +559,18 @@ def analyseFrame(i, interactions_all): atoms, isInfoLog=False, **kwargs) interactions_all.append(interactions) - jobs = [] - for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): - p = mp.Process(target=analyseFrame, args=(i, interactions_all)) - p.start() - jobs.append(p) + with mp.Manager() as manager: + interactions_all = manager.list() + processes = [] + for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): + p = mp.Process(target=analyseFrame, args=(i, interactions_all)) + p.start() + processes.append(p) + + for p in processes: + p.join() - for proc in jobs: - proc.join() + interactions_all = interactions_all[:] else: LOGGER.info('Include trajectory or use multi-model PDB file.') From fbad0edabd5e91037e8e0f0993fa4aa5c1469453 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 1 Aug 2024 18:26:27 +0200 Subject: [PATCH 03/30] fix output order --- prody/proteins/waterbridges.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index cff468742..3b79b063d 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -525,21 +525,24 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): traj = trajectory[start_frame:stop_frame+1] atoms_copy = atoms.copy() - def analyseFrame(j0, frame0, interactions_all): + def analyseFrame(j0, start_frame, frame0, interactions_all): LOGGER.info('Frame: {0}'.format(j0)) atoms_copy.setCoords(frame0.getCoords()) interactions = calcWaterBridges( atoms_copy, isInfoLog=False, **kwargs) - interactions_all.append(interactions) + interactions_all[j0-start_frame] = interactions with mp.Manager() as manager: interactions_all = manager.list() + for j0, frame0 in enumerate(traj, start=start_frame): + interactions_all.append([]) + processes = [] for j0, frame0 in enumerate(traj, start=start_frame): - p = mp.Process(target=analyseFrame, args=(j0, - frame0, - interactions_all)) + p = mp.Process(target=analyseFrame, args=(j0, start_frame, + frame0, + interactions_all)) p.start() processes.append(p) @@ -557,10 +560,13 @@ def analyseFrame(i, interactions_all): atoms.setACSIndex(i+start_frame) interactions = calcWaterBridges( atoms, isInfoLog=False, **kwargs) - interactions_all.append(interactions) + interactions_all[i] = interactions with mp.Manager() as manager: interactions_all = manager.list() + for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): + interactions_all.append([]) + processes = [] for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): p = mp.Process(target=analyseFrame, args=(i, interactions_all)) From 9946f9f6e687f632c21675ea3b967f1aa47780e2 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 1 Aug 2024 22:21:49 +0200 Subject: [PATCH 04/30] label log output frames --- prody/proteins/waterbridges.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 3b79b063d..98c9674cc 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -393,6 +393,7 @@ def calcWaterBridges(atoms, **kwargs): outputType = kwargs.pop('output', 'atomic') isInfoLog = kwargs.pop('isInfoLog', True) DIST_COVALENT_H = 1.4 + prefix = kwargs.pop('prefix', '') if method not in ['chain', 'cluster']: raise TypeError('Method should be chain or cluster.') @@ -472,8 +473,11 @@ def calcWaterBridges(atoms, **kwargs): waterBridgesWithIndices = getUniqueElements( waterBridgesWithIndices, getChainBridgeTuple) - LOGGER.info( - f'{len(waterBridgesWithIndices)} water bridges detected using method {method}.') + log_string = f'{len(waterBridgesWithIndices)} water bridges detected using method {method}' + if prefix != '': + log_string += ' for ' + prefix + LOGGER.info(log_string) + if method == 'atomic': LOGGER.info('Call getInfoOutput to convert atomic to info output.') @@ -530,7 +534,9 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): atoms_copy.setCoords(frame0.getCoords()) interactions = calcWaterBridges( - atoms_copy, isInfoLog=False, **kwargs) + atoms_copy, isInfoLog=False, + prefix='frame {0}'.format(j0), + **kwargs) interactions_all[j0-start_frame] = interactions with mp.Manager() as manager: @@ -556,10 +562,12 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): else: if atoms.numCoordsets() > 1: def analyseFrame(i, interactions_all): - LOGGER.info('Model: {0}'.format(i+start_frame)) + frameNum = i+start_frame + LOGGER.info('Model: {0}'.format(frameNum)) atoms.setACSIndex(i+start_frame) interactions = calcWaterBridges( - atoms, isInfoLog=False, **kwargs) + atoms, isInfoLog=False, prefix='frame {0}'.format(frameNum), + **kwargs) interactions_all[i] = interactions with mp.Manager() as manager: From 3b89f83227644f4c065835f3b5cf950f760cf610 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 1 Aug 2024 23:43:36 +0200 Subject: [PATCH 05/30] fix findClusterCenters no waters check --- prody/proteins/waterbridges.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 98c9674cc..394836183 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -1202,7 +1202,7 @@ def findClusterCenters(file_pattern, **kwargs): sel_waters.append(j) coords_wat = np.array([sel_waters], dtype=float) - if coords_wat.shape[0] == 0: + if coords_wat.shape[1] == 0: raise ValueError('No waters were selected. You may need to align your trajectory') selectedWaters.setCoords(coords_wat) From b039e52e81d2e792fa829ac882dcb097c840bf2e Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 1 Aug 2024 23:49:46 +0200 Subject: [PATCH 06/30] add filename kwarg to findClusterCenters --- prody/proteins/waterbridges.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 394836183..0e9d0b206 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -1176,6 +1176,7 @@ def findClusterCenters(file_pattern, **kwargs): selection = kwargs.pop('selection', 'water and name OH2') distC = kwargs.pop('distC', 0.3) numC = kwargs.pop('numC', 3) + filename = kwargs.pop('filename', None) matching_files = glob.glob(file_pattern) matching_files.sort() @@ -1210,10 +1211,12 @@ def findClusterCenters(file_pattern, **kwargs): selectedWaters.setResnums(range(1, len(selectedWaters)+1)) selectedWaters.setResnames(['DUM']*len(selectedWaters)) - try: - filename = 'clusters_'+file_pattern.split("*")[0]+'.pdb' - except: - filename = 'clusters.pdb' + if filename is None: + try: + filename = 'clusters_'+file_pattern.split("*")[0]+'.pdb' + except: + filename = 'clusters.pdb' + writePDB(filename, selectedWaters) LOGGER.info("Results are saved in {0}.".format(filename)) From b02578e7567d27bdd6e03fd11e1162a8d0da6b80 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 2 Aug 2024 14:27:28 +0200 Subject: [PATCH 07/30] use selection for parsePDB(i) to not have whole water --- prody/proteins/waterbridges.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 0e9d0b206..e820d2855 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -1183,7 +1183,7 @@ def findClusterCenters(file_pattern, **kwargs): coords_all = parsePDB(matching_files[0]).select(selection).toAtomGroup() for i in matching_files[1:]: - coords = parsePDB(i).select('water').toAtomGroup() + coords = parsePDB(i).select(selection).toAtomGroup() coords_all += coords removeResid = [] From 0a362255b2c471c6c4bf7e9321285d0e85e5cf46 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 2 Aug 2024 14:27:52 +0200 Subject: [PATCH 08/30] better no water check before looping --- prody/proteins/waterbridges.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index e820d2855..ad40e7f1a 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -1195,6 +1195,9 @@ def findClusterCenters(file_pattern, **kwargs): removeResid.append(coords_all.getResnums()[ii]) removeCoords.append(list(coords_all.getCoords()[ii])) + if len(removeCoords) == coords_all.numAtoms(): + raise ValueError('No waters were selected. You may need to align your trajectory') + selectedWaters = AtomGroup() sel_waters = [] @@ -1203,9 +1206,6 @@ def findClusterCenters(file_pattern, **kwargs): sel_waters.append(j) coords_wat = np.array([sel_waters], dtype=float) - if coords_wat.shape[1] == 0: - raise ValueError('No waters were selected. You may need to align your trajectory') - selectedWaters.setCoords(coords_wat) selectedWaters.setNames(['DUM']*len(selectedWaters)) selectedWaters.setResnums(range(1, len(selectedWaters)+1)) From 2ff2c6f6aa9efe92e9f5ff150570875e728fabb4 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 2 Aug 2024 16:40:58 +0200 Subject: [PATCH 09/30] limit processes --- prody/proteins/waterbridges.py | 51 +++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index ad40e7f1a..acb9ed77d 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -511,9 +511,14 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): :arg stop_frame: frame to stop :type stop_frame: int + + :arg max_proc: maximum number of processes to use + default is half of the number of CPUs + :type max_proc: int """ start_frame = kwargs.pop('start_frame', 0) stop_frame = kwargs.pop('stop_frame', -1) + max_proc = kwargs.pop('max_proc', mp.cpu_count()//2) if trajectory is not None: if isinstance(trajectory, Atomic): @@ -544,16 +549,24 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): for j0, frame0 in enumerate(traj, start=start_frame): interactions_all.append([]) - processes = [] - for j0, frame0 in enumerate(traj, start=start_frame): - p = mp.Process(target=analyseFrame, args=(j0, start_frame, - frame0, - interactions_all)) - p.start() - processes.append(p) + j0 = start_frame + while j0 < traj.numConfs(): + frame0 = traj[j0] + + processes = [] + for i in range(max_proc): + p = mp.Process(target=analyseFrame, args=(j0, start_frame, + frame0, + interactions_all)) + p.start() + processes.append(p) - for p in processes: - p.join() + j0 += 1 + if j0 >= traj.numConfs(): + break + + for p in processes: + p.join() interactions_all = interactions_all[:] @@ -575,14 +588,20 @@ def analyseFrame(i, interactions_all): for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): interactions_all.append([]) - processes = [] - for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): - p = mp.Process(target=analyseFrame, args=(i, interactions_all)) - p.start() - processes.append(p) + i = start_frame + while i < stop_frame: + processes = [] + for i in range(max_proc): + p = mp.Process(target=analyseFrame, args=(i, interactions_all)) + p.start() + processes.append(p) - for p in processes: - p.join() + i += 1 + if i >= stop_frame: + break + + for p in processes: + p.join() interactions_all = interactions_all[:] else: From a3cfb225490b137b2f12acfce3a79218e98df5d7 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 01:31:52 +0200 Subject: [PATCH 10/30] fix frame update --- prody/proteins/waterbridges.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index acb9ed77d..31d3a40f2 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -551,10 +551,10 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): j0 = start_frame while j0 < traj.numConfs(): - frame0 = traj[j0] processes = [] for i in range(max_proc): + frame0 = traj[j0] p = mp.Process(target=analyseFrame, args=(j0, start_frame, frame0, interactions_all)) From 2358ee6089f6a41e27627039873d1978ca4efada Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 01:32:45 +0200 Subject: [PATCH 11/30] allow no parallel processes --- prody/proteins/waterbridges.py | 89 +++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 38 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 31d3a40f2..5b519113c 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -544,31 +544,38 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): **kwargs) interactions_all[j0-start_frame] = interactions - with mp.Manager() as manager: - interactions_all = manager.list() + if max_proc == 1: + interactions_all = [] for j0, frame0 in enumerate(traj, start=start_frame): interactions_all.append([]) + analyseFrame(j0, start_frame, frame0, interactions_all) + else: + with mp.Manager() as manager: + interactions_all = manager.list() + for j0, frame0 in enumerate(traj, start=start_frame): + interactions_all.append([]) - j0 = start_frame - while j0 < traj.numConfs(): + j0 = start_frame + while j0 < traj.numConfs(): - processes = [] - for i in range(max_proc): - frame0 = traj[j0] - p = mp.Process(target=analyseFrame, args=(j0, start_frame, - frame0, - interactions_all)) - p.start() - processes.append(p) + processes = [] + for i in range(max_proc): + frame0 = traj[j0] + + p = mp.Process(target=analyseFrame, args=(j0, start_frame, + frame0, + interactions_all)) + p.start() + processes.append(p) - j0 += 1 - if j0 >= traj.numConfs(): - break + j0 += 1 + if j0 >= traj.numConfs(): + break - for p in processes: - p.join() + for p in processes: + p.join() - interactions_all = interactions_all[:] + interactions_all = interactions_all[:] # trajectory._nfi = nfi @@ -583,27 +590,33 @@ def analyseFrame(i, interactions_all): **kwargs) interactions_all[i] = interactions - with mp.Manager() as manager: - interactions_all = manager.list() - for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): + if max_proc == 1: + interactions_all = [] + for j0, frame0 in enumerate(traj, start=start_frame): interactions_all.append([]) - - i = start_frame - while i < stop_frame: - processes = [] - for i in range(max_proc): - p = mp.Process(target=analyseFrame, args=(i, interactions_all)) - p.start() - processes.append(p) - - i += 1 - if i >= stop_frame: - break - - for p in processes: - p.join() - - interactions_all = interactions_all[:] + analyseFrame(j0, start_frame, frame0, interactions_all) + else: + with mp.Manager() as manager: + interactions_all = manager.list() + for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): + interactions_all.append([]) + + i = start_frame + while i < stop_frame: + processes = [] + for i in range(max_proc): + p = mp.Process(target=analyseFrame, args=(i, interactions_all)) + p.start() + processes.append(p) + + i += 1 + if i >= stop_frame: + break + + for p in processes: + p.join() + + interactions_all = interactions_all[:] else: LOGGER.info('Include trajectory or use multi-model PDB file.') From 06e57046679ee5166ef393274e8982d6a223a2f0 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 20:10:55 +0200 Subject: [PATCH 12/30] getResidueName use_segname --- prody/proteins/waterbridges.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 5b519113c..e479729f7 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -29,7 +29,8 @@ __all__ = ['calcWaterBridges', 'calcWaterBridgesTrajectory', 'getWaterBridgesInfoOutput', - 'calcWaterBridgesStatistics', 'getWaterBridgeStatInfo', 'calcWaterBridgeMatrix', 'showWaterBridgeMatrix', + 'calcWaterBridgesStatistics', 'getWaterBridgeStatInfo', + 'calcWaterBridgeMatrix', 'showWaterBridgeMatrix', 'calcBridgingResiduesHistogram', 'calcWaterBridgesDistribution', 'savePDBWaterBridges', 'savePDBWaterBridgesTrajectory', 'saveWaterBridges', 'parseWaterBridges', 'findClusterCenters', @@ -623,8 +624,12 @@ def analyseFrame(i, interactions_all): return interactions_all -def getResidueName(atom): - return f'{atom.getResname()}{atom.getResnum()}{atom.getChid()}' +def getResidueName(atom, use_segname=False): + result = f'{atom.getResname()}{atom.getResnum()}{atom.getChid()}' + if use_segname: + result += f'{atom.getSegname()}' + + return result class DictionaryList: @@ -843,9 +848,14 @@ def calcBridgingResiduesHistogram(frames, **kwargs): :arg clip: maximal number of residues on graph; to represent all set None default is 20 :type clip: int + + :arg use_segname: whether to use segname to label residues + default is False, because then the labels get long + :type use_segname: bool """ show_plot = kwargs.pop('show_plot', False) + use_segname = kwargs.get('use_segname', False) clip = kwargs.pop('clip', 20) if clip == None: @@ -854,7 +864,7 @@ def calcBridgingResiduesHistogram(frames, **kwargs): residuesWithCount = {} for frame in frames: frameResidues = set(reduceTo1D( - frame, getResidueName, lambda wb: wb.proteins)) + frame, lambda x: getResidueName(x, use_segname=use_segname), lambda wb: wb.proteins)) for res in frameResidues: residuesWithCount[res] = residuesWithCount.get(res, 0) + 1 From cf5f2f30e7ac79b7b6de67b097118b9641ecba6d Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 20:11:21 +0200 Subject: [PATCH 13/30] rename typo internal function --- prody/proteins/waterbridges.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index e479729f7..5059fb6dc 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -954,7 +954,7 @@ def getDistanceDistribution(frames, res_a, res_b, trajectory): return distances -def getResidueLocationDistrubtion(frames, res_a, res_b): +def getResidueLocationDistribution(frames, res_a, res_b): locationInfo = {"backbone": 0, "side": 0} result = {res_a: locationInfo.copy(), res_b: locationInfo.copy()} @@ -1012,7 +1012,7 @@ def calcWaterBridgesDistribution(frames, res_a, res_b=None, **kwargs): 'residues': lambda: getBridgingResidues(frames, res_a), 'waters': lambda: getWaterCountDistribution(frames, res_a, res_b), 'distance': lambda: getDistanceDistribution(frames, res_a, res_b, trajectory), - 'location': lambda: getResidueLocationDistrubtion(frames, res_a, res_b) + 'location': lambda: getResidueLocationDistribution(frames, res_a, res_b) } result = methods[metric]() From 9efc3876203ad53f7127afc2eb5213d81d975870 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 21:09:52 +0200 Subject: [PATCH 14/30] formatting fix --- prody/proteins/waterbridges.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 5059fb6dc..081a50820 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -564,8 +564,8 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): frame0 = traj[j0] p = mp.Process(target=analyseFrame, args=(j0, start_frame, - frame0, - interactions_all)) + frame0, + interactions_all)) p.start() processes.append(p) @@ -864,7 +864,8 @@ def calcBridgingResiduesHistogram(frames, **kwargs): residuesWithCount = {} for frame in frames: frameResidues = set(reduceTo1D( - frame, lambda x: getResidueName(x, use_segname=use_segname), lambda wb: wb.proteins)) + frame, lambda x: getResidueName(x, use_segname=use_segname), + lambda wb: wb.proteins)) for res in frameResidues: residuesWithCount[res] = residuesWithCount.get(res, 0) + 1 From 893a886582d61393933c3c84142d768b56728f38 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 21:10:09 +0200 Subject: [PATCH 15/30] selectSurrounding whole residues --- prody/proteins/waterbridges.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 081a50820..b67041cda 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -1353,7 +1353,7 @@ def selectSurroundingsBox(atoms, select, padding=0, return_selstr=False): minCoords -= padding maxCoords += padding - selstr = '(x `{0} to {1}`) and (y `{2} to {3}`) and (z `{4} to {5}`)'.format( + selstr = 'same residue as ((x `{0} to {1}`) and (y `{2} to {3}`) and (z `{4} to {5}`))'.format( minCoords[0], maxCoords[0], minCoords[1], maxCoords[1], minCoords[2], maxCoords[2]) if return_selstr: From 94b8eec1142d98b3c6621c0b703ae704d8bd652e Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 21:46:58 +0200 Subject: [PATCH 16/30] super important fix on frames analysed --- prody/proteins/waterbridges.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index b67041cda..0594911af 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -557,11 +557,11 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): interactions_all.append([]) j0 = start_frame - while j0 < traj.numConfs(): + while j0 < traj.numConfs()+start_frame: processes = [] for i in range(max_proc): - frame0 = traj[j0] + frame0 = traj[j0-start_frame] p = mp.Process(target=analyseFrame, args=(j0, start_frame, frame0, @@ -570,7 +570,7 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): processes.append(p) j0 += 1 - if j0 >= traj.numConfs(): + if j0 >= traj.numConfs()+start_frame: break for p in processes: From 35ec0d928d92da55f8daa1dfe5d297190eee0fc1 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 21:49:09 +0200 Subject: [PATCH 17/30] fix on pdb coordsets analysed --- prody/proteins/waterbridges.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 0594911af..c517d532b 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -603,7 +603,7 @@ def analyseFrame(i, interactions_all): interactions_all.append([]) i = start_frame - while i < stop_frame: + while i < len(atoms.getCoordsets()[start_frame:stop_frame]): processes = [] for i in range(max_proc): p = mp.Process(target=analyseFrame, args=(i, interactions_all)) @@ -611,7 +611,7 @@ def analyseFrame(i, interactions_all): processes.append(p) i += 1 - if i >= stop_frame: + if i >= len(atoms.getCoordsets()[start_frame:stop_frame]): break for p in processes: From d17e539a4a72f102df476e7be1e4fc876dc2c80e Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sat, 3 Aug 2024 22:43:55 +0200 Subject: [PATCH 18/30] common selection --- prody/proteins/waterbridges.py | 47 +++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index c517d532b..c31c432e6 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -504,7 +504,7 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): :arg atoms: Atomic object from which atoms are considered :type atoms: :class:`.Atomic` - :arg trajectory: Trajectory data coming from a DCD or multi-model PDB file. + :arg trajectory: Trajectory data coming from a DCD, ensemble or multi-model PDB file. :type trajectory: :class:`.Trajectory', :class:`.Ensemble`, :class:`.Atomic` :arg start_frame: frame to start from @@ -516,10 +516,29 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): :arg max_proc: maximum number of processes to use default is half of the number of CPUs :type max_proc: int + + :arg selstr: selection string for focusing analysis + default of **None** focuses on everything + :type selstr: str + + :arg expand_selection: whether to expand the selection with + :func:`.selectSurroundingsBox`, selecting a box surrounding it. + Default is **False** + :type expand_selection: bool + + If selstr is provided, a common selection will be found across all frames + combining selections satifying the criteria in each. + + :arg return_selection: whether to return the combined common selection + Default is **False** + :type return_selection: bool """ start_frame = kwargs.pop('start_frame', 0) stop_frame = kwargs.pop('stop_frame', -1) max_proc = kwargs.pop('max_proc', mp.cpu_count()//2) + selstr = kwargs.pop('selstr', None) + expand_selection = kwargs.pop('expand_selection', False) + return_selection = kwargs.pop('return_selection', False) if trajectory is not None: if isinstance(trajectory, Atomic): @@ -533,11 +552,29 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): traj = trajectory[start_frame:] else: traj = trajectory[start_frame:stop_frame+1] + + if selstr is not None: + indices = [] + for frame0 in traj: + atoms_copy = atoms.copy() + atoms_copy.setCoords(frame0.getCoords()) + selection = atoms_copy.select(selstr) + + if expand_selection: + selection = selectSurroundingsBox(atoms_copy, selection) + + indices.extend(list(selection.getIndices())) + + indices = np.unique(indices) + - atoms_copy = atoms.copy() def analyseFrame(j0, start_frame, frame0, interactions_all): LOGGER.info('Frame: {0}'.format(j0)) + atoms_copy = atoms.copy() atoms_copy.setCoords(frame0.getCoords()) + atoms_copy = atoms_copy[indices] + + kwargs['selstr'] = atoms_copy.getSelstr() interactions = calcWaterBridges( atoms_copy, isInfoLog=False, @@ -621,6 +658,9 @@ def analyseFrame(i, interactions_all): else: LOGGER.info('Include trajectory or use multi-model PDB file.') + if return_selection: + return interactions_all, atoms_copy + return interactions_all @@ -1074,7 +1114,8 @@ def savePDBWaterBridgesTrajectory(bridgeFrames, atoms, filename, trajectory=None :arg filename: name of file to be saved; must end in .pdb :type filename: string - :arg trajectory: DCD trajectory (not needed for multimodal PDB) + :arg trajectory: trajectory data (not needed for multi-model PDB) + :type trajectory: :class:`.Trajectory', :class:`.Ensemble`, :class:`.Atomic` """ if not trajectory and atoms.numCoordsets() < len(bridgeFrames): raise TypeError('Provide parsed trajectory!') From c1d0f3575bda193f72143f21bd2db43df11c8c34 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 4 Aug 2024 00:09:44 +0200 Subject: [PATCH 19/30] Define indices --- prody/proteins/waterbridges.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index c31c432e6..c30a942dc 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -552,7 +552,8 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): traj = trajectory[start_frame:] else: traj = trajectory[start_frame:stop_frame+1] - + + indices = None if selstr is not None: indices = [] for frame0 in traj: From 4d580f0d34166ab012b8117536a35d2371b81ae6 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 4 Aug 2024 00:34:19 +0200 Subject: [PATCH 20/30] Only use indices if not none --- prody/proteins/waterbridges.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index c30a942dc..8836b2f75 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -573,7 +573,9 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): LOGGER.info('Frame: {0}'.format(j0)) atoms_copy = atoms.copy() atoms_copy.setCoords(frame0.getCoords()) - atoms_copy = atoms_copy[indices] + + if indices is not None: + atoms_copy = atoms_copy[indices] kwargs['selstr'] = atoms_copy.getSelstr() From a78f3fd3d3661d3dfab10fc1738a6e377e8a7235 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 4 Aug 2024 00:40:05 +0200 Subject: [PATCH 21/30] Another fix --- prody/proteins/waterbridges.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 8836b2f75..141457711 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -576,8 +576,7 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): if indices is not None: atoms_copy = atoms_copy[indices] - - kwargs['selstr'] = atoms_copy.getSelstr() + kwargs['selstr'] = atoms_copy.getSelstr() interactions = calcWaterBridges( atoms_copy, isInfoLog=False, From 2e46facdf7bde51c52a55fe073cf726f2f46790f Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 4 Aug 2024 02:53:07 +0200 Subject: [PATCH 22/30] apply indices outside analyseFrame for output --- prody/proteins/waterbridges.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 141457711..ef8497731 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -661,6 +661,10 @@ def analyseFrame(i, interactions_all): LOGGER.info('Include trajectory or use multi-model PDB file.') if return_selection: + if indices is not None: + atoms_copy = atoms_copy[indices] + kwargs['selstr'] = atoms_copy.getSelstr() + return interactions_all, atoms_copy return interactions_all From e346e509492a995ffe1a08a169f938d34e9728ec Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 4 Aug 2024 03:44:34 +0200 Subject: [PATCH 23/30] doc findClusterCenters filename kwarg --- prody/proteins/waterbridges.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index ef8497731..aa62e8990 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -1258,6 +1258,11 @@ def findClusterCenters(file_pattern, **kwargs): :arg numC: min number of molecules in a cluster default is 3 :type numC: int + + :arg filename: filename for output pdb file with clusters + Default of **None** leads to + 'clusters_'+file_pattern.split("*")[0]+'.pdb' + :type filename: str """ import glob From 58986747cfcb08a756a9d37388e4f9005715cde5 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Mon, 5 Aug 2024 18:57:28 +0200 Subject: [PATCH 24/30] manually update release docs --- docs/release/index.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/release/index.rst b/docs/release/index.rst index 2007f5566..cfe0dfaff 100644 --- a/docs/release/index.rst +++ b/docs/release/index.rst @@ -10,6 +10,10 @@ Release Notes :maxdepth: 2 :glob: + v2.4_series + v2.3_series + v2.2_series + v2.1_series v2.0_series v1.11_series v1.10_series From c4d832762a63f9cb076b4b376713b08ccf9dd453 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Tue, 6 Aug 2024 14:38:35 +0100 Subject: [PATCH 25/30] _ not i in calcWBtraj proc for loop --- prody/proteins/waterbridges.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 141457711..a2f45d00b 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -599,7 +599,7 @@ def analyseFrame(j0, start_frame, frame0, interactions_all): while j0 < traj.numConfs()+start_frame: processes = [] - for i in range(max_proc): + for _ in range(max_proc): frame0 = traj[j0-start_frame] p = mp.Process(target=analyseFrame, args=(j0, start_frame, @@ -644,7 +644,7 @@ def analyseFrame(i, interactions_all): i = start_frame while i < len(atoms.getCoordsets()[start_frame:stop_frame]): processes = [] - for i in range(max_proc): + for _ in range(max_proc): p = mp.Process(target=analyseFrame, args=(i, interactions_all)) p.start() processes.append(p) From ab0f3e46233f54d097142015ae2e896552ecc4ce Mon Sep 17 00:00:00 2001 From: James Krieger Date: Tue, 6 Aug 2024 17:42:58 +0100 Subject: [PATCH 26/30] first try parallel savePDBwbt --- prody/proteins/waterbridges.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index a2f45d00b..8f2dbf4b7 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -1104,7 +1104,7 @@ def savePDBWaterBridges(bridges, atoms, filename): return writePDB(filename, atomsToSave) -def savePDBWaterBridgesTrajectory(bridgeFrames, atoms, filename, trajectory=None): +def savePDBWaterBridgesTrajectory(bridgeFrames, atoms, filename, trajectory=None, max_proc=1): """Saves one PDB per frame with occupancy and beta on protein atoms and waters forming bridges in frame. :arg bridgeFrames: atomic output from calcWaterBridgesTrajectory @@ -1127,7 +1127,7 @@ def savePDBWaterBridgesTrajectory(bridgeFrames, atoms, filename, trajectory=None atoms = atoms.copy() mofifyBeta(bridgeFrames, atoms) - for frameIndex, frame in enumerate(bridgeFrames): + def saveBridgesFrame(trajectory, atoms, frameIndex, frame): if trajectory: coords = trajectory[frameIndex].getCoords() atoms.setCoords(coords) @@ -1153,6 +1153,26 @@ def savePDBWaterBridgesTrajectory(bridgeFrames, atoms, filename, trajectory=None writePDB(f'{filename}_{frameIndex}.pdb', atomsToSave, csets=frameIndex) + if max_proc == 1: + for frameIndex, frame in enumerate(bridgeFrames): + saveBridgesFrame(trajectory, atoms, frameIndex, frame) + else: + frameIndex = 0 + numFrames = len(bridgeFrames) + while frameIndex < numFrames: + processes = [] + for _ in range(max_proc): + p = mp.Process(target=saveBridgesFrame, args=(trajectory, atoms, frameIndex, + bridgeFrames[frameIndex])) + p.start() + processes.append(p) + + j0 += 1 + if j0 >= numFrames: + break + + for p in processes: + p.join() def getBridgeIndicesString(bridge): return ' '.join(map(lambda a: str(a.getIndex()), bridge.proteins))\ From 57eeddbabc8141a9eb23ec36c2b3a9a3eb453300 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 7 Aug 2024 11:36:27 +0100 Subject: [PATCH 27/30] bug fix parallel save --- prody/proteins/waterbridges.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 8f2dbf4b7..083e797d0 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -1128,6 +1128,7 @@ def savePDBWaterBridgesTrajectory(bridgeFrames, atoms, filename, trajectory=None mofifyBeta(bridgeFrames, atoms) def saveBridgesFrame(trajectory, atoms, frameIndex, frame): + LOGGER.info('Frame: {0}'.format(frameIndex)) if trajectory: coords = trajectory[frameIndex].getCoords() atoms.setCoords(coords) @@ -1167,8 +1168,8 @@ def saveBridgesFrame(trajectory, atoms, frameIndex, frame): p.start() processes.append(p) - j0 += 1 - if j0 >= numFrames: + frameIndex += 1 + if frameIndex >= numFrames: break for p in processes: From e587375335577fc84aa8a78ce648c9a413f7ddb9 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 7 Aug 2024 14:12:33 +0100 Subject: [PATCH 28/30] restore missing fix --- prody/proteins/waterbridges.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 083e797d0..feed5bf6b 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -660,6 +660,11 @@ def analyseFrame(i, interactions_all): else: LOGGER.info('Include trajectory or use multi-model PDB file.') + if return_selection: + if indices is not None: + atoms_copy = atoms_copy[indices] + kwargs['selstr'] = atoms_copy.getSelstr() + if return_selection: return interactions_all, atoms_copy From 6fa53f5f71290bdee51bde91be2a21068a56c506 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 7 Aug 2024 14:12:51 +0100 Subject: [PATCH 29/30] log common selection --- prody/proteins/waterbridges.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index feed5bf6b..fd19c85c6 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -530,7 +530,8 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): combining selections satifying the criteria in each. :arg return_selection: whether to return the combined common selection - Default is **False** + Default is **False** to keep expected behaviour. + However, this output is required when using selstr. :type return_selection: bool """ start_frame = kwargs.pop('start_frame', 0) @@ -554,7 +555,9 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): traj = trajectory[start_frame:stop_frame+1] indices = None + selection = atoms if selstr is not None: + LOGGER.info('Finding common selection') indices = [] for frame0 in traj: atoms_copy = atoms.copy() @@ -568,6 +571,8 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): indices = np.unique(indices) + LOGGER.info('Common selection found with {0} atoms and {1} protein chains'.format(selection.numAtoms(), + len(list(selection.protein.getHierView())))) def analyseFrame(j0, start_frame, frame0, interactions_all): LOGGER.info('Frame: {0}'.format(j0)) From fd727cf6c598598df8849c4486a32126322eea93 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 7 Aug 2024 18:10:18 +0100 Subject: [PATCH 30/30] fix log to common selection --- prody/proteins/waterbridges.py | 1 + 1 file changed, 1 insertion(+) diff --git a/prody/proteins/waterbridges.py b/prody/proteins/waterbridges.py index 9745246ee..500d85ce9 100644 --- a/prody/proteins/waterbridges.py +++ b/prody/proteins/waterbridges.py @@ -570,6 +570,7 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs): indices.extend(list(selection.getIndices())) indices = np.unique(indices) + selection = atoms_copy[indices] LOGGER.info('Common selection found with {0} atoms and {1} protein chains'.format(selection.numAtoms(), len(list(selection.protein.getHierView()))))