From 66786ade5001a85fdada77b637f4d335ea0d5a09 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 3 Sep 2024 11:40:44 +0200 Subject: [PATCH 01/28] Energy optional in calcStatisticsInteractions() [InSty] --- prody/proteins/interactions.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 82c8fd18c..8ec8a2476 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -1988,12 +1988,20 @@ def calcStatisticsInteractions(data, **kwargs): for element in elements: if element not in stats: values = [t[1] for t in interactions_list if t[0] == element] - stats[element] = { - "stddev": np.round(np.std(values),6), - "mean": np.round(np.mean(values),6), - "weight": np.round(float(len(values))/len(data), 6), - "energy": get_energy([element.split('-')[0][:3], element.split('-')[1][:3]], energy_list_type) - } + + try: + stats[element] = { + "stddev": np.round(np.std(values),6), + "mean": np.round(np.mean(values),6), + "weight": np.round(float(len(values))/len(data), 6), + "energy": get_energy([element.split('-')[0][:3], element.split('-')[1][:3]], energy_list_type) + } + except: + stats[element] = { + "stddev": np.round(np.std(values),6), + "mean": np.round(np.mean(values),6), + "weight": np.round(float(len(values))/len(data), 6) + } statistic = [] for key, value in stats.items(): @@ -2002,8 +2010,11 @@ def calcStatisticsInteractions(data, **kwargs): LOGGER.info(" Average [Ang.]: {}".format(value['mean'])) LOGGER.info(" Standard deviation [Ang.]: {0}".format(value['stddev'])) LOGGER.info(" Weight: {0}".format(value['weight'])) - LOGGER.info(" Energy [kcal/mol]: {0}".format(value['energy'])) - statistic.append([key, value['weight'], value['mean'], value['stddev'], value['energy']]) + try: + LOGGER.info(" Energy [kcal/mol]: {0}".format(value['energy'])) + statistic.append([key, value['weight'], value['mean'], value['stddev'], value['energy']]) + except: + statistic.append([key, value['weight'], value['mean'], value['stddev']]) else: pass statistic.sort(key=lambda x: x[1], reverse=True) From 86714f4a7ca0f8bb6adb4a0c483a2f69ce63d7d7 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 4 Sep 2024 13:38:57 +0200 Subject: [PATCH 02/28] HIS/HSD/HSP added to tabulated_energies.txt; bug fix in get_energy() --- prody/proteins/interactions.py | 12 ++- prody/proteins/tabulated_energies.txt | 117 ++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 4 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 8ec8a2476..b7e4eaad9 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -163,14 +163,13 @@ def get_energy(pair, source): try: # Python 3 with pkg_resources.path('prody.proteins', 'tabulated_energies.txt') as file_path: - data = np.loadtxt(file_path, skiprows=1, dtype=str) + data = np.loadtxt(file_path, dtype=str) except: # Python 2.7 import pkg_resources file_path = pkg_resources.resource_filename('prody.proteins', 'tabulated_energies.txt') with open(file_path) as f: - data = np.loadtxt(f, skiprows=1, dtype=str) - + data = np.loadtxt(f, dtype=str) sources = ["IB_nosolv", "IB_solv", "CS"] aa_pairs = [] @@ -180,7 +179,12 @@ def get_energy(pair, source): lookup = pair[0]+pair[1] - return data[np.where(np.array(aa_pairs)==lookup)[0]][0][2:][np.where(np.array(sources)==source)][0] + try: + data_results = data[np.where(np.array(aa_pairs)==lookup)[0]][0][2:][np.where(np.array(sources)==source)][0] + except ImportError: + raise ImportError('Please replace non-standard names of residues with standard names.') + + return data_results def showPairEnergy(data, **kwargs): diff --git a/prody/proteins/tabulated_energies.txt b/prody/proteins/tabulated_energies.txt index 009c8c1b5..af530f206 100644 --- a/prody/proteins/tabulated_energies.txt +++ b/prody/proteins/tabulated_energies.txt @@ -398,3 +398,120 @@ PRO LYS 0.3 -0.5 0.0 PRO ARG -0.17 -2.43 0.0 PRO HIS 0.33 -1.8 0.0 PRO PRO -0.03 -0.83 0.0 +GLY HSE 0.01 -2.47 0.0 +ALA HSE 0.01 -3.29 0.0 +VAL HSE -0.23 -4.14 -1.1 +ILE HSE -0.02 -4.55 -1.1 +LEU HSE 0.25 -4.85 -1.1 +CYS HSE -0.64 -4.79 -0.8 +MET HSE -0.17 -4.47 -0.5 +PHE HSE 0.79 -3.82 -0.6 +TYR HSE 0.34 -3.78 -1.1 +TRP HSE -0.05 -4.5 -1.7 +SER HSE -0.38 -3.12 -0.5 +THR HSE -0.05 -2.73 -0.5 +ASP HSE 0.0 -2.93 -0.4 +ASN HSE -0.52 -3.05 -1.2 +GLU HSE -0.1 -3.15 -0.4 +GLN HSE -0.31 -4.2 -1.2 +LYS HSE -0.01 -2.14 0.0 +ARG HSE 0.35 -3.24 -0.4 +HSE GLY 0.01 -2.47 0.0 +HSE ALA 0.01 -3.29 0.0 +HSE VAL -0.23 -4.14 -1.1 +HSE ILE -0.02 -4.55 -1.1 +HSE LEU 0.25 -4.85 -1.1 +HSE CYS -0.64 -4.79 -0.8 +HSE MET -0.17 -4.47 -0.5 +HSE PHE 0.79 -3.82 -0.6 +HSE TYR 0.34 -3.78 -1.1 +HSE TRP -0.05 -4.5 -1.7 +HSE SER -0.38 -3.12 -0.5 +HSE THR -0.05 -2.73 -0.5 +HSE ASP 0.0 -2.93 -0.4 +HSE ASN -0.52 -3.05 -1.2 +HSE GLU -0.1 -3.15 -0.4 +HSE GLN -0.31 -4.2 -1.2 +HSE LYS -0.01 -2.14 0.0 +HSE ARG 0.35 -3.24 -0.4 +HSE HSE 0.38 -3.08 -0.5 +HSE PRO 0.33 -1.8 0.0 +PRO HSE 0.33 -1.8 0.0 +GLY HSD 0.01 -2.47 0.0 +ALA HSD 0.01 -3.29 0.0 +VAL HSD -0.23 -4.14 -1.1 +ILE HSD -0.02 -4.55 -1.1 +LEU HSD 0.25 -4.85 -1.1 +CYS HSD -0.64 -4.79 -0.8 +MET HSD -0.17 -4.47 -0.5 +PHE HSD 0.79 -3.82 -0.6 +TYR HSD 0.34 -3.78 -1.1 +TRP HSD -0.05 -4.5 -1.7 +SER HSD -0.38 -3.12 -0.5 +THR HSD -0.05 -2.73 -0.5 +ASP HSD 0.0 -2.93 -0.4 +ASN HSD -0.52 -3.05 -1.2 +GLU HSD -0.1 -3.15 -0.4 +GLN HSD -0.31 -4.2 -1.2 +LYS HSD -0.01 -2.14 0.0 +ARG HSD 0.35 -3.24 -0.4 +HSD GLY 0.01 -2.47 0.0 +HSD ALA 0.01 -3.29 0.0 +HSD VAL -0.23 -4.14 -1.1 +HSD ILE -0.02 -4.55 -1.1 +HSD LEU 0.25 -4.85 -1.1 +HSD CYS -0.64 -4.79 -0.8 +HSD MET -0.17 -4.47 -0.5 +HSD PHE 0.79 -3.82 -0.6 +HSD TYR 0.34 -3.78 -1.1 +HSD TRP -0.05 -4.5 -1.7 +HSD SER -0.38 -3.12 -0.5 +HSD THR -0.05 -2.73 -0.5 +HSD ASP 0.0 -2.93 -0.4 +HSD ASN -0.52 -3.05 -1.2 +HSD GLU -0.1 -3.15 -0.4 +HSD GLN -0.31 -4.2 -1.2 +HSD LYS -0.01 -2.14 0.0 +HSD ARG 0.35 -3.24 -0.4 +HSD HSD 0.38 -3.08 -0.5 +HSD PRO 0.33 -1.8 0.0 +PRO HSD 0.33 -1.8 0.0 +GLY HSP 0.01 -2.47 0.0 +ALA HSP 0.01 -3.29 0.0 +VAL HSP -0.23 -4.14 -1.1 +ILE HSP -0.02 -4.55 -1.1 +LEU HSP 0.25 -4.85 -1.1 +CYS HSP -0.64 -4.79 -0.8 +MET HSP -0.17 -4.47 -0.5 +PHE HSP 0.79 -3.82 -0.6 +TYR HSP 0.34 -3.78 -1.1 +TRP HSP -0.05 -4.5 -1.7 +SER HSP -0.38 -3.12 -0.5 +THR HSP -0.05 -2.73 -0.5 +ASP HSP 0.0 -2.93 -0.4 +ASN HSP -0.52 -3.05 -1.2 +GLU HSP -0.1 -3.15 -0.4 +GLN HSP -0.31 -4.2 -1.2 +LYS HSP -0.01 -2.14 0.0 +ARG HSP 0.35 -3.24 -0.4 +HSP GLY 0.01 -2.47 0.0 +HSP ALA 0.01 -3.29 0.0 +HSP VAL -0.23 -4.14 -1.1 +HSP ILE -0.02 -4.55 -1.1 +HSP LEU 0.25 -4.85 -1.1 +HSP CYS -0.64 -4.79 -0.8 +HSP MET -0.17 -4.47 -0.5 +HSP PHE 0.79 -3.82 -0.6 +HSP TYR 0.34 -3.78 -1.1 +HSP TRP -0.05 -4.5 -1.7 +HSP SER -0.38 -3.12 -0.5 +HSP THR -0.05 -2.73 -0.5 +HSP ASP 0.0 -2.93 -0.4 +HSP ASN -0.52 -3.05 -1.2 +HSP GLU -0.1 -3.15 -0.4 +HSP GLN -0.31 -4.2 -1.2 +HSP LYS -0.01 -2.14 0.0 +HSP ARG 0.35 -3.24 -0.4 +HSP HSP 0.38 -3.08 -0.5 +HSP PRO 0.33 -1.8 0.0 +PRO HSP 0.33 -1.8 0.0 From 3c7fde93d19e7912ef5bf18a99fa8cd60283ecd9 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 4 Sep 2024 13:59:47 +0200 Subject: [PATCH 03/28] reference/energy added [InSty] --- prody/proteins/interactions.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index b7e4eaad9..6f15148fa 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -190,8 +190,8 @@ def get_energy(pair, source): def showPairEnergy(data, **kwargs): """Return energies when a list of interactions is given. Energies will be added to each pair of residues at the last position in the list. Energy is based on the residue types and not on the distances. - The unit of energy is kcal/mol. The energies defined as 'IB_nosolv', 'IB_solv' are taken from XX and - 'CS' from YY. + The unit of energy is kcal/mol. The energies defined as 'IB_nosolv', 'IB_solv' are taken from [OK98]_ and + 'CS' from InSty paper. :arg data: list with interactions from calcHydrogenBonds() or other types :type data: list @@ -199,6 +199,12 @@ def showPairEnergy(data, **kwargs): :arg energy_list_type: name of the list with energies default is 'IB_solv' :type energy_list_type: 'IB_nosolv', 'IB_solv', 'CS' + + + .. [OK98] Keskin O., Bahar I., Badretdinov A.Y., Ptitsyn O.B., Jernigan R.L., + Empirical solvet-mediated potentials hold for both intra-molecular and + inter-molecular inter-residues interactions, + *Protein Science* **1998** 7: 2578–2586. """ if not isinstance(data, list): From 1e16368d5d99ae27fa0881072edb9dbc0a194cff Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 4 Sep 2024 22:14:49 +0200 Subject: [PATCH 04/28] HSE/HSD/HSP removed from tabulated_energies.txt and get_energy() is modified --- prody/proteins/interactions.py | 50 ++++++++++- prody/proteins/tabulated_energies.txt | 117 -------------------------- 2 files changed, 49 insertions(+), 118 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 6f15148fa..18f076b53 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -160,6 +160,53 @@ def get_energy(pair, source): import numpy as np import importlib.resources as pkg_resources + aa_correction = { + # Histidine (His) + 'HSD': 'HIS', # Protonated at ND1 (HID in AMBER) + 'HSE': 'HIS', # Protonated at NE2 (HIE in AMBER) + 'HSP': 'HIS', # Doubly protonated (HIP in AMBER) + 'HID': 'HIS', # AMBER name, protonated at ND1 + 'HIE': 'HIS', # AMBER name, protonated at NE2 + 'HIP': 'HIS', # AMBER name, doubly protonated + 'HISD': 'HIS', # GROMACS: protonated at ND1 + 'HISE': 'HIS', # GROMACS: protonated at NE2 + 'HISP': 'HIS', # GROMACS: doubly protonated + + # Cysteine (Cys) + 'CYX': 'CYS', # Cystine (disulfide bridge) + 'CYM': 'CYS', # Deprotonated cysteine, anion (GROMACS) + + # Aspartic acid (Asp) + 'ASH': 'ASP', # Deprotonated Asp (GROMACS: ASH, AMBER: AS4) + + # Glutamic acid (Glu) + 'GLH': 'GLU', # Deprotonated Glu (GROMACS: GLH, AMBER: GL4) + + # Lysine (Lys) + 'LYN': 'LYS', # Deprotonated lysine (GROMACS: LYN, AMBER: LYP) + + # Arginine (Arg) + 'ARN': 'ARG', # Deprotonated arginine (rare, GROMACS) + + # Tyrosine (Tyr) + 'TYM': 'TYR', # Deprotonated tyrosine (GROMACS: TYM) + + # Serine (Ser) + 'SEP': 'SER', # Phosphorylated serine (GROMACS/AMBER) + + # Threonine (Thr) + 'TPO': 'THR', # Phosphorylated threonine (GROMACS/AMBER) + + # Tyrosine (Tyr) + 'PTR': 'TYR', # Phosphorylated tyrosine (GROMACS/AMBER) + + # Non-standard names for aspartic and glutamic acids in low pH environments + 'ASH': 'ASP', # Protonated Asp + 'GLH': 'GLU', # Protonated Glu + } + + pair = [aa_correction.get(aa, aa) for aa in pair] + try: # Python 3 with pkg_resources.path('prody.proteins', 'tabulated_energies.txt') as file_path: @@ -191,7 +238,7 @@ def showPairEnergy(data, **kwargs): """Return energies when a list of interactions is given. Energies will be added to each pair of residues at the last position in the list. Energy is based on the residue types and not on the distances. The unit of energy is kcal/mol. The energies defined as 'IB_nosolv', 'IB_solv' are taken from [OK98]_ and - 'CS' from InSty paper. + 'CS' from InSty paper (under preparation). :arg data: list with interactions from calcHydrogenBonds() or other types :type data: list @@ -2007,6 +2054,7 @@ def calcStatisticsInteractions(data, **kwargs): "energy": get_energy([element.split('-')[0][:3], element.split('-')[1][:3]], energy_list_type) } except: + LOGGER.warn('energy information is not available for ', element.split('-')[0][:3], element.split('-')[1][:3]) stats[element] = { "stddev": np.round(np.std(values),6), "mean": np.round(np.mean(values),6), diff --git a/prody/proteins/tabulated_energies.txt b/prody/proteins/tabulated_energies.txt index af530f206..009c8c1b5 100644 --- a/prody/proteins/tabulated_energies.txt +++ b/prody/proteins/tabulated_energies.txt @@ -398,120 +398,3 @@ PRO LYS 0.3 -0.5 0.0 PRO ARG -0.17 -2.43 0.0 PRO HIS 0.33 -1.8 0.0 PRO PRO -0.03 -0.83 0.0 -GLY HSE 0.01 -2.47 0.0 -ALA HSE 0.01 -3.29 0.0 -VAL HSE -0.23 -4.14 -1.1 -ILE HSE -0.02 -4.55 -1.1 -LEU HSE 0.25 -4.85 -1.1 -CYS HSE -0.64 -4.79 -0.8 -MET HSE -0.17 -4.47 -0.5 -PHE HSE 0.79 -3.82 -0.6 -TYR HSE 0.34 -3.78 -1.1 -TRP HSE -0.05 -4.5 -1.7 -SER HSE -0.38 -3.12 -0.5 -THR HSE -0.05 -2.73 -0.5 -ASP HSE 0.0 -2.93 -0.4 -ASN HSE -0.52 -3.05 -1.2 -GLU HSE -0.1 -3.15 -0.4 -GLN HSE -0.31 -4.2 -1.2 -LYS HSE -0.01 -2.14 0.0 -ARG HSE 0.35 -3.24 -0.4 -HSE GLY 0.01 -2.47 0.0 -HSE ALA 0.01 -3.29 0.0 -HSE VAL -0.23 -4.14 -1.1 -HSE ILE -0.02 -4.55 -1.1 -HSE LEU 0.25 -4.85 -1.1 -HSE CYS -0.64 -4.79 -0.8 -HSE MET -0.17 -4.47 -0.5 -HSE PHE 0.79 -3.82 -0.6 -HSE TYR 0.34 -3.78 -1.1 -HSE TRP -0.05 -4.5 -1.7 -HSE SER -0.38 -3.12 -0.5 -HSE THR -0.05 -2.73 -0.5 -HSE ASP 0.0 -2.93 -0.4 -HSE ASN -0.52 -3.05 -1.2 -HSE GLU -0.1 -3.15 -0.4 -HSE GLN -0.31 -4.2 -1.2 -HSE LYS -0.01 -2.14 0.0 -HSE ARG 0.35 -3.24 -0.4 -HSE HSE 0.38 -3.08 -0.5 -HSE PRO 0.33 -1.8 0.0 -PRO HSE 0.33 -1.8 0.0 -GLY HSD 0.01 -2.47 0.0 -ALA HSD 0.01 -3.29 0.0 -VAL HSD -0.23 -4.14 -1.1 -ILE HSD -0.02 -4.55 -1.1 -LEU HSD 0.25 -4.85 -1.1 -CYS HSD -0.64 -4.79 -0.8 -MET HSD -0.17 -4.47 -0.5 -PHE HSD 0.79 -3.82 -0.6 -TYR HSD 0.34 -3.78 -1.1 -TRP HSD -0.05 -4.5 -1.7 -SER HSD -0.38 -3.12 -0.5 -THR HSD -0.05 -2.73 -0.5 -ASP HSD 0.0 -2.93 -0.4 -ASN HSD -0.52 -3.05 -1.2 -GLU HSD -0.1 -3.15 -0.4 -GLN HSD -0.31 -4.2 -1.2 -LYS HSD -0.01 -2.14 0.0 -ARG HSD 0.35 -3.24 -0.4 -HSD GLY 0.01 -2.47 0.0 -HSD ALA 0.01 -3.29 0.0 -HSD VAL -0.23 -4.14 -1.1 -HSD ILE -0.02 -4.55 -1.1 -HSD LEU 0.25 -4.85 -1.1 -HSD CYS -0.64 -4.79 -0.8 -HSD MET -0.17 -4.47 -0.5 -HSD PHE 0.79 -3.82 -0.6 -HSD TYR 0.34 -3.78 -1.1 -HSD TRP -0.05 -4.5 -1.7 -HSD SER -0.38 -3.12 -0.5 -HSD THR -0.05 -2.73 -0.5 -HSD ASP 0.0 -2.93 -0.4 -HSD ASN -0.52 -3.05 -1.2 -HSD GLU -0.1 -3.15 -0.4 -HSD GLN -0.31 -4.2 -1.2 -HSD LYS -0.01 -2.14 0.0 -HSD ARG 0.35 -3.24 -0.4 -HSD HSD 0.38 -3.08 -0.5 -HSD PRO 0.33 -1.8 0.0 -PRO HSD 0.33 -1.8 0.0 -GLY HSP 0.01 -2.47 0.0 -ALA HSP 0.01 -3.29 0.0 -VAL HSP -0.23 -4.14 -1.1 -ILE HSP -0.02 -4.55 -1.1 -LEU HSP 0.25 -4.85 -1.1 -CYS HSP -0.64 -4.79 -0.8 -MET HSP -0.17 -4.47 -0.5 -PHE HSP 0.79 -3.82 -0.6 -TYR HSP 0.34 -3.78 -1.1 -TRP HSP -0.05 -4.5 -1.7 -SER HSP -0.38 -3.12 -0.5 -THR HSP -0.05 -2.73 -0.5 -ASP HSP 0.0 -2.93 -0.4 -ASN HSP -0.52 -3.05 -1.2 -GLU HSP -0.1 -3.15 -0.4 -GLN HSP -0.31 -4.2 -1.2 -LYS HSP -0.01 -2.14 0.0 -ARG HSP 0.35 -3.24 -0.4 -HSP GLY 0.01 -2.47 0.0 -HSP ALA 0.01 -3.29 0.0 -HSP VAL -0.23 -4.14 -1.1 -HSP ILE -0.02 -4.55 -1.1 -HSP LEU 0.25 -4.85 -1.1 -HSP CYS -0.64 -4.79 -0.8 -HSP MET -0.17 -4.47 -0.5 -HSP PHE 0.79 -3.82 -0.6 -HSP TYR 0.34 -3.78 -1.1 -HSP TRP -0.05 -4.5 -1.7 -HSP SER -0.38 -3.12 -0.5 -HSP THR -0.05 -2.73 -0.5 -HSP ASP 0.0 -2.93 -0.4 -HSP ASN -0.52 -3.05 -1.2 -HSP GLU -0.1 -3.15 -0.4 -HSP GLN -0.31 -4.2 -1.2 -HSP LYS -0.01 -2.14 0.0 -HSP ARG 0.35 -3.24 -0.4 -HSP HSP 0.38 -3.08 -0.5 -HSP PRO 0.33 -1.8 0.0 -PRO HSP 0.33 -1.8 0.0 From d5c64c2f0a5256bf986aca223464aad1954793d6 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 4 Sep 2024 22:42:53 +0200 Subject: [PATCH 05/28] new function - checkNonstandardResidues() --- prody/proteins/interactions.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 18f076b53..79d909bf9 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -46,7 +46,7 @@ 'calcHydrogenBondsTrajectory', 'calcHydrophobicOverlapingAreas', 'Interactions', 'InteractionsTrajectory', 'LigandInteractionsTrajectory', 'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues', - 'showSminaTermValues', 'showPairEnergy'] + 'showSminaTermValues', 'showPairEnergy', 'checkNonstandardResidues'] def cleanNumbers(listContacts): @@ -234,6 +234,25 @@ def get_energy(pair, source): return data_results +def checkNonstandardResidues(atoms): + """Check whether the aromic structure contain non-standard residues and inform to replace the name + to the standard one to be that non-standard residues are treated in a correct way while computing + interactions.""" + + amino_acids = ["ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN", "GLY", "HIS", "ILE", + "LEU", "LYS", "MET", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL"] + aa_list = atoms.select('name CA').getResnames() + nonstandard = [] + + for i in aa_list: + if i not in amino_acids: + nonstandard.append(i) + + LOGGER.info('There are several non-standard residues in the structure.') + LOGGER.info('Replace the non-standard name in the PDB file with the equivalent name from the standard one if you want to include them in the interactions.') + LOGGER.info("Residues: {0}.".format(' '.join(nonstandard))) + + def showPairEnergy(data, **kwargs): """Return energies when a list of interactions is given. Energies will be added to each pair of residues at the last position in the list. Energy is based on the residue types and not on the distances. From 0263b04b947de0a939c4e1e31644be7828e7628f Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 4 Sep 2024 22:47:11 +0200 Subject: [PATCH 06/28] check for checkNonstandardResidues() --- prody/proteins/interactions.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 79d909bf9..d5d4423f5 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -235,12 +235,27 @@ def get_energy(pair, source): def checkNonstandardResidues(atoms): - """Check whether the aromic structure contain non-standard residues and inform to replace the name + """Check whether the atomic structure contain non-standard residues and inform to replace the name to the standard one to be that non-standard residues are treated in a correct way while computing - interactions.""" + interactions. + + :arg atoms: an Atomic object from which residues are selected + :type atoms: :class:`.Atomic` + """ + + try: + coords = (atoms._getCoords() if hasattr(atoms, '_getCoords') else + atoms.getCoords()) + except AttributeError: + try: + checkCoords(coords) + except TypeError: + raise TypeError('coords must be an object ' + 'with `getCoords` method') amino_acids = ["ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN", "GLY", "HIS", "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL"] + aa_list = atoms.select('name CA').getResnames() nonstandard = [] From e22ff3de60fe207383bad0d4e58afc6d09e46924 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Thu, 5 Sep 2024 08:55:09 +0200 Subject: [PATCH 07/28] More info in docs about energies --- prody/proteins/interactions.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index d5d4423f5..46c57355a 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -271,8 +271,8 @@ def checkNonstandardResidues(atoms): def showPairEnergy(data, **kwargs): """Return energies when a list of interactions is given. Energies will be added to each pair of residues at the last position in the list. Energy is based on the residue types and not on the distances. - The unit of energy is kcal/mol. The energies defined as 'IB_nosolv', 'IB_solv' are taken from [OK98]_ and - 'CS' from InSty paper (under preparation). + The unit of energy is kcal/mol. The energies defined as 'IB_nosolv' (non-solvent-mediated), 'IB_solv' (solvent-mediated) + are taken from [OK98]_ and 'CS' from InSty paper (under preparation). :arg data: list with interactions from calcHydrogenBonds() or other types :type data: list @@ -2040,8 +2040,12 @@ def showInteractionsGraph(statistics, **kwargs): def calcStatisticsInteractions(data, **kwargs): """Return the statistics of interactions from PDB Ensemble or trajectory including: (1) the weight for each residue pair: corresponds to the number of counts divided by the - number of frames (values >1 are obtained when residue pair creates multiple contacts); - (2) average distance of interactions for each pair [in Ang] and (3) standard deviation [Ang.]. + number of frames (values >1 are obtained when the residue pair creates multiple contacts); + (2) average distance of interactions for each pair [in Ang], (3) standard deviation [Ang.], + (4) Energy [in kcal/mol] that is not distance dependent. Energy by default is solvent-mediated + from [OK98]_ ('IB_solv'). To use non-solvent-mediated entries ('IB_nosolv') from [OK98]_ or + solvent-mediated values obtained for InSty paper ('CS', under preparation) change + `energy_list_type` parameter. :arg data: list with interactions from calcHydrogenBondsTrajectory() or other types :type data: list From 75fbd58624365f7ea970df20b2e0a751d089dbd2 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Thu, 5 Sep 2024 09:05:02 +0200 Subject: [PATCH 08/28] more info in docs InSty --- prody/proteins/interactions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 46c57355a..6a1cdb94d 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -273,6 +273,8 @@ def showPairEnergy(data, **kwargs): at the last position in the list. Energy is based on the residue types and not on the distances. The unit of energy is kcal/mol. The energies defined as 'IB_nosolv' (non-solvent-mediated), 'IB_solv' (solvent-mediated) are taken from [OK98]_ and 'CS' from InSty paper (under preparation). + Protonation of resiudues is not distinguished. The protonation of residues is not distinguished. + Known residues such as HSD, HSE, HIE, and HID (used in MD simulations) are treated as HIS. :arg data: list with interactions from calcHydrogenBonds() or other types :type data: list From 1e15ef97572b875b628e83b0703f83de77acd0f3 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Thu, 5 Sep 2024 09:39:09 +0200 Subject: [PATCH 09/28] improvement in the non-standard residues in get_energy --- prody/proteins/interactions.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 6a1cdb94d..02b662094 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -162,9 +162,9 @@ def get_energy(pair, source): aa_correction = { # Histidine (His) - 'HSD': 'HIS', # Protonated at ND1 (HID in AMBER) - 'HSE': 'HIS', # Protonated at NE2 (HIE in AMBER) - 'HSP': 'HIS', # Doubly protonated (HIP in AMBER) + 'HSD': 'HIS', # NAMD, protonated at ND1 (HID in AMBER) + 'HSE': 'HIS', # NAMD, protonated at NE2 (HIE in AMBER) + 'HSP': 'HIS', # NAMD, doubly protonated (HIP in AMBER) 'HID': 'HIS', # AMBER name, protonated at ND1 'HIE': 'HIS', # AMBER name, protonated at NE2 'HIP': 'HIS', # AMBER name, doubly protonated @@ -174,22 +174,24 @@ def get_energy(pair, source): # Cysteine (Cys) 'CYX': 'CYS', # Cystine (disulfide bridge) - 'CYM': 'CYS', # Deprotonated cysteine, anion (GROMACS) + 'CYM': 'CYS', # Deprotonated cysteine, anion # Aspartic acid (Asp) - 'ASH': 'ASP', # Deprotonated Asp (GROMACS: ASH, AMBER: AS4) + 'ASH': 'ASP', # Protonated Asp + 'ASPP': 'ASP', # Glutamic acid (Glu) - 'GLH': 'GLU', # Deprotonated Glu (GROMACS: GLH, AMBER: GL4) + 'GLH': 'GLU', # Protonated Glu + 'GLUP': 'GLU', # Protonated Glu # Lysine (Lys) - 'LYN': 'LYS', # Deprotonated lysine (GROMACS: LYN, AMBER: LYP) + 'LYN': 'LYS', # Deprotonated lysine (nautral) # Arginine (Arg) 'ARN': 'ARG', # Deprotonated arginine (rare, GROMACS) # Tyrosine (Tyr) - 'TYM': 'TYR', # Deprotonated tyrosine (GROMACS: TYM) + 'TYM': 'TYR', # Deprotonated tyrosine (GROMACS) # Serine (Ser) 'SEP': 'SER', # Phosphorylated serine (GROMACS/AMBER) From 9f2484b218ea8491e7975997ffcb97b0d391d892 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Sat, 7 Sep 2024 09:21:21 +0200 Subject: [PATCH 10/28] new function saveInteractionsAsDummyAtoms() is added --- prody/proteins/interactions.py | 77 +++++++++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 02b662094..5fee3b88b 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -46,7 +46,8 @@ 'calcHydrogenBondsTrajectory', 'calcHydrophobicOverlapingAreas', 'Interactions', 'InteractionsTrajectory', 'LigandInteractionsTrajectory', 'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues', - 'showSminaTermValues', 'showPairEnergy', 'checkNonstandardResidues'] + 'showSminaTermValues', 'showPairEnergy', 'checkNonstandardResidues', + 'saveInteractionsAsDummyAtoms'] def cleanNumbers(listContacts): @@ -2198,6 +2199,80 @@ def calcDistribution(interactions, residue1, residue2=None, **kwargs): LOGGER.info(i) +def saveInteractionsAsDummyAtoms(atoms, interactions, filename, *kwargs): + '''Creates a PDB file which will contain protein structure and dummy atoms that will be placed between pairs + of interacting residues. + + :arg atoms: an Atomic object from which residues are selected + :type atoms: :class:`.Atomic` + + :arg interactions: list of interactions + :type interactions: list + + :arg filename: name of the PDB file which will contain dummy atoms and protein structure + :type filename: str + + :arg RESNAME_dummy: resname of the dummy atom, use 3-letter name + be default is 'DUM' + :type RESNAME_dummy: str ''' + + + try: + coords = (atoms._getCoords() if hasattr(atoms, '_getCoords') else + atoms.getCoords()) + except AttributeError: + try: + checkCoords(coords) + except TypeError: + raise TypeError('coords must be an object ' + 'with `getCoords` method') + + RESNAME_dummy = kwargs.pop('RESNAME_dummy', 'DUM') + + def putDUMatom(coord1, coord2): + midpoint = [ + (coord1[0] + coord2[0]) / 2, + (coord1[1] + coord2[1]) / 2, + (coord1[2] + coord2[2]) / 2 + ] + return midpoint + + all_DUMs = [] + atoms_ = atoms.copy() + + for i in interactions: + if len(i[1].split('_')) <= 3: + res1_name = 'chain '+i[2]+' resname '+i[0][:3]+' and resid '+i[0][3:]+' and index '+' '.join(i[1].split('_')[1:]) + res1_coords = calcCenter(atoms.select(res1_name)) + + if len(i[1].split('_')) > 3: + res1_name = 'chain '+i[2]+' resname '+i[0][:3]+' and resid '+i[0][3:]+' and index '+' '.join(i[1].split('_')) + res1_coords = calcCenter(atoms.select(res1_name)) + + if len(i[4].split('_')) <= 3: + res2_name = 'chain '+i[5]+' resname '+i[3][:3]+' and resid '+i[3][3:]+' and index '+' '.join(i[4].split('_')[1:]) + res2_coords = calcCenter(atoms.select(res2_name)) + + if len(i[4].split('_')) > 3: + res2_name = 'chain '+i[5]+' resname '+i[3][:3]+' and resid '+i[3][3:]+' and index '+' '.join(i[4].split('_')) + res2_coords = calcCenter(atoms.select(res2_name)) + + all_DUMs.append(putDUMatom(res1_coords, res2_coords)) + + if all_DUMs == []: + LOGGER.info('Lack of interactions') + else: + LOGGER.info('Creating file with dummy atoms') + dummyAtoms = AtomGroup() + coords = array([all_DUMs], dtype=float) + dummyAtoms.setCoords(coords) + dummyAtoms.setNames([RESNAME_dummy]*len(dummyAtoms)) + dummyAtoms.setResnums(range(1, len(dummyAtoms)+1)) + dummyAtoms.setResnames([RESNAME_dummy]*len(dummyAtoms)) + + writePDB(filename, atoms_+dummyAtoms) + + def listLigandInteractions(PLIP_output, **kwargs): """Create a list of interactions from PLIP output created using calcLigandInteractions(). Results can be displayed in VMD. From def37bbb6892f32d0788463fb95f585d58d81f19 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Sun, 8 Sep 2024 22:00:40 +0200 Subject: [PATCH 11/28] saveInteractionsAsDummyAtoms fix with kwargs --- prody/proteins/interactions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 5fee3b88b..0de0a8aef 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -2199,7 +2199,7 @@ def calcDistribution(interactions, residue1, residue2=None, **kwargs): LOGGER.info(i) -def saveInteractionsAsDummyAtoms(atoms, interactions, filename, *kwargs): +def saveInteractionsAsDummyAtoms(atoms, interactions, filename, **kwargs): '''Creates a PDB file which will contain protein structure and dummy atoms that will be placed between pairs of interacting residues. @@ -2226,9 +2226,9 @@ def saveInteractionsAsDummyAtoms(atoms, interactions, filename, *kwargs): except TypeError: raise TypeError('coords must be an object ' 'with `getCoords` method') - + RESNAME_dummy = kwargs.pop('RESNAME_dummy', 'DUM') - + def putDUMatom(coord1, coord2): midpoint = [ (coord1[0] + coord2[0]) / 2, From 6277f1dad3814b30e526758c00c212a2f11951ea Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Mon, 9 Sep 2024 21:44:01 +0200 Subject: [PATCH 12/28] improvements of checkNonstandardResidues() --- prody/proteins/interactions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 0de0a8aef..d7f561507 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -260,11 +260,12 @@ def checkNonstandardResidues(atoms): "LEU", "LYS", "MET", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL"] aa_list = atoms.select('name CA').getResnames() + aa_list_nr = atoms.select('name CA').getResnums() nonstandard = [] - for i in aa_list: + for nr_i,i in enumerate(aa_list): if i not in amino_acids: - nonstandard.append(i) + nonstandard.append(aa_list[nr_i] + str(aa_list_nr[nr_i])) LOGGER.info('There are several non-standard residues in the structure.') LOGGER.info('Replace the non-standard name in the PDB file with the equivalent name from the standard one if you want to include them in the interactions.') From 286ceb8f208af5ba843bbe4c315cd8a4fbad0e77 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 10 Sep 2024 11:23:04 +0200 Subject: [PATCH 13/28] getInteractions - replace option for trajectory --- prody/proteins/interactions.py | 46 +++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index d7f561507..6b5bf777d 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -28,7 +28,7 @@ from prody.proteins import writePDB, parsePDB from collections import Counter -from prody.trajectory import TrajBase, Trajectory +from prody.trajectory import TrajBase, Trajectory, Frame from prody.ensemble import Ensemble import multiprocessing @@ -3025,14 +3025,26 @@ def getInteractions(self, **kwargs): :arg selection2: selection string :type selection2: str + :arg replace: Used with selection criteria to set the new one + If set to True the selection will be replaced by the new one + :type replace: True or False + by default is False + Selection: If we want to select interactions for the particular residue or group of residues: selection='chain A and resid 1 to 50' If we want to study chain-chain interactions: selection='chain A', selection2='chain B' """ - + + replace = kwargs.pop('replace', False) + if len(kwargs) != 0: results = [filterInteractions(j, self._atoms, **kwargs) for j in self._interactions] + + if replace == True: + self._interactions = results + LOGGER.info('New interactions are set') + else: results = self._interactions @@ -3917,19 +3929,47 @@ def getInteractions(self, **kwargs): :arg selection2: selection string :type selection2: str + + :arg replace: Used with selection criteria to set the new one + If set to True the selection will be replaced by the new one + :type replace: True or False + by default is False Selection: If we want to select interactions for the particular residue or group of residues: selection='chain A and resid 1 to 50' If we want to study chain-chain interactions: selection='chain A', selection2='chain B' """ - + + replace = kwargs.pop('replace', False) + if len(kwargs) != 0: sele_inter = [] for i in self._interactions_traj: for nr_j,j in enumerate(i): sele_inter.append(filterInteractions(i[nr_j], self._atoms, **kwargs)) + + if replace == True: + try: + trajectory = self._traj + numFrames = trajectory._n_csets + except: + # If we analyze previously saved PKL file we doesn't have dcd information + # We have seven type of interactions. It will give number of frames. + numFrames = int(len(sele_inter)/7) + + self._interactions_traj = sele_inter + self._hbs_traj = sele_inter[0:numFrames] + self._sbs_traj = sele_inter[numFrames:2*numFrames] + self._rib_traj = sele_inter[2*numFrames:3*numFrames] + self._piStack_traj = sele_inter[3*numFrames:4*numFrames] + self._piCat_traj = sele_inter[4*numFrames:5*numFrames] + self._hps_traj = sele_inter[5*numFrames:6*numFrames] + self._dibs_traj = sele_inter[6*numFrames:7*numFrames] + LOGGER.info('New interactions are set') + results = sele_inter + else: results = self._interactions_traj From 3a06083e3de079c9766d18a2091cf6b81be1475e Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 10 Sep 2024 11:35:27 +0200 Subject: [PATCH 14/28] getInteractions - replace for a single PDB interactions --- prody/proteins/interactions.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 6b5bf777d..1e2d2fe46 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -3042,8 +3042,15 @@ def getInteractions(self, **kwargs): results = [filterInteractions(j, self._atoms, **kwargs) for j in self._interactions] if replace == True: - self._interactions = results LOGGER.info('New interactions are set') + self._interactions = results + self._hbs = results[0] + self._sbs = results[1] + self._rib = results[2] + self._piStack = results[3] + self._piCat = results[4] + self._hps = results[5] + self._dibs = results[6] else: results = self._interactions From d86f7b8af9a7dbe18e6f1847fdfb0b6ad2d8b0f6 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 10 Sep 2024 14:24:54 +0200 Subject: [PATCH 15/28] getTimeInteractions() improvement to include selection replacement --- prody/proteins/interactions.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 1e2d2fe46..bca2349a9 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -3974,7 +3974,20 @@ def getInteractions(self, **kwargs): self._hps_traj = sele_inter[5*numFrames:6*numFrames] self._dibs_traj = sele_inter[6*numFrames:7*numFrames] LOGGER.info('New interactions are set') - + + self._interactions_nb_traj = None + self._interactions_matrix_traj = None + + new_interactions_nb_traj = [] + new_interactions_nb_traj.append([ len(i) for i in self._hbs_traj ]) + new_interactions_nb_traj.append([ len(i) for i in self._sbs_traj ]) + new_interactions_nb_traj.append([ len(i) for i in self._rib_traj ]) + new_interactions_nb_traj.append([ len(i) for i in self._piStack_traj ]) + new_interactions_nb_traj.append([ len(i) for i in self._piCat_traj ]) + new_interactions_nb_traj.append([ len(i) for i in self._hps_traj ]) + new_interactions_nb_traj.append([ len(i) for i in self._dibs_traj ]) + self._interactions_nb_traj = new_interactions_nb_traj + results = sele_inter else: From 4b5d687c358182e9ed3b974f9a61798f2c0dfd47 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 10 Sep 2024 22:48:39 +0200 Subject: [PATCH 16/28] showFrequentInteractors() [InSty] improvement - return dict with residues --- prody/proteins/interactions.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index bca2349a9..c0a07bfc2 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -3583,18 +3583,22 @@ def showFrequentInteractors(self, cutoff=5, **kwargs): y.append(all_y[nr_ii]) if SETTINGS['auto_show']: - matplotlib.rcParams['font.size'] = '20' - fig = plt.figure(num=None, figsize=(12,6), facecolor='w') + matplotlib.rcParams['font.size'] = '12' + fig = plt.figure(num=None, figsize=(16,5), facecolor='w') y_pos = np.arange(len(y)) show = plt.bar(y_pos, x, align='center', alpha=0.5, color='blue') - plt.xticks(y_pos, y, rotation=45, fontsize=20) - plt.ylabel('Number of interactions') + plt.xticks(y_pos, y, rotation=45, fontsize=16) + plt.ylabel('Number of interactions', fontsize=16) plt.tight_layout() if SETTINGS['auto_show']: showFigure() - return show + + dict_counts = dict(zip(y, x)) + dict_counts_sorted = dict(sorted(dict_counts.items(), key=lambda item: item[1], reverse=True)) + + return dict_counts_sorted def showCumulativeInteractionTypes(self, **kwargs): From d5de489dd4e9ace1e0713f04bd63ed3e9ab81178 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 11 Sep 2024 21:39:16 +0200 Subject: [PATCH 17/28] addMissingAtoms() [fixer.py] - keep_ids added --- prody/proteins/fixer.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/prody/proteins/fixer.py b/prody/proteins/fixer.py index ad30aa731..d78882d7a 100644 --- a/prody/proteins/fixer.py +++ b/prody/proteins/fixer.py @@ -28,7 +28,7 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): or PDBFixer with OpenMM. There are also options whether to *model_residues* (default False), *remove_heterogens* - (default False), *keep_waters* (default True), *overwrite* (default False). + (default False), *keep_waters* (default True), *overwrite* (default False), *keep_ids* (default True). :arg infile: PDB file name :type infile: str @@ -44,6 +44,14 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): :arg pH: pH value applyed only for PDBfixer. :type pH: int, float + :arg model_residues: add all missing atoms from residues, applyed only for PDBfixer. + default is False + :type model_residues: bool + + :arg keep_ids: keep the oryginal residue number, applyed only for PDBfixer. + default is True + :type keep_ids: bool + Instalation of Openbabel: conda install -c conda-forge openbabel @@ -58,6 +66,7 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): remove_heterogens = kwargs.get("remove_heterogens", False) keep_water = kwargs.get("keep_water", True) overwrite = kwargs.get("overwrite", False) + keep_ids = kwargs.get("keep_ids", True) import os @@ -70,6 +79,9 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): if not isinstance(keep_water, bool): raise TypeError('keep_water should be True or False') + if not isinstance(keep_Ids, bool): + raise TypeError('keep_Ids should be True or False') + if not isinstance(overwrite, bool): raise TypeError('overwrite should be True or False') @@ -136,7 +148,7 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): fixer.findMissingAtoms() fixer.addMissingAtoms() fixer.addMissingHydrogens(pH) - PDBFile.writeFile(fixer.topology, fixer.positions, open(outfile, 'w')) + PDBFile.writeFile(fixer.topology, fixer.positions, open(outfile, 'w'), keepIds=keep_ids) LOGGER.info("Hydrogens were added to the structure. New structure is saved as {0}.".format(outfile)) except ImportError: @@ -165,6 +177,14 @@ def fixStructuresMissingAtoms(infiles, method='openbabel', pH=7.0, outfiles=None 'pdbfixer': PDBFixer and OpenMM default is 'openbabel' :type method: str + + :arg model_residues: add all missing atoms from residues, applyed only for PDBfixer. + default is False + :type model_residues: bool + + :arg keep_ids: keep the oryginal residue number, applyed only for PDBfixer. + default is True + :type keep_ids: bool :arg pH: pH value applyed only for PDBfixer. :type pH: int, float From 6c10c4647ae06eee4c5c73b59afed47c7db0e941 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 11 Sep 2024 21:51:49 +0200 Subject: [PATCH 18/28] typo in addMissingAtoms --- prody/proteins/fixer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prody/proteins/fixer.py b/prody/proteins/fixer.py index d78882d7a..1aa97acb4 100644 --- a/prody/proteins/fixer.py +++ b/prody/proteins/fixer.py @@ -79,8 +79,8 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): if not isinstance(keep_water, bool): raise TypeError('keep_water should be True or False') - if not isinstance(keep_Ids, bool): - raise TypeError('keep_Ids should be True or False') + if not isinstance(keep_ids, bool): + raise TypeError('keep_ids should be True or False') if not isinstance(overwrite, bool): raise TypeError('overwrite should be True or False') From cf86d944bca13dadccfcbbb6d232dade2e654d67 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 11 Sep 2024 22:36:20 +0200 Subject: [PATCH 19/28] typo found by James --- prody/proteins/interactions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index c0a07bfc2..e996427da 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -3965,7 +3965,7 @@ def getInteractions(self, **kwargs): trajectory = self._traj numFrames = trajectory._n_csets except: - # If we analyze previously saved PKL file we doesn't have dcd information + # If we analyze previously saved PKL file it doesn't have dcd information # We have seven type of interactions. It will give number of frames. numFrames = int(len(sele_inter)/7) From 11738ad78d64e84ba1e7b9b2bb14bda6a59e66ba Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 11 Sep 2024 22:46:55 +0200 Subject: [PATCH 20/28] InSty - typos and TypeError fixed [James comments] --- prody/proteins/interactions.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index e996427da..cbe700d87 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -231,8 +231,8 @@ def get_energy(pair, source): try: data_results = data[np.where(np.array(aa_pairs)==lookup)[0]][0][2:][np.where(np.array(sources)==source)][0] - except ImportError: - raise ImportError('Please replace non-standard names of residues with standard names.') + except TypeError: + raise TypeError('Please replace non-standard names of residues with standard names.') return data_results @@ -277,7 +277,7 @@ def showPairEnergy(data, **kwargs): at the last position in the list. Energy is based on the residue types and not on the distances. The unit of energy is kcal/mol. The energies defined as 'IB_nosolv' (non-solvent-mediated), 'IB_solv' (solvent-mediated) are taken from [OK98]_ and 'CS' from InSty paper (under preparation). - Protonation of resiudues is not distinguished. The protonation of residues is not distinguished. + Protonation of residues is not distinguished. The protonation of residues is not distinguished. Known residues such as HSD, HSE, HIE, and HID (used in MD simulations) are treated as HIS. :arg data: list with interactions from calcHydrogenBonds() or other types @@ -2047,7 +2047,8 @@ def calcStatisticsInteractions(data, **kwargs): """Return the statistics of interactions from PDB Ensemble or trajectory including: (1) the weight for each residue pair: corresponds to the number of counts divided by the number of frames (values >1 are obtained when the residue pair creates multiple contacts); - (2) average distance of interactions for each pair [in Ang], (3) standard deviation [Ang.], + (2) average distance of interactions for each pair [in Ang], + (3) standard deviation [Ang.], (4) Energy [in kcal/mol] that is not distance dependent. Energy by default is solvent-mediated from [OK98]_ ('IB_solv'). To use non-solvent-mediated entries ('IB_nosolv') from [OK98]_ or solvent-mediated values obtained for InSty paper ('CS', under preparation) change From 95a9a0d4fa5c6ac4ea912e62b5a022b69c2dfcde Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Thu, 12 Sep 2024 08:43:10 +0200 Subject: [PATCH 21/28] InSty [docs and checks improvements] --- prody/proteins/interactions.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index cbe700d87..5c7cd53a4 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -2053,6 +2053,8 @@ def calcStatisticsInteractions(data, **kwargs): from [OK98]_ ('IB_solv'). To use non-solvent-mediated entries ('IB_nosolv') from [OK98]_ or solvent-mediated values obtained for InSty paper ('CS', under preparation) change `energy_list_type` parameter. + If energy information is not available, please check whether the pair of residues is listed in + the "tabulated_energies.txt" file, which is localized in the ProDy directory. :arg data: list with interactions from calcHydrogenBondsTrajectory() or other types :type data: list @@ -3445,17 +3447,20 @@ def saveInteractionsPDB(self, **kwargs): :arg energy: sum of the energy between residues default is False - :type energy: True, False + :type energy: bool """ + energy = kwargs.pop('energy', False) + if not hasattr(self, '_interactions_matrix') or self._interactions_matrix is None: raise ValueError('Please calculate interactions matrix first.') + + if not isinstance(energy, bool): + raise TypeError('energy should be True or False') import numpy as np from collections import Counter - energy = kwargs.pop('energy', False) - atoms = self._atoms interaction_matrix = self._interactions_matrix interaction_matrix_en = self._interactions_matrix_en @@ -3629,7 +3634,7 @@ def showCumulativeInteractionTypes(self, **kwargs): :arg energy: sum of the energy between residues default is False - :type energy: True, False + :type energy: bool """ import numpy as np @@ -3644,13 +3649,15 @@ def showCumulativeInteractionTypes(self, **kwargs): atoms = self._atoms energy = kwargs.pop('energy', False) - + + if not isinstance(energy, bool): + raise TypeError('energy should be True or False') + ResNumb = atoms.select('protein and name CA').getResnums() ResName = atoms.select('protein and name CA').getResnames() ResChid = atoms.select('protein and name CA').getChids() ResList = [ i[0]+str(i[1])+i[2] for i in list(zip([ aa_dic[i] for i in ResName ], ResNumb, ResChid)) ] - if energy == True: matrix_en = self._interactions_matrix_en matrix_en_sum = np.sum(matrix_en, axis=0) @@ -3668,7 +3675,6 @@ def showCumulativeInteractionTypes(self, **kwargs): plt.show() return matrix_en_sum - else: replace_matrix = kwargs.get('replace_matrix', False) From 860c7b3d2acef2e3d1310a918629f8b19db6dc96 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 12 Sep 2024 14:49:01 +0200 Subject: [PATCH 22/28] fix typos --- prody/proteins/fixer.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/prody/proteins/fixer.py b/prody/proteins/fixer.py index 1aa97acb4..ccd49aef3 100644 --- a/prody/proteins/fixer.py +++ b/prody/proteins/fixer.py @@ -41,14 +41,14 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): default is 'openbabel' :type method: str - :arg pH: pH value applyed only for PDBfixer. + :arg pH: pH value applied only for PDBfixer. :type pH: int, float - :arg model_residues: add all missing atoms from residues, applyed only for PDBfixer. + :arg model_residues: add all missing atoms from residues, applied only for PDBfixer. default is False :type model_residues: bool - :arg keep_ids: keep the oryginal residue number, applyed only for PDBfixer. + :arg keep_ids: keep the original residue number, applied only for PDBfixer. default is True :type keep_ids: bool @@ -178,15 +178,15 @@ def fixStructuresMissingAtoms(infiles, method='openbabel', pH=7.0, outfiles=None default is 'openbabel' :type method: str - :arg model_residues: add all missing atoms from residues, applyed only for PDBfixer. + :arg model_residues: add all missing atoms from residues, applied only for PDBfixer. default is False :type model_residues: bool - :arg keep_ids: keep the oryginal residue number, applyed only for PDBfixer. + :arg keep_ids: keep the original residue number, applied only for PDBfixer. default is True :type keep_ids: bool - :arg pH: pH value applyed only for PDBfixer. + :arg pH: pH value applied only for PDBfixer. :type pH: int, float Instalation of Openbabel: From 0ffa953f8ffbe28c6bd32f05b15714644163c112 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 12 Sep 2024 15:07:28 +0200 Subject: [PATCH 23/28] fix replace type --- prody/proteins/interactions.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 5c7cd53a4..88e6403f8 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -3029,10 +3029,10 @@ def getInteractions(self, **kwargs): :type selection2: str :arg replace: Used with selection criteria to set the new one - If set to True the selection will be replaced by the new one - :type replace: True or False - by default is False - + If set to **True** the selection will be replaced by the new one. + Default is **False** + :type replace: bool + Selection: If we want to select interactions for the particular residue or group of residues: selection='chain A and resid 1 to 50' @@ -3949,10 +3949,10 @@ def getInteractions(self, **kwargs): :type selection2: str :arg replace: Used with selection criteria to set the new one - If set to True the selection will be replaced by the new one - :type replace: True or False - by default is False - + If set to **True** the selection will be replaced by the new one. + Default is **False** + :type replace: bool + Selection: If we want to select interactions for the particular residue or group of residues: selection='chain A and resid 1 to 50' From e1f4c6ff52832f19e6ec0493b70d6377a1b0017a Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 12 Sep 2024 15:17:10 +0200 Subject: [PATCH 24/28] fix pkg_resources for PY3K --- prody/proteins/interactions.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 88e6403f8..3b6a75386 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -210,16 +210,13 @@ def get_energy(pair, source): pair = [aa_correction.get(aa, aa) for aa in pair] - try: - # Python 3 - with pkg_resources.path('prody.proteins', 'tabulated_energies.txt') as file_path: - data = np.loadtxt(file_path, dtype=str) - except: - # Python 2.7 - import pkg_resources + if PY3K: + file_path = pkg_resources.path('prody.proteins', 'tabulated_energies.txt') + else: file_path = pkg_resources.resource_filename('prody.proteins', 'tabulated_energies.txt') - with open(file_path) as f: - data = np.loadtxt(f, dtype=str) + + with open(file_path) as f: + data = np.loadtxt(f, dtype=str) sources = ["IB_nosolv", "IB_solv", "CS"] aa_pairs = [] From 370e5612b9f1f338173ab6fdbe345caefd0fe4dd Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 12 Sep 2024 15:25:29 +0200 Subject: [PATCH 25/28] fix checkNonstandardResidues --- prody/proteins/interactions.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 3b6a75386..b27a00fc4 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -235,8 +235,8 @@ def get_energy(pair, source): def checkNonstandardResidues(atoms): - """Check whether the atomic structure contain non-standard residues and inform to replace the name - to the standard one to be that non-standard residues are treated in a correct way while computing + """Check whether the atomic structure contains non-standard residues and inform to replace the name + to the standard one so that non-standard residues are treated in a correct way while computing interactions. :arg atoms: an Atomic object from which residues are selected @@ -264,9 +264,13 @@ def checkNonstandardResidues(atoms): if i not in amino_acids: nonstandard.append(aa_list[nr_i] + str(aa_list_nr[nr_i])) - LOGGER.info('There are several non-standard residues in the structure.') - LOGGER.info('Replace the non-standard name in the PDB file with the equivalent name from the standard one if you want to include them in the interactions.') - LOGGER.info("Residues: {0}.".format(' '.join(nonstandard))) + if len(nonstandard) > 0: + LOGGER.info('There are several non-standard residues in the structure.') + LOGGER.info('Replace the non-standard name in the PDB file with the equivalent name from the standard one if you want to include them in the interactions.') + LOGGER.info("Residues: {0}.".format(' '.join(nonstandard))) + return True + + return False def showPairEnergy(data, **kwargs): From e0f4d102e7eab703fe2d9ca956b9aafc96a40f04 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 12 Sep 2024 15:26:32 +0200 Subject: [PATCH 26/28] rename putDUMatom --- prody/proteins/interactions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index b27a00fc4..35a9cfc4a 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -2234,7 +2234,7 @@ def saveInteractionsAsDummyAtoms(atoms, interactions, filename, **kwargs): RESNAME_dummy = kwargs.pop('RESNAME_dummy', 'DUM') - def putDUMatom(coord1, coord2): + def calcDUMposition(coord1, coord2): midpoint = [ (coord1[0] + coord2[0]) / 2, (coord1[1] + coord2[1]) / 2, @@ -2262,7 +2262,7 @@ def putDUMatom(coord1, coord2): res2_name = 'chain '+i[5]+' resname '+i[3][:3]+' and resid '+i[3][3:]+' and index '+' '.join(i[4].split('_')) res2_coords = calcCenter(atoms.select(res2_name)) - all_DUMs.append(putDUMatom(res1_coords, res2_coords)) + all_DUMs.append(calcDUMposition(res1_coords, res2_coords)) if all_DUMs == []: LOGGER.info('Lack of interactions') From 670db545a7948b9a0797c00648ab33f4ad7e1531 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 12 Sep 2024 15:28:00 +0200 Subject: [PATCH 27/28] add and for readability --- prody/proteins/interactions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 35a9cfc4a..a35cf87ae 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -2247,19 +2247,19 @@ def calcDUMposition(coord1, coord2): for i in interactions: if len(i[1].split('_')) <= 3: - res1_name = 'chain '+i[2]+' resname '+i[0][:3]+' and resid '+i[0][3:]+' and index '+' '.join(i[1].split('_')[1:]) + res1_name = 'chain '+i[2]+' and resname '+i[0][:3]+' and resid '+i[0][3:]+' and index '+' '.join(i[1].split('_')[1:]) res1_coords = calcCenter(atoms.select(res1_name)) if len(i[1].split('_')) > 3: - res1_name = 'chain '+i[2]+' resname '+i[0][:3]+' and resid '+i[0][3:]+' and index '+' '.join(i[1].split('_')) + res1_name = 'chain '+i[2]+' and resname '+i[0][:3]+' and resid '+i[0][3:]+' and index '+' '.join(i[1].split('_')) res1_coords = calcCenter(atoms.select(res1_name)) if len(i[4].split('_')) <= 3: - res2_name = 'chain '+i[5]+' resname '+i[3][:3]+' and resid '+i[3][3:]+' and index '+' '.join(i[4].split('_')[1:]) + res2_name = 'chain '+i[5]+' and resname '+i[3][:3]+' and resid '+i[3][3:]+' and index '+' '.join(i[4].split('_')[1:]) res2_coords = calcCenter(atoms.select(res2_name)) if len(i[4].split('_')) > 3: - res2_name = 'chain '+i[5]+' resname '+i[3][:3]+' and resid '+i[3][3:]+' and index '+' '.join(i[4].split('_')) + res2_name = 'chain '+i[5]+' and resname '+i[3][:3]+' and resid '+i[3][3:]+' and index '+' '.join(i[4].split('_')) res2_coords = calcCenter(atoms.select(res2_name)) all_DUMs.append(calcDUMposition(res1_coords, res2_coords)) From 2db6ea99ffa9a05f92bece582b4417905ca8aa81 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 12 Sep 2024 16:06:51 +0200 Subject: [PATCH 28/28] another typo fix --- prody/proteins/interactions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index a35cf87ae..7c49cc4c0 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -186,7 +186,7 @@ def get_energy(pair, source): 'GLUP': 'GLU', # Protonated Glu # Lysine (Lys) - 'LYN': 'LYS', # Deprotonated lysine (nautral) + 'LYN': 'LYS', # Deprotonated lysine (neutral) # Arginine (Arg) 'ARN': 'ARG', # Deprotonated arginine (rare, GROMACS)