diff --git a/prody/proteins/fixer.py b/prody/proteins/fixer.py index ad30aa731..ccd49aef3 100644 --- a/prody/proteins/fixer.py +++ b/prody/proteins/fixer.py @@ -28,7 +28,7 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): or PDBFixer with OpenMM. There are also options whether to *model_residues* (default False), *remove_heterogens* - (default False), *keep_waters* (default True), *overwrite* (default False). + (default False), *keep_waters* (default True), *overwrite* (default False), *keep_ids* (default True). :arg infile: PDB file name :type infile: str @@ -41,9 +41,17 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): default is 'openbabel' :type method: str - :arg pH: pH value applyed only for PDBfixer. + :arg pH: pH value applied only for PDBfixer. :type pH: int, float + :arg model_residues: add all missing atoms from residues, applied only for PDBfixer. + default is False + :type model_residues: bool + + :arg keep_ids: keep the original residue number, applied only for PDBfixer. + default is True + :type keep_ids: bool + Instalation of Openbabel: conda install -c conda-forge openbabel @@ -58,6 +66,7 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): remove_heterogens = kwargs.get("remove_heterogens", False) keep_water = kwargs.get("keep_water", True) overwrite = kwargs.get("overwrite", False) + keep_ids = kwargs.get("keep_ids", True) import os @@ -70,6 +79,9 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): if not isinstance(keep_water, bool): raise TypeError('keep_water should be True or False') + if not isinstance(keep_ids, bool): + raise TypeError('keep_ids should be True or False') + if not isinstance(overwrite, bool): raise TypeError('overwrite should be True or False') @@ -136,7 +148,7 @@ def addMissingAtoms(infile, method='openbabel', pH=7.0, outfile=None, **kwargs): fixer.findMissingAtoms() fixer.addMissingAtoms() fixer.addMissingHydrogens(pH) - PDBFile.writeFile(fixer.topology, fixer.positions, open(outfile, 'w')) + PDBFile.writeFile(fixer.topology, fixer.positions, open(outfile, 'w'), keepIds=keep_ids) LOGGER.info("Hydrogens were added to the structure. New structure is saved as {0}.".format(outfile)) except ImportError: @@ -165,8 +177,16 @@ def fixStructuresMissingAtoms(infiles, method='openbabel', pH=7.0, outfiles=None 'pdbfixer': PDBFixer and OpenMM default is 'openbabel' :type method: str + + :arg model_residues: add all missing atoms from residues, applied only for PDBfixer. + default is False + :type model_residues: bool + + :arg keep_ids: keep the original residue number, applied only for PDBfixer. + default is True + :type keep_ids: bool - :arg pH: pH value applyed only for PDBfixer. + :arg pH: pH value applied only for PDBfixer. :type pH: int, float Instalation of Openbabel: diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 82c8fd18c..7c49cc4c0 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -28,7 +28,7 @@ from prody.proteins import writePDB, parsePDB from collections import Counter -from prody.trajectory import TrajBase, Trajectory +from prody.trajectory import TrajBase, Trajectory, Frame from prody.ensemble import Ensemble import multiprocessing @@ -46,7 +46,8 @@ 'calcHydrogenBondsTrajectory', 'calcHydrophobicOverlapingAreas', 'Interactions', 'InteractionsTrajectory', 'LigandInteractionsTrajectory', 'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues', - 'showSminaTermValues', 'showPairEnergy'] + 'showSminaTermValues', 'showPairEnergy', 'checkNonstandardResidues', + 'saveInteractionsAsDummyAtoms'] def cleanNumbers(listContacts): @@ -160,17 +161,62 @@ def get_energy(pair, source): import numpy as np import importlib.resources as pkg_resources - try: - # Python 3 - with pkg_resources.path('prody.proteins', 'tabulated_energies.txt') as file_path: - data = np.loadtxt(file_path, skiprows=1, dtype=str) - except: - # Python 2.7 - import pkg_resources + aa_correction = { + # Histidine (His) + 'HSD': 'HIS', # NAMD, protonated at ND1 (HID in AMBER) + 'HSE': 'HIS', # NAMD, protonated at NE2 (HIE in AMBER) + 'HSP': 'HIS', # NAMD, doubly protonated (HIP in AMBER) + 'HID': 'HIS', # AMBER name, protonated at ND1 + 'HIE': 'HIS', # AMBER name, protonated at NE2 + 'HIP': 'HIS', # AMBER name, doubly protonated + 'HISD': 'HIS', # GROMACS: protonated at ND1 + 'HISE': 'HIS', # GROMACS: protonated at NE2 + 'HISP': 'HIS', # GROMACS: doubly protonated + + # Cysteine (Cys) + 'CYX': 'CYS', # Cystine (disulfide bridge) + 'CYM': 'CYS', # Deprotonated cysteine, anion + + # Aspartic acid (Asp) + 'ASH': 'ASP', # Protonated Asp + 'ASPP': 'ASP', + + # Glutamic acid (Glu) + 'GLH': 'GLU', # Protonated Glu + 'GLUP': 'GLU', # Protonated Glu + + # Lysine (Lys) + 'LYN': 'LYS', # Deprotonated lysine (neutral) + + # Arginine (Arg) + 'ARN': 'ARG', # Deprotonated arginine (rare, GROMACS) + + # Tyrosine (Tyr) + 'TYM': 'TYR', # Deprotonated tyrosine (GROMACS) + + # Serine (Ser) + 'SEP': 'SER', # Phosphorylated serine (GROMACS/AMBER) + + # Threonine (Thr) + 'TPO': 'THR', # Phosphorylated threonine (GROMACS/AMBER) + + # Tyrosine (Tyr) + 'PTR': 'TYR', # Phosphorylated tyrosine (GROMACS/AMBER) + + # Non-standard names for aspartic and glutamic acids in low pH environments + 'ASH': 'ASP', # Protonated Asp + 'GLH': 'GLU', # Protonated Glu + } + + pair = [aa_correction.get(aa, aa) for aa in pair] + + if PY3K: + file_path = pkg_resources.path('prody.proteins', 'tabulated_energies.txt') + else: file_path = pkg_resources.resource_filename('prody.proteins', 'tabulated_energies.txt') - with open(file_path) as f: - data = np.loadtxt(f, skiprows=1, dtype=str) + with open(file_path) as f: + data = np.loadtxt(f, dtype=str) sources = ["IB_nosolv", "IB_solv", "CS"] aa_pairs = [] @@ -180,14 +226,60 @@ def get_energy(pair, source): lookup = pair[0]+pair[1] - return data[np.where(np.array(aa_pairs)==lookup)[0]][0][2:][np.where(np.array(sources)==source)][0] + try: + data_results = data[np.where(np.array(aa_pairs)==lookup)[0]][0][2:][np.where(np.array(sources)==source)][0] + except TypeError: + raise TypeError('Please replace non-standard names of residues with standard names.') + + return data_results + + +def checkNonstandardResidues(atoms): + """Check whether the atomic structure contains non-standard residues and inform to replace the name + to the standard one so that non-standard residues are treated in a correct way while computing + interactions. + + :arg atoms: an Atomic object from which residues are selected + :type atoms: :class:`.Atomic` + """ + + try: + coords = (atoms._getCoords() if hasattr(atoms, '_getCoords') else + atoms.getCoords()) + except AttributeError: + try: + checkCoords(coords) + except TypeError: + raise TypeError('coords must be an object ' + 'with `getCoords` method') + + amino_acids = ["ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN", "GLY", "HIS", "ILE", + "LEU", "LYS", "MET", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL"] + + aa_list = atoms.select('name CA').getResnames() + aa_list_nr = atoms.select('name CA').getResnums() + nonstandard = [] + + for nr_i,i in enumerate(aa_list): + if i not in amino_acids: + nonstandard.append(aa_list[nr_i] + str(aa_list_nr[nr_i])) + + if len(nonstandard) > 0: + LOGGER.info('There are several non-standard residues in the structure.') + LOGGER.info('Replace the non-standard name in the PDB file with the equivalent name from the standard one if you want to include them in the interactions.') + LOGGER.info("Residues: {0}.".format(' '.join(nonstandard))) + return True + + return False def showPairEnergy(data, **kwargs): """Return energies when a list of interactions is given. Energies will be added to each pair of residues at the last position in the list. Energy is based on the residue types and not on the distances. - The unit of energy is kcal/mol. The energies defined as 'IB_nosolv', 'IB_solv' are taken from XX and - 'CS' from YY. + The unit of energy is kcal/mol. The energies defined as 'IB_nosolv' (non-solvent-mediated), 'IB_solv' (solvent-mediated) + are taken from [OK98]_ and 'CS' from InSty paper (under preparation). + Protonation of residues is not distinguished. The protonation of residues is not distinguished. + Known residues such as HSD, HSE, HIE, and HID (used in MD simulations) are treated as HIS. :arg data: list with interactions from calcHydrogenBonds() or other types :type data: list @@ -195,6 +287,12 @@ def showPairEnergy(data, **kwargs): :arg energy_list_type: name of the list with energies default is 'IB_solv' :type energy_list_type: 'IB_nosolv', 'IB_solv', 'CS' + + + .. [OK98] Keskin O., Bahar I., Badretdinov A.Y., Ptitsyn O.B., Jernigan R.L., + Empirical solvet-mediated potentials hold for both intra-molecular and + inter-molecular inter-residues interactions, + *Protein Science* **1998** 7: 2578–2586. """ if not isinstance(data, list): @@ -1949,8 +2047,15 @@ def showInteractionsGraph(statistics, **kwargs): def calcStatisticsInteractions(data, **kwargs): """Return the statistics of interactions from PDB Ensemble or trajectory including: (1) the weight for each residue pair: corresponds to the number of counts divided by the - number of frames (values >1 are obtained when residue pair creates multiple contacts); - (2) average distance of interactions for each pair [in Ang] and (3) standard deviation [Ang.]. + number of frames (values >1 are obtained when the residue pair creates multiple contacts); + (2) average distance of interactions for each pair [in Ang], + (3) standard deviation [Ang.], + (4) Energy [in kcal/mol] that is not distance dependent. Energy by default is solvent-mediated + from [OK98]_ ('IB_solv'). To use non-solvent-mediated entries ('IB_nosolv') from [OK98]_ or + solvent-mediated values obtained for InSty paper ('CS', under preparation) change + `energy_list_type` parameter. + If energy information is not available, please check whether the pair of residues is listed in + the "tabulated_energies.txt" file, which is localized in the ProDy directory. :arg data: list with interactions from calcHydrogenBondsTrajectory() or other types :type data: list @@ -1988,12 +2093,21 @@ def calcStatisticsInteractions(data, **kwargs): for element in elements: if element not in stats: values = [t[1] for t in interactions_list if t[0] == element] - stats[element] = { - "stddev": np.round(np.std(values),6), - "mean": np.round(np.mean(values),6), - "weight": np.round(float(len(values))/len(data), 6), - "energy": get_energy([element.split('-')[0][:3], element.split('-')[1][:3]], energy_list_type) - } + + try: + stats[element] = { + "stddev": np.round(np.std(values),6), + "mean": np.round(np.mean(values),6), + "weight": np.round(float(len(values))/len(data), 6), + "energy": get_energy([element.split('-')[0][:3], element.split('-')[1][:3]], energy_list_type) + } + except: + LOGGER.warn('energy information is not available for ', element.split('-')[0][:3], element.split('-')[1][:3]) + stats[element] = { + "stddev": np.round(np.std(values),6), + "mean": np.round(np.mean(values),6), + "weight": np.round(float(len(values))/len(data), 6) + } statistic = [] for key, value in stats.items(): @@ -2002,8 +2116,11 @@ def calcStatisticsInteractions(data, **kwargs): LOGGER.info(" Average [Ang.]: {}".format(value['mean'])) LOGGER.info(" Standard deviation [Ang.]: {0}".format(value['stddev'])) LOGGER.info(" Weight: {0}".format(value['weight'])) - LOGGER.info(" Energy [kcal/mol]: {0}".format(value['energy'])) - statistic.append([key, value['weight'], value['mean'], value['stddev'], value['energy']]) + try: + LOGGER.info(" Energy [kcal/mol]: {0}".format(value['energy'])) + statistic.append([key, value['weight'], value['mean'], value['stddev'], value['energy']]) + except: + statistic.append([key, value['weight'], value['mean'], value['stddev']]) else: pass statistic.sort(key=lambda x: x[1], reverse=True) @@ -2087,6 +2204,80 @@ def calcDistribution(interactions, residue1, residue2=None, **kwargs): LOGGER.info(i) +def saveInteractionsAsDummyAtoms(atoms, interactions, filename, **kwargs): + '''Creates a PDB file which will contain protein structure and dummy atoms that will be placed between pairs + of interacting residues. + + :arg atoms: an Atomic object from which residues are selected + :type atoms: :class:`.Atomic` + + :arg interactions: list of interactions + :type interactions: list + + :arg filename: name of the PDB file which will contain dummy atoms and protein structure + :type filename: str + + :arg RESNAME_dummy: resname of the dummy atom, use 3-letter name + be default is 'DUM' + :type RESNAME_dummy: str ''' + + + try: + coords = (atoms._getCoords() if hasattr(atoms, '_getCoords') else + atoms.getCoords()) + except AttributeError: + try: + checkCoords(coords) + except TypeError: + raise TypeError('coords must be an object ' + 'with `getCoords` method') + + RESNAME_dummy = kwargs.pop('RESNAME_dummy', 'DUM') + + def calcDUMposition(coord1, coord2): + midpoint = [ + (coord1[0] + coord2[0]) / 2, + (coord1[1] + coord2[1]) / 2, + (coord1[2] + coord2[2]) / 2 + ] + return midpoint + + all_DUMs = [] + atoms_ = atoms.copy() + + for i in interactions: + if len(i[1].split('_')) <= 3: + res1_name = 'chain '+i[2]+' and resname '+i[0][:3]+' and resid '+i[0][3:]+' and index '+' '.join(i[1].split('_')[1:]) + res1_coords = calcCenter(atoms.select(res1_name)) + + if len(i[1].split('_')) > 3: + res1_name = 'chain '+i[2]+' and resname '+i[0][:3]+' and resid '+i[0][3:]+' and index '+' '.join(i[1].split('_')) + res1_coords = calcCenter(atoms.select(res1_name)) + + if len(i[4].split('_')) <= 3: + res2_name = 'chain '+i[5]+' and resname '+i[3][:3]+' and resid '+i[3][3:]+' and index '+' '.join(i[4].split('_')[1:]) + res2_coords = calcCenter(atoms.select(res2_name)) + + if len(i[4].split('_')) > 3: + res2_name = 'chain '+i[5]+' and resname '+i[3][:3]+' and resid '+i[3][3:]+' and index '+' '.join(i[4].split('_')) + res2_coords = calcCenter(atoms.select(res2_name)) + + all_DUMs.append(calcDUMposition(res1_coords, res2_coords)) + + if all_DUMs == []: + LOGGER.info('Lack of interactions') + else: + LOGGER.info('Creating file with dummy atoms') + dummyAtoms = AtomGroup() + coords = array([all_DUMs], dtype=float) + dummyAtoms.setCoords(coords) + dummyAtoms.setNames([RESNAME_dummy]*len(dummyAtoms)) + dummyAtoms.setResnums(range(1, len(dummyAtoms)+1)) + dummyAtoms.setResnames([RESNAME_dummy]*len(dummyAtoms)) + + writePDB(filename, atoms_+dummyAtoms) + + def listLigandInteractions(PLIP_output, **kwargs): """Create a list of interactions from PLIP output created using calcLigandInteractions(). Results can be displayed in VMD. @@ -2838,14 +3029,33 @@ def getInteractions(self, **kwargs): :arg selection2: selection string :type selection2: str + :arg replace: Used with selection criteria to set the new one + If set to **True** the selection will be replaced by the new one. + Default is **False** + :type replace: bool + Selection: If we want to select interactions for the particular residue or group of residues: selection='chain A and resid 1 to 50' If we want to study chain-chain interactions: selection='chain A', selection2='chain B' """ - + + replace = kwargs.pop('replace', False) + if len(kwargs) != 0: results = [filterInteractions(j, self._atoms, **kwargs) for j in self._interactions] + + if replace == True: + LOGGER.info('New interactions are set') + self._interactions = results + self._hbs = results[0] + self._sbs = results[1] + self._rib = results[2] + self._piStack = results[3] + self._piCat = results[4] + self._hps = results[5] + self._dibs = results[6] + else: results = self._interactions @@ -3238,17 +3448,20 @@ def saveInteractionsPDB(self, **kwargs): :arg energy: sum of the energy between residues default is False - :type energy: True, False + :type energy: bool """ + energy = kwargs.pop('energy', False) + if not hasattr(self, '_interactions_matrix') or self._interactions_matrix is None: raise ValueError('Please calculate interactions matrix first.') + + if not isinstance(energy, bool): + raise TypeError('energy should be True or False') import numpy as np from collections import Counter - energy = kwargs.pop('energy', False) - atoms = self._atoms interaction_matrix = self._interactions_matrix interaction_matrix_en = self._interactions_matrix_en @@ -3377,18 +3590,22 @@ def showFrequentInteractors(self, cutoff=5, **kwargs): y.append(all_y[nr_ii]) if SETTINGS['auto_show']: - matplotlib.rcParams['font.size'] = '20' - fig = plt.figure(num=None, figsize=(12,6), facecolor='w') + matplotlib.rcParams['font.size'] = '12' + fig = plt.figure(num=None, figsize=(16,5), facecolor='w') y_pos = np.arange(len(y)) show = plt.bar(y_pos, x, align='center', alpha=0.5, color='blue') - plt.xticks(y_pos, y, rotation=45, fontsize=20) - plt.ylabel('Number of interactions') + plt.xticks(y_pos, y, rotation=45, fontsize=16) + plt.ylabel('Number of interactions', fontsize=16) plt.tight_layout() if SETTINGS['auto_show']: showFigure() - return show + + dict_counts = dict(zip(y, x)) + dict_counts_sorted = dict(sorted(dict_counts.items(), key=lambda item: item[1], reverse=True)) + + return dict_counts_sorted def showCumulativeInteractionTypes(self, **kwargs): @@ -3418,7 +3635,7 @@ def showCumulativeInteractionTypes(self, **kwargs): :arg energy: sum of the energy between residues default is False - :type energy: True, False + :type energy: bool """ import numpy as np @@ -3433,13 +3650,15 @@ def showCumulativeInteractionTypes(self, **kwargs): atoms = self._atoms energy = kwargs.pop('energy', False) - + + if not isinstance(energy, bool): + raise TypeError('energy should be True or False') + ResNumb = atoms.select('protein and name CA').getResnums() ResName = atoms.select('protein and name CA').getResnames() ResChid = atoms.select('protein and name CA').getChids() ResList = [ i[0]+str(i[1])+i[2] for i in list(zip([ aa_dic[i] for i in ResName ], ResNumb, ResChid)) ] - if energy == True: matrix_en = self._interactions_matrix_en matrix_en_sum = np.sum(matrix_en, axis=0) @@ -3457,7 +3676,6 @@ def showCumulativeInteractionTypes(self, **kwargs): plt.show() return matrix_en_sum - else: replace_matrix = kwargs.get('replace_matrix', False) @@ -3730,19 +3948,60 @@ def getInteractions(self, **kwargs): :arg selection2: selection string :type selection2: str - + + :arg replace: Used with selection criteria to set the new one + If set to **True** the selection will be replaced by the new one. + Default is **False** + :type replace: bool + Selection: If we want to select interactions for the particular residue or group of residues: selection='chain A and resid 1 to 50' If we want to study chain-chain interactions: selection='chain A', selection2='chain B' """ - + + replace = kwargs.pop('replace', False) + if len(kwargs) != 0: sele_inter = [] for i in self._interactions_traj: for nr_j,j in enumerate(i): sele_inter.append(filterInteractions(i[nr_j], self._atoms, **kwargs)) + + if replace == True: + try: + trajectory = self._traj + numFrames = trajectory._n_csets + except: + # If we analyze previously saved PKL file it doesn't have dcd information + # We have seven type of interactions. It will give number of frames. + numFrames = int(len(sele_inter)/7) + + self._interactions_traj = sele_inter + self._hbs_traj = sele_inter[0:numFrames] + self._sbs_traj = sele_inter[numFrames:2*numFrames] + self._rib_traj = sele_inter[2*numFrames:3*numFrames] + self._piStack_traj = sele_inter[3*numFrames:4*numFrames] + self._piCat_traj = sele_inter[4*numFrames:5*numFrames] + self._hps_traj = sele_inter[5*numFrames:6*numFrames] + self._dibs_traj = sele_inter[6*numFrames:7*numFrames] + LOGGER.info('New interactions are set') + + self._interactions_nb_traj = None + self._interactions_matrix_traj = None + + new_interactions_nb_traj = [] + new_interactions_nb_traj.append([ len(i) for i in self._hbs_traj ]) + new_interactions_nb_traj.append([ len(i) for i in self._sbs_traj ]) + new_interactions_nb_traj.append([ len(i) for i in self._rib_traj ]) + new_interactions_nb_traj.append([ len(i) for i in self._piStack_traj ]) + new_interactions_nb_traj.append([ len(i) for i in self._piCat_traj ]) + new_interactions_nb_traj.append([ len(i) for i in self._hps_traj ]) + new_interactions_nb_traj.append([ len(i) for i in self._dibs_traj ]) + self._interactions_nb_traj = new_interactions_nb_traj + results = sele_inter + else: results = self._interactions_traj