diff --git a/src/prodigy_cryst/interface_classifier.py b/src/prodigy_cryst/interface_classifier.py index 02cf43b..73950d0 100755 --- a/src/prodigy_cryst/interface_classifier.py +++ b/src/prodigy_cryst/interface_classifier.py @@ -27,11 +27,11 @@ logging.error("[!] The interface classifier tool requires Biopython") raise ImportError(e) -from prodigy_cryst.lib import aa_properties -from prodigy_cryst.lib.parsers import parse_structure +from prodigy_cryst.modules import aa_properties +from prodigy_cryst.modules.parsers import parse_structure # from prodigy_cryst.lib.freesasa import execute_freesasa -from prodigy_cryst.lib.utils import _check_path +from prodigy_cryst.modules.utils import _check_path def calculate_ic(structure, d_cutoff=5.0, selection=None): @@ -99,7 +99,7 @@ def analyse_contacts(contact_list): } _data = aa_properties.aa_character_ic - for (res_i, res_j) in contact_list: + for res_i, res_j in contact_list: contact_type = (_data.get(res_i.resname), _data.get(res_j.resname)) contact_type = "".join(sorted(contact_type)) bins[contact_type] += 1 diff --git a/src/prodigy_cryst/modules/__init__.py b/src/prodigy_cryst/modules/__init__.py new file mode 100644 index 0000000..4f8e64d --- /dev/null +++ b/src/prodigy_cryst/modules/__init__.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python +# +# This code is part of the interface classifier tool distribution +# and governed by its license. Please see the LICENSE file that should +# have been included as part of this package. +# +""" +Interface classification methods developed by the Bonvin Lab. +""" diff --git a/src/prodigy_cryst/modules/aa_properties.py b/src/prodigy_cryst/modules/aa_properties.py new file mode 100644 index 0000000..139babd --- /dev/null +++ b/src/prodigy_cryst/modules/aa_properties.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python +# +# This code is part of the interface classifier tool distribution +# and governed by its license. Please see the LICENSE file that should +# have been included as part of this package. +# + +""" +Generic properties of amino acids required for the interface classification methods. +""" + +__author__ = ["Anna Vangone", "Joao Rodrigues"] + +aa_character_ic = { + 'ALA': 'A', + 'CYS': 'A', # ? + 'GLU': 'C', + 'ASP': 'C', + 'GLY': 'A', + 'PHE': 'A', + 'ILE': 'A', + 'HIS': 'C', + 'LYS': 'C', + 'MET': 'A', + 'LEU': 'A', + 'ASN': 'P', + 'GLN': 'P', + 'PRO': 'A', + 'SER': 'P', + 'ARG': 'C', + 'THR': 'P', + 'TRP': 'A', + 'VAL': 'A', + 'TYR': 'A', +} + +aa_character_protorp = { + 'ALA': 'A', + 'CYS': 'P', + 'GLU': 'C', + 'ASP': 'C', + 'GLY': 'A', + 'PHE': 'A', + 'ILE': 'A', + 'HIS': 'P', + 'LYS': 'C', + 'MET': 'A', + 'LEU': 'A', + 'ASN': 'P', + 'GLN': 'P', + 'PRO': 'A', + 'SER': 'P', + 'ARG': 'C', + 'THR': 'P', + 'TRP': 'P', + 'VAL': 'A', + 'TYR': 'P', +} + +# Scaling factors for relative ASA +# Calculated using extended ALA-X-ALA peptides +# Taken from NACCESS +rel_asa = { + 'total': + { + 'ALA': 107.95, + 'CYS': 134.28, + 'ASP': 140.39, + 'GLU': 172.25, + 'PHE': 199.48, + 'GLY': 80.10, + 'HIS': 182.88, + 'ILE': 175.12, + 'LYS': 200.81, + 'LEU': 178.63, + 'MET': 194.15, + 'ASN': 143.94, + 'PRO': 136.13, + 'GLN': 178.50, + 'ARG': 238.76, + 'SER': 116.50, + 'THR': 139.27, + 'VAL': 151.44, + 'TRP': 249.36, + 'TYR': 212.76, + }, + 'bb': + { + 'ALA': 38.54, + 'CYS': 37.53, + 'ASP': 37.70, + 'GLU': 37.51, + 'PHE': 35.37, + 'GLY': 47.77, + 'HIS': 35.80, + 'ILE': 37.16, + 'LYS': 37.51, + 'LEU': 37.51, + 'MET': 37.51, + 'ASN': 37.70, + 'PRO': 16.23, + 'GLN': 37.51, + 'ARG': 37.51, + 'SER': 38.40, + 'THR': 37.57, + 'VAL': 37.16, + 'TRP': 38.10, + 'TYR': 35.38, + }, + 'sc': + { + 'ALA': 69.41, + 'CYS': 96.75, + 'ASP': 102.69, + 'GLU': 134.74, + 'PHE': 164.11, + 'GLY': 32.33, + 'HIS': 147.08, + 'ILE': 137.96, + 'LYS': 163.30, + 'LEU': 141.12, + 'MET': 156.64, + 'ASN': 106.24, + 'PRO': 119.90, + 'GLN': 140.99, + 'ARG': 201.25, + 'SER': 78.11, + 'THR': 101.70, + 'VAL': 114.28, + 'TRP': 211.26, + 'TYR': 177.38, + } +} diff --git a/src/prodigy_cryst/modules/parsers.py b/src/prodigy_cryst/modules/parsers.py new file mode 100644 index 0000000..4db755d --- /dev/null +++ b/src/prodigy_cryst/modules/parsers.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python +# +# This code is part of the interface classifier tool distribution +# and governed by its license. Please see the LICENSE file that should +# have been included as part of this package. +# + +""" +Functions to read PDB/mmCIF files +""" + +from __future__ import division, print_function + +import logging +import os + +try: + from Bio.PDB import MMCIFParser, PDBParser + from Bio.PDB.Polypeptide import PPBuilder, is_aa +except ImportError as e: + logging.error("[!] The interface classifier tool requires Biopython") + raise ImportError(e) + + +def parse_structure(path): + """ + Parses a structure using Biopython's PDB/mmCIF Parser + Verifies the integrity of the structure (gaps) and its + suitability for the calculation (is it a complex?). + """ + log = logging.getLogger("Prodigy") + log.info("[+] Reading structure file: {0}".format(path)) + fname = os.path.basename(path) + sname = ".".join(fname.split(".")[:-1]) + s_ext = fname.split(".")[-1] + + _ext = set(("pdb", "ent", "cif")) + if s_ext not in _ext: + raise IOError( + "[!] Structure format '{0}' is not supported. Use '.pdb' or '.cif'.".format( + s_ext + ) + ) + + if s_ext in set(("pdb", "ent")): + sparser = PDBParser(QUIET=1) + elif s_ext == "cif": + sparser = MMCIFParser() + + try: + s = sparser.get_structure(sname, path) + except Exception as e: + # log.error("[!] Structure '{0}' could not be parsed".format(sname)) + log.error("[!] Structure '{0}' could not be parsed".format(sname)) + raise Exception(e) + + # Keep first model only + if len(s) > 1: + log.warning( + "[!] Structure contains more than one model. Only the first one will be kept" + ) + model_one = s[0].id + for m in s.child_list[:]: + if m.id != model_one: + s.detach_child(m.id) + + # Double occupancy check + for atom in list(s.get_atoms()): + if atom.is_disordered(): + residue = atom.parent + sel_at = atom.selected_child + sel_at.altloc = " " + sel_at.disordered_flag = 0 + residue.detach_child(atom.id) + residue.add(sel_at) + + # Remove HETATMs and solvent + res_list = list(s.get_residues()) + + def _ignore(r): + return r.id[0][0] == "W" or r.id[0][0] == "H" + + for res in res_list: + if _ignore(res): + chain = res.parent + chain.detach_child(res.id) + elif not is_aa(res, standard=True): + raise ValueError( + "Unsupported non-standard amino acid found: {0}".format(res.resname) + ) + n_res = len(list(s.get_residues())) + + # Remove Hydrogens + atom_list = list(s.get_atoms()) + + def _ignore(x): + return x.element == "H" + + for atom in atom_list: + if _ignore(atom): + residue = atom.parent + residue.detach_child(atom.name) + + # Detect gaps and compare with no. of chains + pep_builder = PPBuilder() + peptides = pep_builder.build_peptides(s) + n_peptides = len(peptides) + n_chains = len(set([c.id for c in s.get_chains()])) + + if n_peptides != n_chains: + log.warning("[!] Structure contains gaps:") + for i_pp, pp in enumerate(peptides): + log.warning( + "\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > {2.parent.id} {2.resname}{2.id[1]}".format( + i_pp, pp[0], pp[-1] + ) + ) + # raise Exception('Calculation cannot proceed') + + return (s, n_chains, n_res) diff --git a/src/prodigy_cryst/modules/utils.py b/src/prodigy_cryst/modules/utils.py new file mode 100644 index 0000000..1df5a50 --- /dev/null +++ b/src/prodigy_cryst/modules/utils.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# +# This code is part of the interface classifier tool distribution +# and governed by its license. Please see the LICENSE file that should +# have been included as part of this package. +# + +""" +Assorted utility functions. +""" + +from __future__ import division, print_function + +import os + + +def _check_path(path): + """ + Checks if a file is readable. + """ + + full_path = os.path.abspath(path) + if not os.path.isfile(full_path): + raise IOError('Could not read file: {0}'.format(path)) + return full_path diff --git a/tests/test_aa_properties.py b/tests/test_aa_properties.py index 5a48646..bcfe7b2 100644 --- a/tests/test_aa_properties.py +++ b/tests/test_aa_properties.py @@ -1,6 +1,6 @@ import pytest -from prodigy_cryst.lib.aa_properties import ( +from prodigy_cryst.modules.aa_properties import ( aa_character_ic, aa_character_protorp, rel_asa, diff --git a/tests/test_interface_classifier.py b/tests/test_interface_classifier.py index 7e8fa6e..5a6df83 100644 --- a/tests/test_interface_classifier.py +++ b/tests/test_interface_classifier.py @@ -9,7 +9,7 @@ analyse_contacts, calculate_ic, ) -from prodigy_cryst.lib.parsers import parse_structure +from prodigy_cryst.modules.parsers import parse_structure from tests import DATA_FOLDER diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 05606a9..d07dfd2 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -3,7 +3,7 @@ import pytest from Bio.PDB.Structure import Structure -from prodigy_cryst.lib.parsers import parse_structure +from prodigy_cryst.modules.parsers import parse_structure from . import DATA_FOLDER diff --git a/tests/test_utils.py b/tests/test_utils.py index 9f1fa1c..ec6b076 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,6 @@ from tempfile import NamedTemporaryFile -from prodigy_cryst.lib.utils import _check_path +from prodigy_cryst.modules.utils import _check_path def test__check_path():