-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
37ff1b8
commit 8165e99
Showing
9 changed files
with
295 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/usr/bin/env python | ||
# | ||
# This code is part of the interface classifier tool distribution | ||
# and governed by its license. Please see the LICENSE file that should | ||
# have been included as part of this package. | ||
# | ||
""" | ||
Interface classification methods developed by the Bonvin Lab. | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
#!/usr/bin/env python | ||
# | ||
# This code is part of the interface classifier tool distribution | ||
# and governed by its license. Please see the LICENSE file that should | ||
# have been included as part of this package. | ||
# | ||
|
||
""" | ||
Generic properties of amino acids required for the interface classification methods. | ||
""" | ||
|
||
__author__ = ["Anna Vangone", "Joao Rodrigues"] | ||
|
||
# One-letter character code per three-letter residue name, as used by the
# 'ic' classification scheme.
# NOTE(review): codes presumably mean A = apolar, C = charged, P = polar;
# confirm against the callers of this table.
aa_character_ic = dict(
    ALA="A",
    CYS="A",  # marked as uncertain ('?') by the original authors
    GLU="C",
    ASP="C",
    GLY="A",
    PHE="A",
    ILE="A",
    HIS="C",
    LYS="C",
    MET="A",
    LEU="A",
    ASN="P",
    GLN="P",
    PRO="A",
    SER="P",
    ARG="C",
    THR="P",
    TRP="A",
    VAL="A",
    TYR="A",
)
|
||
# One-letter character code per three-letter residue name, as used by the
# 'protorp' classification scheme. Compared with aa_character_ic, the
# entries for CYS, HIS, TRP and TYR are 'P' here.
# NOTE(review): codes presumably mean A = apolar, C = charged, P = polar;
# confirm against the callers of this table.
aa_character_protorp = dict(
    ALA="A",
    CYS="P",
    GLU="C",
    ASP="C",
    GLY="A",
    PHE="A",
    ILE="A",
    HIS="P",
    LYS="C",
    MET="A",
    LEU="A",
    ASN="P",
    GLN="P",
    PRO="A",
    SER="P",
    ARG="C",
    THR="P",
    TRP="P",
    VAL="A",
    TYR="P",
)
|
||
# Scaling factors for relative ASA
# Calculated using extended ALA-X-ALA peptides
# Taken from NACCESS
#
# One row per residue: (name, total, backbone, side chain).
_REL_ASA_ROWS = (
    ("ALA", 107.95, 38.54, 69.41),
    ("CYS", 134.28, 37.53, 96.75),
    ("ASP", 140.39, 37.70, 102.69),
    ("GLU", 172.25, 37.51, 134.74),
    ("PHE", 199.48, 35.37, 164.11),
    ("GLY", 80.10, 47.77, 32.33),
    ("HIS", 182.88, 35.80, 147.08),
    ("ILE", 175.12, 37.16, 137.96),
    ("LYS", 200.81, 37.51, 163.30),
    ("LEU", 178.63, 37.51, 141.12),
    ("MET", 194.15, 37.51, 156.64),
    ("ASN", 143.94, 37.70, 106.24),
    ("PRO", 136.13, 16.23, 119.90),
    ("GLN", 178.50, 37.51, 140.99),
    ("ARG", 238.76, 37.51, 201.25),
    ("SER", 116.50, 38.40, 78.11),
    ("THR", 139.27, 37.57, 101.70),
    ("VAL", 151.44, 37.16, 114.28),
    ("TRP", 249.36, 38.10, 211.26),
    ("TYR", 212.76, 35.38, 177.38),
)

# rel_asa[kind][residue] -> scaling factor, where kind is one of
# 'total', 'bb' (backbone) or 'sc' (side chain).
rel_asa = {
    kind: {row[0]: row[column] for row in _REL_ASA_ROWS}
    for column, kind in enumerate(("total", "bb", "sc"), 1)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
#!/usr/bin/env python | ||
# | ||
# This code is part of the interface classifier tool distribution | ||
# and governed by its license. Please see the LICENSE file that should | ||
# have been included as part of this package. | ||
# | ||
|
||
""" | ||
Functions to read PDB/mmCIF files | ||
""" | ||
|
||
from __future__ import division, print_function | ||
|
||
import logging | ||
import os | ||
|
||
try: | ||
from Bio.PDB import MMCIFParser, PDBParser | ||
from Bio.PDB.Polypeptide import PPBuilder, is_aa | ||
except ImportError as e: | ||
logging.error("[!] The interface classifier tool requires Biopython") | ||
raise ImportError(e) | ||
|
||
|
||
def parse_structure(path):
    """
    Parse a structure file with Biopython and prepare it for the calculation.

    The parser is chosen from the file extension, matched case-insensitively:
    'pdb' and 'ent' use PDBParser, 'cif' uses MMCIFParser. The parsed
    structure is then cleaned in place:

    - only the first model is kept;
    - disordered atoms are replaced by their selected alternate location;
    - water and hetero residues are removed;
    - hydrogen atoms are removed.

    Chain breaks (more peptide fragments than chains) are logged as warnings
    but do not stop the calculation.

    Args:
        path: path to the structure file.

    Returns:
        A tuple (structure, n_chains, n_res): the cleaned Biopython
        structure, the number of distinct chain ids, and the number of
        residues left after water/hetero residues were removed.

    Raises:
        IOError: if the file extension is not 'pdb', 'ent' or 'cif'.
        ValueError: if a remaining residue is not a standard amino acid.
        Exception: any error raised by the Biopython parser is logged and
            re-raised unchanged (original type and traceback preserved).
    """
    log = logging.getLogger("Prodigy")
    log.info("[+] Reading structure file: %s", path)

    fname = os.path.basename(path)
    # Split on the last dot: 'a.b.pdb' -> ('a.b', 'pdb'). A name without any
    # dot yields an empty structure name and the whole name as "extension",
    # which is then rejected below.
    sname, _, s_ext = fname.rpartition(".")
    ext = s_ext.lower()

    if ext in ("pdb", "ent"):
        sparser = PDBParser(QUIET=1)
    elif ext == "cif":
        sparser = MMCIFParser()
    else:
        raise IOError(
            "[!] Structure format '{0}' is not supported. Use '.pdb' or '.cif'.".format(
                s_ext
            )
        )

    try:
        s = sparser.get_structure(sname, path)
    except Exception:
        log.error("[!] Structure '%s' could not be parsed", sname)
        # Bare raise keeps the parser's own exception type and traceback.
        raise

    # Keep first model only
    if len(s) > 1:
        log.warning(
            "[!] Structure contains more than one model. Only the first one will be kept"
        )
        model_one = s[0].id
        # Iterate over a copy: detaching mutates s.child_list.
        for m in s.child_list[:]:
            if m.id != model_one:
                s.detach_child(m.id)

    # Double occupancy check: swap each disordered atom for its selected
    # alternate location and clear the disorder markers on it.
    for atom in list(s.get_atoms()):
        if atom.is_disordered():
            residue = atom.parent
            sel_at = atom.selected_child
            sel_at.altloc = " "
            sel_at.disordered_flag = 0
            residue.detach_child(atom.id)
            residue.add(sel_at)

    # Remove HETATMs and solvent; anything left must be a standard amino acid.
    for res in list(s.get_residues()):
        # The first field of the residue id is the hetero flag; flags
        # starting with 'W' (water) or 'H' (hetero) are dropped.
        if res.id[0].startswith(("W", "H")):
            res.parent.detach_child(res.id)
        elif not is_aa(res, standard=True):
            raise ValueError(
                "Unsupported non-standard amino acid found: {0}".format(res.resname)
            )
    n_res = len(list(s.get_residues()))

    # Remove Hydrogens
    for atom in list(s.get_atoms()):
        if atom.element == "H":
            atom.parent.detach_child(atom.id)

    # Detect gaps and compare with no. of chains
    peptides = PPBuilder().build_peptides(s)
    n_chains = len({c.id for c in s.get_chains()})

    if len(peptides) != n_chains:
        log.warning("[!] Structure contains gaps:")
        fragment_line = "\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > {2.parent.id} {2.resname}{2.id[1]}"
        for i_pp, pp in enumerate(peptides):
            log.warning(fragment_line.format(i_pp, pp[0], pp[-1]))

    return (s, n_chains, n_res)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/usr/bin/env python | ||
# | ||
# This code is part of the interface classifier tool distribution | ||
# and governed by its license. Please see the LICENSE file that should | ||
# have been included as part of this package. | ||
# | ||
|
||
""" | ||
Assorted utility functions. | ||
""" | ||
|
||
from __future__ import division, print_function | ||
|
||
import os | ||
|
||
|
||
def _check_path(path):
    """
    Return the absolute form of `path` if it names an existing regular file.

    Raises IOError (quoting the path as given) otherwise.
    """
    resolved = os.path.abspath(path)
    if os.path.isfile(resolved):
        return resolved
    raise IOError('Could not read file: {0}'.format(path))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters