Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build model #180

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions moldesign/helpers/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@

BioAssembly = collections.namedtuple('BioAssembly', 'desc chains transforms')

# This named tuple is used to store the mapping between PDB and CIF residue
# sequence numbering.
ResidueSequenceMap = collections.namedtuple('ResidueSequenceMap', 'pdb_chain_id, begin_seq, end_seq, pdb_begin_seq, pdb_end_seq')


def get_conect_pairs(mol):
""" Returns a dicitonary of HETATM bonds for a PDB CONECT record
Expand Down
15 changes: 9 additions & 6 deletions moldesign/interfaces/biopython_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,15 +117,18 @@ def get_mmcif_assemblies(fileobj=None, mmcdata=None):
opers = [opers]

# now create the assembly specifications
assemblies = {}
for id, detail, chainlist, operlist in zip(ids, details, chains, opers):
assert id not in assemblies
transforms = [transforms[i] for i in operlist.split(',')]
assemblies[id] = BioAssembly(detail, chainlist.split(','), transforms)
try:
assemblies = {}
for id, detail, chainlist, operlist in zip(ids, details, chains, opers):
assert id not in assemblies
transforms = [transforms[i] for i in operlist.split(',')]
assemblies[id] = BioAssembly(detail, chainlist.split(','), transforms)
except Exception:
print('WARNING: failed to construct assemblies')
assemblies = {}

return assemblies


def _make_transform_dict(tmat, transform_ids):
if isinstance(transform_ids, list):
for i, j in itertools.product(range(0, 3), range(0, 4)):
Expand Down
74 changes: 69 additions & 5 deletions moldesign/interfaces/parmed_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import moldesign as mdt
from .. import units as u
from .. import utils
from moldesign.helpers.pdb import ResidueSequenceMap

if future.utils.PY2:
from cStringIO import StringIO
Expand Down Expand Up @@ -161,13 +162,16 @@ def parmed_to_mdt(pmdmol):
positions = [[pa.xx, pa.xy, pa.xz] for pa in pmdmol.atoms] * u.angstrom

for iatom, patm in enumerate(pmdmol.atoms):
if patm.residue.insertion_code:
continue

if patm.residue.chain not in chains:
chains[patm.residue.chain] = mdt.Chain(pdbname=patm.residue.chain)
chain = chains[patm.residue.chain]

if patm.residue not in residues:
residues[patm.residue] = mdt.Residue(resname=patm.residue.name,
pdbindex=patm.residue.number)
pdbindex=patm.residue.number)
residues[patm.residue].chain = chain
chain.add(residues[patm.residue])
residue = residues[patm.residue]
Expand All @@ -184,6 +188,8 @@ def parmed_to_mdt(pmdmol):
atoms[patm] = atom

for pbnd in pmdmol.bonds:
if pbnd.atom1.residue.insertion_code or pbnd.atom2.residue.insertion_code:
continue
atoms[pbnd.atom1].bond_to(atoms[pbnd.atom2], int(pbnd.order))

mol = mdt.Molecule(list(atoms.values()),
Expand Down Expand Up @@ -266,7 +272,11 @@ def _reassign_chains(f, mol):
poly_seq_ids = _aslist(data['_pdbx_poly_seq_scheme.asym_id'])
nonpoly_ids = _aslist(data['_pdbx_nonpoly_scheme.asym_id'])
except KeyError:
return mol.copy(name=mol.name)
if '_pdbx_poly_seq_scheme.asym_id' in data:
residue_seq_map = _add_residue_map(data)
else:
residue_seq_map = _add_default_residue_map(mol)
return mol.copy(name=mol.name, residue_map=residue_seq_map)

newchain_names = set(poly_seq_ids + nonpoly_ids)
newchains = {name: mdt.Chain(name) for name in newchain_names}
Expand All @@ -276,7 +286,6 @@ def _reassign_chains(f, mol):
_aslist(data['_pdbx_poly_seq_scheme.pdb_seq_num']),
_aslist(data['_pdbx_poly_seq_scheme.pdb_strand_id']),
_aslist(data['_pdbx_poly_seq_scheme.asym_id'])),

zip(_aslist(data['_pdbx_nonpoly_scheme.mon_id']),
_aslist(data['_pdbx_nonpoly_scheme.pdb_seq_num']),
_aslist(data['_pdbx_nonpoly_scheme.pdb_strand_id']),
Expand All @@ -290,8 +299,63 @@ def _reassign_chains(f, mol):

residue.chain = newchain

return mdt.Molecule(mol.atoms,
name=mol.name, metadata=mol.metadata)
# Add data to map residue numbering between CIF and PDB.
residue_seq_map = _add_residue_map(data)

return mdt.Molecule(mol.atoms, name=mol.name, metadata=mol.metadata,
residue_map=residue_seq_map)

def _add_default_residue_map(mol):
""" Create a default map between CIF and PDB residue numbers.

If the CIF file does not have '_pdbx_poly_seq_scheme' records then
create the residue map from the existing chains.
"""
residue_seq_map = {}
chains = mol.chains
for chain in chains:
chain_id = chain.name
residues = chain.residues
start = residues[0].pdbindex
end = residues[-1].pdbindex
residue_seq_map[chain_id] = ResidueSequenceMap(chain_id, begin_seq=start, end_seq=end,
pdb_begin_seq=start, pdb_end_seq=end)

return residue_seq_map


def _add_residue_map(data):
""" Create a map between CIF and PDB residue numbers. """
residue_seq_map = {}
chain_ids = data["_pdbx_poly_seq_scheme.asym_id"]
pdb_chain_ids = data["_pdbx_poly_seq_scheme.pdb_strand_id"]
seq_ids = data["_pdbx_poly_seq_scheme.seq_id"]
res_names = data["_pdbx_poly_seq_scheme.mon_id"]
auth_seq_ids = data["_pdbx_poly_seq_scheme.auth_seq_num"]
list_size = len(chain_ids)

# Find the min/max residue sequence number for each chain.
chain_res_range = {}
for i in range(list_size-1):
chain_id = chain_ids[i]
seq_id = seq_ids[i]
res_name = res_names[i]
pdb_seq_id = auth_seq_ids[i]
if (pdb_seq_id == "?") or (res_name not in mdt.data.mdt.data.AMINO_NAMES):
continue
if chain_id not in chain_res_range:
pdb_chain = pdb_chain_ids[i]
chain_res_range[chain_id] = [int(seq_id), int(seq_id), pdb_chain,
int(pdb_seq_id), int(pdb_seq_id)]
chain_res_range[chain_id][1] = int(seq_id)
chain_res_range[chain_id][4] = int(pdb_seq_id)

for chain_id in chain_res_range:
start, end, pdb_chain, pdb_start, pdb_end = chain_res_range[chain_id]
residue_seq_map[chain_id] = ResidueSequenceMap(pdb_chain, begin_seq=start,
end_seq=end, pdb_begin_seq=pdb_start, pdb_end_seq=pdb_end)

return residue_seq_map


def _aslist(l):
Expand Down
8 changes: 5 additions & 3 deletions moldesign/molecules/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ class MolTopologyMixin(object):
are here for code organization only - they could be included in the main
Atom class without changing any functionality
"""
def copy(self, name=None):
def copy(self, name=None, residue_map=None):
""" Create a copy of the molecule and all of its substructures, metadata, and methods

Returns:
Expand All @@ -390,7 +390,7 @@ def copy(self, name=None):
name=name,
pdbname=self.pdbname,
charge=self.charge,
metadata=self.metadata)
metadata=self.metadata, residue_map=residue_map)
if self.energy_model is not None:
newmodel = self._copy_method('energy_model')
newmol.set_energy_model(newmodel)
Expand Down Expand Up @@ -992,7 +992,8 @@ def __init__(self, atomcontainer,
copy_atoms=False,
pdbname=None,
charge=None,
metadata=None):
metadata=None,
residue_map=None):
super().__init__()

# initial property init
Expand All @@ -1015,6 +1016,7 @@ def __init__(self, atomcontainer,
self.integrator = None
self.metadata = metadata
self.electronic_state_index = 0
self.residue_map = residue_map

if charge is not None:
self.charge = charge
Expand Down
2 changes: 1 addition & 1 deletion moldesign/tools/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,4 @@ def build_assembly(mol, assembly_name):
all_atoms.extend(chain.atoms)
newmol = mdt.Molecule(all_atoms,
name="%s (bioassembly %s)" % (mol.name, assembly_name))
return newmol
return newmol