Skip to content

Commit

Permalink
Merge pull request #395 from BradyAJohnston/dev-ss-mmcif
Browse files Browse the repository at this point in the history
dev ss mmcif
  • Loading branch information
BradyAJohnston authored Jan 5, 2024
2 parents c62d3d6 + eec09ee commit 105da27
Show file tree
Hide file tree
Showing 8 changed files with 404 additions and 287 deletions.
2 changes: 1 addition & 1 deletion molecularnodes/blender/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ def chain_color(name, input_list, label_prefix = "Chain ", field = "chain_id", s

# create an input for this chain
group.interface.new_socket(current_chain, in_out='INPUT', socket_type='NodeSocketColor').default_value = color.random_rgb(i)
# switch input colours 10 and 11
# switch color input values for colors are index 10 and 11
link(node_input.outputs[current_chain], node_color.inputs[11])
link(node_compare.outputs['Result'], node_color.inputs['Switch'])

Expand Down
70 changes: 3 additions & 67 deletions molecularnodes/io/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,68 +71,8 @@ def pdb_get_b_factors(file):
b_factors.append(atoms.b_factor)
return b_factors

def get_secondary_structure(array, file) -> np.array:
"""
Gets the secondary structure annotation that is included in mmtf files and returns it as a numerical numpy array.

Parameters:
-----------
array : numpy.array
The molecular coordinates array, from mmtf.get_structure()
file : mmtf.MMTFFile
The MMTF file containing the secondary structure information, from mmtf.MMTFFile.read()

Returns:
--------
atom_sse : numpy.array
Numerical numpy array representing the secondary structure of the molecule.
Description:
------------
This function uses the biotite.structure package to extract the secondary structure information from the MMTF file.
The resulting secondary structures are `1: Alpha Helix, 2: Beta-sheet, 3: loop`.
"""

from biotite.structure import spread_residue_wise

sec_struct_codes = {
-1: "X",
0 : "I",
1 : "S",
2 : "H",
3 : "E",
4 : "G",
5 : "B",
6 : "T",
7 : "C"
}

dssp_to_abc = {
"X" : 0,
"I" : 1, #"a",
"S" : 3, #"c",
"H" : 1, #"a",
"E" : 2, #"b",
"G" : 1, #"a",
"B" : 2, #"b",
"T" : 3, #"c",
"C" : 3 #"c"
}

try:
sse = file["secStructList"]
except KeyError:
ss_int = np.full(len(array), 3)
print('Warning: "secStructList" field missing from MMTF file. Defaulting \
to "loop" for all residues.')
else:
ss_int = np.array(
[dssp_to_abc.get(sec_struct_codes.get(ss)) for ss in sse],
dtype = int
)
atom_sse = spread_residue_wise(array, ss_int)

return atom_sse


def comp_secondary_structure(array):
Expand Down Expand Up @@ -351,14 +291,10 @@ def att_is_carb():
return struc.filter_carbohydrates(array)

def att_sec_struct():
if calculate_ss or not file:
if hasattr(array, "sec_struct"):
return array.sec_struct
if calculate_ss:
return comp_secondary_structure(array)
else:
return get_secondary_structure(array, file)





# these are all of the attributes that will be added to the structure
# TODO add capcity for selection of particular attributes to include / not include to potentially
Expand Down
66 changes: 66 additions & 0 deletions molecularnodes/io/local.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import bpy
import numpy as np
import warnings
from .. import assembly
from .load import create_molecule
Expand Down Expand Up @@ -52,6 +53,7 @@ def load(
except InvalidFileError:
pass


else:
warnings.warn("Unable to open local file. Format not supported.")
# if bonds chosen but no bonds currently exist (mn.bonds is None)
Expand Down Expand Up @@ -90,6 +92,64 @@ def load(

return mol

def ss_id_to_numeric(id: str) -> int:
"Convert the given ids in the mmmCIF file to 1 AH / 2 BS / 3 Loop integers"
if "HELX" in id:
return int(1)
elif "STRN" in id:
return int(2)
else:
return int(3)

class NoSecondaryStructureError(Exception):
"""Raised when no secondary structure is found"""
pass

def get_ss_mmcif(mol, file):
import biotite.structure as struc

conf = file.get_category('struct_conf')
if not conf:
raise NoSecondaryStructureError
starts = conf['beg_auth_seq_id'].astype(int)
ends = conf['end_auth_seq_id'].astype(int)
chains = conf['end_auth_asym_id'].astype(str)
id_label = conf['id'].astype(str)

sheet = file.get_category('struct_sheet_range')
if sheet:
starts = np.append(starts, sheet['beg_auth_seq_id'].astype(int))
ends = np.append(ends, sheet['end_auth_seq_id'].astype(int))
chains = np.append(chains, sheet['end_auth_asym_id'].astype(str))
id_label = np.append(id_label, np.repeat('STRN', len(sheet['id'])))

id_int = np.array([ss_id_to_numeric(x) for x in id_label])
lookup = dict()
for chain in np.unique(chains):
arrays = []
mask = (chain == chains)
start_sub = starts[mask]
end_sub = ends[mask]
id_sub = id_int[mask]

for (start, end, id) in zip(start_sub, end_sub, id_sub):
idx = np.arange(start, end + 1, dtype = int)
arr = np.zeros((len(idx), 2), dtype = int)
arr[:, 0] = idx
arr[:, 1] = 3
arr[:, 1] = id
arrays.append(arr)

lookup[chain] = dict(np.vstack(arrays).tolist())

ss = []

for i, (chain_id, res_id) in enumerate(zip(mol.chain_id, mol.res_id)):
ss.append(lookup[chain_id].get(res_id, 3))

arr = np.array(ss, dtype = int)
arr[~struc.filter_amino_acids(mol)] = 0
return arr

def open_structure_local_pdb(file_path):
import biotite.structure.io.pdb as pdb
Expand All @@ -101,6 +161,7 @@ def open_structure_local_pdb(file_path):
mol = pdb.get_structure(file, extra_fields = ['b_factor', 'charge', 'occupancy', 'atom_id'], include_bonds = True)
return mol, file


def open_structure_local_pdbx(file_path):
import biotite.structure as struc
import biotite.structure.io.pdbx as pdbx
Expand All @@ -116,6 +177,11 @@ def open_structure_local_pdbx(file_path):
mol = pdbx.get_structure(file, extra_fields = ['b_factor', 'charge'])
except InvalidFileError:
mol = pdbx.get_component(file)

try:
mol.set_annotation('sec_struct', get_ss_mmcif(mol, file))
except NoSecondaryStructureError:
pass

# pdbx doesn't include bond information apparently, so manually create them here
if not mol.bonds:
Expand Down
90 changes: 89 additions & 1 deletion molecularnodes/io/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def load(

mol, coll_frames = create_molecule(
array = mol,
name = pdb_code,
name = pdb_code,
file = file,
calculate_ss = False,
centre = centre,
Expand Down Expand Up @@ -87,6 +87,8 @@ def get_chain_entity_id(file):

return ent_dic



def set_atom_entity_id(mol, file):
mol.add_annotation('entity_id', int)
ent_dic = get_chain_entity_id(file)
Expand All @@ -97,6 +99,91 @@ def set_atom_entity_id(mol, file):
mol.set_annotation('entity_id', entity_ids)
return entity_ids

def get_secondary_structure(array, file) -> np.array:
"""
Gets the secondary structure annotation that is included in mmtf files and returns it as a numerical numpy array.
Parameters:
-----------
array : numpy.array
The molecular coordinates array, from mmtf.get_structure()
file : mmtf.MMTFFile
The MMTF file containing the secondary structure information, from mmtf.MMTFFile.read()
Returns:
--------
atom_sse : numpy.array
Numerical numpy array representing the secondary structure of the molecule.
Description:
------------
This function uses the biotite.structure package to extract the secondary structure information from the MMTF file.
The resulting secondary structures are `1: Alpha Helix, 2: Beta-sheet, 3: loop`.
"""

from biotite.structure import spread_residue_wise

sec_struct_codes = {
-1: "X", # undefined
0 : "I", # pi helix
1 : "S", # bend
2 : "H", # alpha helix
3 : "E", # extended
4 : "G", # 3-10 helix
5 : "B", # bridge
6 : "T", # turn
7 : "C" # coil
}

# convert to 1 AH / 2 BS / 3 LOOP
dssp_codes_to_int = {
-1: 0, # undefined

0 : 1, # pi helix
2 : 1, # alpha helix
4 : 1, # 3-10 helix

3 : 2, # extended
5 : 2, # bridge

6 : 3, # turn
1 : 3, # bend
7 : 3 # coil
}



dssp_to_abc = {
"X" : 0,
"I" : 3, #"a",
"G" : 1, #"a",
"H" : 1, #"a",

"E" : 2, #"b",
"B" : 2, #"b",

"T" : 3, #"c",
"S" : 3, #"c",
"C" : 3 #"c"
}

try:
sse = file["secStructList"]
except KeyError:
ss_int = np.full(len(array), 3)
print('Warning: "secStructList" field missing from MMTF file. Defaulting \
to "loop" for all residues.')
else:
pass
ss_int = np.array(
[dssp_to_abc.get(sec_struct_codes.get(ss)) for ss in sse],
dtype = int
)
atom_sse = spread_residue_wise(array, ss_int)
# atom_sse = spread_residue_wise(array, sse)

return atom_sse

def open_structure_rcsb(pdb_code, cache_dir = None):
import biotite.structure.io.mmtf as mmtf
import biotite.database.rcsb as rcsb
Expand All @@ -108,6 +195,7 @@ def open_structure_rcsb(pdb_code, cache_dir = None):
# the file. The stack will be of length = 1 if there is only one model in the file
mol = mmtf.get_structure(file, extra_fields = ["b_factor", "charge", 'occupancy', 'atom_id'], include_bonds = True)
set_atom_entity_id(mol, file)
mol.set_annotation('sec_struct', get_secondary_structure(mol, file))
return mol, file

# operator that calls the function to import the structure from the PDB
Expand Down
Loading

0 comments on commit 105da27

Please sign in to comment.