Skip to content

Commit

Permalink
Improvements to MMTF bioassemlies
Browse files Browse the repository at this point in the history
Correctly parse biotransform header and include mutiple assemblies if
present.  Some tweaks to the atom parser for efficiency.
  • Loading branch information
dkoes committed Oct 4, 2023
1 parent 7252431 commit 7102c7b
Showing 1 changed file with 34 additions and 32 deletions.
66 changes: 34 additions & 32 deletions prody/proteins/mmtffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,15 @@ def parseMMTF(mmtf_struc, **kwargs):

parseMMTF.__doc__ += _parseMMTFdoc


def _has_biomol(hd):
'''Return true if header actually has multiple transformations'''
if 'biomoltrans' not in hd:
return False
for v in hd['biomoltrans'].values():
if len(v) > 4:
return True
return False

def _parseMMTF(mmtf_struc, **kwargs):
LOGGER.timeit()
ag = AtomGroup()
Expand Down Expand Up @@ -156,7 +164,7 @@ def _parseMMTF(mmtf_struc, **kwargs):


if ag is not None and isinstance(hd, dict):
if biomol:
if biomol and _has_biomol(hd):
ag = buildBiomolecules(hd, ag)
if isinstance(ag, list):
LOGGER.info('Biomolecular transformations were applied, {0} '
Expand All @@ -176,8 +184,7 @@ def set_header(data_api):
#get the transform here and convert it to the format that prody wants
assembly = data_api.bio_assembly[0]
chain_list = data_api.chain_name_list
assembly = bio_transform(assembly, chain_list)

assembly = _bio_transform(data_api)
header = {
'r_work': data_api.r_work,
'r_free': data_api.r_free,
Expand All @@ -191,23 +198,20 @@ def set_header(data_api):

return header

def bio_transform(input_data, chain_list):

output_dict = {}
index = input_data['name']
transforms=[]

for item in input_data['transformList']:
chains = list(set([chain_list[i] for i in item['chainIndexList']]))[::-1]
transforms.append(chains)
for i in range(0, len(item['matrix'])-4, 4):
string_slice = item['matrix'][i:i+4]
formatted_string = ' '.join(map(str, string_slice))
transforms.append(formatted_string)

output_dict[index]=transforms

return output_dict
def _bio_transform(dec):
ret = {}
for t in dec.bio_assembly:
name = t['name']
L = []
for trans in t['transformList']:
chis = trans['chainIndexList']
chains = sorted(set([dec.chain_name_list[c] for c in chis]))
m = trans['matrix']
L += [chains, '%f %f %f %f'%tuple(m[:4]),
'%f %f %f %f'%tuple(m[4:8]),
'%f %f %f %f'%tuple(m[8:12])]
ret[t['name']] = L
return ret

def set_info(atomgroup, mmtf_data,get_bonds=False,altloc_sel='A'):

Expand Down Expand Up @@ -255,14 +259,17 @@ def set_info(atomgroup, mmtf_data,get_bonds=False,altloc_sel='A'):
resnums = np.empty(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
chainids = np.empty(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
segnames = np.empty(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
segnames[:] = ''
elements = np.empty(asize, dtype=ATOMIC_FIELDS['element'].dtype)
icodes = np.empty(asize, dtype=ATOMIC_FIELDS['icode'].dtype)

hetero = np.zeros(asize, dtype=bool)
termini = np.zeros(asize, dtype=bool)
altlocs = np.empty(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
icodes = np.empty(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
serials = np.empty(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
elements = np.empty(asize, dtype=ATOMIC_FIELDS['element'].dtype)
bfactors = np.empty(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
occupancies = np.empty(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)

altlocs = np.array(mmtf_data.alt_loc_list[:asize], dtype=ATOMIC_FIELDS['altloc'].dtype)
serials = np.array(mmtf_data.atom_id_list[:asize], dtype=ATOMIC_FIELDS['serial'].dtype)
bfactors = np.array(mmtf_data.b_factor_list[:asize], dtype=ATOMIC_FIELDS['beta'].dtype)
occupancies = np.array(mmtf_data.occupancy_list[:asize], dtype=ATOMIC_FIELDS['occupancy'].dtype)

modelIndex = 0
chainIndex = 0
Expand All @@ -284,9 +291,6 @@ def set_info(atomgroup, mmtf_data,get_bonds=False,altloc_sel='A'):

#traverse atoms
for i in range(groupAtomCount):
bfactors[atomIndex] = mmtf_data.b_factor_list[atomIndex]
altlocs[atomIndex] = mmtf_data.alt_loc_list[atomIndex]
occupancies[atomIndex] = mmtf_data.occupancy_list[atomIndex]
atom_names[atomIndex] = group.get('atomNameList')[i]
elements[atomIndex] = group.get('elementList')[i]
resnames[atomIndex] = group.get('groupName')
Expand All @@ -295,8 +299,6 @@ def set_info(atomgroup, mmtf_data,get_bonds=False,altloc_sel='A'):
chainids[atomIndex] = chain_name_list[k]
if group.get('chemCompType') in mmtfHETATMtypes:
hetero[atomIndex] = True
serials[atomIndex] = atomIndex + 1
segnames[atomIndex]=''

atomIndex += 1

Expand Down

0 comments on commit 7102c7b

Please sign in to comment.