-
Notifications
You must be signed in to change notification settings - Fork 83
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #278 from BradyAJohnston/dev-assemblies
Overhaul Biological Assemblies Brings work from @padix-key in #148 to properly support more biological assemblies. Fixes errors in #109.
- Loading branch information
Showing
24 changed files
with
46,318 additions
and
1,163 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
""" | ||
A subpackage for reading rotation matrices and translation vectors | ||
for biological assemblies from different file formats. | ||
The central functions are `get_transformations_` | ||
""" | ||
|
||
from abc import ABCMeta, abstractmethod | ||
|
||
|
||
class AssemblyParser(metaclass=ABCMeta): | ||
|
||
@abstractmethod | ||
def list_assemblies(self): | ||
""" | ||
Return a ``list`` of ``str`` containing the available assembly | ||
IDs. | ||
""" | ||
|
||
@abstractmethod | ||
def get_transformations(self, assembly_id): | ||
""" | ||
Parse the necessary transformations for a given | ||
assembly ID. | ||
Return a ``list`` of transformations for a set of chains | ||
transformations: | ||
transformations on sets of chains for this assembly | ||
| chain IDs affected by the transformation | ||
| | 3x3 rotation matrix | ||
| | | translation vector | ||
| | | | | ||
list[tuple[ndarray, ndarray, ndarray]]] | ||
""" | ||
|
||
@abstractmethod | ||
def get_assemblies(self): | ||
""" | ||
Parse all the transformations for each assembly, returning a dictionary of | ||
key:value pairs of assembly_id:transformations. The transformations list | ||
comes from the `get_transformations(assembly_id)` method. | ||
Dictionary of all assemblies | ||
| Assembly ID | ||
| | List of transformations to create biological assembly. | ||
| | | | ||
dict{'1', list[transformations]} | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
import numpy as np | ||
import itertools | ||
import biotite.structure.io.pdbx as pdbx | ||
import biotite | ||
from . import AssemblyParser | ||
|
||
|
||
class CIFAssemblyParser(AssemblyParser): | ||
### Implementation adapted from ``biotite.structure.io.pdbx.convert`` | ||
|
||
def __init__(self, mmtf_file): | ||
self._file = mmtf_file | ||
|
||
|
||
def list_assemblies(self): | ||
return list(pdbx.list_assemblies(self._file).keys()) | ||
|
||
|
||
def get_transformations(self, assembly_id): | ||
assembly_gen_category = self._file.get_category( | ||
"pdbx_struct_assembly_gen", expect_looped=True | ||
) | ||
if assembly_gen_category is None: | ||
raise biotite.InvalidFileError( | ||
"File has no 'pdbx_struct_assembly_gen' category" | ||
) | ||
|
||
struct_oper_category = self._file.get_category( | ||
"pdbx_struct_oper_list", expect_looped=True | ||
) | ||
if struct_oper_category is None: | ||
raise biotite.InvalidFileError( | ||
"File has no 'pdbx_struct_oper_list' category" | ||
) | ||
|
||
if assembly_id not in assembly_gen_category["assembly_id"]: | ||
raise KeyError(f"File has no Assembly ID '{assembly_id}'") | ||
|
||
# Extract all possible transformations indexed by operation ID | ||
transformation_dict = _get_transformations(struct_oper_category) | ||
|
||
# Get necessary transformations and the affected chain IDs | ||
# NOTE: The chains given here refer to the `label_asym_id` field | ||
# of the `atom_site` category | ||
# However, by default `PDBxFile` uses the `auth_asym_id` as | ||
# chain ID | ||
transformations = [] | ||
for id, op_expr, asym_id_expr in zip( | ||
assembly_gen_category["assembly_id"], | ||
assembly_gen_category["oper_expression"], | ||
assembly_gen_category["asym_id_list"], | ||
): | ||
# Find the operation expressions for given assembly ID | ||
# We already asserted that the ID is actually present | ||
if id == assembly_id: | ||
operations = _parse_operation_expression(op_expr) | ||
affected_chain_ids = asym_id_expr.split(",") | ||
for i, operation in enumerate(operations): | ||
rotations = [] | ||
translations = [] | ||
for op_step in operation: | ||
rotation, translation = transformation_dict[op_step] | ||
rotations.append(rotation) | ||
translations.append(translation) | ||
total_rotation, total_translation = _chain_transformations( | ||
rotations, translations | ||
) | ||
transformations.append(( | ||
np.array(affected_chain_ids, dtype="U4"), | ||
total_rotation, | ||
total_translation | ||
)) | ||
|
||
return transformations | ||
|
||
def get_assemblies(self): | ||
assembly_dict = {} | ||
for assembly_id in self.list_assemblies(): | ||
assembly_dict[assembly_id] = self.get_transformations(assembly_id) | ||
|
||
return assembly_dict | ||
|
||
|
||
def _chain_transformations(rotations, translations): | ||
""" | ||
Get a total rotation/translation transformation by combining | ||
multiple rotation/translation transformations. | ||
This is done by intermediately combining rotation matrices and | ||
translation vectors into 4x4 matrices in the form | ||
|r11 r12 r13 t1| | ||
|r21 r22 r23 t2| | ||
|r31 r32 r33 t3| | ||
|0 0 0 1 |. | ||
""" | ||
total_matrix = np.identity(4) | ||
for rotation, translation in zip(rotations, translations): | ||
matrix = np.zeros((4,4)) | ||
matrix[:3, :3] = rotation | ||
matrix[:3, 3] = translation | ||
matrix[3, 3] = 1 | ||
total_matrix = matrix @ total_matrix | ||
|
||
return total_matrix[:3, :3], total_matrix[:3, 3] | ||
|
||
|
||
|
||
def _get_transformations(struct_oper): | ||
""" | ||
Get transformation operation in terms of rotation matrix and | ||
translation for each operation ID in ``pdbx_struct_oper_list``. | ||
""" | ||
transformation_dict = {} | ||
for index, id in enumerate(struct_oper["id"]): | ||
rotation_matrix = np.array( | ||
[ | ||
[ | ||
float(struct_oper[f"matrix[{i}][{j}]"][index]) | ||
for j in (1, 2, 3) | ||
] | ||
for i in (1, 2, 3) | ||
] | ||
) | ||
translation_vector = np.array( | ||
[float(struct_oper[f"vector[{i}]"][index]) for i in (1, 2, 3)] | ||
) | ||
transformation_dict[id] = (rotation_matrix, translation_vector) | ||
return transformation_dict | ||
|
||
|
||
def _parse_operation_expression(expression): | ||
""" | ||
Get successive operation steps (IDs) for the given | ||
``oper_expression``. | ||
Form the cartesian product, if necessary. | ||
""" | ||
# Split groups by parentheses: | ||
# use the opening parenthesis as delimiter | ||
# and just remove the closing parenthesis | ||
expressions_per_step = expression.replace(")", "").split("(") | ||
expressions_per_step = [e for e in expressions_per_step if len(e) > 0] | ||
# Important: Operations are applied from right to left | ||
expressions_per_step.reverse() | ||
|
||
operations = [] | ||
for expr in expressions_per_step: | ||
if "-" in expr: | ||
# Range of operation IDs, they must be integers | ||
first, last = expr.split("-") | ||
operations.append( | ||
[str(id) for id in range(int(first), int(last) + 1)] | ||
) | ||
elif "," in expr: | ||
# List of operation IDs | ||
operations.append(expr.split(",")) | ||
else: | ||
# Single operation ID | ||
operations.append([expr]) | ||
|
||
# Cartesian product of operations | ||
return list(itertools.product(*operations)) |
Oops, something went wrong.