Skip to content

Commit

Permalink
Merge pull request #179 from StollLab/issue168
Browse files Browse the repository at this point in the history
Issue168
  • Loading branch information
mtessmer authored Dec 26, 2024
2 parents bb1a859 + 3456007 commit 003a352
Show file tree
Hide file tree
Showing 14 changed files with 70,472 additions and 38,096 deletions.
130 changes: 96 additions & 34 deletions src/chilife/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,8 @@ def save(
*molecules: Union[re.RotamerEnsemble, MolecularSystemBase, mda.Universe, mda.AtomGroup, str],
protein_path: Union[str, Path] = None,
mode: str = 'w',
conect = False,
conect: bool = False,
frames: Union[int, str, ArrayLike, None] = 'all',
**kwargs,
) -> None:
"""Save a pdb file of the provided labels and proteins
Expand All @@ -368,7 +369,12 @@ def save(
The Path to a pdb file to use as the protein object.
mode : str
Which mode to open the file in. Accepts 'w' or 'a' to overwrite or append.
conect : bool
Whether to save PDB CONECT records. Defaults to False.
frames : int, str, ArrayLike[int], None
Exclusively used for ``MolSys`` and ``AtomGroup`` objects. ``frames`` are the indices of the frames to save.
Can be a single index, array of indices, 'all', or ``None``. If ``None`` is passed only the
activate frame will be saved. If 'all' is passed, all frames are saved. Default is 'all'
**kwargs :
Additional arguments to pass to ``write_labels``
Expand Down Expand Up @@ -454,14 +460,28 @@ def save(

used_names[name] = used_names.setdefault(name, 0) + 1

write_protein(pdb_file, protein, name_, conect=conect)
write_protein(pdb_file, protein, name_, conect=conect, frames=frames)

for ic in molecules['molic']:
write_ic(pdb_file, ic, conect=conect)
if isinstance(ic.protein, (mda.AtomGroup, mda.Universe)):
name = Path(ic.protein.universe.filename) if ic.protein.universe.filename is not None else Path(pdb_file.name)
name = name.name
else:
name = ic.protein.fname if hasattr(ic.protein, 'fname') else None

if name is None:
name = Path(pdb_file.name).name

name = name[:-4] if name.endswith(".pdb") else name
name_ = name + str(used_names[name]) if name in used_names else name

write_ic(pdb_file, ic, name=name_, conect=conect, frames=frames)

if len(molecules['rotens']) > 0:
write_labels(pdb_file, *molecules['rotens'], conect=conect, **kwargs)

pdb_file.close()


def fetch(accession_number: str, save: bool = False) -> MDAnalysis.Universe:
"""Fetch pdb file from the protein data bank or the AlphaFold Database and optionally save to disk.
Expand Down Expand Up @@ -534,7 +554,8 @@ def load_protein(struct_file: Union[str, Path],
def write_protein(pdb_file: TextIO,
protein: Union[mda.Universe, mda.AtomGroup, MolecularSystemBase],
name: str = None,
conect: bool = False) -> None:
conect: bool = False,
frames: Union[int, str, ArrayLike, None] = 'all') -> None:
"""
Helper function to write protein PDBs from MDAnalysis and MolSys objects.
Expand All @@ -546,6 +567,8 @@ def write_protein(pdb_file: TextIO,
MDAnalysis or MolSys object to save
name : str
Name of the protein to put in the header
frames : int, str, ArrayLike or None
Frames of the trajectory/ensemble to save to file
"""

# Change chain identifier if longer than 1
Expand All @@ -555,28 +578,21 @@ def write_protein(pdb_file: TextIO,
seg.segid = next(available_segids)

traj = protein.universe.trajectory

pdb_file.write(f'HEADER {name}\n')
for mdl, ts in enumerate(traj):
pdb_file.write(f"MODEL {mdl}\n")
[
pdb_file.write(
fmt_str.format(
atom.index,
atom.name,
atom.resname[:3],
atom.segid,
atom.resnum,
*atom.position,
1.00,
1.0,
atom.type,
)
)
for atom in protein.atoms
]
pdb_file.write("TER\n")
pdb_file.write("ENDMDL\n")
frames = None if frames == 'all' and len(traj) == 1 else frames

if frames == 'all':
for mdl, ts in enumerate(traj):
write_frame(pdb_file, protein.atoms, mdl)
elif hasattr(frames, '__len__'):
for mdl, frame in enumerate(frames):
traj[frame]
write_frame(pdb_file, protein.atoms, mdl)
elif frames is not None:
traj[frames]
write_frame(pdb_file, protein.atoms)
else:
write_frame(pdb_file, protein.atoms)

if conect:
bonds = (protein.bonds.indices if hasattr(protein, 'bonds') else
Expand All @@ -587,7 +603,39 @@ def write_protein(pdb_file: TextIO,
write_bonds(pdb_file, bonds)


def write_ic(pdb_file: TextIO, ic: MolSysIC, conect: bool = None)-> None:
def write_frame(pdb_file: TextIO, atoms, frame=None, coords=None):

if frame is not None:
pdb_file.write(f"MODEL {frame + 1}\n")

if coords is None:
coords = atoms.positions

[pdb_file.write(
fmt_str.format(
i + 1,
atom.name,
atom.resname[:3],
atom.segid,
atom.resnum,
*coord,
1.00,
1.0,
atom.type,
)
) for i, (atom, coord) in enumerate(zip(atoms, coords))]

pdb_file.write("TER\n")

if frame is not None:
pdb_file.write(f"ENDMDL\n")


def write_ic(pdb_file: TextIO,
ic: MolSysIC,
name: str = None,
conect: bool = None,
frames: Union[int, str, ArrayLike, None] = 'all')-> None:
"""
Write a :class:`~MolSysIC` internal coordinates object to a pdb file.
Expand All @@ -597,14 +645,28 @@ def write_ic(pdb_file: TextIO, ic: MolSysIC, conect: bool = None)-> None:
open file or io object.
ic: MolSysIC
chiLife internal coordinates object.
name : str
Name of the molecule to be used for the HEADER.
conect : bool, optional
Write PDB CONECT information to file.
frames : int, str, ArrayLike or None
Frames of the trajectory/ensemble to save to file
"""

pdb_file.write('MODEL\n')
for i, (atom, coord) in enumerate(zip(ic.atoms, ic.coords)):
pdb_file.write(fmt_str.format(i + 1, atom.name, atom.resname, atom.segid, atom.resnum,
coord[0], coord[1], coord[2],
1.0, 1.0, atom.type))
pdb_file.write('ENDMDL\n')
frames = None if frames == 'all' and len(ic.trajectory) == 1 else frames
pdb_file.write(f'HEADER {name}\n')
traj = ic.trajectory
if frames == 'all':
for mdl, ts in enumerate(traj):
write_frame(pdb_file, ic.atoms, mdl, ic.coords)
elif hasattr(frames, '__len__'):
for mdl, frame in enumerate(frames):
traj[frame]
write_frame(pdb_file, ic.atoms, mdl, ic.coords)
elif frames is not None:
traj[frames]
write_frame(pdb_file, ic.atoms, coords=ic.coords)
else:
write_frame(pdb_file, ic.atoms, coords=ic.coords)

if conect:
bonds = ic.topology.bonds
Expand Down
40 changes: 28 additions & 12 deletions src/chilife/protein_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def mutate(
protein: MDAnalysis.Universe,
*ensembles: 'RotamerEnsemble',
add_missing_atoms: bool = True,
random_rotamers: bool = False,
rotamer_index: Union[int, str, None] = None,
) -> MDAnalysis.Universe:
"""Create a new Universe where the native residue is replaced with the highest probability rotamer from a
RotamerEnsemble or SpinLabel object.
Expand All @@ -487,8 +487,10 @@ def mutate(
An MDA Universe object containing protein to be spin labeled
ensembles : RotamerEnsemble, SpinLabel
Precomputed RotamerEnsemble or SpinLabel object to use for selecting and replacing the spin native amino acid
random_rotamers :bool
Randomize rotamer conformations
rotamer_index : int, str, None
Index of the rotamer to be used for the mutation. If None, the most probable rotamer will be used. If 'all',
mutate will return an ensemble with each rotamer, if random, mutate will return an ensemble with a random
rotamer.
add_missing_atoms : bool
Model side chains missing atoms if they are not present in the provided structure.
Expand Down Expand Up @@ -614,15 +616,29 @@ def mutate(
new_other_atoms = U.select_atoms(f"not ({label_selstr})")
new_other_atoms.atoms.positions = other_atoms.atoms.positions

# Apply most probable spin label coordinates to spin label atoms
for spin_label in label_sites.values():
sl_atoms = U.select_atoms(spin_label.selstr)
if random_rotamers:
sl_atoms.atoms.positions = spin_label.coords[
np.random.choice(len(spin_label.coords), p=spin_label.weights)
]
else:
sl_atoms.atoms.positions = spin_label.coords[np.argmax(spin_label.weights)]
if rotamer_index == 'all':
max_label_len = max([len(label) for label in label_sites.values()])
coordinates = np.array([U.atoms.positions.copy() for _ in range(max_label_len)])
U.load_new(coordinates)

for i, ts in enumerate(U.trajectory):
for spin_label in label_sites.values():
sl_atoms = U.select_atoms(spin_label.selstr)
if len(spin_label) <= i:
sl_atoms.positions = spin_label.coords[-1]
else:
sl_atoms.positions = spin_label.coords[i]

else:
for spin_label in label_sites.values():
sl_atoms = U.select_atoms(spin_label.selstr)
if rotamer_index == 'random':
rand_idx = np.random.choice(len(spin_label.coords), p=spin_label.weights)
sl_atoms.atoms.positions = spin_label.coords[rand_idx]
elif isinstance(rotamer_index, int):
sl_atoms.atoms.positions = spin_label.coords[rotamer_index]
else:
sl_atoms.atoms.positions = spin_label.coords[np.argmax(spin_label.weights)]

return U

Expand Down
6 changes: 3 additions & 3 deletions tests/test_MolSys.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,15 +288,15 @@ def test_xl_protein_mutate():
SL = chilife.SpinLabel('R1M', 238, p)
pSL = chilife.mutate(p, SL)
pSL._fname = 'test_mutate'
chilife.save('test_mutate.pdb', pSL)
chilife.save('mutate_xlprotein.pdb', pSL)

with open('test_data/mutate_xlprotein.pdb', 'r') as f:
ans = hashlib.md5(f.read().encode("utf-8")).hexdigest()

with open("test_mutate.pdb", "r") as f:
with open("mutate_xlprotein.pdb", "r") as f:
test = hashlib.md5(f.read().encode("utf-8")).hexdigest()

os.remove('test_mutate.pdb')
os.remove('mutate_xlprotein.pdb')
assert ans == test


Expand Down
6 changes: 3 additions & 3 deletions tests/test_MolSysIC.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,15 @@ def test_save_pdb():
protein = mda.Universe("test_data/alphabetical_peptide.pdb").select_atoms("protein")

uni_ics = xl.MolSysIC.from_atoms(protein)
xl.save("test_data/postwrite_alphabet_peptide.pdb", uni_ics)
xl.save("alphabetical_peptide.pdb", uni_ics)

with open("test_data/postwrite_alphabet_peptide.pdb", "r") as f:
with open("alphabetical_peptide.pdb", "r") as f:
test = hashlib.md5(f.read().encode("utf-8")).hexdigest()

with open("test_data/alphabetical_peptide.pdb", "r") as f:
truth = hashlib.md5(f.read().encode("utf-8")).hexdigest()

os.remove("test_data/postwrite_alphabet_peptide.pdb")
os.remove("alphabetical_peptide.pdb")
assert test == truth


Expand Down
35 changes: 35 additions & 0 deletions tests/test_ProteinUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,41 @@ def test_mutate5():
PPIIm = chilife.mutate(PPII, TOC)
assert PPIIm.residues[-1].resname == 'NHH'

def test_mutate6():
SL = chilife.SpinLabel("R1C", site=28, protein=ubq)
labeled_protein = chilife.mutate(ubq, SL, rotamer_index=10)

# Make sure the newly mutated protein has the coordinates of the rotamer selected
np.testing.assert_allclose(labeled_protein.select_atoms(SL.selstr).positions, SL.coords[10])

# Make sure that is not the same as the first rotamer.
assert np.max(labeled_protein.select_atoms(SL.selstr).positions - SL.coords[0]) > 1

def test_mutate7():
SL = chilife.SpinLabel("R1C", site=28, protein=ubq)
labeled_protein = chilife.mutate(ubq, SL, rotamer_index='all')

assert len(labeled_protein.trajectory) == len(SL)

label_atoms =labeled_protein.select_atoms(SL.selstr)
for i, ts in enumerate(labeled_protein.trajectory):
np.testing.assert_allclose(label_atoms.positions, SL.coords[i])

def test_mutate8():
SL1 = chilife.SpinLabel("R1C", site=28, protein=ubq)
SL2 = chilife.RotamerEnsemble("LYS", site=48, protein=ubq)
labeled_protein = chilife.mutate(ubq, SL1, SL2, rotamer_index='all')

assert len(labeled_protein.trajectory) == max(len(SL1), len(SL2))

for i, ts in enumerate(labeled_protein.trajectory):
for SL in (SL1, SL2):
label_atoms = labeled_protein.select_atoms(SL.selstr)
if len(SL) <= i:
np.testing.assert_allclose(label_atoms.positions, SL.coords[-1])
else:
np.testing.assert_allclose(label_atoms.positions, SL.coords[i])


def test_add_missing_atoms():
protein = mda.Universe("test_data/1omp.pdb", in_memory=True).select_atoms("protein")
Expand Down
Loading

0 comments on commit 003a352

Please sign in to comment.