Merge pull request #179 from StollLab/issue168

Issue168
StollLab · Dec 26, 2024 · 003a352 · 003a352
2 parents bb1a859 + 3456007
commit 003a352
Show file tree

Hide file tree

Showing 14 changed files with 70,472 additions and 38,096 deletions.
diff --git a/src/chilife/io.py b/src/chilife/io.py
@@ -351,7 +351,8 @@ def save(
         *molecules: Union[re.RotamerEnsemble, MolecularSystemBase, mda.Universe, mda.AtomGroup, str],
         protein_path: Union[str, Path] = None,
         mode: str = 'w',
-        conect = False,
+        conect: bool = False,
+        frames: Union[int, str, ArrayLike, None] = 'all',
         **kwargs,
 ) -> None:
     """Save a pdb file of the provided labels and proteins
@@ -368,7 +369,12 @@ def save(
         The Path to a pdb file to use as the protein object.
     mode : str
         Which mode to open the file in. Accepts 'w' or 'a' to overwrite or append.
-
+    conect : bool
+        Whether to save  PDB CONECT records. Defaults to False.
+    frames : int, str, ArrayLike[int], None
+        Exclusively used for ``MolSys`` and ``AtomGroup`` objects. ``frames`` are the indices of the frames to save.
+        Can be a single index, array of indices, 'all', or ``None``. If ``None`` is passed only the
+        activate frame will be saved. If 'all' is passed, all frames are saved. Default is 'all'
     **kwargs :
         Additional arguments to pass to ``write_labels``
 
@@ -454,14 +460,28 @@ def save(
 
         used_names[name] = used_names.setdefault(name, 0) + 1
 
-        write_protein(pdb_file, protein, name_, conect=conect)
+        write_protein(pdb_file, protein, name_, conect=conect, frames=frames)
 
     for ic in molecules['molic']:
-        write_ic(pdb_file, ic, conect=conect)
+        if isinstance(ic.protein, (mda.AtomGroup, mda.Universe)):
+            name = Path(ic.protein.universe.filename) if ic.protein.universe.filename is not None else Path(pdb_file.name)
+            name = name.name
+        else:
+            name = ic.protein.fname if hasattr(ic.protein, 'fname') else None
+
+        if name is None:
+            name = Path(pdb_file.name).name
+
+        name = name[:-4] if name.endswith(".pdb") else name
+        name_ = name + str(used_names[name]) if name in used_names else name
+
+        write_ic(pdb_file, ic, name=name_, conect=conect, frames=frames)
 
     if len(molecules['rotens']) > 0:
         write_labels(pdb_file, *molecules['rotens'], conect=conect, **kwargs)
 
+    pdb_file.close()
+
 
 def fetch(accession_number: str, save: bool = False) -> MDAnalysis.Universe:
     """Fetch pdb file from the protein data bank or the AlphaFold Database and optionally save to disk.
@@ -534,7 +554,8 @@ def load_protein(struct_file: Union[str, Path],
 def write_protein(pdb_file: TextIO,
                   protein: Union[mda.Universe, mda.AtomGroup, MolecularSystemBase],
                   name: str = None,
-                  conect: bool = False) -> None:
+                  conect: bool = False,
+                  frames: Union[int, str, ArrayLike, None] = 'all') -> None:
     """
     Helper function to write protein PDBs from MDAnalysis and MolSys objects.
 
@@ -546,6 +567,8 @@ def write_protein(pdb_file: TextIO,
         MDAnalysis or MolSys object to save
     name : str
         Name of the protein to put in the header
+    frames : int, str, ArrayLike or None
+        Frames of the trajectory/ensemble to save to file
     """
 
     # Change chain identifier if longer than 1
@@ -555,28 +578,21 @@ def write_protein(pdb_file: TextIO,
             seg.segid = next(available_segids)
 
     traj = protein.universe.trajectory
-
     pdb_file.write(f'HEADER {name}\n')
-    for mdl, ts in enumerate(traj):
-        pdb_file.write(f"MODEL {mdl}\n")
-        [
-            pdb_file.write(
-                fmt_str.format(
-                    atom.index,
-                    atom.name,
-                    atom.resname[:3],
-                    atom.segid,
-                    atom.resnum,
-                    *atom.position,
-                    1.00,
-                    1.0,
-                    atom.type,
-                )
-            )
-            for atom in protein.atoms
-        ]
-        pdb_file.write("TER\n")
-        pdb_file.write("ENDMDL\n")
+    frames = None if frames == 'all' and len(traj) == 1 else frames
+
+    if frames == 'all':
+        for mdl, ts in enumerate(traj):
+           write_frame(pdb_file, protein.atoms, mdl)
+    elif hasattr(frames, '__len__'):
+        for mdl, frame in enumerate(frames):
+            traj[frame]
+            write_frame(pdb_file, protein.atoms, mdl)
+    elif frames is not None:
+        traj[frames]
+        write_frame(pdb_file, protein.atoms)
+    else:
+        write_frame(pdb_file, protein.atoms)
 
     if conect:
         bonds = (protein.bonds.indices if hasattr(protein, 'bonds') else
@@ -587,7 +603,39 @@ def write_protein(pdb_file: TextIO,
             write_bonds(pdb_file, bonds)
 
 
-def write_ic(pdb_file: TextIO, ic: MolSysIC, conect: bool = None)-> None:
+def write_frame(pdb_file: TextIO, atoms, frame=None, coords=None):
+
+    if frame is not None:
+        pdb_file.write(f"MODEL {frame + 1}\n")
+
+    if coords is None:
+        coords = atoms.positions
+
+    [pdb_file.write(
+        fmt_str.format(
+            i + 1,
+            atom.name,
+            atom.resname[:3],
+            atom.segid,
+            atom.resnum,
+            *coord,
+            1.00,
+            1.0,
+            atom.type,
+        )
+    ) for i, (atom, coord) in enumerate(zip(atoms, coords))]
+
+    pdb_file.write("TER\n")
+
+    if frame is not None:
+        pdb_file.write(f"ENDMDL\n")
+
+
+def write_ic(pdb_file: TextIO,
+             ic: MolSysIC,
+             name: str = None,
+             conect: bool = None,
+             frames: Union[int, str, ArrayLike, None] = 'all')-> None:
     """
     Write a :class:`~MolSysIC` internal coordinates object to a pdb file.
 
@@ -597,14 +645,28 @@ def write_ic(pdb_file: TextIO, ic: MolSysIC, conect: bool = None)-> None:
         open file or io object.
     ic: MolSysIC
         chiLife internal coordinates object.
+    name : str
+        Name of the molecule to be used for the HEADER.
+    conect : bool, optional
+        Write PDB CONECT information to file.
+    frames : int, str, ArrayLike or None
+        Frames of the trajectory/ensemble to save to file
     """
-
-    pdb_file.write('MODEL\n')
-    for i, (atom, coord) in enumerate(zip(ic.atoms, ic.coords)):
-        pdb_file.write(fmt_str.format(i + 1, atom.name, atom.resname, atom.segid, atom.resnum,
-                                     coord[0], coord[1], coord[2],
-                                     1.0, 1.0, atom.type))
-    pdb_file.write('ENDMDL\n')
+    frames = None if frames == 'all' and len(ic.trajectory) == 1 else frames
+    pdb_file.write(f'HEADER {name}\n')
+    traj = ic.trajectory
+    if frames == 'all':
+        for mdl, ts in enumerate(traj):
+            write_frame(pdb_file, ic.atoms, mdl, ic.coords)
+    elif hasattr(frames, '__len__'):
+        for mdl, frame in enumerate(frames):
+            traj[frame]
+            write_frame(pdb_file, ic.atoms, mdl, ic.coords)
+    elif frames is not None:
+        traj[frames]
+        write_frame(pdb_file, ic.atoms, coords=ic.coords)
+    else:
+        write_frame(pdb_file, ic.atoms, coords=ic.coords)
 
     if conect:
         bonds = ic.topology.bonds

diff --git a/src/chilife/protein_utils.py b/src/chilife/protein_utils.py
@@ -476,7 +476,7 @@ def mutate(
         protein: MDAnalysis.Universe,
         *ensembles: 'RotamerEnsemble',
         add_missing_atoms: bool = True,
-        random_rotamers: bool = False,
+        rotamer_index: Union[int, str, None] = None,
 ) -> MDAnalysis.Universe:
     """Create a new Universe where the native residue is replaced with the highest probability rotamer from a
     RotamerEnsemble or SpinLabel object.
@@ -487,8 +487,10 @@ def mutate(
         An MDA Universe object containing protein to be spin labeled
     ensembles : RotamerEnsemble, SpinLabel
         Precomputed RotamerEnsemble or SpinLabel object to use for selecting and replacing the spin native amino acid
-    random_rotamers :bool
-        Randomize rotamer conformations
+    rotamer_index : int, str, None
+        Index of the rotamer to be used for the mutation. If None, the most probable rotamer will be used. If 'all',
+        mutate will return an ensemble with each rotamer, if random, mutate will return an ensemble with a random
+        rotamer.
     add_missing_atoms : bool
         Model side chains missing atoms if they are not present in the provided structure.
 
@@ -614,15 +616,29 @@ def mutate(
     new_other_atoms = U.select_atoms(f"not ({label_selstr})")
     new_other_atoms.atoms.positions = other_atoms.atoms.positions
 
-    # Apply most probable spin label coordinates to spin label atoms
-    for spin_label in label_sites.values():
-        sl_atoms = U.select_atoms(spin_label.selstr)
-        if random_rotamers:
-            sl_atoms.atoms.positions = spin_label.coords[
-                np.random.choice(len(spin_label.coords), p=spin_label.weights)
-            ]
-        else:
-            sl_atoms.atoms.positions = spin_label.coords[np.argmax(spin_label.weights)]
+    if rotamer_index == 'all':
+        max_label_len = max([len(label) for label in label_sites.values()])
+        coordinates = np.array([U.atoms.positions.copy() for _ in range(max_label_len)])
+        U.load_new(coordinates)
+
+        for i, ts in enumerate(U.trajectory):
+            for spin_label in label_sites.values():
+                sl_atoms = U.select_atoms(spin_label.selstr)
+                if len(spin_label) <= i:
+                    sl_atoms.positions = spin_label.coords[-1]
+                else:
+                    sl_atoms.positions = spin_label.coords[i]
+
+    else:
+        for spin_label in label_sites.values():
+            sl_atoms = U.select_atoms(spin_label.selstr)
+            if rotamer_index == 'random':
+                rand_idx = np.random.choice(len(spin_label.coords), p=spin_label.weights)
+                sl_atoms.atoms.positions = spin_label.coords[rand_idx]
+            elif isinstance(rotamer_index, int):
+                sl_atoms.atoms.positions = spin_label.coords[rotamer_index]
+            else:
+                sl_atoms.atoms.positions = spin_label.coords[np.argmax(spin_label.weights)]
 
     return U
 

diff --git a/tests/test_MolSys.py b/tests/test_MolSys.py
@@ -288,15 +288,15 @@ def test_xl_protein_mutate():
     SL = chilife.SpinLabel('R1M', 238, p)
     pSL = chilife.mutate(p, SL)
     pSL._fname = 'test_mutate'
-    chilife.save('test_mutate.pdb', pSL)
+    chilife.save('mutate_xlprotein.pdb', pSL)
 
     with open('test_data/mutate_xlprotein.pdb', 'r') as f:
         ans = hashlib.md5(f.read().encode("utf-8")).hexdigest()
 
-    with open("test_mutate.pdb", "r") as f:
+    with open("mutate_xlprotein.pdb", "r") as f:
         test = hashlib.md5(f.read().encode("utf-8")).hexdigest()
 
-    os.remove('test_mutate.pdb')
+    os.remove('mutate_xlprotein.pdb')
     assert ans == test
 
 

diff --git a/tests/test_MolSysIC.py b/tests/test_MolSysIC.py
@@ -76,15 +76,15 @@ def test_save_pdb():
     protein = mda.Universe("test_data/alphabetical_peptide.pdb").select_atoms("protein")
 
     uni_ics = xl.MolSysIC.from_atoms(protein)
-    xl.save("test_data/postwrite_alphabet_peptide.pdb", uni_ics)
+    xl.save("alphabetical_peptide.pdb", uni_ics)
 
-    with open("test_data/postwrite_alphabet_peptide.pdb", "r") as f:
+    with open("alphabetical_peptide.pdb", "r") as f:
         test = hashlib.md5(f.read().encode("utf-8")).hexdigest()
 
     with open("test_data/alphabetical_peptide.pdb", "r") as f:
         truth = hashlib.md5(f.read().encode("utf-8")).hexdigest()
 
-    os.remove("test_data/postwrite_alphabet_peptide.pdb")
+    os.remove("alphabetical_peptide.pdb")
     assert test == truth
 
 

diff --git a/tests/test_ProteinUtils.py b/tests/test_ProteinUtils.py
@@ -155,6 +155,41 @@ def test_mutate5():
     PPIIm = chilife.mutate(PPII, TOC)
     assert PPIIm.residues[-1].resname == 'NHH'
 
+def test_mutate6():
+    SL = chilife.SpinLabel("R1C", site=28, protein=ubq)
+    labeled_protein = chilife.mutate(ubq, SL, rotamer_index=10)
+
+    # Make sure the newly mutated protein has the coordinates of the rotamer selected
+    np.testing.assert_allclose(labeled_protein.select_atoms(SL.selstr).positions, SL.coords[10])
+
+    # Make sure that is not the same as the first rotamer.
+    assert np.max(labeled_protein.select_atoms(SL.selstr).positions - SL.coords[0]) > 1
+
+def test_mutate7():
+    SL = chilife.SpinLabel("R1C", site=28, protein=ubq)
+    labeled_protein = chilife.mutate(ubq, SL, rotamer_index='all')
+
+    assert len(labeled_protein.trajectory) == len(SL)
+
+    label_atoms =labeled_protein.select_atoms(SL.selstr)
+    for i, ts in enumerate(labeled_protein.trajectory):
+        np.testing.assert_allclose(label_atoms.positions, SL.coords[i])
+
+def test_mutate8():
+    SL1 =  chilife.SpinLabel("R1C", site=28, protein=ubq)
+    SL2 = chilife.RotamerEnsemble("LYS", site=48, protein=ubq)
+    labeled_protein = chilife.mutate(ubq, SL1, SL2, rotamer_index='all')
+
+    assert len(labeled_protein.trajectory) == max(len(SL1), len(SL2))
+
+    for i, ts in enumerate(labeled_protein.trajectory):
+        for SL in  (SL1, SL2):
+            label_atoms = labeled_protein.select_atoms(SL.selstr)
+            if len(SL) <= i:
+                np.testing.assert_allclose(label_atoms.positions, SL.coords[-1])
+            else:
+                np.testing.assert_allclose(label_atoms.positions, SL.coords[i])
+
 
 def test_add_missing_atoms():
     protein = mda.Universe("test_data/1omp.pdb", in_memory=True).select_atoms("protein")