Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store initial and final topologies for all phases -- small molecule pipeline #1210

Merged
merged 10 commits into from
Jul 31, 2023
70 changes: 62 additions & 8 deletions perses/app/relative_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,9 @@ def __init__(self,
else:
_logger.info("Skipping counterion")

# Store phase topology and positions in PDB file
self._store_phase_topologies(phase="complex")


if 'solvent' in phases:
_logger.info(f"Detected solvent...")
Expand Down Expand Up @@ -521,6 +524,9 @@ def __init__(self,
else:
_logger.info("Skipping counterion")

# Store phase topology and positions in PDB file
self._store_phase_topologies(phase="solvent")

if 'vacuum' in phases:
_logger.info(f"Detected solvent...")
# need to change nonbonded cutoff and remove barostat for vacuum leg
Expand Down Expand Up @@ -586,6 +592,8 @@ def __init__(self,
self._vacuum_forward_neglected_angles = self._geometry_engine.forward_neglected_angle_terms
self._vacuum_reverse_neglected_angles = self._geometry_engine.reverse_neglected_angle_terms
self._vacuum_geometry_engine = copy.deepcopy(self._geometry_engine)
# Store phase topology and positions in PDB file
self._store_phase_topologies(phase="vacuum")

def _setup_complex_phase(self):
"""
Expand Down Expand Up @@ -847,7 +855,6 @@ def _solvate_system(self, topology, positions, model='tip3p',phase='complex', bo
The parameterized system, containing a barostat if one was specified.
"""
# DEBUG: Write PDB file being fed into Modeller to check why MOL isn't being matched
from simtk.openmm.app import PDBFile
modeller = app.Modeller(topology, positions)
# retaining protein protonation from input files
#hs = [atom for atom in modeller.topology.atoms() if atom.element.symbol in ['H'] and atom.residue.name not in ['MOL','OLD','NEW']]
Expand Down Expand Up @@ -881,13 +888,6 @@ def _solvate_system(self, topology, positions, model='tip3p',phase='complex', bo
solvated_system = self._system_generator.create_system(solvated_topology)
_logger.info(f"\tSystem parameterized")

if self._trajectory_directory is not None and self._trajectory_prefix is not None:
pdb_filename = AnyPath(f"{self._trajectory_directory}/{self._trajectory_prefix}-{phase}.pdb")
with open(pdb_filename, 'w') as outfile:
PDBFile.writeFile(solvated_topology, solvated_positions, outfile)
else:
_logger.info('Both trajectory_directory and trajectory_prefix need to be provided to save .pdb')

return solvated_topology, solvated_positions, solvated_system

def _handle_charge_changes(self, topology_proposal, new_positions):
Expand Down Expand Up @@ -922,6 +922,60 @@ def _handle_charge_changes(self, topology_proposal, new_positions):
# modify the topology proposal
modify_atom_classes(new_water_indices_to_ionize, topology_proposal)

def _store_phase_topologies(self, phase: str):
"""
Stores topologies and positions in PDB file for the given the phase. Stores both solvent and solute
whenever possible (only "solute" in vacuum).

Notes
-----
- The topologies and positions are stored in the specified directory using the global trajectory prefix.
- The directory will be created if it doesn't already exist.
- The topologies and positions are stored as PDB files in the "models" subdirectory of the trajectory directory.
- Both the trajectory directory and trajectory prefix need to be provided in order to store the topologies.
- If any of the directories or files already exist, they will get overwritten.

Usage
-----
To store the topologies and positions, make sure to set the `trajectory_directory` and `trajectory_prefix`
attributes before calling this method.
"""
from openmm.app import PDBFile
if self._trajectory_directory is not None and self._trajectory_prefix is not None:
if phase == "complex":
topology_proposal = self.complex_topology_proposal
old_positions = self.complex_old_positions
new_positions = self.complex_new_positions
elif phase == "solvent":
topology_proposal = self.solvent_topology_proposal
old_positions = self.solvent_old_positions
new_positions = self.solvent_new_positions
elif phase == "vacuum":
topology_proposal = self.vacuum_topology_proposal
old_positions = self.vacuum_old_positions
new_positions = self.vacuum_new_positions

topologies_to_store = {
f"{phase}_old": {"topology": topology_proposal.old_topology,
"positions": old_positions},
f"{phase}_new": {"topology": topology_proposal.new_topology,
"positions": new_positions},
}
# Store in "models" subdir -- create if doesn't exist
models_path = f"{self._trajectory_directory}/models"
if not os.path.exists(models_path):
os.makedirs(models_path)
for phase_key, phase_data in topologies_to_store.items():
pdb_filename = AnyPath(f"{models_path}/{self._trajectory_prefix}_{phase_key}.pdb")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for keeping this an AnyPath

with open(pdb_filename, 'w') as outfile:
PDBFile.writeFile(phase_data["topology"], phase_data["positions"], outfile)
else:
_logger.warning(
'Not storing topologies. Both trajectory_directory and trajectory_prefix arguments need to'
' be provided to store topology .pdb files.')



@property
def complex_topology_proposal(self):
return self._complex_topology_proposal
Expand Down
100 changes: 54 additions & 46 deletions perses/tests/test_relative_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from perses.app import setup_relative_calculation
import mdtraj as md
from openmmtools import states, alchemy, testsystems, cache
from unittest import skipIf
import pytest

from perses.tests.utils import enter_temp_directory
Expand All @@ -19,7 +18,6 @@
'lambda_electrostatics' : 'lambda',
}


def generate_example_waterbox_states(temperature=300.0*unit.kelvin, pressure=1.0*unit.atmosphere):
"""
This is a convenience function to generate a CompoundThermodynamicState and SamplerState to use in other tests.
Expand Down Expand Up @@ -260,47 +258,6 @@ def warn(*args, **kwargs):
found_tla = True
assert found_tla == True, 'Spectator TLA not in old topology'


def test_host_guest_deterministic_geometries():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this test being deleted?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It basically got replaced by the test at

def test_relative_setup_topologies_storage(tmp_path):

The diff between the two is shown in the following screenshot
perses_tests_diff

You can see only the forcefield was changed between the tests (since we want to be testing openff rather than gaff, preferably), and the lines to test the storage pdbs were added.

"""
execute the `RelativeFEPSetup` with geometries specified for a host-guest relative free energy pair
"""
from perses.app.relative_setup import RelativeFEPSetup

# Setup directory
ligand_sdf = resource_filename("perses", "data/given-geometries/ligands.sdf")
host_pdb = resource_filename("perses", "data/given-geometries/receptor.pdb")

setup = RelativeFEPSetup(
ligand_input = ligand_sdf,
old_ligand_index=0,
new_ligand_index=1,
forcefield_files = ['amber/ff14SB.xml','amber/tip3p_standard.xml','amber/tip3p_HFE_multivalent.xml'],
phases = ['complex', 'solvent', 'vacuum'],
protein_pdb_filename=host_pdb,
receptor_mol2_filename=None,
pressure=1.0 * unit.atmosphere,
temperature=300.0 * unit.kelvin,
solvent_padding=9.0 * unit.angstroms,
ionic_strength=0.15 * unit.molar,
hmass=3*unit.amus,
neglect_angles=False,
map_strength='default',
atom_expr=None,
bond_expr=None,
anneal_14s=False,
small_molecule_forcefield='gaff-2.11',
small_molecule_parameters_cache=None,
trajectory_directory=None,
trajectory_prefix=None,
spectator_filenames=None,
nonbonded_method = 'PME',
complex_box_dimensions=None,
solvent_box_dimensions=None,
remove_constraints=False,
use_given_geometries = True
)

def test_relative_setup_charge_change():
"""
execute `RelativeFEPSetup` in solvent/complex phase on a charge change and assert that the modified new system and old system charge difference is zero.
Expand Down Expand Up @@ -357,7 +314,6 @@ def test_relative_setup_solvent_padding():
Check that the user inputted solvent_padding argument to `RelativeFEPSetup` actually changes the padding for the solvent phase
"""
from perses.app.relative_setup import RelativeFEPSetup
import numpy as np

input_solvent_padding = 1.7 * unit.nanometers
smiles_filename = resource_filename("perses", os.path.join("data", "test.smi"))
Expand Down Expand Up @@ -409,6 +365,58 @@ def test_relative_setup_list_ligand_input():
assert ligand_input_size == expected_input_size, f"There should be {expected_input_size} ligand input files, " \
f"receiving {ligand_input_size} files."

def test_relative_setup_topologies_storage(tmp_path):
"""
Check that topologies get stored
"""
from perses.app.relative_setup import RelativeFEPSetup

# if __name__=="__main__":
# test_run_cdk2_iterations_repex()
# Setup directory
ligand_sdf = resource_filename("perses", "data/given-geometries/ligands.sdf")
host_pdb = resource_filename("perses", "data/given-geometries/receptor.pdb")

fe_setup = RelativeFEPSetup(
ligand_input=ligand_sdf,
old_ligand_index=0,
new_ligand_index=1,
forcefield_files=['amber/ff14SB.xml', 'amber/tip3p_standard.xml', 'amber/tip3p_HFE_multivalent.xml'],
phases=['complex', 'solvent', 'vacuum'],
protein_pdb_filename=host_pdb,
receptor_mol2_filename=None,
pressure=1.0 * unit.atmosphere,
temperature=300.0 * unit.kelvin,
solvent_padding=9.0 * unit.angstroms,
ionic_strength=0.15 * unit.molar,
hmass=3 * unit.amus,
neglect_angles=False,
map_strength='default',
atom_expr=None,
bond_expr=None,
anneal_14s=False,
small_molecule_forcefield='openff-2.1.0',
small_molecule_parameters_cache=None,
trajectory_directory=tmp_path,
trajectory_prefix="host-guest",
spectator_filenames=None,
nonbonded_method='PME',
complex_box_dimensions=None,
solvent_box_dimensions=None,
remove_constraints=False,
use_given_geometries=True
)

expected_files = [
'host-guest_vacuum_old.pdb',
'host-guest_vacuum_new.pdb',
'host-guest_solvent_old.pdb',
'host-guest_solvent_new.pdb',
'host-guest_complex_old.pdb',
'host-guest_complex_new.pdb',
]
for file in expected_files:
pdb_filename = f"{tmp_path}/models/{file}"
assert os.path.exists(pdb_filename)

# Assert that no extra files are created
files_in_directory = os.listdir(f"{tmp_path}/models")
assert len(files_in_directory) == len(expected_files)