Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow mdpow-solvationenergy to select alchemlyb estimators #148

Merged
merged 21 commits into from
Jun 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ orbeckst, VOD555
invocation (#128)
* Configuration setting mdrun.maxthreads now also applies to energy
minimization with mdpow-equilibrium.py/mdpow.run() (#119)
* supported forcefield options in scripts (#123)
* supported multiple edr files input in fep.py (#132)
* supported alchemlyb estimators in analysis functions and scripts
(#133 #135)
* fixed: return fig in fep.py so plot function in scripts can access the
object (#129)
* support opls-aa tip4pd, amber tip4pew and cyclohexane, and charmm TIP5P
and cyclohexane solvent type (#141)


2017-05-02 0.6.1
Expand Down
55 changes: 30 additions & 25 deletions mdpow/fep.py
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,7 @@ def analyze(self, force=False, stride=None, autosave=True, ncorrel=25000):
.. _p526: http://books.google.co.uk/books?id=XmyO2oRUg0cC&pg=PA526
"""
stride = stride or self.stride
logger.info("Analysis stride is %s.",stride)

if force or not self.has_dVdl():
try:
Expand Down Expand Up @@ -1021,7 +1022,7 @@ def collect_alchemlyb(self, SI=True, start=0, stop=None, stride=None, autosave=T
xvg_file = self.dgdl_xvg(self.wdir(component, l))
xvg_df = extract(xvg_file, T=self.Temperature).iloc[start:stop:stride]
if SI:
logger.info("Perform statistical inefficiency analysis.")
logger.info("Performing statistical inefficiency analysis for window %s %04d" % (component, 1000 * l))
ts = _extract_dataframe(xvg_file).iloc[start:stop:stride]
ts = pd.DataFrame({'time': ts.iloc[:,0], 'dhdl': ts.iloc[:,1]})
ts = ts.set_index('time')
Expand All @@ -1044,6 +1045,10 @@ def analyze_alchemlyb(self, SI=True, start=0, stop=None, stride=None, force=Fals
start = start or self.start
stop = stop or self.stop

logger.info("Analysis stride is %s.",stride)
logger.info("Analysis starts from frame %s.",start)
logger.info("Analysis stops at frame %s.", stop)

if self.method in ['TI', 'BAR', 'MBAR']:
estimator = self.estimators[self.method]['estimator']
else:
Expand Down Expand Up @@ -1346,6 +1351,7 @@ def p_transfer(G1, G2, **kwargs):
errmsg = "estimator = %r is not supported, must be 'mdpow' or 'alchemlyb'" % estimator
logger.error(errmsg)
raise ValueError(errmsg)

if G1.molecule != G2.molecule:
raise ValueError("The two simulations were done for different molecules.")
if G1.Temperature != G2.Temperature:
Expand All @@ -1354,40 +1360,39 @@ def p_transfer(G1, G2, **kwargs):
logger.info("[%s] transfer free energy %s --> %s calculation",
G1.molecule, G1.solvent_type, G2.solvent_type)
for G in (G1, G2):
G_kwargs = kwargs.copy()
# for fep files generated with old code which doesn't have these attributes
if not hasattr(G, 'start'):
G.start = kwargs.pop('start', 0)
G.start = G_kwargs.pop('start', 0)
if not hasattr(G, 'stop'):
G.stop = kwargs.pop('stop', None)
G.stop = G_kwargs.pop('stop', None)
if not hasattr(G, 'SI'):
kwargs.setdefault('SI', True)
G_kwargs.setdefault('SI', True)
else:
kwargs.setdefault('SI', G.SI)
G_kwargs.setdefault('SI', G.SI)

# for this version. use the method given instead of the one in the input cfg file
G.method = kwargs.pop('method', 'MBAR')
if kwargs['force']:
if estimator == 'mdpow':
G.start = kwargs.pop('start', 0)
G.stop = kwargs.pop('stop', None)
G.SI = kwargs.pop('stop', False)
G.analyze(**kwargs)
if G.method != "TI":
errmsg = "Method %s is not implemented in MDPOW, use estimator='alchemlyb'" % G.method
logger.error(errmsg)
raise ValueError(errmsg)
elif estimator == 'alchemlyb':
G.analyze_alchemlyb(**kwargs)
G.method = G_kwargs.pop('method', 'MBAR')
if estimator == 'mdpow':
if G.method != "TI":
errmsg = "Method %s is not implemented in MDPOW, use estimator='alchemlyb'" % G.method
logger.error(errmsg)
raise ValueError(errmsg)

try:
G.results.DeltaA.Gibbs
G.logger_DeltaA0()
except (KeyError, AttributeError): # KeyError because results is a AttributeDict
logger.warn("Must analyze simulation because no hydration free energy data available...")
if kwargs['force'] or (not hasattr(G.results.DeltaA, 'Gibbs')):
# write out the settings when the analysis is performed
logger.info("The solvent is %s .", G.solvent_type)
logger.info("Estimator is %s.", estimator)
logger.info("Free energy calculation method is %s.", G.method)
if estimator == 'mdpow':
G.analyze(**kwargs)
G.analyze(**G_kwargs)
elif estimator == 'alchemlyb':
G.analyze_alchemlyb(**kwargs)
if G_kwargs['SI']:
logger.info("Statistical inefficiency analysis will be performed.")
else:
logger.info("Statistical inefficiency analysis won't be performed.")
G.analyze_alchemlyb(**G_kwargs)

# x.Gibbs are QuantityWithError so they do error propagation
transferFE = G2.results.DeltaA.Gibbs - G1.results.DeltaA.Gibbs
# note minus sign, with our convention of the free energy difference to be
Expand Down
6 changes: 3 additions & 3 deletions scripts/mdpow-pow
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ if __name__ == "__main__":
parser.add_option('-p', '--plotfile', dest="plotfile",
help="plot dV/dlambda to FILE; use png or pdf suffix to "
"determine the file type. 'auto' generates a pdf file "
"DIRECTORY/dVdl_MOLID_pow.pdf and 'None' disables it [%default]",
"DIRECTORY/dVdl_MOLID_pow.pdf and 'None' disables it [%default]"
"The plot function is only available for mdpow estimator,"
"and is disabled when using alchemlyb estimators.",
metavar="FILE")
Expand All @@ -208,8 +208,8 @@ if __name__ == "__main__":
help="force rereading all data [%default]")
parser.add_option('--noSI', dest='SI',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename as --no-SI so that when we use argparse bool options, it has the same name.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make things clear, also add

  parser.add_option('--SI', dest='SI', action="store_true", ...)

as the on switch. Set the default for SI to True.

action="store_false",
help="Disable statistical inefficiency analysis."
"Statitical inefficiency analysis is performed by default when using"
help="Disable statistical inefficiency analysis"
"Statistical inefficiency analysis is performed by default when using"
"alchemlyb estimators and is disabled when using mdpow estimator.")
parser.add_option('-s', '--stride', dest="stride", type='int',
help="Use every N-th datapoint from the original dV/dlambda data. "
Expand Down
180 changes: 121 additions & 59 deletions scripts/mdpow-solvationenergy
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
"""%prog [options] DIRECTORY [DIRECTORY ...]
"""%(prog)s [options] DIRECTORY [DIRECTORY ...]

Run the free energy analysis for a solvent in <DIRECTORY>/FEP
and return DeltaG.
Expand Down Expand Up @@ -104,31 +104,66 @@ def run_gsolv(solvent, directory, **kwargs):
return _datastatus[g.contains_corrupted_xvgs()]

gsolv = load_gsolv(solvent, directory, permissive=kwargs.pop('permissive',False),)
logger.info("The solvent is %s.", solvent)

# for this version. use the method given instead of the one in the input cfg file
estimator = kwargs.pop('estimator', 'alchemlyb')
if not estimator in ('mdpow', 'alchemlyb'):
errmsg = "estimator = %r is not supported, must be 'mdpow' or 'alchemlyb'" % estimator
logger.error(errmsg)
raise ValueError(errmsg)

gsolv.method = kwargs.pop('method', 'MBAR')
if estimator == 'mdpow':
if gsolv.method != "TI":
errmsg = "Method %s is not implemented in MDPOW, use estimator='alchemlyb'" % gsolv.method
logger.error(errmsg)
raise ValueError(errmsg)

energyfile = kwargs.pop('energyfile', None)
plotfile = kwargs.pop('plotfile', None)

if not hasattr(gsolv, 'start'):
gsolv.start = kwargs.pop('start', 0)
if not hasattr(gsolv, 'stop'):
gsolv.stop = kwargs.pop('stop', None)
orbeckst marked this conversation as resolved.
Show resolved Hide resolved
if not hasattr(gsolv, 'SI'):
kwargs.setdefault('SI', True)
else:
kwargs.setdefault('SI', gsolv.SI)

orbeckst marked this conversation as resolved.
Show resolved Hide resolved
# do the analysis (only use keywords that are needed for FEP.analysis())
akw = {'stride': kwargs.pop('stride', None), 'force': kwargs.pop('force', False)}
if akw['force']:
logger.info("[%(directory)s] Forcing (re)analysis of hydration free energy data", vars())
gsolv.analyze(**akw)
# make sure that we have data
try:
gsolv.results.DeltaA.Gibbs
if kwargs['force'] or (not hasattr(gsolv.results.DeltaA, 'Gibbs')):
logger.info("[%(directory)s] Forcing (re)analysis of hydration free energy data", vars())
# write out the settings when the analysis is performed
logger.info("Estimator is %s.", estimator)
logger.info("Free energy calculation method is %s.", gsolv.method)
if estimator == 'mdpow':
# do the analysis (only use keywords that are needed for FEP.analysis())
akw = {'stride': kwargs.pop('stride', None), 'force': kwargs.pop('force', False)}
gsolv.analyze(**akw)
elif estimator == 'alchemlyb':
if kwargs['SI']:
logger.info("Statistical inefficiency analysis will be performed.")
else:
logger.info("Statistical inefficiency analysis won't be performed.")
gsolv.analyze_alchemlyb(**kwargs)
else:
logger.info("[%(directory)s] Reading existing data from pickle file.", vars())
gsolv.logger_DeltaA0()
except (KeyError, AttributeError): # KeyError because results is a AttributeDict
logger.info("[%(directory)s] Analyzing %(solvent)s solvation free energy data for the first time", vars())
gsolv.analyze(**akw)

gsolv.logger_DeltaA0()

if datstat(gsolv) == 'BAD':
logger.warning("[%s] Possible file corruption: %s:%s", directory, solvent, datstat(gsolv))

if kwargs.get('energyfile', None):
with mdpow.filelock.FileLock(kwargs['energyfile'], timeout=2):
with open(kwargs['energyfile'], mode='a') as out:
if energyfile:
with mdpow.filelock.FileLock(energyfile, timeout=2):
with open(energyfile, mode='a') as out:
out.write(fmt_energy % (gsolv.summary(), directory))
logger.info("[%s] Wrote solvation energy terms to %r", directory, kwargs['energyfile'])
logger.info("[%s] Wrote solvation energy terms to %r", directory, energyfile)

plotfile = kwargs.get('plotfile', None)
if estimator == 'alchemlyb':
plotfile = False
orbeckst marked this conversation as resolved.
Show resolved Hide resolved
if plotfile:
if plotfile == 'auto':
plotfile = auto_plotfile(directory, gsolv)
Expand Down Expand Up @@ -179,58 +214,85 @@ def realpath_or_none(fn):
if __name__ == "__main__":
import sys
import os.path
from optparse import OptionParser

parser = OptionParser(usage=__doc__)
parser.add_option('-p', '--plotfile', dest="plotfile",
help="plot dV/dlambda to FILE; use png or pdf suffix to "
"determine the file type. 'auto' generates a pdf file "
"DIRECTORY/dVdl_<molname>_<solvent>.pdf and 'None' disables it [%default]",
metavar="FILE")
parser.add_option('-e', '--energies', dest="energyfile",
help="append solvation free energies to FILE [%default]",
metavar="FILE")
parser.add_option('--force', dest='force',
action="store_true",
help="force rereading all data [%default]")
parser.add_option('-s', '--stride', dest="stride", type='int',
help="Use every N-th datapoint from the original dV/dlambda data. "
"Using a number larger than 1 can significantly reduce memory usage "
"with little impact on the precision of the final output. [%default]",
metavar="N")
parser.add_option('--solvent', '-S', dest='solvent',
metavar="NAME",
help="solvent NAME for compound, 'water', 'octanol', or 'cyclohexane' [%default]")
parser.add_option('--ignore-corrupted', dest="permissive",
action="store_true",
help="skip lines in the md.xvg files that cannot be parsed. "
"WARNING: Other lines in the file might have been corrupted in "
"such a way that they appear correct but are in fact wrong. "
"WRONG RESULTS CAN OCCUR! USE AT YOUR OWN RISK [%default]")
parser.set_defaults(plotfile="auto",
energyfile="gsolv.txt",
force=False, stride=10, permissive=False, solvent='water')
opts,args = parser.parse_args()

if len(args) == 0:
logger.fatal("A directory is required. See --help.")
sys.exit(1)
elif len(args) > 1 and not opts.plotfile.lower() in ('none', 'auto'):
import argparse

parser = argparse.ArgumentParser(usage=__doc__, prog='mdpow-solvationenergy',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("directory", nargs='+',
help="directory or directories which contain all the files "
"resulting from running mdpow.fep.Ghyd.setup() ",)
parser.add_argument('--plotfile', dest="plotfile", default='none',
help="plot dV/dlambda to FILE; use png or pdf suffix to "
"determine the file type. 'auto' generates a pdf file "
"DIRECTORY/dVdl_<molname>_<solvent>.pdf and 'None' disables it "
"The plot function is only available for mdpow estimator,"
"and is disabled when using alchemlyb estimators.",
metavar="FILE")
parser.add_argument('--solvent', '-S', dest='solvent', default='water',
choices=['water', 'octanol', 'cyclohexane'], metavar="NAME",
help="solvent NAME for compound, 'water', 'octanol', or 'cyclohexane' ")
parser.add_argument('-o', '--outfile', dest="outfile", default="gsolv.txt",
help="append one-line results summary to FILE ",
metavar="FILE")
parser.add_argument('-e', '--energies', dest="energyfile", default="energies.txt",
help="append solvation free energies to FILE ",
metavar="FILE")
parser.add_argument('--estimator', dest="estimator", default='alchemlyb',
choices=['mdpow', 'alchemlyb'],
help="choose the estimator to be used, alchemlyb or mdpow estimators")
parser.add_argument('--method', dest="method", default='MBAR',
choices=['TI', 'MBAR', 'BAR'],
help="choose the method to calculate free energy ",)
parser.add_argument('--force', dest='force', default=False,
action="store_true",
help="force rereading all data ")
parser.add_argument('--SI', dest='SI',
action="store_true", default=True,
help="enable statistical inefficiency (SI) analysis. "
"Statistical inefficiency analysis is performed by default when using"
"alchemlyb estimators and is always disabled when using mdpow estimator.")
parser.add_argument('--no-SI', dest='SI',
action="store_false", default=True,
help="disable statistical inefficiency analysis. "
"Statistical inefficiency analysis is performed by default when using"
"alchemlyb estimators and is disabled when using mdpow estimator. ")
parser.add_argument('-s', '--stride', dest="stride", type=int, default=1,
help="use every N-th datapoint from the original dV/dlambda data. ",
metavar="N")
parser.add_argument('--start', dest="start", type=int, default=0,
help="start point for the data used from the original dV/dlambda data. ")
parser.add_argument('--stop', dest="stop", type=int, default=None,
help="stop point for the data used from the original dV/dlambda data. ",)
parser.add_argument('--ignore-corrupted', dest="permissive",
action="store_true", default=False,
help="skip lines in the md.xvg files that cannot be parsed. "
"WARNING: Other lines in the file might have been corrupted in "
"such a way that they appear correct but are in fact wrong. "
"WRONG RESULTS CAN OCCUR! USE AT YOUR OWN RISK ")
opts = parser.parse_args()

if len(opts.directory) > 1 and not opts.plotfile.lower() in ('none', 'auto'):
logger.fatal("Can only use --plotfile=None or --plotfile=auto with multiple directories.")
sys.exit(1)

for directory in args:
if opts.estimator == 'mdpow' and opts.SI:
logger.fatal("Statistical inefficiency analysis is only available for estimator 'alchemlyb'.")
sys.exit(1)

for directory in opts.directory:
if not os.path.exists(directory):
logger.warn("Directory %r not found, skipping...", directory)
continue
logger.info("Analyzing directory %r... (can take a while)", directory)

opts.plotfile = parse_filename_option(opts.plotfile)
plotfile = parse_filename_option(opts.plotfile)

try:
run_gsolv(opts.solvent, directory, plotfile=opts.plotfile,
energyfile=realpath_or_none(opts.energyfile),
force=opts.force, stride=opts.stride, permissive=opts.permissive)
run_gsolv(opts.solvent, directory, plotfile=plotfile,
energyfile=realpath_or_none(opts.energyfile),
force=opts.force, stride=opts.stride, start=opts.start,
stop=opts.stop, estimator=opts.estimator,
method=opts.method, permissive=opts.permissive, SI=opts.SI)
except (OSError, IOError), err:
logger.error("Running analysis in directory %r failed: %s", directory, str(err))
except Exception, err:
Expand Down