diff --git a/CHANGES b/CHANGES index b24c60dd..311eeeb3 100644 --- a/CHANGES +++ b/CHANGES @@ -26,6 +26,14 @@ orbeckst, VOD555 invocation (#128) * Configuration setting mdrun.maxthreads now also applies to energy minimization with mdpow-equilibrium.py/mdpow.run() (#119) +* supported forcefield options in scripts (#123) +* supported multiple edr files input in fep.py (#132) +* supported alchemlyb estimators in analysis functions and scripts + (#133 #135) +* fixed: return fig in fep.py so plot function in scripts can access the + object (#129) +* support opls-aa tip4pd, amber tip4pew and cyclohexane, and charmm TIP5P + and cyclohexane solvent type (#141) 2017-05-02 0.6.1 diff --git a/mdpow/fep.py b/mdpow/fep.py index c833f055..753deb5a 100644 --- a/mdpow/fep.py +++ b/mdpow/fep.py @@ -950,6 +950,7 @@ def analyze(self, force=False, stride=None, autosave=True, ncorrel=25000): .. _p526: http://books.google.co.uk/books?id=XmyO2oRUg0cC&pg=PA526 """ stride = stride or self.stride + logger.info("Analysis stride is %s.",stride) if force or not self.has_dVdl(): try: @@ -1021,7 +1022,7 @@ def collect_alchemlyb(self, SI=True, start=0, stop=None, stride=None, autosave=T xvg_file = self.dgdl_xvg(self.wdir(component, l)) xvg_df = extract(xvg_file, T=self.Temperature).iloc[start:stop:stride] if SI: - logger.info("Perform statistical inefficiency analysis.") + logger.info("Performing statistical inefficiency analysis for window %s %04d" % (component, 1000 * l)) ts = _extract_dataframe(xvg_file).iloc[start:stop:stride] ts = pd.DataFrame({'time': ts.iloc[:,0], 'dhdl': ts.iloc[:,1]}) ts = ts.set_index('time') @@ -1044,6 +1045,10 @@ def analyze_alchemlyb(self, SI=True, start=0, stop=None, stride=None, force=Fals start = start or self.start stop = stop or self.stop + logger.info("Analysis stride is %s.",stride) + logger.info("Analysis starts from frame %s.",start) + logger.info("Analysis stops at frame %s.", stop) + if self.method in ['TI', 'BAR', 'MBAR']: estimator = self.estimators[self.method]['estimator'] else: @@ -1346,6 +1351,7 @@ def p_transfer(G1, G2, **kwargs): errmsg = "estimator = %r is not supported, must be 'mdpow' or 'alchemlyb'" % estimator logger.error(errmsg) raise ValueError(errmsg) + if G1.molecule != G2.molecule: raise ValueError("The two simulations were done for different molecules.") if G1.Temperature != G2.Temperature: @@ -1354,40 +1360,39 @@ def p_transfer(G1, G2, **kwargs): logger.info("[%s] transfer free energy %s --> %s calculation", G1.molecule, G1.solvent_type, G2.solvent_type) for G in (G1, G2): + G_kwargs = kwargs.copy() # for fep files generated with old code which doesn't have these attributes if not hasattr(G, 'start'): - G.start = kwargs.pop('start', 0) + G.start = G_kwargs.pop('start', 0) if not hasattr(G, 'stop'): - G.stop = kwargs.pop('stop', None) + G.stop = G_kwargs.pop('stop', None) if not hasattr(G, 'SI'): - kwargs.setdefault('SI', True) + G_kwargs.setdefault('SI', True) else: - kwargs.setdefault('SI', G.SI) + G_kwargs.setdefault('SI', G.SI) # for this version. use the method given instead of the one in the input cfg file - G.method = kwargs.pop('method', 'MBAR') - if kwargs['force']: - if estimator == 'mdpow': - G.start = kwargs.pop('start', 0) - G.stop = kwargs.pop('stop', None) - G.SI = kwargs.pop('stop', False) - G.analyze(**kwargs) - if G.method != "TI": - errmsg = "Method %s is not implemented in MDPOW, use estimator='alchemlyb'" % G.method - logger.error(errmsg) - raise ValueError(errmsg) - elif estimator == 'alchemlyb': - G.analyze_alchemlyb(**kwargs) + G.method = G_kwargs.pop('method', 'MBAR') + if estimator == 'mdpow': + if G.method != "TI": + errmsg = "Method %s is not implemented in MDPOW, use estimator='alchemlyb'" % G.method + logger.error(errmsg) + raise ValueError(errmsg) - try: - G.results.DeltaA.Gibbs - G.logger_DeltaA0() - except (KeyError, AttributeError): # KeyError because results is a AttributeDict - logger.warn("Must analyze simulation because no hydration free energy data available...") + if kwargs['force'] or (not hasattr(G.results.DeltaA, 'Gibbs')): + # write out the settings when the analysis is performed + logger.info("The solvent is %s .", G.solvent_type) + logger.info("Estimator is %s.", estimator) + logger.info("Free energy calculation method is %s.", G.method) if estimator == 'mdpow': - G.analyze(**kwargs) + G.analyze(**G_kwargs) elif estimator == 'alchemlyb': - G.analyze_alchemlyb(**kwargs) + if G_kwargs['SI']: + logger.info("Statistical inefficiency analysis will be performed.") + else: + logger.info("Statistical inefficiency analysis won't be performed.") + G.analyze_alchemlyb(**G_kwargs) + # x.Gibbs are QuantityWithError so they do error propagation transferFE = G2.results.DeltaA.Gibbs - G1.results.DeltaA.Gibbs # note minus sign, with our convention of the free energy difference to be diff --git a/scripts/mdpow-pow b/scripts/mdpow-pow index 211f74fc..3fd198b3 100755 --- a/scripts/mdpow-pow +++ b/scripts/mdpow-pow @@ -189,7 +189,7 @@ if __name__ == "__main__": parser.add_option('-p', '--plotfile', dest="plotfile", help="plot dV/dlambda to FILE; use png or pdf suffix to " "determine the file type. 'auto' generates a pdf file " - "DIRECTORY/dVdl_MOLID_pow.pdf and 'None' disables it [%default]", + "DIRECTORY/dVdl_MOLID_pow.pdf and 'None' disables it [%default]" "The plot function is only available for mdpow estimator," "and is disabled when using alchemlyb estimators.", metavar="FILE") @@ -208,8 +208,8 @@ if __name__ == "__main__": help="force rereading all data [%default]") parser.add_option('--noSI', dest='SI', action="store_false", - help="Disable statistical inefficiency analysis." - "Statitical inefficiency analysis is performed by default when using" + help="Disable statistical inefficiency analysis" + "Statistical inefficiency analysis is performed by default when using" "alchemlyb estimators and is disabled when using mdpow estimator.") parser.add_option('-s', '--stride', dest="stride", type='int', help="Use every N-th datapoint from the original dV/dlambda data. " diff --git a/scripts/mdpow-solvationenergy b/scripts/mdpow-solvationenergy index 13d9dd36..33bb334f 100755 --- a/scripts/mdpow-solvationenergy +++ b/scripts/mdpow-solvationenergy @@ -1,5 +1,5 @@ #!/usr/bin/env python -"""%prog [options] DIRECTORY [DIRECTORY ...] +"""%(prog)s [options] DIRECTORY [DIRECTORY ...] Run the free energy analysis for a solvent in /FEP and return DeltaG. @@ -104,31 +104,66 @@ def run_gsolv(solvent, directory, **kwargs): return _datastatus[g.contains_corrupted_xvgs()] gsolv = load_gsolv(solvent, directory, permissive=kwargs.pop('permissive',False),) + logger.info("The solvent is %s.", solvent) + + # for this version. use the method given instead of the one in the input cfg file + estimator = kwargs.pop('estimator', 'alchemlyb') + if not estimator in ('mdpow', 'alchemlyb'): + errmsg = "estimator = %r is not supported, must be 'mdpow' or 'alchemlyb'" % estimator + logger.error(errmsg) + raise ValueError(errmsg) + + gsolv.method = kwargs.pop('method', 'MBAR') + if estimator == 'mdpow': + if gsolv.method != "TI": + errmsg = "Method %s is not implemented in MDPOW, use estimator='alchemlyb'" % gsolv.method + logger.error(errmsg) + raise ValueError(errmsg) + + energyfile = kwargs.pop('energyfile', None) + plotfile = kwargs.pop('plotfile', None) + + if not hasattr(gsolv, 'start'): + gsolv.start = kwargs.pop('start', 0) + if not hasattr(gsolv, 'stop'): + gsolv.stop = kwargs.pop('stop', None) + if not hasattr(gsolv, 'SI'): + kwargs.setdefault('SI', True) + else: + kwargs.setdefault('SI', gsolv.SI) - # do the analysis (only use keywords that are needed for FEP.analysis()) - akw = {'stride': kwargs.pop('stride', None), 'force': kwargs.pop('force', False)} - if akw['force']: - logger.info("[%(directory)s] Forcing (re)analysis of hydration free energy data", vars()) - gsolv.analyze(**akw) # make sure that we have data - try: - gsolv.results.DeltaA.Gibbs + if kwargs['force'] or (not hasattr(gsolv.results.DeltaA, 'Gibbs')): + logger.info("[%(directory)s] Forcing (re)analysis of hydration free energy data", vars()) + # write out the settings when the analysis is performed + logger.info("Estimator is %s.", estimator) + logger.info("Free energy calculation method is %s.", gsolv.method) + if estimator == 'mdpow': + # do the analysis (only use keywords that are needed for FEP.analysis()) + akw = {'stride': kwargs.pop('stride', None), 'force': kwargs.pop('force', False)} + gsolv.analyze(**akw) + elif estimator == 'alchemlyb': + if kwargs['SI']: + logger.info("Statistical inefficiency analysis will be performed.") + else: + logger.info("Statistical inefficiency analysis won't be performed.") + gsolv.analyze_alchemlyb(**kwargs) + else: logger.info("[%(directory)s] Reading existing data from pickle file.", vars()) - gsolv.logger_DeltaA0() - except (KeyError, AttributeError): # KeyError because results is a AttributeDict - logger.info("[%(directory)s] Analyzing %(solvent)s solvation free energy data for the first time", vars()) - gsolv.analyze(**akw) + + gsolv.logger_DeltaA0() if datstat(gsolv) == 'BAD': logger.warning("[%s] Possible file corruption: %s:%s", directory, solvent, datstat(gsolv)) - if kwargs.get('energyfile', None): - with mdpow.filelock.FileLock(kwargs['energyfile'], timeout=2): - with open(kwargs['energyfile'], mode='a') as out: + if energyfile: + with mdpow.filelock.FileLock(energyfile, timeout=2): + with open(energyfile, mode='a') as out: out.write(fmt_energy % (gsolv.summary(), directory)) - logger.info("[%s] Wrote solvation energy terms to %r", directory, kwargs['energyfile']) + logger.info("[%s] Wrote solvation energy terms to %r", directory, energyfile) - plotfile = kwargs.get('plotfile', None) + if estimator == 'alchemlyb': + plotfile = False if plotfile: if plotfile == 'auto': plotfile = auto_plotfile(directory, gsolv) @@ -179,58 +214,85 @@ def realpath_or_none(fn): if __name__ == "__main__": import sys import os.path - from optparse import OptionParser - - parser = OptionParser(usage=__doc__) - parser.add_option('-p', '--plotfile', dest="plotfile", - help="plot dV/dlambda to FILE; use png or pdf suffix to " - "determine the file type. 'auto' generates a pdf file " - "DIRECTORY/dVdl__.pdf and 'None' disables it [%default]", - metavar="FILE") - parser.add_option('-e', '--energies', dest="energyfile", - help="append solvation free energies to FILE [%default]", - metavar="FILE") - parser.add_option('--force', dest='force', - action="store_true", - help="force rereading all data [%default]") - parser.add_option('-s', '--stride', dest="stride", type='int', - help="Use every N-th datapoint from the original dV/dlambda data. " - "Using a number larger than 1 can significantly reduce memory usage " - "with little impact on the precision of the final output. [%default]", - metavar="N") - parser.add_option('--solvent', '-S', dest='solvent', - metavar="NAME", - help="solvent NAME for compound, 'water', 'octanol', or 'cyclohexane' [%default]") - parser.add_option('--ignore-corrupted', dest="permissive", - action="store_true", - help="skip lines in the md.xvg files that cannot be parsed. " - "WARNING: Other lines in the file might have been corrupted in " - "such a way that they appear correct but are in fact wrong. " - "WRONG RESULTS CAN OCCUR! USE AT YOUR OWN RISK [%default]") - parser.set_defaults(plotfile="auto", - energyfile="gsolv.txt", - force=False, stride=10, permissive=False, solvent='water') - opts,args = parser.parse_args() - - if len(args) == 0: - logger.fatal("A directory is required. See --help.") - sys.exit(1) - elif len(args) > 1 and not opts.plotfile.lower() in ('none', 'auto'): + import argparse + + parser = argparse.ArgumentParser(usage=__doc__, prog='mdpow-solvationenergy', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("directory", nargs='+', + help="directory or directories which contain all the files " + "resulting from running mdpow.fep.Ghyd.setup() ",) + parser.add_argument('--plotfile', dest="plotfile", default='none', + help="plot dV/dlambda to FILE; use png or pdf suffix to " + "determine the file type. 'auto' generates a pdf file " + "DIRECTORY/dVdl__.pdf and 'None' disables it " + "The plot function is only available for mdpow estimator," + "and is disabled when using alchemlyb estimators.", + metavar="FILE") + parser.add_argument('--solvent', '-S', dest='solvent', default='water', + choices=['water', 'octanol', 'cyclohexane'], metavar="NAME", + help="solvent NAME for compound, 'water', 'octanol', or 'cyclohexane' ") + parser.add_argument('-o', '--outfile', dest="outfile", default="gsolv.txt", + help="append one-line results summary to FILE ", + metavar="FILE") + parser.add_argument('-e', '--energies', dest="energyfile", default="energies.txt", + help="append solvation free energies to FILE ", + metavar="FILE") + parser.add_argument('--estimator', dest="estimator", default='alchemlyb', + choices=['mdpow', 'alchemlyb'], + help="choose the estimator to be used, alchemlyb or mdpow estimators") + parser.add_argument('--method', dest="method", default='MBAR', + choices=['TI', 'MBAR', 'BAR'], + help="choose the method to calculate free energy ",) + parser.add_argument('--force', dest='force', default=False, + action="store_true", + help="force rereading all data ") + parser.add_argument('--SI', dest='SI', + action="store_true", default=True, + help="enable statistical inefficiency (SI) analysis. " + "Statistical inefficiency analysis is performed by default when using" + "alchemlyb estimators and is always disabled when using mdpow estimator.") + parser.add_argument('--no-SI', dest='SI', + action="store_false", default=True, + help="disable statistical inefficiency analysis. " + "Statistical inefficiency analysis is performed by default when using" + "alchemlyb estimators and is disabled when using mdpow estimator. ") + parser.add_argument('-s', '--stride', dest="stride", type=int, default=1, + help="use every N-th datapoint from the original dV/dlambda data. ", + metavar="N") + parser.add_argument('--start', dest="start", type=int, default=0, + help="start point for the data used from the original dV/dlambda data. ") + parser.add_argument('--stop', dest="stop", type=int, default=None, + help="stop point for the data used from the original dV/dlambda data. ",) + parser.add_argument('--ignore-corrupted', dest="permissive", + action="store_true", default=False, + help="skip lines in the md.xvg files that cannot be parsed. " + "WARNING: Other lines in the file might have been corrupted in " + "such a way that they appear correct but are in fact wrong. " + "WRONG RESULTS CAN OCCUR! USE AT YOUR OWN RISK ") + opts = parser.parse_args() + + if len(opts.directory) > 1 and not opts.plotfile.lower() in ('none', 'auto'): logger.fatal("Can only use --plotfile=None or --plotfile=auto with multiple directories.") sys.exit(1) - for directory in args: + if opts.estimator == 'mdpow' and opts.SI: + logger.fatal("Statistical inefficiency analysis is only available for estimator 'alchemlyb'.") + sys.exit(1) + + for directory in opts.directory: if not os.path.exists(directory): logger.warn("Directory %r not found, skipping...", directory) continue logger.info("Analyzing directory %r... (can take a while)", directory) - opts.plotfile = parse_filename_option(opts.plotfile) + plotfile = parse_filename_option(opts.plotfile) try: - run_gsolv(opts.solvent, directory, plotfile=opts.plotfile, - energyfile=realpath_or_none(opts.energyfile), - force=opts.force, stride=opts.stride, permissive=opts.permissive) + run_gsolv(opts.solvent, directory, plotfile=plotfile, + energyfile=realpath_or_none(opts.energyfile), + force=opts.force, stride=opts.stride, start=opts.start, + stop=opts.stop, estimator=opts.estimator, + method=opts.method, permissive=opts.permissive, SI=opts.SI) except (OSError, IOError), err: logger.error("Running analysis in directory %r failed: %s", directory, str(err)) except Exception, err: