From 95f0707f5e293e5e513861da8dffc803e0769f70 Mon Sep 17 00:00:00 2001 From: Michael Himes Date: Fri, 1 Mar 2019 10:27:17 -0500 Subject: [PATCH 1/6] Updated CR to allow any number of disconnected regions, and any number of percentiles. Defaults to 68% and 95% CRs. --- MCcubed/mc/driver.py | 18 ++++--- MCcubed/mc/mcmc.py | 106 ++++++++++++++++++++++++++++----------- MCcubed/plots/mcplots.py | 50 ++++++++++-------- MCcubed/utils/mcutils.py | 69 +++++++++++++++++-------- 4 files changed, 165 insertions(+), 78 deletions(-) diff --git a/MCcubed/mc/driver.py b/MCcubed/mc/driver.py index f33b24d..7924bf7 100644 --- a/MCcubed/mc/driver.py +++ b/MCcubed/mc/driver.py @@ -30,8 +30,8 @@ def mcmc(data=None, uncert=None, func=None, indparams=None, plots=None, ioff=None, showbp=None, savefile=None, savemodel=None, resume=None, rms=None, log=None, pnames=None, texnames=None, - full_output=None, chireturn=None, - cfile=None, parname=None): + full_output=None, chireturn=None, percentile=None, + cfile=None, parname=None): """ MCMC driver routine to execute a Markov-chain Monte Carlo run. @@ -143,8 +143,6 @@ def mcmc(data=None, uncert=None, func=None, indparams=None, If True, calculate the RMS of data-bestmodel. log: String or Log object. Filename to store screen outputs. - cfile: String - Configuration file name. pnames: 1D string ndarray List of parameter names (including fixed and shared parameters) to display on output screen and figures. See also texnames. @@ -153,11 +151,16 @@ def mcmc(data=None, uncert=None, func=None, indparams=None, texnames: 1D string iterable Parameter names for figures, which may use latex syntax. If not defined, default to pnames. - parname: 1D string ndarray - Deprecated. Use pnames instead. full_output: Bool If True, return the full posterior sample, including the burned-in iterations. + chireturn: FINDME + percentile: list, floats + Percentile(s) to report credible region(s). + cfile: String + Configuration file name. + parname: 1D string ndarray + Deprecated. Use pnames instead. Returns ------- @@ -433,6 +436,9 @@ def parse(): help="If True, return chi-squared, red. chi-squared," "the chi-squared rescaling factor, and the BIC" " [default: %(default)s]") + group.add_argument("--percentile", dest="percentile", action="store", + type=mu.parray, default=[0.6827, 0.9545], + help="Percentile(s) to report credible region(s).") group.add_argument("--parname", dest="parname", action="store", type=mu.parray, default=None, help="Deprecated, see pnames.") diff --git a/MCcubed/mc/mcmc.py b/MCcubed/mc/mcmc.py index aee3ce0..1cc29b6 100644 --- a/MCcubed/mc/mcmc.py +++ b/MCcubed/mc/mcmc.py @@ -39,7 +39,7 @@ def mcmc(data, uncert=None, func=None, indparams=[], plots=False, ioff=False, showbp=True, savefile=None, savemodel=None, resume=False, rms=False, log=None, pnames=None, texnames=None, - full_output=False, chireturn=False, + full_output=False, chireturn=False, percentile=[0.6827, 0.9545], parname=None): """ This beautiful piece of code runs a Markov-chain Monte Carlo algorithm. @@ -155,6 +155,8 @@ def mcmc(data, uncert=None, func=None, indparams=[], iterations. chireturn: Bool If True, include chi-squared statistics in the return. + percentile: list, floats + Percentile(s) to report credible region(s). parname: 1D string ndarray Deprecated, use pnames. @@ -162,12 +164,13 @@ def mcmc(data, uncert=None, func=None, indparams=[], ------- bestp: 1D ndarray Array of the best-fitting parameters (including fixed and shared). - CRlo: 1D ndarray - The lower boundary of the marginal 68%-highest posterior density - (the credible region) for each parameter, with respect to bestp. - CRhi: 1D ndarray - The upper boundary of the marginal 68%-highest posterior density - (the credible region) for each parameter, with respect to bestp. + creg: list of strings + The posterior credible regions corresponding to the percentiles specified + by `percentile`. Format is e.g., '(lo_1, hi_1) U (lo_2, hi_2)' for a CR + composed of two disconnected regions. + creg[i] gives the i-th parameter's CRs. + creg[i][j] gives the i-th parameter's j-th CR, corresponding to the j-th + `percentile` value. stdp: 1D ndarray Array of the best-fitting parameter uncertainties, calculated as the standard deviation of the marginalized, thinned, burned-in posterior. @@ -612,19 +615,15 @@ def mcmc(data, uncert=None, func=None, indparams=[], format(numaccept.value*100.0/nsample), indent=2) # Compute the credible region for each parameter: - CRlo = np.zeros(nparams) - CRhi = np.zeros(nparams) + CR = [] # Holds boundaries of all regions making up the CRs + creg = [] # Holds a string representation, '[lo_1, hi_1] U [lo_2, hi_2] U ...' pdf = [] xpdf = [] for i in range(nfree): - PDF, Xpdf, HPDmin = mu.credregion(posterior[:,i]) + PDF, XPDF, regions = mu.credregion(posterior[:,i], percentile) pdf.append(PDF) - xpdf.append(Xpdf) - CRlo[ifree[i]] = np.amin(Xpdf[PDF>HPDmin]) - CRhi[ifree[i]] = np.amax(Xpdf[PDF>HPDmin]) - # CR relative to the best-fitting value: - CRlo[ifree] -= bestp[ifree] - CRhi[ifree] -= bestp[ifree] + xpdf.append(XPDF) + CR.append(regions) # Get the mean and standard deviation from the posterior: meanp = np.zeros(nparams, np.double) # Parameters mean @@ -635,27 +634,74 @@ def mcmc(data, uncert=None, func=None, indparams=[], bestp[s] = bestp[-int(stepsize[s])-1] meanp[s] = meanp[-int(stepsize[s])-1] stdp [s] = stdp [-int(stepsize[s])-1] - CRlo [s] = CRlo [-int(stepsize[s])-1] - CRhi [s] = CRhi [-int(stepsize[s])-1] - log.msg("\nParam name Best fit Lo HPD CR Hi HPD CR Mean Std dev S/N" - "\n----------- ----------------------------------- ---------------------- ---------", width=80) + log.msg("\nParam name Best fit Mean Std dev S/N" + "\n----------- ------------ ---------------------- ---------", width=80) for i in range(nparams): snr = "{:.1f}". format(np.abs(bestp[i])/stdp[i]) mean = "{: 11.4e}".format(meanp[i]) - lo = "{: 11.4e}".format(CRlo[i]) - hi = "{: 11.4e}".format(CRhi[i]) if i in ifree: # Free-fitting value - pass + icr = np.where(ifree == i)[0][0] + # Format each CR as '(lo1, hi1) U (lo2, hi2) U ... U (lon, hin)' + creg.append([' U '.join(['(' + \ + ', '.join(["{:10.4e}".format(CR[icr][j][k][l]) + for l in range(len(CR[icr][j][k]))]) + ')' + for k in range(len(CR[icr][j]))]) + for j in range(len(CR[icr]))]) elif i in ishare: # Shared value snr = "[share{:02d}]".format(-int(stepsize[i])) + creg.append('') # No CRs for shared values else: # Fixed value snr = "[fixed]" mean = "{: 11.4e}".format(bestp[i]) - log.msg("{:<11s} {:11.4e} {:>11s} {:>11s} {:>11s} {:10.4e} {:>9s}". - format(pnames[i][0:11], bestp[i], lo, hi, mean, stdp[i], snr), + creg.append('') # No CRs for fixed values + # Print all info except CRs + log.msg("{:<11s} {:11.4e} {:>11s} {:10.4e} {:>9s}". + format(pnames[i][0:11], bestp[i], mean, stdp[i], snr), width=160) - + # Print CRs + log.msg("\nParam name Credible Region" + "\n----------- " + "------------------------------------------------------------------", + width=80) + for i in ifree: + for p in range(len(percentile)): + # If more than 2 disconnected regions, split it into multiple lines + numlines = int(np.ceil((creg[i][p].count(' U ') + 1) / 2.)) + for r in range(numlines): + if p == 0 and r == 0: + if numlines == 1: + log.msg("{:<11s} {:>6s}%: {:<56s}". + format(pnames[i][0:11], str(100*percentile[p]), + ' U '.join(creg[i][p].split(' U ')[:2])), + width=80) + else: + log.msg("{:<11s} {:>6s}%: {:<56s}". + format(pnames[i][0:11], str(100*percentile[p]), + ' U '.join(creg[i][p].split(' U ')[:2]) + ' U'), + width=80) + elif r == 0: + if numlines == 1: + log.msg("{:<11s} {:>6s}%: {:<56s}". + format("", str(100*percentile[p]), + ' U '.join(creg[i][p].split(' U ')[:2])), + width=80) + else: + log.msg("{:<11s} {:>6s}%: {:<56s}". + format("", str(100*percentile[p]), + ' U '.join(creg[i][p].split(' U ')[:2]) + ' U'), + width=80) + else: + if r == numlines-1: + log.msg("{:<11s} {:>6s} {:<56s}". + format("", "", + ' U '.join(creg[i][p].split(' U ')[2*r:2*r+2])), + width=80) + else: + log.msg("{:<11s} {:>6s} {:<56s}". + format("", "", + ' U '.join(creg[i][p].split(' U ')[2*r:2*r+2]) + ' U'), + width=80) if leastsq and bestchisq.value-fitchisq < -3e-8: np.set_printoptions(precision=8) log.warning("MCMC found a better fit than the minimizer:\n" @@ -680,7 +726,7 @@ def mcmc(data, uncert=None, func=None, indparams=[], # Save definitive results: if savefile is not None: np.savez(savefile, bestp=bestp, Z=Z, Zchain=Zchain, Zchisq=Zchisq, - CRlo=CRlo, CRhi=CRhi, stdp=stdp, meanp=meanp, + CR=creg, stdp=stdp, meanp=meanp, bestchisq=bestchisq.value, redchisq=redchisq, chifactor=chifactor, BIC=BIC, sdr=sdr, numaccept=numaccept.value) #if savemodel is not None: @@ -714,10 +760,10 @@ def mcmc(data, uncert=None, func=None, indparams=[], # Histograms: mp.histogram(posterior, pnames=texnames[ifree], bestp=bestfreepars, savefile=fname+"_posterior.png", - percentile=0.683, pdf=pdf, xpdf=xpdf) + CR=CR, pdf=pdf, xpdf=xpdf) # RMS vs bin size: if rms: - mp.RMS(bs, RMS, stderr, RMSlo, RMShi, binstep=len(bs)//500+1, + mp.RMS(bs, RMS, stderr, RMSlo, RMShi, binstep=len(bs)//500 + 1, savefile=fname+"_RMS.png") # Sort of guessing that indparams[0] is the X array for data as in y=y(x): if (indparams != [] and @@ -734,7 +780,7 @@ def mcmc(data, uncert=None, func=None, indparams=[], log.close() # Build the output tuple: - output = bestp, CRlo, CRhi, stdp + output = bestp, creg, stdp if full_output: output += (Z, Zchain) diff --git a/MCcubed/plots/mcplots.py b/MCcubed/plots/mcplots.py index a22dc65..a174850 100644 --- a/MCcubed/plots/mcplots.py +++ b/MCcubed/plots/mcplots.py @@ -295,9 +295,9 @@ def pairwise(posterior, pnames=None, thinning=1, fignum=200, return axes, cb -def histogram(posterior, pnames=None, thinning=1, fignum=300, - savefile=None, bestp=None, percentile=None, pdf=None, - xpdf=None, ranges=None, axes=None, lw=2.0, fs=11): +def histogram(posterior, pnames=None, thinning=1, fignum=300, + savefile=None, bestp=None, CR=None, pdf=None, + xpdf=None, ranges=None, axes=None, lw=2.0, fs=11): """ Plot parameter marginal posterior distributions @@ -317,10 +317,10 @@ def histogram(posterior, pnames=None, thinning=1, fignum=300, bestp: 1D float ndarray If not None, plot the best-fitting values for each parameter given by bestp. - percentile: Float - If not None, plot the percentile- highest posterior density region - of the distribution. Note that this should actually be the - fractional part, i.e. set percentile=0.68 for a 68% HPD. + CR: list of list of strings. + Credible region boundaries. + CR[i] gives a list of the i-th parameter's credible region boundaries. + CR[i][j] gives a string of the i-th parameter's j-th credible region boundaries. pdf: 1D float ndarray or list of ndarrays A smoothed PDF of the distribution for each parameter. xpdf: 1D float ndarray or list of ndarrays @@ -358,7 +358,7 @@ def histogram(posterior, pnames=None, thinning=1, fignum=300, hkw = {'edgecolor':'navy', 'color':'b'} # Bestfit keywords: bkw = {'zorder':2, 'color':'orange'} - if percentile is not None: + if CR is not None: hkw = {'histtype':'step', 'lw':lw, 'edgecolor':'b'} bkw = {'zorder':-1, 'color':'red'} @@ -409,21 +409,27 @@ def histogram(posterior, pnames=None, thinning=1, fignum=300, ax.set_xlabel(pnames[i], size=fs) vals, bins, h = ax.hist(posterior[0::thinning, i], bins=25, range=ranges[i], normed=False, zorder=0, **hkw) - # Plot HPD region: - if percentile is not None: - PDF, Xpdf, HPDmin = mu.credregion(posterior[:,i], percentile, - pdf[i], xpdf[i]) + # Plot HPD region(s): + if CR is not None and pdf is not None and xpdf is not None: + # Setup to interpolate xpdf into the histogram vals = np.r_[0, vals, 0] - bins = np.r_[bins[0] - (bins[1]-bins[0]), bins] - # interpolate xpdf into the histogram: - f = si.interp1d(bins+0.5*(bins[1]-bins[0]), vals, kind='nearest') - # Plot the HPD region as shaded areas: - if ranges[i] is not None: - xran = np.argwhere((Xpdf>ranges[i][0]) & (Xpdf=HPDmin, - facecolor='0.75', edgecolor='none', interpolate=False, zorder=-2) + bins = np.r_[ bins[0] - (bins[1]-bins[0]), bins] + f = si.interp1d(bins+0.5 * (bins[1]-bins[0]), vals, kind='nearest') + # Plot the credible region(s) as shaded areas: + # Note reverse ordering is to allow overplotting of lower percentiles + for k in range(len(CR[i])-1, 0-1, -1): + if ranges[i] is not None: + xran = np.argwhere((xpdf>ranges[i][0]) & (xpdf=CR[i][k][r][0])*(xpdf[i]<=CR[i][k][r][1]), + facecolor=str(0.5 + 0.25*k/(len(CR[i])-1)), + edgecolor='none', interpolate=False, zorder=-2) + elif CR is not None or pdf is not None or xpdf is not None: + print("CR, pdf, and xpdf must all be specified to plot CRs.") + print("Correct this and try again.") if bestp is not None: ax.axvline(bestp[i], dashes=(7,4), lw=1.0, **bkw) maxylim = np.amax((maxylim, ax.get_ylim()[1])) diff --git a/MCcubed/utils/mcutils.py b/MCcubed/utils/mcutils.py index 52ecb2a..59eb908 100644 --- a/MCcubed/utils/mcutils.py +++ b/MCcubed/utils/mcutils.py @@ -253,7 +253,9 @@ def isfile(input, iname, log, dtype, unpack=True, notnone=False): return load(ifile) -def credregion(posterior=None, percentile=0.6827, pdf=None, xpdf=None): +def credregion(posterior= None, percentile= [0.6827, 0.9545], + pdf = None, xpdf = None, + lims = (None,None), numpts = 200): """ Compute a smoothed posterior density distribution and the minimum density for a given percentile of the highest posterior density. @@ -264,13 +266,18 @@ def credregion(posterior=None, percentile=0.6827, pdf=None, xpdf=None): ---------- posterior: 1D float ndarray A posterior distribution. - percentile: Float + percentile: 1D float ndarray, list, or float. The percentile (actually the fraction) of the credible region. A value in the range: (0, 1). pdf: 1D float ndarray A smoothed-interpolated PDF of the posterior distribution. xpdf: 1D float ndarray The X location of the pdf values. + lims: tuple, floats + Minimum and maximum allowed values for posterior. Should only be used if + there are physically-imposed limits. + numpts: int. + Number of points to use when calculating the PDF. Returns ------- @@ -278,8 +285,12 @@ def credregion(posterior=None, percentile=0.6827, pdf=None, xpdf=None): A smoothed-interpolated PDF of the posterior distribution. xpdf: 1D float ndarray The X location of the pdf values. - HPDmin: Float - The minimum density in the percentile-HPD region. + regions: list of 2D float ndarrays + The values of the credible regions specified by `percentile`. + regions[0] corresponds to percentile[0], etc. + regions[0][0] gives the start and stop values of the first region of the CR + regions[0][1] gives the second CR start/stop values, if the CR is composed + of disconnected regions Example ------- @@ -294,34 +305,52 @@ def credregion(posterior=None, percentile=0.6827, pdf=None, xpdf=None): >>> pdf, xpdf, HPDmin = credregion(pdf=pdf, xpdf=xpdf, percentile=0.9545) >>> print(np.amin(xpdf[pdf>HPDmin]), np.amax(xpdf[pdf>HPDmin])) """ + # Make sure `percentile` is a list or array + if type(percentile) == float: + percentile = np.array([percentile]) if pdf is None and xpdf is None: - # Thin if posterior has too many samples (> 120k): - thinning = np.amax([1, int(np.size(posterior)/120000)]) # Compute the posterior's PDF: - kernel = stats.gaussian_kde(posterior[::thinning]) - # Remove outliers: - mean = np.mean(posterior) - std = np.std(posterior) - k = 6 - lo = np.amax([mean-k*std, np.amin(posterior)]) - hi = np.amin([mean+k*std, np.amax(posterior)]) + kernel = stats.gaussian_kde(posterior) # Use a Gaussian kernel density estimate to trace the PDF: - x = np.linspace(lo, hi, 100) # Interpolate-resample over finer grid (because kernel.evaluate # is expensive): + lo = np.amin(posterior) + hi = np.amax(posterior) + x = np.linspace(lo, hi, numpts) f = si.interp1d(x, kernel.evaluate(x)) - xpdf = np.linspace(lo, hi, 3000) + xpdf = np.linspace(lo, hi, 100*numpts) pdf = f(xpdf) # Sort the PDF in descending order: ip = np.argsort(pdf)[::-1] # Sorted CDF: cdf = np.cumsum(pdf[ip]) - # Indices of the highest posterior density: - iHPD = np.where(cdf >= percentile*cdf[-1])[0][0] - # Minimum density in the HPD region: - HPDmin = np.amin(pdf[ip][0:iHPD]) - return pdf, xpdf, HPDmin + + # List to hold boundaries of CRs + # List is used because a given CR may be multiple disconnected regions + regions = [] + # Find boundary for each specified percentile + for i in range(len(percentile)): + # Indices of the highest posterior density: + iHPD = np.where(cdf >= percentile[i]*cdf[-1])[0][0] + # Minimum density in the HPD region: + HPDmin = np.amin(pdf[ip][0:iHPD]) + # Find the contiguous areas of the PDF greater than or equal to HPDmin + HPDbool = pdf >= HPDmin + idiff = np.diff(HPDbool) # True where HPDbool changes T to F or F to T + iregion, = idiff.nonzero() # Indexes of Trues. Note , because returns tuple + # Check boundaries + if HPDbool[0]: + iregion = np.insert(iregion, 0, -1) # This -1 is changed to 0 below when + if HPDbool[-1]: # correcting start index for regions + iregion = np.append(iregion, len(HPDbool)-1) + # Reshape into 2 columns of start/end indices + iregion.shape = (-1, 2) + # Add 1 to start of each region due to np.diff() functionality + iregion[:,0] += 1 + regions.append(xpdf[iregion]) + + return pdf, xpdf, regions def default_parnames(npars): From b93a00b5cf476f58213647d0ef0eec0e921c059c Mon Sep 17 00:00:00 2001 From: Michael Himes Date: Fri, 1 Mar 2019 10:29:45 -0500 Subject: [PATCH 2/6] Updated examples --- examples/demo01/demo01.py | 2 +- examples/tutorial01/tutorial01.py | 2 +- examples/tutorial02/tutorial02.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/demo01/demo01.py b/examples/demo01/demo01.py index 7347c4e..c7a20b4 100644 --- a/examples/demo01/demo01.py +++ b/examples/demo01/demo01.py @@ -34,7 +34,7 @@ params = np.array([10.0, -2.0, 0.1]) # Initial guess of fitting params. stepsize = np.array([0.03, 0.03, 0.05]) -bestp, CRlo, CRhi, stdp, posterior, Zchain = mc3.mcmc(data, uncert, +bestp, CR, stdp, posterior, Zchain = mc3.mcmc(data, uncert, func=quad, indparams=[x], params=params, stepsize=stepsize, nsamples=1e5, burnin=1000) diff --git a/examples/tutorial01/tutorial01.py b/examples/tutorial01/tutorial01.py index d321433..74b7f0f 100644 --- a/examples/tutorial01/tutorial01.py +++ b/examples/tutorial01/tutorial01.py @@ -119,7 +119,7 @@ fepsilon = 0.0 # Jump scale factor for DEMC's "e" distribution # Run the MCMC: -bestp, CRlo, CRhi, stdp, posterior, Zchain = mc3.mcmc(data=data, +bestp, CR, stdp, posterior, Zchain = mc3.mcmc(data=data, uncert=uncert, func=func, indparams=indparams, params=params, pmin=pmin, pmax=pmax, stepsize=stepsize, prior=prior, priorlow=priorlow, priorup=priorup, diff --git a/examples/tutorial02/tutorial02.py b/examples/tutorial02/tutorial02.py index f8a05cb..5e81b76 100644 --- a/examples/tutorial02/tutorial02.py +++ b/examples/tutorial02/tutorial02.py @@ -102,7 +102,7 @@ # Run the MCMC: -bestp, CRlo, CRhi, stdp, posterior, Zchain = mc3.mcmc(data=data, +bestp, CR, stdp, posterior, Zchain = mc3.mcmc(data=data, func=func, indparams=indparams, params=params, walk=walk, nsamples=nsamples, nchains=nchains, From 06535d4352e303cf979bcc108fc80d701589d4c6 Mon Sep 17 00:00:00 2001 From: Michael Himes Date: Mon, 4 Mar 2019 14:34:33 -0500 Subject: [PATCH 3/6] Implemented allowance of limits on the kernel density estimation --- MCcubed/utils/mcutils.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/MCcubed/utils/mcutils.py b/MCcubed/utils/mcutils.py index 59eb908..ffbf063 100644 --- a/MCcubed/utils/mcutils.py +++ b/MCcubed/utils/mcutils.py @@ -255,7 +255,7 @@ def isfile(input, iname, log, dtype, unpack=True, notnone=False): def credregion(posterior= None, percentile= [0.6827, 0.9545], pdf = None, xpdf = None, - lims = (None,None), numpts = 200): + lims = (None,None), numpts = 100): """ Compute a smoothed posterior density distribution and the minimum density for a given percentile of the highest posterior density. @@ -314,8 +314,14 @@ def credregion(posterior= None, percentile= [0.6827, 0.9545], # Use a Gaussian kernel density estimate to trace the PDF: # Interpolate-resample over finer grid (because kernel.evaluate # is expensive): - lo = np.amin(posterior) - hi = np.amax(posterior) + if lims[0] is not None: + lo = min(np.amin(posterior), lims[0]) + else: + lo = np.amin(posterior) + if lims[1] is not None: + hi = max(np.amax(posterior), lims[1]) + else: + hi = np.amax(posterior) x = np.linspace(lo, hi, numpts) f = si.interp1d(x, kernel.evaluate(x)) xpdf = np.linspace(lo, hi, 100*numpts) From 4d32979939bb84783fbcbc7efab28a5563539c2a Mon Sep 17 00:00:00 2001 From: Michael Himes Date: Mon, 4 Mar 2019 16:51:21 -0500 Subject: [PATCH 4/6] Reworked the CR update to maintain backwards compatibility --- MCcubed/mc/mcmc.py | 79 ++++++++++--------------------- MCcubed/plots/mcplots.py | 51 +++++++++++++------- MCcubed/utils/mcutils.py | 9 ++-- examples/demo01/demo01.py | 2 +- examples/tutorial01/tutorial01.py | 2 +- examples/tutorial02/tutorial02.py | 2 +- 6 files changed, 70 insertions(+), 75 deletions(-) diff --git a/MCcubed/mc/mcmc.py b/MCcubed/mc/mcmc.py index 1cc29b6..1759a63 100644 --- a/MCcubed/mc/mcmc.py +++ b/MCcubed/mc/mcmc.py @@ -617,13 +617,16 @@ def mcmc(data, uncert=None, func=None, indparams=[], # Compute the credible region for each parameter: CR = [] # Holds boundaries of all regions making up the CRs creg = [] # Holds a string representation, '[lo_1, hi_1] U [lo_2, hi_2] U ...' + CRlo = [] + CRhi = [] pdf = [] xpdf = [] for i in range(nfree): - PDF, XPDF, regions = mu.credregion(posterior[:,i], percentile) - pdf.append(PDF) - xpdf.append(XPDF) - CR.append(regions) + PDF, Xpdf, crlo, crhi = mu.credregion(posterior[:,i], percentile) + pdf .append(PDF) + xpdf.append(Xpdf) + CRlo.append(crlo) + CRhi.append(crhi) # Get the mean and standard deviation from the posterior: meanp = np.zeros(nparams, np.double) # Parameters mean @@ -636,72 +639,42 @@ def mcmc(data, uncert=None, func=None, indparams=[], stdp [s] = stdp [-int(stepsize[s])-1] log.msg("\nParam name Best fit Mean Std dev S/N" - "\n----------- ------------ ---------------------- ---------", width=80) + "\n----------- ------------ ---------------------- ---------", + width=80) for i in range(nparams): snr = "{:.1f}". format(np.abs(bestp[i])/stdp[i]) mean = "{: 11.4e}".format(meanp[i]) if i in ifree: # Free-fitting value icr = np.where(ifree == i)[0][0] # Format each CR as '(lo1, hi1) U (lo2, hi2) U ... U (lon, hin)' - creg.append([' U '.join(['(' + \ - ', '.join(["{:10.4e}".format(CR[icr][j][k][l]) - for l in range(len(CR[icr][j][k]))]) + ')' - for k in range(len(CR[icr][j]))]) - for j in range(len(CR[icr]))]) + creg.append([' U '.join(['({:10.4e}, {:10.4e})'.format(CRlo[icr][j][k], + CRhi[icr][j][k]) + for k in range(len(CRlo[icr][j]))]) + for j in range(len(CRlo[icr]))]) elif i in ishare: # Shared value snr = "[share{:02d}]".format(-int(stepsize[i])) - creg.append('') # No CRs for shared values + creg.append('Shared') # No CRs for shared values else: # Fixed value snr = "[fixed]" mean = "{: 11.4e}".format(bestp[i]) - creg.append('') # No CRs for fixed values + creg.append('Fixed') # No CRs for fixed values # Print all info except CRs log.msg("{:<11s} {:11.4e} {:>11s} {:10.4e} {:>9s}". format(pnames[i][0:11], bestp[i], mean, stdp[i], snr), width=160) # Print CRs log.msg("\nParam name Credible Region" - "\n----------- " + "\n----------- " "------------------------------------------------------------------", width=80) for i in ifree: for p in range(len(percentile)): - # If more than 2 disconnected regions, split it into multiple lines - numlines = int(np.ceil((creg[i][p].count(' U ') + 1) / 2.)) - for r in range(numlines): - if p == 0 and r == 0: - if numlines == 1: - log.msg("{:<11s} {:>6s}%: {:<56s}". - format(pnames[i][0:11], str(100*percentile[p]), - ' U '.join(creg[i][p].split(' U ')[:2])), - width=80) - else: - log.msg("{:<11s} {:>6s}%: {:<56s}". - format(pnames[i][0:11], str(100*percentile[p]), - ' U '.join(creg[i][p].split(' U ')[:2]) + ' U'), - width=80) - elif r == 0: - if numlines == 1: - log.msg("{:<11s} {:>6s}%: {:<56s}". - format("", str(100*percentile[p]), - ' U '.join(creg[i][p].split(' U ')[:2])), - width=80) - else: - log.msg("{:<11s} {:>6s}%: {:<56s}". - format("", str(100*percentile[p]), - ' U '.join(creg[i][p].split(' U ')[:2]) + ' U'), - width=80) - else: - if r == numlines-1: - log.msg("{:<11s} {:>6s} {:<56s}". - format("", "", - ' U '.join(creg[i][p].split(' U ')[2*r:2*r+2])), - width=80) - else: - log.msg("{:<11s} {:>6s} {:<56s}". - format("", "", - ' U '.join(creg[i][p].split(' U ')[2*r:2*r+2]) + ' U'), - width=80) + if p == 0: + log.msg("{:<11s} {:>6s}%: {:<56s}". + format(pnames[i][0:11], str(100*percentile[p]), creg[i][p])) + else: + log.msg("{:<11s} {:>6s}%: {:<56s}". + format("", str(100*percentile[p]), creg[i][p])) if leastsq and bestchisq.value-fitchisq < -3e-8: np.set_printoptions(precision=8) log.warning("MCMC found a better fit than the minimizer:\n" @@ -726,7 +699,7 @@ def mcmc(data, uncert=None, func=None, indparams=[], # Save definitive results: if savefile is not None: np.savez(savefile, bestp=bestp, Z=Z, Zchain=Zchain, Zchisq=Zchisq, - CR=creg, stdp=stdp, meanp=meanp, + CRlo=CRlo, CRhi=CRhi, stdp=stdp, meanp=meanp, bestchisq=bestchisq.value, redchisq=redchisq, chifactor=chifactor, BIC=BIC, sdr=sdr, numaccept=numaccept.value) #if savemodel is not None: @@ -759,8 +732,8 @@ def mcmc(data, uncert=None, func=None, indparams=[], savefile=fname+"_pairwise.png") # Histograms: mp.histogram(posterior, pnames=texnames[ifree], bestp=bestfreepars, - savefile=fname+"_posterior.png", - CR=CR, pdf=pdf, xpdf=xpdf) + savefile=fname+"_posterior.png", percentile=percentile, + CRlo=CRlo, CRhi=CRhi, pdf=pdf, xpdf=xpdf) # RMS vs bin size: if rms: mp.RMS(bs, RMS, stderr, RMSlo, RMShi, binstep=len(bs)//500 + 1, @@ -780,7 +753,7 @@ def mcmc(data, uncert=None, func=None, indparams=[], log.close() # Build the output tuple: - output = bestp, creg, stdp + output = bestp, CRlo, CRhi, stdp if full_output: output += (Z, Zchain) diff --git a/MCcubed/plots/mcplots.py b/MCcubed/plots/mcplots.py index a174850..684107b 100644 --- a/MCcubed/plots/mcplots.py +++ b/MCcubed/plots/mcplots.py @@ -295,9 +295,10 @@ def pairwise(posterior, pnames=None, thinning=1, fignum=200, return axes, cb -def histogram(posterior, pnames=None, thinning=1, fignum=300, - savefile=None, bestp=None, CR=None, pdf=None, - xpdf=None, ranges=None, axes=None, lw=2.0, fs=11): +def histogram(posterior, pnames=None, thinning=1, fignum=300, + savefile=None, bestp=None, percentile=None, pdf=None, + xpdf=None, ranges=None, axes=None, lw=2.0, fs=11, + CRlo=None, CRhi=None): """ Plot parameter marginal posterior distributions @@ -317,10 +318,11 @@ def histogram(posterior, pnames=None, thinning=1, fignum=300, bestp: 1D float ndarray If not None, plot the best-fitting values for each parameter given by bestp. - CR: list of list of strings. - Credible region boundaries. - CR[i] gives a list of the i-th parameter's credible region boundaries. - CR[i][j] gives a string of the i-th parameter's j-th credible region boundaries. + percentile: list of floats + If not None, plot the percentile- highest posterior density region + of the distribution. Note that this should actually be the + fractional part, i.e. set percentile=0.68 for a 68% HPD, or + percentile=[0.68, 0.95] for both 68% and 95% HPD. pdf: 1D float ndarray or list of ndarrays A smoothed PDF of the distribution for each parameter. xpdf: 1D float ndarray or list of ndarrays @@ -334,6 +336,12 @@ def histogram(posterior, pnames=None, thinning=1, fignum=300, Linewidth of the histogram contour. fs: Float Font size for texts. + CRlo: list of arrays of floats + CRlo[i] gives the i-th percentile's lower boundaries of the HPD region, + which may be disconnected. If not specified, it will be calculated. + CRhi: list of arrays of floats + CRhi[i] gives the i-th percentile's upper boundaries of the HPD region, + which may be disconnected. If not specified, it will be calculated. Returns ------- @@ -358,7 +366,7 @@ def histogram(posterior, pnames=None, thinning=1, fignum=300, hkw = {'edgecolor':'navy', 'color':'b'} # Bestfit keywords: bkw = {'zorder':2, 'color':'orange'} - if CR is not None: + if percentile is not None: hkw = {'histtype':'step', 'lw':lw, 'edgecolor':'b'} bkw = {'zorder':-1, 'color':'red'} @@ -410,26 +418,37 @@ def histogram(posterior, pnames=None, thinning=1, fignum=300, vals, bins, h = ax.hist(posterior[0::thinning, i], bins=25, range=ranges[i], normed=False, zorder=0, **hkw) # Plot HPD region(s): - if CR is not None and pdf is not None and xpdf is not None: + if percentile is not None: + # Calculate CRlo and CRhi if not specified + if (CRlo is None) or (CRhi is None): + PDF = [] + xpdf = [] + CRlo = [] + CRhi = [] + for p in range(len(percentile)): + Pdf, Xpdf, crlo, crhi = mu.credregion(posterior[:,i], percentile, + pdf[i], xpdf[i]) + PDF .append(Pdf) + xpdf.append(Xpdf) + CRlo.append(crlo) + CRhi.append(crhi) # Setup to interpolate xpdf into the histogram vals = np.r_[0, vals, 0] bins = np.r_[ bins[0] - (bins[1]-bins[0]), bins] f = si.interp1d(bins+0.5 * (bins[1]-bins[0]), vals, kind='nearest') # Plot the credible region(s) as shaded areas: # Note reverse ordering is to allow overplotting of lower percentiles - for k in range(len(CR[i])-1, 0-1, -1): + for k in range(len(CRlo[i])-1, 0-1, -1): if ranges[i] is not None: xran = np.argwhere((xpdf>ranges[i][0]) & (xpdf=CR[i][k][r][0])*(xpdf[i]<=CR[i][k][r][1]), - facecolor=str(0.5 + 0.25*k/(len(CR[i])-1)), + where=(xpdf[i]>=CRlo[i][k][r]) * \ + (xpdf[i]<=CRhi[i][k][r]), + facecolor=str(0.5 + 0.25*k/(len(CRlo[i])-1)), edgecolor='none', interpolate=False, zorder=-2) - elif CR is not None or pdf is not None or xpdf is not None: - print("CR, pdf, and xpdf must all be specified to plot CRs.") - print("Correct this and try again.") if bestp is not None: ax.axvline(bestp[i], dashes=(7,4), lw=1.0, **bkw) maxylim = np.amax((maxylim, ax.get_ylim()[1])) diff --git a/MCcubed/utils/mcutils.py b/MCcubed/utils/mcutils.py index ffbf063..ca9373c 100644 --- a/MCcubed/utils/mcutils.py +++ b/MCcubed/utils/mcutils.py @@ -334,7 +334,8 @@ def credregion(posterior= None, percentile= [0.6827, 0.9545], # List to hold boundaries of CRs # List is used because a given CR may be multiple disconnected regions - regions = [] + CRlo = [] + CRhi = [] # Find boundary for each specified percentile for i in range(len(percentile)): # Indices of the highest posterior density: @@ -354,9 +355,11 @@ def credregion(posterior= None, percentile= [0.6827, 0.9545], iregion.shape = (-1, 2) # Add 1 to start of each region due to np.diff() functionality iregion[:,0] += 1 - regions.append(xpdf[iregion]) + # Store the min and max of each (possibly disconnected) region + CRlo.append(xpdf[iregion[:,0]]) + CRhi.append(xpdf[iregion[:,1]]) - return pdf, xpdf, regions + return pdf, xpdf, CRlo, CRhi def default_parnames(npars): diff --git a/examples/demo01/demo01.py b/examples/demo01/demo01.py index c7a20b4..7347c4e 100644 --- a/examples/demo01/demo01.py +++ b/examples/demo01/demo01.py @@ -34,7 +34,7 @@ params = np.array([10.0, -2.0, 0.1]) # Initial guess of fitting params. stepsize = np.array([0.03, 0.03, 0.05]) -bestp, CR, stdp, posterior, Zchain = mc3.mcmc(data, uncert, +bestp, CRlo, CRhi, stdp, posterior, Zchain = mc3.mcmc(data, uncert, func=quad, indparams=[x], params=params, stepsize=stepsize, nsamples=1e5, burnin=1000) diff --git a/examples/tutorial01/tutorial01.py b/examples/tutorial01/tutorial01.py index 74b7f0f..d321433 100644 --- a/examples/tutorial01/tutorial01.py +++ b/examples/tutorial01/tutorial01.py @@ -119,7 +119,7 @@ fepsilon = 0.0 # Jump scale factor for DEMC's "e" distribution # Run the MCMC: -bestp, CR, stdp, posterior, Zchain = mc3.mcmc(data=data, +bestp, CRlo, CRhi, stdp, posterior, Zchain = mc3.mcmc(data=data, uncert=uncert, func=func, indparams=indparams, params=params, pmin=pmin, pmax=pmax, stepsize=stepsize, prior=prior, priorlow=priorlow, priorup=priorup, diff --git a/examples/tutorial02/tutorial02.py b/examples/tutorial02/tutorial02.py index 5e81b76..f8a05cb 100644 --- a/examples/tutorial02/tutorial02.py +++ b/examples/tutorial02/tutorial02.py @@ -102,7 +102,7 @@ # Run the MCMC: -bestp, CR, stdp, posterior, Zchain = mc3.mcmc(data=data, +bestp, CRlo, CRhi, stdp, posterior, Zchain = mc3.mcmc(data=data, func=func, indparams=indparams, params=params, walk=walk, nsamples=nsamples, nchains=nchains, From c42a6c0fb7eff3cef7772771901bdfd7419694ea Mon Sep 17 00:00:00 2001 From: Michael Himes Date: Mon, 4 Mar 2019 17:00:14 -0500 Subject: [PATCH 5/6] Allow specification of a single percentile value --- MCcubed/mc/driver.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MCcubed/mc/driver.py b/MCcubed/mc/driver.py index 7924bf7..7698660 100644 --- a/MCcubed/mc/driver.py +++ b/MCcubed/mc/driver.py @@ -292,6 +292,10 @@ def mcmc(data=None, uncert=None, func=None, indparams=None, args["indparams"] = mu.isfile(args["indparams"], 'indparams', log, 'bin', unpack=False) + # Make sure percentile is of the proper data type + if type(args["percentile"]) == float: + args["percentile"] = [percentile] + # Call MCMC: outputs = mc.mcmc(**args) From a28b3ba5495beba2e1f1e1b3108b85fd93e2fdbb Mon Sep 17 00:00:00 2001 From: Michael Himes Date: Mon, 4 Mar 2019 21:27:02 -0500 Subject: [PATCH 6/6] Removed CRlo and CRhi params from histogram() --- MCcubed/mc/mcmc.py | 2 +- MCcubed/plots/mcplots.py | 33 ++++++++++++--------------------- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/MCcubed/mc/mcmc.py b/MCcubed/mc/mcmc.py index 1759a63..394bf01 100644 --- a/MCcubed/mc/mcmc.py +++ b/MCcubed/mc/mcmc.py @@ -733,7 +733,7 @@ def mcmc(data, uncert=None, func=None, indparams=[], # Histograms: mp.histogram(posterior, pnames=texnames[ifree], bestp=bestfreepars, savefile=fname+"_posterior.png", percentile=percentile, - CRlo=CRlo, CRhi=CRhi, pdf=pdf, xpdf=xpdf) + pdf=pdf, xpdf=xpdf) # RMS vs bin size: if rms: mp.RMS(bs, RMS, stderr, RMSlo, RMShi, binstep=len(bs)//500 + 1, diff --git a/MCcubed/plots/mcplots.py b/MCcubed/plots/mcplots.py index 684107b..6edf6a8 100644 --- a/MCcubed/plots/mcplots.py +++ b/MCcubed/plots/mcplots.py @@ -297,8 +297,7 @@ def pairwise(posterior, pnames=None, thinning=1, fignum=200, def histogram(posterior, pnames=None, thinning=1, fignum=300, savefile=None, bestp=None, percentile=None, pdf=None, - xpdf=None, ranges=None, axes=None, lw=2.0, fs=11, - CRlo=None, CRhi=None): + xpdf=None, ranges=None, axes=None, lw=2.0, fs=11): """ Plot parameter marginal posterior distributions @@ -336,12 +335,6 @@ def histogram(posterior, pnames=None, thinning=1, fignum=300, Linewidth of the histogram contour. fs: Float Font size for texts. - CRlo: list of arrays of floats - CRlo[i] gives the i-th percentile's lower boundaries of the HPD region, - which may be disconnected. If not specified, it will be calculated. - CRhi: list of arrays of floats - CRhi[i] gives the i-th percentile's upper boundaries of the HPD region, - which may be disconnected. If not specified, it will be calculated. Returns ------- @@ -419,19 +412,17 @@ def histogram(posterior, pnames=None, thinning=1, fignum=300, range=ranges[i], normed=False, zorder=0, **hkw) # Plot HPD region(s): if percentile is not None: - # Calculate CRlo and CRhi if not specified - if (CRlo is None) or (CRhi is None): - PDF = [] - xpdf = [] - CRlo = [] - CRhi = [] - for p in range(len(percentile)): - Pdf, Xpdf, crlo, crhi = mu.credregion(posterior[:,i], percentile, - pdf[i], xpdf[i]) - PDF .append(Pdf) - xpdf.append(Xpdf) - CRlo.append(crlo) - CRhi.append(crhi) + PDF = [] + xpdf = [] + CRlo = [] + CRhi = [] + for p in range(len(percentile)): + Pdf, Xpdf, crlo, crhi = mu.credregion(posterior[:,i], percentile, + pdf[i], xpdf[i]) + PDF .append(Pdf) + xpdf.append(Xpdf) + CRlo.append(crlo) + CRhi.append(crhi) # Setup to interpolate xpdf into the histogram vals = np.r_[0, vals, 0] bins = np.r_[ bins[0] - (bins[1]-bins[0]), bins]