pcubillos · mdhimes · Mar 1, 2019 · Mar 1, 2019 · Mar 4, 2019 · Mar 4, 2019
diff --git a/MCcubed/mc/driver.py b/MCcubed/mc/driver.py
@@ -30,8 +30,8 @@ def mcmc(data=None,     uncert=None,     func=None,      indparams=None,
          plots=None,    ioff=None,       showbp=None,
          savefile=None, savemodel=None,  resume=None,
          rms=None,      log=None,        pnames=None,    texnames=None,
-         full_output=None, chireturn=None,
-         cfile=None,         parname=None):
+         full_output=None, chireturn=None, percentile=None,
+         cfile=None,    parname=None):
   """
   MCMC driver routine to execute a Markov-chain Monte Carlo run.
 
@@ -143,8 +143,6 @@ def mcmc(data=None,     uncert=None,     func=None,      indparams=None,
      If True, calculate the RMS of data-bestmodel.
   log: String or Log object.
      Filename to store screen outputs.
-  cfile: String
-     Configuration file name.
   pnames: 1D string ndarray
      List of parameter names (including fixed and shared parameters)
      to display on output screen and figures.  See also texnames.
@@ -153,11 +151,16 @@ def mcmc(data=None,     uncert=None,     func=None,      indparams=None,
   texnames: 1D string iterable
      Parameter names for figures, which may use latex syntax.
      If not defined, default to pnames.
-  parname: 1D string ndarray
-     Deprecated.  Use pnames instead.
   full_output:  Bool
      If True, return the full posterior sample, including the burned-in
      iterations.
+  chireturn: FINDME
+  percentile: list, floats
+     Percentile(s) to report credible region(s).
+  cfile: String
+     Configuration file name.
+  parname: 1D string ndarray
+     Deprecated.  Use pnames instead.
 
   Returns
   -------
@@ -289,6 +292,10 @@ def mcmc(data=None,     uncert=None,     func=None,      indparams=None,
       args["indparams"] = mu.isfile(args["indparams"], 'indparams', log, 'bin',
                                     unpack=False)
 
+    # Make sure percentile is of the proper data type
+    if type(args["percentile"]) == float:
+      args["percentile"] = [percentile]
+
     # Call MCMC:
     outputs = mc.mcmc(**args)
 
@@ -433,6 +440,9 @@ def parse():
                      help="If True, return chi-squared, red. chi-squared,"
                           "the chi-squared rescaling factor, and the BIC"
                           " [default: %(default)s]")
+  group.add_argument("--percentile", dest="percentile", action="store", 
+                     type=mu.parray, default=[0.6827, 0.9545], 
+                     help="Percentile(s) to report credible region(s).")
   group.add_argument("--parname",   dest="parname", action="store",
                      type=mu.parray, default=None,
                      help="Deprecated, see pnames.")

diff --git a/MCcubed/mc/mcmc.py b/MCcubed/mc/mcmc.py
@@ -39,7 +39,7 @@ def mcmc(data,          uncert=None,    func=None,      indparams=[],
          plots=False,   ioff=False,     showbp=True,
          savefile=None, savemodel=None, resume=False,
          rms=False,     log=None,       pnames=None,    texnames=None,
-         full_output=False, chireturn=False,
+         full_output=False, chireturn=False, percentile=[0.6827, 0.9545], 
          parname=None):
   """
   This beautiful piece of code runs a Markov-chain Monte Carlo algorithm.
@@ -155,19 +155,22 @@ def mcmc(data,          uncert=None,    func=None,      indparams=[],
      iterations.
   chireturn: Bool
      If True, include chi-squared statistics in the return.
+  percentile: list, floats
+     Percentile(s) to report credible region(s).
   parname: 1D string ndarray
      Deprecated, use pnames.
 
   Returns
   -------
   bestp: 1D ndarray
      Array of the best-fitting parameters (including fixed and shared).
-  CRlo:  1D ndarray
-     The lower boundary of the marginal 68%-highest posterior density
-     (the credible region) for each parameter, with respect to bestp.
-  CRhi:  1D ndarray
-     The upper boundary of the marginal 68%-highest posterior density
-     (the credible region) for each parameter, with respect to bestp.
+  creg: list of strings
+     The posterior credible regions corresponding to the percentiles specified 
+     by `percentile`. Format is e.g., '(lo_1, hi_1) U (lo_2, hi_2)' for a CR 
+     composed of two disconnected regions.
+     creg[i]    gives the i-th parameter's      CRs.
+     creg[i][j] gives the i-th parameter's j-th CR, corresponding to the j-th 
+                `percentile` value.
   stdp: 1D ndarray
      Array of the best-fitting parameter uncertainties, calculated as the
      standard deviation of the marginalized, thinned, burned-in posterior.
@@ -612,19 +615,18 @@ def mcmc(data,          uncert=None,    func=None,      indparams=[],
            format(numaccept.value*100.0/nsample), indent=2)
 
   # Compute the credible region for each parameter:
-  CRlo = np.zeros(nparams)
-  CRhi = np.zeros(nparams)
+  CR   = [] # Holds boundaries of all regions making up the CRs
+  creg = [] # Holds a string representation, '[lo_1, hi_1] U [lo_2, hi_2] U ...'
+  CRlo = []
+  CRhi = []
   pdf  = []
   xpdf = []
   for i in range(nfree):
-    PDF, Xpdf, HPDmin = mu.credregion(posterior[:,i])
-    pdf.append(PDF)
+    PDF, Xpdf, crlo, crhi = mu.credregion(posterior[:,i], percentile)
+    pdf .append(PDF)
     xpdf.append(Xpdf)
-    CRlo[ifree[i]] = np.amin(Xpdf[PDF>HPDmin])
-    CRhi[ifree[i]] = np.amax(Xpdf[PDF>HPDmin])
-  # CR relative to the best-fitting value:
-  CRlo[ifree] -= bestp[ifree]
-  CRhi[ifree] -= bestp[ifree]
+    CRlo.append(crlo)
+    CRhi.append(crhi)
 
   # Get the mean and standard deviation from the posterior:
   meanp = np.zeros(nparams, np.double) # Parameters mean
@@ -635,27 +637,44 @@ def mcmc(data,          uncert=None,    func=None,      indparams=[],
     bestp[s] = bestp[-int(stepsize[s])-1]
     meanp[s] = meanp[-int(stepsize[s])-1]
     stdp [s] = stdp [-int(stepsize[s])-1]
-    CRlo [s] = CRlo [-int(stepsize[s])-1]
-    CRhi [s] = CRhi [-int(stepsize[s])-1]
 
-  log.msg("\nParam name     Best fit   Lo HPD CR   Hi HPD CR        Mean    Std dev       S/N"
-          "\n----------- ----------------------------------- ---------------------- ---------", width=80)
+  log.msg("\nParam name     Best fit        Mean    Std dev       S/N"
+          "\n----------- ------------ ---------------------- ---------", 
+          width=80)
   for i in range(nparams):
     snr  = "{:.1f}".   format(np.abs(bestp[i])/stdp[i])
     mean = "{: 11.4e}".format(meanp[i])
-    lo   = "{: 11.4e}".format(CRlo[i])
-    hi   = "{: 11.4e}".format(CRhi[i])
     if   i in ifree:  # Free-fitting value
-      pass
+      icr = np.where(ifree == i)[0][0]
+      # Format each CR as '(lo1, hi1) U (lo2, hi2) U ... U (lon, hin)'
+      creg.append([' U '.join(['({:10.4e}, {:10.4e})'.format(CRlo[icr][j][k], 
+                                                             CRhi[icr][j][k])
+                  for k in range(len(CRlo[icr][j]))]) 
+                  for j in range(len(CRlo[icr]))])
     elif i in ishare: # Shared value
       snr  = "[share{:02d}]".format(-int(stepsize[i]))
+      creg.append('Shared') # No CRs for shared values
     else:             # Fixed value
       snr  = "[fixed]"
       mean = "{: 11.4e}".format(bestp[i])
-    log.msg("{:<11s} {:11.4e} {:>11s} {:>11s} {:>11s} {:10.4e} {:>9s}".
-            format(pnames[i][0:11], bestp[i], lo, hi, mean, stdp[i], snr),
+      creg.append('Fixed') # No CRs for fixed values
+    # Print all info except CRs
+    log.msg("{:<11s} {:11.4e} {:>11s} {:10.4e} {:>9s}".
+            format(pnames[i][0:11], bestp[i], mean, stdp[i], snr),
             width=160)
-
+  # Print CRs
+  log.msg("\nParam name  Credible Region"
+          "\n----------- " 
+          "------------------------------------------------------------------", 
+          width=80)
+  for i in ifree:
+    for p in range(len(percentile)):
+      if p == 0:
+        log.msg("{:<11s} {:>6s}%: {:<56s}".
+                format(pnames[i][0:11], str(100*percentile[p]), creg[i][p]))
+      else:
+        log.msg("{:<11s} {:>6s}%: {:<56s}".
+                format("",              str(100*percentile[p]), creg[i][p]))
   if leastsq and bestchisq.value-fitchisq < -3e-8:
     np.set_printoptions(precision=8)
     log.warning("MCMC found a better fit than the minimizer:\n"
@@ -713,11 +732,11 @@ def mcmc(data,          uncert=None,    func=None,      indparams=[],
         savefile=fname+"_pairwise.png")
     # Histograms:
     mp.histogram(posterior, pnames=texnames[ifree], bestp=bestfreepars,
-        savefile=fname+"_posterior.png",
-        percentile=0.683, pdf=pdf, xpdf=xpdf)
+        savefile=fname+"_posterior.png", percentile=percentile, 
+        pdf=pdf, xpdf=xpdf)
     # RMS vs bin size:
     if rms:
-      mp.RMS(bs, RMS, stderr, RMSlo, RMShi, binstep=len(bs)//500+1,
+      mp.RMS(bs, RMS, stderr, RMSlo, RMShi, binstep=len(bs)//500 + 1,
              savefile=fname+"_RMS.png")
     # Sort of guessing that indparams[0] is the X array for data as in y=y(x):
     if (indparams != [] and

diff --git a/MCcubed/plots/mcplots.py b/MCcubed/plots/mcplots.py
@@ -295,9 +295,9 @@ def pairwise(posterior, pnames=None, thinning=1, fignum=200,
   return axes, cb
 
 
-def histogram(posterior, pnames=None, thinning=1, fignum=300,
-              savefile=None, bestp=None, percentile=None, pdf=None,
-              xpdf=None, ranges=None, axes=None, lw=2.0, fs=11):
+def histogram(posterior,     pnames=None, thinning=1,      fignum=300,
+              savefile=None, bestp=None,  percentile=None, pdf=None,
+              xpdf=None,     ranges=None, axes=None,  lw=2.0, fs=11):
   """
   Plot parameter marginal posterior distributions
 
@@ -317,10 +317,11 @@ def histogram(posterior, pnames=None, thinning=1, fignum=300,
   bestp: 1D float ndarray
      If not None, plot the best-fitting values for each parameter
      given by bestp.
-  percentile: Float
+  percentile: list of floats
      If not None, plot the percentile- highest posterior density region
      of the distribution.  Note that this should actually be the
-     fractional part, i.e. set percentile=0.68 for a 68% HPD.
+     fractional part, i.e. set percentile=0.68 for a 68% HPD, or 
+     percentile=[0.68, 0.95] for both 68% and 95% HPD.
   pdf: 1D float ndarray or list of ndarrays
      A smoothed PDF of the distribution for each parameter.
   xpdf: 1D float ndarray or list of ndarrays
@@ -409,21 +410,36 @@ def histogram(posterior, pnames=None, thinning=1, fignum=300,
       ax.set_xlabel(pnames[i], size=fs)
       vals, bins, h = ax.hist(posterior[0::thinning, i], bins=25,
                               range=ranges[i], normed=False, zorder=0, **hkw)
-      # Plot HPD region:
-      if percentile is not None:
-        PDF, Xpdf, HPDmin = mu.credregion(posterior[:,i], percentile,
-                                          pdf[i], xpdf[i])
+      # Plot HPD region(s):
+      if percentile is not None: 
+        PDF  = []
+        xpdf = []
+        CRlo = []
+        CRhi = []
+        for p in range(len(percentile)):
+          Pdf, Xpdf, crlo, crhi = mu.credregion(posterior[:,i], percentile,
+                                                pdf[i],         xpdf[i])
+          PDF .append(Pdf)
+          xpdf.append(Xpdf)
+          CRlo.append(crlo)
+          CRhi.append(crhi)
+        # Setup to interpolate xpdf into the histogram
         vals = np.r_[0, vals, 0]
-        bins = np.r_[bins[0] - (bins[1]-bins[0]), bins]
-        # interpolate xpdf into the histogram:
-        f = si.interp1d(bins+0.5*(bins[1]-bins[0]), vals, kind='nearest')
-        # Plot the HPD region as shaded areas:
-        if ranges[i] is not None:
-          xran = np.argwhere((Xpdf>ranges[i][0]) & (Xpdf<ranges[i][1]))
-          Xpdf = Xpdf[np.amin(xran):np.amax(xran)]
-          PDF  = PDF [np.amin(xran):np.amax(xran)]
-        ax.fill_between(Xpdf, 0, f(Xpdf), where=PDF>=HPDmin,
-           facecolor='0.75', edgecolor='none', interpolate=False, zorder=-2)
+        bins = np.r_[      bins[0]  - (bins[1]-bins[0]), bins]
+        f    = si.interp1d(bins+0.5 * (bins[1]-bins[0]), vals, kind='nearest')
+        # Plot the credible region(s) as shaded areas:
+        # Note reverse ordering is to allow overplotting of lower percentiles
+        for k in range(len(CRlo[i])-1, 0-1, -1):
+          if ranges[i] is not None:
+            xran    = np.argwhere((xpdf>ranges[i][0]) & (xpdf<ranges[i][1]))
+            xpdf[i] = xpdf[i][np.amin(xran):np.amax(xran)]
+            PDF [i] = PDF [i][np.amin(xran):np.amax(xran)]
+          for r in range(len(CRlo[i][k])):
+            ax.fill_between(xpdf[i], 0, f(xpdf[i]), 
+                            where=(xpdf[i]>=CRlo[i][k][r]) * \
+                                  (xpdf[i]<=CRhi[i][k][r]),
+                            facecolor=str(0.5 + 0.25*k/(len(CRlo[i])-1)), 
+                            edgecolor='none', interpolate=False, zorder=-2)
       if bestp is not None:
         ax.axvline(bestp[i], dashes=(7,4), lw=1.0, **bkw)
       maxylim = np.amax((maxylim, ax.get_ylim()[1]))

diff --git a/MCcubed/utils/mcutils.py b/MCcubed/utils/mcutils.py
@@ -253,7 +253,9 @@ def isfile(input, iname, log, dtype, unpack=True, notnone=False):
     return load(ifile)
 
 
-def credregion(posterior=None, percentile=0.6827, pdf=None, xpdf=None):
+def credregion(posterior= None,        percentile= [0.6827, 0.9545], 
+               pdf      = None,        xpdf      = None, 
+               lims     = (None,None), numpts    = 100):
   """
   Compute a smoothed posterior density distribution and the minimum
   density for a given percentile of the highest posterior density.
@@ -264,22 +266,31 @@ def credregion(posterior=None, percentile=0.6827, pdf=None, xpdf=None):
   ----------
   posterior: 1D float ndarray
      A posterior distribution.
-  percentile: Float
+  percentile: 1D float ndarray, list, or float.
      The percentile (actually the fraction) of the credible region.
      A value in the range: (0, 1).
   pdf: 1D float ndarray
      A smoothed-interpolated PDF of the posterior distribution.
   xpdf: 1D float ndarray
      The X location of the pdf values.
+  lims: tuple, floats
+     Minimum and maximum allowed values for posterior. Should only be used if 
+     there are physically-imposed limits.
+  numpts: int.
+     Number of points to use when calculating the PDF.
 
   Returns
   -------
   pdf: 1D float ndarray
      A smoothed-interpolated PDF of the posterior distribution.
   xpdf: 1D float ndarray
      The X location of the pdf values.
-  HPDmin: Float
-     The minimum density in the percentile-HPD region.
+  regions: list of 2D float ndarrays
+     The values of the credible regions specified by `percentile`.
+     regions[0] corresponds to percentile[0], etc.
+     regions[0][0] gives the start and stop values of the first region of the CR
+     regions[0][1] gives the second CR start/stop values, if the CR is composed 
+                   of disconnected regions
 
   Example
   -------
@@ -294,34 +305,61 @@ def credregion(posterior=None, percentile=0.6827, pdf=None, xpdf=None):
   >>> pdf, xpdf, HPDmin = credregion(pdf=pdf, xpdf=xpdf, percentile=0.9545)
   >>> print(np.amin(xpdf[pdf>HPDmin]), np.amax(xpdf[pdf>HPDmin]))
   """
+  # Make sure `percentile` is a list or array
+  if type(percentile) == float:
+    percentile = np.array([percentile])
   if pdf is None and xpdf is None:
-    # Thin if posterior has too many samples (> 120k):
-    thinning = np.amax([1, int(np.size(posterior)/120000)])
     # Compute the posterior's PDF:
-    kernel = stats.gaussian_kde(posterior[::thinning])
-    # Remove outliers:
-    mean = np.mean(posterior)
-    std  = np.std(posterior)
-    k = 6
-    lo = np.amax([mean-k*std, np.amin(posterior)])
-    hi = np.amin([mean+k*std, np.amax(posterior)])
+    kernel = stats.gaussian_kde(posterior)
     # Use a Gaussian kernel density estimate to trace the PDF:
-    x  = np.linspace(lo, hi, 100)
     # Interpolate-resample over finer grid (because kernel.evaluate
     #  is expensive):
+    if lims[0] is not None:
+      lo = min(np.amin(posterior), lims[0])
+    else:
+      lo = np.amin(posterior)
+    if lims[1] is not None:
+      hi = max(np.amax(posterior), lims[1])
+    else:
+      hi = np.amax(posterior)
+    x    = np.linspace(lo, hi, numpts)
     f    = si.interp1d(x, kernel.evaluate(x))
-    xpdf = np.linspace(lo, hi, 3000)
+    xpdf = np.linspace(lo, hi, 100*numpts)
     pdf  = f(xpdf)
 
   # Sort the PDF in descending order:
   ip = np.argsort(pdf)[::-1]
   # Sorted CDF:
   cdf = np.cumsum(pdf[ip])
-  # Indices of the highest posterior density:
-  iHPD = np.where(cdf >= percentile*cdf[-1])[0][0]
-  # Minimum density in the HPD region:
-  HPDmin = np.amin(pdf[ip][0:iHPD])
-  return pdf, xpdf, HPDmin
+
+  # List to hold boundaries of CRs
+  # List is used because a given CR may be multiple disconnected regions
+  CRlo = []
+  CRhi = []
+  # Find boundary for each specified percentile
+  for i in range(len(percentile)):
+    # Indices of the highest posterior density:
+    iHPD = np.where(cdf >= percentile[i]*cdf[-1])[0][0]
+    # Minimum density in the HPD region:
+    HPDmin   = np.amin(pdf[ip][0:iHPD])
+    # Find the contiguous areas of the PDF greater than or equal to HPDmin
+    HPDbool  = pdf >= HPDmin
+    idiff    = np.diff(HPDbool) # True where HPDbool changes T to F or F to T
+    iregion, = idiff.nonzero()  # Indexes of Trues. Note , because returns tuple
+    # Check boundaries
+    if HPDbool[0]:
+      iregion = np.insert(iregion, 0, -1) # This -1 is changed to 0 below when 
+    if HPDbool[-1]:                       #   correcting start index for regions
+      iregion = np.append(iregion, len(HPDbool)-1)
+    # Reshape into 2 columns of start/end indices
+    iregion.shape = (-1, 2)
+    # Add 1 to start of each region due to np.diff() functionality
+    iregion[:,0] += 1
+    # Store the min and max of each (possibly disconnected) region
+    CRlo.append(xpdf[iregion[:,0]])
+    CRhi.append(xpdf[iregion[:,1]])
+
+  return pdf, xpdf, CRlo, CRhi
 
 
 def default_parnames(npars):