From ca8e0f6ec4d467835d3cf0473d94bef6e51ee20b Mon Sep 17 00:00:00 2001 From: Patricio Cubillos Date: Thu, 30 May 2024 16:44:24 +0200 Subject: [PATCH] Removed dynesty references --- docs/get_started.rst | 7 +- examples/ns_tutorial.py | 77 -------------- mc3/ns_driver.py | 216 ---------------------------------------- mc3/sampler_driver.py | 31 ------ pyproject.toml | 1 - tests/test_dynesty.py | 165 ------------------------------ 6 files changed, 5 insertions(+), 492 deletions(-) delete mode 100644 examples/ns_tutorial.py delete mode 100644 mc3/ns_driver.py delete mode 100644 tests/test_dynesty.py diff --git a/docs/get_started.rst b/docs/get_started.rst index af68fad..8066adf 100644 --- a/docs/get_started.rst +++ b/docs/get_started.rst @@ -34,7 +34,7 @@ Alternatively (e.g., for developers), clone the repository to your local machine pip install -e . -``mc3`` provides MCMC and nested-sampling posterior sampling, +``mc3`` provides MCMC posterior sampling, optimization and other lower-level statistical and plotting routines. See the full docs in the :ref:`api` or through the Python interpreter: @@ -44,10 +44,13 @@ interpreter: import mc3 # Bayesian posterior sampling: help(mc3.sample) + # Optimization: help(mc3.fit) - # Assorted stats: + + # Assorted stats utilities: help(mc3.stats) + # Plotting utilities: help(mc3.plots) diff --git a/examples/ns_tutorial.py b/examples/ns_tutorial.py deleted file mode 100644 index e36b6b1..0000000 --- a/examples/ns_tutorial.py +++ /dev/null @@ -1,77 +0,0 @@ -import sys -import numpy as np -import mc3 - - -def quad(p, x): - """ - Quadratic polynomial function. - - Parameters - p: Polynomial constant, linear, and quadratic coefficients. - x: Array of dependent variables where to evaluate the polynomial. - Returns - y: Polinomial evaluated at x: y(x) = p0 + p1*x + p2*x^2 - """ - y = p[0] + p[1]*x + p[2]*x**2.0 - return y - -# :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -# Preamble (create a synthetic dataset, in a real scenario you would -# get your dataset from your own data analysis pipeline): -np.random.seed(3) -x = np.linspace(0, 10, 100) -p0 = [3, -2.4, 0.5] -y = quad(p0, x) -uncert = np.sqrt(np.abs(y)) -data = y + np.random.normal(0, uncert) -# :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -# Modeling function: -func = quad -# List of additional arguments of func (if necessary): -indparams = [x] - -# Array of initial-guess values of fitting parameters: -params = np.array([ 10.0, -2.0, 0.1]) -# Lower and upper boundaries for the MCMC exploration: -pmin = np.array([-10.0, -20.0, -10.0]) -pmax = np.array([ 40.0, 20.0, 10.0]) -# Parameters' stepping behavior: -pstep = np.array([1.0, 0.5, 0.1]) - -# Two-sided Gaussian prior on first parameter, uniform priors on rest: -prior = np.array([3.5, 0.0, 0.0]) -priorlow = np.array([0.1, 0.0, 0.0]) -priorup = np.array([0.3, 0.0, 0.0]) - -# Parameter names: -pnames = ['y0', 'alpha', 'beta'] -texnames = [r'$y_{0}$', r'$\alpha$', r'$\beta$'] - -# Sampler algorithm, choose from: 'snooker', 'demc', 'mrw', or 'dynesty'. 
-sampler = 'dynesty' - -# Optimization before MCMC, choose from: 'lm' or 'trf': -leastsq = 'lm' -chisqscale = False -# NS setup: -ncpu = 7 -thinning = 1 - -# Logging: -log = 'NS_tutorial.log' - -# File outputs: -savefile = 'NS_tutorial.npz' -plots = True -rms = True - -# Run the MCMC: -mc3_output = mc3.sample(data=data, uncert=uncert, func=func, params=params, - indparams=indparams, pmin=pmin, pmax=pmax, pstep=pstep, - pnames=pnames, texnames=texnames, - prior=prior, priorlow=priorlow, priorup=priorup, - sampler=sampler, ncpu=ncpu, thinning=thinning, - leastsq=leastsq, chisqscale=chisqscale, - plots=plots, rms=rms, log=log, savefile=savefile) diff --git a/mc3/ns_driver.py b/mc3/ns_driver.py deleted file mode 100644 index e61b524..0000000 --- a/mc3/ns_driver.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) 2015-2023 Patricio Cubillos and contributors. -# mc3 is open-source software under the MIT license (see LICENSE). - -__all__ = [ - 'nested_sampling', -] - -import sys -import inspect - -import multiprocessing as mp -import numpy as np - -from . import stats as ms - - -def resample_equal(weights, rstate=None): - """Based on dynesty.utils.resample_equal()""" - SQRTEPS = np.sqrt(float(np.finfo(np.float64).eps)) - if rstate is None: - rstate = np.random - if abs(np.sum(weights) - 1.) > SQRTEPS: - raise ValueError("Weights do not sum to 1.") - - nsamples = len(weights) - positions = (rstate.random() + np.arange(nsamples)) / nsamples - - idx = np.zeros(nsamples, dtype=int) - cumulative_sum = np.cumsum(weights) - i, j = 0, 0 - while i < nsamples: - if positions[i] < cumulative_sum[j]: - idx[i] = j - i += 1 - else: - j += 1 - return idx - - -def nested_sampling(data, uncert, func, params, indparams, pmin, pmax, pstep, - prior, priorlow, priorup, ncpu, thinning, resume, log, - **kwargs): - """ - This beautiful piece of code runs a Markov-chain Monte Carlo algorithm. - - Parameters - ---------- - data: 1D float ndarray - Data to be fit by func. - uncert: 1D float ndarray - Uncertainties of data. - func: Callable - The callable function that models data as model=func(params,*indparams) - params: 1D float ndarray - Set of initial fitting parameters for func. - indparams: tuple - Additional arguments required by func. - pmin: 1D ndarray - Lower boundaries for the posterior exploration. - pmax: 1D ndarray - Upper boundaries for the posterior exploration. - pstep: 1D ndarray - Parameter stepping. - prior: 1D ndarray - Parameter prior distribution means. - priorlow: 1D ndarray - Lower prior uncertainty values. - priorup: 1D ndarray - Upper prior uncertainty values. - ncpu: Integer - Number of processors for the NS sampling. - thinning: Integer - Thinning factor of the samples. - resume: Boolean - If True resume a previous run. - log: mc3.utils.Log instance - Logging object. - - Returns - ------- - output: Dict - A Dictionary containing the NS posterior distribution and related - stats, including: - - posterior: thinned posterior distribution of shape [nsamples, nfree]. - - zchain: chain indices for each sample in posterior. - - chisq: chi^2 value for each sample in posterior. - - log_post: log(posterior) for the samples in posterior. - - burnin: number of burned-in samples per chain (zero for NS). - - bestp: model parameters for the optimal log(posterior) sample. - - best_chisq: chi^2 for the optimal log(posterior) sample. - - best_model: model evaluated at bestp. - - best_log_post: optimal log(posterior) in posterior. - - eff: sampling efficiency. - - dynesty_sampler: The dynesty sampler object. 
- - Examples - -------- - >>> # See example in mc3.sample() and - >>> # https://mc3.readthedocs.io/en/latest/ns_tutorial.html - """ - try: - import dynesty - except ImportError as error: - log.error("ModuleNotFoundError: {}".format(error)) - - nfree = int(np.sum(pstep > 0)) - ifree = np.where(pstep > 0)[0] - ishare = np.where(pstep < 0)[0] - - # Multiprocessing setup: - if ncpu > 1: - dyn_args = {'pool':mp.Pool(ncpu), 'queue_size':ncpu} - else: - dyn_args = {} - - # Intercept kwargs that go into DynamicNestedSampler(): - if 'loglikelihood' in kwargs: - loglike = kwargs.pop('loglikelihood') - else: - loglike = ms.Loglike(data, uncert, func, params, indparams, pstep) - - if 'prior_transform' in kwargs: - prior_transform = kwargs.pop('prior_transform') - skip_logp = True - else: - prior_transform = ms.Prior_transform(prior, priorlow, priorup, - pmin, pmax, pstep) - skip_logp = False - - if 'ndim' in kwargs: - nfree = kwargs.pop('ndim') - - # Pop other DynamicNestedSampler() arguments from kwargs: - if sys.version_info.major == 3: - signature = inspect.signature(dynesty.DynamicNestedSampler).parameters - else: - signature = inspect.getcallargs(dynesty.DynamicNestedSampler, - None,None,None) - - dyn_args_list = np.intersect1d( - list(signature.keys()), - list(kwargs.keys())) - dyn_kwargs = {key: kwargs.pop(key) for key in dyn_args_list} - dyn_args.update(dyn_kwargs) - - # Run dynesty: - log.msg('Running dynesty dynamic nested-samping run:\n') - sampler = dynesty.DynamicNestedSampler(loglike, prior_transform, nfree, - **dyn_args) - sampler.run_nested(**kwargs) - - weights = np.exp(sampler.results.logwt - sampler.results.logz[-1]) - isample = resample_equal(weights) - posterior = sampler.results.samples[isample] - chisq = -2.0*sampler.results.logl[isample] - - # Contribution to chi-square from non-uniform priors: - if skip_logp: - # TBD: Compute from prior_transform() - log_prior = 0.0 - else: - log_prior = ms.log_prior(posterior, prior, priorlow, priorup, pstep) - log_post = -0.5*chisq + log_prior - - # Best-fit statistics from sample: - ibest = np.argmin(log_post) - bestp = np.copy(params) - bestp[ifree] = posterior[ibest] - for s in ishare: - bestp[s] = bestp[-int(pstep[s])-1] - best_model = func(bestp, *indparams) - best_chisq = chisq[ibest] - best_log_post = log_post[ibest] - - # Print out Summary: - log.msg("\nNested Sampling Summary:" - "\n------------------------") - - posterior = posterior[::thinning] - chisq = chisq[::thinning] - log_post = log_post[::thinning] - # Number of evaluated and kept samples: - nsample = sampler.results['niter'] - nzsample = len(posterior) - - fmt = len(str(nsample)) - log.msg("Number of evaluated samples: {:{}d}". - format(nsample, fmt), indent=2) - log.msg("Thinning factor: {:{}d}". - format(thinning, fmt), indent=2) - log.msg("NS sample size (thinned): {:{}d}". - format(nzsample, fmt), indent=2) - log.msg("Sampling efficiency: {:.2f}%\n". 
- format(sampler.results['eff']), indent=2) - - # Build the output dict: - output = { - # The posterior: - 'posterior':posterior, - 'zchain':np.zeros(nzsample, int), - 'zmask':np.arange(nzsample), - 'chisq':chisq, - 'log_post':log_post, - 'burnin':0, - # Posterior stats: - 'eff':sampler.results['eff'], - # Best-fit stats: - 'bestp':bestp, - 'best_model':best_model, - 'best_log_post':best_log_post, - 'best_chisq':best_chisq, - # Extra stuff: - 'dynesty_sampler':sampler, - } - return output - diff --git a/mc3/sampler_driver.py b/mc3/sampler_driver.py index cbe6a8f..72a03c0 100644 --- a/mc3/sampler_driver.py +++ b/mc3/sampler_driver.py @@ -88,7 +88,6 @@ def sample( - 'mrw': Metropolis random walk. - 'demc': Differential Evolution Markov chain. - 'snooker': DEMC-z with snooker update. - - 'dynesty': DynamicNestedSampler() sampler from dynesty. ncpu: Integer Number of processors for the MCMC chains (mc3 defaults to one CPU for each chain plus a CPU for the central hub). @@ -250,16 +249,8 @@ def sample( >>> prior=prior, priorlow=priorlow, priorup=priorup, >>> leastsq='lm', nsamples=1e5, burnin=1000, plots=True) - >>> # Nested sampling: - >>> ns_output = mc3.sample( - >>> data, uncert, func, params, indparams=indparams, - >>> sampler='dynesty', pstep=pstep, ncpu=ncpu, pmin=pmin, pmax=pmax, - >>> prior=prior, priorlow=priorlow, priorup=priorup, - >>> leastsq='lm', plots=True) - >>> # See more examples and details at: >>> # https://mc3.readthedocs.io/en/latest/mcmc_tutorial.html - >>> # https://mc3.readthedocs.io/en/latest/ns_tutorial.html """ # Logging object: if isinstance(log, str): @@ -342,8 +333,6 @@ def sample( if ncpu is None and sampler in ['snooker', 'demc', 'mrw']: ncpu = nchains - elif ncpu is None and sampler == 'dynesty': - ncpu = 1 # Cap the number of processors: if ncpu >= mpr.cpu_count(): log.warning( @@ -371,10 +360,6 @@ def sample( pmax = np.tile( np.inf, nparams) pmin = np.asarray(pmin) pmax = np.asarray(pmax) - if (np.any(np.isinf(pmin)) or np.any(np.isinf(pmax))) \ - and sampler=='dynesty': - log.error('Parameter space must be constrained by pmin and pmax') - if pstep is None: pstep = 0.1 * np.abs(params) pstep = np.asarray(pstep) @@ -425,11 +410,6 @@ def sample( ) os.makedirs(fpath) - # At the moment, skip optimization when these dynesty inputs exist: - if sampler == 'dynesty' \ - and ('loglikelihood' in kwargs or 'prior_transform' in kwargs): - leastsq = None - # Least-squares minimization: chisq_factor = 1.0 if leastsq is not None: @@ -476,12 +456,6 @@ def sample( fgamma, fepsilon, hsize, kickoff, savefile, resume, log, pnames, texnames, ) - elif sampler == 'dynesty': - output = nested_sampling( - data, uncert, func, params, indparams, indparams_dict, - pmin, pmax, pstep, prior, priorlow, priorup, ncpu, - thinning, resume, log, **kwargs, - ) # Get some stats: output['chisq_factor'] = chisq_factor @@ -587,13 +561,8 @@ def sample( log.msg("\nOutput sampler files:") log.msg(stats_file, indent=2) - # Save results (pop unpickables before saving, then put back): if savefile is not None: - unpickables = ['dynesty_sampler'] - unpickables = np.intersect1d(unpickables, list(output.keys())) - tmp_outputs = {key: output.pop(key) for key in unpickables} np.savez(savefile, **output) - output.update(tmp_outputs) log.msg(savefile, indent=2) if plots: diff --git a/pyproject.toml b/pyproject.toml index 0de22bf..65c3dfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,6 @@ dependencies = [ [project.optional-dependencies] test = [ 'pytest>=6.0', - 'dynesty>=0.9.5', ] [project.urls] 
diff --git a/tests/test_dynesty.py b/tests/test_dynesty.py
deleted file mode 100644
index ea5ac69..0000000
--- a/tests/test_dynesty.py
+++ /dev/null
@@ -1,165 +0,0 @@
-# Copyright (c) 2015-2023 Patricio Cubillos and contributors.
-# MC3 is open-source software under the MIT license (see LICENSE).
-
-import os
-import sys
-import pytest
-
-import numpy as np
-
-import mc3
-
-
-def quad(p, x):
-    """
-    Quadratic polynomial function.
-
-    Parameters
-    p: Polynomial constant, linear, and quadratic coefficients.
-    x: Array of dependent variables where to evaluate the polynomial.
-    Returns
-    y: Polinomial evaluated at x: y(x) = p0 + p1*x + p2*x^2
-    """
-    y = p[0] + p[1]*x + p[2]*x**2.0
-    return y
-
-
-np.random.seed(12)
-# Create a synthetic dataset:
-x = np.linspace(0, 10, 100)
-p0 = [4.5, -2.4, 0.5]
-y = quad(p0, x)
-uncert = np.sqrt(np.abs(y))
-error = np.random.normal(0, uncert)
-data = y + error
-
-p1 = [4.5, 4.5, 0.5]
-y1 = quad(p1, x)
-uncert1 = np.sqrt(np.abs(y1))
-data1 = y1 + np.random.normal(0, uncert1)
-
-# Fit the quad polynomial coefficients:
-params = np.array([10.0, -2.0, 0.1])  # Initial guess of fitting params.
-pmin = np.array([ 0.0, -5.0, -1.0])
-pmax = np.array([10.0, 5.0, 1.0])
-pstep = np.array([0.03, 0.03, 0.05])
-pnames = ["constant", "linear", "quadratic"]
-texnames = ["$\\alpha$", "$\\log(\\beta)$", "quadratic"]
-sampler = 'dynesty'
-
-
-@pytest.mark.skip(reason="Stop dynesty support")
-def test_dynesty_minimal():
-    output = mc3.sample(data, uncert, func=quad, params=np.copy(params),
-                        indparams=[x], pmin=pmin, pmax=pmax,
-                        pstep=pstep, sampler=sampler, maxiter=5000)
-    # No error? that's a pass.
-    assert output is not None
-
-
-@pytest.mark.skip(reason="Stop dynesty support")
-def test_dynesty_ncpu():
-    output = mc3.sample(data, uncert, func=quad, params=np.copy(params),
-                        indparams=[x], pmin=pmin, pmax=pmax, ncpu=8,
-                        pstep=pstep, sampler=sampler, maxiter=5000)
-    assert output is not None
-
-
-@pytest.mark.skip(reason="Stop dynesty support")
-def test_dynesty_shared():
-    output = mc3.sample(data1, uncert1, func=quad, params=np.copy(params),
-                        sampler=sampler, indparams=[x], pmin=pmin, pmax=pmax,
-                        pstep=[0.03, -1, 0.05], maxiter=5000)
-    assert output is not None
-    assert output['bestp'][1] == output['bestp'][0]
-
-
-@pytest.mark.skip(reason="Stop dynesty support")
-def test_dynesty_fixed():
-    pars = np.copy(params)
-    pars[0] = p0[0]
-    output = mc3.sample(data, uncert, func=quad, params=np.copy(pars),
-                        sampler=sampler, indparams=[x], pmin=pmin, pmax=pmax,
-                        pstep=[0, 0.03, 0.05], maxiter=5000)
-    assert output is not None
-    assert len(output['bestp']) == len(params)
-    assert output['bestp'][0] == pars[0]
-    assert output['CRlo'][0] == 0
-    assert output['CRhi'][0] == 0
-    assert output['stdp'][0] == 0
-
-
-@pytest.mark.skip(reason="Stop dynesty support")
-@pytest.mark.parametrize('leastsq', ['lm', 'trf'])
-def test_dynesty_optimize(capsys, leastsq):
-    output = mc3.sample(data, uncert, func=quad, params=np.copy(params),
-                        sampler=sampler, indparams=[x], pmin=pmin, pmax=pmax,
-                        pstep=pstep, maxiter=5000,
-                        leastsq=leastsq)
-    captured = capsys.readouterr()
-    assert output is not None
-    assert "Least-squares best-fitting parameters:" in captured.out
-    np.testing.assert_allclose(output['bestp'],
-        np.array([4.28263253, -2.40781859, 0.49534411]), rtol=1e-7)
-
-
-@pytest.mark.skip(reason="Stop dynesty support")
-def test_dynesty_priors_gauss():
-    prior = np.array([ 4.5, 0.0, 0.0])
-    priorlow = np.array([ 0.1, 0.0, 0.0])
-    priorup = np.array([ 0.1, 0.0, 0.0])
-    output = mc3.sample(data, uncert, func=quad, params=np.copy(params),
-                        sampler=sampler, indparams=[x], pmin=pmin, pmax=pmax,
-                        pstep=pstep, maxiter=5000,
-                        prior=prior, priorlow=priorlow, priorup=priorup)
-    assert output is not None
-    assert -2*output['best_log_post'] > output['best_chisq']
-    assert np.all(-2*output['log_post'] > output['chisq'])
-
-
-@pytest.mark.skip(reason="Stop dynesty support")
-def test_dynesty_log(capsys, tmp_path):
-    os.chdir(str(tmp_path))
-    output = mc3.sample(data, uncert, func=quad, params=np.copy(params),
-                        sampler=sampler, indparams=[x], pmin=pmin, pmax=pmax,
-                        pstep=pstep, maxiter=5000,
-                        log='NS.log')
-    captured = capsys.readouterr()
-    assert output is not None
-    assert "NS.log" in captured.out
-    assert "NS.log" in os.listdir(".")
-
-
-@pytest.mark.skip(reason="Stop dynesty support")
-def test_dynesty_savefile(capsys, tmp_path):
-    os.chdir(str(tmp_path))
-    output = mc3.sample(data, uncert, func=quad, params=np.copy(params),
-                        sampler=sampler, indparams=[x], pmin=pmin, pmax=pmax,
-                        pstep=pstep, maxiter=5000,
-                        savefile='NS.npz')
-    captured = capsys.readouterr()
-    assert output is not None
-    assert 'dynesty_sampler' in output
-    assert "NS.npz" in captured.out
-    assert "NS.npz" in os.listdir(".")
-
-
-# Trigger errors:
-@pytest.mark.skip(reason="Stop dynesty support")
-def test_dynesty_pmin_error(capsys):
-    output = mc3.sample(data, uncert, func=quad, params=np.copy(params),
-                        sampler=sampler, indparams=[x], pstep=pstep, pmax=pmax)
-    captured = capsys.readouterr()
-    assert output is None
-    assert "Parameter space must be constrained by pmin and pmax." \
-        in captured.out
-
-
-@pytest.mark.skip(reason="Stop dynesty support")
-def test_dynesty_pmax_error(capsys):
-    output = mc3.sample(data, uncert, func=quad, params=np.copy(params),
-                        sampler=sampler, indparams=[x], pstep=pstep, pmin=pmin)
-    captured = capsys.readouterr()
-    assert output is None
-    assert "Parameter space must be constrained by pmin and pmax." \
-        in captured.out
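
Note for users of sampler='dynesty': after this patch, nested sampling is no
longer available through mc3.sample(); the supported samplers are 'snooker',
'demc', and 'mrw'. Equivalent results can still be obtained by driving dynesty
directly. The sketch below is illustrative only, not part of this patch: it
uses the dynesty calls that the deleted mc3/ns_driver.py itself relied on
(DynamicNestedSampler, run_nested, the logwt/logz importance weights, and
dynesty.utils.resample_equal, on which the removed resample_equal() was
based), and the quad() setup mirrors the deleted examples/ns_tutorial.py.

    import dynesty
    import dynesty.utils
    import numpy as np

    def quad(p, x):
        """Quadratic polynomial: y(x) = p0 + p1*x + p2*x^2."""
        return p[0] + p[1]*x + p[2]*x**2.0

    # Synthetic dataset, as in the deleted ns_tutorial.py:
    np.random.seed(3)
    x = np.linspace(0, 10, 100)
    y = quad([3, -2.4, 0.5], x)
    uncert = np.sqrt(np.abs(y))
    data = y + np.random.normal(0, uncert)

    # dynesty needs a bounded prior; use a uniform box over the tutorial's
    # pmin/pmax (this is why the removed driver required finite bounds):
    pmin = np.array([-10.0, -20.0, -10.0])
    pmax = np.array([ 40.0, 20.0, 10.0])

    def loglike(params):
        # Gaussian log-likelihood, i.e., -chisq/2 as in the removed driver.
        model = quad(params, x)
        return -0.5 * np.sum(((data - model) / uncert)**2)

    def prior_transform(u):
        # Map the unit hypercube to the parameter box.
        return pmin + u * (pmax - pmin)

    sampler = dynesty.DynamicNestedSampler(loglike, prior_transform, ndim=3)
    sampler.run_nested()
    results = sampler.results

    # Equal-weight posterior, the same resampling the removed driver applied
    # (recent dynesty releases also provide results.samples_equal()):
    weights = np.exp(results.logwt - results.logz[-1])
    posterior = dynesty.utils.resample_equal(results.samples, weights)

For the in-package path, the quickest migration is to switch the sampler,
following the MCMC example that remains in the sampler_driver.py docstring
(params and pstep taken from the deleted tutorial):

    import mc3
    params = np.array([10.0, -2.0, 0.1])
    pstep = np.array([1.0, 0.5, 0.1])
    mcmc_output = mc3.sample(data=data, uncert=uncert, func=quad, params=params,
                             indparams=[x], pmin=pmin, pmax=pmax, pstep=pstep,
                             sampler='snooker', nsamples=1e5, burnin=1000)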