# common.py -- forked from cranmer/active_sciencing
import time
import datetime
from multiprocessing import Pool

import numpy as np
import emcee
from ipywidgets import widgets

import distr
import bayesopt
import plots
import science_loop_widget
import eig_widget


def lnprior(theta, prior):
    """Log of the prior density; -inf where the prior is (numerically) zero."""
    p = prior.pdf(theta)
    if p <= 1e-8:
        return -np.inf
    return np.log(p)

def lnlike(theta, x, phi, simulator,
           simulation_kwargs=dict(n_samples=5000),
           distr_kwargs={}, logpdf_kwargs={}):
    """Simulation-based log-likelihood: run the simulator at (theta, phi),
    fit a density to the simulated samples, and evaluate it on the data x."""
    mc = simulator(theta, phi, **simulation_kwargs)
    p = distr.Distribution(name='', samples=mc, **distr_kwargs)
    logpdf = p.approx_logpdf(**logpdf_kwargs)
    return np.sum(logpdf(x))
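
# A minimal sketch of driving lnlike. `toy_simulator` below is a hypothetical
# Gaussian stand-in for the project's workflow simulator, not part of this
# repository:
#
#   def toy_simulator(theta, phi, n_samples=5000):
#       return np.random.normal(loc=theta, scale=phi, size=n_samples)
#
#   x_obs = toy_simulator(0.5, 1.0, n_samples=100)
#   ll = lnlike(0.5, x_obs, 1.0, toy_simulator)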

def lnprob(theta, x, prior, phi, lnlike_kwargs):
    """Log posterior (up to a constant): log prior + log likelihood."""
    lp = lnprior(theta, prior)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(theta, x, phi, **lnlike_kwargs)

def calculate_posterior(prior, data, phi, n_walkers=10, n_warmup=10,
                        n_chainlen=20, lnprob_args=None):
    """Compute samples from the posterior with emcee."""
    ndim = 1
    # initialise walkers from the MAP + noise
    # XXX alternatively sample a point from the KDE without adding noise?
    # XXX not sure if the noise takes us into a region where the prior is zero?
    pos = [prior.map() + 1e-1 * np.random.randn(ndim) for i in range(n_walkers)]
    sampler = emcee.EnsembleSampler(
        n_walkers, ndim, lnprob,
        args=(data, prior, phi, lnprob_args),
        # threads=n_walkers
    )
    # burn in, then reset and run the chain proper
    pos, prob, state = sampler.run_mcmc(pos, n_warmup)
    sampler.reset()
    pos, prob, state = sampler.run_mcmc(pos, n_chainlen)
    return distr.Distribution(prior.name, prior.range, sampler.flatchain[:, 0])
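
# Sketch of a posterior update, reusing the hypothetical toy_simulator above
# and assuming the Distribution constructor takes (name, range, samples) as
# in the return value of calculate_posterior:
#
#   prior = distr.Distribution('theta', (-2., 2.),
#                              np.random.uniform(-2., 2., 10000))
#   posterior = calculate_posterior(prior, x_obs, phi=1.0,
#                                   lnprob_args=dict(simulator=toy_simulator))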

def info_gain(p1, p2):
    """Entropy reduction from p1 to p2; positive if p2 is more concentrated."""
    return p1.entropy() - p2.entropy()

def _simulate(args):
    theta_map, phi, prior, sim_n_data, emcee_kwargs = args
    # external workflow provides simulated data
    # (use sim_n_data here; the original hardcoded n_samples=1000 and
    # silently ignored the sim_n_data it was passed)
    sim_data = emcee_kwargs['lnprob_args']['simulator'](theta_map, phi,
                                                        n_samples=sim_n_data)
    # external workflow uses simulator to provide likelihood
    sim_posterior = calculate_posterior(prior, sim_data, phi, **emcee_kwargs)
    return info_gain(prior, sim_posterior)

def expected_information_gain(phi, prior, emcee_kwargs, sim_n_data, map_bins,
                              widget=None):
    """Estimate the expected information gain of design phi by averaging
    info_gain over simulated experiments run through the workflow."""
    n_simulations = 4
    n_parallel = 4
    phi = phi[0]
    # need to pass in prior through some extra arguments
    if widget is not None:
        widget.max = n_simulations
        widget.value = 0
    # use saddle-point approximation
    theta_map = prior.map(bins=map_bins)
    print(datetime.datetime.now(),
          'EIG via 4 parallel experiments with [theta,phi]', theta_map, phi)
    # currently the MCMC sampler is the slower part, which already uses
    # threads, so we don't gain much here; this should change once we have
    # a more realistic simulator that takes time to run
    pool = Pool(n_parallel)
    eig_results = [
        pool.apply_async(_simulate,
                         args=([theta_map, phi, prior, sim_n_data, emcee_kwargs],))
        for i in range(n_simulations)
    ]
    while not all(r.ready() for r in eig_results):
        time.sleep(0.01)
        if widget is not None:
            widget.value = sum(r.ready() for r in eig_results)
    eig = [r.get() for r in eig_results]
    pool.close()
    pool.join()
    return np.mean(eig)
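
# Sketch of a single EIG evaluation, reusing the hypothetical toy_simulator
# and prior from the sketches above (phi is passed as a 1-element list to
# match the optimizer interface used below):
#
#   eig = expected_information_gain(
#       [0.7], prior,
#       emcee_kwargs=dict(lnprob_args=dict(simulator=toy_simulator)),
#       sim_n_data=100, map_bins=50)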

def overview_widgets(model):
    """Build the science-loop, data-collection, and EIG progress widgets."""
    science_wdg = science_loop_widget.loopwidget()
    collect_ui = model.collect_widget()
    eig_wdg = eig_widget.widget()
    sub_widgets = [science_wdg, collect_ui, eig_wdg]
    return widgets.HBox([science_wdg.view, collect_ui, eig_wdg]), sub_widgets

def eig_search_settings(model, ndata, widget, mcmc_length=50):
    """Collect the keyword arguments for expected_information_gain."""
    eig_kwargs = {
        'emcee_kwargs': {
            'n_chainlen': mcmc_length,
            'lnprob_args': model.details_likelihood_settings},
        'sim_n_data': ndata,
        'map_bins': model.details_map_bins,
        'widget': widget,
    }
    return eig_kwargs
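
# Sketch of wiring the settings together, assuming a `model` object that
# exposes collect_widget(), details_likelihood_settings, and details_map_bins
# as used above:
#
#   overview, (science_wdg, collect_ui, eig_wdg) = overview_widgets(model)
#   eig_kwargs = eig_search_settings(model, ndata=100, widget=eig_wdg)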

def design_next_experiment_bayesopt(prior, phi_bounds, eig_kwargs,
                                    n_totalcalls=10, n_random_calls=5,
                                    ax=None, fig=None, widget=None):
    """Pick the next design phi by Bayesian optimisation of the negated EIG."""
    opt = bayesopt.get_optimizer(phi_bounds, n_random_calls)
    func = lambda p: -expected_information_gain(p, prior, **eig_kwargs)
    if widget is not None:
        widget.value = 0
    for i in range(n_totalcalls):
        # ask for the next x and evaluate the objective there
        next_x = opt.ask()
        # print('ASK', next_x)
        next_f = func(next_x)
        if widget is not None:
            widget.value = widget.value + 1
        # print('TELL', next_f)
        # tell the (x, f) pair to the optimizer
        res = opt.tell(next_x, next_f)
        # print('DATA', res.x_iters, res.func_vals)
        if ax:
            ax.clear()
            plots.plot_bayes(res, phi_range=phi_bounds, ax=ax)
            fig.canvas.draw()
    return res, res.x[0], res.x_iters
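
# Sketch of a Bayes-opt design search with hypothetical bounds, assuming
# phi_bounds is a (low, high) pair as in the grid search below:
#
#   res, next_phi, tried = design_next_experiment_bayesopt(
#       prior, (0.1, 3.0), eig_kwargs, n_totalcalls=10, n_random_calls=5)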

def design_next_experiment_simplegrid(prior, phi_bounds, eig_kwargs, n_points=6):
    """Pick the next design phi by evaluating the EIG on a regular grid."""
    eig_test_phis = np.linspace(*phi_bounds, num=n_points)
    eig = []
    for x in eig_test_phis.reshape(-1, 1):
        eig.append(expected_information_gain(x, prior, **eig_kwargs))
    eig = np.array(eig)
    next_phi = eig_test_phis[np.argmax(eig)]
    return next_phi, eig_test_phis, eig
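
# Sketch of the grid-based alternative over the same hypothetical bounds:
#
#   next_phi, grid_phis, grid_eig = design_next_experiment_simplegrid(
#       prior, (0.1, 3.0), eig_kwargs, n_points=6)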