
Merge pull request #584 from mrava87/dev
doc: updated bayesian tutorial
mrava87 authored Jun 16, 2024
2 parents d1a341a + 3f271e2 commit b19a63c
Showing 1 changed file with 102 additions and 49 deletions.
151 changes: 102 additions & 49 deletions tutorials/bayesian.py
@@ -21,19 +21,26 @@
Based on the above definition, we construct some prior models in the frequency
domain, convert each of them to the time domain and use such an ensemble
to estimate the prior mean :math:`\mathbf{x}_0` and model
covariance :math:`\mathbf{C}_{x_0}`.
We then create our data by sampling the true signal at certain locations
and solve the reconstruction problem within a Bayesian framework. Since we
assume Gaussian priors, the equations for the posterior mean
and covariance can be derived analytically:

.. math::
    \mathbf{x} = \mathbf{x}_0 + \mathbf{C}_{x_0} \mathbf{R}^T
    (\mathbf{R} \mathbf{C}_{x_0} \mathbf{R}^T + \mathbf{C}_y)^{-1}
    (\mathbf{y} - \mathbf{R} \mathbf{x}_0)

and

.. math::
    \mathbf{C}_x = \mathbf{C}_{x_0} - \mathbf{C}_{x_0} \mathbf{R}^T
    (\mathbf{R} \mathbf{C}_{x_0} \mathbf{R}^T + \mathbf{C}_y)^{-1}
    \mathbf{R} \mathbf{C}_{x_0}
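
Finally, since building and inverting dense matrices quickly becomes
prohibitive for large problems, we also estimate the posterior mean and
covariance by means of the Randomize-Then-Optimize (RTO) approach of
Bardsley et al., 2014.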
"""
import matplotlib.pyplot as plt

@@ -80,22 +87,26 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
sigmaa = [0.1, 0.5, 0.6]
phi0 = [-90.0, 0.0, 0.0]
sigmaphi = [0.1, 0.2, 0.4]
sigmad = 1
scaling = 100  # scale by a factor to allow noise std=1

# Prior models
nt = 200
nfft = 2**11
dt = 0.004
t = np.arange(nt) * dt
xs = scaling * np.array(
    [
        prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft)
        for _ in range(nreals)
    ]
)

# True model (taken as one possible realization)
x = scaling * prior_realization(
    f0, a0, phi0, [0, 0, 0], [0, 0, 0], [0, 0, 0], dt, nt, nfft
)


###############################################################################
# We now have a set of prior models in the time domain. We can easily use sample
@@ -110,7 +121,7 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
N = 30  # length of decorrelation
diags = np.array([Cm[i, i - N : i + N + 1] for i in range(N, nt - N)])
diag_ave = np.average(diags, axis=0)
# add a taper at the start and end to avoid edge effects
diag_ave *= np.hamming(2 * N + 1)
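
# One simple way to turn the averaged band into a full stationary covariance
# matrix is a banded Toeplitz construction. This is only an illustrative
# sketch (the tutorial builds its own covariance operator later on) and
# ``Cm_stat`` is our own variable name, not part of the original tutorial
from scipy.linalg import toeplitz

col = np.zeros(nt)
col[: N + 1] = diag_ave[N:]  # zero and positive lags of the averaged band
Cm_stat = toeplitz(col)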

fig, ax = plt.subplots(1, 1, figsize=(12, 4))
@@ -157,65 +168,107 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
ynmask = Rop.mask(x + n)

###############################################################################
# First, since the problem is rather small, we construct dense versions of
# all our matrices and compute the analytical posterior mean and covariance

Cm = Cm_op.todense()
Cd = Cd_op.todense()
R = Rop.todense()

# Bayesian analytical solution
xpost_ana = x0 + Cm @ R.T @ (np.linalg.solve(R @ Cm @ R.T + Cd, yn - R @ x0))
Cmpost_ana = Cm - Cm @ R.T @ (np.linalg.solve(R @ Cm @ R.T + Cd, R @ Cm))
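
###############################################################################
# As a small, optional sketch (the variable name ``sigma_post_ana`` is our own
# choice, not part of the original tutorial), the diagonal of the posterior
# covariance provides pointwise uncertainties that could be used to draw
# confidence bounds around the posterior mean

sigma_post_ana = np.sqrt(np.diag(Cmpost_ana))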

###############################################################################
# Next we solve the same Bayesian inversion equation iteratively. We will see
# that, provided we use enough iterations, we can retrieve the same values as
# the analytical posterior mean
xpost_iter = x0 + Cm_op * Rop.H * (
    lsqr(Rop * Cm_op * Rop.H + Cd_op, yn - Rop * x0, iter_lim=400)[0]
)
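
# As a quick, optional check (illustrative only, not part of the original
# tutorial), we can measure how far the iterative estimate is from the
# analytical posterior mean
print("Relative mean difference (iter vs ana):",
      np.linalg.norm(xpost_iter - xpost_ana) / np.linalg.norm(xpost_ana))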

###############################################################################
# But what if the problem did not allow creating dense matrices for both the
# operator and the input covariance matrices? In this case, we can resort to
# the Randomize-Then-Optimize (RTO) algorithm of Bardsley et al., 2014, which
# simply solves the same problem that we solved to find the MAP solution
# repeatedly, adding random noise to the data each time. It can be shown that
# the sample mean and covariance of the solutions of the different perturbed
# problems provide a good approximation of the true posterior mean and
# covariance.

# RTO number of solutions
nreals = 1000

xrto = []
for ireal in range(nreals):
    yreal = yn + Rop * np.random.normal(0, sigmad, nt)
    xrto.append(
        x0
        + Cm_op
        * Rop.H
        * (lsqr(Rop * Cm_op * Rop.H + Cd_op, yreal - Rop * x0, iter_lim=400)[0])
    )

xrto = np.array(xrto)
xpost_rto = np.average(xrto, axis=0)
Cmpost_rto = ((xrto - xpost_rto).T @ (xrto - xpost_rto)) / nreals
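
# As a minimal, illustrative check (exact numbers depend on the random seed),
# we can quantify how closely the RTO sample statistics approximate their
# analytical counterparts
err_mean = np.linalg.norm(xpost_rto - xpost_ana) / np.linalg.norm(xpost_ana)
err_cov = np.linalg.norm(Cmpost_rto - Cmpost_ana) / np.linalg.norm(Cmpost_ana)
print(f"Relative error on posterior mean: {err_mean:.4f}")
print(f"Relative error on posterior covariance: {err_cov:.4f}")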

###############################################################################
# Finally we visualize the different results

# Means
fig, ax = plt.subplots(1, 1, figsize=(12, 5))
ax.plot(t, x, "k", lw=6, label="true")
ax.plot(t, xpost_ana, "r", lw=7, label="bayesian inverse (ana)")
ax.plot(t, xpost_iter, "g", lw=5, label="bayesian inverse (iter)")
ax.plot(t, xpost_rto, "b", lw=3, label="bayesian inverse (rto)")
ax.plot(t, ymask, ".k", ms=25, label="available samples")
ax.plot(t, ynmask, ".r", ms=25, label="available noisy samples")
ax.legend()
ax.set_title("Mean reconstruction")
ax.set_xlim(0, 0.8)
plt.tight_layout()

# RTO realizations
fig, ax = plt.subplots(1, 1, figsize=(12, 5))
ax.plot(t, x, "k", lw=6, label="true")
ax.plot(t, xrto[::10].T, "--b", lw=0.5)
ax.plot(t, xpost_rto, "b", lw=3, label="bayesian inverse (rto)")
ax.plot(t, ymask, ".k", ms=25, label="available samples")
ax.plot(t, ynmask, ".r", ms=25, label="available noisy samples")
ax.legend()
ax.set_title("Signal")
ax.set_title("RTO realizations")
ax.set_xlim(0, 0.8)

# Covariances
fig, axs = plt.subplots(1, 2, figsize=(12, 4))
axs[0].imshow(
    Cmpost_ana,
    interpolation="nearest",
    cmap="seismic",
    vmin=-5e-1,
    vmax=2,
    extent=(t[0], t[-1], t[-1], t[0]),
)
axs[0].set_title(r"$\mathbf{C}_m^{post,ANA}$")
axs[0].axis("tight")

axs[1].imshow(
    Cmpost_rto,
    interpolation="nearest",
    cmap="seismic",
    vmin=-5e-1,
    vmax=2,
    extent=(t[0], t[-1], t[-1], t[0]),
)
axs[1].set_title(r"$\mathbf{C}_m^{post,RTO}$")
axs[1].axis("tight")
plt.tight_layout()

###############################################################################
# Note that here we have been able to compute a sample posterior covariance
# from the RTO realizations. By displaying it, we can see how both the overall
# variances and the correlations between different parameters have become
# narrower compared to their prior counterparts. Moreover, whilst the RTO
# covariance seems to be slightly underestimated, it represents an appealing
# alternative to the closed-form solution for large-scale problems under
# Gaussian assumptions.
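
###############################################################################
# As a final, optional sketch (variable names here are our own, not part of
# the original tutorial), we can extract the pointwise posterior standard
# deviations from the diagonals of the two covariance estimates and compare
# them directly

sigma_post_rto = np.sqrt(np.diag(Cmpost_rto))

fig, ax = plt.subplots(1, 1, figsize=(12, 4))
ax.plot(t, sigma_post_ana, "r", lw=3, label="posterior std (ana)")
ax.plot(t, sigma_post_rto, "b", lw=2, label="posterior std (rto)")
ax.legend()
ax.set_title("Pointwise posterior standard deviations")
ax.set_xlim(0, 0.8)
plt.tight_layout()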
