Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

doc: updated bayesian tutorial #584

Merged
merged 1 commit into from
Jun 16, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 102 additions & 49 deletions tutorials/bayesian.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,26 @@

Based on the above definition, we construct some prior models in the frequency
domain, convert each of them to the time domain and use such an ensemble
to estimate the prior mean :math:`\mu_\mathbf{x}` and model
covariance :math:`\mathbf{C_x}`.
to estimate the prior mean :math:`\mathbf{x}_0` and model
covariance :math:`\mathbf{C}_{x_0}`.

We then create our data by sampling the true signal at certain locations
We then create our data by sampling the true signal at certain locations
and solve the resconstruction problem within a Bayesian framework. Since we are
assuming gaussianity in our priors, the equation to obtain the posterion mean
can be derived analytically:
and covariance can be derived analytically:

.. math::
\mathbf{x} = \mathbf{x_0} + \mathbf{C}_x \mathbf{R}^T
(\mathbf{R} \mathbf{C}_x \mathbf{R}^T + \mathbf{C}_y)^{-1} (\mathbf{y} -
\mathbf{x} = \mathbf{x_0} + \mathbf{C}_{x_0} \mathbf{R}^T
(\mathbf{R} \mathbf{C}_{x_0} \mathbf{R}^T + \mathbf{C}_y)^{-1} (\mathbf{y} -
\mathbf{R} \mathbf{x_0})

and

.. math::
\mathbf{C}_x = \mathbf{C}_{x_0} - \mathbf{C}_{x_0} \mathbf{R}^T
(\mathbf{R} \mathbf{C}_x \mathbf{R}^T + \mathbf{C}_y)^{-1}
\mathbf{R} \mathbf{C}_{x_0}

"""
import matplotlib.pyplot as plt

Expand Down Expand Up @@ -80,22 +87,26 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
sigmaa = [0.1, 0.5, 0.6]
phi0 = [-90.0, 0.0, 0.0]
sigmaphi = [0.1, 0.2, 0.4]
sigmad = 1e-2
sigmad = 1
scaling = 100 # Scale by a factor to allow noise std=1

# Prior models
nt = 200
nfft = 2**11
dt = 0.004
t = np.arange(nt) * dt
xs = np.array(
xs = scaling * np.array(
[
prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft)
for _ in range(nreals)
]
)

# True model (taken as one possible realization)
x = prior_realization(f0, a0, phi0, [0, 0, 0], [0, 0, 0], [0, 0, 0], dt, nt, nfft)
x = scaling * prior_realization(
f0, a0, phi0, [0, 0, 0], [0, 0, 0], [0, 0, 0], dt, nt, nfft
)


###############################################################################
# We have now a set of prior models in time domain. We can easily use sample
Expand All @@ -110,7 +121,7 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
N = 30 # lenght of decorrelation
diags = np.array([Cm[i, i - N : i + N + 1] for i in range(N, nt - N)])
diag_ave = np.average(diags, axis=0)
# add a taper at the end to avoid edge effects
# add a taper at the start and end to avoid edge effects
diag_ave *= np.hamming(2 * N + 1)

fig, ax = plt.subplots(1, 1, figsize=(12, 4))
Expand Down Expand Up @@ -157,65 +168,107 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
ynmask = Rop.mask(x + n)

###############################################################################
# First we apply the Bayesian inversion equation
xbayes = x0 + Cm_op * Rop.H * (
# First, since the problem is rather small, we construct the dense version of
# all our matrices and we compute the analytical posterior mean and covariance

Cm = Cm_op.todense()
Cd = Cd_op.todense()
R = Rop.todense()

# Bayesian analytical solution
xpost_ana = x0 + Cm @ R.T @ (np.linalg.solve(R @ Cm @ R.T + Cd, yn - R @ x0))
Cmpost_ana = Cm - Cm @ R.T @ (np.linalg.solve(R @ Cm @ R.T + Cd, R @ Cm))

###############################################################################
# Next we solve the same Bayesian inversion equation iteratively. We will see
# that provided we use enough iterations we can retrieve the same values of
# the analytical posterior mean
xpost_iter = x0 + Cm_op * Rop.H * (
lsqr(Rop * Cm_op * Rop.H + Cd_op, yn - Rop * x0, iter_lim=400)[0]
)

# Visualize
###############################################################################
# But what is the problem did not allow creating dense matrices for both the
# operator and the input covariance matrices. In this case, we can resort to the
# Randomize-Then-Optimize algorithm of Bardsley et al., 2014, which simply solves
# the same problem that we solved to find the MAP solution repeatedly by adding
# random noise to the data. It can be shown that the sample mean and covariance
# of the solutions of the different perturbed problems provide a good
# approximation for the true posterior mean and covariance.

# RTO number of solutions
nreals = 1000

xrto = []
for ireal in range(nreals):
yreal = yn + Rop * np.random.normal(0, sigmad, nt)
xrto.append(
x0
+ Cm_op
* Rop.H
* (lsqr(Rop * Cm_op * Rop.H + Cd_op, yreal - Rop * x0, iter_lim=400))[0]
)

xrto = np.array(xrto)
xpost_rto = np.average(xrto, axis=0)
Cmpost_rto = ((xrto - xpost_rto).T @ (xrto - xpost_rto)) / nreals

###############################################################################
# Finally we visualize the different results

# Means
fig, ax = plt.subplots(1, 1, figsize=(12, 5))
ax.plot(t, x, "k", lw=6, label="true")
ax.plot(t, xpost_ana, "r", lw=7, label="bayesian inverse (ana)")
ax.plot(t, xpost_iter, "g", lw=5, label="bayesian inverse (iter)")
ax.plot(t, xpost_rto, "b", lw=3, label="bayesian inverse (rto)")
ax.plot(t, ymask, ".k", ms=25, label="available samples")
ax.plot(t, ynmask, ".r", ms=25, label="available noisy samples")
ax.plot(t, xbayes, "r", lw=3, label="bayesian inverse")
ax.legend()
ax.set_title("Signal")
ax.set_title("Mean reconstruction")
ax.set_xlim(0, 0.8)
plt.tight_layout()

###############################################################################
# So far we have been able to estimate our posterion mean. What about its
# uncertainties (i.e., posterion covariance)?
#
# In real-life applications it is very difficult (if not impossible)
# to directly compute the posterior covariance matrix. It is much more
# useful to create a set of models that sample the posterion probability.
# We can do that by solving our problem several times using different prior
# realizations as starting guesses:

xpost = [
x0
+ Cm_op
* Rop.H
* (lsqr(Rop * Cm_op * Rop.H + Cd_op, yn - Rop * x0, iter_lim=400)[0])
for x0 in xs[:30]
]
xpost = np.array(xpost)

x0post = np.average(xpost, axis=0)
Cm_post = ((xpost - x0post).T @ (xpost - x0post)) / nreals

# Visualize
# RTO realizations
fig, ax = plt.subplots(1, 1, figsize=(12, 5))
ax.plot(t, x, "k", lw=6, label="true")
ax.plot(t, xpost.T, "--r", lw=1)
ax.plot(t, x0post, "r", lw=3, label="bayesian inverse")
ax.plot(t, xrto[::10].T, "--b", lw=0.5)
ax.plot(t, xpost_rto, "b", lw=3, label="bayesian inverse (rto)")
ax.plot(t, ymask, ".k", ms=25, label="available samples")
ax.plot(t, ynmask, ".r", ms=25, label="available noisy samples")
ax.legend()
ax.set_title("Signal")
ax.set_title("RTO realizations")
ax.set_xlim(0, 0.8)

fig, ax = plt.subplots(1, 1, figsize=(5, 4))
im = ax.imshow(
Cm_post, interpolation="nearest", cmap="seismic", extent=(t[0], t[-1], t[-1], t[0])
# Covariances
fig, axs = plt.subplots(1, 2, figsize=(12, 4))
axs[0].imshow(
Cmpost_ana,
interpolation="nearest",
cmap="seismic",
vmin=-5e-1,
vmax=2,
extent=(t[0], t[-1], t[-1], t[0]),
)
axs[0].set_title(r"$\mathbf{C}_m^{post,ANA}$")
axs[0].axis("tight")

axs[1].imshow(
Cmpost_rto,
interpolation="nearest",
cmap="seismic",
vmin=-5e-1,
vmax=2,
extent=(t[0], t[-1], t[-1], t[0]),
)
ax.set_title(r"$\mathbf{C}_m^{posterior}$")
ax.axis("tight")
axs[1].set_title(r"$\mathbf{C}_m^{post,RTO}$")
axs[1].axis("tight")
plt.tight_layout()

###############################################################################
# Note that here we have been able to compute a sample posterior covariance
# from its estimated samples. By displaying it we can see how both the overall
# from its estimated samples. By displaying it we can see how both the overall
# variances and the correlation between different parameters have become
# narrower compared to their prior counterparts.
# narrower compared to their prior counterparts. Moreover, whilst the RTO
# covariance seems to be slightly under-estimated, this represents an appealing
# alternative to the closed-form solution for large-scale problems under
# Gaussian assumptions.