diff --git a/docs/source/acknowledgements.rst b/docs/source/acknowledgements.rst index abef5dc8..9a94fb73 100644 --- a/docs/source/acknowledgements.rst +++ b/docs/source/acknowledgements.rst @@ -103,19 +103,6 @@ and in the supporting information for Journal of Geophysical Research: Oceans, 125(11), e2020JC016370, 2020, doi: 10.1029/2020JC016370 -L-BFGS -`````` - -The file `tlm_adjoint/optimization.py -`_ includes an implementation of -the L-BFGS algorithm, described in - -- Jorge Nocedal and Stephen J. Wright, 'Numerical optimization', Springer, New - York, NY, 2006, Second edition, doi: 10.1007/978-0-387-40065-5 -- Richard H. Byrd, Peihuang Lu, and Jorge Nocedal, and Ciyou Zhu, 'A limited - memory algorithm for bound constrained optimization', SIAM Journal on - Scientific Computing, 16(5), 1190--1208, 1995, doi: 10.1137/0916069 - PyTorch ``````` diff --git a/docs/source/examples/8_hessian_uq.ipynb b/docs/source/examples/8_hessian_uq.ipynb index 7a25a332..ccb4504d 100644 --- a/docs/source/examples/8_hessian_uq.ipynb +++ b/docs/source/examples/8_hessian_uq.ipynb @@ -287,7 +287,7 @@ "source": [ "optimizer = TAOSolver(lambda m: forward(m)[2], space, H_0_action=B_action,\n", " solver_parameters={\"tao_type\": \"lmvm\",\n", - " \"tao_gatol\": 1.0e-4,\n", + " \"tao_gatol\": 1.0e-5,\n", " \"tao_grtol\": 0.0,\n", " \"tao_gttol\": 0.0,\n", " \"tao_monitor\": None})\n", diff --git a/tests/fenics/test_minimize.py b/tests/fenics/test_minimize.py index cb05141f..e056aff5 100644 --- a/tests/fenics/test_minimize.py +++ b/tests/fenics/test_minimize.py @@ -26,12 +26,6 @@ def scipy_trust_ncg_minimization(forward, M0): return M -def l_bfgs_minimization(forward, M0): - M, _ = minimize_l_bfgs( - forward, M0, s_atol=0.0, g_atol=1.0e-11) - return M - - def tao_lmvm_minimization(forward, m0): return minimize_tao(forward, m0, solver_parameters={"tao_type": "lmvm", @@ -51,7 +45,6 @@ def tao_nls_minimization(forward, m0): @pytest.mark.fenics @pytest.mark.parametrize("minimize", [scipy_l_bfgs_b_minimization, scipy_trust_ncg_minimization, - pytest.param(l_bfgs_minimization, marks=pytest.mark.xfail), # noqa: E501 tao_lmvm_minimization, pytest.param(tao_nls_minimization, marks=pytest.mark.xfail)]) # noqa: E501 @pytest.mark.skipif(complex_mode, reason="real only") @@ -96,7 +89,6 @@ def forward_J(alpha): @pytest.mark.fenics @pytest.mark.parametrize("minimize", [scipy_l_bfgs_b_minimization, scipy_trust_ncg_minimization, - pytest.param(l_bfgs_minimization, marks=pytest.mark.xfail), # noqa: E501 tao_lmvm_minimization, pytest.param(tao_nls_minimization, marks=pytest.mark.xfail)]) # noqa: E501 @pytest.mark.skipif(complex_mode, reason="real only") @@ -151,155 +143,3 @@ def forward_J(alpha, beta): var_assign(error, beta_ref) var_axpy(error, -1.0, beta) assert var_linf_norm(error) < 1.0e-8 - - -@pytest.mark.fenics -@pytest.mark.skipif(complex_mode, reason="real only") -@pytest.mark.xfail -@seed_test -def test_l_bfgs_single(setup_test, test_leaks): - mesh = UnitSquareMesh(3, 3) - X = SpatialCoordinate(mesh) - space = FunctionSpace(mesh, "Lagrange", 1) - test = TestFunction(space) - M_l = Function(space, name="M_l", space_type="conjugate_dual") - assemble(test * dx, tensor=M_l) - - x_star = Function(space, name="x_star") - interpolate_expression(x_star, sin(pi * X[0]) * sin(2.0 * pi * X[1])) - - def F(x): - check_space_type(x, "primal") - return assemble(0.5 * inner(x - x_star, x - x_star) * dx) - - def Fp(x): - check_space_type(x, "primal") - Fp = Function(space, name="Fp", space_type="conjugate_dual") - assemble(inner(x - x_star, 
test) * dx, tensor=Fp) - return Fp - - def H_0_action(x): - check_space_type(x, "conjugate_dual") - H_0_action = Function(space, name="H_0_action") - var_set_values(H_0_action, - var_get_values(x) - / var_get_values(M_l)) - return H_0_action - - def B_0_action(x): - check_space_type(x, "primal") - B_0_action = Function(space, name="B_0_action", - space_type="conjugate_dual") - var_set_values(B_0_action, - var_get_values(x) - * var_get_values(M_l)) - return B_0_action - - x0 = Function(space, name="x0") - x, (its, F_calls, Fp_calls, _) = l_bfgs( - F, Fp, x0, m=30, s_atol=0.0, g_atol=1.0e-12, - H_0_action=H_0_action, M_action=B_0_action, M_inv_action=H_0_action) - - error = var_copy(x, name="error") - var_axpy(error, -1.0, x_star) - error_norm = var_linf_norm(error) - info(f"{error_norm=:.6e}") - info(f"{F_calls=:d}") - info(f"{Fp_calls=:d}") - - assert abs(F(x)) < 1.0e-25 - assert error_norm < 1.0e-12 - assert its == 15 - assert F_calls == 17 - assert Fp_calls == 17 - - -@pytest.mark.fenics -@pytest.mark.skipif(complex_mode, reason="real only") -@pytest.mark.xfail -@seed_test -def test_l_bfgs_multiple(setup_test, test_leaks): - mesh = UnitSquareMesh(3, 3) - X = SpatialCoordinate(mesh) - space_x = FunctionSpace(mesh, "Lagrange", 1) - space_y = FunctionSpace(mesh, "Discontinuous Lagrange", 1) - test_x = TestFunction(space_x) - test_y = TestFunction(space_y) - M_l_x = Function(space_x, name="M_l_x", space_type="conjugate_dual") - M_l_y = Function(space_y, name="M_l_y", space_type="conjugate_dual") - assemble(test_x * dx, tensor=M_l_x) - assemble(test_y * dx, tensor=M_l_y) - - x_star = Function(space_x, name="x_star") - interpolate_expression(x_star, sin(pi * X[0]) * sin(2.0 * pi * X[1])) - y_star = Function(space_y, name="y_star") - interpolate_expression(y_star, exp(X[0]) * exp(X[1])) - alpha_y = (1.0 + X[0]) * (1.0 + X[0]) - - def F(x, y): - check_space_type(x, "primal") - check_space_type(y, "primal") - return assemble(0.5 * inner(x - x_star, x - x_star) * dx - + 0.5 * inner(y - y_star, alpha_y * (y - y_star)) * dx) - - def Fp(x, y): - check_space_type(x, "primal") - check_space_type(y, "primal") - Fp = (Function(space_x, name="Fp_0", space_type="conjugate_dual"), - Function(space_y, name="Fp_1", space_type="conjugate_dual")) - assemble(inner(x - x_star, test_x) * dx, tensor=Fp[0]) - assemble(inner(alpha_y * (y - y_star), test_y) * dx, tensor=Fp[1]) - return Fp - - def H_0_action(x, y): - check_space_type(x, "conjugate_dual") - check_space_type(y, "conjugate_dual") - H_0_action = (Function(space_x, name="H_0_action_0"), - Function(space_y, name="H_0_action_1")) - var_set_values(H_0_action[0], - var_get_values(x) - / var_get_values(M_l_x)) - var_set_values(H_0_action[1], - var_get_values(y) - / var_get_values(M_l_y)) - return H_0_action - - def B_0_action(x, y): - check_space_type(x, "primal") - check_space_type(y, "primal") - B_0_action = (Function(space_x, name="B_0_action_0", - space_type="conjugate_dual"), - Function(space_y, name="B_0_action_1", - space_type="conjugate_dual")) - var_set_values(B_0_action[0], - var_get_values(x) - * var_get_values(M_l_x)) - var_set_values(B_0_action[1], - var_get_values(y) - * var_get_values(M_l_y)) - return B_0_action - - x0 = Function(space_x, name="x0") - y0 = Function(space_y, name="y0") - (x, y), (its, F_calls, Fp_calls, _) = \ - l_bfgs(F, Fp, (x0, y0), m=30, s_atol=0.0, g_atol=1.0e-12, - H_0_action=H_0_action, - M_action=B_0_action, M_inv_action=H_0_action) - - x_error = var_copy(x, name="x_error") - var_axpy(x_error, -1.0, x_star) - x_error_norm = 
var_linf_norm(x_error) - y_error = var_copy(y, name="y_error") - var_axpy(y_error, -1.0, y_star) - y_error_norm = var_linf_norm(y_error) - info(f"{x_error_norm=:.6e}") - info(f"{y_error_norm=:.6e}") - info(f"{F_calls=:d}") - info(f"{Fp_calls=:d}") - - assert abs(F(x, y)) < 1.0e-23 - assert x_error_norm < 1.0e-11 - assert y_error_norm < 1.0e-11 - assert its <= 38 - assert F_calls <= 42 - assert Fp_calls <= 42 diff --git a/tests/firedrake/test_hessian_system.py b/tests/firedrake/test_hessian_system.py index 1b0187a1..db0974ae 100644 --- a/tests/firedrake/test_hessian_system.py +++ b/tests/firedrake/test_hessian_system.py @@ -73,10 +73,13 @@ def forward_J(m): sin(2.0 * pi * X[0]) * sin(3.0 * pi * X[1]) * exp(4.0 * X[0] * X[1])) m0 = Function(space, name="m0") - m, _ = minimize_l_bfgs( + m = minimize_tao( forward_J, m0, - s_atol=0.0, g_atol=1.0e-7, - H_0_action=B, M_action=B_inv, M_inv_action=B) + solver_parameters={"tao_type": "lmvm", + "tao_gatol": 1.0e-7, + "tao_grtol": 0.0, + "tao_gttol": 0.0}, + H_0_action=B) b_ref = Cofunction(space.dual(), name="b_ref") assemble(inner((sin(5.0 * pi * X[0]) * sin(7.0 * pi * X[1])) ** 2, test) * dx, # noqa: E501 diff --git a/tests/firedrake/test_minimize.py b/tests/firedrake/test_minimize.py index 313f2447..c172a2dd 100644 --- a/tests/firedrake/test_minimize.py +++ b/tests/firedrake/test_minimize.py @@ -26,12 +26,6 @@ def scipy_trust_ncg_minimization(forward, M0): return M -def l_bfgs_minimization(forward, M0): - M, _ = minimize_l_bfgs( - forward, M0, s_atol=0.0, g_atol=1.0e-11) - return M - - def tao_lmvm_minimization(forward, m0): return minimize_tao(forward, m0, solver_parameters={"tao_type": "lmvm", @@ -67,7 +61,6 @@ def tao_nls_minimization(forward, m0): @pytest.mark.firedrake @pytest.mark.parametrize("minimize", [scipy_l_bfgs_b_minimization, scipy_trust_ncg_minimization, - l_bfgs_minimization, tao_lmvm_minimization, tao_lmvm_minimization_M_inv, tao_nls_minimization]) @@ -113,7 +106,6 @@ def forward_J(alpha): @pytest.mark.firedrake @pytest.mark.parametrize("minimize", [scipy_l_bfgs_b_minimization, scipy_trust_ncg_minimization, - l_bfgs_minimization, tao_lmvm_minimization, tao_nls_minimization]) @pytest.mark.skipif(complex_mode, reason="real only") @@ -168,150 +160,3 @@ def forward_J(alpha, beta): var_assign(error, beta_ref) var_axpy(error, -1.0, beta) assert var_linf_norm(error) < 1.0e-8 - - -@pytest.mark.firedrake -@pytest.mark.skipif(complex_mode, reason="real only") -@seed_test -def test_l_bfgs_single(setup_test, test_leaks): - mesh = UnitSquareMesh(3, 3) - X = SpatialCoordinate(mesh) - space = FunctionSpace(mesh, "Lagrange", 1) - test = TestFunction(space) - M_l = Cofunction(space.dual(), name="M_l") - assemble(test * dx, tensor=M_l) - - x_star = Function(space, name="x_star") - interpolate_expression(x_star, sin(pi * X[0]) * sin(2.0 * pi * X[1])) - - def F(x): - check_space_type(x, "primal") - return assemble(0.5 * inner(x - x_star, x - x_star) * dx) - - def Fp(x): - check_space_type(x, "primal") - Fp = Cofunction(space.dual(), name="Fp") - assemble(inner(x - x_star, test) * dx, tensor=Fp) - return Fp - - def H_0_action(x): - check_space_type(x, "conjugate_dual") - H_0_action = Function(space, name="H_0_action") - var_set_values(H_0_action, - var_get_values(x) - / var_get_values(M_l)) - return H_0_action - - def B_0_action(x): - check_space_type(x, "primal") - B_0_action = Cofunction(space.dual(), name="B_0_action") - var_set_values(B_0_action, - var_get_values(x) - * var_get_values(M_l)) - return B_0_action - - x0 = Function(space, name="x0") - 
x, (its, F_calls, Fp_calls, _) = l_bfgs( - F, Fp, x0, m=30, s_atol=0.0, g_atol=1.0e-12, - H_0_action=H_0_action, M_action=B_0_action, M_inv_action=H_0_action) - - error = var_copy(x, name="error") - var_axpy(error, -1.0, x_star) - error_norm = var_linf_norm(error) - info(f"{error_norm=:.6e}") - info(f"{F_calls=:d}") - info(f"{Fp_calls=:d}") - - assert abs(F(x)) < 1.0e-25 - assert error_norm < 1.0e-12 - assert its == 15 - assert F_calls == 17 - assert Fp_calls == 17 - - -@pytest.mark.firedrake -@pytest.mark.skipif(complex_mode, reason="real only") -@seed_test -def test_l_bfgs_multiple(setup_test, test_leaks): - mesh = UnitSquareMesh(3, 3) - X = SpatialCoordinate(mesh) - space_x = FunctionSpace(mesh, "Lagrange", 1) - space_y = FunctionSpace(mesh, "Discontinuous Lagrange", 1) - test_x = TestFunction(space_x) - test_y = TestFunction(space_y) - M_l_x = Cofunction(space_x.dual(), name="M_l_x") - M_l_y = Cofunction(space_y.dual(), name="M_l_y") - assemble(test_x * dx, tensor=M_l_x) - assemble(test_y * dx, tensor=M_l_y) - - x_star = Function(space_x, name="x_star") - interpolate_expression(x_star, sin(pi * X[0]) * sin(2.0 * pi * X[1])) - y_star = Function(space_y, name="y_star") - interpolate_expression(y_star, exp(X[0]) * exp(X[1])) - alpha_y = (1.0 + X[0]) * (1.0 + X[0]) - - def F(x, y): - check_space_type(x, "primal") - check_space_type(y, "primal") - return assemble(0.5 * inner(x - x_star, x - x_star) * dx - + 0.5 * inner(y - y_star, alpha_y * (y - y_star)) * dx) - - def Fp(x, y): - check_space_type(x, "primal") - check_space_type(y, "primal") - Fp = (Cofunction(space_x.dual(), name="Fp_0"), - Cofunction(space_y.dual(), name="Fp_1")) - assemble(inner(x - x_star, test_x) * dx, tensor=Fp[0]) - assemble(inner(alpha_y * (y - y_star), test_y) * dx, tensor=Fp[1]) - return Fp - - def H_0_action(x, y): - check_space_type(x, "conjugate_dual") - check_space_type(y, "conjugate_dual") - H_0_action = (Function(space_x, name="H_0_action_0"), - Function(space_y, name="H_0_action_1")) - var_set_values(H_0_action[0], - var_get_values(x) - / var_get_values(M_l_x)) - var_set_values(H_0_action[1], - var_get_values(y) - / var_get_values(M_l_y)) - return H_0_action - - def B_0_action(x, y): - check_space_type(x, "primal") - check_space_type(y, "primal") - B_0_action = (Cofunction(space_x.dual(), name="B_0_action_0"), - Cofunction(space_y.dual(), name="B_0_action_1")) - var_set_values(B_0_action[0], - var_get_values(x) - * var_get_values(M_l_x)) - var_set_values(B_0_action[1], - var_get_values(y) - * var_get_values(M_l_y)) - return B_0_action - - x0 = Function(space_x, name="x0") - y0 = Function(space_y, name="y0") - (x, y), (its, F_calls, Fp_calls, _) = \ - l_bfgs(F, Fp, (x0, y0), m=30, s_atol=0.0, g_atol=1.0e-12, - H_0_action=H_0_action, - M_action=B_0_action, M_inv_action=H_0_action) - - x_error = var_copy(x, name="x_error") - var_axpy(x_error, -1.0, x_star) - x_error_norm = var_linf_norm(x_error) - y_error = var_copy(y, name="y_error") - var_axpy(y_error, -1.0, y_star) - y_error_norm = var_linf_norm(y_error) - info(f"{x_error_norm=:.6e}") - info(f"{y_error_norm=:.6e}") - info(f"{F_calls=:d}") - info(f"{Fp_calls=:d}") - - assert abs(F(x, y)) < 1.0e-23 - assert x_error_norm < 1.0e-11 - assert y_error_norm < 1.0e-10 - assert its <= 39 - assert F_calls <= 42 - assert Fp_calls <= 42 diff --git a/tlm_adjoint/hessian.py b/tlm_adjoint/hessian.py index a74c26dd..77712f6e 100644 --- a/tlm_adjoint/hessian.py +++ b/tlm_adjoint/hessian.py @@ -83,7 +83,7 @@ class GeneralHessian(Hessian): ---------- forward : callable - 
Accepts one or more variable arguments, and returns a variable + Accepts one or more variable arguments, and returns a scalar variable defining the functional. manager : :class:`.EquationManager` Used to create an internal manager via :meth:`.EquationManager.new`. diff --git a/tlm_adjoint/optimization.py b/tlm_adjoint/optimization.py index 63d828bf..96198a2a 100644 --- a/tlm_adjoint/optimization.py +++ b/tlm_adjoint/optimization.py @@ -1,10 +1,8 @@ from .interface import ( - Packed, comm_dup_cached, garbage_cleanup, is_var, packed, - paused_space_type_checking, space_dtype, var_axpy, var_comm, var_copy, - var_dtype, var_get_values, var_is_cached, var_is_static, var_linf_norm, - var_local_size, var_locked, var_new, var_scalar_value, var_set_values, - var_space, vars_assign, vars_axpy, vars_copy, vars_inner, vars_new, - vars_new_conjugate_dual) + Packed, comm_dup_cached, garbage_cleanup, is_var, packed, space_dtype, + var_axpy, var_copy, var_dtype, var_get_values, var_is_cached, + var_is_static, var_linf_norm, var_local_size, var_locked, var_new, + var_new_conjugate_dual, var_scalar_value, var_set_values, var_space) from .caches import clear_caches, local_caches from .hessian import GeneralHessian as Hessian @@ -16,10 +14,8 @@ compute_gradient, manager_disabled, reset_manager, restore_manager, set_manager, start_manager, stop_manager) -from collections import deque import contextlib from functools import cached_property, wraps -import logging import numbers import numpy as np @@ -27,11 +23,7 @@ [ "minimize_scipy", - "LBFGSHessianApproximation", "TAOSolver", - "l_bfgs", - "line_search", - "minimize_l_bfgs", "minimize_tao" ] @@ -90,7 +82,7 @@ def objective(self, M, *, M = tuple(var_copy(m, static=var_is_static(m), cache=var_is_cached(m)) for m in M) - M_val = vars_copy(M) + M_val = tuple(map(var_copy, M)) reset_manager() clear_caches() @@ -160,26 +152,35 @@ def duplicated_comm(comm): @local_caches def minimize_scipy(forward, M0, *, manager=None, **kwargs): - """Provides an interface with :func:`scipy.optimize.minimize` for - gradient-based optimization. - - Note that the control is gathered onto the root process so that the serial - :func:`scipy.optimize.minimize` function may be used. - - All keyword arguments except for `manager` are passed to - :func:`scipy.optimize.minimize`. - - :arg forward: A callable which accepts one or more variable arguments, and - which returns a variable defining the forward functional. - :arg M0: A variable or :class:`Sequence` of variables defining the control, - and the initial guess for the optimization. - :arg manager: An :class:`.EquationManager` used to create an internal - manager via :meth:`.EquationManager.new`. `manager()` is used if not - supplied. - :returns: A :class:`tuple` `(M, return_value)`. `M` is variable or a - :class:`Sequence` of variables depending on the type of `M0`, and - stores the result. `return_value` is the return value of - :func:`scipy.optimize.minimize`. + """Minimize a functional using :func:`scipy.optimize.minimize`. + + Warnings + -------- + + Data is gathered onto the root process (process zero) so that the serial + :func:`scipy.optimize.minimize` can be used. + + Parameters + ---------- + + forward : callable + Accepts one or more variable arguments, and returns a scalar variable + defining the functional. + M0 : variable or Sequence[variable, ...] + The initial guess. + manager : :class:`.EquationManager` + Used to create an internal manager via :meth:`.EquationManager.new`. + `manager()` is used if not supplied. 
+    kwargs
+        Passed to :func:`scipy.optimize.minimize`.
+
+    Returns
+    -------
+
+    M : variable or Sequence[variable, ...]
+        The result of the minimization.
+    minimize_return_value
+        The return value from :func:`scipy.optimize.minimize`.
     """

     M0_packed = Packed(M0)
@@ -298,13 +299,6 @@ def hessp_bcast(x, p):
     return M0_packed.unpack(M), return_value


-def conjugate_dual_identity_action(*X):
-    M_X = vars_new_conjugate_dual(X)
-    with paused_space_type_checking():
-        vars_assign(M_X, X)
-    return M_X
-
-
 def wrapped_action(M):
     M_arg = M

@@ -320,622 +314,34 @@ def M(*X):
     return M


-class LBFGSHessianApproximation:
-    """L-BFGS Hessian approximation.
-
-    :arg m: Maximum number of vector pairs to retain in the L-BFGS Hessian
-        approximation.
-    """
-
-    def __init__(self, m):
-        self._iterates = deque(maxlen=m)
-
-    def append(self, S, Y, S_inner_Y):
-        """Add a step + gradient change pair.
-
-        :arg S: A variable or a :class:`Sequence` of variables defining the
-            step.
-        :arg Y: A variable or a :class:`Sequence` of variables defining the
-            gradient change.
-        :arg S_inner_Y: The projection of the gradient change onto the step.
-            A separate argument so that a value consistent with
-            that used in the line search can be supplied.
-        """
-
-        S = packed(S)
-        Y = packed(Y)
-        if len(S) != len(Y):
-            raise ValueError("Incompatible shape")
-        for s in S:
-            if not issubclass(var_dtype(s), np.floating):
-                raise ValueError("Invalid dtype")
-        for y in Y:
-            if not issubclass(var_dtype(y), np.floating):
-                raise ValueError("Invalid dtype")
-        if S_inner_Y <= 0.0:
-            raise ValueError("Invalid S_inner_Y")
-
-        rho = 1.0 / S_inner_Y
-        self._iterates.append((rho, vars_copy(S), vars_copy(Y)))
-
-    def inverse_action(self, X, *,
-                       H_0_action=None, theta=1.0):
-        """Compute the action of the approximate Hessian inverse on some given
-        direction.
-
-        Implements the L-BFGS Hessian inverse action approximation as in
-        Algorithm 7.4 of
-
-        - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization',
-          Springer, New York, NY, 2006, Second edition,
-          doi: 10.1007/978-0-387-40065-5
-
-        Uses 'theta scaling' as in equation (3.7) of
-
-        - Richard H. Byrd, Peihuang Lu, and Jorge Nocedal, and Ciyou Zhu,
-          'A limited memory algorithm for bound constrained optimization',
-          SIAM Journal on Scientific Computing, 16(5), 1190--1208, 1995,
-          doi: 10.1137/0916069
-
-        :arg X: A variable or a :class:`Sequence` of variables defining the
-            direction on which to compute the approximate Hessian inverse
-            action.
-        :arg H_0_action: A callable defining the action of the non-updated
-            Hessian inverse approximation on some direction. Accepts one or
-            more variables as arguments, defining the direction, and returns a
-            variable or a :class:`Sequence` of variables defining the action on
-            this direction. Should correspond to a positive definite operator.
-            Arguments should not be modified. An identity is used if not
-            supplied.
-        :returns: A variable or a :class:`Sequence` of variables storing the
-            result.
- """ - - X_packed = Packed(X) - X = vars_copy(X_packed) - - if H_0_action is None: - H_0_action = wrapped_action(conjugate_dual_identity_action) - else: - H_0_action = wrapped_action(H_0_action) - - alphas = [] - for rho, S, Y in reversed(self._iterates): - alpha = rho * vars_inner(S, X) - vars_axpy(X, -alpha, Y) - alphas.append(alpha) - alphas.reverse() - - R = vars_copy(H_0_action(*X)) - if theta != 1.0: - for r in R: - var_set_values(r, var_get_values(r) / theta) - - assert len(self._iterates) == len(alphas) - for (rho, S, Y), alpha in zip(self._iterates, alphas): - beta = rho * vars_inner(R, Y) - vars_axpy(R, alpha - beta, S) - - return X_packed.unpack(R) - - -def line_search(F, Fp, X, minus_P, *, - c1=1.0e-4, c2=0.9, - old_F_val=None, old_Fp_val=None, - comm=None): - """Line search using TAO. - - Uses the `PETSc.TAOLineSearch.Type.MORETHUENTE` line search type, yielding - a step which satisfies the Wolfe conditions (and the strong curvature - condition). See - - - Jorge J. Moré and David J. Thuente, 'Line search algorithms with - guaranteed sufficient decrease', ACM Transactions on Mathematical - Software 20(3), 286--307, 1994, doi: 10.1145/192115.192132 - - :arg F: A callable defining the functional. Accepts one or more variables - as arguments, and returns the value of the functional. Arguments should - not be modified. - :arg Fp: A callable defining the functional gradient. Accepts one or more - variables as inputs, and returns a variable or :class:`Sequence` of - variables storing the value of the gradient. Arguments should not be - modified. - :arg X: A variable or a :class:`Sequence` of variables defining the - starting point for the line search. - :arg minus_P: A variable or a :class:`Sequence` of variables defining the - *negative* of the line search direction. - :arg c1: Armijo condition parameter. :math:`c_1` in equation (3.6a) of - - - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization', - Springer, New York, NY, 2006, Second edition, - doi: 10.1007/978-0-387-40065-5 - - :arg c2: Curvature condition parameter. :math:`c_2` in equation (3.6b) of - - - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization', - Springer, New York, NY, 2006, Second edition, - doi: 10.1007/978-0-387-40065-5 - - :arg old_F_val: The value of `F` at the starting point of the line search. - :arg old_Fp_val: The value of `Fp` at the starting point of the line - search. - :arg comm: A communicator. - :returns: A :class:`tuple` `(alpha, new_F_val, new_Fp_val)` - where - - - `alpha`: Defines the step size. The step is given by the product of - `alpha` with `-minus_P` (noting the *negative* sign for the latter). - - `new_F_val`: The new value of `F`. - - `new_Fp_val`: The new value of `Fp`. 
- """ - - import petsc4py.PETSc as PETSc - - F_arg = F - - def F(*X): - with var_locked(*X): - return F_arg(*X) - - Fp = wrapped_action(Fp) - X = packed(X) - minus_P = packed(minus_P) - if old_F_val is None: - old_F_val = F(*X) - if old_Fp_val is None: - old_Fp_val = Fp(*X) - else: - old_Fp_val = packed(old_Fp_val) - - if comm is None: - comm = var_comm(X[0]) - comm = comm_dup_cached(comm, key="tao") - - vec_interface = PETScVecInterface(tuple(map(var_space, X)), - dtype=PETSc.RealType, comm=comm) - to_petsc, from_petsc = vec_interface.to_petsc, vec_interface.from_petsc - - Y = tuple(var_new(x, static=var_is_static(x), cache=var_is_cached(x)) - for x in X) - - def objective(taols, x): - from_petsc(x, Y) - F_val = F(*Y) - old_F_val - return F_val - - def gradient(taols, x, g): - from_petsc(x, Y) - dJ = Fp(*Y) - to_petsc(g, dJ) - - def objective_gradient(taols, x, g): - from_petsc(x, Y) - F_val = F(*Y) - old_F_val - dJ = Fp(*Y) - to_petsc(g, dJ) - return F_val - - taols = PETSc.TAOLineSearch().create(comm=comm) - taols.setObjective(objective) - taols.setGradient(gradient) - taols.setObjectiveGradient(objective_gradient) - taols.setType(PETSc.TAOLineSearch.Type.MORETHUENTE) - - options = PETScOptions(f"_tlm_adjoint__{taols.name:s}_") - options["tao_ls_ftol"] = c1 - options["tao_ls_gtol"] = c2 - taols.setOptionsPrefix(options.options_prefix) - - taols.setFromOptions() - taols.setUp() - - x = vec_interface.new_vec() - x.to_petsc(X) - - g = vec_interface.new_vec() - g.to_petsc(old_Fp_val) - - s = vec_interface.new_vec() - s.to_petsc(minus_P) - s.vec.scale(-1.0) - - try: - phi, alpha, reason = taols.apply(x.vec, g.vec, s.vec) - if reason != PETSc.TAOLineSearch.Reason.SUCCESS: - raise RuntimeError("Line search failure") - finally: - taols.destroy() +class TAOSolver: + """Functional minimization using TAO. - new_Fp_val = vars_new_conjugate_dual(X) - g.from_petsc(new_Fp_val) - - return alpha, phi + old_F_val, new_Fp_val - - -def l_bfgs(F, Fp, X0, *, - m=30, s_atol, g_atol, converged=None, max_its=1000, - H_0_action=None, theta_scale=True, delta=1.0, - M_action=None, M_inv_action=None, - c1=1.0e-4, c2=0.9, - comm=None): - r"""Functional minimization using the L-BFGS algorithm. - - Implements Algorithm 7.5 of - - - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization', - Springer, New York, NY, 2006, Second edition, - doi: 10.1007/978-0-387-40065-5 - - in a more general inner product space. - - By default uses 'theta scaling' to define the initial Hessian inverse - approximation, based on the approach in equation (3.7) and point 7 on p. - 1204 of - - - Richard H. Byrd, Peihuang Lu, and Jorge Nocedal, and Ciyou Zhu, 'A - limited memory algorithm for bound constrained optimization', SIAM - Journal on Scientific Computing, 16(5), 1190--1208, 1995, - doi: 10.1137/0916069 - - and with an initial value for the scaling parameter based on the discussion - in 'Implementation' in section 6.1 of - - - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization', - Springer, New York, NY, 2006, Second edition, - doi: 10.1007/978-0-387-40065-5 - - Precisely the Hessian inverse approximation, before being updated, is - scaled by :math:`1 / \theta` with, on the first iteration, - - .. math:: - - \theta = \frac{\sqrt{\left| g_k^* M^{-1} g_k \right|}}{\delta} - - and on later iterations - - .. 
math:: - - \theta = \frac{y_k^* H_0 y_k}{y_k^* s_k}, - - where :math:`g_k`, :math:`y_k`, and :math:`s_k` are respectively the - gradient, gradient change, and step, and where :math:`M^{-1}` and - :math:`H_0` are defined by `M_inv_action` and `H_0_action` respectively. - - The line search is performed using :func:`.line_search`. - - :arg F: A callable defining the functional. Accepts one or more variables - as arguments, and returns the value of the functional. Arguments should - not be modified. - :arg Fp: A callable defining the functional gradient. Accepts one or more - variables as inputs, and returns a variable or :class:`Sequence` of - variables storing the value of the gradient. Arguments should not be - modified. - :arg X0: A variable or a :class:`Sequence` of variables defining the - initial guess for the parameters. - :arg m: The maximum number of step + gradient change pairs to use in the - Hessian inverse approximation. - :arg s_atol: Absolute tolerance for the step change norm convergence - criterion. - :arg g_atol: Absolute tolerance for the gradient norm convergence - criterion. - :arg converged: A callable defining a callback, and which can be used to - define custom convergence criteria. Takes the form - - .. code-block:: python - - def converged(it, F_old, F_new, X_new, G_new, S, Y): - - with - - - `it`: The iteration number. - - `F_old`: The previous value of the functional. - - `F_new`: The new value of the functional. - - `X_new`: A variable or a :class:`Sequence` of variables defining - the new value of the parameters. - - `G_new`: A variable or a :class:`Sequence` of variables defining - the new value of the gradient. - - `S`: A variable or a :class:`Sequence` of variables defining the - step. - - `Y`: A variable or a sequence of variables defining the gradient - change. - - Input variables should not be modified. Returns a :class:`bool` - indicating whether the optimization has converged. - :arg max_its: The maximum number of iterations. - :arg H_0_action: A callable defining the action of the non-updated Hessian - inverse approximation on some direction. Accepts one or more variables - as arguments, defining the direction, and returns a variable or a - :class:`Sequence` of variables defining the action on this direction. - Should correspond to a positive definite operator. Arguments should not - be modified. An identity is used if not supplied. - :arg theta_scale: Whether to apply 'theta scaling', discussed above. - :arg delta: Controls the initial value of :math:`\theta` in 'theta - scaling'. If `None` then on the first iteration :math:`\theta` is set - equal to one. - :arg M_action: A callable defining a primal space inner product, - - .. math:: - - \left< x, y \right>_M = y^* M x, - - where :math:`x` and :math:`y` are degree of freedom vectors for primal - space elements and :math:`M` is a Hermitian and positive definite - matrix. Accepts one or more variables as arguments, defining the - direction, and returns a variable or a :class:`Sequence` of variables - defining the action of :math:`M` on this direction. An identity is used - if not supplied. Arguments should not be modified. Required if - `H_0_action` or `M_inv_action` are supplied. - :arg M_inv_action: A callable defining a (conjugate) dual space inner - product, - - .. math:: - - \left< x, y \right>_{M^{-1}} = y^* M^{-1} x, - - where :math:`x` and :math:`y` are degree of freedom vectors for - (conjugate) dual space elements and :math:`M` is as for `M_action`. 
-        Accepts one or more variables as arguments, defining the direction, and
-        returns a variable or a :class:`Sequence` of variables defining the
-        action of :math:`M^{-1}` on this direction. Arguments should not be
+    Parameters
+    ----------
+
+    forward : callable
+        Accepts one or more variable arguments, and returns a scalar variable
+        defining the functional.
+    space : space or Sequence[space, ...]
+        The control space.
+    solver_parameters : Mapping
+        TAO solver parameters.
+    H_0_action : callable
+        Defines the initial Hessian inverse approximation. Accepts one or more
+        variables as arguments, defining a direction, and returns a variable or
+        a :class:`Sequence` of variables defining the (conjugate of the) action
+        of an initial Hessian inverse approximation on this direction.
+        Arguments should not be modified.
+    M_inv_action : callable
+        Defines a dual space norm. Accepts one or more variables as arguments,
+        defining a direction, and returns a variable or a :class:`Sequence` of
+        variables defining the (conjugate of the) action of a Hermitian and
+        positive definite matrix on this direction. Arguments should not be
         modified. `H_0_action` is used if not supplied.
-    :arg c1: Armijo condition parameter. :math:`c_1` in equation (3.6a) of
-
-        - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization',
-          Springer, New York, NY, 2006, Second edition,
-          doi: 10.1007/978-0-387-40065-5
-
-    :arg c2: Curvature condition parameter. :math:`c_2` in equation (3.6b) of
-
-        - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization',
-          Springer, New York, NY, 2006, Second edition,
-          doi: 10.1007/978-0-387-40065-5
-
-    :arg comm: A communicator.
-    :returns: A :class:`tuple` `(X, (it, F_calls, Fp_calls, hessian_approx))`
-        with
-
-        - `X`: The solution. A variable or a :class:`tuple` of variables.
-        - `it`: The number of iterations.
-        - `F_calls`: The number of functional evaluations.
-        - `Fp_calls`: The number of gradient evaluations.
-        - `hessian_approx`: The :class:`.LBFGSHessianApproximation`.
- """ - - logger = logging.getLogger("tlm_adjoint.l_bfgs") - - F_arg = F - F_calls = 0 - - def F(*X): - nonlocal F_calls - - F_calls += 1 - with var_locked(*X): - F_val = F_arg(*X) - if isinstance(F_val, numbers.Integral) \ - or not isinstance(F_val, numbers.Real): - raise TypeError("Invalid type") - return F_val - - Fp_arg = Fp - Fp_calls = 0 - - def Fp(*X): - nonlocal Fp_calls - - Fp_calls += 1 - with var_locked(*X): - Fp_val = Fp_arg(*X) - Fp_val = packed(Fp_val) - if len(Fp_val) != len(X): - raise ValueError("Incompatible shape") - return Fp_val - - X0_packed = Packed(X0) - X0 = tuple(X0_packed) - - if converged is None: - def converged(it, F_old, F_new, X_new, G_new, S, Y): - return False - else: - converged_arg = converged - - @wraps(converged_arg) - def converged(it, F_old, F_new, X_new, G_new, S, Y): - return converged_arg(it, F_old, F_new, - X0_packed.unpack(X_new), X0_packed.unpack(G_new), # noqa: E501 - X0_packed.unpack(S), X0_packed.unpack(Y)) - - if (H_0_action is None and M_inv_action is None) and M_action is not None: - raise TypeError("If M_action is supplied, then H_0_action or " - "M_inv_action must be supplied") - if (H_0_action is not None or M_inv_action is not None) and M_action is None: # noqa: E501 - raise TypeError("If H_0_action or M_inv_action are supplied, then " - "M_action must be supplied") - - if H_0_action is None: - def H_0_norm_sq(X): - with paused_space_type_checking(): - return abs(vars_inner(X, X)) - else: - H_0_norm_sq_H_0_action = wrapped_action(H_0_action) - - def H_0_norm_sq(X): - return abs(vars_inner(H_0_norm_sq_H_0_action(*X), X)) - - if M_action is None: - def M_norm_sq(X): - with paused_space_type_checking(): - return abs(vars_inner(X, X)) - else: - M_norm_sq_M_action = wrapped_action(M_action) - - def M_norm_sq(X): - return abs(vars_inner(X, M_norm_sq_M_action(*X))) - del M_action - - if M_inv_action is None: - M_inv_norm_sq = H_0_norm_sq - else: - M_inv_norm_sq_M_inv_action = wrapped_action(M_inv_action) - - def M_inv_norm_sq(X): - return abs(vars_inner(M_inv_norm_sq_M_inv_action(*X), X)) - del M_inv_action - - if comm is None: - comm = var_comm(X0[0]) - comm = comm_dup_cached(comm) - - X = tuple(var_copy(x0, static=var_is_static(x0), cache=var_is_cached(x0)) - for x0 in X0) - del X0 - old_F_val = F(*X) - old_Fp_val = Fp(*X) - old_Fp_norm_sq = M_inv_norm_sq(old_Fp_val) - - hessian_approx = LBFGSHessianApproximation(m) - if theta_scale and delta is not None: - theta = np.sqrt(old_Fp_norm_sq) / delta - else: - theta = 1.0 - - it = 0 - logger.debug(f"L-BFGS: Iteration {it:d}, " - f"F calls {F_calls:d}, " - f"Fp calls {Fp_calls:d}, " - f"functional value {old_F_val:.6e}") - while True: - logger.debug(f" Gradient norm = {np.sqrt(old_Fp_norm_sq):.6e}") - if g_atol is not None and old_Fp_norm_sq <= g_atol * g_atol: - break - - if it >= max_its: - raise RuntimeError("L-BFGS: Maximum number of iterations exceeded") - - minus_P = hessian_approx.inverse_action( - old_Fp_val, - H_0_action=H_0_action, theta=theta) - minus_P = packed(minus_P) - old_Fp_val_rank0 = -vars_inner(minus_P, old_Fp_val) - alpha, new_F_val, new_Fp_val = line_search( - F, Fp, X, minus_P, c1=c1, c2=c2, - old_F_val=old_F_val, old_Fp_val=old_Fp_val, - comm=comm) - new_Fp_val_rank0 = -vars_inner(minus_P, new_Fp_val) - - if alpha * old_Fp_val_rank0 >= 0.0: - raise RuntimeError("L-BFGS: Line search failure") - if new_F_val > old_F_val + c1 * alpha * old_Fp_val_rank0: - raise RuntimeError("L-BFGS: Armijo condition not satisfied") - if new_Fp_val_rank0 < c2 * old_Fp_val_rank0: - raise 
RuntimeError("L-BFGS: Curvature condition not satisfied") - if abs(new_Fp_val_rank0) > c2 * abs(old_Fp_val_rank0): - logger.warning("L-BFGS: Strong curvature condition not satisfied") - - S = vars_new(minus_P) - vars_axpy(S, -alpha, minus_P) - vars_axpy(X, 1.0, S) - - Y = vars_copy(new_Fp_val) - vars_axpy(Y, -1.0, old_Fp_val) - - # >=0 by curvature condition - S_inner_Y = alpha * (new_Fp_val_rank0 - c2 * old_Fp_val_rank0) - # >0 by definition of c2, and as steps are downhill - S_inner_Y += alpha * (c2 - 1.0) * old_Fp_val_rank0 - - hessian_approx.append(S, Y, S_inner_Y) - if theta_scale: - theta = H_0_norm_sq(Y) / S_inner_Y - else: - theta = 1.0 - - garbage_cleanup(comm) - it += 1 - logger.debug(f"L-BFGS: Iteration {it:d}, " - f"F calls {F_calls:d}, " - f"Fp calls {Fp_calls:d}, " - f"functional value {new_F_val:.6e}") - if converged(it, old_F_val, new_F_val, X, new_Fp_val, S, Y): - break - if s_atol is not None: - s_norm_sq = M_norm_sq(S) - logger.debug(f" Change norm = {np.sqrt(s_norm_sq):.6e}") - if s_norm_sq <= s_atol * s_atol: - break - - old_F_val = new_F_val - old_Fp_val = new_Fp_val - del new_F_val, new_Fp_val, new_Fp_val_rank0 - old_Fp_norm_sq = M_inv_norm_sq(old_Fp_val) - - return X0_packed.unpack(X), (it, F_calls, Fp_calls, hessian_approx) - - -@local_caches -def minimize_l_bfgs(forward, M0, *, - m=30, manager=None, **kwargs): - """Functional minimization using the L-BFGS algorithm. - - :arg forward: A callable which accepts one or more variable arguments, and - which returns a variable defining the forward functional. - :arg M0: A variable or :class:`Sequence` of variables defining the control, - and the initial guess for the optimization. - :arg manager: An :class:`.EquationManager` used to create an internal - manager via :meth:`.EquationManager.new`. `manager()` is used if not - supplied. - - Remaining arguments and the return value are described in the - :func:`.l_bfgs` documentation. - """ - - M0_packed = Packed(M0) - M0 = tuple(M0_packed) - - for m0 in M0: - if not issubclass(var_dtype(m0), np.floating): - raise ValueError("Invalid dtype") - - J_hat = ReducedFunctional(forward, manager=manager) - - X, optimization_data = l_bfgs( - lambda *M: J_hat.objective(M), lambda *M: J_hat.gradient(M), M0, - m=m, comm=J_hat.comm, **kwargs) - - return M0_packed.unpack(X), optimization_data - - -class TAOSolver: - r"""Functional minimization using TAO. - - :arg forward: A callable which accepts one or more variable arguments, and - which returns a variable defining the forward functional. - :arg space: A space or variable, or a :class:`Sequence` of these, defining - the control space. - :arg solver_parameters: A :class:`Mapping` defining TAO solver parameters. - :arg H_0_action: A callable defining the action of the non-updated Hessian - inverse approximation on some direction. Accepts one or more variables - as arguments, defining the direction, and returns a variable or a - :class:`Sequence` of variables defining the action on this direction. - Should correspond to a positive definite operator. Arguments should not - be modified. An identity is used if not supplied. - :arg M_inv_action: A callable defining a (conjugate) dual space inner - product, - - .. math:: - - \left< x, y \right>_{M^{-1}} = y^* M^{-1} x, - - where :math:`x` and :math:`y` are degree of freedom vectors for - (conjugate) dual space elements and :math:`M` is a Hermitian and - positive definite matrix. 
Accepts one or more variables as arguments, - defining the direction, and returns a variable or a :class:`Sequence` - of variables defining the action of :math:`M^{-1}` on this direction. - Arguments should not be modified. H_0_action is used if not supplied. - :arg manager: An :class:`.EquationManager` used to create an internal - manager via :meth:`.EquationManager.new`. `manager()` is used if not - supplied. + manager : :class:`.EquationManager` + Used to create an internal manager via :meth:`.EquationManager.new`. + `manager()` is used if not supplied. """ def __init__(self, forward, space, *, solver_parameters=None, @@ -948,7 +354,9 @@ def __init__(self, forward, space, *, solver_parameters=None, solver_parameters = {} if H_0_action is not None: H_0_action = wrapped_action(H_0_action) - if M_inv_action is not None: + if M_inv_action is None: + M_inv_action = H_0_action + else: M_inv_action = wrapped_action(M_inv_action) for space_i in space: @@ -1030,7 +438,7 @@ def mult(self, A, x, y): if M_inv_action is not None: class GradientNorm: def mult(self, A, x, y): - dJ = vars_new_conjugate_dual(M[0]) + dJ = tuple(map(var_new_conjugate_dual, M[0])) from_petsc(x, dJ) M_inv_X = M_inv_action(*dJ) to_petsc(y, M_inv_X) @@ -1055,7 +463,7 @@ class InitialHessian: class InitialHessianPreconditioner: def apply(self, pc, x, y): - X = vars_new_conjugate_dual(M[0]) + X = tuple(map(var_new_conjugate_dual, M[0])) from_petsc(x, X) H_0_X = H_0_action(*X) to_petsc(y, H_0_X) @@ -1102,17 +510,22 @@ def tao(self): def solve(self, M): """Solve the optimization problem. - :arg M: Defines the solution, and the initial guess. + Parameters + ---------- + + M : variable or Sequence[variable, ...] + Defines the initial guess, and stores the result of the + minimization. """ M = packed(M) x = self._vec_interface.new_vec() x.to_petsc(M) - self._M[0] = tuple(var_new(m, static=var_is_static(m), - cache=var_is_cached(m)) - for m in M) try: + self._M[0] = tuple(var_new(m, static=var_is_static(m), + cache=var_is_cached(m)) + for m in M) self.tao.solve(x.vec) finally: self._M[0] = None @@ -1123,14 +536,24 @@ def solve(self, M): def minimize_tao(forward, M0, *args, **kwargs): - """Functional minimization using TAO. + """Minimize a functional using TAO. + + Parameters + ---------- + + forward : callable + Accepts one or more variable arguments, and returns a scalar variable + defining the functional. + M0 : variable or Sequence[variable, ...] + The initial guess. + args, kwargs + Passed to the :class:`.TAOSolver` constructor. - :arg forward: A callable which accepts one or more variable arguments, and - which returns a variable defining the forward functional. - :arg M0: A variable or :class:`Sequence` of variables defining the control, - and the initial guess for the optimization. + Returns + ------- - Remaining arguments are passed to the :class:`.TAOSolver` constructor. + variable or Sequence[variable, ...] + The result of the minimization. """ M0_packed = Packed(M0)
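
For downstream users of the removed `minimize_l_bfgs`, the TAO LMVM solver is the intended replacement, as in the `tests/firedrake/test_hessian_system.py` change above. The following is a minimal sketch (not part of this patch), assuming a Firedrake build of tlm_adjoint with TAO-enabled PETSc; the mesh, space, and quadratic functional `forward_J` are illustrative placeholders:

.. code-block:: python

    from firedrake import *
    from tlm_adjoint.firedrake import *

    stop_manager()  # do not record this setup code

    mesh = UnitSquareMesh(10, 10)
    X = SpatialCoordinate(mesh)
    space = FunctionSpace(mesh, "Lagrange", 1)

    x_star = Function(space, name="x_star")
    x_star.interpolate(sin(pi * X[0]) * sin(2.0 * pi * X[1]))

    def forward_J(m):
        # A scalar functional, J(m) = (1/2) || m - x_star ||^2 in L^2
        return assemble(0.5 * inner(m - x_star, m - x_star) * dx)

    # Previously: m, _ = minimize_l_bfgs(forward_J, m0, s_atol=0.0,
    #                                    g_atol=1.0e-7)
    m0 = Function(space, name="m0")
    m = minimize_tao(forward_J, m0,
                     solver_parameters={"tao_type": "lmvm",
                                        "tao_gatol": 1.0e-7,
                                        "tao_grtol": 0.0,
                                        "tao_gttol": 0.0})

    # Equivalently, via the TAOSolver class, which can be reused for
    # repeated solves. The result is written into m in-place.
    solver = TAOSolver(forward_J, space,
                       solver_parameters={"tao_type": "lmvm",
                                          "tao_gatol": 1.0e-7,
                                          "tao_grtol": 0.0,
                                          "tao_gttol": 0.0})
    m = Function(space, name="m")
    solver.solve(m)

An initial Hessian inverse approximation can still be supplied via the `H_0_action` argument, and a dual space norm via `M_inv_action`, as described in the new `TAOSolver` docstring.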