diff --git a/docs/source/acknowledgements.rst b/docs/source/acknowledgements.rst index abef5dc8..9a94fb73 100644 --- a/docs/source/acknowledgements.rst +++ b/docs/source/acknowledgements.rst @@ -103,19 +103,6 @@ and in the supporting information for Journal of Geophysical Research: Oceans, 125(11), e2020JC016370, 2020, doi: 10.1029/2020JC016370 -L-BFGS -`````` - -The file `tlm_adjoint/optimization.py -`_ includes an implementation of -the L-BFGS algorithm, described in - -- Jorge Nocedal and Stephen J. Wright, 'Numerical optimization', Springer, New - York, NY, 2006, Second edition, doi: 10.1007/978-0-387-40065-5 -- Richard H. Byrd, Peihuang Lu, and Jorge Nocedal, and Ciyou Zhu, 'A limited - memory algorithm for bound constrained optimization', SIAM Journal on - Scientific Computing, 16(5), 1190--1208, 1995, doi: 10.1137/0916069 - PyTorch ``````` diff --git a/docs/source/examples/8_hessian_uq.ipynb b/docs/source/examples/8_hessian_uq.ipynb index 7a25a332..ccb4504d 100644 --- a/docs/source/examples/8_hessian_uq.ipynb +++ b/docs/source/examples/8_hessian_uq.ipynb @@ -287,7 +287,7 @@ "source": [ "optimizer = TAOSolver(lambda m: forward(m)[2], space, H_0_action=B_action,\n", " solver_parameters={\"tao_type\": \"lmvm\",\n", - " \"tao_gatol\": 1.0e-4,\n", + " \"tao_gatol\": 1.0e-5,\n", " \"tao_grtol\": 0.0,\n", " \"tao_gttol\": 0.0,\n", " \"tao_monitor\": None})\n", diff --git a/tests/fenics/test_minimize.py b/tests/fenics/test_minimize.py index cb05141f..e056aff5 100644 --- a/tests/fenics/test_minimize.py +++ b/tests/fenics/test_minimize.py @@ -26,12 +26,6 @@ def scipy_trust_ncg_minimization(forward, M0): return M -def l_bfgs_minimization(forward, M0): - M, _ = minimize_l_bfgs( - forward, M0, s_atol=0.0, g_atol=1.0e-11) - return M - - def tao_lmvm_minimization(forward, m0): return minimize_tao(forward, m0, solver_parameters={"tao_type": "lmvm", @@ -51,7 +45,6 @@ def tao_nls_minimization(forward, m0): @pytest.mark.fenics @pytest.mark.parametrize("minimize", [scipy_l_bfgs_b_minimization, scipy_trust_ncg_minimization, - pytest.param(l_bfgs_minimization, marks=pytest.mark.xfail), # noqa: E501 tao_lmvm_minimization, pytest.param(tao_nls_minimization, marks=pytest.mark.xfail)]) # noqa: E501 @pytest.mark.skipif(complex_mode, reason="real only") @@ -96,7 +89,6 @@ def forward_J(alpha): @pytest.mark.fenics @pytest.mark.parametrize("minimize", [scipy_l_bfgs_b_minimization, scipy_trust_ncg_minimization, - pytest.param(l_bfgs_minimization, marks=pytest.mark.xfail), # noqa: E501 tao_lmvm_minimization, pytest.param(tao_nls_minimization, marks=pytest.mark.xfail)]) # noqa: E501 @pytest.mark.skipif(complex_mode, reason="real only") @@ -151,155 +143,3 @@ def forward_J(alpha, beta): var_assign(error, beta_ref) var_axpy(error, -1.0, beta) assert var_linf_norm(error) < 1.0e-8 - - -@pytest.mark.fenics -@pytest.mark.skipif(complex_mode, reason="real only") -@pytest.mark.xfail -@seed_test -def test_l_bfgs_single(setup_test, test_leaks): - mesh = UnitSquareMesh(3, 3) - X = SpatialCoordinate(mesh) - space = FunctionSpace(mesh, "Lagrange", 1) - test = TestFunction(space) - M_l = Function(space, name="M_l", space_type="conjugate_dual") - assemble(test * dx, tensor=M_l) - - x_star = Function(space, name="x_star") - interpolate_expression(x_star, sin(pi * X[0]) * sin(2.0 * pi * X[1])) - - def F(x): - check_space_type(x, "primal") - return assemble(0.5 * inner(x - x_star, x - x_star) * dx) - - def Fp(x): - check_space_type(x, "primal") - Fp = Function(space, name="Fp", space_type="conjugate_dual") - assemble(inner(x - x_star, 
test) * dx, tensor=Fp) - return Fp - - def H_0_action(x): - check_space_type(x, "conjugate_dual") - H_0_action = Function(space, name="H_0_action") - var_set_values(H_0_action, - var_get_values(x) - / var_get_values(M_l)) - return H_0_action - - def B_0_action(x): - check_space_type(x, "primal") - B_0_action = Function(space, name="B_0_action", - space_type="conjugate_dual") - var_set_values(B_0_action, - var_get_values(x) - * var_get_values(M_l)) - return B_0_action - - x0 = Function(space, name="x0") - x, (its, F_calls, Fp_calls, _) = l_bfgs( - F, Fp, x0, m=30, s_atol=0.0, g_atol=1.0e-12, - H_0_action=H_0_action, M_action=B_0_action, M_inv_action=H_0_action) - - error = var_copy(x, name="error") - var_axpy(error, -1.0, x_star) - error_norm = var_linf_norm(error) - info(f"{error_norm=:.6e}") - info(f"{F_calls=:d}") - info(f"{Fp_calls=:d}") - - assert abs(F(x)) < 1.0e-25 - assert error_norm < 1.0e-12 - assert its == 15 - assert F_calls == 17 - assert Fp_calls == 17 - - -@pytest.mark.fenics -@pytest.mark.skipif(complex_mode, reason="real only") -@pytest.mark.xfail -@seed_test -def test_l_bfgs_multiple(setup_test, test_leaks): - mesh = UnitSquareMesh(3, 3) - X = SpatialCoordinate(mesh) - space_x = FunctionSpace(mesh, "Lagrange", 1) - space_y = FunctionSpace(mesh, "Discontinuous Lagrange", 1) - test_x = TestFunction(space_x) - test_y = TestFunction(space_y) - M_l_x = Function(space_x, name="M_l_x", space_type="conjugate_dual") - M_l_y = Function(space_y, name="M_l_y", space_type="conjugate_dual") - assemble(test_x * dx, tensor=M_l_x) - assemble(test_y * dx, tensor=M_l_y) - - x_star = Function(space_x, name="x_star") - interpolate_expression(x_star, sin(pi * X[0]) * sin(2.0 * pi * X[1])) - y_star = Function(space_y, name="y_star") - interpolate_expression(y_star, exp(X[0]) * exp(X[1])) - alpha_y = (1.0 + X[0]) * (1.0 + X[0]) - - def F(x, y): - check_space_type(x, "primal") - check_space_type(y, "primal") - return assemble(0.5 * inner(x - x_star, x - x_star) * dx - + 0.5 * inner(y - y_star, alpha_y * (y - y_star)) * dx) - - def Fp(x, y): - check_space_type(x, "primal") - check_space_type(y, "primal") - Fp = (Function(space_x, name="Fp_0", space_type="conjugate_dual"), - Function(space_y, name="Fp_1", space_type="conjugate_dual")) - assemble(inner(x - x_star, test_x) * dx, tensor=Fp[0]) - assemble(inner(alpha_y * (y - y_star), test_y) * dx, tensor=Fp[1]) - return Fp - - def H_0_action(x, y): - check_space_type(x, "conjugate_dual") - check_space_type(y, "conjugate_dual") - H_0_action = (Function(space_x, name="H_0_action_0"), - Function(space_y, name="H_0_action_1")) - var_set_values(H_0_action[0], - var_get_values(x) - / var_get_values(M_l_x)) - var_set_values(H_0_action[1], - var_get_values(y) - / var_get_values(M_l_y)) - return H_0_action - - def B_0_action(x, y): - check_space_type(x, "primal") - check_space_type(y, "primal") - B_0_action = (Function(space_x, name="B_0_action_0", - space_type="conjugate_dual"), - Function(space_y, name="B_0_action_1", - space_type="conjugate_dual")) - var_set_values(B_0_action[0], - var_get_values(x) - * var_get_values(M_l_x)) - var_set_values(B_0_action[1], - var_get_values(y) - * var_get_values(M_l_y)) - return B_0_action - - x0 = Function(space_x, name="x0") - y0 = Function(space_y, name="y0") - (x, y), (its, F_calls, Fp_calls, _) = \ - l_bfgs(F, Fp, (x0, y0), m=30, s_atol=0.0, g_atol=1.0e-12, - H_0_action=H_0_action, - M_action=B_0_action, M_inv_action=H_0_action) - - x_error = var_copy(x, name="x_error") - var_axpy(x_error, -1.0, x_star) - x_error_norm = 
var_linf_norm(x_error) - y_error = var_copy(y, name="y_error") - var_axpy(y_error, -1.0, y_star) - y_error_norm = var_linf_norm(y_error) - info(f"{x_error_norm=:.6e}") - info(f"{y_error_norm=:.6e}") - info(f"{F_calls=:d}") - info(f"{Fp_calls=:d}") - - assert abs(F(x, y)) < 1.0e-23 - assert x_error_norm < 1.0e-11 - assert y_error_norm < 1.0e-11 - assert its <= 38 - assert F_calls <= 42 - assert Fp_calls <= 42 diff --git a/tests/firedrake/test_hessian_system.py b/tests/firedrake/test_hessian_system.py index 1b0187a1..db0974ae 100644 --- a/tests/firedrake/test_hessian_system.py +++ b/tests/firedrake/test_hessian_system.py @@ -73,10 +73,13 @@ def forward_J(m): sin(2.0 * pi * X[0]) * sin(3.0 * pi * X[1]) * exp(4.0 * X[0] * X[1])) m0 = Function(space, name="m0") - m, _ = minimize_l_bfgs( + m = minimize_tao( forward_J, m0, - s_atol=0.0, g_atol=1.0e-7, - H_0_action=B, M_action=B_inv, M_inv_action=B) + solver_parameters={"tao_type": "lmvm", + "tao_gatol": 1.0e-7, + "tao_grtol": 0.0, + "tao_gttol": 0.0}, + H_0_action=B) b_ref = Cofunction(space.dual(), name="b_ref") assemble(inner((sin(5.0 * pi * X[0]) * sin(7.0 * pi * X[1])) ** 2, test) * dx, # noqa: E501 diff --git a/tests/firedrake/test_minimize.py b/tests/firedrake/test_minimize.py index 313f2447..c172a2dd 100644 --- a/tests/firedrake/test_minimize.py +++ b/tests/firedrake/test_minimize.py @@ -26,12 +26,6 @@ def scipy_trust_ncg_minimization(forward, M0): return M -def l_bfgs_minimization(forward, M0): - M, _ = minimize_l_bfgs( - forward, M0, s_atol=0.0, g_atol=1.0e-11) - return M - - def tao_lmvm_minimization(forward, m0): return minimize_tao(forward, m0, solver_parameters={"tao_type": "lmvm", @@ -67,7 +61,6 @@ def tao_nls_minimization(forward, m0): @pytest.mark.firedrake @pytest.mark.parametrize("minimize", [scipy_l_bfgs_b_minimization, scipy_trust_ncg_minimization, - l_bfgs_minimization, tao_lmvm_minimization, tao_lmvm_minimization_M_inv, tao_nls_minimization]) @@ -113,7 +106,6 @@ def forward_J(alpha): @pytest.mark.firedrake @pytest.mark.parametrize("minimize", [scipy_l_bfgs_b_minimization, scipy_trust_ncg_minimization, - l_bfgs_minimization, tao_lmvm_minimization, tao_nls_minimization]) @pytest.mark.skipif(complex_mode, reason="real only") @@ -168,150 +160,3 @@ def forward_J(alpha, beta): var_assign(error, beta_ref) var_axpy(error, -1.0, beta) assert var_linf_norm(error) < 1.0e-8 - - -@pytest.mark.firedrake -@pytest.mark.skipif(complex_mode, reason="real only") -@seed_test -def test_l_bfgs_single(setup_test, test_leaks): - mesh = UnitSquareMesh(3, 3) - X = SpatialCoordinate(mesh) - space = FunctionSpace(mesh, "Lagrange", 1) - test = TestFunction(space) - M_l = Cofunction(space.dual(), name="M_l") - assemble(test * dx, tensor=M_l) - - x_star = Function(space, name="x_star") - interpolate_expression(x_star, sin(pi * X[0]) * sin(2.0 * pi * X[1])) - - def F(x): - check_space_type(x, "primal") - return assemble(0.5 * inner(x - x_star, x - x_star) * dx) - - def Fp(x): - check_space_type(x, "primal") - Fp = Cofunction(space.dual(), name="Fp") - assemble(inner(x - x_star, test) * dx, tensor=Fp) - return Fp - - def H_0_action(x): - check_space_type(x, "conjugate_dual") - H_0_action = Function(space, name="H_0_action") - var_set_values(H_0_action, - var_get_values(x) - / var_get_values(M_l)) - return H_0_action - - def B_0_action(x): - check_space_type(x, "primal") - B_0_action = Cofunction(space.dual(), name="B_0_action") - var_set_values(B_0_action, - var_get_values(x) - * var_get_values(M_l)) - return B_0_action - - x0 = Function(space, name="x0") - 
x, (its, F_calls, Fp_calls, _) = l_bfgs( - F, Fp, x0, m=30, s_atol=0.0, g_atol=1.0e-12, - H_0_action=H_0_action, M_action=B_0_action, M_inv_action=H_0_action) - - error = var_copy(x, name="error") - var_axpy(error, -1.0, x_star) - error_norm = var_linf_norm(error) - info(f"{error_norm=:.6e}") - info(f"{F_calls=:d}") - info(f"{Fp_calls=:d}") - - assert abs(F(x)) < 1.0e-25 - assert error_norm < 1.0e-12 - assert its == 15 - assert F_calls == 17 - assert Fp_calls == 17 - - -@pytest.mark.firedrake -@pytest.mark.skipif(complex_mode, reason="real only") -@seed_test -def test_l_bfgs_multiple(setup_test, test_leaks): - mesh = UnitSquareMesh(3, 3) - X = SpatialCoordinate(mesh) - space_x = FunctionSpace(mesh, "Lagrange", 1) - space_y = FunctionSpace(mesh, "Discontinuous Lagrange", 1) - test_x = TestFunction(space_x) - test_y = TestFunction(space_y) - M_l_x = Cofunction(space_x.dual(), name="M_l_x") - M_l_y = Cofunction(space_y.dual(), name="M_l_y") - assemble(test_x * dx, tensor=M_l_x) - assemble(test_y * dx, tensor=M_l_y) - - x_star = Function(space_x, name="x_star") - interpolate_expression(x_star, sin(pi * X[0]) * sin(2.0 * pi * X[1])) - y_star = Function(space_y, name="y_star") - interpolate_expression(y_star, exp(X[0]) * exp(X[1])) - alpha_y = (1.0 + X[0]) * (1.0 + X[0]) - - def F(x, y): - check_space_type(x, "primal") - check_space_type(y, "primal") - return assemble(0.5 * inner(x - x_star, x - x_star) * dx - + 0.5 * inner(y - y_star, alpha_y * (y - y_star)) * dx) - - def Fp(x, y): - check_space_type(x, "primal") - check_space_type(y, "primal") - Fp = (Cofunction(space_x.dual(), name="Fp_0"), - Cofunction(space_y.dual(), name="Fp_1")) - assemble(inner(x - x_star, test_x) * dx, tensor=Fp[0]) - assemble(inner(alpha_y * (y - y_star), test_y) * dx, tensor=Fp[1]) - return Fp - - def H_0_action(x, y): - check_space_type(x, "conjugate_dual") - check_space_type(y, "conjugate_dual") - H_0_action = (Function(space_x, name="H_0_action_0"), - Function(space_y, name="H_0_action_1")) - var_set_values(H_0_action[0], - var_get_values(x) - / var_get_values(M_l_x)) - var_set_values(H_0_action[1], - var_get_values(y) - / var_get_values(M_l_y)) - return H_0_action - - def B_0_action(x, y): - check_space_type(x, "primal") - check_space_type(y, "primal") - B_0_action = (Cofunction(space_x.dual(), name="B_0_action_0"), - Cofunction(space_y.dual(), name="B_0_action_1")) - var_set_values(B_0_action[0], - var_get_values(x) - * var_get_values(M_l_x)) - var_set_values(B_0_action[1], - var_get_values(y) - * var_get_values(M_l_y)) - return B_0_action - - x0 = Function(space_x, name="x0") - y0 = Function(space_y, name="y0") - (x, y), (its, F_calls, Fp_calls, _) = \ - l_bfgs(F, Fp, (x0, y0), m=30, s_atol=0.0, g_atol=1.0e-12, - H_0_action=H_0_action, - M_action=B_0_action, M_inv_action=H_0_action) - - x_error = var_copy(x, name="x_error") - var_axpy(x_error, -1.0, x_star) - x_error_norm = var_linf_norm(x_error) - y_error = var_copy(y, name="y_error") - var_axpy(y_error, -1.0, y_star) - y_error_norm = var_linf_norm(y_error) - info(f"{x_error_norm=:.6e}") - info(f"{y_error_norm=:.6e}") - info(f"{F_calls=:d}") - info(f"{Fp_calls=:d}") - - assert abs(F(x, y)) < 1.0e-23 - assert x_error_norm < 1.0e-11 - assert y_error_norm < 1.0e-10 - assert its <= 39 - assert F_calls <= 42 - assert Fp_calls <= 42 diff --git a/tlm_adjoint/hessian.py b/tlm_adjoint/hessian.py index a74c26dd..77712f6e 100644 --- a/tlm_adjoint/hessian.py +++ b/tlm_adjoint/hessian.py @@ -83,7 +83,7 @@ class GeneralHessian(Hessian): ---------- forward : callable - 
Accepts one or more variable arguments, and returns a variable + Accepts one or more variable arguments, and returns a scalar variable defining the functional. manager : :class:`.EquationManager` Used to create an internal manager via :meth:`.EquationManager.new`. diff --git a/tlm_adjoint/optimization.py b/tlm_adjoint/optimization.py index 63d828bf..96198a2a 100644 --- a/tlm_adjoint/optimization.py +++ b/tlm_adjoint/optimization.py @@ -1,10 +1,8 @@ from .interface import ( - Packed, comm_dup_cached, garbage_cleanup, is_var, packed, - paused_space_type_checking, space_dtype, var_axpy, var_comm, var_copy, - var_dtype, var_get_values, var_is_cached, var_is_static, var_linf_norm, - var_local_size, var_locked, var_new, var_scalar_value, var_set_values, - var_space, vars_assign, vars_axpy, vars_copy, vars_inner, vars_new, - vars_new_conjugate_dual) + Packed, comm_dup_cached, garbage_cleanup, is_var, packed, space_dtype, + var_axpy, var_copy, var_dtype, var_get_values, var_is_cached, + var_is_static, var_linf_norm, var_local_size, var_locked, var_new, + var_new_conjugate_dual, var_scalar_value, var_set_values, var_space) from .caches import clear_caches, local_caches from .hessian import GeneralHessian as Hessian @@ -16,10 +14,8 @@ compute_gradient, manager_disabled, reset_manager, restore_manager, set_manager, start_manager, stop_manager) -from collections import deque import contextlib from functools import cached_property, wraps -import logging import numbers import numpy as np @@ -27,11 +23,7 @@ [ "minimize_scipy", - "LBFGSHessianApproximation", "TAOSolver", - "l_bfgs", - "line_search", - "minimize_l_bfgs", "minimize_tao" ] @@ -90,7 +82,7 @@ def objective(self, M, *, M = tuple(var_copy(m, static=var_is_static(m), cache=var_is_cached(m)) for m in M) - M_val = vars_copy(M) + M_val = tuple(map(var_copy, M)) reset_manager() clear_caches() @@ -160,26 +152,35 @@ def duplicated_comm(comm): @local_caches def minimize_scipy(forward, M0, *, manager=None, **kwargs): - """Provides an interface with :func:`scipy.optimize.minimize` for - gradient-based optimization. - - Note that the control is gathered onto the root process so that the serial - :func:`scipy.optimize.minimize` function may be used. - - All keyword arguments except for `manager` are passed to - :func:`scipy.optimize.minimize`. - - :arg forward: A callable which accepts one or more variable arguments, and - which returns a variable defining the forward functional. - :arg M0: A variable or :class:`Sequence` of variables defining the control, - and the initial guess for the optimization. - :arg manager: An :class:`.EquationManager` used to create an internal - manager via :meth:`.EquationManager.new`. `manager()` is used if not - supplied. - :returns: A :class:`tuple` `(M, return_value)`. `M` is variable or a - :class:`Sequence` of variables depending on the type of `M0`, and - stores the result. `return_value` is the return value of - :func:`scipy.optimize.minimize`. + """Minimize a functional using :func:`scipy.optimize.minimize`. + + Warnings + -------- + + Data is gathered onto the root process (process zero) so that the serial + :func:`scipy.optimize.minimize` can be used. + + Parameters + ---------- + + forward : callable + Accepts one or more variable arguments, and returns a scalar variable + defining the functional. + M0 : variable or Sequence[variable, ...] + The initial guess. + manager : :class:`.EquationManager` + Used to create an internal manager via :meth:`.EquationManager.new`. + `manager()` is used if not supplied. 
+    kwargs
+        Passed to :func:`scipy.optimize.minimize`.
+
+    Returns
+    -------
+
+    M : variable or Sequence[variable, ...]
+        The result of the minimization.
+    minimize_return_value
+        The return value from :func:`scipy.optimize.minimize`.
     """

     M0_packed = Packed(M0)
@@ -298,13 +299,6 @@ def hessp_bcast(x, p):
     return M0_packed.unpack(M), return_value


-def conjugate_dual_identity_action(*X):
-    M_X = vars_new_conjugate_dual(X)
-    with paused_space_type_checking():
-        vars_assign(M_X, X)
-    return M_X
-
-
 def wrapped_action(M):
     M_arg = M

@@ -320,622 +314,34 @@ def M(*X):
     return M


-class LBFGSHessianApproximation:
-    """L-BFGS Hessian approximation.
-
-    :arg m: Maximum number of vector pairs to retain in the L-BFGS Hessian
-        approximation.
-    """
-
-    def __init__(self, m):
-        self._iterates = deque(maxlen=m)
-
-    def append(self, S, Y, S_inner_Y):
-        """Add a step + gradient change pair.
-
-        :arg S: A variable or a :class:`Sequence` of variables defining the
-            step.
-        :arg Y: A variable or a :class:`Sequence` of variables defining the
-            gradient change.
-        :arg S_inner_Y: The projection of the gradient change onto the step.
-            A separate argument so that a value consistent with
-            that used in the line search can be supplied.
-        """
-
-        S = packed(S)
-        Y = packed(Y)
-        if len(S) != len(Y):
-            raise ValueError("Incompatible shape")
-        for s in S:
-            if not issubclass(var_dtype(s), np.floating):
-                raise ValueError("Invalid dtype")
-        for y in Y:
-            if not issubclass(var_dtype(y), np.floating):
-                raise ValueError("Invalid dtype")
-        if S_inner_Y <= 0.0:
-            raise ValueError("Invalid S_inner_Y")
-
-        rho = 1.0 / S_inner_Y
-        self._iterates.append((rho, vars_copy(S), vars_copy(Y)))
-
-    def inverse_action(self, X, *,
-                       H_0_action=None, theta=1.0):
-        """Compute the action of the approximate Hessian inverse on some given
-        direction.
-
-        Implements the L-BFGS Hessian inverse action approximation as in
-        Algorithm 7.4 of
-
-        - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization',
-          Springer, New York, NY, 2006, Second edition,
-          doi: 10.1007/978-0-387-40065-5
-
-        Uses 'theta scaling' as in equation (3.7) of
-
-        - Richard H. Byrd, Peihuang Lu, and Jorge Nocedal, and Ciyou Zhu,
-          'A limited memory algorithm for bound constrained optimization',
-          SIAM Journal on Scientific Computing, 16(5), 1190--1208, 1995,
-          doi: 10.1137/0916069
-
-        :arg X: A variable or a :class:`Sequence` of variables defining the
-            direction on which to compute the approximate Hessian inverse
-            action.
-        :arg H_0_action: A callable defining the action of the non-updated
-            Hessian inverse approximation on some direction. Accepts one or
-            more variables as arguments, defining the direction, and returns a
-            variable or a :class:`Sequence` of variables defining the action on
-            this direction. Should correspond to a positive definite operator.
-            Arguments should not be modified. An identity is used if not
-            supplied.
-        :returns: A variable or a :class:`Sequence` of variables storing the
-            result.
- """ - - X_packed = Packed(X) - X = vars_copy(X_packed) - - if H_0_action is None: - H_0_action = wrapped_action(conjugate_dual_identity_action) - else: - H_0_action = wrapped_action(H_0_action) - - alphas = [] - for rho, S, Y in reversed(self._iterates): - alpha = rho * vars_inner(S, X) - vars_axpy(X, -alpha, Y) - alphas.append(alpha) - alphas.reverse() - - R = vars_copy(H_0_action(*X)) - if theta != 1.0: - for r in R: - var_set_values(r, var_get_values(r) / theta) - - assert len(self._iterates) == len(alphas) - for (rho, S, Y), alpha in zip(self._iterates, alphas): - beta = rho * vars_inner(R, Y) - vars_axpy(R, alpha - beta, S) - - return X_packed.unpack(R) - - -def line_search(F, Fp, X, minus_P, *, - c1=1.0e-4, c2=0.9, - old_F_val=None, old_Fp_val=None, - comm=None): - """Line search using TAO. - - Uses the `PETSc.TAOLineSearch.Type.MORETHUENTE` line search type, yielding - a step which satisfies the Wolfe conditions (and the strong curvature - condition). See - - - Jorge J. Moré and David J. Thuente, 'Line search algorithms with - guaranteed sufficient decrease', ACM Transactions on Mathematical - Software 20(3), 286--307, 1994, doi: 10.1145/192115.192132 - - :arg F: A callable defining the functional. Accepts one or more variables - as arguments, and returns the value of the functional. Arguments should - not be modified. - :arg Fp: A callable defining the functional gradient. Accepts one or more - variables as inputs, and returns a variable or :class:`Sequence` of - variables storing the value of the gradient. Arguments should not be - modified. - :arg X: A variable or a :class:`Sequence` of variables defining the - starting point for the line search. - :arg minus_P: A variable or a :class:`Sequence` of variables defining the - *negative* of the line search direction. - :arg c1: Armijo condition parameter. :math:`c_1` in equation (3.6a) of - - - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization', - Springer, New York, NY, 2006, Second edition, - doi: 10.1007/978-0-387-40065-5 - - :arg c2: Curvature condition parameter. :math:`c_2` in equation (3.6b) of - - - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization', - Springer, New York, NY, 2006, Second edition, - doi: 10.1007/978-0-387-40065-5 - - :arg old_F_val: The value of `F` at the starting point of the line search. - :arg old_Fp_val: The value of `Fp` at the starting point of the line - search. - :arg comm: A communicator. - :returns: A :class:`tuple` `(alpha, new_F_val, new_Fp_val)` - where - - - `alpha`: Defines the step size. The step is given by the product of - `alpha` with `-minus_P` (noting the *negative* sign for the latter). - - `new_F_val`: The new value of `F`. - - `new_Fp_val`: The new value of `Fp`. 
- """ - - import petsc4py.PETSc as PETSc - - F_arg = F - - def F(*X): - with var_locked(*X): - return F_arg(*X) - - Fp = wrapped_action(Fp) - X = packed(X) - minus_P = packed(minus_P) - if old_F_val is None: - old_F_val = F(*X) - if old_Fp_val is None: - old_Fp_val = Fp(*X) - else: - old_Fp_val = packed(old_Fp_val) - - if comm is None: - comm = var_comm(X[0]) - comm = comm_dup_cached(comm, key="tao") - - vec_interface = PETScVecInterface(tuple(map(var_space, X)), - dtype=PETSc.RealType, comm=comm) - to_petsc, from_petsc = vec_interface.to_petsc, vec_interface.from_petsc - - Y = tuple(var_new(x, static=var_is_static(x), cache=var_is_cached(x)) - for x in X) - - def objective(taols, x): - from_petsc(x, Y) - F_val = F(*Y) - old_F_val - return F_val - - def gradient(taols, x, g): - from_petsc(x, Y) - dJ = Fp(*Y) - to_petsc(g, dJ) - - def objective_gradient(taols, x, g): - from_petsc(x, Y) - F_val = F(*Y) - old_F_val - dJ = Fp(*Y) - to_petsc(g, dJ) - return F_val - - taols = PETSc.TAOLineSearch().create(comm=comm) - taols.setObjective(objective) - taols.setGradient(gradient) - taols.setObjectiveGradient(objective_gradient) - taols.setType(PETSc.TAOLineSearch.Type.MORETHUENTE) - - options = PETScOptions(f"_tlm_adjoint__{taols.name:s}_") - options["tao_ls_ftol"] = c1 - options["tao_ls_gtol"] = c2 - taols.setOptionsPrefix(options.options_prefix) - - taols.setFromOptions() - taols.setUp() - - x = vec_interface.new_vec() - x.to_petsc(X) - - g = vec_interface.new_vec() - g.to_petsc(old_Fp_val) - - s = vec_interface.new_vec() - s.to_petsc(minus_P) - s.vec.scale(-1.0) - - try: - phi, alpha, reason = taols.apply(x.vec, g.vec, s.vec) - if reason != PETSc.TAOLineSearch.Reason.SUCCESS: - raise RuntimeError("Line search failure") - finally: - taols.destroy() +class TAOSolver: + """Functional minimization using TAO. - new_Fp_val = vars_new_conjugate_dual(X) - g.from_petsc(new_Fp_val) - - return alpha, phi + old_F_val, new_Fp_val - - -def l_bfgs(F, Fp, X0, *, - m=30, s_atol, g_atol, converged=None, max_its=1000, - H_0_action=None, theta_scale=True, delta=1.0, - M_action=None, M_inv_action=None, - c1=1.0e-4, c2=0.9, - comm=None): - r"""Functional minimization using the L-BFGS algorithm. - - Implements Algorithm 7.5 of - - - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization', - Springer, New York, NY, 2006, Second edition, - doi: 10.1007/978-0-387-40065-5 - - in a more general inner product space. - - By default uses 'theta scaling' to define the initial Hessian inverse - approximation, based on the approach in equation (3.7) and point 7 on p. - 1204 of - - - Richard H. Byrd, Peihuang Lu, and Jorge Nocedal, and Ciyou Zhu, 'A - limited memory algorithm for bound constrained optimization', SIAM - Journal on Scientific Computing, 16(5), 1190--1208, 1995, - doi: 10.1137/0916069 - - and with an initial value for the scaling parameter based on the discussion - in 'Implementation' in section 6.1 of - - - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization', - Springer, New York, NY, 2006, Second edition, - doi: 10.1007/978-0-387-40065-5 - - Precisely the Hessian inverse approximation, before being updated, is - scaled by :math:`1 / \theta` with, on the first iteration, - - .. math:: - - \theta = \frac{\sqrt{\left| g_k^* M^{-1} g_k \right|}}{\delta} - - and on later iterations - - .. 
math:: - - \theta = \frac{y_k^* H_0 y_k}{y_k^* s_k}, - - where :math:`g_k`, :math:`y_k`, and :math:`s_k` are respectively the - gradient, gradient change, and step, and where :math:`M^{-1}` and - :math:`H_0` are defined by `M_inv_action` and `H_0_action` respectively. - - The line search is performed using :func:`.line_search`. - - :arg F: A callable defining the functional. Accepts one or more variables - as arguments, and returns the value of the functional. Arguments should - not be modified. - :arg Fp: A callable defining the functional gradient. Accepts one or more - variables as inputs, and returns a variable or :class:`Sequence` of - variables storing the value of the gradient. Arguments should not be - modified. - :arg X0: A variable or a :class:`Sequence` of variables defining the - initial guess for the parameters. - :arg m: The maximum number of step + gradient change pairs to use in the - Hessian inverse approximation. - :arg s_atol: Absolute tolerance for the step change norm convergence - criterion. - :arg g_atol: Absolute tolerance for the gradient norm convergence - criterion. - :arg converged: A callable defining a callback, and which can be used to - define custom convergence criteria. Takes the form - - .. code-block:: python - - def converged(it, F_old, F_new, X_new, G_new, S, Y): - - with - - - `it`: The iteration number. - - `F_old`: The previous value of the functional. - - `F_new`: The new value of the functional. - - `X_new`: A variable or a :class:`Sequence` of variables defining - the new value of the parameters. - - `G_new`: A variable or a :class:`Sequence` of variables defining - the new value of the gradient. - - `S`: A variable or a :class:`Sequence` of variables defining the - step. - - `Y`: A variable or a sequence of variables defining the gradient - change. - - Input variables should not be modified. Returns a :class:`bool` - indicating whether the optimization has converged. - :arg max_its: The maximum number of iterations. - :arg H_0_action: A callable defining the action of the non-updated Hessian - inverse approximation on some direction. Accepts one or more variables - as arguments, defining the direction, and returns a variable or a - :class:`Sequence` of variables defining the action on this direction. - Should correspond to a positive definite operator. Arguments should not - be modified. An identity is used if not supplied. - :arg theta_scale: Whether to apply 'theta scaling', discussed above. - :arg delta: Controls the initial value of :math:`\theta` in 'theta - scaling'. If `None` then on the first iteration :math:`\theta` is set - equal to one. - :arg M_action: A callable defining a primal space inner product, - - .. math:: - - \left< x, y \right>_M = y^* M x, - - where :math:`x` and :math:`y` are degree of freedom vectors for primal - space elements and :math:`M` is a Hermitian and positive definite - matrix. Accepts one or more variables as arguments, defining the - direction, and returns a variable or a :class:`Sequence` of variables - defining the action of :math:`M` on this direction. An identity is used - if not supplied. Arguments should not be modified. Required if - `H_0_action` or `M_inv_action` are supplied. - :arg M_inv_action: A callable defining a (conjugate) dual space inner - product, - - .. math:: - - \left< x, y \right>_{M^{-1}} = y^* M^{-1} x, - - where :math:`x` and :math:`y` are degree of freedom vectors for - (conjugate) dual space elements and :math:`M` is as for `M_action`. 
-        Accepts one or more variables as arguments, defining the direction, and
-        returns a variable or a :class:`Sequence` of variables defining the
-        action of :math:`M^{-1}` on this direction. Arguments should not be
+    Parameters
+    ----------
+
+    forward : callable
+        Accepts one or more variable arguments, and returns a scalar variable
+        defining the functional.
+    space : space or Sequence[space, ...]
+        The control space.
+    solver_parameters : Mapping
+        TAO solver parameters.
+    H_0_action : callable
+        Defines the initial Hessian inverse approximation. Accepts one or more
+        variables as arguments, defining a direction, and returns a variable or
+        a :class:`Sequence` of variables defining the (conjugate of the) action
+        of an initial Hessian inverse approximation on this direction.
+        Arguments should not be modified.
+    M_inv_action : callable
+        Defines a dual space norm. Accepts one or more variables as arguments,
+        defining a direction, and returns a variable or a :class:`Sequence` of
+        variables defining the (conjugate of the) action of a Hermitian and
+        positive definite matrix on this direction. Arguments should not be
         modified. `H_0_action` is used if not supplied.
-    :arg c1: Armijo condition parameter. :math:`c_1` in equation (3.6a) of
-
-        - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization',
-          Springer, New York, NY, 2006, Second edition,
-          doi: 10.1007/978-0-387-40065-5
-
-    :arg c2: Curvature condition parameter. :math:`c_2` in equation (3.6b) of
-
-        - Jorge Nocedal and Stephen J. Wright, 'Numerical optimization',
-          Springer, New York, NY, 2006, Second edition,
-          doi: 10.1007/978-0-387-40065-5
-
-    :arg comm: A communicator.
-    :returns: A :class:`tuple` `(X, (it, F_calls, Fp_calls, hessian_approx))`
-        with
-
-        - `X`: The solution. A variable or a :class:`tuple` of variables.
-        - `it`: The number of iterations.
-        - `F_calls`: The number of functional evaluations.
-        - `Fp_calls`: The number of gradient evaluations.
-        - `hessian_approx`: The :class:`.LBFGSHessianApproximation`.
- """ - - logger = logging.getLogger("tlm_adjoint.l_bfgs") - - F_arg = F - F_calls = 0 - - def F(*X): - nonlocal F_calls - - F_calls += 1 - with var_locked(*X): - F_val = F_arg(*X) - if isinstance(F_val, numbers.Integral) \ - or not isinstance(F_val, numbers.Real): - raise TypeError("Invalid type") - return F_val - - Fp_arg = Fp - Fp_calls = 0 - - def Fp(*X): - nonlocal Fp_calls - - Fp_calls += 1 - with var_locked(*X): - Fp_val = Fp_arg(*X) - Fp_val = packed(Fp_val) - if len(Fp_val) != len(X): - raise ValueError("Incompatible shape") - return Fp_val - - X0_packed = Packed(X0) - X0 = tuple(X0_packed) - - if converged is None: - def converged(it, F_old, F_new, X_new, G_new, S, Y): - return False - else: - converged_arg = converged - - @wraps(converged_arg) - def converged(it, F_old, F_new, X_new, G_new, S, Y): - return converged_arg(it, F_old, F_new, - X0_packed.unpack(X_new), X0_packed.unpack(G_new), # noqa: E501 - X0_packed.unpack(S), X0_packed.unpack(Y)) - - if (H_0_action is None and M_inv_action is None) and M_action is not None: - raise TypeError("If M_action is supplied, then H_0_action or " - "M_inv_action must be supplied") - if (H_0_action is not None or M_inv_action is not None) and M_action is None: # noqa: E501 - raise TypeError("If H_0_action or M_inv_action are supplied, then " - "M_action must be supplied") - - if H_0_action is None: - def H_0_norm_sq(X): - with paused_space_type_checking(): - return abs(vars_inner(X, X)) - else: - H_0_norm_sq_H_0_action = wrapped_action(H_0_action) - - def H_0_norm_sq(X): - return abs(vars_inner(H_0_norm_sq_H_0_action(*X), X)) - - if M_action is None: - def M_norm_sq(X): - with paused_space_type_checking(): - return abs(vars_inner(X, X)) - else: - M_norm_sq_M_action = wrapped_action(M_action) - - def M_norm_sq(X): - return abs(vars_inner(X, M_norm_sq_M_action(*X))) - del M_action - - if M_inv_action is None: - M_inv_norm_sq = H_0_norm_sq - else: - M_inv_norm_sq_M_inv_action = wrapped_action(M_inv_action) - - def M_inv_norm_sq(X): - return abs(vars_inner(M_inv_norm_sq_M_inv_action(*X), X)) - del M_inv_action - - if comm is None: - comm = var_comm(X0[0]) - comm = comm_dup_cached(comm) - - X = tuple(var_copy(x0, static=var_is_static(x0), cache=var_is_cached(x0)) - for x0 in X0) - del X0 - old_F_val = F(*X) - old_Fp_val = Fp(*X) - old_Fp_norm_sq = M_inv_norm_sq(old_Fp_val) - - hessian_approx = LBFGSHessianApproximation(m) - if theta_scale and delta is not None: - theta = np.sqrt(old_Fp_norm_sq) / delta - else: - theta = 1.0 - - it = 0 - logger.debug(f"L-BFGS: Iteration {it:d}, " - f"F calls {F_calls:d}, " - f"Fp calls {Fp_calls:d}, " - f"functional value {old_F_val:.6e}") - while True: - logger.debug(f" Gradient norm = {np.sqrt(old_Fp_norm_sq):.6e}") - if g_atol is not None and old_Fp_norm_sq <= g_atol * g_atol: - break - - if it >= max_its: - raise RuntimeError("L-BFGS: Maximum number of iterations exceeded") - - minus_P = hessian_approx.inverse_action( - old_Fp_val, - H_0_action=H_0_action, theta=theta) - minus_P = packed(minus_P) - old_Fp_val_rank0 = -vars_inner(minus_P, old_Fp_val) - alpha, new_F_val, new_Fp_val = line_search( - F, Fp, X, minus_P, c1=c1, c2=c2, - old_F_val=old_F_val, old_Fp_val=old_Fp_val, - comm=comm) - new_Fp_val_rank0 = -vars_inner(minus_P, new_Fp_val) - - if alpha * old_Fp_val_rank0 >= 0.0: - raise RuntimeError("L-BFGS: Line search failure") - if new_F_val > old_F_val + c1 * alpha * old_Fp_val_rank0: - raise RuntimeError("L-BFGS: Armijo condition not satisfied") - if new_Fp_val_rank0 < c2 * old_Fp_val_rank0: - raise 
RuntimeError("L-BFGS: Curvature condition not satisfied") - if abs(new_Fp_val_rank0) > c2 * abs(old_Fp_val_rank0): - logger.warning("L-BFGS: Strong curvature condition not satisfied") - - S = vars_new(minus_P) - vars_axpy(S, -alpha, minus_P) - vars_axpy(X, 1.0, S) - - Y = vars_copy(new_Fp_val) - vars_axpy(Y, -1.0, old_Fp_val) - - # >=0 by curvature condition - S_inner_Y = alpha * (new_Fp_val_rank0 - c2 * old_Fp_val_rank0) - # >0 by definition of c2, and as steps are downhill - S_inner_Y += alpha * (c2 - 1.0) * old_Fp_val_rank0 - - hessian_approx.append(S, Y, S_inner_Y) - if theta_scale: - theta = H_0_norm_sq(Y) / S_inner_Y - else: - theta = 1.0 - - garbage_cleanup(comm) - it += 1 - logger.debug(f"L-BFGS: Iteration {it:d}, " - f"F calls {F_calls:d}, " - f"Fp calls {Fp_calls:d}, " - f"functional value {new_F_val:.6e}") - if converged(it, old_F_val, new_F_val, X, new_Fp_val, S, Y): - break - if s_atol is not None: - s_norm_sq = M_norm_sq(S) - logger.debug(f" Change norm = {np.sqrt(s_norm_sq):.6e}") - if s_norm_sq <= s_atol * s_atol: - break - - old_F_val = new_F_val - old_Fp_val = new_Fp_val - del new_F_val, new_Fp_val, new_Fp_val_rank0 - old_Fp_norm_sq = M_inv_norm_sq(old_Fp_val) - - return X0_packed.unpack(X), (it, F_calls, Fp_calls, hessian_approx) - - -@local_caches -def minimize_l_bfgs(forward, M0, *, - m=30, manager=None, **kwargs): - """Functional minimization using the L-BFGS algorithm. - - :arg forward: A callable which accepts one or more variable arguments, and - which returns a variable defining the forward functional. - :arg M0: A variable or :class:`Sequence` of variables defining the control, - and the initial guess for the optimization. - :arg manager: An :class:`.EquationManager` used to create an internal - manager via :meth:`.EquationManager.new`. `manager()` is used if not - supplied. - - Remaining arguments and the return value are described in the - :func:`.l_bfgs` documentation. - """ - - M0_packed = Packed(M0) - M0 = tuple(M0_packed) - - for m0 in M0: - if not issubclass(var_dtype(m0), np.floating): - raise ValueError("Invalid dtype") - - J_hat = ReducedFunctional(forward, manager=manager) - - X, optimization_data = l_bfgs( - lambda *M: J_hat.objective(M), lambda *M: J_hat.gradient(M), M0, - m=m, comm=J_hat.comm, **kwargs) - - return M0_packed.unpack(X), optimization_data - - -class TAOSolver: - r"""Functional minimization using TAO. - - :arg forward: A callable which accepts one or more variable arguments, and - which returns a variable defining the forward functional. - :arg space: A space or variable, or a :class:`Sequence` of these, defining - the control space. - :arg solver_parameters: A :class:`Mapping` defining TAO solver parameters. - :arg H_0_action: A callable defining the action of the non-updated Hessian - inverse approximation on some direction. Accepts one or more variables - as arguments, defining the direction, and returns a variable or a - :class:`Sequence` of variables defining the action on this direction. - Should correspond to a positive definite operator. Arguments should not - be modified. An identity is used if not supplied. - :arg M_inv_action: A callable defining a (conjugate) dual space inner - product, - - .. math:: - - \left< x, y \right>_{M^{-1}} = y^* M^{-1} x, - - where :math:`x` and :math:`y` are degree of freedom vectors for - (conjugate) dual space elements and :math:`M` is a Hermitian and - positive definite matrix. 
Accepts one or more variables as arguments, - defining the direction, and returns a variable or a :class:`Sequence` - of variables defining the action of :math:`M^{-1}` on this direction. - Arguments should not be modified. H_0_action is used if not supplied. - :arg manager: An :class:`.EquationManager` used to create an internal - manager via :meth:`.EquationManager.new`. `manager()` is used if not - supplied. + manager : :class:`.EquationManager` + Used to create an internal manager via :meth:`.EquationManager.new`. + `manager()` is used if not supplied. """ def __init__(self, forward, space, *, solver_parameters=None, @@ -948,7 +354,9 @@ def __init__(self, forward, space, *, solver_parameters=None, solver_parameters = {} if H_0_action is not None: H_0_action = wrapped_action(H_0_action) - if M_inv_action is not None: + if M_inv_action is None: + M_inv_action = H_0_action + else: M_inv_action = wrapped_action(M_inv_action) for space_i in space: @@ -1030,7 +438,7 @@ def mult(self, A, x, y): if M_inv_action is not None: class GradientNorm: def mult(self, A, x, y): - dJ = vars_new_conjugate_dual(M[0]) + dJ = tuple(map(var_new_conjugate_dual, M[0])) from_petsc(x, dJ) M_inv_X = M_inv_action(*dJ) to_petsc(y, M_inv_X) @@ -1055,7 +463,7 @@ class InitialHessian: class InitialHessianPreconditioner: def apply(self, pc, x, y): - X = vars_new_conjugate_dual(M[0]) + X = tuple(map(var_new_conjugate_dual, M[0])) from_petsc(x, X) H_0_X = H_0_action(*X) to_petsc(y, H_0_X) @@ -1102,17 +510,22 @@ def tao(self): def solve(self, M): """Solve the optimization problem. - :arg M: Defines the solution, and the initial guess. + Parameters + ---------- + + M : variable or Sequence[variable, ...] + Defines the initial guess, and stores the result of the + minimization. """ M = packed(M) x = self._vec_interface.new_vec() x.to_petsc(M) - self._M[0] = tuple(var_new(m, static=var_is_static(m), - cache=var_is_cached(m)) - for m in M) try: + self._M[0] = tuple(var_new(m, static=var_is_static(m), + cache=var_is_cached(m)) + for m in M) self.tao.solve(x.vec) finally: self._M[0] = None @@ -1123,14 +536,24 @@ def solve(self, M): def minimize_tao(forward, M0, *args, **kwargs): - """Functional minimization using TAO. + """Minimize a functional using TAO. + + Parameters + ---------- + + forward : callable + Accepts one or more variable arguments, and returns a scalar variable + defining the functional. + M0 : variable or Sequence[variable, ...] + The initial guess. + args, kwargs + Passed to the :class:`.TAOSolver` constructor. - :arg forward: A callable which accepts one or more variable arguments, and - which returns a variable defining the forward functional. - :arg M0: A variable or :class:`Sequence` of variables defining the control, - and the initial guess for the optimization. + Returns + ------- - Remaining arguments are passed to the :class:`.TAOSolver` constructor. + variable or Sequence[variable, ...] + The result of the minimization. """ M0_packed = Packed(M0)
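
For downstream users of the removed `minimize_l_bfgs`, the TAO LMVM solver is the intended replacement, as in the `tests/firedrake/test_hessian_system.py` change above. The following is a minimal sketch (not part of this patch), assuming a Firedrake build of tlm_adjoint with TAO-enabled PETSc; the mesh, space, and quadratic functional `forward_J` are illustrative placeholders:

.. code-block:: python

    from firedrake import *
    from tlm_adjoint.firedrake import *

    stop_manager()  # do not record this setup code

    mesh = UnitSquareMesh(10, 10)
    X = SpatialCoordinate(mesh)
    space = FunctionSpace(mesh, "Lagrange", 1)

    x_star = Function(space, name="x_star")
    x_star.interpolate(sin(pi * X[0]) * sin(2.0 * pi * X[1]))

    def forward_J(m):
        # A scalar functional, J(m) = (1/2) || m - x_star ||^2 in L^2
        return assemble(0.5 * inner(m - x_star, m - x_star) * dx)

    # Previously: m, _ = minimize_l_bfgs(forward_J, m0, s_atol=0.0,
    #                                    g_atol=1.0e-7)
    m0 = Function(space, name="m0")
    m = minimize_tao(forward_J, m0,
                     solver_parameters={"tao_type": "lmvm",
                                        "tao_gatol": 1.0e-7,
                                        "tao_grtol": 0.0,
                                        "tao_gttol": 0.0})

    # Equivalently, via the TAOSolver class, which can be reused for
    # repeated solves. The result is written into m in-place.
    solver = TAOSolver(forward_J, space,
                       solver_parameters={"tao_type": "lmvm",
                                          "tao_gatol": 1.0e-7,
                                          "tao_grtol": 0.0,
                                          "tao_gttol": 0.0})
    m = Function(space, name="m")
    solver.solve(m)

An initial Hessian inverse approximation can still be supplied via the `H_0_action` argument, and a dual space norm via `M_inv_action`, as described in the new `TAOSolver` docstring.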