From 96d6c85999514af653602e39b126adec962cfef9 Mon Sep 17 00:00:00 2001
From: Ronny Bergmann <git@ronnybergmann.net>
Date: Thu, 13 Apr 2023 16:19:52 +0200
Subject: [PATCH] Introduce the approximate Hessian as a default in trust
 regions. (#237)

* Introduce the approximate Hessian as a default in trust regions.
* Simplify tests and inc codecov by that.
* bump version.
* Improve trust_regions interface.
---
 Changelog.md                             | 14 ++++++
 Project.toml                             |  6 +--
 src/plans/hessian_plan.jl                | 22 ++++++---
 src/plans/nonmutating_manifolds_plans.jl |  2 -
 src/solvers/trust_regions.jl             | 62 +++++++++++++++++++++---
 test/solvers/test_trust_regions.jl       | 14 ------
 6 files changed, 88 insertions(+), 32 deletions(-)

diff --git a/Changelog.md b/Changelog.md
index c75236c1a9..49fb2079ad 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -5,6 +5,20 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.4.15] - 13/04/2023
+
+### Changed
+
+* `trust_regions(M, f, grad_f, hess_f, p)` now has the Hessian `hess_f` as well as
+  the start point `p` as optional parameters and approximates the Hessian otherwise.
+* `trust_regions!(M, f, grad_f, hess_f, p)` has the Hessian as an optional parameter
+  and approximates it otherwise.
+
+### Removed
+
+* support for `ManifoldsBase.jl` 0.13.x, since with the definition of `copy(M,p::Number)`,
+  in 0.14.4, we now use that instead of defining it ourselves.
+
 ## [0.4.14] - 06/04/2023
 
 ### Changed
diff --git a/Project.toml b/Project.toml
index dec5f8f64d..ac0341ad90 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Manopt"
 uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5"
 authors = ["Ronny Bergmann <manopt@ronnybergmann.net>"]
-version = "0.4.14"
+version = "0.4.15"
 
 [deps]
 ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
@@ -27,8 +27,8 @@ ColorTypes = "0.9.1, 0.10, 0.11"
 Colors = "0.11.2, 0.12"
 DataStructures = "0.17, 0.18"
 ManifoldDiff = "0.2, 0.3"
-Manifolds = "0.8.43"
-ManifoldsBase = "0.13.30, 0.14"
+Manifolds = "0.8.57"
+ManifoldsBase = "0.14.4"
 Requires = "0.5, 1"
 StaticArrays = "0.12, 1.0"
 julia = "1.6"
diff --git a/src/plans/hessian_plan.jl b/src/plans/hessian_plan.jl
index 34c211927f..f24a69cd46 100644
--- a/src/plans/hessian_plan.jl
+++ b/src/plans/hessian_plan.jl
@@ -151,7 +151,14 @@ update_hessian!(M, f, p, p_proposal, X) = f
 update_hessian_basis!(M, f, p) = f
 
 @doc raw"""
-    ApproxHessianFiniteDifference{E, P, T, G, RTR,, VTR, R <: Real}
+    AbstractApproxHessian <: Function
+
+An abstract supertype for approximate Hessian functions; it also declares them to be functions.
+"""
+abstract type AbstractApproxHessian <: Function end
+
+@doc raw"""
+    ApproxHessianFiniteDifference{E, P, T, G, RTR, VTR, R <: Real} <: AbstractApproxHessian
 
 A functor to approximate the Hessian by a finite difference of gradient evaluation.
 
@@ -191,7 +198,8 @@ Then we approximate the Hessian by the finite difference of the gradients, where
 * `retraction_method` – (`default_retraction_method(M, typeof(p))`) a `retraction(M, p, X)` to use in the approximation.
 * `vector_transport_method` - (`default_vector_transport_method(M, typeof(p))`) a vector transport to use
 """
-mutable struct ApproxHessianFiniteDifference{E,P,T,G,RTR,VTR,R<:Real}
+mutable struct ApproxHessianFiniteDifference{E,P,T,G,RTR,VTR,R<:Real} <:
+               AbstractApproxHessian
     p_dir::P
     gradient!!::G
     grad_tmp::T
@@ -251,7 +259,8 @@ function (f::ApproxHessianFiniteDifference{InplaceEvaluation})(M, Y, p, X)
 end
 
 @doc raw"""
-    ApproxHessianSymmetricRankOne{E, P, G, T, B<:AbstractBasis{ℝ}, VTR, R<:Real}
+    ApproxHessianSymmetricRankOne{E, P, G, T, B<:AbstractBasis{ℝ}, VTR, R<:Real} <: AbstractApproxHessian
+
 A functor to approximate the Hessian by the symmetric rank one update.
 # Fields
 * `gradient!!` the gradient function (either allocating or mutating, see `evaluation` parameter).
@@ -271,7 +280,8 @@ A functor to approximate the Hessian by the symmetric rank one update.
 * `evaluation` ([`AllocatingEvaluation`](@ref)) whether the gradient is given as an allocation function or an in-place ([`InplaceEvaluation`](@ref)).
 * `vector_transport_method` (`ParallelTransport()`) vector transport ``\mathcal T_{\cdot\gets\cdot}`` to use.
 """
-mutable struct ApproxHessianSymmetricRankOne{E,P,G,T,B<:AbstractBasis{ℝ},VTR,R<:Real}
+mutable struct ApproxHessianSymmetricRankOne{E,P,G,T,B<:AbstractBasis{ℝ},VTR,R<:Real} <:
+               AbstractApproxHessian
     p_tmp::P
     gradient!!::G
     grad_tmp::T
@@ -384,7 +394,7 @@ function update_hessian_basis!(M, f::ApproxHessianSymmetricRankOne{InplaceEvalua
 end
 
 @doc raw"""
-    ApproxHessianBFGS{E, P, G, T, B<:AbstractBasis{ℝ}, VTR, R<:Real}
+    ApproxHessianBFGS{E, P, G, T, B<:AbstractBasis{ℝ}, VTR, R<:Real} <: AbstractApproxHessian
 A functor to approximate the Hessian by the BFGS update.
 # Fields
 * `gradient!!` the gradient function (either allocating or mutating, see `evaluation` parameter).
@@ -406,7 +416,7 @@ A functor to approximate the Hessian by the BFGS update.
 """
 mutable struct ApproxHessianBFGS{
     E,P,G,T,B<:AbstractBasis{ℝ},VTR<:AbstractVectorTransportMethod
-}
+} <: AbstractApproxHessian
     p_tmp::P
     gradient!!::G
     grad_tmp::T
diff --git a/src/plans/nonmutating_manifolds_plans.jl b/src/plans/nonmutating_manifolds_plans.jl
index c0f6e5a78c..968affd8b7 100644
--- a/src/plans/nonmutating_manifolds_plans.jl
+++ b/src/plans/nonmutating_manifolds_plans.jl
@@ -88,5 +88,3 @@ function step_solver!(
     s.p = retract(get_manifold(p), s.p, -step * s.X, s.retraction_method)
     return s
 end
-#Hack for now?
-copy(::NONMUTATINGMANIFOLDS, p) = p
diff --git a/src/solvers/trust_regions.jl b/src/solvers/trust_regions.jl
index 7e736ab5a6..a56cd3f6e3 100644
--- a/src/solvers/trust_regions.jl
+++ b/src/solvers/trust_regions.jl
@@ -258,24 +258,50 @@ the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) fo
 [`truncated_conjugate_gradient_descent`](@ref)
 """
 function trust_regions(
-    M::AbstractManifold, f::TF, grad_f::TdF, Hess_f::TH, p; kwargs...
-) where {TF,TdF,TH}
+    M::AbstractManifold, f::TF, grad_f::TdF, Hess_f::TH, p=rand(M); kwargs...
+) where {TF,TdF,TH<:Function}
     q = copy(M, p)
     return trust_regions!(M, f, grad_f, Hess_f, q; kwargs...)
 end
-
+function trust_regions(
+    M::AbstractManifold,
+    f::TF,
+    grad_f::TdF,
+    p=rand(M);
+    evaluation=AllocatingEvaluation(),
+    retraction_method::AbstractRetractionMethod=default_retraction_method(M, typeof(p)),
+    kwargs...,
+) where {TF,TdF}
+    hess_f = ApproxHessianFiniteDifference(
+        M, copy(M, p), grad_f; evaluation=evaluation, retraction_method=retraction_method
+    )
+    return trust_regions(
+        M,
+        f,
+        grad_f,
+        hess_f,
+        p;
+        evaluation=evaluation,
+        retraction_method=retraction_method,
+        kwargs...,
+    )
+end
 @doc raw"""
-    trust_regions!(M, f, grad_f, Hess_f, x; kwargs...)
+    trust_regions!(M, f, grad_f, Hess_f, p; kwargs...)
+    trust_regions!(M, f, grad_f, p; kwargs...)
 
-evaluate the Riemannian trust-regions solver for optimization on manifolds in place of `x`.
+evaluate the Riemannian trust-regions solver for optimization on manifolds in place of `p`.
 
 # Input
 * `M` – a manifold ``\mathcal M``
 * `f` – a cost function ``F: \mathcal M → ℝ`` to minimize
 * `grad_f`- the gradient ``\operatorname{grad}F: \mathcal M → T \mathcal M`` of ``F``
-* `Hess_f` – the hessian ``H( \mathcal M, x, ξ)`` of ``F``
+* `Hess_f` – (optional) the hessian ``H( \mathcal M, x, ξ)`` of ``F``
 * `x` – an initial value ``x  ∈  \mathcal M``
 
+If no Hessian is provided, it is approximated using finite differences, see
+[`ApproxHessianFiniteDifference`](@ref).
+
 for more details and all options, see [`trust_regions`](@ref)
 """
 function trust_regions!(
@@ -349,7 +375,29 @@ function trust_regions!(
     trs = decorate_state!(trs; kwargs...)
     return get_solver_return(solve!(mp, trs))
 end
-
+function trust_regions!(
+    M::AbstractManifold,
+    f::TF,
+    grad_f::TdF,
+    p;
+    evaluation=AllocatingEvaluation(),
+    retraction_method::AbstractRetractionMethod=default_retraction_method(M, typeof(p)),
+    kwargs...,
+) where {TF,TdF}
+    hess_f = ApproxHessianFiniteDifference(
+        M, copy(M, p), grad_f; evaluation=evaluation, retraction_method=retraction_method
+    )
+    return trust_regions!(
+        M,
+        f,
+        grad_f,
+        hess_f,
+        p;
+        evaluation=evaluation,
+        retraction_method=retraction_method,
+        kwargs...,
+    )
+end
 function initialize_solver!(mp::AbstractManoptProblem, trs::TrustRegionsState)
     M = get_manifold(mp)
     get_gradient!(mp, trs.X, trs.p)
diff --git a/test/solvers/test_trust_regions.jl b/test/solvers/test_trust_regions.jl
index f2c2d42b1d..56be18a87c 100644
--- a/test/solvers/test_trust_regions.jl
+++ b/test/solvers/test_trust_regions.jl
@@ -45,13 +45,6 @@ include("trust_region_model.jl")
             M,
             cost,
             rgrad,
-            ApproxHessianFiniteDifference(
-                M,
-                p,
-                rgrad;
-                steplength=2^(-9),
-                vector_transport_method=ProjectionTransport(),
-            ),
             p;
             max_trust_region_radius=8.0,
             stopping_criterion=StopAfterIteration(2000) | StopWhenGradientNormLess(1e-6),
@@ -61,13 +54,6 @@ include("trust_region_model.jl")
             M,
             cost,
             rgrad,
-            ApproxHessianFiniteDifference(
-                M,
-                p,
-                rgrad;
-                steplength=2^(-9),
-                vector_transport_method=ProjectionTransport(),
-            ),
             q2;
             stopping_criterion=StopAfterIteration(2000) | StopWhenGradientNormLess(1e-6),
             max_trust_region_radius=8.0,