From 96d6c85999514af653602e39b126adec962cfef9 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Thu, 13 Apr 2023 16:19:52 +0200 Subject: [PATCH] Introduce the approximate Hessian as a default in trust regions. (#237) * Introduce the approximate Hessian as a default in trust regions. * Simplify tests and inc codecov by that. * bump version. * Improve trust_regions interface. --- Changelog.md | 14 ++++++ Project.toml | 6 +-- src/plans/hessian_plan.jl | 22 ++++++--- src/plans/nonmutating_manifolds_plans.jl | 2 - src/solvers/trust_regions.jl | 62 +++++++++++++++++++++--- test/solvers/test_trust_regions.jl | 14 ------ 6 files changed, 88 insertions(+), 32 deletions(-) diff --git a/Changelog.md b/Changelog.md index c75236c1a9..49fb2079ad 100644 --- a/Changelog.md +++ b/Changelog.md @@ -5,6 +5,20 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.15] - 13/04/2023 + +### Changed + +* `trust_regions(M, f, grad_f, hess_f, p)` now has the Hessian `hess_f` as well as + the start point `p` as optional parameters; it approximates the Hessian and uses `rand(M)` otherwise. +* `trust_regions!(M, f, grad_f, hess_f, p)` has the Hessian as an optional parameter + and approximates it otherwise. + +### Removed + +* support for `ManifoldsBase.jl` 0.13.x, since with the definition of `copy(M,p::Number)`, + in 0.14.4, we now use that instead of defining it ourselves. 
+ ## [0.4.14] - 06/04/2023 ### Changed diff --git a/Project.toml b/Project.toml index dec5f8f64d..ac0341ad90 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Manopt" uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5" authors = ["Ronny Bergmann "] -version = "0.4.14" +version = "0.4.15" [deps] ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" @@ -27,8 +27,8 @@ ColorTypes = "0.9.1, 0.10, 0.11" Colors = "0.11.2, 0.12" DataStructures = "0.17, 0.18" ManifoldDiff = "0.2, 0.3" -Manifolds = "0.8.43" -ManifoldsBase = "0.13.30, 0.14" +Manifolds = "0.8.57" +ManifoldsBase = "0.14.4" Requires = "0.5, 1" StaticArrays = "0.12, 1.0" julia = "1.6" diff --git a/src/plans/hessian_plan.jl b/src/plans/hessian_plan.jl index 34c211927f..f24a69cd46 100644 --- a/src/plans/hessian_plan.jl +++ b/src/plans/hessian_plan.jl @@ -151,7 +151,14 @@ update_hessian!(M, f, p, p_proposal, X) = f update_hessian_basis!(M, f, p) = f @doc raw""" - ApproxHessianFiniteDifference{E, P, T, G, RTR,, VTR, R <: Real} + AbstractApproxHessian <: Function + +An abstract supertype for approximate Hessian functions; it also declares them to be functions. +""" +abstract type AbstractApproxHessian <: Function end + +@doc raw""" + ApproxHessianFiniteDifference{E, P, T, G, RTR, VTR, R <: Real} <: AbstractApproxHessian A functor to approximate the Hessian by a finite difference of gradient evaluation. @@ -191,7 +198,8 @@ Then we approximate the Hessian by the finite difference of the gradients, where * `retraction_method` – (`default_retraction_method(M, typeof(p))`) a `retraction(M, p, X)` to use in the approximation. 
* `vector_transport_method` - (`default_vector_transport_method(M, typeof(p))`) a vector transport to use """ -mutable struct ApproxHessianFiniteDifference{E,P,T,G,RTR,VTR,R<:Real} +mutable struct ApproxHessianFiniteDifference{E,P,T,G,RTR,VTR,R<:Real} <: + AbstractApproxHessian p_dir::P gradient!!::G grad_tmp::T @@ -251,7 +259,8 @@ function (f::ApproxHessianFiniteDifference{InplaceEvaluation})(M, Y, p, X) end @doc raw""" - ApproxHessianSymmetricRankOne{E, P, G, T, B<:AbstractBasis{ℝ}, VTR, R<:Real} + ApproxHessianSymmetricRankOne{E, P, G, T, B<:AbstractBasis{ℝ}, VTR, R<:Real} <: AbstractApproxHessian + A functor to approximate the Hessian by the symmetric rank one update. # Fields * `gradient!!` the gradient function (either allocating or mutating, see `evaluation` parameter). @@ -271,7 +280,8 @@ A functor to approximate the Hessian by the symmetric rank one update. * `evaluation` ([`AllocatingEvaluation`](@ref)) whether the gradient is given as an allocation function or an in-place ([`InplaceEvaluation`](@ref)). * `vector_transport_method` (`ParallelTransport()`) vector transport ``\mathcal T_{\cdot\gets\cdot}`` to use. """ -mutable struct ApproxHessianSymmetricRankOne{E,P,G,T,B<:AbstractBasis{ℝ},VTR,R<:Real} +mutable struct ApproxHessianSymmetricRankOne{E,P,G,T,B<:AbstractBasis{ℝ},VTR,R<:Real} <: + AbstractApproxHessian p_tmp::P gradient!!::G grad_tmp::T @@ -384,7 +394,7 @@ function update_hessian_basis!(M, f::ApproxHessianSymmetricRankOne{InplaceEvalua end @doc raw""" - ApproxHessianBFGS{E, P, G, T, B<:AbstractBasis{ℝ}, VTR, R<:Real} + ApproxHessianBFGS{E, P, G, T, B<:AbstractBasis{ℝ}, VTR, R<:Real} <: AbstractApproxHessian A functor to approximate the Hessian by the BFGS update. # Fields * `gradient!!` the gradient function (either allocating or mutating, see `evaluation` parameter). @@ -406,7 +416,7 @@ A functor to approximate the Hessian by the BFGS update. 
""" mutable struct ApproxHessianBFGS{ E,P,G,T,B<:AbstractBasis{ℝ},VTR<:AbstractVectorTransportMethod -} +} <: AbstractApproxHessian p_tmp::P gradient!!::G grad_tmp::T diff --git a/src/plans/nonmutating_manifolds_plans.jl b/src/plans/nonmutating_manifolds_plans.jl index c0f6e5a78c..968affd8b7 100644 --- a/src/plans/nonmutating_manifolds_plans.jl +++ b/src/plans/nonmutating_manifolds_plans.jl @@ -88,5 +88,3 @@ function step_solver!( s.p = retract(get_manifold(p), s.p, -step * s.X, s.retraction_method) return s end -#Hack for now? -copy(::NONMUTATINGMANIFOLDS, p) = p diff --git a/src/solvers/trust_regions.jl b/src/solvers/trust_regions.jl index 7e736ab5a6..a56cd3f6e3 100644 --- a/src/solvers/trust_regions.jl +++ b/src/solvers/trust_regions.jl @@ -258,24 +258,50 @@ the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) fo [`truncated_conjugate_gradient_descent`](@ref) """ function trust_regions( - M::AbstractManifold, f::TF, grad_f::TdF, Hess_f::TH, p; kwargs... -) where {TF,TdF,TH} + M::AbstractManifold, f::TF, grad_f::TdF, Hess_f::TH, p=rand(M); kwargs... +) where {TF,TdF,TH<:Function} q = copy(M, p) return trust_regions!(M, f, grad_f, Hess_f, q; kwargs...) end - +function trust_regions( + M::AbstractManifold, + f::TF, + grad_f::TdF, + p=rand(M); + evaluation=AllocatingEvaluation(), + retraction_method::AbstractRetractionMethod=default_retraction_method(M, typeof(p)), + kwargs..., +) where {TF,TdF} + hess_f = ApproxHessianFiniteDifference( + M, copy(M, p), grad_f; evaluation=evaluation, retraction_method=retraction_method + ) + return trust_regions( + M, + f, + grad_f, + hess_f, + p; + evaluation=evaluation, + retraction_method=retraction_method, + kwargs..., + ) +end @doc raw""" - trust_regions!(M, f, grad_f, Hess_f, x; kwargs...) + trust_regions!(M, f, grad_f, Hess_f, p; kwargs...) + trust_regions!(M, f, grad_f, p; kwargs...) -evaluate the Riemannian trust-regions solver for optimization on manifolds in place of `x`. 
+evaluate the Riemannian trust-regions solver for optimization on manifolds in place of `p`. # Input * `M` – a manifold ``\mathcal M`` * `f` – a cost function ``F: \mathcal M → ℝ`` to minimize * `grad_f`- the gradient ``\operatorname{grad}F: \mathcal M → T \mathcal M`` of ``F`` -* `Hess_f` – the hessian ``H( \mathcal M, x, ξ)`` of ``F`` +* `Hess_f` – (optional) the Hessian ``H( \mathcal M, x, ξ)`` of ``F`` * `x` – an initial value ``x ∈ \mathcal M`` +If no Hessian is provided, it is approximated by finite differences of the gradient, see +[`ApproxHessianFiniteDifference`](@ref). + for more details and all options, see [`trust_regions`](@ref) """ function trust_regions!( @@ -349,7 +375,29 @@ function trust_regions!( trs = decorate_state!(trs; kwargs...) return get_solver_return(solve!(mp, trs)) end - +function trust_regions!( + M::AbstractManifold, + f::TF, + grad_f::TdF, + p; + evaluation=AllocatingEvaluation(), + retraction_method::AbstractRetractionMethod=default_retraction_method(M, typeof(p)), + kwargs..., +) where {TF,TdF} + hess_f = ApproxHessianFiniteDifference( + M, copy(M, p), grad_f; evaluation=evaluation, retraction_method=retraction_method + ) + return trust_regions!( + M, + f, + grad_f, + hess_f, + p; + evaluation=evaluation, + retraction_method=retraction_method, + kwargs..., + ) +end function initialize_solver!(mp::AbstractManoptProblem, trs::TrustRegionsState) M = get_manifold(mp) get_gradient!(mp, trs.X, trs.p) diff --git a/test/solvers/test_trust_regions.jl b/test/solvers/test_trust_regions.jl index f2c2d42b1d..56be18a87c 100644 --- a/test/solvers/test_trust_regions.jl +++ b/test/solvers/test_trust_regions.jl @@ -45,13 +45,6 @@ include("trust_region_model.jl") M, cost, rgrad, - ApproxHessianFiniteDifference( - M, - p, - rgrad; - steplength=2^(-9), - vector_transport_method=ProjectionTransport(), - ), p; max_trust_region_radius=8.0, stopping_criterion=StopAfterIteration(2000) | StopWhenGradientNormLess(1e-6), @@ -61,13 +54,6 @@ 
include("trust_region_model.jl") M, cost, rgrad, - ApproxHessianFiniteDifference( - M, - p, - rgrad; - steplength=2^(-9), - vector_transport_method=ProjectionTransport(), - ), q2; stopping_criterion=StopAfterIteration(2000) | StopWhenGradientNormLess(1e-6), max_trust_region_radius=8.0,