Simplify first Keyword which is now really much nicer to write.
kellertuer committed Aug 21, 2024
1 parent 4f1fe6d commit bfe4d37
Showing 38 changed files with 167 additions and 139 deletions.
1 change: 1 addition & 0 deletions docs/src/solvers/conjugate_residual.md
@@ -6,6 +6,7 @@ CurrentModule = Manopt

```@docs
conjugate_residual
conjugate_residual!
```

## State
45 changes: 33 additions & 12 deletions src/documentation_glossary.jl
@@ -55,16 +55,19 @@ define!(:LaTeX, :ast, raw"\ast")
define!(:LaTeX, :bar, (letter) -> raw"\bar" * "$(letter)")

define!(:LaTeX, :bigl, raw"\bigl")
define!(:LaTeX, :bigr, raw"\bigr")
define!(:LaTeX, :Bigl, raw"\Bigl")
define!(:LaTeX, :Bigr, raw"\Bigr")
define!(:LaTeX, :Cal, (letter) -> raw"\mathcal " * "$letter")
define!(:LaTeX, :displaystyle, raw"\displaystyle")
define!(:LaTeX, :frac, (a, b) -> raw"\frac" * "{$a}{$b}")

define!(:LaTeX, :grad, raw"\operatorname{grad}")
define!(:LaTeX, :hat, (letter) -> raw"\hat{" * "$letter" * "}")

define!(:LaTeX, :Hess, raw"\operatorname{Hess}")
define!(:LaTeX, :invretr, raw"\operatorname{retr}^{-1}")
define!(:LaTeX, :log, raw"\log")
define!(:LaTeX, :max, raw"\max")
define!(:LaTeX, :min, raw"\min")
define!(:LaTeX, :norm, (v; index = "") -> raw"\lVert" * "$v" * raw"\rVert" * "_{$index}")
define!(:LaTeX, :norm, (v; index = "") -> raw"\lVert " * "$v" * raw" \rVert" * "_{$index}")

define!(:LaTeX, :prox, raw"\operatorname{prox}")
define!(:LaTeX, :reflect, raw"\operatorname{refl}")
define!(:LaTeX, :retr, raw"\operatorname{retr}")
@@ -157,8 +160,8 @@ define!(
define!(
:Variable,
:Field,
(s::Symbol, display="$s", t=""; kwargs...) ->
"* `$(display)::$(length(t) > 0 ? t : _var(s, :type))`: $(_var(s, :description; kwargs...))",
(s::Symbol, display="$s", t=""; comment="", kwargs...) ->

"* `$(display)::$(length(t) > 0 ? t : _var(s, :type))`: $(_var(s, :description; kwargs...))$(comment)",
)
define!(
:Variable,
@@ -168,15 +171,39 @@ define!(
display="$s",
t="";
default="",
comment="",
type=false,
description::Bool=true,
kwargs...,
) ->
"* `$(display)$(type ? "::$(length(t) > 0 ? t : _var(s, :type))" : "")=`$(length(default) > 0 ? default : _var(s, :default; kwargs...))$(description ? ": $(_var(s, :description; kwargs...))" : "")",
"* `$(display)$(type ? "::$(length(t) > 0 ? t : _var(s, :type))" : "")=`$(length(default) > 0 ? default : _var(s, :default; kwargs...))$(description ? ": $(_var(s, :description; kwargs...))" : "")$(comment)",
)
#
# Actual variables

define!(
:Variable,
:at_iteration,
:description,
"an integer indicating at which iteration the stopping criterion last indicated to stop, which might also be before the solver started (`0`). Any negative value indicates that this was not yet the case;",
)
define!(:Variable, :at_iteration, :type, "Int")

define!(
:Variable,
:evaluation,
:description,
"specify whether the functions that return an array, for example a point or a tangent vector, work by allocating their result ([`AllocatingEvaluation`](@ref)) or whether they modify their input argument to return the result therein ([`InplaceEvaluation`](@ref)). Since usually the first argument is the manifold, the modified argument is the second.",
)
define!(:Variable, :evaluation, :type, "AbstractEvaluationType")
define!(:Variable, :evaluation, :default, "[`AllocatingEvaluation`](@ref)`()`")
define!(
:Variable,
:evaluation,
:GradientExample,
"For example `grad_f(M,p)` allocates, but `grad_f!(M, X, p)` computes the result in-place of `X`.",
)

define!(
:Variable,
:f,
@@ -287,9 +314,9 @@ define!(
:X,
:description,
(; M="M", p="p", note="") ->
"a tangent bector at the point ``$p`` on the manifold ``$(_tex(:Cal, M))``. $note",
"a tangent vector at the point ``$p`` on the manifold ``$(_tex(:Cal, M))``. $note",
)
define!(:Variable, :X, :type, "X")
define!(:Variable, :X, :type, "T")
define!(:Variable, :X, :default, (; M="M", p="p") -> _link(:zero_vector; M=M, p=p))

#
@@ -366,8 +393,6 @@ _sc(args...; kwargs...) = glossary(:StoppingCriterion, args...; kwargs...)
# Old strings

# Fields
_field_at_iteration = "`at_iteration`: an integer indicating at which the stopping criterion last indicted to stop, which might also be before the solver started (`0`). Any negative value indicates that this was not yet the case; "
_field_iterate = "`p`: the current iterate ``p=p^{(k)} ∈ $(_math(:M))``"
_field_gradient = "`X`: the current gradient ``$(_tex(:grad))f(p^{(k)}) ∈ T_p$(_math(:M))``"
_field_subgradient = "`X` : the current subgradient ``$(_tex(:subgrad))f(p^{(k)}) ∈ T_p$(_tex(:Cal, "M"))``"
_field_inv_retr = "`inverse_retraction_method::`[`AbstractInverseRetractionMethod`](@extref `ManifoldsBase.AbstractInverseRetractionMethod`) : an inverse retraction ``$(_tex(:invretr))``"
Expand All @@ -382,10 +407,6 @@ _field_X = "`X`: a tangent vector"
#
#
# Keywords
_kw_evaluation_default = "`evaluation=`[`AllocatingEvaluation`](@ref)`()`"
_kw_evaluation = "specify whether the functions that return an array, for example a point or a tangent vector, work by allocating its result ([`AllocatingEvaluation`](@ref)) or whether they modify their input argument to return the result therein ([`InplaceEvaluation`](@ref)). Since usually the first argument is the manifold, the modified argument is the second."
_kw_evaluation_example = "For example `grad_f(M,p)` allocates, but `grad_f!(M, X, p)` computes the result in-place of `X`."

_kw_inverse_retraction_method_default = "`inverse_retraction_method=`[`default_inverse_retraction_method`](@extref `ManifoldsBase.default_inverse_retraction_method-Tuple{AbstractManifold}`)`(M, typeof(p))`"
_kw_inverse_retraction_method = "an inverse retraction ``$(_tex(:invretr))`` to use, see [the section on retractions and their inverses](@extref ManifoldsBase :doc:`retractions`)."

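To make the glossary changes above easier to follow, here is a minimal, self-contained sketch of the pattern they rely on: a nested dictionary populated by `define!` and read back through small lookup helpers such as `_tex`. This is an illustration under those assumptions, not Manopt's actual implementation; only the entry names (`:LaTeX`, `:grad`, `:norm`) are taken from the diff.

```julia
# Minimal sketch of a documentation glossary: a nested Dict whose leaves are
# either strings or functions that produce strings.
_glossary = Dict{Symbol,Any}()

# Store `value` under a path of symbols, creating nested dicts along the way.
function define!(args...)
    path, value = args[1:(end - 1)], args[end]
    d = _glossary
    for key in path[1:(end - 1)]
        d = get!(d, key, Dict{Symbol,Any}())
    end
    d[path[end]] = value
    return value
end

# Walk the nested dicts; if the entry found is a function, call it with the rest.
function glossary(args...; kwargs...)
    d = _glossary
    i = 1
    while d isa Dict
        d = d[args[i]]
        i += 1
    end
    return d isa Function ? d(args[i:end]...; kwargs...) : d
end

_tex(args...; kwargs...) = glossary(:LaTeX, args...; kwargs...)

# The definitions from the diff then compose into docstring snippets:
define!(:LaTeX, :grad, raw"\operatorname{grad}")
define!(:LaTeX, :norm, (v; index="") -> raw"\lVert " * "$v" * raw" \rVert" * "_{$index}")
_tex(:norm, "$(_tex(:grad)) f(p_0)"; index="p_0")
# returns "\lVert \operatorname{grad} f(p_0) \rVert_{p_0}"
```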
2 changes: 1 addition & 1 deletion src/plans/conjugate_gradient_plan.jl
@@ -27,7 +27,7 @@ specify options for a conjugate gradient descent algorithm, that solves a
# Fields
$(_var(:Field, :p))
$(_var(:Field, :p; comment=" storing the current iterate"))
$(_var(:Field, :X))
* `δ`: the current descent direction, also a tangent vector
* `β`: the current update coefficient rule, see .
14 changes: 6 additions & 8 deletions src/plans/conjugate_residual_plan.jl
@@ -189,22 +189,20 @@ A state for the [`conjugate_residual`](@ref) solver.
# Constructor
function ConjugateResidualState(TpM::TangentSpace,slso::SymmetricLinearSystemObjective;
kwargs...
)
ConjugateResidualState(TpM::TangentSpace,slso::SymmetricLinearSystemObjective; kwargs...)
Initialise the state with default values.
## Keyword arguments
* `X``$(_link(:zero_vector))`
* `r=-get_gradient(TpM, slso, X)`
* `r=-`[`get_gradient`](@ref)`(TpM, slso, X)`
* `d=copy(TpM, r)`
* `Ar=get_hessian(TpM, slso, X, r)`
* `Ar=`[`get_hessian`](@ref)`(TpM, slso, X, r)`
* `Ad=copy(TpM, Ar)`
* `α::R=0.0`
* `β::R=0.0`
* `stopping_criterion=`[`StopAfterIteration`](@ref)`($(_link(:manifold_dimension)))`$(_sc(:Any))[`StopWhenGradientNormLess`](@ref)`(1e-8)`
* `stopping_criterion=`[`StopAfterIteration`](@ref)`(`$(_link(:manifold_dimension))`)`$(_sc(:Any))[`StopWhenGradientNormLess`](@ref)`(1e-8)`
$(_var(:Keyword, :X))
# See also
@@ -300,7 +298,7 @@ from the [`conjugate_residual`](@ref)
# Fields
$(_field_at_iteration)
$(_var(:Field, :at_iteration))
* `c`: the initial norm
* `ε`: the threshold
* `norm_rk`: the last computed norm of the residual
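The rewritten `stopping_criterion` default above combines two criteria. Assuming the usual Manopt composition of stopping criteria via `|` (stop as soon as either fires), it corresponds to a sketch like the following; the manifold choice is only for illustration.

```julia
using Manopt, Manifolds

M = Sphere(2)
# Stop after `manifold_dimension(M)` iterations, or earlier once the gradient
# (residual) norm drops below 1e-8, whichever criterion triggers first.
sc = StopAfterIteration(manifold_dimension(M)) | StopWhenGradientNormLess(1e-8)
```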
27 changes: 17 additions & 10 deletions src/plans/hessian_plan.jl
@@ -244,7 +244,7 @@ $_doc_ApproxHessian_formula
## Keyword arguments
* `evaluation=`[`AllocatingEvaluation`](@ref)) whether the gradient is given as an allocation function or an in-place ([`InplaceEvaluation`](@ref)).
$(_var(:Keyword, :evaluation))
* `steplength=`2^{-14}``: step length ``c`` to approximate the gradient evaluations
* $_kw_retraction_method_default
$_kw_retraction_method
Expand Down Expand Up @@ -311,7 +311,7 @@ function (f::ApproxHessianFiniteDifference{InplaceEvaluation})(M, Y, p, X)
return Y
end

@doc raw"""
@doc """
ApproxHessianSymmetricRankOne{E, P, G, T, B<:AbstractBasis{ℝ}, VTR, R<:Real} <: AbstractApproxHessian
A functor to approximate the Hessian by the symmetric rank one update.
Expand All @@ -338,8 +338,8 @@ A functor to approximate the Hessian by the symmetric rank one update.
* `initial_operator` (`Matrix{Float64}(I, manifold_dimension(M), manifold_dimension(M))`) the matrix representation of the initial approximating operator.
* `basis` (`DefaultOrthonormalBasis()`) an orthonormal basis in the tangent space of the initial iterate p.
* `nu` (`-1`)
* `evaluation` ([`AllocatingEvaluation`](@ref)) whether the gradient is given as an allocation function or an in-place ([`InplaceEvaluation`](@ref)).
* `vector_transport_method` (`ParallelTransport()`) vector transport ``\mathcal T_{\cdot\gets\cdot}`` to use.
$(_var(:Keyword, :evaluation))
* `vector_transport_method` (`ParallelTransport()`) vector transport ``$(_math(:vector_transport, :symbol))`` to use.
"""
mutable struct ApproxHessianSymmetricRankOne{E,P,G,T,B<:AbstractBasis{ℝ},VTR,R<:Real} <:
AbstractApproxHessian
Expand Down Expand Up @@ -458,26 +458,33 @@ function update_hessian_basis!(M, f::ApproxHessianSymmetricRankOne{InplaceEvalua
return f.gradient!!(M, f.grad_tmp, f.p_tmp)
end

@doc raw"""
@doc """
ApproxHessianBFGS{E, P, G, T, B<:AbstractBasis{ℝ}, VTR, R<:Real} <: AbstractApproxHessian
A functor to approximate the Hessian by the BFGS update.
# Fields
* `gradient!!` the gradient function (either allocating or mutating, see `evaluation` parameter).
* `scale`
* `vector_transport_method` a vector transport to use.
## Internal temporary fields
* `p_tmp` a temporary storage the current point `p`.
* `grad_tmp` a temporary storage for the gradient at the current `p`.
* `matrix` a temporary storage for the matrix representation of the approximating operator.
* `basis` a temporary storage for an orthonormal basis at the current `p`.
# Constructor
ApproxHessianBFGS(M, p, gradF; kwargs...)
## Keyword arguments
* `initial_operator` (`Matrix{Float64}(I, manifold_dimension(M), manifold_dimension(M))`) the matrix representation of the initial approximating operator.
* `basis` (`DefaultOrthonormalBasis()`) an orthonormal basis in the tangent space of the initial iterate p.
* `nu` (`-1`)
* `evaluation` ([`AllocatingEvaluation`](@ref)) whether the gradient is given as an allocation function or an in-place ([`InplaceEvaluation`](@ref)).
* `vector_transport_method` (`ParallelTransport()`) vector transport ``\mathcal T_{\cdot\gets\cdot}`` to use.
$(_var(:Keyword, :evaluation))
$(_var(:Keyword, :vector_transport_method))
"""
mutable struct ApproxHessianBFGS{
E,P,G,T,B<:AbstractBasis{ℝ},VTR<:AbstractVectorTransportMethod
Expand All @@ -500,15 +507,15 @@ function ApproxHessianBFGS(
basis::B=DefaultOrthonormalBasis(),
scale::Bool=true,
evaluation=AllocatingEvaluation(),
vector_transport_method::VTR=ParallelTransport(),
) where {mT<:AbstractManifold,P,G,B<:AbstractBasis{ℝ},VTR<:AbstractVectorTransportMethod}
vector_transport_method::VTM=default_vector_transport_method(M, typeof(p)),
) where {mT<:AbstractManifold,P,G,B<:AbstractBasis{ℝ},VTM<:AbstractVectorTransportMethod}
if evaluation == AllocatingEvaluation()
grad_tmp = gradient(M, p)
elseif evaluation == InplaceEvaluation()
grad_tmp = zero_vector(M, p)
gradient(M, grad_tmp, p)
end
return ApproxHessianBFGS{typeof(evaluation),P,G,typeof(grad_tmp),B,VTR}(
return ApproxHessianBFGS{typeof(evaluation),P,G,typeof(grad_tmp),B,VTM}(
p, gradient, grad_tmp, initial_operator, basis, vector_transport_method, scale
)
end
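Several docstrings above now pull in the shared `evaluation` keyword text, so a small illustration of the two calling conventions it distinguishes may help. The function names `grad_f` and `grad_f!` follow the example sentence from the glossary and are hypothetical; the cost is ``f(p) = ‖p‖^2/2`` on a Euclidean space, whose gradient is ``p``.

```julia
using Manifolds

M = Euclidean(3)
f(M, p) = sum(abs2, p) / 2

# Allocating form (AllocatingEvaluation): returns a new tangent vector.
grad_f(M, p) = copy(p)

# In-place form (InplaceEvaluation): writes the gradient into X and returns it.
function grad_f!(M, X, p)
    copyto!(X, p)
    return X
end

p = [1.0, 2.0, 3.0]
X = zero_vector(M, p)   # preallocate once ...
grad_f!(M, X, p)        # ... and reuse it in every iteration
grad_f(M, p) == X       # both conventions produce the same tangent vector
```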
2 changes: 1 addition & 1 deletion src/plans/higher_order_primal_dual_plan.jl
@@ -70,7 +70,7 @@ end
$(_var(:Field, :p, "m"))
$(_var(:Field, :p, "n", "Q"; M="N"))
$(_var(:Field, :p))
$(_var(:Field, :p; comment=" storing the current iterate"))
$(_var(:Field, :X))
* `primal_stepsize::Float64`: proximal parameter of the primal prox
* `dual_stepsize::Float64`: proximal parameter of the dual prox
4 changes: 2 additions & 2 deletions src/plans/interior_point_Newton_plan.jl
@@ -29,13 +29,13 @@ set_iterate!(s::StepsizeState, M, p) = copyto!(M, s.p, p)
set_gradient!(s::StepsizeState, M, p, X) = copyto!(M, s.X, p, X)

@doc """
InteriorPointNewtonState <: AbstractHessianSolverState
InteriorPointNewtonState{P,T} <: AbstractHessianSolverState
# Fields
* `λ`: the Lagrange multiplier with respect to the equality constraints
* `μ`: the Lagrange multiplier with respect to the inequality constraints
* `p`: the current iterate
$(_var(:Field, :p; comment=" storing the current iterate"))
* `s`: the current slack variable
* `sub_problem`: an [`AbstractManoptProblem`](@ref) problem for the subsolver
* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver
2 changes: 1 addition & 1 deletion src/plans/nonlinear_least_squares_plan.jl
@@ -138,7 +138,7 @@ Describes a Gradient based descent algorithm, with
A default value is given in brackets if a parameter can be left out in initialization.
* $_field_iterate
$(_var(:Field, :p; comment=" storing the current iterate"))
* $_field_stop
* $_field_retr
* `residual_values`: value of ``F`` calculated in the solver setup or the previous iteration
2 changes: 1 addition & 1 deletion src/plans/proximal_plan.jl
@@ -139,7 +139,7 @@ stores options for the [`cyclic_proximal_point`](@ref) algorithm. These are the
# Fields
$(_var(:Field, :p))
$(_var(:Field, :p; comment=" storing the current iterate"))
* $_field_stop
* `λ`: a function for the values of ``λ_k`` per iteration (cycle ``i``)
* `oder_type`: whether to use a randomly permuted sequence (`:FixedRandomOrder`),
6 changes: 3 additions & 3 deletions src/plans/solver_state.jl
@@ -1,17 +1,17 @@

@inline _extract_val(::Val{T}) where {T} = T

@doc raw"""
@doc """
AbstractManoptSolverState
A general super type for all solver states.
# Fields
The following fields are assumed to be default. If you use different ones,
provide the access functions accordingly
adapt the access functions [`get_iterate`](@ref) and [`get_stopping_criterion`](@ref) accordingly
* `p` a point on a manifold with the current iterate
$(_var(:Field, :p; comment=" storing the current iterate"))
* `stop` a [`StoppingCriterion`](@ref).
"""
abstract type AbstractManoptSolverState end
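To complement the reworded field description, here is a guess at what a minimal custom state following that convention could look like: fields `p` and `stop`, plus the two access functions the docstring now points to. The struct name and the exact accessor signatures are illustrative assumptions, not code from the package.

```julia
using Manopt

# A toy state following the documented convention: `p` is the current iterate,
# `stop` the stopping criterion.
mutable struct MyToyState{P,S<:StoppingCriterion} <: AbstractManoptSolverState
    p::P
    stop::S
end

Manopt.get_iterate(s::MyToyState) = s.p
Manopt.get_stopping_criterion(s::MyToyState) = s.stop

s = MyToyState([0.0, 0.0, 1.0], StopAfterIteration(100))
get_iterate(s)   # returns the stored point
```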
54 changes: 28 additions & 26 deletions src/plans/stepsize.jl
@@ -1183,51 +1183,53 @@ function status_summary(a::WolfePowellBinaryLinesearch)
return "$a$s"
end

@doc raw"""
_awng_cases = raw"""
```math
(b_k, ω_k, c_k) = \begin{cases}
\bigl(\hat b_{k-1}, \lVert X_k\rVert_{p_k}, 0 \bigr) & \text{ if } c_{k-1}+1 = \hat c\\
\Bigl(b_{k-1} + \frac{\lVert X_k\rVert_{p_k}^2}{b_{k-1}}, \omega_{k-1}, c_{k-1}+1 \Bigr) & \text{ if } c_{k-1}+1<\hat c
\end{cases}
```
"""
@doc """
AdaptiveWNGradient <: DirectionUpdateRule
Represent an adaptive gradient method introduced by [GrapigliaStella:2023](@cite).
Given a positive threshold ``\hat c \mathbb N``,
an minimal bound ``b_{\mathrm{min}} > 0``,
an initial ``b_0 ≥ b_{\mathrm{min}}``, and a
gradient reduction factor threshold ``\alpha ∈ [0,1)``.
Given a positive threshold ``$(_tex(:hat, "c")) ∈ ℕ``,
a minimal bound ``b_{$(_tex(:text, "min"))} > 0``,
an initial ``b_0 ≥ b_{$(_tex(:text, "min"))}``, and a
gradient reduction factor threshold ``α ∈ [0,1)``.
Set ``c_0=0`` and use ``\omega_0 = \lVert \operatorname{grad} f(p_0) \rvert_{p_0}``.
Set ``c_0=0`` and use ``ω_0 = $(_tex(:norm, "$(_tex(:grad)) f(p_0)"; index="p_0"))``.
For the first iterate use the initial step size ``s_0 = \frac{1}{b_0}``.
For the first iterate use the initial step size ``s_0 = $(_tex(:frac, "1", "b_0"))``.
Then, given the last gradient ``X_{k-1} = \operatorname{grad} f(x_{k-1})``,
and a previous ``\omega_{k-1}``, the values ``(b_k, \omega_k, c_k)`` are computed
using ``X_k = \operatorname{grad} f(p_k)`` and the following cases
Then, given the last gradient ``X_{k-1} = $(_tex(:grad)) f(x_{k-1})``,
and a previous ``ω_{k-1}``, the values ``(b_k, ω_k, c_k)`` are computed
using ``X_k = $(_tex(:grad)) f(p_k)`` and the following cases
If ``\lVert X_k \rVert_{p_k} \leq \alpha\omega_{k-1}``, then let
``\hat b_{k-1} ∈ [b_\mathrm{min},b_{k-1}]`` and set
If ``$(_tex(:norm, "X_k"; index="p_k")) ≤ αω_{k-1}``, then let
``$(_tex(:hat, "b"))_{k-1} ∈ [b_{$(_tex(:text, "min"))},b_{k-1}]`` and set
```math
(b_k, \omega_k, c_k) = \begin{cases}
\bigl(\hat b_{k-1}, \lVert X_k\rVert_{p_k}, 0 \bigr) & \text{ if } c_{k-1}+1 = \hat c\\
\Bigl(b_{k-1} + \frac{\lVert X_k\rVert_{p_k}^2}{b_{k-1}}, \omega_{k-1}, c_{k-1}+1 \Bigr) & \text{ if } c_{k-1}+1<\hat c
\end{cases}
```
$(_awng_cases)
If ``\lVert X_k \rVert_{p_k} > \alpha\omega_{k-1}``, the set
If ``$(_tex(:norm, "X_k"; index="p_k")) > αω_{k-1}``, then set
```math
(b_k, \omega_k, c_k) =
\Bigl( b_{k-1} + \frac{\lVert X_k\rVert_{p_k}^2}{b_{k-1}}, \omega_{k-1}, 0)
(b_k, ω_k, c_k) = $(_tex(:Bigl))( b_{k-1} + $(_tex(:frac, _tex(:norm, "X_k"; index="p_k")*"^2", "b_{k-1}")), ω_{k-1}, 0 $(_tex(:Bigr)))
```
and return the step size ``s_k = \frac{1}{b_k}``.
and return the step size ``s_k = $(_tex(:frac, "1", "b_k"))``.
Note that for ``α=0`` this is the Riemannian variant of `WNGRad`.
# Fields
* `count_threshold::Int=4`: an `Integer` for ``\hat c``
* `minimal_bound::Float64=1e-4`: for ``b_{\mathrm{min}}``
* `count_threshold::Int=4`: an `Integer` for ``$(_tex(:hat, "c"))``
* `minimal_bound::Float64=1e-4`: for ``b_{$(_tex(:text, "min"))}``
* `alternate_bound::Function=(bk, hat_c) -> min(gradient_bound, max(gradient_bound, bk/(3*hat_c)`:
how to determine ``\hat b_k`` as a function of `(bmin, bk, hat_c) -> hat_bk`
how to determine ``$(_tex(:hat, "b"))_k`` as a function of `(bmin, bk, hat_c) -> hat_bk`
* `gradient_reduction::Float64=0.9`:
* `gradient_bound` `norm(M, p0, grad_f(M,p0))` the bound ``b_k``.
@@ -1243,7 +1245,7 @@ as well as the internal fields
Where all fields with defaults are keyword arguments and additional keyword arguments are
* `adaptive=true`: switches the `gradient_reduction ``α`` to `0`.
* `evaluation=AllocatingEvaluation()`: specifies whether the gradient (that is used for initialisation only) is mutating or allocating
$(_var(:Keyword, :evaluation))
"""
mutable struct AdaptiveWNGradient{I<:Integer,R<:Real,F<:Function} <: Stepsize
count_threshold::I
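The case distinction shown above fits in a few lines of code. The following standalone sketch implements the ``(b_k, ω_k, c_k)`` update exactly as stated, with the fixed admissible choice ``$(_tex(:hat, "b"))_{k-1} = b_{$(_tex(:text, "min"))}``; it illustrates the formulas and is not the package's `AdaptiveWNGradient` functor.

```julia
# Standalone illustration of the (bₖ, ωₖ, cₖ) update described above.
# `norm_Xk` is ‖Xₖ‖_{pₖ}; `b_hat` plays the role of b̂_{k-1} ∈ [b_min, b_{k-1}].
function wng_update(b_prev, ω_prev, c_prev, norm_Xk; α=0.9, ĉ=4, b_min=1e-4)
    b_hat = b_min                        # one admissible choice of b̂_{k-1}
    if norm_Xk ≤ α * ω_prev              # sufficient gradient reduction
        if c_prev + 1 == ĉ
            return (b_hat, norm_Xk, 0)
        else
            return (b_prev + norm_Xk^2 / b_prev, ω_prev, c_prev + 1)
        end
    else                                 # no sufficient reduction: reset the counter
        return (b_prev + norm_Xk^2 / b_prev, ω_prev, 0)
    end
end

# The step size in iteration k is then sₖ = 1 / bₖ:
b, ω, c = wng_update(1.0, 1.0, 0, 0.5)
s = 1 / b
```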