From a1578e14622b12cceaa102276eb955ae05fd9481 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Thu, 22 Feb 2024 10:59:02 +0100 Subject: [PATCH] Allow passing an initial partial Schur decomposition (#143) This commit allows users to 1. avoid allocations by reusing existing arrays/buffers 2. specify custom matrix types for V, H and temporaries 3. starting the algorithm from an existing partial Schur decomposition by exporting `ArnoldiWorkspace`, which holds the major arrays, and `partialschur!(A, ::ArnoldiWorkspace; ...)`. Co-authored-by: Samuel Powell --- docs/src/index.md | 47 +++++++++++++++++++++++++++++ src/ArnoldiMethod.jl | 56 +++++++++++++++++++++++++++++----- src/run.jl | 70 +++++++++++++++++++++++++++++++++++++------ test/partial_schur.jl | 19 +++++++++++- 4 files changed, 175 insertions(+), 17 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index ca7caca..a403f9f 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -108,6 +108,7 @@ $A$ on the diagonal. ```@docs partialschur +partialschur! ``` ## Partial eigendecomposition @@ -153,6 +154,52 @@ ArnoldiMethod.PartialSchur ArnoldiMethod.History ``` +## Passing an initial guess + +If you have a good guess for a target eigenvector, you can potentially speed up +the method by passing it through `partialschur(A, v1=my_initial_vector)`. This +vector is then used to build the Krylov subspace. + +## Pre-allocating and custom matrix types + +If you call `partialschur` multiple times, and you want to allocate large arrays and buffers only +once ahead of time, you can allocate the relevant matrices manually and pass them to the algorithm. + +The same can be done if you want to work with custom matrix types. + +```@docs +ArnoldiMethod.ArnoldiWorkspace +``` + +## Starting from an initial partial Schur decomposition + +You can also use [`ArnoldiWorkspace`](@ref) to start the algorithm from an initial partial Schur +decomposition. This is useful if you already found a few Schur vectors, and want to continue to +find more. + +```julia +A = rand(100, 100) + +# Pre-allocate the relevant Krylov subspace arrays +V, H = rand(100, 21), rand(21, 20) +arnoldi = ArnoldiWorkspace(V, H) + +# Find a few eigenvalues +_, info_1 = partialschur!(A, arnoldi, nev = 3, tol = 1e-12) + +# Then continue to find a couple more. Notice: 5 in total, so 2 more. Allow larger errors by +# changin `tol`. +F, info_2 = partialschur!(A, arnoldi, nev = 5, start_from = info_1.nconverged + 1 , tol = 1e-8) +@show norm(A * F.Q - F.Q * F.R) +``` + +!!! note "Setting `start_from` correctly" + + As pointed out above, in real arithmetic the algorithm may find one eigenvalue more than + requested if it corresponds to a conjugate pair. Also it may find fewer, if not all + converge. So if you reuse your `ArnoldiWorkspace`, make sure to set `start_from` to one plus + the number of previously converged eigenvalues. + ## What algorithm is ArnoldiMethod.jl? The underlying algorithm is the restarted Arnoldi method, which be viewed as a diff --git a/src/ArnoldiMethod.jl b/src/ArnoldiMethod.jl index 1a50ab9..122fd9b 100644 --- a/src/ArnoldiMethod.jl +++ b/src/ArnoldiMethod.jl @@ -5,17 +5,46 @@ using StaticArrays using Base: RefValue, OneTo -export partialschur, LM, SR, LR, SI, LI, partialeigen +export partialschur, partialschur!, LM, SR, LR, SI, LI, partialeigen, ArnoldiWorkspace """ -ArnoldiWorkspace(n, k) → ArnoldiWorkspace + ArnoldiWorkspace(n, k) → ArnoldiWorkspace + ArnoldiWorkspace(v1, k) → ArnoldiWorkspace + ArnoldiWorkspace(V, H; V_tmp, Q) → ArnoldiWorkspace Holds the large arrays for the Arnoldi relation: Vₖ₊₁ and Hₖ are matrices that satisfy A * Vₖ = Vₖ₊₁ * Hₖ, where Vₖ₊₁ is orthonormal of size n × (k+1) and Hₖ upper Hessenberg of size (k+1) × k. The matrices V_tmp and Q are used for restarts, and have similar size as Vₖ₊₁ and Hₖ (but Q is k × k, not k+1 × k). + +## Examples + +```julia +# allocates workspace for 20-dimensional Krylov subspace +arnoldi = ArnoldiWorkspace(100, 20) + +# allocate workspace for 20-dimensional Krylov subspace, with initial vector ones(100) copied into +# the first column of V +arnoldi = ArnoldiWorkspace(ones(100), 20) + +# manually allocate workspace with V, H +V = Matrix{Float64}(undef, 100, 21) +H = Matrix{Float64}(undef, 21, 20) +arnoldi = ArnoldiWorkspace(V, H) + +# manually allocate all arrays, including temporaries +V, tmp = Matrix{Float64}(undef, 100, 21), Matrix{Float64}(undef, 100, 21) +H, Q = Matrix{Float64}(undef, 21, 20), Matrix{Float64}(undef, 20, 20) +arnoldi = ArnoldiWorkspace(V, H, V_tmp = tmp, Q = Q) +``` """ -struct ArnoldiWorkspace{T,TV<:AbstractMatrix{T},TH<:AbstractMatrix{T}} +struct ArnoldiWorkspace{ + T, + TV<:AbstractMatrix{T}, + TH<:AbstractMatrix{T}, + TVtmp<:AbstractMatrix{T}, + TQ<:AbstractMatrix{T}, +} # Orthonormal basis of the Krylov subspace. V::TV @@ -23,10 +52,10 @@ struct ArnoldiWorkspace{T,TV<:AbstractMatrix{T},TH<:AbstractMatrix{T}} H::TH # Temporary matrix similar to V, used to restart. - V_tmp::TV + V_tmp::TVtmp # Unitary matrix size of (square) H to do a change of basis. - Q::TH + Q::TQ function ArnoldiWorkspace(::Type{T}, matrix_order::Int, krylov_dimension::Int) where {T} # Without an initial vector. @@ -36,7 +65,7 @@ struct ArnoldiWorkspace{T,TV<:AbstractMatrix{T},TH<:AbstractMatrix{T}} V_tmp = similar(V) H = zeros(T, krylov_dimension + 1, krylov_dimension) Q = similar(H, krylov_dimension, krylov_dimension) - return new{T,typeof(V),typeof(H)}(V, H, V_tmp, Q) + return new{T,typeof(V),typeof(H),typeof(V_tmp),typeof(Q)}(V, H, V_tmp, Q) end function ArnoldiWorkspace(v1::AbstractVector{T}, krylov_dimension::Int) where {T} @@ -46,7 +75,20 @@ struct ArnoldiWorkspace{T,TV<:AbstractMatrix{T},TH<:AbstractMatrix{T}} H = similar(V, krylov_dimension + 1, krylov_dimension) fill!(H, zero(eltype(H))) Q = similar(H, krylov_dimension, krylov_dimension) - return new{T,typeof(V),typeof(H)}(V, H, V_tmp, Q) + return new{T,typeof(V),typeof(H),typeof(V_tmp),typeof(Q)}(V, H, V_tmp, Q) + end + + function ArnoldiWorkspace( + V::AbstractMatrix{T}, + H::AbstractMatrix{T}; + V_tmp::AbstractMatrix{T} = similar(V), + Q::AbstractMatrix{T} = similar(H, size(H, 2), size(H, 2)), + ) where {T} + size(V, 2) == size(H, 1) || + throw(ArgumentError("V should have the same number of columns as H has rows.")) + size(H, 1) == size(H, 2) + 1 || + throw(ArgumentError("H should have one more row than it has columns.")) + return new{T,typeof(V),typeof(H),typeof(V_tmp),typeof(Q)}(V, H, V_tmp, Q) end end diff --git a/src/run.jl b/src/run.jl index 11171c3..d40d5f0 100644 --- a/src/run.jl +++ b/src/run.jl @@ -35,6 +35,8 @@ The most important keyword arguments: | `tol` | `Real` | `√eps` | Tolerance for convergence: ‖Ax - xλ‖₂ < tol * ‖λ‖ | | `v1` | `AbstractVector` | `nothing` | Optional starting vector for the Krylov subspace | +Regarding the initial vector `v1`: it will not be mutated, and it does not have to be normalized. + The target `which` can be any of: | Target | Description | @@ -113,7 +115,7 @@ function partialschur( if v1 === nothing arnoldi = ArnoldiWorkspace(vtype(A), size(A, 1), maxdim) - reinitialize!(arnoldi, 0, v -> rand!(v)) + reinitialize!(arnoldi, 0, rand!) else length(v1) == size(A, 1) || throw(ArgumentError("v1 should have the same dimension as A")) @@ -123,6 +125,56 @@ function partialschur( _partialschur(A, arnoldi, mindim, maxdim, nev, tol, restarts, _which) end +""" +```julia +partialschur!(A, arnoldi; start_from, initialize, nev, which, tol, mindim, maxdim, restarts) → PartialSchur, History +``` + +This is a variant of [`partialschur`](@ref) that operates on a pre-allocated +[`ArnoldiWorkspace`](@ref) instance. This is useful in the following cases: + +1. To provide an initial partial Schur decomposition. In that case, set `start_from` to the + number of Schur vectors, and `partialschur` will continue from there. Notice that if you have + only one Schur vector, it can be simpler to pass it as `v1` instead. +2. To provide a custom array type for the basis of the Krylov subspace, the Hessenberg matrix, and + some temporaries. +3. To avoid allocations when calling `partialschur` repeatedly. + +You can also provide the initial vector that induces the Krylov subspace in the first column +arnoldi.V[:, 1]. If you do that, set `initialize` explicitly to `false`. + +Upon return, note that the PartialSchur struct will contain views of arnoldi.V and arnoldi.H, no +copies are made. +""" +function partialschur!( + A, + arnoldi::ArnoldiWorkspace{T}; + start_from::Int = 1, + initialize::Bool = start_from == 1, + nev::Int = min(6, size(A, 1)), + which::Union{Target,Symbol} = LM(), + tol::Real = sqrt(eps(real(vtype(A)))), + mindim::Int = min(max(10, nev), size(A, 1), size(arnoldi.V, 2) - 1), + maxdim::Int = min(max(20, 2nev), size(A, 1), size(arnoldi.V, 2) - 1), + restarts::Int = 200, +) where {T} + s = checksquare(A) + nev < 1 && throw(ArgumentError("nev cannot be less than 1")) + nev ≤ mindim ≤ maxdim ≤ s || throw( + ArgumentError( + "nev ≤ mindim ≤ maxdim ≤ size(A, 1) does not hold, got $nev ≤ $mindim ≤ $maxdim ≤ $s", + ), + ) + maxdim < size(arnoldi.V, 2) || + throw(ArgumentError("maxdim should be strictly less than size(arnoldi.V, 2)")) + 1 ≤ start_from ≤ maxdim || + throw(ArgumentError("start_from should be between 1 and maxdim")) + _which = which isa Target ? which : _symbol_to_target(which) + fill!(view(arnoldi.H, :, start_from:size(arnoldi.H, 2)), zero(T)) + initialize && reinitialize!(arnoldi, start_from - 1) + _partialschur(A, arnoldi, mindim, maxdim, nev, tol, restarts, _which, start_from) +end + _symbol_to_target(sym::Symbol) = sym == :LM ? LM() : sym == :LR ? LR() : @@ -175,11 +227,12 @@ function _partialschur( tol::Ttol, restarts::Int, which::Target, + active::Int = 1, # when > 1, assumes we already have a converged schur form ) where {T,Ttol<:Real} # Unpack for convenience H = arnoldi.H V = arnoldi.V - Vtmp = arnoldi.V_tmp + V_tmp = arnoldi.V_tmp Q = arnoldi.Q # We only need to store one eigenvector of the Hessenberg matrix. @@ -201,15 +254,14 @@ function _partialschur( # We keep length(active:k) ≈ mindim, but we should also retain # space to add new directions to the Krylov subspace, so # length(k+1:maxdim) should not be too small either. - active = 1 k = mindim effective_nev = nev # Bookkeeping for number of mv-products - prods = mindim + prods = length(active:mindim) # Initialize an Arnoldi relation of size `mindim` - iterate_arnoldi!(A, arnoldi, 1:mindim) + iterate_arnoldi!(A, arnoldi, active:mindim) for iter = 1:restarts @@ -305,8 +357,8 @@ function _partialschur( restore_arnoldi!(H, nlock + 1, k, Q, G) # Finally do the change of basis to get the length `k` Arnoldi relation. - @views mul!(Vtmp[:, purge:k], V[:, purge:maxdim], Q[purge:maxdim, purge:k]) - @views copyto!(V[:, purge:k], Vtmp[:, purge:k]) + @views mul!(V_tmp[:, purge:k], V[:, purge:maxdim], Q[purge:maxdim, purge:k]) + @views copyto!(V[:, purge:k], V_tmp[:, purge:k]) @views copyto!(V[:, k+1], V[:, maxdim+1]) # The active part @@ -324,8 +376,8 @@ function _partialschur( sortschur!(H, copyto!(Q, I), nconverged, ordering) # Change of basis - @views mul!(Vtmp[:, 1:nconverged], Vconverged, Q[1:nconverged, 1:nconverged]) - @views copyto!(Vconverged, Vtmp[:, 1:nconverged]) + @views mul!(V_tmp[:, 1:nconverged], Vconverged, Q[1:nconverged, 1:nconverged]) + @views copyto!(Vconverged, V_tmp[:, 1:nconverged]) # Copy the eigenvalues just one more time copy_eigenvalues!(ritz.λs, H, OneTo(nconverged)) diff --git a/test/partial_schur.jl b/test/partial_schur.jl index 184ac45..1e0bacd 100644 --- a/test/partial_schur.jl +++ b/test/partial_schur.jl @@ -1,6 +1,6 @@ using Test -using ArnoldiMethod: partialschur, vtype, eigenvalues, SR +using ArnoldiMethod: partialschur, partialschur!, vtype, eigenvalues, SR, ArnoldiWorkspace using LinearAlgebra @testset "Zero eigenvalues & low-rank matrices" begin @@ -118,3 +118,20 @@ end @test norm(A * schur.Q - schur.Q * schur.R) == 0 end end + +@testset "Passing an initial Schur decomp" begin + # First compute a few eigenvalues + A = rand(100, 100) + V, H = rand(100, 21), rand(21, 20) + arnoldi = ArnoldiWorkspace(V, H) + F, history = partialschur!(A, arnoldi, nev = 3, tol = 1e-12) + @test history.converged + @test history.nconverged in 3:4 + @test norm(A * F.Q - F.Q * F.R) < 1e-10 + + # Then continue to find a couple more, at higher tolerance + F, history = partialschur!(A, arnoldi, nev = 5, start_from = history.nconverged + 1 , tol = 1e-8) + @test history.converged + @test history.nconverged in 5:6 + @test norm(A * F.Q - F.Q * F.R) < 1e-6 +end \ No newline at end of file