From a1578e14622b12cceaa102276eb955ae05fd9481 Mon Sep 17 00:00:00 2001
From: Harmen Stoppels <harmenstoppels@gmail.com>
Date: Thu, 22 Feb 2024 10:59:02 +0100
Subject: [PATCH] Allow passing an initial partial Schur decomposition (#143)

This commit allows users to

1. avoid allocations by reusing existing arrays/buffers
2. specify custom matrix types for V, H and temporaries
3. starting the algorithm from an existing partial Schur decomposition

by exporting `ArnoldiWorkspace`, which holds the major arrays, and
`partialschur!(A, ::ArnoldiWorkspace; ...)`.

Co-authored-by: Samuel Powell <samuelpowell@users.noreply.github.com>
---
 docs/src/index.md     | 47 +++++++++++++++++++++++++++++
 src/ArnoldiMethod.jl  | 56 +++++++++++++++++++++++++++++-----
 src/run.jl            | 70 +++++++++++++++++++++++++++++++++++++------
 test/partial_schur.jl | 19 +++++++++++-
 4 files changed, 175 insertions(+), 17 deletions(-)

diff --git a/docs/src/index.md b/docs/src/index.md
index ca7caca..a403f9f 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -108,6 +108,7 @@ $A$ on the diagonal.
 
 ```@docs
 partialschur
+partialschur!
 ```
 
 ## Partial eigendecomposition
@@ -153,6 +154,52 @@ ArnoldiMethod.PartialSchur
 ArnoldiMethod.History
 ```
 
+## Passing an initial guess
+
+If you have a good guess for a target eigenvector, you can potentially speed up
+the method by passing it through `partialschur(A, v1=my_initial_vector)`. This
+vector is then used to build the Krylov subspace.
+
+## Pre-allocating and custom matrix types
+
+If you call `partialschur` multiple times, and you want to allocate large arrays and buffers only
+once ahead of time, you can allocate the relevant matrices manually and pass them to the algorithm.
+
+The same can be done if you want to work with custom matrix types.
+
+```@docs
+ArnoldiMethod.ArnoldiWorkspace
+```
+
+## Starting from an initial partial Schur decomposition
+
+You can also use [`ArnoldiWorkspace`](@ref) to start the algorithm from an initial partial Schur
+decomposition. This is useful if you already found a few Schur vectors, and want to continue to
+find more.
+
+```julia
+A = rand(100, 100)
+
+# Pre-allocate the relevant Krylov subspace arrays
+V, H = rand(100, 21), rand(21, 20)
+arnoldi = ArnoldiWorkspace(V, H)
+
+# Find a few eigenvalues
+_, info_1 = partialschur!(A, arnoldi, nev = 3, tol = 1e-12)
+
+# Then continue to find a couple more. Notice: 5 in total, so 2 more. Allow larger errors by
+# changin `tol`.
+F, info_2 = partialschur!(A, arnoldi, nev = 5, start_from = info_1.nconverged + 1 , tol = 1e-8)
+@show norm(A * F.Q - F.Q * F.R)
+```
+
+!!! note "Setting `start_from` correctly"
+
+    As pointed out above, in real arithmetic the algorithm may find one eigenvalue more than
+    requested if it corresponds to a conjugate pair. Also it may find fewer, if not all
+    converge. So if you reuse your `ArnoldiWorkspace`, make sure to set `start_from` to one plus
+    the number of previously converged eigenvalues.
+
 ## What algorithm is ArnoldiMethod.jl?
 
 The underlying algorithm is the restarted Arnoldi method, which be viewed as a
diff --git a/src/ArnoldiMethod.jl b/src/ArnoldiMethod.jl
index 1a50ab9..122fd9b 100644
--- a/src/ArnoldiMethod.jl
+++ b/src/ArnoldiMethod.jl
@@ -5,17 +5,46 @@ using StaticArrays
 
 using Base: RefValue, OneTo
 
-export partialschur, LM, SR, LR, SI, LI, partialeigen
+export partialschur, partialschur!, LM, SR, LR, SI, LI, partialeigen, ArnoldiWorkspace
 
 """
-ArnoldiWorkspace(n, k) → ArnoldiWorkspace
+    ArnoldiWorkspace(n, k) → ArnoldiWorkspace
+    ArnoldiWorkspace(v1, k) → ArnoldiWorkspace
+    ArnoldiWorkspace(V, H; V_tmp, Q) → ArnoldiWorkspace
 
 Holds the large arrays for the Arnoldi relation: Vₖ₊₁ and Hₖ are matrices that
 satisfy A * Vₖ = Vₖ₊₁ * Hₖ, where Vₖ₊₁ is orthonormal of size n × (k+1) and Hₖ upper 
 Hessenberg of size (k+1) × k. The matrices V_tmp and Q are used for restarts, and
 have similar size as Vₖ₊₁ and Hₖ (but Q is k × k, not k+1 × k).
+
+## Examples
+
+```julia
+# allocates workspace for 20-dimensional Krylov subspace
+arnoldi = ArnoldiWorkspace(100, 20)
+
+# allocate workspace for 20-dimensional Krylov subspace, with initial vector ones(100) copied into
+# the first column of V
+arnoldi = ArnoldiWorkspace(ones(100), 20)
+
+# manually allocate workspace with V, H
+V = Matrix{Float64}(undef, 100, 21)
+H = Matrix{Float64}(undef, 21, 20)
+arnoldi = ArnoldiWorkspace(V, H)
+
+# manually allocate all arrays, including temporaries
+V, tmp = Matrix{Float64}(undef, 100, 21), Matrix{Float64}(undef, 100, 21)
+H, Q = Matrix{Float64}(undef, 21, 20), Matrix{Float64}(undef, 20, 20)
+arnoldi = ArnoldiWorkspace(V, H, V_tmp = tmp, Q = Q)
+```
 """
-struct ArnoldiWorkspace{T,TV<:AbstractMatrix{T},TH<:AbstractMatrix{T}}
+struct ArnoldiWorkspace{
+    T,
+    TV<:AbstractMatrix{T},
+    TH<:AbstractMatrix{T},
+    TVtmp<:AbstractMatrix{T},
+    TQ<:AbstractMatrix{T},
+}
     # Orthonormal basis of the Krylov subspace.
     V::TV
 
@@ -23,10 +52,10 @@ struct ArnoldiWorkspace{T,TV<:AbstractMatrix{T},TH<:AbstractMatrix{T}}
     H::TH
 
     # Temporary matrix similar to V, used to restart.
-    V_tmp::TV
+    V_tmp::TVtmp
 
     # Unitary matrix size of (square) H to do a change of basis.
-    Q::TH
+    Q::TQ
 
     function ArnoldiWorkspace(::Type{T}, matrix_order::Int, krylov_dimension::Int) where {T}
         # Without an initial vector.
@@ -36,7 +65,7 @@ struct ArnoldiWorkspace{T,TV<:AbstractMatrix{T},TH<:AbstractMatrix{T}}
         V_tmp = similar(V)
         H = zeros(T, krylov_dimension + 1, krylov_dimension)
         Q = similar(H, krylov_dimension, krylov_dimension)
-        return new{T,typeof(V),typeof(H)}(V, H, V_tmp, Q)
+        return new{T,typeof(V),typeof(H),typeof(V_tmp),typeof(Q)}(V, H, V_tmp, Q)
     end
 
     function ArnoldiWorkspace(v1::AbstractVector{T}, krylov_dimension::Int) where {T}
@@ -46,7 +75,20 @@ struct ArnoldiWorkspace{T,TV<:AbstractMatrix{T},TH<:AbstractMatrix{T}}
         H = similar(V, krylov_dimension + 1, krylov_dimension)
         fill!(H, zero(eltype(H)))
         Q = similar(H, krylov_dimension, krylov_dimension)
-        return new{T,typeof(V),typeof(H)}(V, H, V_tmp, Q)
+        return new{T,typeof(V),typeof(H),typeof(V_tmp),typeof(Q)}(V, H, V_tmp, Q)
+    end
+
+    function ArnoldiWorkspace(
+        V::AbstractMatrix{T},
+        H::AbstractMatrix{T};
+        V_tmp::AbstractMatrix{T} = similar(V),
+        Q::AbstractMatrix{T} = similar(H, size(H, 2), size(H, 2)),
+    ) where {T}
+        size(V, 2) == size(H, 1) ||
+            throw(ArgumentError("V should have the same number of columns as H has rows."))
+        size(H, 1) == size(H, 2) + 1 ||
+            throw(ArgumentError("H should have one more row than it has columns."))
+        return new{T,typeof(V),typeof(H),typeof(V_tmp),typeof(Q)}(V, H, V_tmp, Q)
     end
 end
 
diff --git a/src/run.jl b/src/run.jl
index 11171c3..d40d5f0 100644
--- a/src/run.jl
+++ b/src/run.jl
@@ -35,6 +35,8 @@ The most important keyword arguments:
 | `tol` | `Real` | `√eps` | Tolerance for convergence: ‖Ax - xλ‖₂ < tol * ‖λ‖ |
 | `v1` | `AbstractVector` | `nothing` | Optional starting vector for the Krylov subspace |
 
+Regarding the initial vector `v1`: it will not be mutated, and it does not have to be normalized.
+
 The target `which` can be any of:
 
 | Target          | Description                                    |
@@ -113,7 +115,7 @@ function partialschur(
 
     if v1 === nothing
         arnoldi = ArnoldiWorkspace(vtype(A), size(A, 1), maxdim)
-        reinitialize!(arnoldi, 0, v -> rand!(v))
+        reinitialize!(arnoldi, 0, rand!)
     else
         length(v1) == size(A, 1) ||
             throw(ArgumentError("v1 should have the same dimension as A"))
@@ -123,6 +125,56 @@ function partialschur(
     _partialschur(A, arnoldi, mindim, maxdim, nev, tol, restarts, _which)
 end
 
+"""
+```julia
+partialschur!(A, arnoldi; start_from, initialize, nev, which, tol, mindim, maxdim, restarts) → PartialSchur, History
+```
+
+This is a variant of [`partialschur`](@ref) that operates on a pre-allocated
+[`ArnoldiWorkspace`](@ref) instance. This is useful in the following cases:
+
+1. To provide an initial partial Schur decomposition. In that case, set `start_from` to the
+   number of Schur vectors, and `partialschur` will continue from there. Notice that if you have
+   only one Schur vector, it can be simpler to pass it as `v1` instead.
+2. To provide a custom array type for the basis of the Krylov subspace, the Hessenberg matrix, and
+   some temporaries.
+3. To avoid allocations when calling `partialschur` repeatedly.
+
+You can also provide the initial vector that induces the Krylov subspace in the first column
+arnoldi.V[:, 1]. If you do that, set `initialize` explicitly to `false`.
+
+Upon return, note that the PartialSchur struct will contain views of arnoldi.V and arnoldi.H, no
+copies are made.
+"""
+function partialschur!(
+    A,
+    arnoldi::ArnoldiWorkspace{T};
+    start_from::Int = 1,
+    initialize::Bool = start_from == 1,
+    nev::Int = min(6, size(A, 1)),
+    which::Union{Target,Symbol} = LM(),
+    tol::Real = sqrt(eps(real(vtype(A)))),
+    mindim::Int = min(max(10, nev), size(A, 1), size(arnoldi.V, 2) - 1),
+    maxdim::Int = min(max(20, 2nev), size(A, 1), size(arnoldi.V, 2) - 1),
+    restarts::Int = 200,
+) where {T}
+    s = checksquare(A)
+    nev < 1 && throw(ArgumentError("nev cannot be less than 1"))
+    nev ≤ mindim ≤ maxdim ≤ s || throw(
+        ArgumentError(
+            "nev ≤ mindim ≤ maxdim ≤ size(A, 1) does not hold, got $nev ≤ $mindim ≤ $maxdim ≤ $s",
+        ),
+    )
+    maxdim < size(arnoldi.V, 2) ||
+        throw(ArgumentError("maxdim should be strictly less than size(arnoldi.V, 2)"))
+    1 ≤ start_from ≤ maxdim ||
+        throw(ArgumentError("start_from should be between 1 and maxdim"))
+    _which = which isa Target ? which : _symbol_to_target(which)
+    fill!(view(arnoldi.H, :, start_from:size(arnoldi.H, 2)), zero(T))
+    initialize && reinitialize!(arnoldi, start_from - 1)
+    _partialschur(A, arnoldi, mindim, maxdim, nev, tol, restarts, _which, start_from)
+end
+
 _symbol_to_target(sym::Symbol) =
     sym == :LM ? LM() :
     sym == :LR ? LR() :
@@ -175,11 +227,12 @@ function _partialschur(
     tol::Ttol,
     restarts::Int,
     which::Target,
+    active::Int = 1, # when > 1, assumes we already have a converged schur form
 ) where {T,Ttol<:Real}
     # Unpack for convenience
     H = arnoldi.H
     V = arnoldi.V
-    Vtmp = arnoldi.V_tmp
+    V_tmp = arnoldi.V_tmp
     Q = arnoldi.Q
 
     # We only need to store one eigenvector of the Hessenberg matrix.
@@ -201,15 +254,14 @@ function _partialschur(
     #                        We keep length(active:k) ≈ mindim, but we should also retain
     #                        space to add new directions to the Krylov subspace, so 
     #                        length(k+1:maxdim) should not be too small either.
-    active = 1
     k = mindim
     effective_nev = nev
 
     # Bookkeeping for number of mv-products
-    prods = mindim
+    prods = length(active:mindim)
 
     # Initialize an Arnoldi relation of size `mindim`
-    iterate_arnoldi!(A, arnoldi, 1:mindim)
+    iterate_arnoldi!(A, arnoldi, active:mindim)
 
     for iter = 1:restarts
 
@@ -305,8 +357,8 @@ function _partialschur(
         restore_arnoldi!(H, nlock + 1, k, Q, G)
 
         # Finally do the change of basis to get the length `k` Arnoldi relation.
-        @views mul!(Vtmp[:, purge:k], V[:, purge:maxdim], Q[purge:maxdim, purge:k])
-        @views copyto!(V[:, purge:k], Vtmp[:, purge:k])
+        @views mul!(V_tmp[:, purge:k], V[:, purge:maxdim], Q[purge:maxdim, purge:k])
+        @views copyto!(V[:, purge:k], V_tmp[:, purge:k])
         @views copyto!(V[:, k+1], V[:, maxdim+1])
 
         # The active part 
@@ -324,8 +376,8 @@ function _partialschur(
     sortschur!(H, copyto!(Q, I), nconverged, ordering)
 
     # Change of basis
-    @views mul!(Vtmp[:, 1:nconverged], Vconverged, Q[1:nconverged, 1:nconverged])
-    @views copyto!(Vconverged, Vtmp[:, 1:nconverged])
+    @views mul!(V_tmp[:, 1:nconverged], Vconverged, Q[1:nconverged, 1:nconverged])
+    @views copyto!(Vconverged, V_tmp[:, 1:nconverged])
 
     # Copy the eigenvalues just one more time
     copy_eigenvalues!(ritz.λs, H, OneTo(nconverged))
diff --git a/test/partial_schur.jl b/test/partial_schur.jl
index 184ac45..1e0bacd 100644
--- a/test/partial_schur.jl
+++ b/test/partial_schur.jl
@@ -1,6 +1,6 @@
 using Test
 
-using ArnoldiMethod: partialschur, vtype, eigenvalues, SR
+using ArnoldiMethod: partialschur, partialschur!, vtype, eigenvalues, SR, ArnoldiWorkspace
 using LinearAlgebra
 
 @testset "Zero eigenvalues & low-rank matrices" begin
@@ -118,3 +118,20 @@ end
         @test norm(A * schur.Q - schur.Q * schur.R) == 0
     end
 end
+
+@testset "Passing an initial Schur decomp" begin
+    # First compute a few eigenvalues
+    A = rand(100, 100)
+    V, H = rand(100, 21), rand(21, 20)
+    arnoldi = ArnoldiWorkspace(V, H)
+    F, history = partialschur!(A, arnoldi, nev = 3, tol = 1e-12)
+    @test history.converged
+    @test history.nconverged in 3:4
+    @test norm(A * F.Q - F.Q * F.R) < 1e-10
+
+    # Then continue to find a couple more, at higher tolerance
+    F, history = partialschur!(A, arnoldi, nev = 5, start_from = history.nconverged + 1 , tol = 1e-8)
+    @test history.converged
+    @test history.nconverged in 5:6
+    @test norm(A * F.Q - F.Q * F.R) < 1e-6
+end
\ No newline at end of file