Skip to content

Commit

Permalink
Stable Wallis iteration. 20x speedup
Browse files Browse the repository at this point in the history
  • Loading branch information
Panadestein committed Dec 4, 2023
1 parent 059e8ce commit e098096
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 67 deletions.
29 changes: 9 additions & 20 deletions GX-AnalyticContinuation/api/gx_ac.f90
Original file line number Diff line number Diff line change
Expand Up @@ -42,39 +42,28 @@ subroutine thiele_pade_mp_api(n_par, x_ref, y_ref, x_query, y_query, num_query)
!! @param[in] do_greedy - whether to use the default greedy algorithm or the naive one
!! @param[out] y_query - array of the interpolated values at x_query
subroutine thiele_pade_api(n_par, x_ref, y_ref, x_query, y_query, do_greedy)
integer, intent(in) :: n_par
complex(kind=dp), dimension(:), intent(in) :: x_ref, y_ref, x_query
complex(kind=dp), dimension(:), intent(out) :: y_query
logical, optional, intent(in) :: do_greedy
integer, intent(in) :: n_par
complex(kind=dp), dimension(:), intent(in) :: x_ref, y_ref, x_query
complex(kind=dp), dimension(:), intent(out) :: y_query
logical, optional, intent(in) :: do_greedy

! Internal variables
integer :: i, num_query
complex(kind=dp), dimension(size(x_ref)) :: x_ref_local
complex(kind=dp), dimension(n_par) :: a_par
complex(kind=dp), dimension(:), allocatable :: acoef, bcoef

! Initialize Walli's coefficients
allocate(acoef(0:n_par))
allocate(bcoef(0:n_par))
acoef(0:n_par) = c_zero
bcoef(0:n_par) = c_zero
integer :: i, num_query
complex(kind=dp), dimension(size(x_ref)) :: x_ref_local
complex(kind=dp), dimension(n_par) :: a_par

! Compute the coefficients a_par
x_ref_local(:) = x_ref
call thiele_pade(n_par, x_ref_local, y_ref, a_par, acoef, bcoef, do_greedy)
call thiele_pade(n_par, x_ref_local, y_ref, a_par, do_greedy)

! Compute the number of query points
num_query = size(x_query)

! Evaluate the Thiele-Pade approximation at the query points
do i = 1, num_query
call evaluate_thiele_pade(n_par, x_ref_local, x_query(i), a_par, acoef, bcoef, y_query(i))
call evaluate_thiele_pade(n_par, x_ref_local, x_query(i), a_par, y_query(i))
end do

! Clean-up
deallocate(acoef)
deallocate(bcoef)

end subroutine thiele_pade_api

end module gx_ac
118 changes: 71 additions & 47 deletions GX-AnalyticContinuation/src/pade_approximant.f90
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ module pade_approximant
implicit none

private
public :: pade, pade_derivative, thiele_pade, evaluate_thiele_pade
public :: pade, pade_derivative, thiele_pade, evaluate_thiele_pade, evaluate_thiele_pade_tab

!> Complex zero
complex(dp), public :: c_zero = cmplx(0.0_dp, 0.0_dp, kind=dp)
Expand All @@ -64,13 +64,12 @@ complex(dp) function pade(n, x, f, xx)

!> Pade coefficients
complex(dp) :: a(n)
complex(dp) :: acoef(0:n), bcoef(0:n)

! Generate parameters
call pade_coefficient_derivative(x, f, a)

! Evaluate using Wallis method
call evaluate_thiele_pade(n, x, xx, a, acoef, bcoef, pade)
call evaluate_thiele_pade(n, x, xx, a, pade)

end function pade

Expand Down Expand Up @@ -150,34 +149,35 @@ end subroutine pade_coefficient_derivative
!! @param[in] y_ref - array of the reference function values
!! @param[in] do_greedy - whether to use the default greedy algorithm or the naive one
!! @param[out] par - array of the interpolant parameters
subroutine thiele_pade(n_par, x_ref, y_ref, a_par, acoef, bcoef, do_greedy)
integer, intent(in) :: n_par
complex(kind=dp), dimension(:), intent(inout) :: x_ref
complex(kind=dp), dimension(:), intent(in) :: y_ref
complex(kind=dp), dimension(:), intent(out) :: a_par
complex(kind=dp), dimension(0:n_par), intent(inout) :: acoef, bcoef
logical, optional, intent(in) :: do_greedy
subroutine thiele_pade(n_par, x_ref, y_ref, a_par, do_greedy)
integer, intent(in) :: n_par
complex(kind=dp), dimension(:), intent(inout) :: x_ref
complex(kind=dp), dimension(:), intent(in) :: y_ref
complex(kind=dp), dimension(:), intent(out) :: a_par
logical, optional, intent(in) :: do_greedy

! Internal variables
logical :: local_do_greedy = .True.
integer :: i, i_par, idx, jdx, kdx, n_rem
integer, dimension(n_par) :: n_rem_idx
real(kind=dp) :: deltap, pval
complex(kind=dp) :: pval_in, x_in, y_in, acoef_in, bcoef_in
complex(kind=dp), dimension(n_par, n_par) :: g_func
complex(kind=dp), dimension(n_par) :: x, xtmp, ytmp
logical :: local_do_greedy = .True.
integer :: i, i_par, idx, jdx, kdx, n_rem
integer, dimension(n_par) :: n_rem_idx
real(kind=dp), parameter :: tol = 1.0E-6_dp
real(kind=dp) :: deltap, pval
complex(kind=dp) :: pval_in, x_in, y_in, acoef_in, bcoef_in
complex(kind=dp), dimension(n_par, n_par) :: g_func
complex(kind=dp), dimension(n_par) :: x, xtmp, ytmp
complex(kind=dp), dimension(-1:n_par) :: acoef, bcoef

! Whether to perform the refined Thiele's interpolation (default)
if (present(do_greedy)) local_do_greedy = do_greedy

! Initialize arrays
n_rem_idx = (/(i, i = 1, n_par)/)
a_par = c_zero
acoef_in = c_zero
bcoef_in = c_zero
g_func = c_zero
x_in = c_zero
y_in = c_zero
acoef_in = c_zero
bcoef_in = c_zero
x = c_zero

if (local_do_greedy) then
Expand All @@ -201,25 +201,27 @@ subroutine thiele_pade(n_par, x_ref, y_ref, a_par, acoef, bcoef, do_greedy)
a_par(1) = g_func(1, 1)

! Initialize Walli's coefficients
acoef(-1) = c_one
acoef(0) = c_zero
acoef(1) = a_par(1)
bcoef(0:1) = c_one
bcoef(-1) = c_zero
bcoef(0) = c_one

! Add remaining points ensuring min |P_i(x_{1+1}) - F(x_{i+1})|
do idx = 2, n_par
pval = huge(0.0_dp)
do jdx = 1, n_rem
! Compute next convergent P_i(x_{i+1})
call evaluate_thiele_pade_tab(idx - 1, xtmp(1:idx-1), x(n_rem_idx(jdx)), a_par, acoef, bcoef, pval_in)
call evaluate_thiele_pade_tab(idx - 1, xtmp(1:idx-1), x(n_rem_idx(jdx)), a_par, acoef, bcoef)
pval_in = acoef(idx - 1) / bcoef(idx - 1)

! Select the point that minimizes difference's absolute value
deltap = abs(pval_in - y_ref(n_rem_idx(jdx)))
if (deltap .lt. pval) then
pval = deltap
x_in = x(n_rem_idx(jdx))
y_in = y_ref(n_rem_idx(jdx))
acoef_in = acoef(idx + 1)
bcoef_in = bcoef(idx + 1)
acoef_in = acoef(idx - 1)
bcoef_in = bcoef(idx - 1)
kdx = jdx
end if
end do
Expand All @@ -230,12 +232,22 @@ subroutine thiele_pade(n_par, x_ref, y_ref, a_par, acoef, bcoef, do_greedy)
n_rem_idx(i) = n_rem_idx(i + 1)
end do

! Add the winning point and recompute generating function
! Add the winning point
x_ref(idx) = x_in
xtmp(idx) = x_in
ytmp(idx) = y_in
acoef(idx + 1) = acoef_in
bcoef(idx + 1) = bcoef_in

! Rescale Wallis coefficients to avoid overflow
acoef(idx - 1) = acoef_in
bcoef(idx - 1) = bcoef_in
if (abs(bcoef_in) > tol) then
acoef(idx - 1) = acoef(idx - 1) / bcoef_in
acoef(idx - 2) = acoef(idx - 2) / bcoef_in
bcoef(idx - 1) = c_one
bcoef(idx - 2) = bcoef(idx - 2) / bcoef_in
end if

! Get the recurrence matrix
call thiele_pade_gcoeff(xtmp, ytmp, g_func, idx)

! Unpack parameters a_i = g_i(w_i)
Expand Down Expand Up @@ -283,47 +295,59 @@ end subroutine thiele_pade_gcoeff
!! @param[in] x - the point to evaluate
!! @param[in] a_par - array of the input parameters
!! @param[out] y - the value of the interpolant at x
subroutine evaluate_thiele_pade_tab(n_par, x_ref , x, a_par, acoef, bcoef, y)
integer, intent(in) :: n_par
complex(kind=dp), dimension(:), intent(in) :: x_ref
complex(kind=dp), intent(in) :: x
complex(kind=dp), dimension(:), intent(in) :: a_par
complex(dp), intent(inout) :: acoef(0:n_par), bcoef(0:n_par)
complex(kind=dp), intent(out) :: y
subroutine evaluate_thiele_pade_tab(n_par, x_ref , x, a_par, acoef, bcoef)
integer, intent(in) :: n_par
complex(kind=dp), dimension(:), intent(in) :: x_ref
complex(kind=dp), intent(in) :: x
complex(kind=dp), dimension(:), intent(in) :: a_par
complex(dp), dimension(-1:n_par), intent(inout) :: acoef, bcoef

acoef(n_par + 1) = acoef(n_par) + (x - x_ref(n_par)) * a_par(n_par) * acoef(n_par - 1)
bcoef(n_par + 1) = bcoef(n_par) + (x - x_ref(n_par)) * a_par(n_par) * bcoef(n_par - 1)
! Internal variables
complex(dp) ::delta

! Wallis' method iteration
delta = a_par(n_par)
if (n_par > 1) then
delta = delta * (x - x_ref(n_par - 1))
end if

y = acoef(n_par + 1) / bcoef(n_par + 1)
acoef(n_par) = acoef(n_par - 1) + delta * acoef(n_par - 2)
bcoef(n_par) = bcoef(n_par - 1) + delta * bcoef(n_par - 2)

end subroutine evaluate_thiele_pade_tab

!> brief Evaluates a Pade approximant constructed with Thiele's reciprocal differences
!> This is the tabulated version of the procedure
!! @param[in] n_par - number of parameters
!! @param[in] x_ref - array of the reference points
!! @param[in] x - the point to evaluate
!! @param[in] a_par - array of the input parameters
!! @param[out] y - the value of the interpolant at x
subroutine evaluate_thiele_pade(n_par, x_ref , x, a_par, acoef, bcoef, y)
subroutine evaluate_thiele_pade(n_par, x_ref , x, a_par, y)
integer, intent(in) :: n_par
complex(kind=dp), dimension(:), intent(in) :: x_ref
complex(kind=dp), intent(in) :: x
complex(kind=dp), dimension(:), intent(in) :: a_par
complex(dp), intent(inout) :: acoef(0:n_par), bcoef(0:n_par)
complex(kind=dp), intent(out) :: y

! Internal variables
integer :: i_par
real(kind=dp), parameter :: tol = 1.0E-6_dp
complex(dp), dimension(-1:n_par) :: acoef, bcoef

! Evaluate using Wallis method
! Evaluate using Wallis' method
acoef(-1) = c_one
acoef(0) = c_zero
acoef(1) = a_par(1)
bcoef(0:1) = c_one

do i_par = 1, n_par - 1
acoef(i_par + 1) = acoef(i_par) + (x - x_ref(i_par)) * a_par(i_par + 1) * acoef(i_par - 1)
bcoef(i_par + 1) = bcoef(i_par) + (x - x_ref(i_par)) * a_par(i_par + 1) * bcoef(i_par - 1)
bcoef(-1) = c_zero
bcoef(0) = c_one

do i_par = 1, n_par
call evaluate_thiele_pade_tab(i_par, x_ref , x, a_par, acoef, bcoef)
if (abs(bcoef(i_par)) > tol) then
acoef(i_par) = acoef(i_par) / bcoef(i_par)
acoef(i_par - 1) = acoef(i_par - 1) / bcoef(i_par)
bcoef(i_par - 1) = bcoef(i_par - 1) / bcoef(i_par)
bcoef(i_par) = c_one
end if
end do

y = acoef(n_par) / bcoef(n_par)
Expand Down
1 change: 1 addition & 0 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
valgrind
linuxKernel.packages.linux_zen.perf
hotspot
hyperfine
gmp
];
shellHook = ''
Expand Down

0 comments on commit e098096

Please sign in to comment.