From b6843371e22be62ff6627754e0df103a552fcdd5 Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Thu, 24 Oct 2024 15:32:19 -0400 Subject: [PATCH] add single-argument kernel evaluator (#541) --- include/finufft/spreadinterp.h | 2 + src/spreadinterp.cpp | 80 ++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/include/finufft/spreadinterp.h b/include/finufft/spreadinterp.h index 8a83af3ce..629b7def7 100644 --- a/include/finufft/spreadinterp.h +++ b/include/finufft/spreadinterp.h @@ -51,6 +51,8 @@ FINUFFT_EXPORT int FINUFFT_CDECL spreadinterpSorted( template FINUFFT_EXPORT T FINUFFT_CDECL evaluate_kernel(T x, const finufft_spread_opts &opts); template +FINUFFT_EXPORT T FINUFFT_CDECL evaluate_kernel_horner(T x, const finufft_spread_opts &opts); +template FINUFFT_EXPORT int FINUFFT_CDECL setup_spreader(finufft_spread_opts &opts, T eps, double upsampfac, int kerevalmeth, int debug, int showwarn, int dim); diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index 7c7309de2..e220dc6b8 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -2264,4 +2264,84 @@ T evaluate_kernel(T x, const finufft_spread_opts &opts) template float evaluate_kernel(float x, const finufft_spread_opts &opts); template double evaluate_kernel(double x, const finufft_spread_opts &opts); +template +T evaluate_kernel_horner_kernel(T x, const finufft_spread_opts &opts) +/* Templated ES ("exp sqrt") kernel evaluation at single real argument: + phi(x) = exp(beta.(sqrt(1 - (2x/n_s)^2) - 1)), for |x| < nspread/2 + using generated piecewise polynomial approximation to the kernel. +*/ +{ + if (abs(x) >= (T)opts.ES_halfwidth) { + // if spreading/FT careful, shouldn't need this if, but causes no speed hit + return 0.0; + } else { + static constexpr auto horner_coeffs = []() constexpr noexcept { + if constexpr (upsampfact == 200) { + return get_horner_coeffs_200(); + } else if constexpr (upsampfact == 125) { + return get_horner_coeffs_125(); + } + }(); + static constexpr auto nc = horner_coeffs.size(); + T res = 0.0; + for (uint8_t i = 0; i < ns; i++) { + if (x > -opts.ES_halfwidth + i && x <= -opts.ES_halfwidth + i + 1) { + T z = std::fma(T(2.0), x - T(i), T(ns - 1)); + for (uint8_t j = 0; j < nc; ++j) { + res = std::fma(res, z, horner_coeffs[j][i]); + } + break; + } + } + return res; + } +} + +template +T evaluate_kernel_horner_dispatch(T x, const finufft_spread_opts &opts) { + /* Template dispatcher ES ("exp sqrt") kernel evaluation at single real argument: + phi(x) = exp(beta.(sqrt(1 - (2x/n_s)^2) - 1)), for |x| < nspread/2 + using generated piecewise polynomial approximation to the kernel. + */ + static_assert(MIN_NSPREAD <= ns && ns <= MAX_NSPREAD, + "ns must be in the range (MIN_NSPREAD, MAX_NSPREAD)"); + if constexpr (ns == MIN_NSPREAD) { // Base case + if (opts.upsampfac == 2.0) { + return evaluate_kernel_horner_kernel(x, opts); + } else if (opts.upsampfac == 1.25) { + return evaluate_kernel_horner_kernel(x, opts); + } else { + fprintf(stderr, "[%s] upsampfac (%lf) not supported!\n", __func__, opts.upsampfac); + return 0.0; + } + } else { + if (opts.nspread == ns) { + if (opts.upsampfac == 2.0) { + return evaluate_kernel_horner_kernel(x, opts); + } else if (opts.upsampfac == 1.25) { + return evaluate_kernel_horner_kernel(x, opts); + } else { + fprintf(stderr, "[%s] upsampfac (%lf) not supported!\n", __func__, + opts.upsampfac); + return 0.0; + } + } else { + return evaluate_kernel_horner_dispatch(x, opts); + } + } +} + +template +T evaluate_kernel_horner(T x, const finufft_spread_opts &opts) +/* ES ("exp sqrt") kernel evaluation at single real argument: + phi(x) = exp(beta.(sqrt(1 - (2x/n_s)^2) - 1)), for |x| < nspread/2 + using generated piecewise polynomial approximation to the kernel. +*/ +{ + return evaluate_kernel_horner_dispatch(x, opts); +} + +template float evaluate_kernel_horner(float x, const finufft_spread_opts &opts); +template double evaluate_kernel_horner(double x, const finufft_spread_opts &opts); + } // namespace finufft::spreadinterp