-
Notifications
You must be signed in to change notification settings - Fork 79
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
regenerate kernel Horner coeffs: scaled to max val 1, and shave off s…
…ome degrees for upsampfac=1.25 (#499) * gen_all_horner_cpp_header.m 1st try * new ES kernel generation * regen ES kernel with max size 1 not exp(beta) * fix constexpr in matlab-gen ES kernel code * done rescaling ES kernel to max value 1, regen all coeffs, fixes #454 * src/ker_horner_allw_loop_constexpr.c * add 1 to degree for larger-w upsampfac=1.25 ker eval, recovers prior accuracy * fix 1dtest issue re allowing ier=1 (warning only), #500 * more automated matlb kernel coeff generation * automated and documented matlab kernel coefficient generation; tweaked the poly degrees for kernel, checked accuracy * regen ker coeffs .h without nc200() etc func; use .size() instead, much neater * reverted matlab before clang-format messed it up * clang-formatted regen with assert logic fixed in .h horner stuff
- Loading branch information
Showing
19 changed files
with
1,973 additions
and
537 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
Developer and experimental codes for FINUFFT | ||
-------------------------------------------- | ||
|
||
For generating kernel coefficient codes in ../src, | ||
the developer must run from MATLAB the following: | ||
|
||
gen_all_horner_C_code.m : writes C-style Horner coeffs (pre-2024) | ||
* a single call writes upsampfac=2 and 1.25 | ||
* calls gen_ker_horner_loop_C_code.m | ||
gen_all_horner_cpp_header.m : writes C++ header Horner coeffs (July 2024 on) | ||
* a single call writes upsampfac=2 and 1.25 | ||
* calls gen_ker_horner_loop_cpp_code.m | ||
|
||
Both of the gen_ker_* scripts call for the solve of the coeffs for each w: | ||
ker_ppval_coeff_mat.m | ||
(which has the kernel definition in it, which must match spreadinterp.cpp) | ||
|
||
The universal location for kernel approximation (degree, ES beta setting) is: | ||
get_degree_and_beta.m | ||
Tweaks should be done here, and see instructions there for resulting acc test. | ||
Another code that has to match ../src/spreadinterp.cpp is: | ||
reverse_engineer_tol.m | ||
|
||
Barnett 7/22/24 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,41 +1,39 @@ | ||
% Script to make all C code for looped Horner eval of kernels of all widths. | ||
% writes to "ker" array, from a variable "z", and switches by width "w". | ||
% Resulting C code needs only placing in a function. | ||
% Now does both upsampfacs. | ||
% Resulting C code needs only including in a function. | ||
|
||
% Barnett 4/23/18; now calling Ludvig's loop version from 4/25/18. | ||
% version including low upsampfac, 6/17/18. | ||
% Ludvig put in w=4n padding, 1/31/20. Mystery about why d was bigger 2/6/20. | ||
% split out code for degree, beta, etc; loop upsampfacs Barnett 7/22/24. | ||
clear | ||
opts = struct(); | ||
|
||
ws = 2:16; | ||
upsampfac = 2; % sigma (upsampling): either 2 (default) or low (eg 5/4). | ||
opts.wpad = true; % pad kernel eval to multiple of 4 | ||
for upsampfac = [2.0, 1.25]; % sigma: either 2 (default) or low (eg 5/4) | ||
fprintf('upsampfac = %g...\n',upsampfac) | ||
|
||
ws = 2:16; | ||
opts.wpad = true; % pad kernel eval to multiple of 4 | ||
|
||
if upsampfac==2, fid = fopen('../src/ker_horner_allw_loop.c','w'); | ||
else, fid = fopen('../src/ker_lowupsampfac_horner_allw_loop.c','w'); | ||
end | ||
fwrite(fid,sprintf('// Code generated by gen_all_horner_C_code.m in finufft/devel\n')); | ||
fwrite(fid,sprintf('// Authors: Alex Barnett & Ludvig af Klinteberg.\n// (C) The Simons Foundation, Inc.\n')); | ||
for j=1:numel(ws) | ||
w = ws(j) | ||
if upsampfac==2 % hardwire the betas for this default case | ||
betaoverws = [2.20 2.26 2.38 2.30]; % matches setup_spreader | ||
beta = betaoverws(min(4,w-1)) * w; % uses last entry for w>=5 | ||
d = w + 2 + (w<=8); % between 2-3 more degree than w | ||
else % use formulae, must match params in setup_spreader... | ||
gamma=0.97; % safety factor | ||
betaoverws = gamma*pi*(1-1/(2*upsampfac)); % from cutoff freq formula | ||
beta = betaoverws * w; | ||
d = w + 1 + (w<=8); % less, since beta smaller, smoother | ||
if upsampfac==2, fid = fopen('../src/ker_horner_allw_loop_constexpr.c','w'); | ||
else, fid = fopen('../src/ker_lowupsampfac_horner_allw_loop_constexpr.c','w'); | ||
end | ||
str = gen_ker_horner_loop_C_code(w,d,beta,opts); | ||
if j==1 % write switch statement | ||
fwrite(fid,sprintf(' if (w==%d) {\n',w)); | ||
else | ||
fwrite(fid,sprintf(' } else if (w==%d) {\n',w)); | ||
fwrite(fid,sprintf('// Code generated by gen_all_horner_C_code.m in finufft/devel\n')); | ||
fwrite(fid,sprintf('// Authors: Alex Barnett & Ludvig af Klinteberg.\n// (C) The Simons Foundation, Inc.\n')); | ||
for j=1:numel(ws) | ||
w = ws(j); | ||
[d,beta] = get_degree_and_beta(w,upsampfac); | ||
fprintf('w=%d\td=%d\tbeta=%.3g\n',w,d,beta); | ||
str = gen_ker_horner_loop_C_code(w,d,beta,opts); | ||
if j==1 % write switch statement | ||
fwrite(fid,sprintf(' if constexpr(w==%d) {\n',w)); | ||
else | ||
fwrite(fid,sprintf(' } else if constexpr(w==%d) {\n',w)); | ||
end | ||
for i=1:numel(str); fwrite(fid,[' ',str{i}]); end | ||
end | ||
for i=1:numel(str); fwrite(fid,[' ',str{i}]); end | ||
fwrite(fid,sprintf(' } else\n printf("width not implemented!\\n");\n')); | ||
fclose(fid); | ||
|
||
end | ||
fwrite(fid,sprintf(' } else\n printf("width not implemented!\\n");\n')); | ||
fclose(fid); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
% Script to make C++ header for looped Horner eval of kernels of all widths, | ||
% for a particular opts.upsampfac (user to hand-choose below). | ||
% C++ version (now writes .h), uses constexpr to switch by width w. | ||
|
||
% Barnett 4/23/18; now calling Ludvig's loop version from 4/25/18. | ||
% version including low upsampfac, 6/17/18. | ||
% Ludvig put in w=4n padding, 1/31/20. Mystery about why d was bigger 2/6/20. | ||
% C++ header using constexpr of Barbone, replacing *_C_code.m. Barnett 7/16/24. | ||
% simplified to kill nc200() etc functions, Barbone + Barnett 7/22/24. | ||
clear | ||
%opts = struct(); % not needed yet | ||
|
||
for upsampfac = [2.0, 1.25] % sigma: either 2 (default) or low (eg 5/4) | ||
fprintf('upsampfac = %g...\n',upsampfac) | ||
|
||
ws = 2:16; % list of widths (the driver, rather than tols) | ||
% (must match MIN and MAX NSPREAD in source headers) | ||
if upsampfac==2 | ||
fid = fopen('../src/ker_horner_allw_loop_constexpr.h','w'); | ||
elseif upsampfac==1.25 | ||
fid = fopen('../src/ker_lowupsampfac_horner_allw_loop_constexpr.h','w'); | ||
end | ||
fwrite(fid,sprintf('// Header of static arrays of monomial coeffs of spreading kernel function in each\n')); | ||
fwrite(fid,sprintf('// fine-grid interval. Generated by gen_all_horner_cpp_header.m in devel/\n')); | ||
fwrite(fid,sprintf('// Authors: Alex Barnett, Ludvig af Klinteberg, Marco Barbone & Libin Lu.\n// (C) 2018--2024 The Simons Foundation, Inc.\n')); | ||
fwrite(fid,sprintf('#include <array>\n\n')); | ||
|
||
usf_tag = sprintf('%d',100*upsampfac); % string, follow Barbone convention: 200 or 125 | ||
fwrite(fid,sprintf('template<class T, uint8_t w>\nconstexpr auto get_horner_coeffs_%s() noexcept {\n',usf_tag)); | ||
|
||
for j=1:numel(ws) | ||
w = ws(j); | ||
[d,beta] = get_degree_and_beta(w,upsampfac); | ||
fprintf('w=%d\td=%d\tbeta=%.3g\n',w,d,beta); | ||
str = gen_ker_horner_loop_cpp_code(w,d,beta); % code strings for this w (nc hardcoded) | ||
|
||
if j==1 % write switch statement | ||
fwrite(fid,sprintf(' if constexpr (w == %d) {\n',w)); | ||
else | ||
fwrite(fid,sprintf(' } else if constexpr (w == %d) {\n',w)); | ||
end | ||
for i=1:numel(str); fwrite(fid,[' ',str{i}]); end % format 4 extra spaces | ||
end | ||
|
||
% handle bad w at compile time... | ||
fwrite(fid,sprintf(' }\n')); | ||
fwrite(fid,sprintf(' static_assert(w >= %d, "w must be >= %d");\n',ws(1),ws(1))); | ||
fwrite(fid,sprintf(' static_assert(w <= %d, "w must be <= %d");\n',ws(end),ws(end))); | ||
fwrite(fid,sprintf(' };\n')); % close all brackets | ||
fclose(fid); | ||
|
||
end |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
function str = gen_ker_horner_loop_cpp_code(w,d,be,o) | ||
% GEN_KER_HORNER_LOOP_CPP_CODE Write C++ for piecewise poly coeffs of kernel | ||
% | ||
% str = gen_ker_horner_loop_cpp_code(w,d,be,o) | ||
% | ||
% Inputs: | ||
% w = integer kernel width in grid points, eg 10 | ||
% d = 0 for auto, else poly degree to keep, eg 13. (will give # coeffs nc=d+1) | ||
% beta = kernel parameter, around 2.3*w (upsampfac=2 only) | ||
% opts - optional struct, with fields: [none for now]. | ||
% | ||
% Outputs: | ||
% str = cell array of C++ code strings to define (d+1)*w static coeffs array, | ||
% as in each w-specific block in ../src/ker_horner_allw_loop_constexpr.h | ||
% | ||
% Also see: KER_PPVAL_COEFF_MAT, FIG_SPEED_KER_PPVAL (which tests acc too) | ||
% GEN_KER_HORNER_CPP_HEADER | ||
% | ||
% Notes: | ||
% | ||
% It exploits that there are w calls to different poly's w/ *same* z arg, writing | ||
% this as a loop. This allows the kernel evals at one z to be a w*nc mat-vec. | ||
% The xsimd code which uses this (see spreadinterp.cpp) is now too elaborate to | ||
% explain here. | ||
% | ||
% Changes from gen_ker_horner_loop_C_code.m: | ||
% i) a simple C++ style 2D array is written, with the w-direction fast, poly coeff | ||
% direction slow. (It used to be a list of 1D arrays plus Horner eval code.) | ||
% ii) coeffs are now ordered c_d,c_{d-1},...,c_0, where degree d=nc-1. (Used | ||
% to be reversed.) | ||
% | ||
% Ideas: could try PSWF, cosh kernel ES variant, Reinecke tweaked-power ES | ||
% variant, etc.. | ||
|
||
% Ludvig af Klinteberg 4/25/18, based on Barnett 4/23/18. Ludvig wpad 1/31/20. | ||
% Barnett redo for Barbone templated arrays, no wpad, 7/16/24. | ||
% hardcoded outer nc array size, 7/22/24. | ||
|
||
if nargin==0, test_gen_ker_horner_loop_cpp_code; return; end | ||
if nargin<4, o=[]; end | ||
|
||
C = ker_ppval_coeff_mat(w,d,be,o); | ||
if d==0, d = size(C,2)-1; end | ||
nc = d+1; | ||
str = cell(nc+2,1); % one coeff per line + one return and one close-paren line | ||
% code to open the templated array... why two {{? (some C++ ambiguity thing) | ||
str{1} = sprintf(' return std::array<std::array<T, w>, %d> {{\n', nc); | ||
for n=1:d+1 % loop over poly coeff powers 0,1,..,d | ||
% sprintf implicitly loops over fine-grid interpolation intervals 1:w... | ||
coeffrow = sprintf('%.16E, ', C(n,:)); | ||
coeffrow = coeffrow(1:end-2); % easy kill trailing comma (but allowed in C++) | ||
str{d+3-n} = sprintf(' {%s},\n', coeffrow); % leave outer trailing comma | ||
end | ||
str{nc+2} = sprintf(' }};\n'); % terminate the array (two braces) | ||
|
||
|
||
%%%%%%%% | ||
function test_gen_ker_horner_loop_cpp_code % writes code to file, doesn't test | ||
w=13; d=w+1; % pick a single kernel width and degree to write code for | ||
%w=7; d=11; | ||
%w=2; d=5; | ||
beta=2.3*w; % upsampfac=2 only | ||
str = gen_ker_horner_loop_cpp_code(w,d,beta); | ||
% str{:} | ||
% check write and read to file... | ||
fnam = sprintf('ker_horner_w%d.cpp',w); | ||
fid = fopen(fnam,'w'); | ||
for i=1:numel(str); fwrite(fid,str{i}); end | ||
fclose(fid); | ||
system(['more ' fnam]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
function [d,beta] = get_degree_and_beta(w,upsampfac) | ||
% GET_DEGREE_AND_BETA defines degree & beta from w & upsampfac | ||
% | ||
% [d,beta] = get_degree_and_beta(w,upsampfac) | ||
% | ||
% Universal definition for piecewise poly degree chosen for kernel | ||
% coeff generation by matlab, and the ES kernel beta parameter. | ||
% The map from tol to width w must match code in spreadinterp used to | ||
% choose w. | ||
% | ||
% Used by all other *.m codes for generating coeffs. | ||
% | ||
% To test: use KER_PPVAL_COEFF_MAT self-test | ||
% | ||
% To verify accuracy in practice, compile FINUFFT CPU then run | ||
% test/checkallaccs.sh and matlab/test/fig_accuracy.m | ||
% | ||
% Also see: REVERSE_ENGINEER_TOL, KER_PPVAL_COEFF_MAT | ||
|
||
% Barnett 7/22/24 | ||
if upsampfac==0.0, upsampfac=2.0; end | ||
|
||
% if d set to 0 in following, means it gets auto-chosen... | ||
if upsampfac==2 % hardwire the betas for this default case | ||
betaoverws = [2.20 2.26 2.38 2.30]; % must match setup_spreader | ||
beta = betaoverws(min(4,w-1)) * w; % uses last entry for w>=5 | ||
d = w + 1 + (w<=7) - (w==2); % between 1-2 more degree than w. tweak | ||
elseif upsampfac==1.25 % use formulae, must match params in setup_spreader | ||
gamma=0.97; % safety factor | ||
betaoverws = gamma*pi*(1-1/(2*upsampfac)); % from cutoff freq formula | ||
beta = betaoverws * w; | ||
d = ceil(0.7*w+1.3); % less, since beta smaller. tweak | ||
%d = 0; % auto-choose override? No, too much jitter. | ||
end | ||
|
||
if d==0 | ||
tol = reverse_engineer_tol(w,upsampfac); | ||
opts.cutoff = 0.5 * tol; % fudge to get more poly-approx acc than tol | ||
C = ker_ppval_coeff_mat(w,0,beta,opts); % do solve merely to get d | ||
d = size(C,1)-1; % extract the auto-chosen d | ||
end |
Oops, something went wrong.