From 2726bc383a12d14411b8c5e1f63d97c736a894c8 Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Tue, 22 Oct 2024 19:20:51 +0200 Subject: [PATCH] fix PR --- CMakeLists.txt | 10 +- fortran/finufftfort.cpp | 221 +++++------ include/finufft.h | 3 +- include/finufft/defs.h | 137 ------- include/finufft/dirft.h | 42 ++- include/finufft/fft.h | 9 + include/finufft/finufft_core.h | 22 +- include/finufft/spreadinterp.h | 3 +- include/finufft/test_defs.h | 113 +++++- include/finufft/utils.h | 35 +- include/finufft_eitherprec.h | 7 +- makefile | 8 +- perftest/manysmallprobs.cpp | 2 +- perftest/spreadtestnd.cpp | 2 +- perftest/spreadtestndall.cpp | 2 +- src/{simpleinterfaces.cpp => c_interface.cpp} | 248 ++++-------- src/{finufft_core.cpp => finufft.cpp} | 357 ++++++++---------- src/spreadinterp.cpp | 125 +++--- src/utils.cpp | 2 +- test/directft/dirft1d.cpp | 38 +- test/directft/dirft2d.cpp | 66 ++-- test/directft/dirft3d.cpp | 79 ++-- test/dumbinputs.cpp | 2 +- 23 files changed, 696 insertions(+), 837 deletions(-) delete mode 100644 include/finufft/defs.h rename src/{simpleinterfaces.cpp => c_interface.cpp} (55%) rename src/{finufft_core.cpp => finufft.cpp} (82%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7e5e2cf5d..3ea8b76c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -169,7 +169,7 @@ if(FINUFFT_USE_CPU) set(XTL_VERSION 0.7.7) set(XSIMD_VERSION 13.0.0) # using latest ducc0 version for now as it fixes MacOS GCC build - set(DUCC0_VERSION b0beb85e03982344a31ebb119758d7aa3ef5d362) + set(DUCC0_VERSION ducc0_0_35_0) set(FINUFFT_FFTW_LIBRARIES) include(cmake/setupXSIMD.cmake) if(FINUFFT_USE_DUCC0) @@ -257,8 +257,8 @@ if(FINUFFT_USE_CPU) src/utils.cpp contrib/legendre_rule_fast.cpp src/fft.cpp - src/finufft_core.cpp - src/simpleinterfaces.cpp + src/finufft.cpp + src/c_interface.cpp fortran/finufftfort.cpp) else() add_library( @@ -267,8 +267,8 @@ if(FINUFFT_USE_CPU) src/utils.cpp contrib/legendre_rule_fast.cpp src/fft.cpp - src/finufft_core.cpp - src/simpleinterfaces.cpp + src/finufft.cpp + src/c_interface.cpp fortran/finufftfort.cpp) endif() set_finufft_options(finufft) diff --git a/fortran/finufftfort.cpp b/fortran/finufftfort.cpp index 400ff0985..059b44c4c 100644 --- a/fortran/finufftfort.cpp +++ b/fortran/finufftfort.cpp @@ -21,13 +21,19 @@ #include #include +using f32 = float; +using f64 = double; +using c64 = std::complex; +using c128 = std::complex; +using i64 = BIGINT; + #ifdef __cplusplus extern "C" { #endif // --------------------- guru interface from fortran ------------------------ -void finufft_makeplan_(int *type, int *n_dims, BIGINT *n_modes, int *iflag, int *n_transf, - double *tol, finufft_plan *plan, finufft_opts *o, int *ier) { +void finufft_makeplan_(int *type, int *n_dims, i64 *n_modes, int *iflag, int *n_transf, + f64 *tol, finufft_plan *plan, finufft_opts *o, int *ier) { if (!plan) fprintf(stderr, "%s fortran: plan must be allocated as at least the size of a C pointer " @@ -39,8 +45,8 @@ void finufft_makeplan_(int *type, int *n_dims, BIGINT *n_modes, int *iflag, int } } -void finufft_setpts_(finufft_plan *plan, BIGINT *M, double *xj, double *yj, double *zj, - BIGINT *nk, double *s, double *t, double *u, int *ier) { +void finufft_setpts_(finufft_plan *plan, i64 *M, f64 *xj, f64 *yj, f64 *zj, i64 *nk, + f64 *s, f64 *t, f64 *u, int *ier) { if (!*plan) { fprintf(stderr, "%s fortran: finufft_plan unallocated!", __func__); return; @@ -50,8 +56,7 @@ void finufft_setpts_(finufft_plan *plan, BIGINT *M, double *xj, double *yj, doub *ier = finufft_setpts(*plan, *M, xj, yj, zj, nk_safe, s, t, u); } -void finufft_execute_(finufft_plan *plan, std::complex *weights, - std::complex *result, int *ier) { +void finufft_execute_(finufft_plan *plan, c128 *weights, c128 *result, int *ier) { if (!plan) fprintf(stderr, "%s fortran: finufft_plan unallocated!", __func__); else @@ -77,124 +82,104 @@ void finufft_default_opts_(finufft_opts *o) { // -------------- simple and many-vector interfaces -------------------- // --- 1D --- -void finufft1d1_(BIGINT *nj, double *xj, std::complex *cj, int *iflag, - double *eps, BIGINT *ms, std::complex *fk, finufft_opts *o, - int *ier) { +void finufft1d1_(i64 *nj, f64 *xj, c128 *cj, int *iflag, f64 *eps, i64 *ms, c128 *fk, + finufft_opts *o, int *ier) { *ier = finufft1d1(*nj, xj, cj, *iflag, *eps, *ms, fk, o); } -void finufft1d1many_(int *ntransf, BIGINT *nj, double *xj, std::complex *cj, - int *iflag, double *eps, BIGINT *ms, std::complex *fk, - finufft_opts *o, int *ier) { +void finufft1d1many_(int *ntransf, i64 *nj, f64 *xj, c128 *cj, int *iflag, f64 *eps, + i64 *ms, c128 *fk, finufft_opts *o, int *ier) { *ier = finufft1d1many(*ntransf, *nj, xj, cj, *iflag, *eps, *ms, fk, o); } -void finufft1d2_(BIGINT *nj, double *xj, std::complex *cj, int *iflag, - double *eps, BIGINT *ms, std::complex *fk, finufft_opts *o, - int *ier) { +void finufft1d2_(i64 *nj, f64 *xj, c128 *cj, int *iflag, f64 *eps, i64 *ms, c128 *fk, + finufft_opts *o, int *ier) { *ier = finufft1d2(*nj, xj, cj, *iflag, *eps, *ms, fk, o); } -void finufft1d2many_(int *ntransf, BIGINT *nj, double *xj, std::complex *cj, - int *iflag, double *eps, BIGINT *ms, std::complex *fk, - finufft_opts *o, int *ier) { +void finufft1d2many_(int *ntransf, i64 *nj, f64 *xj, c128 *cj, int *iflag, f64 *eps, + i64 *ms, c128 *fk, finufft_opts *o, int *ier) { *ier = finufft1d2many(*ntransf, *nj, xj, cj, *iflag, *eps, *ms, fk, o); } -void finufft1d3_(BIGINT *nj, double *x, std::complex *c, int *iflag, double *eps, - BIGINT *nk, double *s, std::complex *f, finufft_opts *o, - int *ier) { +void finufft1d3_(i64 *nj, f64 *x, c128 *c, int *iflag, f64 *eps, i64 *nk, f64 *s, c128 *f, + finufft_opts *o, int *ier) { *ier = finufft1d3(*nj, x, c, *iflag, *eps, *nk, s, f, o); } -void finufft1d3many_(int *ntransf, BIGINT *nj, double *x, std::complex *c, - int *iflag, double *eps, BIGINT *nk, double *s, - std::complex *f, finufft_opts *o, int *ier) { +void finufft1d3many_(int *ntransf, i64 *nj, f64 *x, c128 *c, int *iflag, f64 *eps, + i64 *nk, f64 *s, c128 *f, finufft_opts *o, int *ier) { *ier = finufft1d3many(*ntransf, *nj, x, c, *iflag, *eps, *nk, s, f, o); } // --- 2D --- -void finufft2d1_(BIGINT *nj, double *xj, double *yj, std::complex *cj, int *iflag, - double *eps, BIGINT *ms, BIGINT *mt, std::complex *fk, - finufft_opts *o, int *ier) { +void finufft2d1_(i64 *nj, f64 *xj, f64 *yj, c128 *cj, int *iflag, f64 *eps, i64 *ms, + i64 *mt, c128 *fk, finufft_opts *o, int *ier) { *ier = finufft2d1(*nj, xj, yj, cj, *iflag, *eps, *ms, *mt, fk, o); } -void finufft2d1many_(int *ntransf, BIGINT *nj, double *xj, double *yj, - std::complex *cj, int *iflag, double *eps, BIGINT *ms, - BIGINT *mt, std::complex *fk, finufft_opts *o, int *ier) { +void finufft2d1many_(int *ntransf, i64 *nj, f64 *xj, f64 *yj, c128 *cj, int *iflag, + f64 *eps, i64 *ms, i64 *mt, c128 *fk, finufft_opts *o, int *ier) { *ier = finufft2d1many(*ntransf, *nj, xj, yj, cj, *iflag, *eps, *ms, *mt, fk, o); } -void finufft2d2_(BIGINT *nj, double *xj, double *yj, std::complex *cj, int *iflag, - double *eps, BIGINT *ms, BIGINT *mt, std::complex *fk, - finufft_opts *o, int *ier) { +void finufft2d2_(i64 *nj, f64 *xj, f64 *yj, c128 *cj, int *iflag, f64 *eps, i64 *ms, + i64 *mt, c128 *fk, finufft_opts *o, int *ier) { *ier = finufft2d2(*nj, xj, yj, cj, *iflag, *eps, *ms, *mt, fk, o); } -void finufft2d2many_(int *ntransf, BIGINT *nj, double *xj, double *yj, - std::complex *cj, int *iflag, double *eps, BIGINT *ms, - BIGINT *mt, std::complex *fk, finufft_opts *o, int *ier) { +void finufft2d2many_(int *ntransf, i64 *nj, f64 *xj, f64 *yj, c128 *cj, int *iflag, + f64 *eps, i64 *ms, i64 *mt, c128 *fk, finufft_opts *o, int *ier) { *ier = finufft2d2many(*ntransf, *nj, xj, yj, cj, *iflag, *eps, *ms, *mt, fk, o); } -void finufft2d3_(BIGINT *nj, double *x, double *y, std::complex *c, int *iflag, - double *eps, BIGINT *nk, double *s, double *t, std::complex *f, - finufft_opts *o, int *ier) { +void finufft2d3_(i64 *nj, f64 *x, f64 *y, c128 *c, int *iflag, f64 *eps, i64 *nk, f64 *s, + f64 *t, c128 *f, finufft_opts *o, int *ier) { *ier = finufft2d3(*nj, x, y, c, *iflag, *eps, *nk, s, t, f, o); } -void finufft2d3many_(int *ntransf, BIGINT *nj, double *x, double *y, - std::complex *c, int *iflag, double *eps, BIGINT *nk, - double *s, double *t, std::complex *f, finufft_opts *o, - int *ier) { +void finufft2d3many_(int *ntransf, i64 *nj, f64 *x, f64 *y, c128 *c, int *iflag, f64 *eps, + i64 *nk, f64 *s, f64 *t, c128 *f, finufft_opts *o, int *ier) { *ier = finufft2d3many(*ntransf, *nj, x, y, c, *iflag, *eps, *nk, s, t, f, o); } // --- 3D --- -void finufft3d1_(BIGINT *nj, double *xj, double *yj, double *zj, std::complex *cj, - int *iflag, double *eps, BIGINT *ms, BIGINT *mt, BIGINT *mu, - std::complex *fk, finufft_opts *o, int *ier) { +void finufft3d1_(i64 *nj, f64 *xj, f64 *yj, f64 *zj, c128 *cj, int *iflag, f64 *eps, + i64 *ms, i64 *mt, i64 *mu, c128 *fk, finufft_opts *o, int *ier) { *ier = finufft3d1(*nj, xj, yj, zj, cj, *iflag, *eps, *ms, *mt, *mu, fk, o); } -void finufft3d1many_(int *ntransf, BIGINT *nj, double *xj, double *yj, double *zj, - std::complex *cj, int *iflag, double *eps, BIGINT *ms, - BIGINT *mt, BIGINT *mu, std::complex *fk, finufft_opts *o, - int *ier) { +void finufft3d1many_(int *ntransf, i64 *nj, f64 *xj, f64 *yj, f64 *zj, c128 *cj, + int *iflag, f64 *eps, i64 *ms, i64 *mt, i64 *mu, c128 *fk, + finufft_opts *o, int *ier) { *ier = finufft3d1many(*ntransf, *nj, xj, yj, zj, cj, *iflag, *eps, *ms, *mt, *mu, fk, o); } -void finufft3d2_(BIGINT *nj, double *xj, double *yj, double *zj, std::complex *cj, - int *iflag, double *eps, BIGINT *ms, BIGINT *mt, BIGINT *mu, - std::complex *fk, finufft_opts *o, int *ier) { +void finufft3d2_(i64 *nj, f64 *xj, f64 *yj, f64 *zj, c128 *cj, int *iflag, f64 *eps, + i64 *ms, i64 *mt, i64 *mu, c128 *fk, finufft_opts *o, int *ier) { *ier = finufft3d2(*nj, xj, yj, zj, cj, *iflag, *eps, *ms, *mt, *mu, fk, o); } -void finufft3d2many_(int *ntransf, BIGINT *nj, double *xj, double *yj, double *zj, - std::complex *cj, int *iflag, double *eps, BIGINT *ms, - BIGINT *mt, BIGINT *mu, std::complex *fk, finufft_opts *o, - int *ier) { +void finufft3d2many_(int *ntransf, i64 *nj, f64 *xj, f64 *yj, f64 *zj, c128 *cj, + int *iflag, f64 *eps, i64 *ms, i64 *mt, i64 *mu, c128 *fk, + finufft_opts *o, int *ier) { *ier = finufft3d2many(*ntransf, *nj, xj, yj, zj, cj, *iflag, *eps, *ms, *mt, *mu, fk, o); } -void finufft3d3_(BIGINT *nj, double *x, double *y, double *z, std::complex *c, - int *iflag, double *eps, BIGINT *nk, double *s, double *t, double *u, - std::complex *f, finufft_opts *o, int *ier) { +void finufft3d3_(i64 *nj, f64 *x, f64 *y, f64 *z, c128 *c, int *iflag, f64 *eps, i64 *nk, + f64 *s, f64 *t, f64 *u, c128 *f, finufft_opts *o, int *ier) { *ier = finufft3d3(*nj, x, y, z, c, *iflag, *eps, *nk, s, t, u, f, o); } -void finufft3d3many_(int *ntransf, BIGINT *nj, double *x, double *y, double *z, - std::complex *c, int *iflag, double *eps, BIGINT *nk, - double *s, double *t, double *u, std::complex *f, - finufft_opts *o, int *ier) { +void finufft3d3many_(int *ntransf, i64 *nj, f64 *x, f64 *y, f64 *z, c128 *c, int *iflag, + f64 *eps, i64 *nk, f64 *s, f64 *t, f64 *u, c128 *f, finufft_opts *o, + int *ier) { *ier = finufft3d3many(*ntransf, *nj, x, y, z, c, *iflag, *eps, *nk, s, t, u, f, o); } // --------------------- guru interface from fortran ------------------------ -void finufftf_makeplan_(int *type, int *n_dims, BIGINT *n_modes, int *iflag, - int *n_transf, float *tol, finufftf_plan *plan, finufft_opts *o, - int *ier) { +void finufftf_makeplan_(int *type, int *n_dims, i64 *n_modes, int *iflag, int *n_transf, + f32 *tol, finufftf_plan *plan, finufft_opts *o, int *ier) { if (!plan) fprintf(stderr, "%s fortran: plan must be allocated as at least the size of a C pointer " @@ -206,8 +191,8 @@ void finufftf_makeplan_(int *type, int *n_dims, BIGINT *n_modes, int *iflag, } } -void finufftf_setpts_(finufftf_plan *plan, BIGINT *M, float *xj, float *yj, float *zj, - BIGINT *nk, float *s, float *t, float *u, int *ier) { +void finufftf_setpts_(finufftf_plan *plan, i64 *M, f32 *xj, f32 *yj, f32 *zj, i64 *nk, + f32 *s, f32 *t, f32 *u, int *ier) { if (!*plan) { fprintf(stderr, "%s fortran: finufft_plan unallocated!", __func__); return; @@ -217,8 +202,7 @@ void finufftf_setpts_(finufftf_plan *plan, BIGINT *M, float *xj, float *yj, floa *ier = finufftf_setpts(*plan, *M, xj, yj, zj, nk_safe, s, t, u); } -void finufftf_execute_(finufftf_plan *plan, std::complex *weights, - std::complex *result, int *ier) { +void finufftf_execute_(finufftf_plan *plan, c64 *weights, c64 *result, int *ier) { if (!plan) fprintf(stderr, "%s fortran: finufft_plan unallocated!", __func__); else @@ -244,115 +228,98 @@ void finufftf_default_opts_(finufft_opts *o) { // -------------- simple and many-vector interfaces -------------------- // --- 1D --- -void finufftf1d1_(BIGINT *nj, float *xj, std::complex *cj, int *iflag, float *eps, - BIGINT *ms, std::complex *fk, finufft_opts *o, int *ier) { +void finufftf1d1_(i64 *nj, f32 *xj, c64 *cj, int *iflag, f32 *eps, i64 *ms, c64 *fk, + finufft_opts *o, int *ier) { *ier = finufftf1d1(*nj, xj, cj, *iflag, *eps, *ms, fk, o); } -void finufftf1d1many_(int *ntransf, BIGINT *nj, float *xj, std::complex *cj, - int *iflag, float *eps, BIGINT *ms, std::complex *fk, - finufft_opts *o, int *ier) { +void finufftf1d1many_(int *ntransf, i64 *nj, f32 *xj, c64 *cj, int *iflag, f32 *eps, + i64 *ms, c64 *fk, finufft_opts *o, int *ier) { *ier = finufftf1d1many(*ntransf, *nj, xj, cj, *iflag, *eps, *ms, fk, o); } -void finufftf1d2_(BIGINT *nj, float *xj, std::complex *cj, int *iflag, float *eps, - BIGINT *ms, std::complex *fk, finufft_opts *o, int *ier) { +void finufftf1d2_(i64 *nj, f32 *xj, c64 *cj, int *iflag, f32 *eps, i64 *ms, c64 *fk, + finufft_opts *o, int *ier) { *ier = finufftf1d2(*nj, xj, cj, *iflag, *eps, *ms, fk, o); } -void finufftf1d2many_(int *ntransf, BIGINT *nj, float *xj, std::complex *cj, - int *iflag, float *eps, BIGINT *ms, std::complex *fk, - finufft_opts *o, int *ier) { +void finufftf1d2many_(int *ntransf, i64 *nj, f32 *xj, c64 *cj, int *iflag, f32 *eps, + i64 *ms, c64 *fk, finufft_opts *o, int *ier) { *ier = finufftf1d2many(*ntransf, *nj, xj, cj, *iflag, *eps, *ms, fk, o); } -void finufftf1d3_(BIGINT *nj, float *x, std::complex *c, int *iflag, float *eps, - BIGINT *nk, float *s, std::complex *f, finufft_opts *o, - int *ier) { +void finufftf1d3_(i64 *nj, f32 *x, c64 *c, int *iflag, f32 *eps, i64 *nk, f32 *s, c64 *f, + finufft_opts *o, int *ier) { *ier = finufftf1d3(*nj, x, c, *iflag, *eps, *nk, s, f, o); } -void finufftf1d3many_(int *ntransf, BIGINT *nj, float *x, std::complex *c, - int *iflag, float *eps, BIGINT *nk, float *s, - std::complex *f, finufft_opts *o, int *ier) { +void finufftf1d3many_(int *ntransf, i64 *nj, f32 *x, c64 *c, int *iflag, f32 *eps, + i64 *nk, f32 *s, c64 *f, finufft_opts *o, int *ier) { *ier = finufftf1d3many(*ntransf, *nj, x, c, *iflag, *eps, *nk, s, f, o); } // --- 2D --- -void finufftf2d1_(BIGINT *nj, float *xj, float *yj, std::complex *cj, int *iflag, - float *eps, BIGINT *ms, BIGINT *mt, std::complex *fk, - finufft_opts *o, int *ier) { +void finufftf2d1_(i64 *nj, f32 *xj, f32 *yj, c64 *cj, int *iflag, f32 *eps, i64 *ms, + i64 *mt, c64 *fk, finufft_opts *o, int *ier) { *ier = finufftf2d1(*nj, xj, yj, cj, *iflag, *eps, *ms, *mt, fk, o); } -void finufftf2d1many_(int *ntransf, BIGINT *nj, float *xj, float *yj, - std::complex *cj, int *iflag, float *eps, BIGINT *ms, - BIGINT *mt, std::complex *fk, finufft_opts *o, int *ier) { +void finufftf2d1many_(int *ntransf, i64 *nj, f32 *xj, f32 *yj, c64 *cj, int *iflag, + f32 *eps, i64 *ms, i64 *mt, c64 *fk, finufft_opts *o, int *ier) { *ier = finufftf2d1many(*ntransf, *nj, xj, yj, cj, *iflag, *eps, *ms, *mt, fk, o); } -void finufftf2d2_(BIGINT *nj, float *xj, float *yj, std::complex *cj, int *iflag, - float *eps, BIGINT *ms, BIGINT *mt, std::complex *fk, - finufft_opts *o, int *ier) { +void finufftf2d2_(i64 *nj, f32 *xj, f32 *yj, c64 *cj, int *iflag, f32 *eps, i64 *ms, + i64 *mt, c64 *fk, finufft_opts *o, int *ier) { *ier = finufftf2d2(*nj, xj, yj, cj, *iflag, *eps, *ms, *mt, fk, o); } -void finufftf2d2many_(int *ntransf, BIGINT *nj, float *xj, float *yj, - std::complex *cj, int *iflag, float *eps, BIGINT *ms, - BIGINT *mt, std::complex *fk, finufft_opts *o, int *ier) { +void finufftf2d2many_(int *ntransf, i64 *nj, f32 *xj, f32 *yj, c64 *cj, int *iflag, + f32 *eps, i64 *ms, i64 *mt, c64 *fk, finufft_opts *o, int *ier) { *ier = finufftf2d2many(*ntransf, *nj, xj, yj, cj, *iflag, *eps, *ms, *mt, fk, o); } -void finufftf2d3_(BIGINT *nj, float *x, float *y, std::complex *c, int *iflag, - float *eps, BIGINT *nk, float *s, float *t, std::complex *f, - finufft_opts *o, int *ier) { +void finufftf2d3_(i64 *nj, f32 *x, f32 *y, c64 *c, int *iflag, f32 *eps, i64 *nk, f32 *s, + f32 *t, c64 *f, finufft_opts *o, int *ier) { *ier = finufftf2d3(*nj, x, y, c, *iflag, *eps, *nk, s, t, f, o); } -void finufftf2d3many_(int *ntransf, BIGINT *nj, float *x, float *y, - std::complex *c, int *iflag, float *eps, BIGINT *nk, - float *s, float *t, std::complex *f, finufft_opts *o, - int *ier) { +void finufftf2d3many_(int *ntransf, i64 *nj, f32 *x, f32 *y, c64 *c, int *iflag, f32 *eps, + i64 *nk, f32 *s, f32 *t, c64 *f, finufft_opts *o, int *ier) { *ier = finufftf2d3many(*ntransf, *nj, x, y, c, *iflag, *eps, *nk, s, t, f, o); } // --- 3D --- -void finufftf3d1_(BIGINT *nj, float *xj, float *yj, float *zj, std::complex *cj, - int *iflag, float *eps, BIGINT *ms, BIGINT *mt, BIGINT *mu, - std::complex *fk, finufft_opts *o, int *ier) { +void finufftf3d1_(i64 *nj, f32 *xj, f32 *yj, f32 *zj, c64 *cj, int *iflag, f32 *eps, + i64 *ms, i64 *mt, i64 *mu, c64 *fk, finufft_opts *o, int *ier) { *ier = finufftf3d1(*nj, xj, yj, zj, cj, *iflag, *eps, *ms, *mt, *mu, fk, o); } -void finufftf3d1many_(int *ntransf, BIGINT *nj, float *xj, float *yj, float *zj, - std::complex *cj, int *iflag, float *eps, BIGINT *ms, - BIGINT *mt, BIGINT *mu, std::complex *fk, finufft_opts *o, - int *ier) { +void finufftf3d1many_(int *ntransf, i64 *nj, f32 *xj, f32 *yj, f32 *zj, c64 *cj, + int *iflag, f32 *eps, i64 *ms, i64 *mt, i64 *mu, c64 *fk, + finufft_opts *o, int *ier) { *ier = finufftf3d1many(*ntransf, *nj, xj, yj, zj, cj, *iflag, *eps, *ms, *mt, *mu, fk, o); } -void finufftf3d2_(BIGINT *nj, float *xj, float *yj, float *zj, std::complex *cj, - int *iflag, float *eps, BIGINT *ms, BIGINT *mt, BIGINT *mu, - std::complex *fk, finufft_opts *o, int *ier) { +void finufftf3d2_(i64 *nj, f32 *xj, f32 *yj, f32 *zj, c64 *cj, int *iflag, f32 *eps, + i64 *ms, i64 *mt, i64 *mu, c64 *fk, finufft_opts *o, int *ier) { *ier = finufftf3d2(*nj, xj, yj, zj, cj, *iflag, *eps, *ms, *mt, *mu, fk, o); } -void finufftf3d2many_(int *ntransf, BIGINT *nj, float *xj, float *yj, float *zj, - std::complex *cj, int *iflag, float *eps, BIGINT *ms, - BIGINT *mt, BIGINT *mu, std::complex *fk, finufft_opts *o, - int *ier) { +void finufftf3d2many_(int *ntransf, i64 *nj, f32 *xj, f32 *yj, f32 *zj, c64 *cj, + int *iflag, f32 *eps, i64 *ms, i64 *mt, i64 *mu, c64 *fk, + finufft_opts *o, int *ier) { *ier = finufftf3d2many(*ntransf, *nj, xj, yj, zj, cj, *iflag, *eps, *ms, *mt, *mu, fk, o); } -void finufftf3d3_(BIGINT *nj, float *x, float *y, float *z, std::complex *c, - int *iflag, float *eps, BIGINT *nk, float *s, float *t, float *u, - std::complex *f, finufft_opts *o, int *ier) { +void finufftf3d3_(i64 *nj, f32 *x, f32 *y, f32 *z, c64 *c, int *iflag, f32 *eps, i64 *nk, + f32 *s, f32 *t, f32 *u, c64 *f, finufft_opts *o, int *ier) { *ier = finufftf3d3(*nj, x, y, z, c, *iflag, *eps, *nk, s, t, u, f, o); } -void finufftf3d3many_(int *ntransf, BIGINT *nj, float *x, float *y, float *z, - std::complex *c, int *iflag, float *eps, BIGINT *nk, - float *s, float *t, float *u, std::complex *f, - finufft_opts *o, int *ier) { +void finufftf3d3many_(int *ntransf, i64 *nj, f32 *x, f32 *y, f32 *z, c64 *c, int *iflag, + f32 *eps, i64 *nk, f32 *s, f32 *t, f32 *u, c64 *f, finufft_opts *o, + int *ier) { *ier = finufftf3d3many(*ntransf, *nj, x, y, z, c, *iflag, *eps, *nk, s, t, u, f, o); } diff --git a/include/finufft.h b/include/finufft.h index 18eab5921..ab1da910d 100644 --- a/include/finufft.h +++ b/include/finufft.h @@ -2,8 +2,7 @@ // This contains both single and double precision user-facing commands. // "macro-safe" rewrite, including the plan object, Barnett 5/21/22-6/7/22. -// They will clobber any prior macros starting FINUFFT*, so in the lib/test -// sources finufft.h must be included before defs.h +// They will clobber any prior macros starting FINUFFT*. /* Devnotes. A) Two precisions done by including the "either precision" headers twice. diff --git a/include/finufft/defs.h b/include/finufft/defs.h deleted file mode 100644 index 633a7c1f3..000000000 --- a/include/finufft/defs.h +++ /dev/null @@ -1,137 +0,0 @@ -// Library private definitions & macros; also used by some test routines. -// If SINGLE defined, chooses single prec, otherwise double prec. -// Must be #included *after* finufft.h which clobbers FINUFFT* macros -// (see discussion near line 145 of this file). - -// Split out by Joakim Anden, Alex Barnett 9/20/18-9/24/18. -// Merged in dataTypes, private/public header split, clean. Barnett 6/7/22. -// finufft_plan_s made private, Wenda's idea. Barnett 8/8/22. - -/* Devnotes: - 1) Only need work for C++ since that's how compiled, including f_plan_s. - (But we use C-style templating, following fftw, etc.) -*/ - -#ifndef DEFS_H -#define DEFS_H - -// public header gives access to f_opts, f_spread_opts, f_plan... -// (and clobbers FINUFFT* macros; watch out!) -#include -#include -#include - -// --------------- Private data types for compilation in either prec --------- -// Devnote: must match those in relevant prec of public finufft.h interface! - -// All indexing in library that potentially can exceed 2^31 uses 64-bit signed. -// This includes all calling arguments (eg M,N) that could be huge someday. -// Precision-independent real and complex types, for private lib/test compile -#ifdef SINGLE -using FLT = float; -#else -using FLT = double; -#endif -#include // we define C++ complex type only -using CPX = std::complex; - -// -------------- Math consts (not in math.h) and useful math macros ---------- -#include - -// either-precision unit imaginary number... -#define IMA (CPX(0.0, 1.0)) - -// MR: In the longer term I suggest to move -// away from M_PI, which was never part of the standard. -// Perhaps a constexpr pi in the namespace finufft, or a constexpr finufft_pi -// if no namespaces are used? -// In C++20 these constants will be part of the language, and the problem will go away. -#ifndef M_PI // Windows apparently doesn't have this const -#define M_PI 3.14159265358979329 -#endif -#define M_1_2PI 0.159154943091895336 -#define M_2PI 6.28318530717958648 -// to avoid mixed precision operators in eg i*pi, an either-prec PI... -#define PI FLT(M_PI) - -// Random numbers: crappy unif random number generator in [0,1). -// These macros should probably be replaced by modern C++ std lib or random123. -// (RAND_MAX is in stdlib.h) -#include -static inline FLT rand01 [[maybe_unused]] () { return FLT(rand()) / FLT(RAND_MAX); } -// unif[-1,1]: -static inline FLT randm11 [[maybe_unused]] () { return 2 * rand01() - FLT(1); } -// complex unif[-1,1] for Re and Im: -static inline CPX crandm11 [[maybe_unused]] () { return randm11() + IMA * randm11(); } - -// Thread-safe seed-carrying versions of above (x is ptr to seed)... -// MR: we have to leave those as macros for now, as "rand_r" is deprecated -// and apparently no longer available on Windows. -#if 1 -#define rand01r(x) ((FLT)rand_r(x) / (FLT)RAND_MAX) -// unif[-1,1]: -#define randm11r(x) (2 * rand01r(x) - (FLT)1.0) -// complex unif[-1,1] for Re and Im: -#define crandm11r(x) (randm11r(x) + IMA * randm11r(x)) -#else -static inline FLT rand01r [[maybe_unused]] (unsigned int *x) { - return FLT(rand_r(x)) / FLT(RAND_MAX); -} -// unif[-1,1]: -static inline FLT randm11r [[maybe_unused]] (unsigned int *x) { - return 2 * rand01r(x) - FLT(1); -} -// complex unif[-1,1] for Re and Im: -static inline CPX crandm11r [[maybe_unused]] (unsigned int *x) { - return randm11r(x) + IMA * randm11r(x); -} -#endif - -// Prec-switching name macros (respond to SINGLE), used in lib & test sources -// and the plan object below. -// Note: crucially, these are now indep of macros used to gen public finufft.h! -// However, some overlap in name (FINUFFTIFY*, FINUFFT_PLAN*), meaning -// finufft.h cannot be included after defs.h since it undefs these overlaps :( -#ifdef SINGLE -// a macro to prepend finufft or finufftf to a string without a space. -// The 2nd level of indirection is needed for safety, see: -// https://isocpp.org/wiki/faq/misc-technical-issues#macros-with-token-pasting -#define FINUFFTIFY_UNSAFE(x) finufftf##x -#else -#define FINUFFTIFY_UNSAFE(x) finufft##x -#endif -#define FINUFFTIFY(x) FINUFFTIFY_UNSAFE(x) -// Now use the above tool to set up 2020-style macros used in tester source... -#define FINUFFT_PLAN FINUFFTIFY(_plan) -#define FINUFFT_PLAN_S FINUFFTIFY(_plan_s) -#define FINUFFT_DEFAULT_OPTS FINUFFTIFY(_default_opts) -#define FINUFFT_MAKEPLAN FINUFFTIFY(_makeplan) -#define FINUFFT_SETPTS FINUFFTIFY(_setpts) -#define FINUFFT_EXECUTE FINUFFTIFY(_execute) -#define FINUFFT_DESTROY FINUFFTIFY(_destroy) -#define FINUFFT1D1 FINUFFTIFY(1d1) -#define FINUFFT1D2 FINUFFTIFY(1d2) -#define FINUFFT1D3 FINUFFTIFY(1d3) -#define FINUFFT2D1 FINUFFTIFY(2d1) -#define FINUFFT2D2 FINUFFTIFY(2d2) -#define FINUFFT2D3 FINUFFTIFY(2d3) -#define FINUFFT3D1 FINUFFTIFY(3d1) -#define FINUFFT3D2 FINUFFTIFY(3d2) -#define FINUFFT3D3 FINUFFTIFY(3d3) -#define FINUFFT1D1MANY FINUFFTIFY(1d1many) -#define FINUFFT1D2MANY FINUFFTIFY(1d2many) -#define FINUFFT1D3MANY FINUFFTIFY(1d3many) -#define FINUFFT2D1MANY FINUFFTIFY(2d1many) -#define FINUFFT2D2MANY FINUFFTIFY(2d2many) -#define FINUFFT2D3MANY FINUFFTIFY(2d3many) -#define FINUFFT3D1MANY FINUFFTIFY(3d1many) -#define FINUFFT3D2MANY FINUFFTIFY(3d2many) -#define FINUFFT3D3MANY FINUFFTIFY(3d3many) - -// -------- FINUFFT's plan object, prec-switching version ------------------ -// NB: now private (the public C++ or C etc user sees an opaque pointer to it) - -#include // (must come after complex.h) -struct FINUFFT_PLAN_S : public FINUFFT_PLAN_T {}; - -#endif // DEFS_H diff --git a/include/finufft/dirft.h b/include/finufft/dirft.h index 5d13265a4..2449d864e 100644 --- a/include/finufft/dirft.h +++ b/include/finufft/dirft.h @@ -1,22 +1,36 @@ #ifndef DIRFT_H #define DIRFT_H -#include +#include -void dirft1d1(BIGINT nj, FLT *x, CPX *c, int isign, BIGINT ms, CPX *f); -void dirft1d2(BIGINT nj, FLT *x, CPX *c, int iflag, BIGINT ms, CPX *f); -void dirft1d3(BIGINT nj, FLT *x, CPX *c, int iflag, BIGINT nk, FLT *s, CPX *f); +template +void dirft1d1(BIGINT nj, T *x, std::complex *c, int isign, BIGINT ms, + std::complex *f); +template +void dirft1d2(BIGINT nj, T *x, std::complex *c, int iflag, BIGINT ms, + std::complex *f); +template +void dirft1d3(BIGINT nj, T *x, std::complex *c, int iflag, BIGINT nk, T *s, + std::complex *f); -void dirft2d1(BIGINT nj, FLT *x, FLT *y, CPX *c, int iflag, BIGINT ms, BIGINT mt, CPX *f); -void dirft2d2(BIGINT nj, FLT *x, FLT *y, CPX *c, int iflag, BIGINT ms, BIGINT mt, CPX *f); -void dirft2d3(BIGINT nj, FLT *x, FLT *y, CPX *c, int iflag, BIGINT nk, FLT *s, FLT *t, - CPX *f); +template +void dirft2d1(BIGINT nj, T *x, T *y, std::complex *c, int iflag, BIGINT ms, BIGINT mt, + std::complex *f); +template +void dirft2d2(BIGINT nj, T *x, T *y, std::complex *c, int iflag, BIGINT ms, BIGINT mt, + std::complex *f); +template +void dirft2d3(BIGINT nj, T *x, T *y, std::complex *c, int iflag, BIGINT nk, T *s, T *t, + std::complex *f); -void dirft3d1(BIGINT nj, FLT *x, FLT *y, FLT *z, CPX *c, int iflag, BIGINT ms, BIGINT mt, - BIGINT mu, CPX *f); -void dirft3d2(BIGINT nj, FLT *x, FLT *y, FLT *z, CPX *c, int iflag, BIGINT ms, BIGINT mt, - BIGINT mu, CPX *f); -void dirft3d3(BIGINT nj, FLT *x, FLT *y, FLT *z, CPX *c, int iflag, BIGINT nk, FLT *s, - FLT *t, FLT *u, CPX *f); +template +void dirft3d1(BIGINT nj, T *x, T *y, T *z, std::complex *c, int iflag, BIGINT ms, + BIGINT mt, BIGINT mu, std::complex *f); +template +void dirft3d2(BIGINT nj, T *x, T *y, T *z, std::complex *c, int iflag, BIGINT ms, + BIGINT mt, BIGINT mu, std::complex *f); +template +void dirft3d3(BIGINT nj, T *x, T *y, T *z, std::complex *c, int iflag, BIGINT nk, T *s, + T *t, T *u, std::complex *f); #endif diff --git a/include/finufft/fft.h b/include/finufft/fft.h index c6d5de7a5..1fd8d1635 100644 --- a/include/finufft/fft.h +++ b/include/finufft/fft.h @@ -10,6 +10,9 @@ template class Finufft_FFT_plan { public: [[maybe_unused]] Finufft_FFT_plan(void (*)(void *) = nullptr, void (*)(void *) = nullptr, void * = nullptr) {} + // deleting these operations to be consistent with the FFTW plans (seel below) + Finufft_FFT_plan(const Finufft_FFT_plan &) = delete; + Finufft_FFT_plan &operator=(const Finufft_FFT_plan &) = delete; [[maybe_unused]] void plan(const std::vector & /*dims*/, size_t /*batchSize*/, std::complex * /*ptr*/, int /*sign*/, int /*options*/, int /*nthreads*/) {} @@ -63,11 +66,15 @@ template<> struct Finufft_FFT_plan { #endif unlock(); } + // we have raw pointers in the object (the FFTW plan). + // If we allow copying those, we end up destroying the plans multiple times. + Finufft_FFT_plan(const Finufft_FFT_plan &) = delete; [[maybe_unused]] ~Finufft_FFT_plan() { lock(); fftwf_destroy_plan(plan_); unlock(); } + Finufft_FFT_plan &operator=(const Finufft_FFT_plan &) = delete; void plan [[maybe_unused]] (const std::vector &dims, size_t batchSize, @@ -131,11 +138,13 @@ template<> struct Finufft_FFT_plan { #endif unlock(); } + Finufft_FFT_plan(const Finufft_FFT_plan &) = delete; [[maybe_unused]] ~Finufft_FFT_plan() { lock(); fftw_destroy_plan(plan_); unlock(); } + Finufft_FFT_plan &operator=(const Finufft_FFT_plan &) = delete; void plan [[maybe_unused]] (const std::vector &dims, size_t batchSize, diff --git a/include/finufft/finufft_core.h b/include/finufft/finufft_core.h index de2f2dab9..cdfa4ab33 100644 --- a/include/finufft/finufft_core.h +++ b/include/finufft/finufft_core.h @@ -95,6 +95,17 @@ inline constexpr BIGINT MAX_NF = BIGINT(1e12); // values for M = nj (also nk in type 3)... inline constexpr BIGINT MAX_NU_PTS = BIGINT(1e14); +// MR: In the longer term I suggest to move +// away from M_PI, which was never part of the standard. +// Perhaps a constexpr pi in the namespace finufft, or a constexpr finufft_pi +// if no namespaces are used? +// In C++20 these constants will be part of the language, and the problem will go away. +#ifndef M_PI // Windows apparently doesn't have this const +#define M_PI 3.14159265358979329 +#endif +#define M_1_2PI 0.159154943091895336 +#define M_2PI 6.28318530717958648 + // ----- OpenMP macros which also work when omp not present ----- // Allows compile-time switch off of openmp, so compilation without any openmp // is done (Note: _OPENMP is automatically set by -fopenmp compile flag) @@ -138,7 +149,8 @@ template struct FINUFFT_PLAN_T { // the main plan object, fully C++ // These default and delete specifications just state the obvious, // but are here to silence compiler warnings. - FINUFFT_PLAN_T() = default; + FINUFFT_PLAN_T(int type, int dim, const BIGINT *n_modes, int iflag, int ntrans, TF tol, + finufft_opts *opts, int &ier); // Copy construction and assignent are already deleted implicitly // because of the unique_ptr member. FINUFFT_PLAN_T(const FINUFFT_PLAN_T &) = delete; @@ -189,7 +201,8 @@ template struct FINUFFT_PLAN_T { // the main plan object, fully C++ std::vector Sp, Tp, Up; // internal primed targs (s'_k, etc), // allocated type3params t3P; // groups together type 3 shift, scale, phase, parameters - FINUFFT_PLAN_T *innerT2plan = nullptr; // ptr used for type 2 in step 2 of type 3 + std::unique_ptr> innerT2plan; // ptr used for type 2 in step 2 of + // type 3 // other internal structs std::unique_ptr> fftPlan; @@ -204,10 +217,5 @@ void finufft_default_opts_t(finufft_opts *o); template int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int ntrans, TF tol, FINUFFT_PLAN_T **pp, finufft_opts *opts); -template -int finufft_setpts_t(FINUFFT_PLAN_T *p, BIGINT nj, TF *xj, TF *yj, TF *zj, BIGINT nk, - TF *s, TF *t, TF *u); -template -int finufft_execute_t(FINUFFT_PLAN_T *p, std::complex *cj, std::complex *fk); #endif // FINUFFT_CORE_H diff --git a/include/finufft/spreadinterp.h b/include/finufft/spreadinterp.h index 8a83af3ce..04c4734ba 100644 --- a/include/finufft/spreadinterp.h +++ b/include/finufft/spreadinterp.h @@ -1,12 +1,13 @@ // Defines interface to spreading/interpolation code. -// Devnotes: see defs.h for definition of MAX_NSPREAD (as of 9/24/18). +// Devnotes: see finufft_core.h for definition of MAX_NSPREAD // RESCALE macro moved to spreadinterp.cpp, 7/15/20. // finufft_spread_opts renamed 6/7/22. #ifndef SPREADINTERP_H #define SPREADINTERP_H +#include #include /* Bitwise debugging timing flag (TF) defs; see finufft_spread_opts.flags. diff --git a/include/finufft/test_defs.h b/include/finufft/test_defs.h index bdd4cf147..e8e80e9f8 100644 --- a/include/finufft/test_defs.h +++ b/include/finufft/test_defs.h @@ -11,21 +11,120 @@ // for omp rand filling #define TEST_RANDCHUNK 1000000 -// the public interface: since this clobbers FINUFFT* macros, must be included -// *before* private defs.h... +// the public interface #include -// convenient private finufft internals (must come after finufft.h) +// convenient private finufft internals +#include #include -// prec-switching (via SINGLE) to set up FLT, CPX, BIGINT, FINUFFT1D1, etc... -#include +#include + +// --------------- Private data types for compilation in either prec --------- +// Devnote: must match those in relevant prec of public finufft.h interface! + +// All indexing in library that potentially can exceed 2^31 uses 64-bit signed. +// This includes all calling arguments (eg M,N) that could be huge someday. + +// Precision-independent real and complex types, for private lib/test compile +#ifdef SINGLE +using FLT = float; +#else +using FLT = double; +#endif +#include // we define C++ complex type only +using CPX = std::complex; + +// -------------- Math consts (not in math.h) and useful math macros ---------- +#include + +// either-precision unit imaginary number... +#define IMA (CPX(0.0, 1.0)) + +// to avoid mixed precision operators in eg i*pi, an either-prec PI... +#define PI FLT(M_PI) + +// Random numbers: crappy unif random number generator in [0,1). +// These macros should probably be replaced by modern C++ std lib or random123. +// (RAND_MAX is in stdlib.h) +#include +static inline FLT rand01 [[maybe_unused]] () { return FLT(rand()) / FLT(RAND_MAX); } +// unif[-1,1]: +static inline FLT randm11 [[maybe_unused]] () { return 2 * rand01() - FLT(1); } +// complex unif[-1,1] for Re and Im: +static inline CPX crandm11 [[maybe_unused]] () { return randm11() + IMA * randm11(); } + +// Thread-safe seed-carrying versions of above (x is ptr to seed)... +// MR: we have to leave those as macros for now, as "rand_r" is deprecated +// and apparently no longer available on Windows. +#if 1 +#define rand01r(x) ((FLT)rand_r(x) / (FLT)RAND_MAX) +// unif[-1,1]: +#define randm11r(x) (2 * rand01r(x) - (FLT)1.0) +// complex unif[-1,1] for Re and Im: +#define crandm11r(x) (randm11r(x) + IMA * randm11r(x)) +#else +static inline FLT rand01r [[maybe_unused]] (unsigned int *x) { + return FLT(rand_r(x)) / FLT(RAND_MAX); +} +// unif[-1,1]: +static inline FLT randm11r [[maybe_unused]] (unsigned int *x) { + return 2 * rand01r(x) - FLT(1); +} +// complex unif[-1,1] for Re and Im: +static inline CPX crandm11r [[maybe_unused]] (unsigned int *x) { + return randm11r(x) + IMA * randm11r(x); +} +#endif + +// Prec-switching name macros (respond to SINGLE), used in lib & test sources +// and the plan object below. +// Note: crucially, these are now indep of macros used to gen public finufft.h! +#ifdef SINGLE +// a macro to prepend finufft or finufftf to a string without a space. +// The 2nd level of indirection is needed for safety, see: +// https://isocpp.org/wiki/faq/misc-technical-issues#macros-with-token-pasting +#define FINUFFTIFY_UNSAFE(x) finufftf##x +#else +#define FINUFFTIFY_UNSAFE(x) finufft##x +#endif +#define FINUFFTIFY(x) FINUFFTIFY_UNSAFE(x) +// Now use the above tool to set up 2020-style macros used in tester source... +#define FINUFFT_PLAN FINUFFTIFY(_plan) +#define FINUFFT_PLAN_S FINUFFTIFY(_plan_s) +#define FINUFFT_DEFAULT_OPTS FINUFFTIFY(_default_opts) +#define FINUFFT_MAKEPLAN FINUFFTIFY(_makeplan) +#define FINUFFT_SETPTS FINUFFTIFY(_setpts) +#define FINUFFT_EXECUTE FINUFFTIFY(_execute) +#define FINUFFT_DESTROY FINUFFTIFY(_destroy) +#define FINUFFT1D1 FINUFFTIFY(1d1) +#define FINUFFT1D2 FINUFFTIFY(1d2) +#define FINUFFT1D3 FINUFFTIFY(1d3) +#define FINUFFT2D1 FINUFFTIFY(2d1) +#define FINUFFT2D2 FINUFFTIFY(2d2) +#define FINUFFT2D3 FINUFFTIFY(2d3) +#define FINUFFT3D1 FINUFFTIFY(3d1) +#define FINUFFT3D2 FINUFFTIFY(3d2) +#define FINUFFT3D3 FINUFFTIFY(3d3) +#define FINUFFT1D1MANY FINUFFTIFY(1d1many) +#define FINUFFT1D2MANY FINUFFTIFY(1d2many) +#define FINUFFT1D3MANY FINUFFTIFY(1d3many) +#define FINUFFT2D1MANY FINUFFTIFY(2d1many) +#define FINUFFT2D2MANY FINUFFTIFY(2d2many) +#define FINUFFT2D3MANY FINUFFTIFY(2d3many) +#define FINUFFT3D1MANY FINUFFTIFY(3d1many) +#define FINUFFT3D2MANY FINUFFTIFY(3d2many) +#define FINUFFT3D3MANY FINUFFTIFY(3d3many) + +// -------- FINUFFT's plan object, prec-switching version ------------------ +// NB: now private (the public C++ or C etc user sees an opaque pointer to it) + +#include +struct FINUFFT_PLAN_S : public FINUFFT_PLAN_T {}; // std stuff for tester src #include #include #include -#include -#include #include #endif // TEST_DEFS_H diff --git a/include/finufft/utils.h b/include/finufft/utils.h index 040f60543..c550a8bb9 100644 --- a/include/finufft/utils.h +++ b/include/finufft/utils.h @@ -14,51 +14,44 @@ namespace utils { // ahb's low-level array helpers template -FINUFFT_EXPORT T FINUFFT_CDECL relerrtwonorm(BIGINT n, std::complex *a, - std::complex *b) +FINUFFT_EXPORT T FINUFFT_CDECL relerrtwonorm(BIGINT n, const std::complex *a, + const std::complex *b) // ||a-b||_2 / ||a||_2 { T err = 0.0, nrm = 0.0; for (BIGINT m = 0; m < n; ++m) { - nrm += real(conj(a[m]) * a[m]); - std::complex diff = a[m] - b[m]; - err += real(conj(diff) * diff); + nrm += std::norm(a[m]); + err += std::norm(a[m] - b[m]); } return sqrt(err / nrm); } template -FINUFFT_EXPORT T FINUFFT_CDECL errtwonorm(BIGINT n, std::complex *a, - std::complex *b) +FINUFFT_EXPORT T FINUFFT_CDECL errtwonorm(BIGINT n, const std::complex *a, + const std::complex *b) // ||a-b||_2 { T err = 0.0; // compute error 2-norm - for (BIGINT m = 0; m < n; ++m) { - std::complex diff = a[m] - b[m]; - err += real(conj(diff) * diff); - } + for (BIGINT m = 0; m < n; ++m) err += std::norm(a[m] - b[m]); return sqrt(err); } template -FINUFFT_EXPORT T FINUFFT_CDECL twonorm(BIGINT n, std::complex *a) +FINUFFT_EXPORT T FINUFFT_CDECL twonorm(BIGINT n, const std::complex *a) // ||a||_2 { T nrm = 0.0; - for (BIGINT m = 0; m < n; ++m) nrm += real(conj(a[m]) * a[m]); + for (BIGINT m = 0; m < n; ++m) nrm += std::norm(a[m]); return sqrt(nrm); } template -FINUFFT_EXPORT T FINUFFT_CDECL infnorm(BIGINT n, std::complex *a) +FINUFFT_EXPORT T FINUFFT_CDECL infnorm(BIGINT n, const std::complex *a) // ||a||_infty { T nrm = 0.0; - for (BIGINT m = 0; m < n; ++m) { - T aa = real(conj(a[m]) * a[m]); - if (aa > nrm) nrm = aa; - } + for (BIGINT m = 0; m < n; ++m) nrm = std::max(nrm, std::norm(a[m])); return sqrt(nrm); } template -FINUFFT_EXPORT void FINUFFT_CDECL arrayrange(BIGINT n, T *a, T *lo, T *hi) +FINUFFT_EXPORT void FINUFFT_CDECL arrayrange(BIGINT n, const T *a, T *lo, T *hi) // With a a length-n array, writes out min(a) to lo and max(a) to hi, // so that all a values lie in [lo,hi]. // If n==0, lo and hi are not finite. @@ -71,10 +64,10 @@ FINUFFT_EXPORT void FINUFFT_CDECL arrayrange(BIGINT n, T *a, T *lo, T *hi) } } template -FINUFFT_EXPORT void FINUFFT_CDECL arraywidcen(BIGINT n, T *a, T *w, T *c) +FINUFFT_EXPORT void FINUFFT_CDECL arraywidcen(BIGINT n, const T *a, T *w, T *c) // Writes out w = half-width and c = center of an interval enclosing all a[n]'s // Only chooses a nonzero center if this increases w by less than fraction -// ARRAYWIDCEN_GROWFRAC defined in defs.h. +// ARRAYWIDCEN_GROWFRAC defined in finufft_core.h. // This prevents rephasings which don't grow nf by much. 6/8/17 // If n==0, w and c are not finite. { diff --git a/include/finufft_eitherprec.h b/include/finufft_eitherprec.h index 3f0a7d95c..fa698cec2 100644 --- a/include/finufft_eitherprec.h +++ b/include/finufft_eitherprec.h @@ -4,8 +4,7 @@ /* Devnotes. 1) Since everything here is exposed to the public interface, macros must be safe, eg FINUFFTsomething. - 2) They will clobber any prior macros starting FINUFFT*, so in the lib/test - sources finufft.h must be included before defs.h + 2) They will clobber any prior macros starting FINUFFT* 3) for debug of header macros, see finufft.h */ @@ -82,7 +81,7 @@ extern "C" { typedef struct FINUFFT_PLAN_S *FINUFFT_PLAN; // ------------------ the guru interface ------------------------------------ -// (sources in finufft.cpp) +// (sources in c_interface.cpp) FINUFFT_EXPORT void FINUFFT_CDECL FINUFFTIFY(_default_opts)(finufft_opts *o); FINUFFT_EXPORT int FINUFFT_CDECL FINUFFTIFY(_makeplan)( @@ -96,7 +95,7 @@ FINUFFT_EXPORT int FINUFFT_CDECL FINUFFTIFY(_execute)( FINUFFT_EXPORT int FINUFFT_CDECL FINUFFTIFY(_destroy)(FINUFFT_PLAN plan); // ----------------- the 18 simple interfaces ------------------------------- -// (sources in simpleinterfaces.cpp) +// (sources in c_interface.cpp) FINUFFT_EXPORT int FINUFFT_CDECL FINUFFTIFY(1d1)( FINUFFT_BIGINT nj, FINUFFT_FLT *xj, FINUFFT_CPX *cj, int iflag, FINUFFT_FLT eps, diff --git a/makefile b/makefile index c9754a376..7a715a7d5 100644 --- a/makefile +++ b/makefile @@ -66,7 +66,7 @@ XSIMD_DIR := $(DEPS_ROOT)/xsimd # DUCC sources optional dependency repo DUCC_URL := https://gitlab.mpcdf.mpg.de/mtr/ducc.git -DUCC_VERSION := ducc0_0_34_0 +DUCC_VERSION := ducc0_0_35_0 DUCC_DIR := $(DEPS_ROOT)/ducc # this dummy file used as empty target by make... DUCC_COOKIE := $(DUCC_DIR)/.finufft_has_ducc @@ -137,7 +137,7 @@ ABSDYNLIB = $(FINUFFT)$(DYNLIB) SOBJS = src/utils.o src/spreadinterp.o # all lib dual-precision objs (note DUCC_OBJS empty if unused) -OBJS = $(SOBJS) contrib/legendre_rule_fast.o src/fft.o src/finufft_core.o src/simpleinterfaces.o fortran/finufftfort.o $(DUCC_OBJS) +OBJS = $(SOBJS) contrib/legendre_rule_fast.o src/fft.o src/finufft.o src/c_interface.o fortran/finufftfort.o $(DUCC_OBJS) .PHONY: usage lib examples test perftest spreadtest spreadtestall fortran matlab octave all mex python clean objclean pyclean mexclean wheel docker-wheel gurutime docs setup setupclean @@ -181,12 +181,10 @@ HEADERS = $(wildcard include/*.h include/finufft/*.h) $(DUCC_HEADERS) $(CC) -c $(CFLAGS) $< -o $@ %.o: %.f $(FC) -c $(FFLAGS) $< -o $@ -%_32.o: %.f - $(FC) -DSINGLE -c $(FFLAGS) $< -o $@ # spreadinterp include auto-generated code, xsimd header-only dependency; # if FFT=DUCC also setup ducc with fft.h dependency on $(DUCC_SETUP)... -# Note src/spreadinterp.cpp includes finufft/defs.h which includes finufft/fft.h +# Note src/spreadinterp.cpp includes finufft/finufft_core.h which includes finufft/fft.h # so fftw/ducc header needed for spreadinterp, though spreadinterp should not # depend on fftw/ducc directly? include/finufft/fft.h: $(DUCC_SETUP) diff --git a/perftest/manysmallprobs.cpp b/perftest/manysmallprobs.cpp index 5e27289d8..ded866ba0 100644 --- a/perftest/manysmallprobs.cpp +++ b/perftest/manysmallprobs.cpp @@ -1,6 +1,6 @@ // public header #include "finufft.h" -#include "finufft/defs.h" +#include "finufft/test_defs.h" // private access to timer #include "finufft/utils.h" diff --git a/perftest/spreadtestnd.cpp b/perftest/spreadtestnd.cpp index d30626007..8c35828bd 100644 --- a/perftest/spreadtestnd.cpp +++ b/perftest/spreadtestnd.cpp @@ -1,5 +1,5 @@ -#include #include +#include #include #include diff --git a/perftest/spreadtestndall.cpp b/perftest/spreadtestndall.cpp index 14aad3420..9787d86b0 100644 --- a/perftest/spreadtestndall.cpp +++ b/perftest/spreadtestndall.cpp @@ -1,5 +1,5 @@ -#include #include +#include #include #include diff --git a/src/simpleinterfaces.cpp b/src/c_interface.cpp similarity index 55% rename from src/simpleinterfaces.cpp rename to src/c_interface.cpp index d2606ee6b..6f7deb61b 100644 --- a/src/simpleinterfaces.cpp +++ b/src/c_interface.cpp @@ -12,8 +12,6 @@ using namespace std; As of v1.2 these simply invoke the guru interface, through a helper layer. See ../docs/usage.rst or http://finufft.readthedocs.io for documentation all routines here. - This compiles in either double or single precision (based on -DSINGLE), - producing functions finufft?d?{many} or finufftf?1?{many} respectively. Authors: Andrea Malleo and Alex Barnett, 2019-2020. Safe namespacing, Barnett, May 2022. @@ -42,20 +40,18 @@ int finufftf_makeplan(int type, int dim, const i64 *n_modes, int iflag, int ntra int finufft_setpts(finufft_plan p, i64 nj, f64 *xj, f64 *yj, f64 *zj, i64 nk, f64 *s, f64 *t, f64 *u) { - return finufft_setpts_t(reinterpret_cast *>(p), nj, xj, yj, zj, - nk, s, t, u); + return reinterpret_cast *>(p)->setpts(nj, xj, yj, zj, nk, s, t, u); } int finufftf_setpts(finufftf_plan p, i64 nj, f32 *xj, f32 *yj, f32 *zj, i64 nk, f32 *s, f32 *t, f32 *u) { - return finufft_setpts_t(reinterpret_cast *>(p), nj, xj, yj, zj, - nk, s, t, u); + return reinterpret_cast *>(p)->setpts(nj, xj, yj, zj, nk, s, t, u); } int finufft_execute(finufft_plan p, c128 *cj, c128 *fk) { - return finufft_execute_t(reinterpret_cast *>(p), cj, fk); + return reinterpret_cast *>(p)->execute(cj, fk); } int finufftf_execute(finufftf_plan p, c64 *cj, c64 *fk) { - return finufft_execute_t(reinterpret_cast *>(p), cj, fk); + return reinterpret_cast *>(p)->execute(cj, fk); } int finufft_destroy(finufft_plan p) @@ -68,7 +64,6 @@ int finufft_destroy(finufft_plan p) return 1; delete reinterpret_cast *>(p); - p = nullptr; return 0; // success } int finufftf_destroy(finufftf_plan p) @@ -81,19 +76,15 @@ int finufftf_destroy(finufftf_plan p) return 1; delete reinterpret_cast *>(p); - p = nullptr; return 0; // success } // Helper layer ........................................................... -namespace finufft { -namespace common { - template -static int guruCall(int n_dims, int type, int n_transf, i64 nj, T *xj, T *yj, T *zj, - std::complex *cj, int iflag, T eps, - const std::array &n_modes, i64 nk, T *s, T *t, T *u, - std::complex *fk, finufft_opts *popts) +static int guru(int n_dims, int type, int n_transf, i64 nj, const std::array &xyz, + std::complex *cj, int iflag, T eps, const std::array &n_modes, + i64 nk, const std::array &stu, std::complex *fk, + finufft_opts *popts) // Helper layer between simple interfaces (with opts) and the guru functions. // Author: Andrea Malleo, 2019. { @@ -107,14 +98,14 @@ static int guruCall(int n_dims, int type, int n_transf, i64 nj, T *xj, T *yj, T return ier; } - int ier2 = finufft_setpts_t(plan, nj, xj, yj, zj, nk, s, t, u); + int ier2 = plan->setpts(nj, xyz[0], xyz[1], xyz[2], nk, stu[0], stu[1], stu[2]); if (ier2 > 1) { fprintf(stderr, "FINUFFT invokeGuru: setpts error (ier=%d)!\n", ier2); delete plan; return ier2; } - int ier3 = finufft_execute_t(plan, cj, fk); + int ier3 = plan->execute(cj, fk); if (ier3 > 1) { fprintf(stderr, "FINUFFT invokeGuru: execute error (ier=%d)!\n", ier3); delete plan; @@ -125,258 +116,179 @@ static int guruCall(int n_dims, int type, int n_transf, i64 nj, T *xj, T *yj, T return max(max(ier, ier2), ier3); // in case any one gave a (positive!) warning } -} // namespace common -} // namespace finufft - -using namespace finufft::common; - // Dimension 1111111111111111111111111111111111111111111111111111111111111111 int finufft1d1many(int n_transf, i64 nj, f64 *xj, c128 *cj, int iflag, f64 eps, i64 ms, - c128 *fk, finufft_opts *opts) -// Type-1 1D complex nonuniform FFT for many vectors. See ../docs/usage.rst -{ - return guruCall(1, 1, n_transf, nj, xj, nullptr, nullptr, cj, iflag, eps, - {ms, 1, 1}, 0, nullptr, nullptr, nullptr, fk, opts); + c128 *fk, finufft_opts *opts) { + return guru(1, 1, n_transf, nj, {xj}, cj, iflag, eps, {ms, 1, 1}, 0, {}, fk, opts); } int finufftf1d1many(int n_transf, i64 nj, f32 *xj, c64 *cj, int iflag, f32 eps, i64 ms, - c64 *fk, finufft_opts *opts) -// Type-1 1D complex nonuniform FFT for many vectors. See ../docs/usage.rst -{ - return guruCall(1, 1, n_transf, nj, xj, nullptr, nullptr, cj, iflag, eps, - {ms, 1, 1}, 0, nullptr, nullptr, nullptr, fk, opts); + c64 *fk, finufft_opts *opts) { + return guru(1, 1, n_transf, nj, {xj}, cj, iflag, eps, {ms, 1, 1}, 0, {}, fk, opts); } int finufft1d1(i64 nj, f64 *xj, c128 *cj, int iflag, f64 eps, i64 ms, c128 *fk, - finufft_opts *opts) -// Type-1 1D complex nonuniform FFT. See ../docs/usage.rst -{ + finufft_opts *opts) { return finufft1d1many(1, nj, xj, cj, iflag, eps, ms, fk, opts); } int finufftf1d1(i64 nj, f32 *xj, c64 *cj, int iflag, f32 eps, i64 ms, c64 *fk, - finufft_opts *opts) -// Type-1 1D complex nonuniform FFT. See ../docs/usage.rst -{ + finufft_opts *opts) { return finufftf1d1many(1, nj, xj, cj, iflag, eps, ms, fk, opts); } int finufft1d2many(int n_transf, i64 nj, f64 *xj, c128 *cj, int iflag, f64 eps, i64 ms, - c128 *fk, finufft_opts *opts) -// Type-2 1D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(1, 2, n_transf, nj, xj, nullptr, nullptr, cj, iflag, eps, - {ms, 1, 1}, 0, nullptr, nullptr, nullptr, fk, opts); + c128 *fk, finufft_opts *opts) { + return guru(1, 2, n_transf, nj, {xj}, cj, iflag, eps, {ms, 1, 1}, 0, {}, fk, opts); } int finufftf1d2many(int n_transf, i64 nj, f32 *xj, c64 *cj, int iflag, f32 eps, i64 ms, - c64 *fk, finufft_opts *opts) -// Type-2 1D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(1, 2, n_transf, nj, xj, nullptr, nullptr, cj, iflag, eps, - {ms, 1, 1}, 0, nullptr, nullptr, nullptr, fk, opts); + c64 *fk, finufft_opts *opts) { + return guru(1, 2, n_transf, nj, {xj}, cj, iflag, eps, {ms, 1, 1}, 0, {}, fk, opts); } int finufft1d2(i64 nj, f64 *xj, c128 *cj, int iflag, f64 eps, i64 ms, c128 *fk, - finufft_opts *opts) -// Type-2 1D complex nonuniform FFT. See ../docs/usage.rst -{ + finufft_opts *opts) { return finufft1d2many(1, nj, xj, cj, iflag, eps, ms, fk, opts); } int finufftf1d2(i64 nj, f32 *xj, c64 *cj, int iflag, f32 eps, i64 ms, c64 *fk, - finufft_opts *opts) -// Type-2 1D complex nonuniform FFT. See ../docs/usage.rst -{ + finufft_opts *opts) { return finufftf1d2many(1, nj, xj, cj, iflag, eps, ms, fk, opts); } int finufft1d3many(int n_transf, i64 nj, f64 *xj, c128 *cj, int iflag, f64 eps, i64 nk, - f64 *s, c128 *fk, finufft_opts *opts) -// Type-3 1D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(1, 3, n_transf, nj, xj, nullptr, nullptr, cj, iflag, eps, - {0, 0, 0}, nk, s, nullptr, nullptr, fk, opts); + f64 *s, c128 *fk, finufft_opts *opts) { + return guru(1, 3, n_transf, nj, {xj}, cj, iflag, eps, {0, 0, 0}, nk, {s}, fk, + opts); } int finufftf1d3many(int n_transf, i64 nj, f32 *xj, c64 *cj, int iflag, f32 eps, i64 nk, - f32 *s, c64 *fk, finufft_opts *opts) -// Type-3 1D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(1, 3, n_transf, nj, xj, nullptr, nullptr, cj, iflag, eps, - {0, 0, 0}, nk, s, nullptr, nullptr, fk, opts); + f32 *s, c64 *fk, finufft_opts *opts) { + return guru(1, 3, n_transf, nj, {xj}, cj, iflag, eps, {0, 0, 0}, nk, {s}, fk, + opts); } int finufft1d3(i64 nj, f64 *xj, c128 *cj, int iflag, f64 eps, i64 nk, f64 *s, c128 *fk, - finufft_opts *opts) -// Type-3 1D complex nonuniform FFT. See ../docs/usage.rst -{ + finufft_opts *opts) { return finufft1d3many(1, nj, xj, cj, iflag, eps, nk, s, fk, opts); } int finufftf1d3(i64 nj, f32 *xj, c64 *cj, int iflag, f32 eps, i64 nk, f32 *s, c64 *fk, - finufft_opts *opts) -// Type-3 1D complex nonuniform FFT. See ../docs/usage.rst -{ + finufft_opts *opts) { return finufftf1d3many(1, nj, xj, cj, iflag, eps, nk, s, fk, opts); } // Dimension 22222222222222222222222222222222222222222222222222222222222222222 int finufft2d1many(int n_transf, i64 nj, f64 *xj, f64 *yj, c128 *c, int iflag, f64 eps, - i64 ms, i64 mt, c128 *fk, finufft_opts *opts) -// Type-1 2D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(2, 1, n_transf, nj, xj, yj, nullptr, c, iflag, eps, {ms, mt, 1}, 0, - nullptr, nullptr, nullptr, fk, opts); + i64 ms, i64 mt, c128 *fk, finufft_opts *opts) { + return guru(2, 1, n_transf, nj, {xj, yj}, c, iflag, eps, {ms, mt, 1}, 0, {}, fk, + opts); } int finufftf2d1many(int n_transf, i64 nj, f32 *xj, f32 *yj, c64 *c, int iflag, f32 eps, - i64 ms, i64 mt, c64 *fk, finufft_opts *opts) -// Type-1 2D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(2, 1, n_transf, nj, xj, yj, nullptr, c, iflag, eps, {ms, mt, 1}, 0, - nullptr, nullptr, nullptr, fk, opts); + i64 ms, i64 mt, c64 *fk, finufft_opts *opts) { + return guru(2, 1, n_transf, nj, {xj, yj}, c, iflag, eps, {ms, mt, 1}, 0, {}, fk, + opts); } int finufft2d1(i64 nj, f64 *xj, f64 *yj, c128 *cj, int iflag, f64 eps, i64 ms, i64 mt, - c128 *fk, finufft_opts *opts) -// Type-1 2D complex nonuniform FFT. See ../docs/usage.rst -{ + c128 *fk, finufft_opts *opts) { return finufft2d1many(1, nj, xj, yj, cj, iflag, eps, ms, mt, fk, opts); } int finufftf2d1(i64 nj, f32 *xj, f32 *yj, c64 *cj, int iflag, f32 eps, i64 ms, i64 mt, - c64 *fk, finufft_opts *opts) -// Type-1 2D complex nonuniform FFT. See ../docs/usage.rst -{ + c64 *fk, finufft_opts *opts) { return finufftf2d1many(1, nj, xj, yj, cj, iflag, eps, ms, mt, fk, opts); } int finufft2d2many(int n_transf, i64 nj, f64 *xj, f64 *yj, c128 *c, int iflag, f64 eps, - i64 ms, i64 mt, c128 *fk, finufft_opts *opts) -// Type-2 2D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(2, 2, n_transf, nj, xj, yj, nullptr, c, iflag, eps, {ms, mt, 1}, 0, - nullptr, nullptr, nullptr, fk, opts); + i64 ms, i64 mt, c128 *fk, finufft_opts *opts) { + return guru(2, 2, n_transf, nj, {xj, yj}, c, iflag, eps, {ms, mt, 1}, 0, {}, fk, + opts); } int finufftf2d2many(int n_transf, i64 nj, f32 *xj, f32 *yj, c64 *c, int iflag, f32 eps, - i64 ms, i64 mt, c64 *fk, finufft_opts *opts) -// Type-2 2D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(2, 2, n_transf, nj, xj, yj, nullptr, c, iflag, eps, {ms, mt, 1}, 0, - nullptr, nullptr, nullptr, fk, opts); + i64 ms, i64 mt, c64 *fk, finufft_opts *opts) { + return guru(2, 2, n_transf, nj, {xj, yj}, c, iflag, eps, {ms, mt, 1}, 0, {}, fk, + opts); } int finufft2d2(i64 nj, f64 *xj, f64 *yj, c128 *cj, int iflag, f64 eps, i64 ms, i64 mt, - c128 *fk, finufft_opts *opts) -// Type-2 2D complex nonuniform FFT. See ../docs/usage.rst -{ + c128 *fk, finufft_opts *opts) { return finufft2d2many(1, nj, xj, yj, cj, iflag, eps, ms, mt, fk, opts); } int finufftf2d2(i64 nj, f32 *xj, f32 *yj, c64 *cj, int iflag, f32 eps, i64 ms, i64 mt, - c64 *fk, finufft_opts *opts) -// Type-2 2D complex nonuniform FFT. See ../docs/usage.rst -{ + c64 *fk, finufft_opts *opts) { return finufftf2d2many(1, nj, xj, yj, cj, iflag, eps, ms, mt, fk, opts); } int finufft2d3many(int n_transf, i64 nj, f64 *xj, f64 *yj, c128 *cj, int iflag, f64 eps, - i64 nk, f64 *s, f64 *t, c128 *fk, finufft_opts *opts) -// Type-3 2D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(2, 3, n_transf, nj, xj, yj, nullptr, cj, iflag, eps, {0, 0, 0}, nk, - s, t, nullptr, fk, opts); + i64 nk, f64 *s, f64 *t, c128 *fk, finufft_opts *opts) { + return guru(2, 3, n_transf, nj, {xj, yj}, cj, iflag, eps, {0, 0, 0}, nk, {s, t}, + fk, opts); } int finufftf2d3many(int n_transf, i64 nj, f32 *xj, f32 *yj, c64 *cj, int iflag, f32 eps, - i64 nk, f32 *s, f32 *t, c64 *fk, finufft_opts *opts) -// Type-3 2D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(2, 3, n_transf, nj, xj, yj, nullptr, cj, iflag, eps, {0, 0, 0}, nk, - s, t, nullptr, fk, opts); + i64 nk, f32 *s, f32 *t, c64 *fk, finufft_opts *opts) { + return guru(2, 3, n_transf, nj, {xj, yj}, cj, iflag, eps, {0, 0, 0}, nk, {s, t}, + fk, opts); } int finufft2d3(i64 nj, f64 *xj, f64 *yj, c128 *cj, int iflag, f64 eps, i64 nk, f64 *s, - f64 *t, c128 *fk, finufft_opts *opts) -// Type-3 2D complex nonuniform FFT. See ../docs/usage.rst -{ + f64 *t, c128 *fk, finufft_opts *opts) { return finufft2d3many(1, nj, xj, yj, cj, iflag, eps, nk, s, t, fk, opts); } int finufftf2d3(i64 nj, f32 *xj, f32 *yj, c64 *cj, int iflag, f32 eps, i64 nk, f32 *s, - f32 *t, c64 *fk, finufft_opts *opts) -// Type-3 2D complex nonuniform FFT. See ../docs/usage.rst -{ + f32 *t, c64 *fk, finufft_opts *opts) { return finufftf2d3many(1, nj, xj, yj, cj, iflag, eps, nk, s, t, fk, opts); } // Dimension 3333333333333333333333333333333333333333333333333333333333333333 int finufft3d1many(int n_transf, i64 nj, f64 *xj, f64 *yj, f64 *zj, c128 *cj, int iflag, - f64 eps, i64 ms, i64 mt, i64 mu, c128 *fk, finufft_opts *opts) -// Type-1 3D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(3, 1, n_transf, nj, xj, yj, zj, cj, iflag, eps, {ms, mt, mu}, 0, - nullptr, nullptr, nullptr, fk, opts); + f64 eps, i64 ms, i64 mt, i64 mu, c128 *fk, finufft_opts *opts) { + return guru(3, 1, n_transf, nj, {xj, yj, zj}, cj, iflag, eps, {ms, mt, mu}, 0, {}, + fk, opts); } int finufftf3d1many(int n_transf, i64 nj, f32 *xj, f32 *yj, f32 *zj, c64 *cj, int iflag, - f32 eps, i64 ms, i64 mt, i64 mu, c64 *fk, finufft_opts *opts) -// Type-1 3D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(3, 1, n_transf, nj, xj, yj, zj, cj, iflag, eps, {ms, mt, mu}, 0, - nullptr, nullptr, nullptr, fk, opts); + f32 eps, i64 ms, i64 mt, i64 mu, c64 *fk, finufft_opts *opts) { + return guru(3, 1, n_transf, nj, {xj, yj, zj}, cj, iflag, eps, {ms, mt, mu}, 0, {}, + fk, opts); } int finufft3d1(i64 nj, f64 *xj, f64 *yj, f64 *zj, c128 *cj, int iflag, f64 eps, i64 ms, - i64 mt, i64 mu, c128 *fk, finufft_opts *opts) -// Type-1 3D complex nonuniform FFT. See ../docs/usage.rst -{ + i64 mt, i64 mu, c128 *fk, finufft_opts *opts) { return finufft3d1many(1, nj, xj, yj, zj, cj, iflag, eps, ms, mt, mu, fk, opts); } int finufftf3d1(i64 nj, f32 *xj, f32 *yj, f32 *zj, c64 *cj, int iflag, f32 eps, i64 ms, - i64 mt, i64 mu, c64 *fk, finufft_opts *opts) -// Type-1 3D complex nonuniform FFT. See ../docs/usage.rst -{ + i64 mt, i64 mu, c64 *fk, finufft_opts *opts) { return finufftf3d1many(1, nj, xj, yj, zj, cj, iflag, eps, ms, mt, mu, fk, opts); } int finufft3d2many(int n_transf, i64 nj, f64 *xj, f64 *yj, f64 *zj, c128 *cj, int iflag, - f64 eps, i64 ms, i64 mt, i64 mu, c128 *fk, finufft_opts *opts) -// Type-2 3D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(3, 2, n_transf, nj, xj, yj, zj, cj, iflag, eps, {ms, mt, mu}, 0, - nullptr, nullptr, nullptr, fk, opts); + f64 eps, i64 ms, i64 mt, i64 mu, c128 *fk, finufft_opts *opts) { + return guru(3, 2, n_transf, nj, {xj, yj, zj}, cj, iflag, eps, {ms, mt, mu}, 0, {}, + fk, opts); } int finufftf3d2many(int n_transf, i64 nj, f32 *xj, f32 *yj, f32 *zj, c64 *cj, int iflag, - f32 eps, i64 ms, i64 mt, i64 mu, c64 *fk, finufft_opts *opts) -// Type-2 3D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(3, 2, n_transf, nj, xj, yj, zj, cj, iflag, eps, {ms, mt, mu}, 0, - nullptr, nullptr, nullptr, fk, opts); + f32 eps, i64 ms, i64 mt, i64 mu, c64 *fk, finufft_opts *opts) { + return guru(3, 2, n_transf, nj, {xj, yj, zj}, cj, iflag, eps, {ms, mt, mu}, 0, {}, + fk, opts); } int finufft3d2(i64 nj, f64 *xj, f64 *yj, f64 *zj, c128 *cj, int iflag, f64 eps, i64 ms, - i64 mt, i64 mu, c128 *fk, finufft_opts *opts) -// Type-2 3D complex nonuniform FFT. See ../docs/usage.rst -{ + i64 mt, i64 mu, c128 *fk, finufft_opts *opts) { return finufft3d2many(1, nj, xj, yj, zj, cj, iflag, eps, ms, mt, mu, fk, opts); } int finufftf3d2(i64 nj, f32 *xj, f32 *yj, f32 *zj, c64 *cj, int iflag, f32 eps, i64 ms, - i64 mt, i64 mu, c64 *fk, finufft_opts *opts) -// Type-2 3D complex nonuniform FFT. See ../docs/usage.rst -{ + i64 mt, i64 mu, c64 *fk, finufft_opts *opts) { return finufftf3d2many(1, nj, xj, yj, zj, cj, iflag, eps, ms, mt, mu, fk, opts); } int finufft3d3many(int n_transf, i64 nj, f64 *xj, f64 *yj, f64 *zj, c128 *cj, int iflag, - f64 eps, i64 nk, f64 *s, f64 *t, f64 *u, c128 *fk, finufft_opts *opts) -// Type-3 3D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(3, 3, n_transf, nj, xj, yj, zj, cj, iflag, eps, {0, 0, 0}, nk, s, - t, u, fk, opts); + f64 eps, i64 nk, f64 *s, f64 *t, f64 *u, c128 *fk, + finufft_opts *opts) { + return guru(3, 3, n_transf, nj, {xj, yj, zj}, cj, iflag, eps, {0, 0, 0}, nk, + {s, t, u}, fk, opts); } int finufftf3d3many(int n_transf, i64 nj, f32 *xj, f32 *yj, f32 *zj, c64 *cj, int iflag, - f32 eps, i64 nk, f32 *s, f32 *t, f32 *u, c64 *fk, finufft_opts *opts) -// Type-3 3D complex nonuniform FFT, many vectors. See ../docs/usage.rst -{ - return guruCall(3, 3, n_transf, nj, xj, yj, zj, cj, iflag, eps, {0, 0, 0}, nk, s, - t, u, fk, opts); + f32 eps, i64 nk, f32 *s, f32 *t, f32 *u, c64 *fk, + finufft_opts *opts) { + return guru(3, 3, n_transf, nj, {xj, yj, zj}, cj, iflag, eps, {0, 0, 0}, nk, + {s, t, u}, fk, opts); } int finufft3d3(i64 nj, f64 *xj, f64 *yj, f64 *zj, c128 *cj, int iflag, f64 eps, i64 nk, - f64 *s, f64 *t, f64 *u, c128 *fk, finufft_opts *opts) -// Type-3 3D complex nonuniform FFT. See ../docs/usage.rst -{ + f64 *s, f64 *t, f64 *u, c128 *fk, finufft_opts *opts) { return finufft3d3many(1, nj, xj, yj, zj, cj, iflag, eps, nk, s, t, u, fk, opts); } int finufftf3d3(i64 nj, f32 *xj, f32 *yj, f32 *zj, c64 *cj, int iflag, f32 eps, i64 nk, - f32 *s, f32 *t, f32 *u, c64 *fk, finufft_opts *opts) -// Type-3 3D complex nonuniform FFT. See ../docs/usage.rst -{ + f32 *s, f32 *t, f32 *u, c64 *fk, finufft_opts *opts) { return finufftf3d3many(1, nj, xj, yj, zj, cj, iflag, eps, nk, s, t, u, fk, opts); } diff --git a/src/finufft_core.cpp b/src/finufft.cpp similarity index 82% rename from src/finufft_core.cpp rename to src/finufft.cpp index e950e2160..2130cfd8a 100644 --- a/src/finufft_core.cpp +++ b/src/finufft.cpp @@ -26,7 +26,7 @@ using namespace finufft::quadrature; As of v1.2 these replace the old hand-coded separate 9 finufft?d?() functions and the two finufft2d?many() functions. The (now 18) simple C++ interfaces - are in simpleinterfaces.cpp. + are in c_interface.cpp. Algorithm summaries taken from old finufft?d?() documentation, Feb-Jun 2017: @@ -65,10 +65,6 @@ Algorithm summaries taken from old finufft?d?() documentation, Feb-Jun 2017: Design notes for guru interface implementation: -* Since finufft_plan is C-compatible, we need to use malloc/free for its - allocatable arrays, keeping it quite low-level. We can't use std::vector - since that would only survive in the scope of each function. - * Thread-safety: FINUFFT plans are passed as pointers, so it has no global state apart from that associated with FFTW (and the did_fftw_init). */ @@ -93,8 +89,8 @@ static int set_nf_type12(BIGINT ms, const finufft_opts &opts, return 0; } else { fprintf(stderr, - "[%s] nf=%.3g exceeds MAX_NF of %.3g, so exit without attempting even a " - "malloc\n", + "[%s] nf=%.3g exceeds MAX_NF of %.3g, so exit without attempting " + "memory allocation\n", __func__, (double)*nf, (double)MAX_NF); return FINUFFT_ERR_MAXNALLOC; } @@ -540,64 +536,56 @@ void finufft_default_opts_t(finufft_opts *o) // PPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP template -int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int ntrans, - TF tol, FINUFFT_PLAN_T **pp, finufft_opts *opts) +FINUFFT_PLAN_T::FINUFFT_PLAN_T(int type_, int dim_, const BIGINT *n_modes, int iflag, + int ntrans_, TF tol_, finufft_opts *opts_, int &ier) + : type(type_), dim(dim_), ntrans(ntrans_), tol(tol_) // Populates the fields of finufft_plan which is pointed to by "pp". // opts is ptr to a finufft_opts to set options, or nullptr to use defaults. // For some of the fields (if "auto" selected) here choose the actual setting. // For types 1,2 allocates memory for internal working arrays, // evaluates spreading kernel coefficients, and instantiates the fftw_plan { - FINUFFT_PLAN_T *p; - p = new FINUFFT_PLAN_T; // allocate fresh plan struct - *pp = p; // pass out plan as ptr to plan struct - - if (!opts) // use default opts - finufft_default_opts_t(&(p->opts)); - else // or read from what's passed in - p->opts = *opts; // keep a deep copy; changing *opts now has no effect + if (!opts_) // use default opts + finufft_default_opts_t(&opts); + else // or read from what's passed in + opts = *opts_; // keep a deep copy; changing *opts now has no effect - if (p->opts.debug) // do a hello world + if (opts.debug) // do a hello world printf("[%s] new plan: FINUFFT version " FINUFFT_VER " .................\n", __func__); - p->fftPlan = std::make_unique>( - p->opts.fftw_lock_fun, p->opts.fftw_unlock_fun, p->opts.fftw_lock_data); + fftPlan = std::make_unique>( + opts.fftw_lock_fun, opts.fftw_unlock_fun, opts.fftw_lock_data); if ((type != 1) && (type != 2) && (type != 3)) { fprintf(stderr, "[%s] Invalid type (%d), should be 1, 2 or 3.\n", __func__, type); - return FINUFFT_ERR_TYPE_NOTVALID; + throw int(FINUFFT_ERR_TYPE_NOTVALID); } if ((dim != 1) && (dim != 2) && (dim != 3)) { fprintf(stderr, "[%s] Invalid dim (%d), should be 1, 2 or 3.\n", __func__, dim); - return FINUFFT_ERR_DIM_NOTVALID; + throw int(FINUFFT_ERR_DIM_NOTVALID); } if (ntrans < 1) { fprintf(stderr, "[%s] ntrans (%d) should be at least 1.\n", __func__, ntrans); - return FINUFFT_ERR_NTRANS_NOTVALID; + throw int(FINUFFT_ERR_NTRANS_NOTVALID); } - if (!p->opts.fftw_lock_fun != !p->opts.fftw_unlock_fun) { + if (!opts.fftw_lock_fun != !opts.fftw_unlock_fun) { fprintf(stderr, "[%s] fftw_(un)lock functions should be both null or both set\n", __func__); - return FINUFFT_ERR_LOCK_FUNS_INVALID; - ; + throw int(FINUFFT_ERR_LOCK_FUNS_INVALID); } // get stuff from args... - p->type = type; - p->dim = dim; - p->ntrans = ntrans; - p->tol = tol; - p->fftSign = (iflag >= 0) ? 1 : -1; // clean up flag input + fftSign = (iflag >= 0) ? 1 : -1; // clean up flag input - // choose overall # threads... + // choose overall # threads... #ifdef _OPENMP int ompmaxnthr = MY_OMP_GET_MAX_THREADS(); int nthr = ompmaxnthr; // default: use as many as OMP gives us // (the above could be set, or suggested set, to 1 for small enough problems...) - if (p->opts.nthreads > 0) { - nthr = p->opts.nthreads; // user override, now without limit - if (p->opts.showwarn && (nthr > ompmaxnthr)) + if (opts.nthreads > 0) { + nthr = opts.nthreads; // user override, now without limit + if (opts.showwarn && (nthr > ompmaxnthr)) fprintf(stderr, "%s warning: using opts.nthreads=%d, more than the %d OpenMP claims " "available; note large nthreads can be slower.\n", @@ -605,61 +593,61 @@ int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int } #else int nthr = 1; // always 1 thread (avoid segfault) - if (p->opts.nthreads > 1) + if (opts.nthreads > 1) fprintf(stderr, "%s warning: opts.nthreads=%d but library is single-threaded; ignoring!\n", - __func__, p->opts.nthreads); + __func__, opts.nthreads); #endif - p->opts.nthreads = nthr; // store actual # thr planned for + opts.nthreads = nthr; // store actual # thr planned for // (this sets/limits all downstream spread/interp, 1dkernel, and FFT thread counts...) // choose batchSize for types 1,2 or 3... (uses int ceil(b/a)=1+(b-1)/a trick) - if (p->opts.maxbatchsize == 0) { // logic to auto-set best batchsize - p->nbatch = 1 + (ntrans - 1) / nthr; // min # batches poss - p->batchSize = 1 + (ntrans - 1) / p->nbatch; // then cut # thr in each b - } else { // batchSize override by user - p->batchSize = std::min(p->opts.maxbatchsize, ntrans); - p->nbatch = 1 + (ntrans - 1) / p->batchSize; // resulting # batches + if (opts.maxbatchsize == 0) { // logic to auto-set best batchsize + nbatch = 1 + (ntrans - 1) / nthr; // min # batches poss + batchSize = 1 + (ntrans - 1) / nbatch; // then cut # thr in each b + } else { // batchSize override by user + batchSize = std::min(opts.maxbatchsize, ntrans); + nbatch = 1 + (ntrans - 1) / batchSize; // resulting # batches } - if (p->opts.spread_thread == 0) p->opts.spread_thread = 2; // our auto choice - if (p->opts.spread_thread != 1 && p->opts.spread_thread != 2) { + if (opts.spread_thread == 0) opts.spread_thread = 2; // our auto choice + if (opts.spread_thread != 1 && opts.spread_thread != 2) { fprintf(stderr, "[%s] illegal opts.spread_thread!\n", __func__); - return FINUFFT_ERR_SPREAD_THREAD_NOTVALID; + throw int(FINUFFT_ERR_SPREAD_THREAD_NOTVALID); } - if (type != 3) { // read in user Fourier mode array sizes... - p->ms = n_modes[0]; - p->mt = (dim > 1) ? n_modes[1] : 1; // leave as 1 for unused dims - p->mu = (dim > 2) ? n_modes[2] : 1; - p->N = p->ms * p->mt * p->mu; // N = total # modes + if (type != 3) { // read in user Fourier mode array sizes... + ms = n_modes[0]; + mt = (dim > 1) ? n_modes[1] : 1; // leave as 1 for unused dims + mu = (dim > 2) ? n_modes[2] : 1; + N = ms * mt * mu; // N = total # modes } // heuristic to choose default upsampfac... (currently two poss) - if (p->opts.upsampfac == 0.0) { // indicates auto-choose - p->opts.upsampfac = 2.0; // default, and need for tol small - if (tol >= (TF)1E-9) { // the tol sigma=5/4 can reach - if (type == 3) // could move to setpts, more known? - p->opts.upsampfac = 1.25; // faster b/c smaller RAM & FFT - else if ((dim == 1 && p->N > 10000000) || (dim == 2 && p->N > 300000) || - (dim == 3 && p->N > 3000000)) // type 1,2 heuristic cutoffs, double, - // typ tol, 12-core xeon - p->opts.upsampfac = 1.25; + if (opts.upsampfac == 0.0) { // indicates auto-choose + opts.upsampfac = 2.0; // default, and need for tol small + if (tol >= (TF)1E-9) { // the tol sigma=5/4 can reach + if (type == 3) // could move to setpts, more known? + opts.upsampfac = 1.25; // faster b/c smaller RAM & FFT + else if ((dim == 1 && N > 10000000) || (dim == 2 && N > 300000) || + (dim == 3 && N > 3000000)) // type 1,2 heuristic cutoffs, double, + // typ tol, 12-core xeon + opts.upsampfac = 1.25; } - if (p->opts.debug > 1) - printf("[%s] set auto upsampfac=%.2f\n", __func__, p->opts.upsampfac); + if (opts.debug > 1) + printf("[%s] set auto upsampfac=%.2f\n", __func__, opts.upsampfac); } // use opts to choose and write into plan's spread options... - int ier = setup_spreader_for_nufft(p->spopts, tol, p->opts, dim); + ier = setup_spreader_for_nufft(spopts, tol, opts, dim); if (ier > 1) // proceed if success or warning - return ier; + throw int(ier); // set others as defaults (or unallocated for arrays)... - p->X = nullptr; - p->Y = nullptr; - p->Z = nullptr; - p->nf1 = 1; - p->nf2 = 1; - p->nf3 = 1; // crucial to leave as 1 for unused dims + X = nullptr; + Y = nullptr; + Z = nullptr; + nf1 = 1; + nf2 = 1; + nf3 = 1; // crucial to leave as 1 for unused dims // ------------------------ types 1,2: planning needed --------------------- if (type == 1 || type == 2) { @@ -667,97 +655,115 @@ int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int int nthr_fft = nthr; // give FFTW all threads (or use o.spread_thread?) // Note: batchSize not used since might be only 1. - p->spopts.spread_direction = type; + spopts.spread_direction = type; constexpr TF EPSILON = std::numeric_limits::epsilon(); - if (p->opts.showwarn) { // user warn round-off error... - if (EPSILON * p->ms > 1.0) + if (opts.showwarn) { // user warn round-off error... + if (EPSILON * ms > 1.0) fprintf(stderr, "%s warning: rounding err predicted eps_mach*N1 = %.3g > 1 !\n", - __func__, (double)(EPSILON * p->ms)); - if (EPSILON * p->mt > 1.0) + __func__, (double)(EPSILON * ms)); + if (EPSILON * mt > 1.0) fprintf(stderr, "%s warning: rounding err predicted eps_mach*N2 = %.3g > 1 !\n", - __func__, (double)(EPSILON * p->mt)); - if (EPSILON * p->mu > 1.0) + __func__, (double)(EPSILON * mt)); + if (EPSILON * mu > 1.0) fprintf(stderr, "%s warning: rounding err predicted eps_mach*N3 = %.3g > 1 !\n", - __func__, (double)(EPSILON * p->mu)); + __func__, (double)(EPSILON * mu)); } // determine fine grid sizes, sanity check.. - int nfier = set_nf_type12(p->ms, p->opts, p->spopts, &(p->nf1)); - if (nfier) return nfier; // nf too big; we're done - p->phiHat1.resize(p->nf1 / 2 + 1); + int nfier = set_nf_type12(ms, opts, spopts, &nf1); + if (nfier) throw nfier; // nf too big; we're done + phiHat1.resize(nf1 / 2 + 1); if (dim > 1) { - nfier = set_nf_type12(p->mt, p->opts, p->spopts, &(p->nf2)); - if (nfier) return nfier; - p->phiHat2.resize(p->nf2 / 2 + 1); + nfier = set_nf_type12(mt, opts, spopts, &nf2); + if (nfier) throw nfier; + phiHat2.resize(nf2 / 2 + 1); } if (dim > 2) { - nfier = set_nf_type12(p->mu, p->opts, p->spopts, &(p->nf3)); - if (nfier) return nfier; - p->phiHat3.resize(p->nf3 / 2 + 1); + nfier = set_nf_type12(mu, opts, spopts, &nf3); + if (nfier) throw nfier; + phiHat3.resize(nf3 / 2 + 1); } - if (p->opts.debug) { // "long long" here is to avoid warnings with printf... + if (opts.debug) { // "long long" here is to avoid warnings with printf... printf("[%s] %dd%d: (ms,mt,mu)=(%lld,%lld,%lld) " "(nf1,nf2,nf3)=(%lld,%lld,%lld)\n ntrans=%d nthr=%d " "batchSize=%d ", - __func__, dim, type, (long long)p->ms, (long long)p->mt, (long long)p->mu, - (long long)p->nf1, (long long)p->nf2, (long long)p->nf3, ntrans, nthr, - p->batchSize); - if (p->batchSize == 1) // spread_thread has no effect in this case + __func__, dim, type, (long long)ms, (long long)mt, (long long)mu, + (long long)nf1, (long long)nf2, (long long)nf3, ntrans, nthr, batchSize); + if (batchSize == 1) // spread_thread has no effect in this case printf("\n"); else - printf(" spread_thread=%d\n", p->opts.spread_thread); + printf(" spread_thread=%d\n", opts.spread_thread); } // STEP 0: get Fourier coeffs of spreading kernel along each fine grid dim CNTime timer; timer.start(); - onedim_fseries_kernel(p->nf1, p->phiHat1, p->spopts); - if (dim > 1) onedim_fseries_kernel(p->nf2, p->phiHat2, p->spopts); - if (dim > 2) onedim_fseries_kernel(p->nf3, p->phiHat3, p->spopts); - if (p->opts.debug) - printf("[%s] kernel fser (ns=%d):\t\t%.3g s\n", __func__, p->spopts.nspread, + onedim_fseries_kernel(nf1, phiHat1, spopts); + if (dim > 1) onedim_fseries_kernel(nf2, phiHat2, spopts); + if (dim > 2) onedim_fseries_kernel(nf3, phiHat3, spopts); + if (opts.debug) + printf("[%s] kernel fser (ns=%d):\t\t%.3g s\n", __func__, spopts.nspread, timer.elapsedsec()); - p->nf = p->nf1 * p->nf2 * p->nf3; // fine grid total number of points - if (p->nf * p->batchSize > MAX_NF) { - fprintf(stderr, - "[%s] fwBatch would be bigger than MAX_NF, not attempting malloc!\n", - __func__); - // FIXME: this error causes memory leaks. We should free phiHat1, phiHat2, phiHat3 - return FINUFFT_ERR_MAXNALLOC; + nf = nf1 * nf2 * nf3; // fine grid total number of points + if (nf * batchSize > MAX_NF) { + fprintf( + stderr, + "[%s] fwBatch would be bigger than MAX_NF, not attempting memory allocation!\n", + __func__); + throw int(FINUFFT_ERR_MAXNALLOC); } timer.restart(); - p->fwBatch = p->fftPlan->alloc_complex(p->nf * p->batchSize); // the big workspace - if (p->opts.debug) + fwBatch = fftPlan->alloc_complex(nf * batchSize); // the big workspace + if (opts.debug) printf("[%s] fwBatch %.2fGB alloc: \t%.3g s\n", __func__, - (double)1E-09 * sizeof(std::complex) * p->nf * p->batchSize, + (double)1E-09 * sizeof(std::complex) * nf * batchSize, timer.elapsedsec()); - if (!p->fwBatch) { // we don't catch all such mallocs, just this big one - fprintf(stderr, "[%s] FFTW malloc failed for fwBatch (working fine grids)!\n", + if (!fwBatch) { // we don't catch all such allocs, just this big one + fprintf(stderr, "[%s] FFT allocation failed for fwBatch (working fine grids)!\n", __func__); - return FINUFFT_ERR_ALLOC; + throw int(FINUFFT_ERR_ALLOC); } timer.restart(); // plan the FFTW - const auto ns = gridsize_for_fft(p); - p->fftPlan->plan(ns, p->batchSize, p->fwBatch, p->fftSign, p->opts.fftw, nthr_fft); - if (p->opts.debug) - printf("[%s] FFT plan (mode %d, nthr=%d):\t%.3g s\n", __func__, p->opts.fftw, - nthr_fft, timer.elapsedsec()); + const auto ns = gridsize_for_fft(this); + fftPlan->plan(ns, batchSize, fwBatch, fftSign, opts.fftw, nthr_fft); + if (opts.debug) + printf("[%s] FFT plan (mode %d, nthr=%d):\t%.3g s\n", __func__, opts.fftw, nthr_fft, + timer.elapsedsec()); } else { // -------------------------- type 3 (no planning) ------------ - if (p->opts.debug) printf("[%s] %dd%d: ntrans=%d\n", __func__, dim, type, ntrans); + if (opts.debug) printf("[%s] %dd%d: ntrans=%d\n", __func__, dim, type, ntrans); // in case destroy occurs before setpts, need safe dummy ptrs/plans... - p->fwBatch = nullptr; - p->innerT2plan = nullptr; + fwBatch = nullptr; + innerT2plan = nullptr; // Type 3 will call finufft_makeplan for type 2; no need to init FFTW // Note we don't even know nj or nk yet, so can't do anything else! } - return ier; // report setup_spreader status (could be warning) + if (ier > 1) throw int(ier); // report setup_spreader status (could be warning) +} + +template +int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int ntrans, + TF tol, FINUFFT_PLAN_T **pp, finufft_opts *opts) +// Populates the fields of finufft_plan which is pointed to by "pp". +// opts is ptr to a finufft_opts to set options, or nullptr to use defaults. +// For some of the fields (if "auto" selected) here choose the actual setting. +// For types 1,2 allocates memory for internal working arrays, +// evaluates spreading kernel coefficients, and instantiates the fftw_plan +{ + *pp = nullptr; + int ier = 0; + try { + *pp = new FINUFFT_PLAN_T(type, dim, n_modes, iflag, ntrans, tol, opts, ier); + } catch (int errcode) { + return errcode; + } + return ier; } // For this function and the following ones (i.e. everything that is accessible @@ -824,7 +830,7 @@ int FINUFFT_PLAN_T::setpts(BIGINT nj, TF *xj, TF *yj, TF *zj, BIGINT nk, TF U = u; // pick x, s intervals & shifts & # fine grid pts (nf) in each dim... - TF S1, S2, S3; // get half-width X, center C, which contains {x_j}... + TF S1 = 0, S2 = 0, S3 = 0; // get half-width X, center C, which contains {x_j}... arraywidcen(nj, xj, &(t3P.X1), &(t3P.C1)); arraywidcen(nk, s, &S1, &(t3P.D1)); // same D, S, but for {s_k} set_nhg_type3(S1, t3P.X1, opts, spopts, &(nf1), &(t3P.h1), @@ -858,7 +864,8 @@ int FINUFFT_PLAN_T::setpts(BIGINT nj, TF *xj, TF *yj, TF *zj, BIGINT nk, TF nf = nf1 * nf2 * nf3; // fine grid total number of points if (nf * batchSize > MAX_NF) { fprintf(stderr, - "[%s t3] fwBatch would be bigger than MAX_NF, not attempting malloc!\n", + "[%s t3] fwBatch would be bigger than MAX_NF, not attempting memory " + "allocation!\n", __func__); return FINUFFT_ERR_MAXNALLOC; } @@ -872,24 +879,24 @@ int FINUFFT_PLAN_T::setpts(BIGINT nj, TF *xj, TF *yj, TF *zj, BIGINT nk, TF (double)1E-09 * sizeof(std::complex) * (nf + nj) * batchSize, timer.elapsedsec()); if (!fwBatch) { - fprintf(stderr, "[%s t3] malloc fail for fwBatch or CpBatch!\n", __func__); + fprintf(stderr, "[%s t3] allocation fail for fwBatch or CpBatch!\n", __func__); return FINUFFT_ERR_ALLOC; } // printf("fwbatch, cpbatch ptrs: %llx %llx\n",fwBatch,CpBatch); // alloc rescaled NU src pts x'_j (in X etc), rescaled NU targ pts s'_k ... // FIXME: should use realloc - if (X) free(X); - X = (TF *)malloc(sizeof(TF) * nj); + if (X) delete[] X; + X = new TF[nj]; Sp.resize(nk); if (d > 1) { - if (Y) free(Y); - Y = (TF *)malloc(sizeof(TF) * nj); + if (Y) delete[] Y; + Y = new TF[nj]; Tp.resize(nk); } if (d > 2) { - if (Z) free(Z); - Z = (TF *)malloc(sizeof(TF) * nj); + if (Z) delete[] Z; + Z = new TF[nj]; Up.resize(nk); } @@ -920,9 +927,9 @@ int FINUFFT_PLAN_T::setpts(BIGINT nj, TF *xj, TF *yj, TF *zj, BIGINT nk, TF } } else for (BIGINT j = 0; j < nj; ++j) - prephase[j] = (std::complex)1.0; // *** or keep flag so no mult in exec?? + prephase[j] = {1, 0}; // *** or keep flag so no mult in exec?? - // rescale the target s_k etc to s'_k etc... + // rescale the target s_k etc to s'_k etc... #pragma omp parallel for num_threads(opts.nthreads) schedule(static) for (BIGINT k = 0; k < nk; ++k) { Sp[k] = t3P.h1 * t3P.gam1 * (s[k] - t3P.D1); // so |s'_k| < pi/R @@ -947,16 +954,9 @@ int FINUFFT_PLAN_T::setpts(BIGINT nj, TF *xj, TF *yj, TF *zj, BIGINT nk, TF phiHatk3.resize(nk); onedim_nuft_kernel(nk, Up, phiHatk3, spopts); // fill phiHat3 } - int Cfinite = - std::isfinite(t3P.C1) && std::isfinite(t3P.C2) && std::isfinite(t3P.C3); // C can - // be nan - // or inf - // if - // M=0, - // no - // input - // NU pts - int Cnonzero = t3P.C1 != 0.0 || t3P.C2 != 0.0 || t3P.C3 != 0.0; // cen + // C can be nan or inf if M=0, no input NU pts + int Cfinite = std::isfinite(t3P.C1) && std::isfinite(t3P.C2) && std::isfinite(t3P.C3); + int Cnonzero = t3P.C1 != 0.0 || t3P.C2 != 0.0 || t3P.C3 != 0.0; // cen #pragma omp parallel for num_threads(opts.nthreads) schedule(static) for (BIGINT k = 0; k < nk; ++k) { // .... loop over NU targ freqs TF phiHat = phiHatk1[k]; @@ -990,20 +990,18 @@ int FINUFFT_PLAN_T::setpts(BIGINT nj, TF *xj, TF *yj, TF *zj, BIGINT nk, TF t2opts.spread_debug = std::max(0, opts.spread_debug - 1); t2opts.showwarn = 0; // so don't see warnings 2x // (...could vary other t2opts here?) - if (innerT2plan) { - delete innerT2plan; - innerT2plan = nullptr; - } - int ier = finufft_makeplan_t(2, d, t2nmodes, fftSign, batchSize, tol, - &innerT2plan, &t2opts); + // MR: temporary hack, until we have figured out the C++ interface. + FINUFFT_PLAN_T *tmpplan; + int ier = finufft_makeplan_t(2, d, t2nmodes, fftSign, batchSize, tol, &tmpplan, + &t2opts); + innerT2plan.reset(tmpplan); if (ier > 1) { // if merely warning, still proceed fprintf(stderr, "[%s t3]: inner type 2 plan creation failed with ier=%d!\n", __func__, ier); return ier; } - ier = finufft_setpts_t(innerT2plan, nk, Sp.data(), Tp.data(), Up.data(), 0, - nullptr, nullptr, - nullptr); // note nk = # output points (not nj) + ier = innerT2plan->setpts(nk, Sp.data(), Tp.data(), Up.data(), 0, nullptr, nullptr, + nullptr); // note nk = # output points (not nj) if (ier > 1) { fprintf(stderr, "[%s t3]: inner type 2 setpts failed, ier=%d!\n", __func__, ier); return ier; @@ -1013,23 +1011,10 @@ int FINUFFT_PLAN_T::setpts(BIGINT nj, TF *xj, TF *yj, TF *zj, BIGINT nk, TF } return 0; } -template -int finufft_setpts_t(FINUFFT_PLAN_T *p, BIGINT nj, TF *xj, TF *yj, TF *zj, BIGINT nk, - TF *s, TF *t, TF *u) -/* For type 1,2: just checks and (possibly) sorts the NU xyz points, in prep for - spreading. (The last 4 arguments are ignored.) - For type 3: allocates internal working arrays, scales/centers the NU points - and NU target freqs (stu), evaluates spreading kernel FT at all target freqs. -*/ -{ - return p->setpts(nj, xj, yj, zj, nk, s, t, u); -} -template int finufft_setpts_t(FINUFFT_PLAN_T *p, BIGINT nj, float *xj, - float *yj, float *zj, BIGINT nk, float *s, float *t, - float *u); -template int finufft_setpts_t(FINUFFT_PLAN_T *p, BIGINT nj, double *xj, - double *yj, double *zj, BIGINT nk, double *s, - double *t, double *u); +template int FINUFFT_PLAN_T::setpts(BIGINT nj, float *xj, float *yj, float *zj, + BIGINT nk, float *s, float *t, float *u); +template int FINUFFT_PLAN_T::setpts(BIGINT nj, double *xj, double *yj, double *zj, + BIGINT nk, double *s, double *t, double *u); // ............ end setpts .................................................. @@ -1152,7 +1137,7 @@ int FINUFFT_PLAN_T::execute(std::complex *cj, std::complex *fk) { innerT2plan->ntrans = thisBatchSize; // do not try this at home! /* (alarming that FFT not shrunk, but safe, because t2's fwBatch array still the same size, as Andrea explained; just wastes a few flops) */ - finufft_execute_t(innerT2plan, fkb, fwBatch); + innerT2plan->execute(fkb, fwBatch); t_t2 += timer.elapsedsec(); // STEP 3: apply deconvolve (precomputed 1/phiHat(targ_k), phasing too)... timer.restart(); @@ -1176,26 +1161,10 @@ int FINUFFT_PLAN_T::execute(std::complex *cj, std::complex *fk) { return 0; } -template -int finufft_execute_t(FINUFFT_PLAN_T *p, std::complex *cj, std::complex *fk) { - /* See ../docs/cguru.doc for current documentation. - - For given (stack of) weights cj or coefficients fk, performs NUFFTs with - existing (sorted) NU pts and existing plan. - For type 1 and 3: cj is input, fk is output. - For type 2: fk is input, cj is output. - Performs spread/interp, pre/post deconvolve, and FFT as appropriate - for each of the 3 types. - For cases of ntrans>1, performs work in blocks of size up to batchSize. - Return value 0 (no error diagnosis yet). - Barnett 5/20/20, based on Malleo 2019. -*/ - return p->execute(cj, fk); -} -template int finufft_execute_t(FINUFFT_PLAN_T *p, std::complex *cj, - std::complex *fk); -template int finufft_execute_t( - FINUFFT_PLAN_T *p, std::complex *cj, std::complex *fk); +template int FINUFFT_PLAN_T::execute(std::complex *cj, + std::complex *fk); +template int FINUFFT_PLAN_T::execute(std::complex *cj, + std::complex *fk); // DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD template FINUFFT_PLAN_T::~FINUFFT_PLAN_T() { @@ -1203,13 +1172,11 @@ template FINUFFT_PLAN_T::~FINUFFT_PLAN_T() { // Also must not crash if called immediately after finufft_makeplan. // Thus either each thing free'd here is guaranteed to be nullptr or correctly // allocated. - if (fftPlan) fftPlan->free(fwBatch); // free the big FFTW (or t3 spread) working array + if (fftPlan) fftPlan->free(fwBatch); // free the big FFT (or t3 spread) working array if (type == 3) { - delete innerT2plan; - innerT2plan = nullptr; - free(X); - free(Y); - free(Z); + delete[] X; + delete[] Y; + delete[] Z; } } template FINUFFT_PLAN_T::~FINUFFT_PLAN_T(); diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index 7c7309de2..a0341a0b5 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -1,6 +1,5 @@ // Spreading/interpolating module within FINUFFT. -#include #include #include @@ -9,6 +8,7 @@ #include +#include #include #include #include @@ -164,7 +164,7 @@ constexpr std::array, N> pad_2D_array_with_zeros( const std::array, N> &input) noexcept { constexpr auto pad_with_zeros = [](const auto &input) constexpr noexcept { std::array padded{0}; - for (auto i = 0; i < input.size(); ++i) { + for (size_t i = 0; i < input.size(); ++i) { padded[i] = input[i]; } return padded; @@ -187,20 +187,21 @@ FINUFFT_NEVER_INLINE void print_subgrid_info(int ndims, BIGINT offset1, BIGINT offset2, BIGINT offset3, UBIGINT padded_size1, UBIGINT size1, UBIGINT size2, UBIGINT size3, UBIGINT M0) { - printf("size1 %ld, padded_size1 %ld\n", size1, padded_size1); + printf("size1 %" PRIu64 ", padded_size1 %" PRIu64 "\n", size1, padded_size1); switch (ndims) { case 1: - printf("\tsubgrid: off %lld\t siz %lld\t #NU %lld\n", (long long)offset1, - (long long)padded_size1, (long long)M0); + printf("\tsubgrid: off %" PRId64 "\t siz %" PRIu64 "\t #NU %" PRIu64 "\n", offset1, + padded_size1, M0); break; case 2: - printf("\tsubgrid: off %lld,%lld\t siz %lld,%lld\t #NU %lld\n", (long long)offset1, - (long long)offset2, (long long)padded_size1, (long long)size2, (long long)M0); + printf("\tsubgrid: off %" PRId64 ",%" PRId64 "\t siz %" PRIu64 ",%" PRIu64 + "\t #NU %" PRIu64 "\n", + offset1, offset2, padded_size1, size2, M0); break; case 3: - printf("\tsubgrid: off %lld,%lld,%lld\t siz %lld,%lld,%lld\t #NU %lld\n", - (long long)offset1, (long long)offset2, (long long)offset3, - (long long)padded_size1, (long long)size2, (long long)size3, (long long)M0); + printf("\tsubgrid: off %" PRId64 ",%" PRId64 ",%" PRId64 "\t siz %" PRIu64 ",%" PRIu64 + ",%" PRIu64 "\t #NU %" PRIu64 "\n", + offset1, offset2, offset3, padded_size1, size2, size3, M0); break; default: printf("Invalid number of dimensions: %d\n", ndims); @@ -223,13 +224,6 @@ static FINUFFT_ALWAYS_INLINE T fold_rescale(const T x, const UBIGINT N) noexcept return (result - floor(result)) * T(N); } -template -static FINUFFT_ALWAYS_INLINE simd_type fold_rescale(const simd_type &x, - const BIGINT N) noexcept { - const simd_type x2pi = T(M_1_2PI); - const simd_type result = xsimd::fma(x, x2pi, simd_type(0.5)); - return (result - xsimd::floor(result)) * simd_type(T(N)); -} template static void set_kernel_args(T *args, T x) noexcept // Fills vector args[] with kernel arguments x, x+1, ..., x+ns-1. @@ -400,7 +394,7 @@ static void interp_line_wrap(T *FINUFFT_RESTRICT target, const T *du, const T *k out[0] = std::fma(du[2 * j], ker[dx], out[0]); out[1] = std::fma(du[2 * j + 1], ker[dx], out[1]); } - } else if (i1 + ns >= N1) { // wraps at right + } else if (i1 + ns >= BIGINT(N1)) { // wraps at right for (uint8_t dx = 0; dx < N1 - i1; ++dx, ++j) { out[0] = std::fma(du[2 * j], ker[dx], out[0]); out[1] = std::fma(du[2 * j + 1], ker[dx], out[1]); @@ -446,14 +440,13 @@ static void interp_line(T *FINUFFT_RESTRICT target, const T *du, const T *ker, B */ using arch_t = typename simd_type::arch_type; static constexpr auto padding = get_padding(); - static constexpr auto alignment = arch_t::alignment(); static constexpr auto simd_size = simd_type::size; static constexpr auto regular_part = (2 * ns + padding) & (-(2 * simd_size)); std::array out{0}; const auto j = i1; // removing the wrapping leads up to 10% speedup in certain cases // moved the wrapping to another function to reduce instruction cache pressure - if (i1 < 0 || i1 + ns >= N1 || i1 + ns + (padding + 1) / 2 >= N1) { + if (i1 < 0 || i1 + ns >= BIGINT(N1) || i1 + ns + (padding + 1) / 2 >= BIGINT(N1)) { return interp_line_wrap(target, du, ker, i1, N1); } else { // doesn't wrap // logic largely similar to spread 1D kernel, please see the explanation there @@ -519,7 +512,7 @@ static void interp_square_wrap(T *FINUFFT_RESTRICT target, const T *du, const T std::array out{0}; using arch_t = typename simd_type::arch_type; static constexpr auto alignment = arch_t::alignment(); - if (i1 >= 0 && i1 + ns <= N1 && i2 >= 0 && i2 + ns <= N2) { + if (i1 >= 0 && i1 + ns <= BIGINT(N1) && i2 >= 0 && i2 + ns <= BIGINT(N2)) { // store a horiz line (interleaved real,imag) alignas(alignment) std::array line{0}; // add remaining const-y lines to the line (expensive inner loop) @@ -539,10 +532,10 @@ static void interp_square_wrap(T *FINUFFT_RESTRICT target, const T *du, const T auto x = i1, y = i2; // initialize coords for (uint8_t d{0}; d < ns; d++) { // set up ptr lists if (x < 0) x += BIGINT(N1); - if (x >= N1) x -= BIGINT(N1); + if (x >= BIGINT(N1)) x -= BIGINT(N1); j1[d] = x++; if (y < 0) y += BIGINT(N2); - if (y >= N2) y -= BIGINT(N2); + if (y >= BIGINT(N2)) y -= BIGINT(N2); j2[d] = y++; } for (uint8_t dy{0}; dy < ns; dy++) { // use the pts lists @@ -598,11 +591,10 @@ static void interp_square(T *FINUFFT_RESTRICT target, const T *du, const T *ker1 // no wrapping: avoid ptrs using arch_t = typename simd_type::arch_type; static constexpr auto padding = get_padding(); - static constexpr auto alignment = arch_t::alignment(); static constexpr auto simd_size = simd_type::size; static constexpr uint8_t line_vectors = (2 * ns + padding) / simd_size; - if (i1 >= 0 && i1 + ns <= N1 && i2 >= 0 && i2 + ns <= N2 && - (i1 + ns + (padding + 1) / 2 < N1)) { + if (i1 >= 0 && i1 + ns <= BIGINT(N1) && i2 >= 0 && i2 + ns <= BIGINT(N2) && + (i1 + ns + (padding + 1) / 2 < BIGINT(N1))) { const auto line = [du, N1, i1 = UBIGINT(i1), i2 = UBIGINT(i2), ker2]() constexpr noexcept { // new array du_pts to store the du values for the current y line @@ -666,9 +658,9 @@ static void interp_cube_wrapped(T *FINUFFT_RESTRICT target, const T *du, const T */ using arch_t = typename simd_type::arch_type; static constexpr auto alignment = arch_t::alignment(); - const auto in_bounds_1 = (i1 >= 0) & (i1 + ns <= N1); - const auto in_bounds_2 = (i2 >= 0) & (i2 + ns <= N2); - const auto in_bounds_3 = (i3 >= 0) & (i3 + ns <= N3); + const auto in_bounds_1 = (i1 >= 0) & (i1 + ns <= BIGINT(N1)); + const auto in_bounds_2 = (i2 >= 0) & (i2 + ns <= BIGINT(N2)); + const auto in_bounds_3 = (i3 >= 0) & (i3 + ns <= BIGINT(N3)); std::array out{0}; // case no wrapping needed but padding spills over du array. // Hence, no explicit vectorization but the code is still faster @@ -701,13 +693,13 @@ static void interp_cube_wrapped(T *FINUFFT_RESTRICT target, const T *du, const T auto x = i1, y = i2, z = i3; // initialize coords for (uint8_t d{0}; d < ns; d++) { // set up ptr lists if (x < 0) x += BIGINT(N1); - if (x >= N1) x -= BIGINT(N1); + if (x >= BIGINT(N1)) x -= BIGINT(N1); j1[d] = x++; if (y < 0) y += BIGINT(N2); - if (y >= N2) y -= BIGINT(N2); + if (y >= BIGINT(N2)) y -= BIGINT(N2); j2[d] = y++; if (z < 0) z += BIGINT(N3); - if (z >= N3) z -= BIGINT(N3); + if (z >= BIGINT(N3)) z -= BIGINT(N3); j3[d] = z++; } for (uint8_t dz{0}; dz < ns; dz++) { // use the pts lists @@ -763,15 +755,14 @@ static void interp_cube(T *FINUFFT_RESTRICT target, const T *du, const T *ker1, { using arch_t = typename simd_type::arch_type; static constexpr auto padding = get_padding(); - static constexpr auto alignment = arch_t::alignment(); static constexpr auto simd_size = simd_type::size; - static constexpr auto ker23_size = (ns + simd_size - 1) & -simd_size; static constexpr uint8_t line_vectors = (2 * ns + padding) / simd_size; - const auto in_bounds_1 = (i1 >= 0) & (i1 + ns <= N1); - const auto in_bounds_2 = (i2 >= 0) & (i2 + ns <= N2); - const auto in_bounds_3 = (i3 >= 0) & (i3 + ns <= N3); + const auto in_bounds_1 = (i1 >= 0) & (i1 + ns <= BIGINT(N1)); + const auto in_bounds_2 = (i2 >= 0) & (i2 + ns <= BIGINT(N2)); + const auto in_bounds_3 = (i3 >= 0) & (i3 + ns <= BIGINT(N3)); std::array out{0}; - if (in_bounds_1 && in_bounds_2 && in_bounds_3 && (i1 + ns + (padding + 1) / 2 < N1)) { + if (in_bounds_1 && in_bounds_2 && in_bounds_3 && + (i1 + ns + (padding + 1) / 2 < BIGINT(N1))) { const auto line = [N1, N2, i1 = UBIGINT(i1), i2 = UBIGINT(i2), i3 = UBIGINT(i3), ker2, ker3, du]() constexpr noexcept { std::array line{0}; @@ -844,7 +835,7 @@ static FINUFFT_ALWAYS_INLINE auto ker_eval( */ const std::array inputs{elems...}; // compile time loop, no performance overhead - for (auto i = 0; i < sizeof...(elems); ++i) { + for (size_t i = 0; i < sizeof...(elems); ++i) { // compile time branch no performance overhead if constexpr (kerevalmeth == 1) { if (opts.upsampfac == 2.0) { @@ -1365,27 +1356,27 @@ static void add_wrapped_subgrid(BIGINT offset1, BIGINT offset2, BIGINT offset3, }; BIGINT y = offset2, z = offset3; // fill wrapped ptr lists in slower dims y,z... - for (int i = 0; i < size2; ++i) { + for (UBIGINT i = 0; i < size2; ++i) { if (y < 0) y += BIGINT(N2); - if (y >= N2) y -= BIGINT(N2); + if (y >= BIGINT(N2)) y -= BIGINT(N2); o2[i] = y++; } - for (int i = 0; i < size3; ++i) { + for (UBIGINT i = 0; i < size3; ++i) { if (z < 0) z += BIGINT(N3); - if (z >= N3) z -= BIGINT(N3); + if (z >= BIGINT(N3)) z -= BIGINT(N3); o3[i] = z++; } UBIGINT nlo = (offset1 < 0) ? -offset1 : 0; // # wrapping below in x UBIGINT nhi = (offset1 + size1 > N1) ? offset1 + size1 - N1 : 0; // " above in x // this triple loop works in all dims - for (int dz = 0; dz < size3; dz++) { // use ptr lists in each axis + for (UBIGINT dz = 0; dz < size3; dz++) { // use ptr lists in each axis const auto oz = N1 * N2 * o3[dz]; // offset due to z (0 in <3D) - for (int dy = 0; dy < size2; dy++) { + for (UBIGINT dy = 0; dy < size2; dy++) { const auto oy = N1 * o2[dy] + oz; // off due to y & z (0 in 1D) auto *FINUFFT_RESTRICT out = data_uniform + 2 * oy; const auto in = du0 + 2 * padded_size1 * (dy + size2 * dz); // ptr to subgrid array auto o = 2 * (offset1 + N1); // 1d offset for output - for (auto j = 0; j < 2 * nlo; j++) { // j is really dx/2 (since re,im parts) + for (UBIGINT j = 0; j < 2 * nlo; j++) { // j is really dx/2 (since re,im parts) accumulate(out[j + o], in[j]); } o = 2 * offset1; @@ -1447,7 +1438,7 @@ static void bin_sort_singlethread(std::vector &ret, UBIGINT M, const T * // count how many pts in each bin std::vector counts(nbins, 0); - for (auto i = 0; i < M; i++) { + for (UBIGINT i = 0; i < M; i++) { // find the bin index in however many dims are needed const auto i1 = BIGINT(fold_rescale(kx[i], N1) * inv_bin_size_x); const auto i2 = isky ? BIGINT(fold_rescale(ky[i], N2) * inv_bin_size_y) : 0; @@ -1464,7 +1455,7 @@ static void bin_sort_singlethread(std::vector &ret, UBIGINT M, const T * current_offset += tmp; } // (counts now contains the index offsets for each bin) - for (auto i = 0; i < M; i++) { + for (UBIGINT i = 0; i < M; i++) { // find the bin index (again! but better than using RAM) const auto i1 = BIGINT(fold_rescale(kx[i], N1) * inv_bin_size_x); const auto i2 = isky ? BIGINT(fold_rescale(ky[i], N2) * inv_bin_size_y) : 0; @@ -1726,7 +1717,7 @@ int spreadcheck(UBIGINT N1, UBIGINT N2, UBIGINT N3, UBIGINT M, T *kx, T *ky, T * */ { // INPUT CHECKING & REPORTING .... cuboid not too small for spreading? - int minN = 2 * opts.nspread; + UBIGINT minN = UBIGINT(2 * opts.nspread); if (N1 < minN || (N2 > 1 && N2 < minN) || (N3 > 1 && N3 < minN)) { fprintf(stderr, "%s error: one or more non-trivial box dims is less than 2.nspread!\n", @@ -1815,8 +1806,8 @@ int indexSort(std::vector &sort_indices, UBIGINT N1, UBIGINT N2, UBIGINT did_sort = 1; } else { #pragma omp parallel for num_threads(maxnthr) schedule(static, 1000000) - for (BIGINT i = 0; i < M; i++) // here omp helps xeon, hinders i7 - sort_indices[i] = i; // the identity permutation + for (BIGINT i = 0; i < BIGINT(M); i++) // here omp helps xeon, hinders i7 + sort_indices[i] = i; // the identity permutation if (opts.debug) printf("\tnot sorted (sort=%d): \t%.3g s\n", (int)opts.sort, timer.elapsedsec()); } @@ -1887,21 +1878,22 @@ static int spreadSorted(const std::vector &sort_indices, UBIGINT N1, UBI printf("\tnthr big: switching add_wrapped OMP from critical to atomic (!)\n"); std::vector brk(nb + 1); // NU index breakpoints defining nb subproblems - for (int p = 0; p <= nb; ++p) brk[p] = (M * p + nb - 1) / nb; + for (UBIGINT p = 0; p <= nb; ++p) brk[p] = (M * p + nb - 1) / nb; #pragma omp parallel num_threads(nthr) { // local copies of NU pts and data for each subproblem std::vector kx0{}, ky0{}, kz0{}, dd0{}, du0{}; -#pragma omp for schedule(dynamic, 1) // each is big - for (int isub = 0; isub < nb; isub++) { // Main loop through the subproblems - const auto M0 = brk[isub + 1] - brk[isub]; // # NU pts in this subproblem +#pragma omp for schedule(dynamic, 1) // each is big + for (BIGINT isub = 0; isub < BIGINT(nb); isub++) { // Main loop through the + // subproblems + const auto M0 = brk[isub + 1] - brk[isub]; // # NU pts in this subproblem // copy the location and data vectors for the nonuniform points kx0.resize(M0); ky0.resize(M0 * (N2 > 1)); kz0.resize(M0 * (N3 > 1)); dd0.resize(2 * M0); // complex strength data - for (auto j = 0; j < M0; j++) { // todo: can avoid this copying? + for (UBIGINT j = 0; j < M0; j++) { // todo: can avoid this copying? const auto kk = sort_indices[j + brk[isub]]; // NU pt from subprob index list kx0[j] = fold_rescale(kx[kk], N1); if (N2 > 1) ky0[j] = fold_rescale(ky[kk], N2); @@ -1950,7 +1942,8 @@ static int spreadSorted(const std::vector &sort_indices, UBIGINT N1, UBI } // end main loop over subprobs } if (opts.debug) - printf("\tt1 fancy spread: \t%.3g s (%ld subprobs)\n", timer.elapsedsec(), nb); + printf("\tt1 fancy spread: \t%.3g s (%" PRIu64 " subprobs)\n", timer.elapsedsec(), + nb); } // end of choice of which t1 spread type to use return 0; }; @@ -1998,10 +1991,10 @@ FINUFFT_NEVER_INLINE static int interpSorted_kernel( // main loop over NU trgs, interp each from U // (note: windows omp doesn't like unsigned loop vars) #pragma omp for schedule(dynamic, 1000) // assign threads to NU targ pts: - for (BIGINT i = 0; i < M; i += CHUNKSIZE) { + for (BIGINT i = 0; i < BIGINT(M); i += CHUNKSIZE) { // Setup buffers for this chunk const UBIGINT bufsize = (i + CHUNKSIZE > M) ? M - i : CHUNKSIZE; - for (int ibuf = 0; ibuf < bufsize; ibuf++) { + for (UBIGINT ibuf = 0; ibuf < bufsize; ibuf++) { UBIGINT j = sort_indices[i + ibuf]; jlist[ibuf] = j; xjlist[ibuf] = fold_rescale(kx[j], N1); @@ -2010,7 +2003,7 @@ FINUFFT_NEVER_INLINE static int interpSorted_kernel( } // Loop over targets in chunk - for (int ibuf = 0; ibuf < bufsize; ibuf++) { + for (UBIGINT ibuf = 0; ibuf < bufsize; ibuf++) { const auto xj = xjlist[ibuf]; const auto yj = (ndims > 1) ? yjlist[ibuf] : 0; const auto zj = (ndims > 2) ? zjlist[ibuf] : 0; @@ -2052,7 +2045,7 @@ FINUFFT_NEVER_INLINE static int interpSorted_kernel( } // end loop over targets in chunk // Copy result buffer to output array - for (int ibuf = 0; ibuf < bufsize; ibuf++) { + for (UBIGINT ibuf = 0; ibuf < bufsize; ibuf++) { const UBIGINT j = jlist[ibuf]; data_nonuniform[2 * j] = outbuf[2 * ibuf]; data_nonuniform[2 * j + 1] = outbuf[2 * ibuf + 1]; @@ -2154,8 +2147,9 @@ FINUFFT_EXPORT int FINUFFT_CDECL setup_spreader(finufft_spread_opts &opts, T eps opts. dim is spatial dimension (1,2, or 3). See finufft.cpp:finufft_plan() for where upsampfac is set. Must call this before any kernel evals done, otherwise segfault likely. Returns: 0 : success FINUFFT_WARN_EPS_TOO_SMALL : requested eps cannot be - achieved, but proceed with best possible eps otherwise : failure (see codes in defs.h); - spreading must not proceed Barnett 2017. debug, loosened eps logic 6/14/20. + achieved, but proceed with best possible eps otherwise : failure (see codes in + finufft_errors.h); spreading must not proceed Barnett 2017. debug, loosened eps logic + 6/14/20. */ { constexpr T EPSILON = std::numeric_limits::epsilon(); @@ -2206,7 +2200,7 @@ FINUFFT_EXPORT int FINUFFT_CDECL setup_spreader(finufft_spread_opts &opts, T eps if (upsampfac == 2.0) // standard sigma (see SISC paper) ns = std::ceil(-log10(eps / (T)10.0)); // 1 digit per power of 10 else // custom sigma - ns = std::ceil(-log(eps) / (PI * sqrt(1.0 - 1.0 / upsampfac))); // formula, gam=1 + ns = std::ceil(-log(eps) / (T(M_PI) * sqrt(1.0 - 1.0 / upsampfac))); // formula, gam=1 ns = max(2, ns); // (we don't have ns=1 version yet) if (ns > MAX_NSPREAD) { // clip to fit allocated arrays, Horner rules if (showwarn) @@ -2228,7 +2222,8 @@ FINUFFT_EXPORT int FINUFFT_CDECL setup_spreader(finufft_spread_opts &opts, T eps if (ns == 4) betaoverns = 2.38; if (upsampfac != 2.0) { // again, override beta for custom sigma T gamma = 0.97; // must match devel/gen_all_horner_C_code.m ! - betaoverns = gamma * PI * (1.0 - 1.0 / (2 * upsampfac)); // formula based on cutoff + betaoverns = gamma * T(M_PI) * (1.0 - 1.0 / (2 * upsampfac)); // formula based on + // cutoff } opts.ES_beta = betaoverns * ns; // set the kernel beta parameter if (debug) diff --git a/src/utils.cpp b/src/utils.cpp index f64009132..2627a179a 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -73,7 +73,7 @@ int get_num_threads_parallel_block() } // ---------- thread-safe rand number generator for Windows platform --------- -// (note this is used by macros in defs.h, and supplied in linux/macosx) +// (note this is used by macros in test_defs.h, and supplied in linux/macosx) #ifdef _WIN32 int rand_r(unsigned int * /*seedp*/) // Libin Lu, 6/18/20 diff --git a/test/directft/dirft1d.cpp b/test/directft/dirft1d.cpp index 5f36d76d7..c80299b47 100644 --- a/test/directft/dirft1d.cpp +++ b/test/directft/dirft1d.cpp @@ -1,11 +1,13 @@ -#include #include +#include #include // This is basically a port of dirft1d.f from CMCL package, except with // the 1/nj prefactors for type-1 removed. -void dirft1d1(BIGINT nj, FLT *x, CPX *c, int iflag, BIGINT ms, CPX *f) +template +void dirft1d1(BIGINT nj, T *x, std::complex *c, int iflag, BIGINT ms, + std::complex *f) /* Direct computation of 1D type-1 nonuniform FFT. Interface same as finufft1d1. c nj-1 c f[k1] = SUM c[j] exp(+-i k1 x[j]) @@ -17,12 +19,14 @@ c used, otherwise the - sign is used, in the exponential. * Uses C++ complex type and winding trick. Barnett 1/25/17 */ { - BIGINT kmin = -(ms / 2); // integer divide - for (BIGINT m = 0; m < ms; ++m) f[m] = CPX(0, 0); // it knows f is complex type + BIGINT kmin = -(ms / 2); // integer divide + for (BIGINT m = 0; m < ms; ++m) + f[m] = std::complex(0, 0); // it knows f is complex type for (BIGINT j = 0; j < nj; ++j) { - CPX a = (iflag > 0) ? exp(IMA * x[j]) : exp(-IMA * x[j]); - CPX p = pow(a, (FLT)kmin); // starting phase for most neg freq - CPX cc = c[j]; // no 1/nj prefac + std::complex a = (iflag > 0) ? exp(std::complex(0, 1) * x[j]) + : exp(-std::complex(0, 1) * x[j]); + std::complex p = pow(a, (T)kmin); // starting phase for most neg freq + std::complex cc = c[j]; // no 1/nj prefac for (BIGINT m = 0; m < ms; ++m) { f[m] += cc * p; p *= a; @@ -30,7 +34,9 @@ c used, otherwise the - sign is used, in the exponential. } } -void dirft1d2(BIGINT nj, FLT *x, CPX *c, int iflag, BIGINT ms, CPX *f) +template +void dirft1d2(BIGINT nj, T *x, std::complex *c, int iflag, BIGINT ms, + std::complex *f) /* Direct computation of 1D type-2 nonuniform FFT. Interface same as finufft1d2 c c c[j] = SUM f[k1] exp(+-i k1 x[j]) @@ -44,9 +50,10 @@ c used, otherwise the - sign is used, in the exponential. { BIGINT kmin = -(ms / 2); // integer divide for (BIGINT j = 0; j < nj; ++j) { - CPX a = (iflag > 0) ? exp(IMA * x[j]) : exp(-IMA * x[j]); - CPX p = pow(a, (FLT)kmin); // starting phase for most neg freq - CPX cc = CPX(0, 0); + std::complex a = (iflag > 0) ? exp(std::complex(0, 1) * x[j]) + : exp(-std::complex(0, 1) * x[j]); + std::complex p = pow(a, (T)kmin); // starting phase for most neg freq + std::complex cc = std::complex(0, 0); for (BIGINT m = 0; m < ms; ++m) { cc += f[m] * p; p *= a; @@ -55,7 +62,9 @@ c used, otherwise the - sign is used, in the exponential. } } -void dirft1d3(BIGINT nj, FLT *x, CPX *c, int iflag, BIGINT nk, FLT *s, CPX *f) +template +void dirft1d3(BIGINT nj, T *x, std::complex *c, int iflag, BIGINT nk, T *s, + std::complex *f) /* Direct computation of 1D type-3 nonuniform FFT. Interface same as finufft1d3 c nj-1 c f[k] = SUM c[j] exp(+-i s[k] x[j]) @@ -66,8 +75,9 @@ c exponential. Uses C++ complex type. Simple brute force. Barnett 1/25/17 */ { for (BIGINT k = 0; k < nk; ++k) { - CPX ss = (iflag > 0) ? IMA * s[k] : -IMA * s[k]; - f[k] = CPX(0, 0); + std::complex ss = + (iflag > 0) ? std::complex(0, 1) * s[k] : -std::complex(0, 1) * s[k]; + f[k] = std::complex(0, 0); for (BIGINT j = 0; j < nj; ++j) f[k] += c[j] * exp(ss * x[j]); } } diff --git a/test/directft/dirft2d.cpp b/test/directft/dirft2d.cpp index c13661549..62f126c15 100644 --- a/test/directft/dirft2d.cpp +++ b/test/directft/dirft2d.cpp @@ -1,11 +1,13 @@ -#include #include +#include #include // This is basically a port of dirft2d.f from CMCL package, except with // the 1/nj prefactors for type-1 removed. -void dirft2d1(BIGINT nj, FLT *x, FLT *y, CPX *c, int iflag, BIGINT ms, BIGINT mt, CPX *f) +template +void dirft2d1(BIGINT nj, T *x, T *y, std::complex *c, int iflag, BIGINT ms, BIGINT mt, + std::complex *f) /* Direct computation of 2D type-1 nonuniform FFT. Interface same as finufft2d1. c nj-1 c f[k1,k2] = SUM c[j] exp(+-i (k1 x[j] + k2 y[j])) @@ -18,19 +20,22 @@ c used, otherwise the - sign is used, in the exponential. * Uses C++ complex type and winding trick. Barnett 1/26/17 */ { - BIGINT k1min = -(ms / 2), k2min = -(mt / 2); // integer divide - BIGINT N = ms * mt; // total # output modes - for (BIGINT m = 0; m < N; ++m) f[m] = CPX(0, 0); // it knows f is complex type - for (BIGINT j = 0; j < nj; ++j) { // src pts - CPX a1 = (iflag > 0) ? exp(IMA * x[j]) : exp(-IMA * x[j]); - CPX a2 = (iflag > 0) ? exp(IMA * y[j]) : exp(-IMA * y[j]); - CPX sp1 = pow(a1, (FLT)k1min); // starting phase for most neg k1 freq - CPX p2 = pow(a2, (FLT)k2min); - CPX cc = c[j]; // no 1/nj norm - BIGINT m = 0; // output pointer + BIGINT k1min = -(ms / 2), k2min = -(mt / 2); // integer divide + BIGINT N = ms * mt; // total # output modes + for (BIGINT m = 0; m < N; ++m) + f[m] = std::complex(0, 0); // it knows f is complex type + for (BIGINT j = 0; j < nj; ++j) { // src pts + std::complex a1 = (iflag > 0) ? exp(std::complex(0, 1) * x[j]) + : exp(-std::complex(0, 1) * x[j]); + std::complex a2 = (iflag > 0) ? exp(std::complex(0, 1) * y[j]) + : exp(-std::complex(0, 1) * y[j]); + std::complex sp1 = pow(a1, (T)k1min); // starting phase for most neg k1 freq + std::complex p2 = pow(a2, (T)k2min); + std::complex cc = c[j]; // no 1/nj norm + BIGINT m = 0; // output pointer for (BIGINT m2 = 0; m2 < mt; ++m2) { - CPX p1 = sp1; // must reset p1 for each inner loop - for (BIGINT m1 = 0; m1 < ms; ++m1) { // ms is fast, mt slow + std::complex p1 = sp1; // must reset p1 for each inner loop + for (BIGINT m1 = 0; m1 < ms; ++m1) { // ms is fast, mt slow f[m++] += cc * p1 * p2; p1 *= a1; } @@ -39,7 +44,9 @@ c used, otherwise the - sign is used, in the exponential. } } -void dirft2d2(BIGINT nj, FLT *x, FLT *y, CPX *c, int iflag, BIGINT ms, BIGINT mt, CPX *f) +template +void dirft2d2(BIGINT nj, T *x, T *y, std::complex *c, int iflag, BIGINT ms, BIGINT mt, + std::complex *f) /* Direct computation of 2D type-2 nonuniform FFT. Interface same as finufft2d2 c[j] = SUM f[k1,k2] exp(+-i (k1 x[j] + k2 y[j])) @@ -56,14 +63,16 @@ void dirft2d2(BIGINT nj, FLT *x, FLT *y, CPX *c, int iflag, BIGINT ms, BIGINT mt { BIGINT k1min = -(ms / 2), k2min = -(mt / 2); // integer divide for (BIGINT j = 0; j < nj; ++j) { - CPX a1 = (iflag > 0) ? exp(IMA * x[j]) : exp(-IMA * x[j]); - CPX a2 = (iflag > 0) ? exp(IMA * y[j]) : exp(-IMA * y[j]); - CPX sp1 = pow(a1, (FLT)k1min); - CPX p2 = pow(a2, (FLT)k2min); - CPX cc = CPX(0, 0); - BIGINT m = 0; // input pointer + std::complex a1 = (iflag > 0) ? exp(std::complex(0, 1) * x[j]) + : exp(-std::complex(0, 1) * x[j]); + std::complex a2 = (iflag > 0) ? exp(std::complex(0, 1) * y[j]) + : exp(-std::complex(0, 1) * y[j]); + std::complex sp1 = pow(a1, (T)k1min); + std::complex p2 = pow(a2, (T)k2min); + std::complex cc = std::complex(0, 0); + BIGINT m = 0; // input pointer for (BIGINT m2 = 0; m2 < mt; ++m2) { - CPX p1 = sp1; + std::complex p1 = sp1; for (BIGINT m1 = 0; m1 < ms; ++m1) { cc += f[m++] * p1 * p2; p1 *= a1; @@ -74,8 +83,9 @@ void dirft2d2(BIGINT nj, FLT *x, FLT *y, CPX *c, int iflag, BIGINT ms, BIGINT mt } } -void dirft2d3(BIGINT nj, FLT *x, FLT *y, CPX *c, int iflag, BIGINT nk, FLT *s, FLT *t, - CPX *f) +template +void dirft2d3(BIGINT nj, T *x, T *y, std::complex *c, int iflag, BIGINT nk, T *s, T *t, + std::complex *f) /* Direct computation of 2D type-3 nonuniform FFT. Interface same as finufft2d3 c nj-1 c f[k] = SUM c[j] exp(+-i (s[k] x[j] + t[k] y[j])) @@ -86,9 +96,11 @@ c exponential. Uses C++ complex type. Simple brute force. Barnett 1/26/17 */ { for (BIGINT k = 0; k < nk; ++k) { - CPX ss = (iflag > 0) ? IMA * s[k] : -IMA * s[k]; - CPX tt = (iflag > 0) ? IMA * t[k] : -IMA * t[k]; - f[k] = CPX(0, 0); + std::complex ss = + (iflag > 0) ? std::complex(0, 1) * s[k] : -std::complex(0, 1) * s[k]; + std::complex tt = + (iflag > 0) ? std::complex(0, 1) * t[k] : -std::complex(0, 1) * t[k]; + f[k] = std::complex(0, 0); for (BIGINT j = 0; j < nj; ++j) f[k] += c[j] * exp(ss * x[j] + tt * y[j]); } } diff --git a/test/directft/dirft3d.cpp b/test/directft/dirft3d.cpp index 452b62471..b77111257 100644 --- a/test/directft/dirft3d.cpp +++ b/test/directft/dirft3d.cpp @@ -1,12 +1,13 @@ -#include #include +#include #include // This is basically a port of dirft2d.f from CMCL package, except with // the 1/nj prefactors for type-1 removed. -void dirft3d1(BIGINT nj, FLT *x, FLT *y, FLT *z, CPX *c, int iflag, BIGINT ms, BIGINT mt, - BIGINT mu, CPX *f) +template +void dirft3d1(BIGINT nj, T *x, T *y, T *z, std::complex *c, int iflag, BIGINT ms, + BIGINT mt, BIGINT mu, std::complex *f) /* Direct computation of 3D type-1 nonuniform FFT. Interface same as finufft3d1. c nj-1 c f[k1,k2,k3] = SUM c[j] exp(+-i (k1 x[j] + k2 y[j] + k2 z[j])) @@ -22,20 +23,24 @@ c used, otherwise the - sign is used, in the exponential. { BIGINT k1min = -(ms / 2), k2min = -(mt / 2), k3min = -(mu / 2); // integer divide BIGINT N = ms * mt * mu; // total # output modes - for (BIGINT m = 0; m < N; ++m) f[m] = CPX(0, 0); // it knows f is complex type - for (BIGINT j = 0; j < nj; ++j) { // src pts - CPX a1 = (iflag > 0) ? exp(IMA * x[j]) : exp(-IMA * x[j]); - CPX a2 = (iflag > 0) ? exp(IMA * y[j]) : exp(-IMA * y[j]); - CPX a3 = (iflag > 0) ? exp(IMA * z[j]) : exp(-IMA * z[j]); - CPX sp1 = pow(a1, (FLT)k1min); // starting phase for most neg k1 freq - CPX sp2 = pow(a2, (FLT)k2min); - CPX p3 = pow(a3, (FLT)k3min); - CPX cc = c[j]; // no 1/nj norm - BIGINT m = 0; // output pointer + for (BIGINT m = 0; m < N; ++m) + f[m] = std::complex(0, 0); // it knows f is complex type + for (BIGINT j = 0; j < nj; ++j) { // src pts + std::complex a1 = (iflag > 0) ? exp(std::complex(0, 1) * x[j]) + : exp(-std::complex(0, 1) * x[j]); + std::complex a2 = (iflag > 0) ? exp(std::complex(0, 1) * y[j]) + : exp(-std::complex(0, 1) * y[j]); + std::complex a3 = (iflag > 0) ? exp(std::complex(0, 1) * z[j]) + : exp(-std::complex(0, 1) * z[j]); + std::complex sp1 = pow(a1, (T)k1min); // starting phase for most neg k1 freq + std::complex sp2 = pow(a2, (T)k2min); + std::complex p3 = pow(a3, (T)k3min); + std::complex cc = c[j]; // no 1/nj norm + BIGINT m = 0; // output pointer for (BIGINT m3 = 0; m3 < mu; ++m3) { - CPX p2 = sp2; + std::complex p2 = sp2; for (BIGINT m2 = 0; m2 < mt; ++m2) { - CPX p1 = sp1; + std::complex p1 = sp1; for (BIGINT m1 = 0; m1 < ms; ++m1) { f[m++] += cc * p1 * p2 * p3; p1 *= a1; @@ -47,8 +52,9 @@ c used, otherwise the - sign is used, in the exponential. } } -void dirft3d2(BIGINT nj, FLT *x, FLT *y, FLT *z, CPX *c, int iflag, BIGINT ms, BIGINT mt, - BIGINT mu, CPX *f) +template +void dirft3d2(BIGINT nj, T *x, T *y, T *z, std::complex *c, int iflag, BIGINT ms, + BIGINT mt, BIGINT mu, std::complex *f) /* Direct computation of 3D type-2 nonuniform FFT. Interface same as finufft3d2 c[j] = SUM f[k1,k2,k3] exp(+-i (k1 x[j] + k2 y[j] + k3 z[j])) @@ -66,18 +72,21 @@ void dirft3d2(BIGINT nj, FLT *x, FLT *y, FLT *z, CPX *c, int iflag, BIGINT ms, B { BIGINT k1min = -(ms / 2), k2min = -(mt / 2), k3min = -(mu / 2); // integer divide for (BIGINT j = 0; j < nj; ++j) { - CPX a1 = (iflag > 0) ? exp(IMA * x[j]) : exp(-IMA * x[j]); - CPX a2 = (iflag > 0) ? exp(IMA * y[j]) : exp(-IMA * y[j]); - CPX a3 = (iflag > 0) ? exp(IMA * z[j]) : exp(-IMA * z[j]); - CPX sp1 = pow(a1, (FLT)k1min); - CPX sp2 = pow(a2, (FLT)k2min); - CPX p3 = pow(a3, (FLT)k3min); - CPX cc = CPX(0, 0); - BIGINT m = 0; // input pointer + std::complex a1 = (iflag > 0) ? exp(std::complex(0, 1) * x[j]) + : exp(-std::complex(0, 1) * x[j]); + std::complex a2 = (iflag > 0) ? exp(std::complex(0, 1) * y[j]) + : exp(-std::complex(0, 1) * y[j]); + std::complex a3 = (iflag > 0) ? exp(std::complex(0, 1) * z[j]) + : exp(-std::complex(0, 1) * z[j]); + std::complex sp1 = pow(a1, (T)k1min); + std::complex sp2 = pow(a2, (T)k2min); + std::complex p3 = pow(a3, (T)k3min); + std::complex cc = std::complex(0, 0); + BIGINT m = 0; // input pointer for (BIGINT m3 = 0; m3 < mu; ++m3) { - CPX p2 = sp2; + std::complex p2 = sp2; for (BIGINT m2 = 0; m2 < mt; ++m2) { - CPX p1 = sp1; + std::complex p1 = sp1; for (BIGINT m1 = 0; m1 < ms; ++m1) { cc += f[m++] * p1 * p2 * p3; p1 *= a1; @@ -90,8 +99,9 @@ void dirft3d2(BIGINT nj, FLT *x, FLT *y, FLT *z, CPX *c, int iflag, BIGINT ms, B } } -void dirft3d3(BIGINT nj, FLT *x, FLT *y, FLT *z, CPX *c, int iflag, BIGINT nk, FLT *s, - FLT *t, FLT *u, CPX *f) +template +void dirft3d3(BIGINT nj, T *x, T *y, T *z, std::complex *c, int iflag, BIGINT nk, T *s, + T *t, T *u, std::complex *f) /* Direct computation of 3D type-3 nonuniform FFT. Interface same as finufft3d3 c nj-1 c f[k] = SUM c[j] exp(+-i (s[k] x[j] + t[k] y[j] + u[k] z[j])) @@ -102,10 +112,13 @@ c exponential. Uses C++ complex type. Simple brute force. Barnett 2/1/17 */ { for (BIGINT k = 0; k < nk; ++k) { - CPX ss = (iflag > 0) ? IMA * s[k] : -IMA * s[k]; - CPX tt = (iflag > 0) ? IMA * t[k] : -IMA * t[k]; - CPX uu = (iflag > 0) ? IMA * u[k] : -IMA * u[k]; - f[k] = CPX(0, 0); + std::complex ss = + (iflag > 0) ? std::complex(0, 1) * s[k] : -std::complex(0, 1) * s[k]; + std::complex tt = + (iflag > 0) ? std::complex(0, 1) * t[k] : -std::complex(0, 1) * t[k]; + std::complex uu = + (iflag > 0) ? std::complex(0, 1) * u[k] : -std::complex(0, 1) * u[k]; + f[k] = std::complex(0, 0); for (BIGINT j = 0; j < nj; ++j) f[k] += c[j] * exp(ss * x[j] + tt * y[j] + uu * z[j]); } } diff --git a/test/dumbinputs.cpp b/test/dumbinputs.cpp index 5e74d6d30..e182cfe5f 100644 --- a/test/dumbinputs.cpp +++ b/test/dumbinputs.cpp @@ -103,7 +103,7 @@ int main(int argc, char *argv[]) { printf("1d1 M<0:\twrong err code %d\n", ier); return 1; } - int64_t Mhuge = (int64_t)(1e16); // cf defs.h MAX_NU_PTS + int64_t Mhuge = (int64_t)(1e16); // cf finufft_core.h MAX_NU_PTS ier = FINUFFT1D1(Mhuge, x, c, +1, acc, 0, F, &opts); if (ier != FINUFFT_ERR_NUM_NU_PTS_INVALID) { printf("1d1 M huge:\twrong err code %d\n", ier);