diff --git a/CHANGELOG b/CHANGELOG index 01107e1bf..dc1e57e14 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,8 @@ List of features / changes made / release notes, in reverse chronological order. If not stated, FINUFFT is assumed (cuFINUFFT <=1.3 is listed separately). -V 2.3.1 (10/8/24) support release + +V 2.3.1 (11/25/24, minor support release) * Support and docs for opts.gpu_spreadinterponly=1 for MRI "density compensation estimation" type 1&2 use-case with upsampfac=1.0 PR564 (Chaithya G R). diff --git a/CMakeLists.txt b/CMakeLists.txt index 423c8adc4..baf8dfd93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.19...3.30) project( FINUFFT - VERSION 2.3.0 + VERSION 2.3.1 LANGUAGES C CXX) # windows MSVC runtime flags policy diff --git a/docs/conf.py b/docs/conf.py index 8b3d3f6af..eeec02b44 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -76,7 +76,7 @@ # The short X.Y version. version = u'2.3' # The full version, including alpha/beta/rc tags. -release = u'2.3.0' +release = u'2.3.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/install.rst b/docs/install.rst index 8f2b4eca4..e8aac8086 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -51,13 +51,13 @@ Then add the following to your ``CMakeLists.txt``: .. 
code-block:: cmake # short version - CPMAddPackage("gh:flatironinstitute/finufft@2.3.0") + CPMAddPackage("gh:flatironinstitute/finufft@2.3.1") # alternative in case custom options are needed CPMAddPackage( NAME Finufft GIT_REPOSITORY https://github.com/flatironinstitute/finufft.git - GIT_TAG 2.3.0 + GIT_TAG 2.3.1 GIT_SHALLOW Yes GIT_PROGRESS Yes EXCLUDE_FROM_ALL Yes @@ -79,7 +79,7 @@ Add the following to your ``CMakeLists.txt``: FetchContent_Declare( finufft GIT_REPOSITORY https://github.com/flatironinstitute/finufft.git - GIT_TAG 2.3.0 + GIT_TAG 2.3.1 ) # Make the content available diff --git a/include/finufft/defs.h b/include/finufft/defs.h index 7184bdda8..c6e3c74e3 100644 --- a/include/finufft/defs.h +++ b/include/finufft/defs.h @@ -65,7 +65,7 @@ // ------------- Library-wide algorithm parameter settings ---------------- // Library version (is a string) -#define FINUFFT_VER "2.3.0" +#define FINUFFT_VER "2.3.1" // Smallest possible kernel spread width per dimension, in fine grid points // (used only in spreadinterp.cpp) diff --git a/include/finufft/finufft_core.h b/include/finufft/finufft_core.h new file mode 100644 index 000000000..92237b707 --- /dev/null +++ b/include/finufft/finufft_core.h @@ -0,0 +1,217 @@ +#ifndef FINUFFT_CORE_H +#define FINUFFT_CORE_H + +/* IMPORTANT: for Windows compilers, you should add a line + #define FINUFFT_DLL + here if you are compiling/using FINUFFT as a DLL, + in order to do the proper importing/exporting, or + alternatively compile with -DFINUFFT_DLL or the equivalent + command-line flag. This is not necessary under MinGW/Cygwin, where + libtool does the imports/exports automatically. + Alternatively use include(GenerateExportHeader) and + generate_export_header(finufft) to auto-generate a header containing + these defines. The main reason is that if msvc changes the way it deals + with it in the future we just need to update cmake for it to work + instead of having a check on the msvc version.
*/ +#if defined(FINUFFT_DLL) && (defined(_WIN32) || defined(__WIN32__)) +#if defined(dll_EXPORTS) +#define FINUFFT_EXPORT __declspec(dllexport) +#else +#define FINUFFT_EXPORT __declspec(dllimport) +#endif +#else +#define FINUFFT_EXPORT +#endif + +/* specify calling convention (Windows only) + The cdecl calling convention is actually not the default in all but a very + few C/C++ compilers. + If the user code changes the default compiler calling convention, this may + be needed when generating a DLL. */ +#if defined(_WIN32) || defined(__WIN32__) +#define FINUFFT_CDECL __cdecl +#else +#define FINUFFT_CDECL +#endif + +// Compiler-specific helper macros: forced/never inlining, restrict, unreachable and +// branch-likelihood hints. Forced inlining avoids using macros to implement functions +#if defined(_MSC_VER) +#define FINUFFT_ALWAYS_INLINE __forceinline inline +#define FINUFFT_NEVER_INLINE __declspec(noinline) +#define FINUFFT_RESTRICT __restrict +#define FINUFFT_UNREACHABLE __assume(0) +#define FINUFFT_UNLIKELY(x) (x) +#define FINUFFT_LIKELY(x) (x) +#elif defined(__GNUC__) || defined(__clang__) +#define FINUFFT_ALWAYS_INLINE __attribute__((always_inline)) inline +#define FINUFFT_NEVER_INLINE __attribute__((noinline)) +#define FINUFFT_RESTRICT __restrict__ +#define FINUFFT_UNREACHABLE __builtin_unreachable() +#define FINUFFT_UNLIKELY(x) __builtin_expect(!!(x), 0) +#define FINUFFT_LIKELY(x) __builtin_expect(!!(x), 1) +#else +#define FINUFFT_ALWAYS_INLINE inline +#define FINUFFT_NEVER_INLINE +#define FINUFFT_RESTRICT +#define FINUFFT_UNREACHABLE +#define FINUFFT_UNLIKELY(x) (x) +#define FINUFFT_LIKELY(x) (x) +#endif + +#include +#include +#include + +// All indexing in library that potentially can exceed 2^31 uses 64-bit signed. +// This includes all calling arguments (e.g. M, N) that could be huge someday.
+using BIGINT = int64_t; +using UBIGINT = uint64_t; + +// ------------- Library-wide algorithm parameter settings ---------------- + +// Library version (is a string) +#define FINUFFT_VER "2.3.1" + +// Smallest possible kernel spread width per dimension, in fine grid points +// (used only in spreadinterp.cpp) +inline constexpr int MIN_NSPREAD = 2; + +// Largest possible kernel spread width per dimension, in fine grid points +// (used only in spreadinterp.cpp) +inline constexpr int MAX_NSPREAD = 16; + +// Fraction growth cut-off in utils:arraywidcen; sets when to translate in type 3 +inline constexpr double ARRAYWIDCEN_GROWFRAC = 0.1; + +// Max number of positive quadrature nodes for kernel FT (used only in common.cpp) +inline constexpr int MAX_NQUAD = 100; + +// Internal (nf1 etc) array allocation size that immediately raises error. +// (Note: next235 takes 1s for 1e11, so this limit also prevents a hang here.) +// Increase this if you need >10TB (!) RAM... +inline constexpr BIGINT MAX_NF = BIGINT(1e12); + +// Maximum allowed number M of NU points; useful to catch incorrectly cast int32 +// values for M = nj (also nk in type 3)...
+inline constexpr BIGINT MAX_NU_PTS = BIGINT(1e14); + +// We define our own PI here because M_PI is not actually part of standard C++ +inline constexpr double PI = 3.14159265358979329; +inline constexpr double INV_2PI = 0.159154943091895336; + +// ----- OpenMP wrappers which also work when OpenMP is not present ----- +// Allow OpenMP to be switched off at compile time, so the code still compiles without +// any OpenMP (Note: _OPENMP is automatically set by the -fopenmp compile flag) +#ifdef _OPENMP +#include +// forward to the actual OpenMP runtime calls +static inline int MY_OMP_GET_NUM_THREADS [[maybe_unused]] () { + return omp_get_num_threads(); +} +static inline int MY_OMP_GET_MAX_THREADS [[maybe_unused]] () { + return omp_get_max_threads(); +} +static inline int MY_OMP_GET_THREAD_NUM [[maybe_unused]] () { + return omp_get_thread_num(); +} +static inline void MY_OMP_SET_NUM_THREADS [[maybe_unused]] (int x) { + omp_set_num_threads(x); +} +#else +// safe single-thread dummy versions for builds without OpenMP... +static inline int MY_OMP_GET_NUM_THREADS [[maybe_unused]] () { return 1; } +static inline int MY_OMP_GET_MAX_THREADS [[maybe_unused]] () { return 1; } +static inline int MY_OMP_GET_THREAD_NUM [[maybe_unused]] () { return 0; } +static inline void MY_OMP_SET_NUM_THREADS [[maybe_unused]] (int) {} +#endif + +#include // (must come after complex.h) +#include +#include + +// group together a bunch of type 3 rescaling/centering/phasing parameters: +template struct type3params { + T X1, C1, D1, h1, gam1; // x dim: X=halfwid C=center D=freqcen h,gam=rescale + T X2, C2, D2, h2, gam2; // y + T X3, C3, D3, h3, gam3; // z +}; + +template struct FINUFFT_PLAN_T { // the main plan class, fully C++ + + using TC = std::complex; + + // These default and delete specifications just state the obvious, + // but are here to silence compiler warnings.
+ FINUFFT_PLAN_T(int type, int dim, const BIGINT *n_modes, int iflag, int ntrans, TF tol, + finufft_opts *opts, int &ier); + // Copy construction and assignment are already deleted implicitly + // because of the unique_ptr member. + FINUFFT_PLAN_T(const FINUFFT_PLAN_T &) = delete; + FINUFFT_PLAN_T &operator=(const FINUFFT_PLAN_T &) = delete; + ~FINUFFT_PLAN_T(); + + int type; // transform type (Rokhlin naming): 1,2 or 3 + int dim; // overall dimension: 1,2 or 3 + int ntrans; // how many transforms to do at once (vector or "many" mode) + BIGINT nj; // num of NU pts in type 1,2 (for type 3, num input x pts) + BIGINT nk; // number of NU freq pts (type 3 only) + TF tol; // relative user tolerance + int batchSize; // # strength vectors to group together for FFTW, etc + int nbatch; // how many batches done to cover all ntrans vectors + + BIGINT ms; // number of modes in x (1) dir (historical CMCL name) = N1 + BIGINT mt; // number of modes in y (2) direction = N2 + BIGINT mu; // number of modes in z (3) direction = N3 + BIGINT N; // total # modes (prod of above three) + + BIGINT nf1 = 1; // size of internal fine grid in x (1) direction + BIGINT nf2 = 1; // size of internal fine grid in y (2) direction + BIGINT nf3 = 1; // size of internal fine grid in z (3) direction + BIGINT nf = 1; // total # fine grid points (product of the above three) + + int fftSign; // sign in exponential for NUFFT defn, guaranteed to be +-1 + + std::vector phiHat1; // FT of kernel in t1,2, on x-axis mode grid + std::vector phiHat2; // FT of kernel in t1,2, on y-axis mode grid + std::vector phiHat3; // FT of kernel in t1,2, on z-axis mode grid + + // fwBatch: (batches of) fine working grid(s) for the FFT to plan & act on. + // Usually the largest internal array. Its allocator is 64-byte (cache-line) aligned: + std::vector> fwBatch; + + std::vector sortIndices; // precomputed NU pt permutation, speeds spread/interp + bool didSort; // whether binsorting used (false: identity perm used) + + // for t1,2: ptr to user-supplied NU pts (no new allocs). + // for t3: will become ptr to internally allocated "primed" (scaled) Xp, Yp, Zp vecs.
+ TF *X = nullptr, *Y = nullptr, *Z = nullptr; + + // type 3 specific + TF *S = nullptr, *T = nullptr, *U = nullptr; // ptrs to user's target NU-point arrays + // (no new allocs) + std::vector prephase; // pre-phase, for all input NU pts + std::vector deconv; // reciprocal of kernel FT, phase, all output NU pts + std::vector CpBatch; // working array of prephased strengths + std::vector Xp, Yp, Zp; // internal primed NU points (x'_j, etc) + std::vector Sp, Tp, Up; // internal primed targets (s'_k, etc) + type3params t3P; // groups together type 3 shift, scale, phase parameters + std::unique_ptr> innerT2plan; // ptr used for type 2 in step 2 of + // type 3 + + // other internal structs + std::unique_ptr> fftPlan; + finufft_opts opts; // this and spopts could be made ptrs + finufft_spread_opts spopts; + + // Remaining actions (not create/delete) in guru interface are now methods... + int setpts(BIGINT nj, TF *xj, TF *yj, TF *zj, BIGINT nk, TF *s, TF *t, TF *u); + int execute(std::complex *cj, std::complex *fk); +}; + +void finufft_default_opts_t(finufft_opts *o); +template +int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int ntrans, + TF tol, FINUFFT_PLAN_T **pp, finufft_opts *opts); + +#endif // FINUFFT_CORE_H diff --git a/matlab/Contents.m b/matlab/Contents.m index e10052856..30ab0a67b 100644 --- a/matlab/Contents.m +++ b/matlab/Contents.m @@ -1,5 +1,5 @@ % FINUFFT: Flatiron Institute Nonuniform Fast Fourier Transform -% Version 2.3.0 +% Version 2.3.1 % % Basic and many-vector interfaces % finufft1d1 - 1D complex nonuniform FFT of type 1 (nonuniform to uniform).
diff --git a/python/cufinufft/cufinufft/__init__.py b/python/cufinufft/cufinufft/__init__.py index bf3664015..02daefa49 100644 --- a/python/cufinufft/cufinufft/__init__.py +++ b/python/cufinufft/cufinufft/__init__.py @@ -8,4 +8,4 @@ "nufft3d1", "nufft3d2", "Plan"] -__version__ = '2.3.0' +__version__ = '2.3.1' diff --git a/python/finufft/finufft/__init__.py b/python/finufft/finufft/__init__.py index def34313b..260ea2e2e 100644 --- a/python/finufft/finufft/__init__.py +++ b/python/finufft/finufft/__init__.py @@ -17,4 +17,4 @@ from finufft._interfaces import nufft2d1,nufft2d2,nufft2d3 from finufft._interfaces import nufft3d1,nufft3d2,nufft3d3 -__version__ = '2.3.0' +__version__ = '2.3.1'