Merge branch 'master' into flipwind

flatironinstitute · Sep 10, 2024 · 2dc001a · 2dc001a
2 parents 754d5b6 + 37c497a
commit 2dc001a
Show file tree

Hide file tree

Showing 93 changed files with 1,228 additions and 186 deletions.
diff --git a/.github/workflows/python_build_wheels.yml b/.github/workflows/python_build_wheels.yml
@@ -18,15 +18,22 @@ jobs:
         with:
           package-dir: 'python/finufft'
         env:
-          CIBW_BEFORE_ALL_MACOS: brew install gcc@14 fftw
+          CIBW_BEFORE_ALL_MACOS: |
+            # In order to reinstall a version of GCC compatible with older versions of macOS, we need to first uninstall the existing version.
+            brew uninstall gcc
+            pkg=$(brew fetch --force --bottle-tag=monterey gcc | grep 'Downloaded to' | cut -d' ' -f3)
+            brew install $pkg
+
+            pkg=$(brew fetch --force --bottle-tag=monterey fftw | grep 'Downloaded to' | cut -d' ' -f3)
+            brew install $pkg
           CIBW_ARCHS_MACOS: "x86_64"
           # Need following versions of GCC for compatibility with fftw
           # installed by homebrew. Similarly, we set the macOS version
           # for compatibility with those libraries.
           CIBW_ENVIRONMENT_MACOS: >
             CC=gcc-14
             CXX=g++-14
-            MACOSX_DEPLOYMENT_TARGET=13
+            MACOSX_DEPLOYMENT_TARGET=12
 
       - uses: actions/upload-artifact@v4
         with:
@@ -46,18 +53,18 @@ jobs:
           package-dir: 'python/finufft'
         env:
           CIBW_ARCHS_MACOS: "arm64"
-          # Make sure to install the ARM64-specific versions of FFTW and GCC.
-          # Perhaps this is done automatically on the macos-14 image. We should
-          # look into this further.
           CIBW_BEFORE_ALL_MACOS: |
-            pkg=$(brew fetch --force --bottle-tag=arm64_ventura fftw | grep 'Downloaded to' | cut -d' ' -f3)
+            # In order to reinstall a version of GCC compatible with older versions of macOS, we need to first uninstall the existing version.
+            brew uninstall gcc
+            pkg=$(brew fetch --force --bottle-tag=arm64_monterey gcc | grep 'Downloaded to' | cut -d' ' -f3)
             brew install $pkg
-            pkg=$(brew fetch --force --bottle-tag=arm64_ventura gcc | grep 'Downloaded to' | cut -d' ' -f3)
+
+            pkg=$(brew fetch --force --bottle-tag=arm64_monterey fftw | grep 'Downloaded to' | cut -d' ' -f3)
             brew install $pkg
           CIBW_ENVIRONMENT_MACOS: >
             CC=gcc-14
             CXX=g++-14
-            MACOSX_DEPLOYMENT_TARGET=14
+            MACOSX_DEPLOYMENT_TARGET=12
 
       - uses: actions/upload-artifact@v4
         with:
@@ -85,11 +92,6 @@ jobs:
         uses: pypa/[email protected]
         with:
           package-dir: 'python/finufft'
-        env:
-          # This is required to force cmake to avoid using MSVC (the default).
-          # By setting the generator to Ninja, cmake will pick gcc (mingw64)
-          # as the compiler.
-          CIBW_CONFIG_SETTINGS: "cmake.args='-G Ninja'"
 
       - uses: actions/upload-artifact@v4
         with:

diff --git a/CHANGELOG b/CHANGELOG
@@ -1,10 +1,12 @@
 List of features / changes made / release notes, in reverse chronological order.
 If not stated, FINUFFT is assumed (cuFINUFFT <=1.3 is listed separately).
 
+Master (9/10/24)
+
 * reduced roundoff error in a[n] phase calc in CPU onedim_fseries_kernel().
    #534 (Barnett).
 
-V 2.3.0-rc1 (8/6/24)
+V 2.3.0 (9/5/24)
 
 * Switched C++ standards from C++14 to C++17, allowing various templating
   improvements (Barbone).
@@ -75,6 +77,9 @@ V 2.3.0-rc1 (8/6/24)
   test/finufft?d_test.cpp to reduce CI fails due to random numbers on some
   platforms in single-prec (with DUCC, etc). (Barnett PR516)
 * fix GPU segfault due to stream deletion as pointer not value (Barbone PR520)
+* new performance-tracking doc page comparing releases (Barbone) #527
+* fix various Py 3.8 wheel and numpy distutils logging issues #549 #545
+* CMake option to control -fPIC in static build; default now ON (as v2.2) #551
 
 V 2.2.0 (12/12/23)
 

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -19,6 +19,7 @@ option(FINUFFT_USE_OPENMP "Whether to use OpenMP for parallelization. If disable
 option(FINUFFT_USE_CPU "Whether to build the ordinary FINUFFT library (libfinufft)." ON)
 option(FINUFFT_USE_CUDA "Whether to build CUDA accelerated FINUFFT library (libcufinufft). This is completely independent of the main FINUFFT library" OFF)
 option(FINUFFT_STATIC_LINKING "If ON builds the static finufft library, if OFF build a shared finufft library." ON)
+option(FINUFFT_POSITION_INDEPENDENT_CODE "Whether to build the finufft library with position independent code (-fPIC). This is forced ON when FINUFFT_SHARED_LINKING is ON." ON)
 option(FINUFFT_BUILD_DEVEL "Whether to build development executables" OFF)
 option(FINUFFT_BUILD_EXAMPLES "Whether to build the FINUFFT examples" OFF)
 option(FINUFFT_BUILD_TESTS "Whether to build the FINUFFT tests" OFF)
@@ -37,6 +38,11 @@ cmake_dependent_option(FINUFFT_STATIC_LINKING "Disable static libraries in the c
 cmake_dependent_option(FINUFFT_SHARED_LINKING "Shared should be the opposite of static linking" ON "NOT FINUFFT_STATIC_LINKING" OFF)
 # cmake-format: on
 
+# When building shared libraries, we need to build with -fPIC in all cases
+if(FINUFFT_SHARED_LINKING)
+  set(FINUFFT_POSITION_INDEPENDENT_CODE ON)
+endif()
+
 include(cmake/utils.cmake)
 
 set(FINUFFT_CXX_FLAGS_RELEASE
@@ -117,7 +123,13 @@ endif()
 # double precision The single precision compilation is done with -DSINGLE
 set(FINUFFT_PRECISION_DEPENDENT_SOURCES
     src/finufft.cpp src/fft.cpp src/simpleinterfaces.cpp src/spreadinterp.cpp
-    src/utils.cpp fortran/finufftfort.cpp)
+    src/utils.cpp)
+
+# If we're building for Fortran, make sure we also include the translation
+# layer.
+if(FINUFFT_BUILD_FORTRAN)
+  list(APPEND FINUFFT_PRECISION_DEPENDENT_SOURCES fortran/finufftfort.cpp)
+endif()
 
 # set linker flags for sanitizer
 set(FINUFFT_SANITIZER_FLAGS)
@@ -137,7 +149,7 @@ function(enable_asan target)
   endif()
 endfunction()
 
-set(CPM_DOWNLOAD_VERSION 0.40.0)
+set(CPM_DOWNLOAD_VERSION 0.40.2)
 include(cmake/setupCPM.cmake)
 
 if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
@@ -225,7 +237,7 @@ function(set_finufft_options target)
   set_target_properties(
     ${target}
     PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
-               POSITION_INDEPENDENT_CODE ${FINUFFT_SHARED_LINKING})
+               POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE})
   enable_asan(${target})
   if(FINUFFT_USE_OPENMP)
     target_link_libraries(${target} PRIVATE OpenMP::OpenMP_CXX)
@@ -280,13 +292,17 @@ if(FINUFFT_USE_CPU)
 endif()
 
 if(FINUFFT_USE_CUDA)
-  if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
-    message(
-      "FINUFFT WARNING: No CUDA architecture supplied via '-DCMAKE_CUDA_ARCHITECTURES=...', defaulting to 'native'"
-    )
-    message(
-      "See: https://developer.nvidia.com/cuda-gpus for more details on what architecture to supply."
-    )
+  # Resolve the CUDA architecture list. An explicit FINUFFT_CUDA_ARCHITECTURES
+  # takes precedence; otherwise inherit CMAKE_CUDA_ARCHITECTURES when the user
+  # defined it; otherwise only warn. NOTE(review): the 'native' fallback named
+  # in the warning is presumably applied elsewhere -- confirm.
+  if(NOT DEFINED FINUFFT_CUDA_ARCHITECTURES)
+    if(DEFINED CMAKE_CUDA_ARCHITECTURES)
+      # Must be a real variable reference: "${VAR}", not "{$VAR}" (the latter
+      # would store the literal text instead of the architecture list).
+      set(FINUFFT_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}")
+    else()
+      message(
+        "FINUFFT WARNING: No CUDA architecture supplied via '-DFINUFFT_CUDA_ARCHITECTURES=...', defaulting to 'native'"
+      )
+      message(
+        "See: https://developer.nvidia.com/cuda-gpus for more details on what architecture to supply."
+      )
+    endif()
   endif()
   enable_language(CUDA)
   find_package(CUDAToolkit REQUIRED)
@@ -332,12 +348,14 @@ if(FINUFFT_BUILD_PYTHON)
   add_subdirectory(python)
 endif()
 
-message(STATUS " CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
+# cmake-format: off
 message(STATUS "FINUFFT configuration summary:")
+message(STATUS "  CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
 message(STATUS "  FINUFFT_USE_CPU: ${FINUFFT_USE_CPU}")
 message(STATUS "  FINUFFT_USE_CUDA: ${FINUFFT_USE_CUDA}")
 message(STATUS "  FINUFFT_USE_OPENMP: ${FINUFFT_USE_OPENMP}")
 message(STATUS "  FINUFFT_STATIC_LINKING: ${FINUFFT_STATIC_LINKING}")
+message(STATUS "  FINUFFT_POSITION_INDEPENDENT_CODE: ${FINUFFT_POSITION_INDEPENDENT_CODE}")
 message(STATUS "  FINUFFT_ENABLE_INSTALL: ${FINUFFT_ENABLE_INSTALL}")
 message(STATUS "  FINUFFT_BUILD_EXAMPLES: ${FINUFFT_BUILD_EXAMPLES}")
 message(STATUS "  FINUFFT_BUILD_TESTS: ${FINUFFT_BUILD_TESTS}")
@@ -349,7 +367,7 @@ message(STATUS "  FINUFFT_FFTW_SUFFIX: ${FINUFFT_FFTW_SUFFIX}")
 message(STATUS "  FINUFFT_FFTW_LIBRARIES: ${FINUFFT_FFTW_LIBRARIES}")
 message(STATUS "  FINUFFT_ARCH_FLAGS: ${FINUFFT_ARCH_FLAGS}")
 message(STATUS "  FINUFFT_USE_DUCC0: ${FINUFFT_USE_DUCC0}")
-
+# cmake-format: on
 if(FINUFFT_ENABLE_INSTALL)
   include(GNUInstallDirs)
   install(TARGETS ${INSTALL_TARGETS} PUBLIC_HEADER)

diff --git a/Jenkinsfile b/Jenkinsfile
@@ -59,7 +59,7 @@ pipeline {
       source $HOME/bin/activate
       python3 -m pip install --no-cache-dir --upgrade pycuda cupy-cuda112 numba
       python3 -m pip install --no-cache-dir torch==1.12.1+cu113 -f https://download.pytorch.org/whl/torch_stable.html
-      python3 -m pip install --no-cache-dir pytest
+      python3 -m pip install --no-cache-dir pytest pytest-mock
       python -c "from numba import cuda; cuda.cudadrv.libs.test()"
       python3 -m pytest --framework=pycuda python/cufinufft
       python3 -m pytest --framework=numba python/cufinufft

diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
-Copyright (C) 2017-2023 The Simons Foundation, Inc. - All Rights Reserved.
+Copyright (C) 2017-2024 The Simons Foundation, Inc. - All Rights Reserved.
 
-Lead developer: Alex H. Barnett; see docs/ackn.rst for other contributors.
+See docs/ackn.rst for the list of code authors and contributors.
 
 ------
 
@@ -29,16 +29,22 @@ tutorial/utils/lgwt.m
 
 If you find this library useful, or it helps you in creating software
 or publications, please let us know, and acknowledge that fact by citing our
-repository:
+source repository:
 
   https://github.com/flatironinstitute/finufft
 
-and the corresponding journal articles (particularly the first):
+and the corresponding journal articles (particularly the first for the CPU
+and/or the last for the GPU):
 
   A parallel non-uniform fast Fourier transform library based on an
   ``exponential of semicircle'' kernel. A. H. Barnett, J. F. Magland,
   and L. af Klinteberg.  SIAM J. Sci. Comput. 41(5), C479-C504 (2019).
 
-  Aliasing error of the exp$(\beta \sqrt{1-z^2})$ kernel in the
+  Aliasing error of the $\exp (\beta \sqrt{1-z^2})$ kernel in the
   nonuniform fast Fourier transform. A. H. Barnett,
   Appl. Comput. Harmon. Anal. 51, 1-16 (2021).
+
+  cuFINUFFT: a load-balanced GPU library for general-purpose nonuniform FFTs,
+  Yu-hsuan Shih, Garrett Wright, Joakim Andén, Johannes Blaschke, and
+  Alex H. Barnett. PDSEC2021 workshop of the IPDPS2021 conference.
+  https://arxiv.org/abs/2102.08463
diff --git a/cmake/setupDUCC.cmake b/cmake/setupDUCC.cmake
@@ -29,7 +29,7 @@ if(ducc0_ADDED)
   set_target_properties(
     ducc0
     PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
-               POSITION_INDEPENDENT_CODE ${FINUFFT_SHARED_LINKING})
+               POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE})
   check_cxx_compiler_flag(-ffast-math HAS_FAST_MATH)
   if(HAS_FAST_MATH)
     target_compile_options(ducc0 PRIVATE -ffast-math)

diff --git a/cmake/setupFFTW.cmake b/cmake/setupFFTW.cmake
@@ -72,7 +72,8 @@ if(FINUFFT_FFTW_LIBRARIES STREQUAL DEFAULT OR FINUFFT_FFTW_LIBRARIES STREQUAL
       set_target_properties(
         ${element}
         PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
-                   POSITION_INDEPENDENT_CODE ${FINUFFT_SHARED_LINKING})
+                   POSITION_INDEPENDENT_CODE
+                   ${FINUFFT_POSITION_INDEPENDENT_CODE})
     endforeach()
 
     target_include_directories(

diff --git a/docs/conf.py b/docs/conf.py
@@ -74,9 +74,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = u'2.3-rc1'
+version = u'2.3'
 # The full version, including alpha/beta/rc tags.
-release = u'2.3.0-rc1'
+release = u'2.3.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/docs/devnotes.rst b/docs/devnotes.rst
@@ -5,7 +5,9 @@ Developer notes
 
 * Developers needing to update/regenerate documentation in general, including our readthedocs website, see ``docs/README``. Developers changing MATLAB/octave interfaces or docs, also see ``matlab/README``. Please also see ``contributing.md`` for code style and git hook guidelines.
 
-* To update the version number, this needs to be done by hand in the following places:
+* FINUFFT is by default built with position-independent code (``-fPIC`` compile flag), by both the Makefile and CMake. See CMake options to change this for the static library. Developers changing the FINUFFT source code should use ``static`` functions unless they need to export the symbol; this will prevent performance degradation when using ``-fPIC``.
+
+* To update the version number, this needs to be done by hand in the following places (we decided that a version-bump script is not worth the hassle):
 
   - ``CMakeLists.txt`` for cmake
   - ``docs/conf.py`` for sphinx
@@ -23,7 +25,8 @@ Developer notes
 
 * If you add a new option field (recall it must be plain C style only, no special types) to ``include/finufft_opts.h``, don't forget to add it to ``include/finufft.fh``, ``include/finufft_mod.f90``, ``matlab/finufft.mw``, ``python/finufft/_finufft.py``, and the Julia interface, as well a paragraph describing its use in the docs. Also to set its default value in ``src/finufft.cpp``. You will then need to regenerate the docs as in ``docs/README``.
 
-* For testing and performance measuring routines see ``test/README`` and ``perftest/README``. We need more of the latter, eg, something making performance graphs that enable rapid eyeball comparison of various settings/machines. Marco is working on that.
+* For basic testing and performance measuring routines see ``test/README`` and ``perftest/README``.
+  To generate sets of performance graphs that enable rapid eyeball comparison between releases, see the :ref:`performance page <performance>` with graphs currently generated by ``perftest/bench.py``.
 
 * The kernel function in spreadinterp is evaluated via piecewise-polynomial approximation (Horner's rule). The code for this is auto-generated in MATLAB, for all upsampling factors. There are two versions supported:
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -13,23 +13,24 @@ Flatiron Institute Nonuniform Fast Fourier Transform
 
 
 Documentation contents
-========================             
-             
+========================
+
 .. toctree::
    :maxdepth: 3
 
    install
    install_gpu
    dirs
    math
-   cex           
+   cex
    c
    c_gpu
    opts
    error
    trouble
+   performance
    tut
-   fortran          
+   fortran
    matlab
    python
    python_gpu
@@ -42,5 +43,3 @@ Documentation contents
    users
    ackn
    refs
-
-