Skip to content

Commit

Permalink
Merge branch 'master' into flipwind
Browse files Browse the repository at this point in the history
  • Loading branch information
ahbarnett authored Sep 10, 2024
2 parents 754d5b6 + 37c497a commit 2dc001a
Show file tree
Hide file tree
Showing 93 changed files with 1,228 additions and 186 deletions.
28 changes: 15 additions & 13 deletions .github/workflows/python_build_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,22 @@ jobs:
with:
package-dir: 'python/finufft'
env:
CIBW_BEFORE_ALL_MACOS: brew install gcc@14 fftw
CIBW_BEFORE_ALL_MACOS: |
# In order to reinstall a version of GCC compatible with older versions of macOS, we need to first uninstall the existing version.
brew uninstall gcc
pkg=$(brew fetch --force --bottle-tag=monterey gcc | grep 'Downloaded to' | cut -d' ' -f3)
brew install $pkg
pkg=$(brew fetch --force --bottle-tag=monterey fftw | grep 'Downloaded to' | cut -d' ' -f3)
brew install $pkg
CIBW_ARCHS_MACOS: "x86_64"
# Need following versions of GCC for compatibility with fftw
# installed by homebrew. Similarly, we set the macOS version
# for compatibility with those libraries.
CIBW_ENVIRONMENT_MACOS: >
CC=gcc-14
CXX=g++-14
MACOSX_DEPLOYMENT_TARGET=13
MACOSX_DEPLOYMENT_TARGET=12
- uses: actions/upload-artifact@v4
with:
Expand All @@ -46,18 +53,18 @@ jobs:
package-dir: 'python/finufft'
env:
CIBW_ARCHS_MACOS: "arm64"
# Make sure to install the ARM64-specific versions of FFTW and GCC.
# Perhaps this is done automatically on the macos-14 image. We should
# look into this further.
CIBW_BEFORE_ALL_MACOS: |
pkg=$(brew fetch --force --bottle-tag=arm64_ventura fftw | grep 'Downloaded to' | cut -d' ' -f3)
# In order to reinstall a version of GCC compatible with older versions of macOS, we need to first uninstall the existing version.
brew uninstall gcc
pkg=$(brew fetch --force --bottle-tag=arm64_monterey gcc | grep 'Downloaded to' | cut -d' ' -f3)
brew install $pkg
pkg=$(brew fetch --force --bottle-tag=arm64_ventura gcc | grep 'Downloaded to' | cut -d' ' -f3)
pkg=$(brew fetch --force --bottle-tag=arm64_monterey fftw | grep 'Downloaded to' | cut -d' ' -f3)
brew install $pkg
CIBW_ENVIRONMENT_MACOS: >
CC=gcc-14
CXX=g++-14
MACOSX_DEPLOYMENT_TARGET=14
MACOSX_DEPLOYMENT_TARGET=12
- uses: actions/upload-artifact@v4
with:
Expand Down Expand Up @@ -85,11 +92,6 @@ jobs:
uses: pypa/[email protected]
with:
package-dir: 'python/finufft'
env:
# This is required to force cmake to avoid using MSVC (the default).
# By setting the generator to Ninja, cmake will pick gcc (mingw64)
# as the compiler.
CIBW_CONFIG_SETTINGS: "cmake.args='-G Ninja'"

- uses: actions/upload-artifact@v4
with:
Expand Down
7 changes: 6 additions & 1 deletion CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
List of features / changes made / release notes, in reverse chronological order.
If not stated, FINUFFT is assumed (cuFINUFFT <=1.3 is listed separately).

Master (9/10/24)

* reduced roundoff error in a[n] phase calc in CPU onedim_fseries_kernel().
#534 (Barnett).

V 2.3.0-rc1 (8/6/24)
V 2.3.0 (9/5/24)

* Switched C++ standards from C++14 to C++17, allowing various templating
improvements (Barbone).
Expand Down Expand Up @@ -75,6 +77,9 @@ V 2.3.0-rc1 (8/6/24)
test/finufft?d_test.cpp to reduce CI fails due to random numbers on some
platforms in single-prec (with DUCC, etc). (Barnett PR516)
* fix GPU segfault due to stream deletion as pointer not value (Barbone PR520)
* new performance-tracking doc page comparing releases (Barbone) #527
* fix various Py 3.8 wheel and numpy distutils logging issues #549 #545
* Cmake option to control -fPIC in static build; default now ON (as v2.2) #551

V 2.2.0 (12/12/23)

Expand Down
42 changes: 30 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ option(FINUFFT_USE_OPENMP "Whether to use OpenMP for parallelization. If disable
option(FINUFFT_USE_CPU "Whether to build the ordinary FINUFFT library (libfinufft)." ON)
option(FINUFFT_USE_CUDA "Whether to build CUDA accelerated FINUFFT library (libcufinufft). This is completely independent of the main FINUFFT library" OFF)
option(FINUFFT_STATIC_LINKING "If ON builds the static finufft library, if OFF build a shared finufft library." ON)
# Controls -fPIC for the finufft library; defaults to ON. The help text below is shown by cmake-gui/ccmake.
option(FINUFFT_POSITION_INDEPENDENT_CODE "Whether to build the finufft library with position independent code (-fPIC). This is forced ON when FINUFFT_SHARED_LINKING is ON." ON)
option(FINUFFT_BUILD_DEVEL "Whether to build development executables" OFF)
option(FINUFFT_BUILD_EXAMPLES "Whether to build the FINUFFT examples" OFF)
option(FINUFFT_BUILD_TESTS "Whether to build the FINUFFT tests" OFF)
Expand All @@ -37,6 +38,11 @@ cmake_dependent_option(FINUFFT_STATIC_LINKING "Disable static libraries in the c
cmake_dependent_option(FINUFFT_SHARED_LINKING "Shared should be the opposite of static linking" ON "NOT FINUFFT_STATIC_LINKING" OFF)
# cmake-format: on

# When building shared libraries, we need to build with -fPIC in all cases,
# so any user-supplied FINUFFT_POSITION_INDEPENDENT_CODE=OFF is overridden
# here. The option therefore only has an effect on static builds.
if(FINUFFT_SHARED_LINKING)
set(FINUFFT_POSITION_INDEPENDENT_CODE ON)
endif()

include(cmake/utils.cmake)

set(FINUFFT_CXX_FLAGS_RELEASE
Expand Down Expand Up @@ -117,7 +123,13 @@ endif()
# double precision The single precision compilation is done with -DSINGLE
set(FINUFFT_PRECISION_DEPENDENT_SOURCES
src/finufft.cpp src/fft.cpp src/simpleinterfaces.cpp src/spreadinterp.cpp
src/utils.cpp fortran/finufftfort.cpp)
src/utils.cpp)

# If we're building for Fortran, make sure we also include the translation
# layer (fortran/finufftfort.cpp). It is appended to the precision-dependent
# source list only when FINUFFT_BUILD_FORTRAN is enabled.
if(FINUFFT_BUILD_FORTRAN)
list(APPEND FINUFFT_PRECISION_DEPENDENT_SOURCES fortran/finufftfort.cpp)
endif()

# set linker flags for sanitizer
set(FINUFFT_SANITIZER_FLAGS)
Expand All @@ -137,7 +149,7 @@ function(enable_asan target)
endif()
endfunction()

set(CPM_DOWNLOAD_VERSION 0.40.0)
set(CPM_DOWNLOAD_VERSION 0.40.2)
include(cmake/setupCPM.cmake)

if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
Expand Down Expand Up @@ -225,7 +237,7 @@ function(set_finufft_options target)
set_target_properties(
${target}
PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
POSITION_INDEPENDENT_CODE ${FINUFFT_SHARED_LINKING})
POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE})
enable_asan(${target})
if(FINUFFT_USE_OPENMP)
target_link_libraries(${target} PRIVATE OpenMP::OpenMP_CXX)
Expand Down Expand Up @@ -280,13 +292,17 @@ if(FINUFFT_USE_CPU)
endif()

if(FINUFFT_USE_CUDA)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
message(
"FINUFFT WARNING: No CUDA architecture supplied via '-DCMAKE_CUDA_ARCHITECTURES=...', defaulting to 'native'"
)
message(
"See: https://developer.nvidia.com/cuda-gpus for more details on what architecture to supply."
)
# Resolve the CUDA architecture list for FINUFFT.
# Precedence: an explicitly supplied FINUFFT_CUDA_ARCHITECTURES is left
# untouched; otherwise CMAKE_CUDA_ARCHITECTURES is inherited when the user
# defined it; otherwise a warning is printed.
# NOTE(review): the 'native' fallback named in the warning is not assigned in
# this block -- presumably it is applied further down; confirm.
if(NOT DEFINED FINUFFT_CUDA_ARCHITECTURES)
  if(DEFINED CMAKE_CUDA_ARCHITECTURES)
    # Must be "${VAR}" (variable reference). The previous "{$VAR}" spelling
    # stored the literal text "{$CMAKE_CUDA_ARCHITECTURES}" instead of the
    # user's value. Quoted so a multi-entry list such as "70;80" stays one
    # argument.
    set(FINUFFT_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}")
  else()
    message(
      "FINUFFT WARNING: No CUDA architecture supplied via '-DFINUFFT_CUDA_ARCHITECTURES=...', defaulting to 'native'"
    )
    message(
      "See: https://developer.nvidia.com/cuda-gpus for more details on what architecture to supply."
    )
  endif()
endif()
enable_language(CUDA)
find_package(CUDAToolkit REQUIRED)
Expand Down Expand Up @@ -332,12 +348,14 @@ if(FINUFFT_BUILD_PYTHON)
add_subdirectory(python)
endif()

message(STATUS " CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
# cmake-format: off
message(STATUS "FINUFFT configuration summary:")
message(STATUS " CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
message(STATUS " FINUFFT_USE_CPU: ${FINUFFT_USE_CPU}")
message(STATUS " FINUFFT_USE_CUDA: ${FINUFFT_USE_CUDA}")
message(STATUS " FINUFFT_USE_OPENMP: ${FINUFFT_USE_OPENMP}")
message(STATUS " FINUFFT_STATIC_LINKING: ${FINUFFT_STATIC_LINKING}")
message(STATUS " FINUFFT_POSITION_INDEPENDENT_CODE: ${FINUFFT_POSITION_INDEPENDENT_CODE}")
message(STATUS " FINUFFT_ENABLE_INSTALL: ${FINUFFT_ENABLE_INSTALL}")
message(STATUS " FINUFFT_BUILD_EXAMPLES: ${FINUFFT_BUILD_EXAMPLES}")
message(STATUS " FINUFFT_BUILD_TESTS: ${FINUFFT_BUILD_TESTS}")
Expand All @@ -349,7 +367,7 @@ message(STATUS " FINUFFT_FFTW_SUFFIX: ${FINUFFT_FFTW_SUFFIX}")
message(STATUS " FINUFFT_FFTW_LIBRARIES: ${FINUFFT_FFTW_LIBRARIES}")
message(STATUS " FINUFFT_ARCH_FLAGS: ${FINUFFT_ARCH_FLAGS}")
message(STATUS " FINUFFT_USE_DUCC0: ${FINUFFT_USE_DUCC0}")

# cmake-format: on
if(FINUFFT_ENABLE_INSTALL)
include(GNUInstallDirs)
install(TARGETS ${INSTALL_TARGETS} PUBLIC_HEADER)
Expand Down
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ pipeline {
source $HOME/bin/activate
python3 -m pip install --no-cache-dir --upgrade pycuda cupy-cuda112 numba
python3 -m pip install --no-cache-dir torch==1.12.1+cu113 -f https://download.pytorch.org/whl/torch_stable.html
python3 -m pip install --no-cache-dir pytest
python3 -m pip install --no-cache-dir pytest pytest-mock
python -c "from numba import cuda; cuda.cudadrv.libs.test()"
python3 -m pytest --framework=pycuda python/cufinufft
python3 -m pytest --framework=numba python/cufinufft
Expand Down
16 changes: 11 additions & 5 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Copyright (C) 2017-2023 The Simons Foundation, Inc. - All Rights Reserved.
Copyright (C) 2017-2024 The Simons Foundation, Inc. - All Rights Reserved.

Lead developer: Alex H. Barnett; see docs/ackn.rst for other contributors.
See docs/ackn.rst for the list of code authors and contributors.

------

Expand Down Expand Up @@ -29,16 +29,22 @@ tutorial/utils/lgwt.m

If you find this library useful, or it helps you in creating software
or publications, please let us know, and acknowledge that fact by citing our
repository:
source repository:

https://github.com/flatironinstitute/finufft

and the corresponding journal articles (particularly the first):
and the corresponding journal articles (particularly the first for the CPU
and/or the last for the GPU):

A parallel non-uniform fast Fourier transform library based on an
``exponential of semicircle'' kernel. A. H. Barnett, J. F. Magland,
and L. af Klinteberg. SIAM J. Sci. Comput. 41(5), C479-C504 (2019).

Aliasing error of the exp$(\beta \sqrt{1-z^2})$ kernel in the
Aliasing error of the $\exp (\beta \sqrt{1-z^2})$ kernel in the
nonuniform fast Fourier transform. A. H. Barnett,
Appl. Comput. Harmon. Anal. 51, 1-16 (2021).

cuFINUFFT: a load-balanced GPU library for general-purpose nonuniform FFTs,
Yu-hsuan Shih, Garrett Wright, Joakim Andén, Johannes Blaschke, and
Alex H. Barnett. PDSEC2021 workshop of the IPDPS2021 conference.
https://arxiv.org/abs/2102.08463
2 changes: 1 addition & 1 deletion cmake/setupDUCC.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ if(ducc0_ADDED)
set_target_properties(
ducc0
PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
POSITION_INDEPENDENT_CODE ${FINUFFT_SHARED_LINKING})
POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE})
check_cxx_compiler_flag(-ffast-math HAS_FAST_MATH)
if(HAS_FAST_MATH)
target_compile_options(ducc0 PRIVATE -ffast-math)
Expand Down
3 changes: 2 additions & 1 deletion cmake/setupFFTW.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ if(FINUFFT_FFTW_LIBRARIES STREQUAL DEFAULT OR FINUFFT_FFTW_LIBRARIES STREQUAL
set_target_properties(
${element}
PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
POSITION_INDEPENDENT_CODE ${FINUFFT_SHARED_LINKING})
POSITION_INDEPENDENT_CODE
${FINUFFT_POSITION_INDEPENDENT_CODE})
endforeach()

target_include_directories(
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@
# built documents.
#
# The short X.Y version.
version = u'2.3-rc1'
version = u'2.3'
# The full version, including alpha/beta/rc tags.
release = u'2.3.0-rc1'
release = u'2.3.0'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
7 changes: 5 additions & 2 deletions docs/devnotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ Developer notes

* Developers needing to update/regenerate documentation in general, including our readthedocs website, see ``docs/README``. Developers changing MATLAB/octave interfaces or docs, also see ``matlab/README``. Please also see ``contributing.md`` for code style and git hook guidelines.

* To update the version number, this needs to be done by hand in the following places:
* FINUFFT is by default built with position-independent code (``-fPIC`` compile flag), by both the Makefile and CMake. See CMake options to change this for the static library. Developers changing the FINUFFT source code should use ``static`` functions unless they need to export the symbol; this will prevent performance degradation when using ``-fPIC``.

* To update the version number, this needs to be done by hand in the following places (we decided that a version-bump script is not worth the hassle):

- ``CMakeLists.txt`` for cmake
- ``docs/conf.py`` for sphinx
Expand All @@ -23,7 +25,8 @@ Developer notes

* If you add a new option field (recall it must be plain C style only, no special types) to ``include/finufft_opts.h``, don't forget to add it to ``include/finufft.fh``, ``include/finufft_mod.f90``, ``matlab/finufft.mw``, ``python/finufft/_finufft.py``, and the Julia interface, as well as a paragraph describing its use in the docs. Also set its default value in ``src/finufft.cpp``. You will then need to regenerate the docs as in ``docs/README``.

* For testing and performance measuring routines see ``test/README`` and ``perftest/README``. We need more of the latter, eg, something making performance graphs that enable rapid eyeball comparison of various settings/machines. Marco is working on that.
* For basic testing and performance measuring routines see ``test/README`` and ``perftest/README``.
To generate sets of performance graphs that enable rapid eyeball comparison between releases, see the :ref:`performance page <performance>` with graphs currently generated by ``perftest/bench.py``.

* The kernel function in spreadinterp is evaluated via piecewise-polynomial approximation (Horner's rule). The code for this is auto-generated in MATLAB, for all upsampling factors. There are two versions supported:

Expand Down
11 changes: 5 additions & 6 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,24 @@ Flatiron Institute Nonuniform Fast Fourier Transform


Documentation contents
========================
========================

.. toctree::
:maxdepth: 3

install
install_gpu
dirs
math
cex
cex
c
c_gpu
opts
error
trouble
performance
tut
fortran
fortran
matlab
python
python_gpu
Expand All @@ -42,5 +43,3 @@ Documentation contents
users
ackn
refs


Loading

0 comments on commit 2dc001a

Please sign in to comment.