Skip to content

Commit

Permalink
Merge 3.2 release branch into amd-fftw
Browse files Browse the repository at this point in the history
Change-Id: If976175f9a64305ff59bb973fb44568c01802ae2
  • Loading branch information
BiplabRaut committed Jul 8, 2022
2 parents 2b0bbb5 + 7f36218 commit e04beee
Show file tree
Hide file tree
Showing 97 changed files with 2,321 additions and 658 deletions.
89 changes: 73 additions & 16 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ if (ENABLE_AMD_APP_OPT)
endif ()
endif ()

# Abort configuration as soon as ENABLE_AMD_DYNAMIC_DISPATCHER is set; the
# error text states that the feature is not available on Windows.
# NOTE(review): no platform test guards this check in the visible code, so the
# option is rejected on every platform that uses this file - presumably this
# CMake build is Windows-only; confirm.
if(ENABLE_AMD_DYNAMIC_DISPATCHER)
message (FATAL_ERROR "AMD Dynamic Dispatcher feature is not available on windows.")
endif()

option (DISABLE_FORTRAN "Disable Fortran wrapper routines" OFF)

if (CMAKE_C_COMPILER_ID MATCHES MSVC OR CMAKE_C_COMPILER_ID MATCHES Clang)
Expand Down Expand Up @@ -243,7 +247,7 @@ if (MSVC)
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
endif(MSVC)

add_compile_definitions(AOCL_FFTW_VERSION="AOCL FFTW 3.1")
add_compile_definitions(AOCL_FFTW_VERSION="AOCL-FFTW-3.2")

find_library (LIBM_LIBRARY NAMES m)
if (LIBM_LIBRARY)
Expand Down Expand Up @@ -338,6 +342,7 @@ else()
if (ENABLE_SSE)
foreach (FLAG "-msse" "/arch:SSE")
unset (HAVE_SSE CACHE)
unset (HAVE_SSE)
check_c_compiler_flag (${FLAG} HAVE_SSE)
if (HAVE_SSE)
set (SSE_FLAG ${FLAG})
Expand All @@ -349,6 +354,7 @@ else()
if (ENABLE_SSE2)
foreach (FLAG "-msse2" "/arch:SSE2")
unset (HAVE_SSE2 CACHE)
unset (HAVE_SSE2)
check_c_compiler_flag (${FLAG} HAVE_SSE2)
if (HAVE_SSE2)
set (SSE2_FLAG ${FLAG})
Expand All @@ -360,6 +366,7 @@ else()
if (ENABLE_AVX)
foreach (FLAG "-mavx" "/arch:AVX")
unset (HAVE_AVX CACHE)
unset (HAVE_AVX)
check_c_compiler_flag (${FLAG} HAVE_AVX)
if (HAVE_AVX)
set (AVX_FLAG ${FLAG})
Expand All @@ -371,6 +378,7 @@ else()
if (ENABLE_AVX2)
foreach (FLAG "-mavx2" "/arch:AVX2")
unset (HAVE_AVX2 CACHE)
unset (HAVE_AVX2)
check_c_compiler_flag (${FLAG} HAVE_AVX2)
if (HAVE_AVX2)
set (AVX2_FLAG ${FLAG})
Expand All @@ -383,6 +391,7 @@ else()
if (ENABLE_AVX2)
foreach (FLAG "-mfma" "/arch:FMA")
unset (HAVE_FMA CACHE)
unset (HAVE_FMA)
check_c_compiler_flag (${FLAG} HAVE_FMA)
if (HAVE_FMA)
set (FMA_FLAG ${FLAG})
Expand Down Expand Up @@ -486,11 +495,7 @@ if (HAVE_AVX2)
list (APPEND SOURCEFILES ${fftw_dft_simd_avx2_SOURCE} ${fftw_rdft_simd_avx2_SOURCE})
endif ()

if (HAVE_MPI)
list (APPEND SOURCEFILES ${fftw_mpi_SOURCE})
endif ()

set (FFTW_VERSION 3.3.8)
set (FFTW_VERSION 3.3.10)

set (PREC_SUFFIX)
if (ENABLE_FLOAT)
Expand Down Expand Up @@ -539,7 +544,7 @@ if (ENABLE_AMD_TOP_N_planner)

endif ()

set (fftw3_lib fftw3${PREC_SUFFIX})
set (fftw3_lib libfftw3${PREC_SUFFIX})

configure_file (cmake.config.h.in config.h @ONLY)
include_directories (${CMAKE_CURRENT_BINARY_DIR})
Expand All @@ -549,6 +554,10 @@ if (BUILD_SHARED_LIBS)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif ()

# Always define the base FFTW library target from the accumulated SOURCEFILES
# list and export "include" as its include directory for installed consumers.
# lib_name is reassigned by the threading/OpenMP/MPI sections that follow, so
# later commands that use ${lib_name} act on whichever target was set last.
set(lib_name ${fftw3_lib})
add_library (${lib_name} ${SOURCEFILES})
target_include_directories (${lib_name} INTERFACE $<INSTALL_INTERFACE:include>)

if (Threads_FOUND)
if (WITH_COMBINED_THREADS)
set (lib_name ${fftw3_lib})
Expand All @@ -561,18 +570,26 @@ if (Threads_FOUND)
target_include_directories (${lib_name} INTERFACE $<INSTALL_INTERFACE:include>)
target_link_libraries (${lib_name} ${CMAKE_THREAD_LIBS_INIT})
endif ()
elseif (OPENMP_FOUND)
endif ()

if (OPENMP_FOUND)
set (lib_name ${fftw3_lib}_omp)
add_library (${lib_name} ${fftw_omp_SOURCE} ${SOURCEFILES})
target_include_directories (${lib_name} INTERFACE $<INSTALL_INTERFACE:include>)
target_link_libraries (${lib_name} ${CMAKE_THREAD_LIBS_INIT})
target_compile_options (${lib_name} PRIVATE ${OpenMP_C_FLAGS})
else ()
set(lib_name ${fftw3_lib})
add_library (${lib_name} ${SOURCEFILES})
target_compile_options (${lib_name} PRIVATE ${OpenMP_C_FLAGS})
endif ()

# When HAVE_MPI is set, build an additional ${fftw3_lib}_mpi library.
# The MPI sources are appended to SOURCEFILES first, so this target (and any
# target created afterwards from SOURCEFILES) includes them. lib_name is left
# pointing at the MPI target for the commands that follow this block.
if (HAVE_MPI)
list (APPEND SOURCEFILES ${fftw_mpi_SOURCE})
set (lib_name ${fftw3_lib}_mpi)
# NOTE(review): the OpenMP source list and OpenMP compile flags are applied to
# the MPI target, mirroring the OpenMP section - confirm this is intentional
# and whether MPI include paths / libraries should be attached here as well.
add_library (${lib_name} ${fftw_omp_SOURCE} ${SOURCEFILES})
target_include_directories (${lib_name} INTERFACE $<INSTALL_INTERFACE:include>)
target_link_libraries (${lib_name} ${CMAKE_THREAD_LIBS_INIT})
target_compile_options (${lib_name} PRIVATE ${OpenMP_C_FLAGS})
endif ()


target_include_directories(${lib_name} PRIVATE ${CMAKE_SOURCE_DIR}/api)

if (CMAKE_C_COMPILER_ID MATCHES MSVC)
Expand Down Expand Up @@ -616,18 +633,58 @@ install(TARGETS ${lib_name}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})

install (FILES api/fftw3.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
if (EXISTS ${CMAKE_SOURCE_DIR}/api/fftw3.f)

file(COPY api/fftw3.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/api/fftw3.f)
install (FILES api/fftw3.f api/fftw3l.f03 api/fftw3q.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
file(COPY api/fftw3.f api/fftw3l.f03 api/fftw3q.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif ()
if (EXISTS ${CMAKE_SOURCE_DIR}/api/fftw3.f03.in)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/api/fftw3.f03.in)
file (READ api/fftw3.f03.in FFTW3_F03_IN OFFSET 42)
file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 "! Generated automatically. DO NOT EDIT!\n\n")
file (APPEND ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 " integer, parameter :: C_FFTW_R2R_KIND = ${C_FFTW_R2R_KIND}\n\n")
file (APPEND ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 "${FFTW3_F03_IN}")
install (FILES ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
file(COPY ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif ()


# Install the MPI public header and, when their templates are present, the
# generated Fortran 2003 MPI interface files. Every installed file is also
# copied into ${CMAKE_INSTALL_INCLUDEDIR} at configure time, as is done for the
# non-MPI headers.
if (HAVE_MPI)
  install (FILES mpi/fftw3-mpi.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
  file (COPY mpi/fftw3-mpi.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

  # fftw3-mpi.f03: generated header line followed by the template contents,
  # skipping the first 42 bytes of the template.
  if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mpi/fftw3-mpi.f03.in)
    file (READ mpi/fftw3-mpi.f03.in FFTW3-MPI_F03_IN OFFSET 42)
    file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/fftw3-mpi.f03 "! Generated automatically.  DO NOT EDIT!\n\n")
    file (APPEND ${CMAKE_CURRENT_BINARY_DIR}/fftw3-mpi.f03 "${FFTW3-MPI_F03_IN}")
    install (FILES ${CMAKE_CURRENT_BINARY_DIR}/fftw3-mpi.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
    # Copy inside the guard: the file only exists when the template does, and
    # file(COPY) aborts configuration if a listed input is missing.
    file (COPY ${CMAKE_CURRENT_BINARY_DIR}/fftw3-mpi.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
  endif ()

  # fftw3l-mpi.f03: same procedure for the second MPI interface template.
  if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mpi/fftw3l-mpi.f03.in)
    file (READ mpi/fftw3l-mpi.f03.in FFTW3L-MPI_F03_IN OFFSET 42)
    file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/fftw3l-mpi.f03 "! Generated automatically.  DO NOT EDIT!\n\n")
    file (APPEND ${CMAKE_CURRENT_BINARY_DIR}/fftw3l-mpi.f03 "${FFTW3L-MPI_F03_IN}")
    install (FILES ${CMAKE_CURRENT_BINARY_DIR}/fftw3l-mpi.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
    file (COPY ${CMAKE_CURRENT_BINARY_DIR}/fftw3l-mpi.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
  endif ()
endif ()

# Static helper libraries linked into the fftw-wisdom tool defined below:
# libbench2 is built from the fftw_libbench2_SOURCE list, libtestbench from the
# benchmark driver sources under tests/.
add_library (libbench2 STATIC ${fftw_libbench2_SOURCE})
add_library (libtestbench STATIC tests/bench.c tests/hook.c tests/fftw-bench.c tests/fftw-bench.h)

# Pick the threading companion library (LIBFFTWTHREADS) that the tools below
# link against: the pthread-based "_threads" library when Threads was found,
# otherwise the "_omp" library when OpenMP was found. LIBFFTWTHREADS stays
# unset when neither is available or when threads are merged into the main
# library.
# NOTE(review): CMAKE_C_COMPILER_CFLAGS does not appear to be a variable CMake
# itself consumes - confirm something in the project reads it.
if (Threads_FOUND)
  set (CMAKE_C_COMPILER_CFLAGS ${PTHREAD_CFLAGS})
  # The library-definition section keys off WITH_COMBINED_THREADS; honour it
  # here too (COMBINED_THREADS is still checked for backward compatibility) so
  # a non-existent "_threads" library is not linked in combined mode.
  if (NOT WITH_COMBINED_THREADS AND NOT COMBINED_THREADS)
    # No "=" here: set() takes "<name> <value>...", and a stray "=" would
    # become the first element of the list.
    set (LIBFFTWTHREADS libfftw3${PREC_SUFFIX}_threads)
  endif ()
elseif (OPENMP_FOUND)
  set (CMAKE_C_COMPILER_CFLAGS ${OpenMP_C_FLAGS})
  set (LIBFFTWTHREADS libfftw3${PREC_SUFFIX}_omp)
endif ()

# fftw<suffix>-wisdom command-line tool: built from tools/fftw-wisdom.c, linked
# against the bench helper libraries, the optional threading library and the
# FFTW library currently named by lib_name, then installed into the binary
# directory.
set (fftw${PREC_SUFFIX}-wisdom_src tools/fftw-wisdom.c)

add_executable (
  fftw${PREC_SUFFIX}-wisdom
  ${fftw${PREC_SUFFIX}-wisdom_src}
)

target_link_libraries (
  fftw${PREC_SUFFIX}-wisdom
  libbench2
  ${LIBFFTWTHREADS}
  ${lib_name}
  libtestbench
)

install (
  TARGETS fftw${PREC_SUFFIX}-wisdom
  ${INSTALL_TARGETS_DEFAULT_ARGS}
  DESTINATION ${CMAKE_INSTALL_BINDIR}
)


if (BUILD_TESTS)
add_executable (bench tests/bench.c tests/hook.c tests/fftw-bench.c)
Expand Down Expand Up @@ -661,9 +718,9 @@ set (exec_prefix ${CMAKE_INSTALL_PREFIX})
set (libdir ${CMAKE_INSTALL_FULL_LIBDIR})
set (includedir ${CMAKE_INSTALL_FULL_INCLUDEDIR})
set (VERSION ${FFTW_VERSION})
configure_file (fftw.pc.in fftw${PREC_SUFFIX}.pc @ONLY)
configure_file (fftw.pc.in fftw3${PREC_SUFFIX}.pc @ONLY)
install (FILES
${CMAKE_CURRENT_BINARY_DIR}/fftw${PREC_SUFFIX}.pc
${CMAKE_CURRENT_BINARY_DIR}/fftw3${PREC_SUFFIX}.pc
DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig
COMPONENT Development)

Expand Down
2 changes: 1 addition & 1 deletion COPYRIGHT
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
* Copyright (C) 2019-2021, Advanced Micro Devices, Inc. All Rights Reserved.
* Copyright (C) 2019-2022, Advanced Micro Devices, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down
58 changes: 51 additions & 7 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,47 @@
FFTW 3.3.10:

* Fix bug that would cause 2-way SIMD (notably SSE2 in double precision)
to attempt unaligned accesses in certain obscure cases, causing
segfaults.

The following test triggers the bug (SSE2, double precision):

./tests/bench -oexhaustive r4*2:5:3

This test computes a pair of length-4 real->complex transforms where
the second input is 5 real numbers away from the first input. That
is, there is a gap of one real number between the first and second
input array. The -oexhaustive level allows FFTW to attempt to
compute this transform by reducing it to a pair of complex
transforms of length 2, but now the second input is not aligned to a
complex-number boundary. The fact that 5 is odd is the problem.

The bug cannot occur in complex->complex transforms because the
complex interface accepts strides in units of complex numbers, so
strides are aligned by construction.

This bug has been around at least since fftw-3.1.2 (July 2006), and
probably since fftw-3.0 (2003).

FFTW 3.3.9:

* New API fftw_planner_nthreads() returns the number of threads
currently being used by the planner.

* Fix incorrect math in 128-bit generic SIMD

* Fix wisdom for avx512.

The avx512 alignment requirement was set to 64 bytes, but this is
wrong. Alignment requirements are a property of the platform (e.g.,
x86) and not of the instruction set (e.g., AVX). Among other
things, this broke wisdom with avx512.

Note that avx512 support is still experimental because the FFTW
authors have no avx512 hardware available for testing.

* fftw_threads_set_callback function to change the threading backend at runtime.

FFTW 3.3.8:

* Fixed AVX, AVX2 for gcc-8.
Expand Down Expand Up @@ -33,7 +77,7 @@ FFTW 3.3.6-pl1:

* Bugfix: FFTW 3.3.6 had the wrong libtool version number, and generated
shared libraries of the form libfftw3.so.2.6.6 instead of
libfftw3.so.3.*.
libfftw3.so.3.*.

FFTW 3.3.6:

Expand All @@ -58,7 +102,7 @@ FFTW 3.3.5:
This code is expected to work but the FFTW maintainers do not have
hardware to test it.
- generic SIMD support using gcc vector intrinsics
* Add fftw_make_planner_thread_safe() API
* Add fftw_make_planner_thread_safe() API
* fix #18 (disable float128 for CUDACC)
* fix #19: missing Fortran interface for fftwq_alloc_real
* fix #21 (don't use float128 on Portland compilers, which pretend to be gcc)
Expand Down Expand Up @@ -107,7 +151,7 @@ FFTW 3.3.2

* Added stack-alignment hack necessary for gcc on Windows/i386. We
will regret this in ten years (see previous change).

* Fix incompatibility with Intel icc which pretends to be gcc
but does not support quad precision.

Expand All @@ -118,7 +162,7 @@ FFTW 3.3.2
FFTW 3.3.1

* Changes since 3.3.1-beta1:

- Reduced planning time in estimate mode for sizes with large
prime factors.

Expand Down Expand Up @@ -190,7 +234,7 @@ FFTW 3.3
fftw_import_wisdom_from_filename that export/import wisdom
to a file, which don't require you to open/close the file yourself.

* New function fftw_cost to return FFTW's internal cost metric for
* New function fftw_cost to return FFTW's internal cost metric for
a given plan; thanks to Rhys Ulerich and Nathanael Schaeffer for the
suggestion.

Expand Down Expand Up @@ -248,7 +292,7 @@ FFTW 3.2.1

* FAQ was accidentally omitted from 3.2 tarball.

* Remove some extraneous (harmless) files accidentally included in
* Remove some extraneous (harmless) files accidentally included in
a subdirectory of the 3.2 tarball.

FFTW 3.2
Expand Down Expand Up @@ -417,7 +461,7 @@ FFTW 3.1

* Various documentation clarifications.

* 64-bit clean. (Fixes a bug affecting the split guru planner on
* 64-bit clean. (Fixes a bug affecting the split guru planner on
64-bit machines, reported by David Necas.)

* Fixed Debian bug #259612: inadvertent use of SSE instructions on
Expand Down
21 changes: 21 additions & 0 deletions README
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
*DO NOT CHECK OUT THESE FILES FROM GITHUB UNLESS YOU KNOW WHAT YOU ARE
DOING.* (See below.)

This is the git repository for the FFTW library for computing Fourier
transforms (version 3.x), maintained by the FFTW authors.

Unlike most other programs, most of the FFTW source code (in C) is
generated automatically. This repository contains the *generator* and
it does not contain the *generated code*. *YOU WILL BE UNABLE TO
COMPILE CODE FROM THIS REPOSITORY* unless you have special tools and
know what you are doing. In particular, do not expect things to
work by simply executing `configure; make` or `cmake`.

Most users should ignore this repository, and should instead download
official tarballs from http://fftw.org/, which contain the generated
code, do not require any special tools or knowledge, and can be
compiled on any system with a C compiler.

Advanced users and FFTW maintainers may obtain code from github and
run the generation process themselves. See [README](README) for
details.
Loading

0 comments on commit e04beee

Please sign in to comment.