From af555c50124aa2e45ba9eb7851eb72b3bed885ee Mon Sep 17 00:00:00 2001 From: rahulraj Date: Mon, 11 Mar 2024 15:32:04 +0530 Subject: [PATCH 1/9] AOCL-FFTW version bumped to 4.2.1 AMD-Internal: [CPUPL-4748] Change-Id: Ia933883b0cbf22acd2e84193244d6a764b91a107 --- CMakeLists.txt | 2 +- configure | 2 +- configure.ac | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 76a51828..0247b0f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -252,7 +252,7 @@ if (MSVC) endif(MSVC) string(TIMESTAMP TODAY "%Y%m%d") -add_compile_definitions(AOCL_FFTW_VERSION="AOCL-FFTW 4.1.1 Build ${TODAY}") +add_compile_definitions(AOCL_FFTW_VERSION="AOCL-FFTW 4.2.1 Build ${TODAY}") find_library (LIBM_LIBRARY NAMES m) if (LIBM_LIBRARY) diff --git a/configure b/configure index 2ca00e6d..d8381407 100755 --- a/configure +++ b/configure @@ -18385,7 +18385,7 @@ fi dateVar=`date +"%Y%m%d"` -printf "%s\n" "#define AOCL_FFTW_VERSION \"AOCL-FFTW 4.1.1 Build $dateVar\"" >>confdefs.h +printf "%s\n" "#define AOCL_FFTW_VERSION \"AOCL-FFTW 4.2.1 Build $dateVar\"" >>confdefs.h # Check whether --enable-amd-fast-planner was given. 
if test ${enable_amd_fast_planner+y} diff --git a/configure.ac b/configure.ac index e5196509..d6b228d3 100644 --- a/configure.ac +++ b/configure.ac @@ -790,7 +790,7 @@ AC_ARG_ENABLE(openmp, [AC_HELP_STRING([--enable-openmp],[use OpenMP directives f AC_ARG_ENABLE(threads, [AC_HELP_STRING([--enable-threads],[compile FFTW SMP threads library])], enable_threads=$enableval, enable_threads=no) dnl aocl version number of amd-fftw dateVar=`date +"%Y%m%d"` -AC_DEFINE_UNQUOTED(AOCL_FFTW_VERSION,"AOCL-FFTW 4.1.1 Build $dateVar",[AOCL Version of AMD-FFTW]) +AC_DEFINE_UNQUOTED(AOCL_FFTW_VERSION,"AOCL-FFTW 4.2.1 Build $dateVar",[AOCL Version of AMD-FFTW]) dnl amd optimization switch to enable AMD Fast Planner for AMD cpus --enable-amd-fast-planner AC_ARG_ENABLE(amd-fast-planner, [AC_HELP_STRING([--enable-amd-fast-planner],[enable AMD Fast Planner for a faster planning time on AMD cpus])], have_amd_fast_planner=$enableval, have_amd_fast_planner=no) dnl amd optimization switch to enable AMD Top N Planner for AMD cpus --enable-amd-top-n-planner From 01d4d668ef2c3c44dd25866b90b25ca39f0e41da Mon Sep 17 00:00:00 2001 From: rahulraj Date: Mon, 15 Jul 2024 20:19:23 +0530 Subject: [PATCH 2/9] AOCL-FFTW: Code changes to link Intel OpenMP Library and user Defined OpenMP path Signed-off-by: Rahul AMD-Internal: [CPUPL-5177] Change-Id: I9db5b8e66c95e61ae8cf2c90333f5a5e21f072d5 --- CMakeLists.txt | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0247b0f7..08ffde11 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -270,6 +270,10 @@ if (Threads_FOUND) set (HAVE_THREADS TRUE) endif () +if (WIN32) + set(OpenMP_libomp_LIBRARY "C:/Program Files/LLVM/lib/libomp.lib" CACHE STRING "openmp library path") +endif (WIN32) + if (ENABLE_OPENMP) find_package (OpenMP) endif () @@ -614,6 +618,9 @@ if (OPENMP_FOUND) target_include_directories (${lib_name} INTERFACE $) target_link_libraries (${lib_name} ${CMAKE_THREAD_LIBS_INIT}) target_compile_options 
(${lib_name} PRIVATE ${OpenMP_C_FLAGS}) + if (BUILD_SHARED_LIBS) + target_link_libraries(${lib_name} PRIVATE OpenMP::OpenMP_C) + endif(BUILD_SHARED_LIBS) endif () if (HAVE_MPI) @@ -623,6 +630,9 @@ if (HAVE_MPI) target_include_directories (${lib_name} INTERFACE $) target_link_libraries (${lib_name} ${CMAKE_THREAD_LIBS_INIT}) target_compile_options (${lib_name} PRIVATE ${OpenMP_C_FLAGS}) + if (BUILD_SHARED_LIBS AND OPENMP_FOUND) + target_link_libraries(${lib_name} PRIVATE OpenMP::OpenMP_C) + endif() endif () @@ -722,6 +732,9 @@ set(fftw${PREC_SUFFIX}-wisdom_src tools/fftw-wisdom.c) add_executable(fftw${PREC_SUFFIX}-wisdom ${fftw${PREC_SUFFIX}-wisdom_src}) target_link_libraries(fftw${PREC_SUFFIX}-wisdom libbench2 ${LIBFFTWTHREADS} ${lib_name} libtestbench) +if (OPENMP_FOUND) + target_link_libraries(fftw${PREC_SUFFIX}-wisdom OpenMP::OpenMP_C) +endif () install(TARGETS fftw${PREC_SUFFIX}-wisdom ${INSTALL_TARGETS_DEFAULT_ARGS} DESTINATION ${CMAKE_INSTALL_BINDIR}) @@ -729,12 +742,18 @@ if (BUILD_TESTS) add_executable (bench tests/bench.c tests/hook.c tests/fftw-bench.c) target_link_libraries (bench libbench2) target_link_libraries (bench ${lib_name}) + if (OPENMP_FOUND) + target_link_libraries(bench OpenMP::OpenMP_C) + endif () if (HAVE_MPI) add_executable (mpi-bench mpi/mpi-bench.c tests/hook.c tests/fftw-bench.c) target_include_directories (mpi-bench PRIVATE ${CMAKE_SOURCE_DIR}/api) target_include_directories (mpi-bench PRIVATE ${MPI_INCLUDE_PATH}) target_link_libraries (mpi-bench libbench2 ${MPI_LIBRARIES} ${lib_name}) + if (OPENMP_FOUND) + target_link_libraries(mpi-bench OpenMP::OpenMP_C) + endif () endif () enable_testing () From 1db2cf271c410aa7f756710eb24d5a4c9f7822d2 Mon Sep 17 00:00:00 2001 From: Madhusudhan S Date: Thu, 11 Jul 2024 07:37:51 -0700 Subject: [PATCH 3/9] Enable applications to generate and use wisdom files The commit adds this feature as part of AMD application optimization layer. This can be enabled by using configure option --enable-amd-app-opt. 
AMD-Internal: [SWLCSG-2161] Change-Id: I094a1f9df95dfea82ab15d9a24e86839cf999ab4 --- api/apiplan.c | 38 +++++++++++++++++++++----------------- kernel/ifftw.h | 5 +++-- kernel/planner.c | 11 ++++++++++- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/api/apiplan.c b/api/apiplan.c index acc73c88..11cf3bd5 100644 --- a/api/apiplan.c +++ b/api/apiplan.c @@ -25,9 +25,12 @@ #include "kernel/ifftw.h" #include "dft/dft.h" #include "rdft/rdft.h" +#endif -static int wisdom_one_time_read = 0; +#ifdef AMD_APP_OPT_GENERATE_WISDOM +int wisdom_write_set = 0; #endif + static planner_hook_t before_planner_hook = 0, after_planner_hook = 0; void X(set_planner_hooks)(planner_hook_t before, planner_hook_t after) @@ -176,19 +179,18 @@ static int create_amd_app_layer(int sign, unsigned *flags, problem *prb, app_lay *flags &= ~(FFTW_ESTIMATE | FFTW_MEASURE | FFTW_PATIENT | FFTW_EXHAUSTIVE); *flags |= FFTW_PATIENT; + #ifdef AMD_APP_OPT_USE_WISDOM - if (wisdom_one_time_read == 0) - { - if (!X(import_wisdom_from_filename)("wis.dat")) - { - //fprintf(stderr, "apiplan: ERROR reading wisdom wis.dat\n"); - } -#ifdef AMD_APP_OPT_GENERATE_WISDOM - wisdom_one_time_read = 1; -#endif + /* + * Enable applications to use the wisdom file if already present. + * If the wisdom file is not available/applicable, the planner creates + * a new plan for the problem. + */ + if (!X(import_wisdom_from_filename)("wis.dat")) + { + //fprintf(stderr, "apiplan: ERROR reading wisdom wis.dat\n"); } #endif - if(prb->adt->problem_kind == PROBLEM_DFT) { problem_dft *pdft = (problem_dft *) prb; @@ -424,14 +426,16 @@ static void destroy_amd_app_layer(problem *prb, app_layer_data *app_layer) { int inplace = 0; -#ifdef AMD_APP_OPT_USE_WISDOM - if (wisdom_one_time_read == 0) - { #ifdef AMD_APP_OPT_GENERATE_WISDOM - wisdom_one_time_read = 1; -#endif + /* + * The write permission is set by the planner to export wisdom. + * The newly generated plan is exported to the wisdom file. 
+ */ + if (wisdom_write_set) + { X(export_wisdom_to_filename)("wis.dat"); - } + wisdom_write_set = 0; + } #endif if(prb->adt->problem_kind == PROBLEM_DFT) diff --git a/kernel/ifftw.h b/kernel/ifftw.h index c72d7d5d..b4c8a748 100644 --- a/kernel/ifftw.h +++ b/kernel/ifftw.h @@ -182,10 +182,11 @@ extern "C" #ifdef AMD_APP_OPT_LAYER //AMD's application optimization layer //Enable/disable separate memory even for output buffer in case of out-of-place FFT //#define AMD_APP_OPT_OUT_BUFFER_MEM +extern int wisdom_write_set; /* flag to check if the write operation to wisdom is allowed or not */ //Enable this switch to use wisdom feature in combination with application optimization layer. -//#define AMD_APP_OPT_USE_WISDOM +#define AMD_APP_OPT_USE_WISDOM //Enable this switch to generate wisdom file for the first time for the application. -//#define AMD_APP_OPT_GENERATE_WISDOM +#define AMD_APP_OPT_GENERATE_WISDOM //Debug print logs for the application optimization layer //#define AMD_APP_LAYER_API_LOGS //Maximum size of Unblessed Hash table kept alive to reuse the saved plans directly from it. diff --git a/kernel/planner.c b/kernel/planner.c index 6d5b22a9..741b0e72 100644 --- a/kernel/planner.c +++ b/kernel/planner.c @@ -28,6 +28,7 @@ int wisp_set; //Referring to extern variable declared in kernel/ifftw.h #endif + /* GNU Coding Standards, Sec. 5.2: "Please write the comments in a GNU program in English, because English is the one language that nearly all programmers in all countries can read." 
@@ -923,7 +924,7 @@ static plan *mkplan(planner *ego, const problem *p) #ifdef AMD_TOP_N_PLANNER if (wisp_set && AMD_OPT_TOP_N > 1) evaluate_plan(ego, pln, p); -#endif +#endif goto skip_search; } else if (ego->nowisdom_hook) /* for MPI, make sure lack of wisdom */ @@ -931,6 +932,14 @@ static plan *mkplan(planner *ego, const problem *p) } do_search: +#ifdef AMD_APP_OPT_GENERATE_WISDOM + /* When an application cannot find a plan from the wisdom file, + * the planner creates a best plan by searching/evaluating multiple plans. + * Below flag sets the permission to export best plan to the wisdom file for + * future reference. + */ + wisdom_write_set = 1; +#endif /* cannot search in WISDOM_ONLY mode */ if (ego->wisdom_state == WISDOM_ONLY) goto wisdom_is_bogus; From 501b1de35507bb37684b88bda059ffb6faef9474 Mon Sep 17 00:00:00 2001 From: sraut Date: Fri, 2 Aug 2024 18:23:59 +0000 Subject: [PATCH 4/9] Fix clang compilation error when using config option --enable-threads Add function multi-versioning by using target-clones in threads.c AMD-Internal: [CPUPL-5403] Change-Id: I0ab282a2d342a92182976ab659d85edee13f77a9 --- threads/threads.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/threads/threads.c b/threads/threads.c index 25e79b4c..a7508f74 100644 --- a/threads/threads.c +++ b/threads/threads.c @@ -396,6 +396,9 @@ int X(ithreads_init)(void) the same as the data parameter passed to X(spawn_loop). This function returns only after all the threads have completed. 
*/ +#ifdef AMD_FMV_AUTO +__attribute__((target_clones(TARGET_STRINGS))) +#endif void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) { int block_size; From 93170d63c2ea6ba794fc92966ee58dcac7dc322c Mon Sep 17 00:00:00 2001 From: Rahul Raj Date: Thu, 22 Aug 2024 18:39:47 +0530 Subject: [PATCH 5/9] Fix for Compilation error while configuring with specific combination in Linux and Windows - Added higher radix missing kernels for fixing build errors with the config options --enable-avx-128-fma, --enable-generic-simd128 and --enable-generic-simd256 - Fix the CMake error for threads Signed-off-by: Rahul AMD-Internal: [CPUPL-5597] Change-Id: I581d6ab0b7210c6ad662c8d737d4ebd44e64382f (cherry picked from commit fafdff2afa788076b403f7586e562d9be8f542a7) --- CMakeLists.txt | 2 +- dft/simd/avx-128-fma/Makefile.in | 69 ++++++++++++++++++---------- dft/simd/avx-128-fma/n1fv_256.c | 3 ++ dft/simd/avx-128-fma/n1fv_512.c | 3 ++ dft/simd/avx-128-fma/n2fv_128.c | 3 ++ dft/simd/avx-128-fma/n2fv_256.c | 3 ++ dft/simd/avx-128-fma/n2fv_512.c | 3 ++ dft/simd/avx-128-fma/q1fv_10.c | 3 ++ dft/simd/avx-128-fma/q1fv_12.c | 3 ++ dft/simd/avx-128-fma/q1fv_16.c | 3 ++ dft/simd/avx-128-fma/q1fv_20.c | 3 ++ dft/simd/avx-128-fma/q1fv_32.c | 3 ++ dft/simd/avx-128-fma/t1fv_128.c | 3 ++ dft/simd/avx-128-fma/t1fv_256.c | 3 ++ dft/simd/avx-128-fma/t2fv_128.c | 3 ++ dft/simd/avx-128-fma/t2fv_256.c | 3 ++ dft/simd/avx-128-fma/t3fv_128.c | 3 ++ dft/simd/avx-128-fma/t3fv_64.c | 3 ++ dft/simd/generic-simd128/Makefile.in | 69 ++++++++++++++++++---------- dft/simd/generic-simd128/n1fv_256.c | 3 ++ dft/simd/generic-simd128/n1fv_512.c | 3 ++ dft/simd/generic-simd128/n2fv_128.c | 3 ++ dft/simd/generic-simd128/n2fv_256.c | 3 ++ dft/simd/generic-simd128/n2fv_512.c | 3 ++ dft/simd/generic-simd128/q1fv_10.c | 3 ++ dft/simd/generic-simd128/q1fv_12.c | 3 ++ dft/simd/generic-simd128/q1fv_16.c | 3 ++ dft/simd/generic-simd128/q1fv_20.c | 3 ++ dft/simd/generic-simd128/q1fv_32.c | 3 ++ 
dft/simd/generic-simd128/t1fv_128.c | 3 ++ dft/simd/generic-simd128/t1fv_256.c | 3 ++ dft/simd/generic-simd128/t2fv_128.c | 3 ++ dft/simd/generic-simd128/t2fv_256.c | 3 ++ dft/simd/generic-simd128/t3fv_128.c | 3 ++ dft/simd/generic-simd128/t3fv_64.c | 3 ++ dft/simd/generic-simd256/Makefile.in | 69 ++++++++++++++++++---------- dft/simd/generic-simd256/n1fv_256.c | 3 ++ dft/simd/generic-simd256/n1fv_512.c | 3 ++ dft/simd/generic-simd256/n2fv_128.c | 3 ++ dft/simd/generic-simd256/n2fv_256.c | 3 ++ dft/simd/generic-simd256/n2fv_512.c | 3 ++ dft/simd/generic-simd256/q1fv_10.c | 3 ++ dft/simd/generic-simd256/q1fv_12.c | 3 ++ dft/simd/generic-simd256/q1fv_16.c | 3 ++ dft/simd/generic-simd256/q1fv_20.c | 3 ++ dft/simd/generic-simd256/q1fv_32.c | 3 ++ dft/simd/generic-simd256/t1fv_128.c | 3 ++ dft/simd/generic-simd256/t1fv_256.c | 3 ++ dft/simd/generic-simd256/t2fv_128.c | 3 ++ dft/simd/generic-simd256/t2fv_256.c | 3 ++ dft/simd/generic-simd256/t3fv_128.c | 3 ++ dft/simd/generic-simd256/t3fv_64.c | 3 ++ kernel/cycle.h | 4 +- 53 files changed, 285 insertions(+), 72 deletions(-) create mode 100644 dft/simd/avx-128-fma/n1fv_256.c create mode 100644 dft/simd/avx-128-fma/n1fv_512.c create mode 100644 dft/simd/avx-128-fma/n2fv_128.c create mode 100644 dft/simd/avx-128-fma/n2fv_256.c create mode 100644 dft/simd/avx-128-fma/n2fv_512.c create mode 100644 dft/simd/avx-128-fma/q1fv_10.c create mode 100644 dft/simd/avx-128-fma/q1fv_12.c create mode 100644 dft/simd/avx-128-fma/q1fv_16.c create mode 100644 dft/simd/avx-128-fma/q1fv_20.c create mode 100644 dft/simd/avx-128-fma/q1fv_32.c create mode 100644 dft/simd/avx-128-fma/t1fv_128.c create mode 100644 dft/simd/avx-128-fma/t1fv_256.c create mode 100644 dft/simd/avx-128-fma/t2fv_128.c create mode 100644 dft/simd/avx-128-fma/t2fv_256.c create mode 100644 dft/simd/avx-128-fma/t3fv_128.c create mode 100644 dft/simd/avx-128-fma/t3fv_64.c create mode 100644 dft/simd/generic-simd128/n1fv_256.c create mode 100644 
dft/simd/generic-simd128/n1fv_512.c create mode 100644 dft/simd/generic-simd128/n2fv_128.c create mode 100644 dft/simd/generic-simd128/n2fv_256.c create mode 100644 dft/simd/generic-simd128/n2fv_512.c create mode 100644 dft/simd/generic-simd128/q1fv_10.c create mode 100644 dft/simd/generic-simd128/q1fv_12.c create mode 100644 dft/simd/generic-simd128/q1fv_16.c create mode 100644 dft/simd/generic-simd128/q1fv_20.c create mode 100644 dft/simd/generic-simd128/q1fv_32.c create mode 100644 dft/simd/generic-simd128/t1fv_128.c create mode 100644 dft/simd/generic-simd128/t1fv_256.c create mode 100644 dft/simd/generic-simd128/t2fv_128.c create mode 100644 dft/simd/generic-simd128/t2fv_256.c create mode 100644 dft/simd/generic-simd128/t3fv_128.c create mode 100644 dft/simd/generic-simd128/t3fv_64.c create mode 100644 dft/simd/generic-simd256/n1fv_256.c create mode 100644 dft/simd/generic-simd256/n1fv_512.c create mode 100644 dft/simd/generic-simd256/n2fv_128.c create mode 100644 dft/simd/generic-simd256/n2fv_256.c create mode 100644 dft/simd/generic-simd256/n2fv_512.c create mode 100644 dft/simd/generic-simd256/q1fv_10.c create mode 100644 dft/simd/generic-simd256/q1fv_12.c create mode 100644 dft/simd/generic-simd256/q1fv_16.c create mode 100644 dft/simd/generic-simd256/q1fv_20.c create mode 100644 dft/simd/generic-simd256/q1fv_32.c create mode 100644 dft/simd/generic-simd256/t1fv_128.c create mode 100644 dft/simd/generic-simd256/t1fv_256.c create mode 100644 dft/simd/generic-simd256/t2fv_128.c create mode 100644 dft/simd/generic-simd256/t2fv_256.c create mode 100644 dft/simd/generic-simd256/t3fv_128.c create mode 100644 dft/simd/generic-simd256/t3fv_64.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 08ffde11..7aabf10a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -721,7 +721,7 @@ add_library (libtestbench STATIC tests/bench.c tests/hook.c tests/fftw-bench.c t if(Threads_FOUND) set(CMAKE_C_COMPILER_CFLAGS ${PTHREAD_CFLAGS}) if(NOT COMBINED_THREADS) - 
set(LIBFFTWTHREADS = libfftw3${PREC_SUFFIX}_threads) + set(LIBFFTWTHREADS libfftw3${PREC_SUFFIX}_threads) endif() elseif(OPENMP_FOUND) set(CMAKE_C_COMPILER_CFLAGS ${OpenMP_C_FLAGS}) diff --git a/dft/simd/avx-128-fma/Makefile.in b/dft/simd/avx-128-fma/Makefile.in index 44ad068f..b008f7cf 100644 --- a/dft/simd/avx-128-fma/Makefile.in +++ b/dft/simd/avx-128-fma/Makefile.in @@ -125,22 +125,24 @@ libdft_avx_128_fma_codelets_la_LIBADD = am__libdft_avx_128_fma_codelets_la_SOURCES_DIST = n1fv_2.c n1fv_3.c \ n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c n1fv_9.c \ n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \ - n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c \ - n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c \ - n1bv_9.c n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c \ - n1bv_15.c n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c \ - n1bv_25.c n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c \ - n2fv_12.c n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c \ + n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_256.c n1fv_512.c \ + n1fv_20.c n1fv_25.c n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c \ + n1bv_6.c n1bv_7.c n1bv_8.c n1bv_9.c n1bv_10.c n1bv_11.c \ + n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c n1bv_16.c n1bv_32.c \ + n1bv_64.c n1bv_128.c n1bv_20.c n1bv_25.c n2fv_2.c n2fv_4.c \ + n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c n2fv_14.c n2fv_16.c \ + n2fv_32.c n2fv_64.c n2fv_128.c n2fv_256.c n2fv_512.c n2fv_20.c \ n2bv_2.c n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c \ n2bv_14.c n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c n2sv_4.c \ n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c t1fuv_2.c t1fuv_3.c \ t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c t1fuv_8.c t1fuv_9.c \ t1fuv_10.c t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c \ t1fv_7.c t1fv_8.c t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c \ - t1fv_16.c t1fv_32.c t1fv_64.c t1fv_20.c t1fv_25.c t2fv_2.c \ - t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c t2fv_5.c \ - t2fv_10.c t2fv_20.c t2fv_25.c t3fv_4.c t3fv_8.c t3fv_16.c \ - 
t3fv_32.c t3fv_5.c t3fv_10.c t3fv_20.c t3fv_25.c t1buv_2.c \ + t1fv_16.c t1fv_32.c t1fv_64.c t1fv_20.c t1fv_25.c t1fv_128.c \ + t1fv_256.c t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c \ + t2fv_64.c t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c t2fv_128.c \ + t2fv_256.c t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c \ + t3fv_10.c t3fv_20.c t3fv_25.c t3fv_128.c t3fv_64.c t1buv_2.c \ t1buv_3.c t1buv_4.c t1buv_5.c t1buv_6.c t1buv_7.c t1buv_8.c \ t1buv_9.c t1buv_10.c t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c \ t1bv_6.c t1bv_7.c t1bv_8.c t1bv_9.c t1bv_10.c t1bv_12.c \ @@ -150,18 +152,20 @@ am__libdft_avx_128_fma_codelets_la_SOURCES_DIST = n1fv_2.c n1fv_3.c \ t3bv_16.c t3bv_32.c t3bv_5.c t3bv_10.c t3bv_20.c t3bv_25.c \ t1sv_2.c t1sv_4.c t1sv_8.c t1sv_16.c t1sv_32.c t2sv_4.c \ t2sv_8.c t2sv_16.c t2sv_32.c q1fv_2.c q1fv_4.c q1fv_5.c \ - q1fv_8.c q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c genus.c codlist.c + q1fv_8.c q1fv_10.c q1fv_12.c q1fv_16.c q1fv_20.c q1fv_32.c \ + q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c genus.c codlist.c am__objects_1 = n1fv_2.lo n1fv_3.lo n1fv_4.lo n1fv_5.lo n1fv_6.lo \ n1fv_7.lo n1fv_8.lo n1fv_9.lo n1fv_10.lo n1fv_11.lo n1fv_12.lo \ n1fv_13.lo n1fv_14.lo n1fv_15.lo n1fv_16.lo n1fv_32.lo \ - n1fv_64.lo n1fv_128.lo n1fv_20.lo n1fv_25.lo + n1fv_64.lo n1fv_128.lo n1fv_256.lo n1fv_512.lo n1fv_20.lo \ + n1fv_25.lo am__objects_2 = n1bv_2.lo n1bv_3.lo n1bv_4.lo n1bv_5.lo n1bv_6.lo \ n1bv_7.lo n1bv_8.lo n1bv_9.lo n1bv_10.lo n1bv_11.lo n1bv_12.lo \ n1bv_13.lo n1bv_14.lo n1bv_15.lo n1bv_16.lo n1bv_32.lo \ n1bv_64.lo n1bv_128.lo n1bv_20.lo n1bv_25.lo am__objects_3 = n2fv_2.lo n2fv_4.lo n2fv_6.lo n2fv_8.lo n2fv_10.lo \ n2fv_12.lo n2fv_14.lo n2fv_16.lo n2fv_32.lo n2fv_64.lo \ - n2fv_20.lo + n2fv_128.lo n2fv_256.lo n2fv_512.lo n2fv_20.lo am__objects_4 = n2bv_2.lo n2bv_4.lo n2bv_6.lo n2bv_8.lo n2bv_10.lo \ n2bv_12.lo n2bv_14.lo n2bv_16.lo n2bv_32.lo n2bv_64.lo \ n2bv_20.lo @@ -170,11 +174,13 @@ am__objects_6 = t1fuv_2.lo t1fuv_3.lo t1fuv_4.lo t1fuv_5.lo t1fuv_6.lo \ t1fuv_7.lo t1fuv_8.lo 
t1fuv_9.lo t1fuv_10.lo am__objects_7 = t1fv_2.lo t1fv_3.lo t1fv_4.lo t1fv_5.lo t1fv_6.lo \ t1fv_7.lo t1fv_8.lo t1fv_9.lo t1fv_10.lo t1fv_12.lo t1fv_15.lo \ - t1fv_16.lo t1fv_32.lo t1fv_64.lo t1fv_20.lo t1fv_25.lo + t1fv_16.lo t1fv_32.lo t1fv_64.lo t1fv_20.lo t1fv_25.lo \ + t1fv_128.lo t1fv_256.lo am__objects_8 = t2fv_2.lo t2fv_4.lo t2fv_8.lo t2fv_16.lo t2fv_32.lo \ - t2fv_64.lo t2fv_5.lo t2fv_10.lo t2fv_20.lo t2fv_25.lo + t2fv_64.lo t2fv_5.lo t2fv_10.lo t2fv_20.lo t2fv_25.lo \ + t2fv_128.lo t2fv_256.lo am__objects_9 = t3fv_4.lo t3fv_8.lo t3fv_16.lo t3fv_32.lo t3fv_5.lo \ - t3fv_10.lo t3fv_20.lo t3fv_25.lo + t3fv_10.lo t3fv_20.lo t3fv_25.lo t3fv_128.lo t3fv_64.lo am__objects_10 = t1buv_2.lo t1buv_3.lo t1buv_4.lo t1buv_5.lo \ t1buv_6.lo t1buv_7.lo t1buv_8.lo t1buv_9.lo t1buv_10.lo am__objects_11 = t1bv_2.lo t1bv_3.lo t1bv_4.lo t1bv_5.lo t1bv_6.lo \ @@ -186,7 +192,8 @@ am__objects_13 = t3bv_4.lo t3bv_8.lo t3bv_16.lo t3bv_32.lo t3bv_5.lo \ t3bv_10.lo t3bv_20.lo t3bv_25.lo am__objects_14 = t1sv_2.lo t1sv_4.lo t1sv_8.lo t1sv_16.lo t1sv_32.lo am__objects_15 = t2sv_4.lo t2sv_8.lo t2sv_16.lo t2sv_32.lo -am__objects_16 = q1fv_2.lo q1fv_4.lo q1fv_5.lo q1fv_8.lo +am__objects_16 = q1fv_2.lo q1fv_4.lo q1fv_5.lo q1fv_8.lo q1fv_10.lo \ + q1fv_12.lo q1fv_16.lo q1fv_20.lo q1fv_32.lo am__objects_17 = q1bv_2.lo q1bv_4.lo q1bv_5.lo q1bv_8.lo am__objects_18 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ $(am__objects_4) $(am__objects_5) $(am__objects_6) \ @@ -428,12 +435,12 @@ SIMD_HEADER = simd-support/simd-avx-128-fma.h # n1fv_ is a hard-coded FFTW_FORWARD FFT of size , using SIMD N1F = n1fv_2.c n1fv_3.c n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c \ n1fv_9.c n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \ -n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c +n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_256.c n1fv_512.c n1fv_20.c n1fv_25.c # as above, with restricted input vector stride N2F = n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \ 
-n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c +n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_128.c n2fv_256.c n2fv_512.c n2fv_20.c # as above, but FFTW_BACKWARD @@ -453,15 +460,15 @@ N2S = n2sv_4.c n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c # for an FFTW_FORWARD transform, using SIMD T1F = t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \ t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c t1fv_64.c \ -t1fv_20.c t1fv_25.c +t1fv_20.c t1fv_25.c t1fv_128.c t1fv_256.c # same as t1fv_*, but with different twiddle storage scheme T2F = t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c \ -t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c +t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c t2fv_128.c t2fv_256.c T3F = t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c t3fv_10.c \ -t3fv_20.c t3fv_25.c +t3fv_20.c t3fv_25.c t3fv_128.c t3fv_64.c T1FU = t1fuv_2.c t1fuv_3.c t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c \ t1fuv_8.c t1fuv_9.c t1fuv_10.c @@ -494,7 +501,7 @@ T2S = t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c # in-place transposes in sizes that are divisible by ^2. These # codelets have size ~ ^2, so you should probably not use # bigger than 8 or so. 
-Q1F = q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c +Q1F = q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c q1fv_10.c q1fv_12.c q1fv_16.c q1fv_20.c q1fv_32.c # as above, but FFTW_BACKWARD Q1B = q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c @@ -597,10 +604,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_20.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_256.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_3.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_512.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_6.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_7.Plo@am__quote@ @@ -619,12 +628,15 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_128.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_14.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_256.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/n2fv_512.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_6.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_8.Plo@am__quote@ @@ -637,7 +649,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_5.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_5.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_8.Plo@am__quote@ @@ -677,11 +694,13 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_9.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_128.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_15.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_20.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_256.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_3.Plo@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_4.Plo@am__quote@ @@ -707,10 +726,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_128.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_20.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_256.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_5.Plo@am__quote@ @@ -729,12 +750,14 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_5.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_128.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_20.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_25.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_8.Plo@am__quote@ .c.o: diff --git 
a/dft/simd/avx-128-fma/n1fv_256.c b/dft/simd/avx-128-fma/n1fv_256.c new file mode 100644 index 00000000..433566ea --- /dev/null +++ b/dft/simd/avx-128-fma/n1fv_256.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/n1fv_256.c" diff --git a/dft/simd/avx-128-fma/n1fv_512.c b/dft/simd/avx-128-fma/n1fv_512.c new file mode 100644 index 00000000..3514dfe8 --- /dev/null +++ b/dft/simd/avx-128-fma/n1fv_512.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/n1fv_512.c" diff --git a/dft/simd/avx-128-fma/n2fv_128.c b/dft/simd/avx-128-fma/n2fv_128.c new file mode 100644 index 00000000..77cc0ae0 --- /dev/null +++ b/dft/simd/avx-128-fma/n2fv_128.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/n2fv_128.c" diff --git a/dft/simd/avx-128-fma/n2fv_256.c b/dft/simd/avx-128-fma/n2fv_256.c new file mode 100644 index 00000000..29b93443 --- /dev/null +++ b/dft/simd/avx-128-fma/n2fv_256.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/n2fv_256.c" diff --git a/dft/simd/avx-128-fma/n2fv_512.c b/dft/simd/avx-128-fma/n2fv_512.c new file mode 100644 index 00000000..ac4cf9d2 --- /dev/null +++ b/dft/simd/avx-128-fma/n2fv_512.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/n2fv_512.c" diff --git a/dft/simd/avx-128-fma/q1fv_10.c b/dft/simd/avx-128-fma/q1fv_10.c new file mode 100644 index 00000000..bc8c90fd --- /dev/null +++ b/dft/simd/avx-128-fma/q1fv_10.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/q1fv_10.c" diff --git a/dft/simd/avx-128-fma/q1fv_12.c b/dft/simd/avx-128-fma/q1fv_12.c new file mode 100644 index 00000000..dfad1326 --- /dev/null +++ b/dft/simd/avx-128-fma/q1fv_12.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/q1fv_12.c" diff --git a/dft/simd/avx-128-fma/q1fv_16.c b/dft/simd/avx-128-fma/q1fv_16.c new file mode 100644 index 00000000..5424fe85 --- /dev/null +++ b/dft/simd/avx-128-fma/q1fv_16.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/q1fv_16.c" diff --git a/dft/simd/avx-128-fma/q1fv_20.c b/dft/simd/avx-128-fma/q1fv_20.c new file mode 100644 index 00000000..45dffea5 --- /dev/null +++ b/dft/simd/avx-128-fma/q1fv_20.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/q1fv_20.c" diff --git a/dft/simd/avx-128-fma/q1fv_32.c b/dft/simd/avx-128-fma/q1fv_32.c new file mode 100644 index 00000000..85d5cd3f --- /dev/null +++ b/dft/simd/avx-128-fma/q1fv_32.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/q1fv_32.c" diff --git a/dft/simd/avx-128-fma/t1fv_128.c b/dft/simd/avx-128-fma/t1fv_128.c new file mode 100644 index 00000000..12c233d3 --- /dev/null +++ b/dft/simd/avx-128-fma/t1fv_128.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/t1fv_128.c" diff --git a/dft/simd/avx-128-fma/t1fv_256.c b/dft/simd/avx-128-fma/t1fv_256.c new file mode 100644 index 00000000..561b5ca6 --- /dev/null +++ b/dft/simd/avx-128-fma/t1fv_256.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/t1fv_256.c" diff --git a/dft/simd/avx-128-fma/t2fv_128.c b/dft/simd/avx-128-fma/t2fv_128.c new file mode 100644 index 00000000..ac2aca74 --- /dev/null +++ b/dft/simd/avx-128-fma/t2fv_128.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/t2fv_128.c" diff --git a/dft/simd/avx-128-fma/t2fv_256.c b/dft/simd/avx-128-fma/t2fv_256.c new file mode 100644 index 00000000..3828427f --- /dev/null +++ b/dft/simd/avx-128-fma/t2fv_256.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/t2fv_256.c" diff --git a/dft/simd/avx-128-fma/t3fv_128.c b/dft/simd/avx-128-fma/t3fv_128.c new file mode 100644 index 00000000..5a7bc8d7 --- /dev/null +++ b/dft/simd/avx-128-fma/t3fv_128.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/t3fv_128.c" diff --git a/dft/simd/avx-128-fma/t3fv_64.c b/dft/simd/avx-128-fma/t3fv_64.c new file mode 100644 index 00000000..757bd1f8 --- /dev/null +++ b/dft/simd/avx-128-fma/t3fv_64.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-support/simd-avx-128-fma.h" +#include "../common/t3fv_64.c" diff --git a/dft/simd/generic-simd128/Makefile.in b/dft/simd/generic-simd128/Makefile.in index 53995db1..30948c88 100644 --- a/dft/simd/generic-simd128/Makefile.in +++ b/dft/simd/generic-simd128/Makefile.in @@ -125,22 +125,24 @@ libdft_generic_simd128_codelets_la_LIBADD = am__libdft_generic_simd128_codelets_la_SOURCES_DIST = n1fv_2.c \ n1fv_3.c n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c n1fv_9.c \ n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \ - n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c \ - n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c \ - n1bv_9.c n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c \ - n1bv_15.c n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c \ - n1bv_25.c n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c \ - n2fv_12.c n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c \ + n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_256.c n1fv_512.c \ + n1fv_20.c n1fv_25.c n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c \ + n1bv_6.c n1bv_7.c n1bv_8.c n1bv_9.c n1bv_10.c n1bv_11.c \ + n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c n1bv_16.c n1bv_32.c \ + n1bv_64.c n1bv_128.c n1bv_20.c n1bv_25.c n2fv_2.c n2fv_4.c \ + n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c n2fv_14.c n2fv_16.c \ + n2fv_32.c n2fv_64.c n2fv_128.c n2fv_256.c n2fv_512.c n2fv_20.c \ n2bv_2.c n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c \ n2bv_14.c n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c n2sv_4.c \ n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c t1fuv_2.c t1fuv_3.c \ t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c t1fuv_8.c t1fuv_9.c \ t1fuv_10.c t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c \ t1fv_7.c t1fv_8.c t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c \ - t1fv_16.c t1fv_32.c t1fv_64.c t1fv_20.c t1fv_25.c t2fv_2.c \ - t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c t2fv_5.c \ - t2fv_10.c t2fv_20.c t2fv_25.c t3fv_4.c t3fv_8.c t3fv_16.c \ - t3fv_32.c t3fv_5.c t3fv_10.c t3fv_20.c t3fv_25.c t1buv_2.c \ + 
t1fv_16.c t1fv_32.c t1fv_64.c t1fv_20.c t1fv_25.c t1fv_128.c \ + t1fv_256.c t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c \ + t2fv_64.c t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c t2fv_128.c \ + t2fv_256.c t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c \ + t3fv_10.c t3fv_20.c t3fv_25.c t3fv_128.c t3fv_64.c t1buv_2.c \ t1buv_3.c t1buv_4.c t1buv_5.c t1buv_6.c t1buv_7.c t1buv_8.c \ t1buv_9.c t1buv_10.c t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c \ t1bv_6.c t1bv_7.c t1bv_8.c t1bv_9.c t1bv_10.c t1bv_12.c \ @@ -150,18 +152,20 @@ am__libdft_generic_simd128_codelets_la_SOURCES_DIST = n1fv_2.c \ t3bv_16.c t3bv_32.c t3bv_5.c t3bv_10.c t3bv_20.c t3bv_25.c \ t1sv_2.c t1sv_4.c t1sv_8.c t1sv_16.c t1sv_32.c t2sv_4.c \ t2sv_8.c t2sv_16.c t2sv_32.c q1fv_2.c q1fv_4.c q1fv_5.c \ - q1fv_8.c q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c genus.c codlist.c + q1fv_8.c q1fv_10.c q1fv_12.c q1fv_16.c q1fv_20.c q1fv_32.c \ + q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c genus.c codlist.c am__objects_1 = n1fv_2.lo n1fv_3.lo n1fv_4.lo n1fv_5.lo n1fv_6.lo \ n1fv_7.lo n1fv_8.lo n1fv_9.lo n1fv_10.lo n1fv_11.lo n1fv_12.lo \ n1fv_13.lo n1fv_14.lo n1fv_15.lo n1fv_16.lo n1fv_32.lo \ - n1fv_64.lo n1fv_128.lo n1fv_20.lo n1fv_25.lo + n1fv_64.lo n1fv_128.lo n1fv_256.lo n1fv_512.lo n1fv_20.lo \ + n1fv_25.lo am__objects_2 = n1bv_2.lo n1bv_3.lo n1bv_4.lo n1bv_5.lo n1bv_6.lo \ n1bv_7.lo n1bv_8.lo n1bv_9.lo n1bv_10.lo n1bv_11.lo n1bv_12.lo \ n1bv_13.lo n1bv_14.lo n1bv_15.lo n1bv_16.lo n1bv_32.lo \ n1bv_64.lo n1bv_128.lo n1bv_20.lo n1bv_25.lo am__objects_3 = n2fv_2.lo n2fv_4.lo n2fv_6.lo n2fv_8.lo n2fv_10.lo \ n2fv_12.lo n2fv_14.lo n2fv_16.lo n2fv_32.lo n2fv_64.lo \ - n2fv_20.lo + n2fv_128.lo n2fv_256.lo n2fv_512.lo n2fv_20.lo am__objects_4 = n2bv_2.lo n2bv_4.lo n2bv_6.lo n2bv_8.lo n2bv_10.lo \ n2bv_12.lo n2bv_14.lo n2bv_16.lo n2bv_32.lo n2bv_64.lo \ n2bv_20.lo @@ -170,11 +174,13 @@ am__objects_6 = t1fuv_2.lo t1fuv_3.lo t1fuv_4.lo t1fuv_5.lo t1fuv_6.lo \ t1fuv_7.lo t1fuv_8.lo t1fuv_9.lo t1fuv_10.lo am__objects_7 = t1fv_2.lo t1fv_3.lo 
t1fv_4.lo t1fv_5.lo t1fv_6.lo \ t1fv_7.lo t1fv_8.lo t1fv_9.lo t1fv_10.lo t1fv_12.lo t1fv_15.lo \ - t1fv_16.lo t1fv_32.lo t1fv_64.lo t1fv_20.lo t1fv_25.lo + t1fv_16.lo t1fv_32.lo t1fv_64.lo t1fv_20.lo t1fv_25.lo \ + t1fv_128.lo t1fv_256.lo am__objects_8 = t2fv_2.lo t2fv_4.lo t2fv_8.lo t2fv_16.lo t2fv_32.lo \ - t2fv_64.lo t2fv_5.lo t2fv_10.lo t2fv_20.lo t2fv_25.lo + t2fv_64.lo t2fv_5.lo t2fv_10.lo t2fv_20.lo t2fv_25.lo \ + t2fv_128.lo t2fv_256.lo am__objects_9 = t3fv_4.lo t3fv_8.lo t3fv_16.lo t3fv_32.lo t3fv_5.lo \ - t3fv_10.lo t3fv_20.lo t3fv_25.lo + t3fv_10.lo t3fv_20.lo t3fv_25.lo t3fv_128.lo t3fv_64.lo am__objects_10 = t1buv_2.lo t1buv_3.lo t1buv_4.lo t1buv_5.lo \ t1buv_6.lo t1buv_7.lo t1buv_8.lo t1buv_9.lo t1buv_10.lo am__objects_11 = t1bv_2.lo t1bv_3.lo t1bv_4.lo t1bv_5.lo t1bv_6.lo \ @@ -186,7 +192,8 @@ am__objects_13 = t3bv_4.lo t3bv_8.lo t3bv_16.lo t3bv_32.lo t3bv_5.lo \ t3bv_10.lo t3bv_20.lo t3bv_25.lo am__objects_14 = t1sv_2.lo t1sv_4.lo t1sv_8.lo t1sv_16.lo t1sv_32.lo am__objects_15 = t2sv_4.lo t2sv_8.lo t2sv_16.lo t2sv_32.lo -am__objects_16 = q1fv_2.lo q1fv_4.lo q1fv_5.lo q1fv_8.lo +am__objects_16 = q1fv_2.lo q1fv_4.lo q1fv_5.lo q1fv_8.lo q1fv_10.lo \ + q1fv_12.lo q1fv_16.lo q1fv_20.lo q1fv_32.lo am__objects_17 = q1bv_2.lo q1bv_4.lo q1bv_5.lo q1bv_8.lo am__objects_18 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ $(am__objects_4) $(am__objects_5) $(am__objects_6) \ @@ -427,12 +434,12 @@ SIMD_HEADER = simd-support/simd-generic128.h # n1fv_ is a hard-coded FFTW_FORWARD FFT of size , using SIMD N1F = n1fv_2.c n1fv_3.c n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c \ n1fv_9.c n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \ -n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c +n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_256.c n1fv_512.c n1fv_20.c n1fv_25.c # as above, with restricted input vector stride N2F = n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \ -n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c +n2fv_14.c 
n2fv_16.c n2fv_32.c n2fv_64.c n2fv_128.c n2fv_256.c n2fv_512.c n2fv_20.c # as above, but FFTW_BACKWARD @@ -452,15 +459,15 @@ N2S = n2sv_4.c n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c # for an FFTW_FORWARD transform, using SIMD T1F = t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \ t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c t1fv_64.c \ -t1fv_20.c t1fv_25.c +t1fv_20.c t1fv_25.c t1fv_128.c t1fv_256.c # same as t1fv_*, but with different twiddle storage scheme T2F = t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c \ -t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c +t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c t2fv_128.c t2fv_256.c T3F = t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c t3fv_10.c \ -t3fv_20.c t3fv_25.c +t3fv_20.c t3fv_25.c t3fv_128.c t3fv_64.c T1FU = t1fuv_2.c t1fuv_3.c t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c \ t1fuv_8.c t1fuv_9.c t1fuv_10.c @@ -493,7 +500,7 @@ T2S = t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c # in-place transposes in sizes that are divisible by ^2. These # codelets have size ~ ^2, so you should probably not use # bigger than 8 or so. 
-Q1F = q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c +Q1F = q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c q1fv_10.c q1fv_12.c q1fv_16.c q1fv_20.c q1fv_32.c # as above, but FFTW_BACKWARD Q1B = q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c @@ -596,10 +603,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_20.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_256.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_3.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_512.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_6.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_7.Plo@am__quote@ @@ -618,12 +627,15 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_128.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_14.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_256.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/n2fv_512.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_6.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_8.Plo@am__quote@ @@ -636,7 +648,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_5.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_5.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_8.Plo@am__quote@ @@ -676,11 +693,13 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_9.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_128.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_15.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_20.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_256.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_3.Plo@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_4.Plo@am__quote@ @@ -706,10 +725,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_128.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_20.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_256.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_5.Plo@am__quote@ @@ -728,12 +749,14 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_5.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_128.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_20.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_25.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_8.Plo@am__quote@ .c.o: diff --git 
a/dft/simd/generic-simd128/n1fv_256.c b/dft/simd/generic-simd128/n1fv_256.c new file mode 100644 index 00000000..87c9d7f7 --- /dev/null +++ b/dft/simd/generic-simd128/n1fv_256.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/n1fv_256.c" diff --git a/dft/simd/generic-simd128/n1fv_512.c b/dft/simd/generic-simd128/n1fv_512.c new file mode 100644 index 00000000..0872f032 --- /dev/null +++ b/dft/simd/generic-simd128/n1fv_512.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/n1fv_512.c" diff --git a/dft/simd/generic-simd128/n2fv_128.c b/dft/simd/generic-simd128/n2fv_128.c new file mode 100644 index 00000000..18d7be41 --- /dev/null +++ b/dft/simd/generic-simd128/n2fv_128.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/n2fv_128.c" diff --git a/dft/simd/generic-simd128/n2fv_256.c b/dft/simd/generic-simd128/n2fv_256.c new file mode 100644 index 00000000..2ab14d6c --- /dev/null +++ b/dft/simd/generic-simd128/n2fv_256.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/n2fv_256.c" diff --git a/dft/simd/generic-simd128/n2fv_512.c b/dft/simd/generic-simd128/n2fv_512.c new file mode 100644 index 00000000..2b60a4f7 --- /dev/null +++ b/dft/simd/generic-simd128/n2fv_512.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/n2fv_512.c" diff --git a/dft/simd/generic-simd128/q1fv_10.c b/dft/simd/generic-simd128/q1fv_10.c new file mode 100644 index 00000000..a7ef60d1 --- /dev/null +++ b/dft/simd/generic-simd128/q1fv_10.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/q1fv_10.c" diff --git a/dft/simd/generic-simd128/q1fv_12.c b/dft/simd/generic-simd128/q1fv_12.c new file mode 100644 index 00000000..08927184 --- /dev/null +++ b/dft/simd/generic-simd128/q1fv_12.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/q1fv_12.c" diff --git a/dft/simd/generic-simd128/q1fv_16.c b/dft/simd/generic-simd128/q1fv_16.c new file mode 100644 index 00000000..da603f3e --- /dev/null +++ b/dft/simd/generic-simd128/q1fv_16.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/q1fv_16.c" diff --git a/dft/simd/generic-simd128/q1fv_20.c b/dft/simd/generic-simd128/q1fv_20.c new file mode 100644 index 00000000..03197fc6 --- /dev/null +++ b/dft/simd/generic-simd128/q1fv_20.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/q1fv_20.c" diff --git a/dft/simd/generic-simd128/q1fv_32.c b/dft/simd/generic-simd128/q1fv_32.c new file mode 100644 index 00000000..d9833b1b --- /dev/null +++ b/dft/simd/generic-simd128/q1fv_32.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/q1fv_32.c" diff --git a/dft/simd/generic-simd128/t1fv_128.c b/dft/simd/generic-simd128/t1fv_128.c new file mode 100644 index 00000000..31174f53 --- /dev/null +++ b/dft/simd/generic-simd128/t1fv_128.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/t1fv_128.c" diff --git a/dft/simd/generic-simd128/t1fv_256.c b/dft/simd/generic-simd128/t1fv_256.c new file mode 100644 index 00000000..60937e6b --- /dev/null +++ b/dft/simd/generic-simd128/t1fv_256.c @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/t1fv_256.c" diff --git a/dft/simd/generic-simd128/t2fv_128.c b/dft/simd/generic-simd128/t2fv_128.c new file mode 100644 index 00000000..d08ad25f --- /dev/null +++ b/dft/simd/generic-simd128/t2fv_128.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/t2fv_128.c" diff --git a/dft/simd/generic-simd128/t2fv_256.c b/dft/simd/generic-simd128/t2fv_256.c new file mode 100644 index 00000000..6d22b919 --- /dev/null +++ b/dft/simd/generic-simd128/t2fv_256.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/t2fv_256.c" diff --git a/dft/simd/generic-simd128/t3fv_128.c b/dft/simd/generic-simd128/t3fv_128.c new file mode 100644 index 00000000..8ff20fa1 --- /dev/null +++ b/dft/simd/generic-simd128/t3fv_128.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/t3fv_128.c" diff --git a/dft/simd/generic-simd128/t3fv_64.c b/dft/simd/generic-simd128/t3fv_64.c new file mode 100644 index 00000000..8b09b5d8 --- /dev/null +++ b/dft/simd/generic-simd128/t3fv_64.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-support/simd-generic128.h" +#include "../common/t3fv_64.c" diff --git a/dft/simd/generic-simd256/Makefile.in b/dft/simd/generic-simd256/Makefile.in index 685fbace..1f0d3ab5 100644 --- a/dft/simd/generic-simd256/Makefile.in +++ b/dft/simd/generic-simd256/Makefile.in @@ -125,22 +125,24 @@ libdft_generic_simd256_codelets_la_LIBADD = am__libdft_generic_simd256_codelets_la_SOURCES_DIST = n1fv_2.c \ n1fv_3.c n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c n1fv_9.c \ n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \ - n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c \ - n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c \ - n1bv_9.c n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c \ - n1bv_15.c n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c \ - n1bv_25.c n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c \ - n2fv_12.c n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c \ + n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_256.c n1fv_512.c \ + n1fv_20.c n1fv_25.c n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c \ + n1bv_6.c n1bv_7.c n1bv_8.c n1bv_9.c n1bv_10.c n1bv_11.c \ + n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c n1bv_16.c n1bv_32.c \ + n1bv_64.c n1bv_128.c n1bv_20.c n1bv_25.c n2fv_2.c n2fv_4.c \ + n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c n2fv_14.c n2fv_16.c \ + n2fv_32.c n2fv_64.c n2fv_128.c n2fv_256.c n2fv_512.c n2fv_20.c \ n2bv_2.c n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c \ n2bv_14.c n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c n2sv_4.c \ n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c t1fuv_2.c t1fuv_3.c \ t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c t1fuv_8.c t1fuv_9.c \ t1fuv_10.c t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c \ t1fv_7.c t1fv_8.c t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c \ - t1fv_16.c t1fv_32.c t1fv_64.c t1fv_20.c t1fv_25.c t2fv_2.c \ - t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c t2fv_5.c \ - t2fv_10.c t2fv_20.c t2fv_25.c t3fv_4.c t3fv_8.c t3fv_16.c \ - t3fv_32.c t3fv_5.c t3fv_10.c t3fv_20.c t3fv_25.c t1buv_2.c \ + 
t1fv_16.c t1fv_32.c t1fv_64.c t1fv_20.c t1fv_25.c t1fv_128.c \ + t1fv_256.c t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c \ + t2fv_64.c t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c t2fv_128.c \ + t2fv_256.c t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c \ + t3fv_10.c t3fv_20.c t3fv_25.c t3fv_128.c t3fv_64.c t1buv_2.c \ t1buv_3.c t1buv_4.c t1buv_5.c t1buv_6.c t1buv_7.c t1buv_8.c \ t1buv_9.c t1buv_10.c t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c \ t1bv_6.c t1bv_7.c t1bv_8.c t1bv_9.c t1bv_10.c t1bv_12.c \ @@ -150,18 +152,20 @@ am__libdft_generic_simd256_codelets_la_SOURCES_DIST = n1fv_2.c \ t3bv_16.c t3bv_32.c t3bv_5.c t3bv_10.c t3bv_20.c t3bv_25.c \ t1sv_2.c t1sv_4.c t1sv_8.c t1sv_16.c t1sv_32.c t2sv_4.c \ t2sv_8.c t2sv_16.c t2sv_32.c q1fv_2.c q1fv_4.c q1fv_5.c \ - q1fv_8.c q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c genus.c codlist.c + q1fv_8.c q1fv_10.c q1fv_12.c q1fv_16.c q1fv_20.c q1fv_32.c \ + q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c genus.c codlist.c am__objects_1 = n1fv_2.lo n1fv_3.lo n1fv_4.lo n1fv_5.lo n1fv_6.lo \ n1fv_7.lo n1fv_8.lo n1fv_9.lo n1fv_10.lo n1fv_11.lo n1fv_12.lo \ n1fv_13.lo n1fv_14.lo n1fv_15.lo n1fv_16.lo n1fv_32.lo \ - n1fv_64.lo n1fv_128.lo n1fv_20.lo n1fv_25.lo + n1fv_64.lo n1fv_128.lo n1fv_256.lo n1fv_512.lo n1fv_20.lo \ + n1fv_25.lo am__objects_2 = n1bv_2.lo n1bv_3.lo n1bv_4.lo n1bv_5.lo n1bv_6.lo \ n1bv_7.lo n1bv_8.lo n1bv_9.lo n1bv_10.lo n1bv_11.lo n1bv_12.lo \ n1bv_13.lo n1bv_14.lo n1bv_15.lo n1bv_16.lo n1bv_32.lo \ n1bv_64.lo n1bv_128.lo n1bv_20.lo n1bv_25.lo am__objects_3 = n2fv_2.lo n2fv_4.lo n2fv_6.lo n2fv_8.lo n2fv_10.lo \ n2fv_12.lo n2fv_14.lo n2fv_16.lo n2fv_32.lo n2fv_64.lo \ - n2fv_20.lo + n2fv_128.lo n2fv_256.lo n2fv_512.lo n2fv_20.lo am__objects_4 = n2bv_2.lo n2bv_4.lo n2bv_6.lo n2bv_8.lo n2bv_10.lo \ n2bv_12.lo n2bv_14.lo n2bv_16.lo n2bv_32.lo n2bv_64.lo \ n2bv_20.lo @@ -170,11 +174,13 @@ am__objects_6 = t1fuv_2.lo t1fuv_3.lo t1fuv_4.lo t1fuv_5.lo t1fuv_6.lo \ t1fuv_7.lo t1fuv_8.lo t1fuv_9.lo t1fuv_10.lo am__objects_7 = t1fv_2.lo t1fv_3.lo 
t1fv_4.lo t1fv_5.lo t1fv_6.lo \ t1fv_7.lo t1fv_8.lo t1fv_9.lo t1fv_10.lo t1fv_12.lo t1fv_15.lo \ - t1fv_16.lo t1fv_32.lo t1fv_64.lo t1fv_20.lo t1fv_25.lo + t1fv_16.lo t1fv_32.lo t1fv_64.lo t1fv_20.lo t1fv_25.lo \ + t1fv_128.lo t1fv_256.lo am__objects_8 = t2fv_2.lo t2fv_4.lo t2fv_8.lo t2fv_16.lo t2fv_32.lo \ - t2fv_64.lo t2fv_5.lo t2fv_10.lo t2fv_20.lo t2fv_25.lo + t2fv_64.lo t2fv_5.lo t2fv_10.lo t2fv_20.lo t2fv_25.lo \ + t2fv_128.lo t2fv_256.lo am__objects_9 = t3fv_4.lo t3fv_8.lo t3fv_16.lo t3fv_32.lo t3fv_5.lo \ - t3fv_10.lo t3fv_20.lo t3fv_25.lo + t3fv_10.lo t3fv_20.lo t3fv_25.lo t3fv_128.lo t3fv_64.lo am__objects_10 = t1buv_2.lo t1buv_3.lo t1buv_4.lo t1buv_5.lo \ t1buv_6.lo t1buv_7.lo t1buv_8.lo t1buv_9.lo t1buv_10.lo am__objects_11 = t1bv_2.lo t1bv_3.lo t1bv_4.lo t1bv_5.lo t1bv_6.lo \ @@ -186,7 +192,8 @@ am__objects_13 = t3bv_4.lo t3bv_8.lo t3bv_16.lo t3bv_32.lo t3bv_5.lo \ t3bv_10.lo t3bv_20.lo t3bv_25.lo am__objects_14 = t1sv_2.lo t1sv_4.lo t1sv_8.lo t1sv_16.lo t1sv_32.lo am__objects_15 = t2sv_4.lo t2sv_8.lo t2sv_16.lo t2sv_32.lo -am__objects_16 = q1fv_2.lo q1fv_4.lo q1fv_5.lo q1fv_8.lo +am__objects_16 = q1fv_2.lo q1fv_4.lo q1fv_5.lo q1fv_8.lo q1fv_10.lo \ + q1fv_12.lo q1fv_16.lo q1fv_20.lo q1fv_32.lo am__objects_17 = q1bv_2.lo q1bv_4.lo q1bv_5.lo q1bv_8.lo am__objects_18 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ $(am__objects_4) $(am__objects_5) $(am__objects_6) \ @@ -427,12 +434,12 @@ SIMD_HEADER = simd-support/simd-generic256.h # n1fv_ is a hard-coded FFTW_FORWARD FFT of size , using SIMD N1F = n1fv_2.c n1fv_3.c n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c \ n1fv_9.c n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \ -n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c +n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_256.c n1fv_512.c n1fv_20.c n1fv_25.c # as above, with restricted input vector stride N2F = n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \ -n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c +n2fv_14.c 
n2fv_16.c n2fv_32.c n2fv_64.c n2fv_128.c n2fv_256.c n2fv_512.c n2fv_20.c # as above, but FFTW_BACKWARD @@ -452,15 +459,15 @@ N2S = n2sv_4.c n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c # for an FFTW_FORWARD transform, using SIMD T1F = t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \ t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c t1fv_64.c \ -t1fv_20.c t1fv_25.c +t1fv_20.c t1fv_25.c t1fv_128.c t1fv_256.c # same as t1fv_*, but with different twiddle storage scheme T2F = t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c \ -t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c +t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c t2fv_128.c t2fv_256.c T3F = t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c t3fv_10.c \ -t3fv_20.c t3fv_25.c +t3fv_20.c t3fv_25.c t3fv_128.c t3fv_64.c T1FU = t1fuv_2.c t1fuv_3.c t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c \ t1fuv_8.c t1fuv_9.c t1fuv_10.c @@ -493,7 +500,7 @@ T2S = t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c # in-place transposes in sizes that are divisible by ^2. These # codelets have size ~ ^2, so you should probably not use # bigger than 8 or so. 
-Q1F = q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c +Q1F = q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c q1fv_10.c q1fv_12.c q1fv_16.c q1fv_20.c q1fv_32.c # as above, but FFTW_BACKWARD Q1B = q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c @@ -596,10 +603,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_20.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_256.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_3.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_512.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_6.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_7.Plo@am__quote@ @@ -618,12 +627,15 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_128.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_14.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_256.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/n2fv_512.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_6.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_8.Plo@am__quote@ @@ -636,7 +648,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_5.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_5.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_8.Plo@am__quote@ @@ -676,11 +693,13 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_9.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_128.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_15.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_20.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_256.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_3.Plo@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_4.Plo@am__quote@ @@ -706,10 +725,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_128.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_20.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_256.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_5.Plo@am__quote@ @@ -728,12 +749,14 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_5.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_128.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_20.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_25.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_8.Plo@am__quote@ .c.o: diff --git 
a/dft/simd/generic-simd256/n1fv_256.c b/dft/simd/generic-simd256/n1fv_256.c new file mode 100644 index 00000000..afd463f4 --- /dev/null +++ b/dft/simd/generic-simd256/n1fv_256.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/n1fv_256.c" diff --git a/dft/simd/generic-simd256/n1fv_512.c b/dft/simd/generic-simd256/n1fv_512.c new file mode 100644 index 00000000..f2327fcd --- /dev/null +++ b/dft/simd/generic-simd256/n1fv_512.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/n1fv_512.c" diff --git a/dft/simd/generic-simd256/n2fv_128.c b/dft/simd/generic-simd256/n2fv_128.c new file mode 100644 index 00000000..ed17bd7d --- /dev/null +++ b/dft/simd/generic-simd256/n2fv_128.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/n2fv_128.c" diff --git a/dft/simd/generic-simd256/n2fv_256.c b/dft/simd/generic-simd256/n2fv_256.c new file mode 100644 index 00000000..80483b6f --- /dev/null +++ b/dft/simd/generic-simd256/n2fv_256.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/n2fv_256.c" diff --git a/dft/simd/generic-simd256/n2fv_512.c b/dft/simd/generic-simd256/n2fv_512.c new file mode 100644 index 00000000..ad684e4a --- /dev/null +++ b/dft/simd/generic-simd256/n2fv_512.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/n2fv_512.c" diff --git a/dft/simd/generic-simd256/q1fv_10.c b/dft/simd/generic-simd256/q1fv_10.c new file mode 100644 index 00000000..7ad5e5dd --- /dev/null +++ b/dft/simd/generic-simd256/q1fv_10.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/q1fv_10.c" diff --git a/dft/simd/generic-simd256/q1fv_12.c b/dft/simd/generic-simd256/q1fv_12.c new file mode 100644 index 00000000..00070d73 --- /dev/null +++ b/dft/simd/generic-simd256/q1fv_12.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/q1fv_12.c" diff --git a/dft/simd/generic-simd256/q1fv_16.c b/dft/simd/generic-simd256/q1fv_16.c new file mode 100644 index 00000000..ff8c27dd --- /dev/null +++ b/dft/simd/generic-simd256/q1fv_16.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/q1fv_16.c" diff --git a/dft/simd/generic-simd256/q1fv_20.c b/dft/simd/generic-simd256/q1fv_20.c new file mode 100644 index 00000000..883a73c8 --- /dev/null +++ b/dft/simd/generic-simd256/q1fv_20.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/q1fv_20.c" diff --git a/dft/simd/generic-simd256/q1fv_32.c b/dft/simd/generic-simd256/q1fv_32.c new file mode 100644 index 00000000..a2d5f760 --- /dev/null +++ b/dft/simd/generic-simd256/q1fv_32.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/q1fv_32.c" diff --git a/dft/simd/generic-simd256/t1fv_128.c b/dft/simd/generic-simd256/t1fv_128.c new file mode 100644 index 00000000..cdab4552 --- /dev/null +++ b/dft/simd/generic-simd256/t1fv_128.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/t1fv_128.c" diff --git a/dft/simd/generic-simd256/t1fv_256.c b/dft/simd/generic-simd256/t1fv_256.c new file mode 100644 index 00000000..c4400cb7 --- /dev/null +++ b/dft/simd/generic-simd256/t1fv_256.c @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/t1fv_256.c" diff --git a/dft/simd/generic-simd256/t2fv_128.c b/dft/simd/generic-simd256/t2fv_128.c new file mode 100644 index 00000000..58848fcb --- /dev/null +++ b/dft/simd/generic-simd256/t2fv_128.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/t2fv_128.c" diff --git a/dft/simd/generic-simd256/t2fv_256.c b/dft/simd/generic-simd256/t2fv_256.c new file mode 100644 index 00000000..d6d55c5d --- /dev/null +++ b/dft/simd/generic-simd256/t2fv_256.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/t2fv_256.c" diff --git a/dft/simd/generic-simd256/t3fv_128.c b/dft/simd/generic-simd256/t3fv_128.c new file mode 100644 index 00000000..d383b967 --- /dev/null +++ b/dft/simd/generic-simd256/t3fv_128.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/t3fv_128.c" diff --git a/dft/simd/generic-simd256/t3fv_64.c b/dft/simd/generic-simd256/t3fv_64.c new file mode 100644 index 00000000..62d19166 --- /dev/null +++ b/dft/simd/generic-simd256/t3fv_64.c @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-support/simd-generic256.h" +#include "../common/t3fv_64.c" diff --git a/kernel/cycle.h b/kernel/cycle.h index 16dfdc98..16a23a9a 100644 --- a/kernel/cycle.h +++ b/kernel/cycle.h @@ -515,7 +515,7 @@ INLINE_ELAPSED(inline) #endif #endif /* HAVE_MIPS_ZBUS_TIMER */ -#if defined(HAVE_ARMV7A_CNTVCT) +#if defined(HAVE_ARMV7A_CNTVCT) && defined(__aarch64__) typedef uint64_t ticks; static inline ticks getticks(void) { @@ -527,7 +527,7 @@ INLINE_ELAPSED(inline) #define HAVE_TICK_COUNTER #endif -#if defined(HAVE_ARMV7A_PMCCNTR) +#if defined(HAVE_ARMV7A_PMCCNTR) && defined(__aarch64__) typedef uint64_t ticks; static inline ticks getticks(void) { From f41f2f2583acde8705f5070286ee089c9060d7be Mon Sep 17 00:00:00 2001 From: Rahul Raj Date: Fri, 23 Aug 2024 21:06:38 +0530 Subject: [PATCH 6/9] Code changes to add support for building with znver5 on Windows and README update with the information on znver5. Signed-off-by: Rahul AMD-Internal: [CPUPL-5664] Change-Id: I8515660c82ca9c4232f23bbdf9553760171c209e (cherry picked from commit a88a929f6f398564e3d679b4ee6f948aed016feb) --- CMakeLists.txt | 6 ++++-- README_AMD.md | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7aabf10a..9ea9fcbd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ SET(AMD_ARCH "znver1" CACHE STRING "select AMD zen version for Clang toolchain") if (CMAKE_C_COMPILER_ID MATCHES Clang) if ("${AMD_ARCH}" STREQUAL "") - message(FATAL_ERROR "Machine arch missing! Select one of znver1, znver2, znver3 or znver4") + message(FATAL_ERROR "Machine arch missing! 
Select one of znver1, znver2, znver3, znver4 or znver5") elseif (${AMD_ARCH} STREQUAL "znver1") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=znver1") elseif (${AMD_ARCH} STREQUAL "znver2") @@ -23,8 +23,10 @@ if (CMAKE_C_COMPILER_ID MATCHES Clang) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=znver3") elseif (${AMD_ARCH} STREQUAL "znver4") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=znver4") + elseif (${AMD_ARCH} STREQUAL "znver5") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=znver5") else () - message(FATAL_ERROR "Unsupported Machine arch! Select one of znver1, znver2, znver3 or znver4") + message(FATAL_ERROR "Unsupported Machine arch! Select one of znver1, znver2, znver3, znver4 or znver5") endif () set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-prfchw") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mno-prfchw") diff --git a/README_AMD.md b/README_AMD.md index 0a8d81a3..da17fb97 100644 --- a/README_AMD.md +++ b/README_AMD.md @@ -76,7 +76,7 @@ It is supported in single-threaded execution in single and double precisions. An optional configure option "AMD_ARCH" is supported that can be set to CPU architecture values like "auto" or "znver1" or "znver2" or "znver3" or "znver4" -for AMD EPYC processors. +or "znver5" for AMD EPYC processors. The optional configure option "--enable-amd-app-opt" turns on AMD's application optimization layer to benefit performance of HPC and scientific applications. From 7397ba62340a45d6c7a719ace96a61c230251d50 Mon Sep 17 00:00:00 2001 From: rahulraj Date: Tue, 27 Aug 2024 15:20:14 +0530 Subject: [PATCH 7/9] Code changes to update Minimum CMake version to 3.22. 
Signed-off-by: Rahul AMD-Internal: [CPUPL-5675] Change-Id: Ib8aa2d5b015f39067a35f516430899967ec6b069 (cherry picked from commit a2a625bdd8aa85e9ef7387564fbbfce52a83b696) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ea9fcbd..28f25423 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 3.15) +cmake_minimum_required (VERSION 3.22) if (NOT DEFINED CMAKE_BUILD_TYPE) set (CMAKE_BUILD_TYPE Release CACHE STRING "Build type") From d0e7688a3aeef8c8b480406e22a2dcee56574940 Mon Sep 17 00:00:00 2001 From: Rahul Raj Date: Thu, 19 Sep 2024 11:13:36 +0530 Subject: [PATCH 8/9] AOCL-FFTW version bumped to 5.0.0 and Copyright year update Signed-off-by: Rahul AMD-Internal: [CPUPL-5811] Change-Id: I60534ba61a97472f7a5d15c23bc620a755ad25cf --- CMakeLists.txt | 2 +- COPYRIGHT | 2 +- configure | 2 +- configure.ac | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 28f25423..ac668c90 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -254,7 +254,7 @@ if (MSVC) endif(MSVC) string(TIMESTAMP TODAY "%Y%m%d") -add_compile_definitions(AOCL_FFTW_VERSION="AOCL-FFTW 4.2.1 Build ${TODAY}") +add_compile_definitions(AOCL_FFTW_VERSION="AOCL-FFTW 5.0.0 Build ${TODAY}") find_library (LIBM_LIBRARY NAMES m) if (LIBM_LIBRARY) diff --git a/COPYRIGHT b/COPYRIGHT index 0656ac69..346e0d65 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2007-14 Matteo Frigo * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology - * Copyright (C) 2019-2023, Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (C) 2019-2024, Advanced Micro Devices, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/configure b/configure index d8381407..61a3e2e6 100755 --- a/configure +++ b/configure @@ -18385,7 +18385,7 @@ fi dateVar=`date +"%Y%m%d"` -printf "%s\n" "#define AOCL_FFTW_VERSION \"AOCL-FFTW 4.2.1 Build $dateVar\"" >>confdefs.h +printf "%s\n" "#define AOCL_FFTW_VERSION \"AOCL-FFTW 5.0.0 Build $dateVar\"" >>confdefs.h # Check whether --enable-amd-fast-planner was given. if test ${enable_amd_fast_planner+y} diff --git a/configure.ac b/configure.ac index d6b228d3..85560eb6 100644 --- a/configure.ac +++ b/configure.ac @@ -790,7 +790,7 @@ AC_ARG_ENABLE(openmp, [AC_HELP_STRING([--enable-openmp],[use OpenMP directives f AC_ARG_ENABLE(threads, [AC_HELP_STRING([--enable-threads],[compile FFTW SMP threads library])], enable_threads=$enableval, enable_threads=no) dnl aocl version number of amd-fftw dateVar=`date +"%Y%m%d"` -AC_DEFINE_UNQUOTED(AOCL_FFTW_VERSION,"AOCL-FFTW 4.2.1 Build $dateVar",[AOCL Version of AMD-FFTW]) +AC_DEFINE_UNQUOTED(AOCL_FFTW_VERSION,"AOCL-FFTW 5.0.0 Build $dateVar",[AOCL Version of AMD-FFTW]) dnl amd optimization switch to enable AMD Fast Planner for AMD cpus --enable-amd-fast-planner AC_ARG_ENABLE(amd-fast-planner, [AC_HELP_STRING([--enable-amd-fast-planner],[enable AMD Fast Planner for a faster planning time on AMD cpus])], have_amd_fast_planner=$enableval, have_amd_fast_planner=no) dnl amd optimization switch to enable AMD Top N Planner for AMD cpus --enable-amd-top-n-planner From 0e8e2a13df2acc10fb666badc698e622fc6cf960 Mon Sep 17 00:00:00 2001 From: rahulraj Date: Thu, 10 Oct 2024 12:55:07 +0530 Subject: [PATCH 9/9] AOCL-FFTW: License File Update Signed-off-by: Rahul AMD-Internal: [CPUPL-5878] Change-Id: I08ab4bfe731457234b757712a4b147399c5e6d92 (cherry picked from commit b5e0d943f4c558dac74d0a97ff39585d7d1bf585) --- LICENSE | 468 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 
file changed, 468 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..544d51ce --- /dev/null +++ b/LICENSE @@ -0,0 +1,468 @@ +This summary and the license information provided below is for reference purposes and is not intended +to be a comprehensive list of all copyright notices or license terms and conditions applicable to FFTW +Library. Please refer to the source code files in FFTW Library for all copyrights and licenses. With +respect to any license that requires AMD to make available to recipients of object code distributed by +AMD pursuant to such license the corresponding source code, and if you desire to receive such source +code from AMD, please refer to the URL provided below corresponding to the library. If you cannot +access the internet to obtain a copy thereof, then AMD hereby offers (which offer is valid for as long as +required by the applicable license; and we may charge you the cost thereof unless prohibited by the +license) to provide you with a copy of such source code; and to accept such offer send a letter +requesting such source code (please be specific by identifying the particular AOCL library you are +inquiring about (name and version number), to: Advanced Micro Devices, Inc., Legal Department, +Attention: Software Compliance Officer, 2485 Augustine Drive, Santa Clara, CA U.S.A. 95054. + +Source code for the FFTW library is available at https://github.com/amd/amd-fftw + +Configuration scripts Generated by automake v-u (GPL-3.0-with-autoconf-exception) + +Copyright Statements + +Copyright 1992-2016 Free Software Foundation, Inc. 
+ +License Text http://spdx.org/licenses/GPL-3.0-with-autoconf-exception.html + +Configuration validation subroutine script.timestamp='2016-11-04' + +This file is free software; you can redistribute it and/or modify it under the terms of the GNU General +Public License as published by the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program; if not, see +<http://www.gnu.org/licenses/>. + +As a special exception to the GNU General Public License, if you distribute this file as part of a program +that contains a configuration script generated by Autoconf, you may include it under the same +distribution terms that you use for the rest of that program. This Exception is an additional permission +under section 7 of the GNU General Public License, version 3 ("GPLv3"). + +Please send patches to <config-patches@gnu.org>. + +Configuration subroutine to validate and canonicalize a configuration type. Supply the specified +configuration type as an argument. If it is invalid, we print an error message on stderr and exit with code +1. Otherwise, we print the canonical config type on stdout and succeed. + +You can get the latest version of this script +from: http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub + +This file is supposed to be the same for all GNU packages and recognize all the CPU types, system types +and aliases that are meaningful with *any* GNU software. Each package is responsible for reporting +which valid configurations it does not support. The user should be able to distinguish a failure to +support a valid configuration from a meaningless configuration. 
+ +The goal of this file is to map all the various variations of a given machine specification into a single +specification in the form:CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM + +or in some cases, the newer four-part form:CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM. It +is wrong to echo any other type of specification. + + + +fftw v3.3.10 (BSD-2-clause) + +Copyright Statements + +Copyright (c) 2003, 2007-14 Matteo Frigo + +Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + +License Text http://spdx.org/licenses/BSD-2-Clause + +Redistribution and use in source and binary forms, with or without modification, are permitted provided +that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list of conditions and the +following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + + +fftw-fftw3 v3.3.8 (GPL-2.0) + +Copyright Statements + +Copyright (c) 2001 Matteo Frigo + +Copyright (c) 2001 Massachusetts Institute of Technology + +Copyright (C) 2019, Advanced Micro Devices, Inc. All Rights Reserved. + +License Text http://spdx.org/licenses/GPL-2.0.html + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. + +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + +Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is +not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to share and change it. By +contrast, the GNU General Public License is intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. This General Public License applies to +most of the Free Software Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public +License instead.) You can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are +designed to make sure that you have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it if you want it, that you can change the +software or use pieces of it in new free programs; and that you know you can do these things. + +To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to +ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the +recipients all the rights that you have. 
You must make sure that they, too, receive or can get the source +code. And you must show them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which +gives you legal permission to copy, distribute and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that everyone understands that +there is no warranty for this free software. If the software is modified by someone else and passed on, +we want its recipients to know that what they have is not the original, so that any problems introduced +by others will not reflect on the original authors' reputations. + +Finally, any free program is threatened constantly by software patents. We wish to avoid the danger +that redistributors of a free program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any patent must be licensed for +everyone's free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification follow. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License applies to any program or other work which contains a notice placed by the copyright +holder saying it may be distributed under the terms of this General Public License. The "Program", +below, refers to any such program or work, and a "work based on the Program" means either the +Program or any derivative work under copyright law: that is to say, a work containing the Program or a +portion of it, either verbatim or with modifications and/or translated into another language. +(Hereinafter, translation is included without limitation in the term "modification".) Each licensee is +addressed as "you". + +Activities other than copying, distribution and modification are not covered by this License; they are +outside its scope. 
The act of running the Program is not restricted, and the output from the Program is +covered only if its contents constitute a work based on the Program (independent of having been made +by running the Program). Whether that is true depends on what the Program does. + +1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any +medium, provided that you conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to +the absence of any warranty; and give any other recipients of the Program a copy of this License along +with the Program. + +You may charge a fee for the physical act of transferring a copy, and you may at your option offer +warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on +the Program, and copy and distribute such modifications or work under the terms of Section 1 above, +provided that you also meet all of these conditions: + +a) You must cause the modified files to carry prominent notices stating that you changed the files and +the date of any change. + +b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived +from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under +the terms of this License. + +c) If the modified program normally reads commands interactively when run, you must cause it, when +started running for such interactive use in the most ordinary way, to print or display an announcement +including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you +provide a warranty) and that users may redistribute the program under these conditions, and telling the +user how to view a copy of this License. 
(Exception: if the Program itself is interactive but does not +normally print such an announcement, your work based on the Program is not required to print an +announcement.) + +These requirements apply to the modified work as a whole. If identifiable sections of that work are not +derived from the Program, and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those sections when you distribute them as +separate works. But when you distribute the same sections as part of a whole which is a work based on +the Program, the distribution of the whole must be on the terms of this License, whose permissions for +other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by +you; rather, the intent is to exercise the right to control the distribution of derivative or collective works +based on the Program. + +In addition, mere aggregation of another work not based on the Program with the Program (or with a +work based on the Program) on a volume of a storage or distribution medium does not bring the other +work under the scope of this License. + +3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or +executable form under the terms of Sections 1 and 2 above provided that you also do one of the +following: + +a) Accompany it with the complete corresponding machine-readable source code, which must be +distributed under the terms of Sections 1 and 2 above on a medium customarily used for software +interchange; or, + +b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge +no more than your cost of physically performing source distribution, a complete machine-readable copy +of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange; or, + +c) Accompany it with the information you received as to the offer to distribute corresponding source +code. (This alternative is allowed only for noncommercial distribution and only if you received the +program in object code or executable form with such an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for making modifications to it. For an +executable work, complete source code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to control compilation and installation of the +executable. However, as a special exception, the source code distributed need not include anything that +is normally distributed (in either source or binary form) with the major components (compiler, kernel, +and so on) of the operating system on which the executable runs, unless that component itself +accompanies the executable. 
+ +If distribution of executable or object code is made by offering access to copy from a designated place, +then offering equivalent access to copy the source code from the same place counts as distribution of +the source code, even though third parties are not compelled to copy the source along with the object +code. + +4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under +this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and +will automatically terminate your rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses terminated so long as such parties +remain in full compliance. + +5. You are not required to accept this License, since you have not signed it. However, nothing else grants +you permission to modify or distribute the Program or its derivative works. These actions are prohibited +by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any +work based on the Program), you indicate your acceptance of this License to do so, and all its terms and +conditions for copying, distributing or modifying the Program or works based on it. + +6. Each time you redistribute the Program (or any work based on the Program), the recipient +automatically receives a license from the original licensor to copy, distribute or modify the Program +subject to these terms and conditions. You may not impose any further restrictions on the recipients' +exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties +to this License. + +7. 
If, as a consequence of a court judgment or allegation of patent infringement or for any other reason +(not limited to patent issues), conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of +this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License +and any other pertinent obligations, then as a consequence you may not distribute the Program at all. +For example, if a patent license would not permit royalty-free redistribution of the Program by all those +who receive copies directly or indirectly through you, then the only way you could satisfy both it and this +License would be to refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under any particular circumstance, the +balance of the section is intended to apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any patents or other property right claims +or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of +the free software distribution system, which is implemented by public license practices. Many people +have made generous contributions to the wide range of software distributed through that system in +reliance on consistent application of that system; it is up to the author/donor to decide if he or she is +willing to distribute software through any other system and a licensee cannot impose that choice. + +This section is intended to make thoroughly clear what is believed to be a consequence of the rest of +this License. + +8. 
If the distribution and/or use of the Program is restricted in certain countries either by patents or by +copyrighted interfaces, the original copyright holder who places the Program under this License may add +an explicit geographical distribution limitation excluding those countries, so that distribution is +permitted only in or among countries not thus excluded. In such case, this License incorporates the +limitation as if written in the body of this License. + +9. The Free Software Foundation may publish revised and/or new versions of the General Public License +from time to time. Such new versions will be similar in spirit to the present version, but may differ in +detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program specifies a version number of this +License which applies to it and "any later version", you have the option of following the terms and +conditions either of that version or of any later version published by the Free Software Foundation. If +the Program does not specify a version number of this License, you may choose any version ever +published by the Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs whose distribution +conditions are different, write to the author to ask for permission. For software which is copyrighted by +the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions +for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of +our free software and of promoting the sharing and reuse of software generally. + +NO WARRANTY + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE +PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE STATED IN +WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT +WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE +RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM +PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY +COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM +AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, +INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE +OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH +ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest possible use to the public, the best +way to achieve this is to make it free software which everyone can redistribute and change under these +terms. + +To do so, attach the following notices to the program. It is safest to attach them to the start of each +source file to most effectively convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + +one line to give the program's name and an idea of what it does. 
+ +Copyright (C) yyyy name of author + +This program is free software; you can redistribute it and/or modify it under the terms of the GNU +General Public License as published by the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without +even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program; if not, write +to the Free Software + +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when it starts in an interactive mode: + +Gnomovision version 69, Copyright (C) year name of author + +Gnomovision comes with ABSOLUTELY NO WARRANTY; for details + +type `show w'. This is free software, and you are welcome + +to redistribute it under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate parts of the General +Public License. Of course, the commands you use may be called something other than `show w' and +`show c'; they could even be mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your school, if any, to sign a +"copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright + +interest in the program `Gnomovision' + +(which makes passes at compilers) written + +by James Hacker. + +signature of Ty Coon, 1 April 1989 + +Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into proprietary programs. 
If +your program is a subroutine library, you may consider it more useful to permit linking proprietary +applications with the library. If this is what you want to do, use the GNU Lesser General Public License +instead of this License. + + + +gnu-libtool v2.4.6 (GNU w/libtool exception) + +Copyright Statements + +Copyright (C) 1996-2015 Free Software Foundation, Inc. + +License Text + +Provide generalized library-building support services. Written by Gordon Matzigkeit +gord@gnu.ai.mit.edu, 1996 + +This is free software; see the source for copying conditions. There is NO warranty; not even for +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +GNU Libtool is free software; you can redistribute it and/or modify it under the terms of the GNU +General Public License as published by the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +As a special exception to the GNU General Public License, if you distribute this file as part of a program +or library that is built using GNU Libtool, you may include this file under the +same distribution terms that you use for the rest of that program. GNU Libtool is distributed in the hope +that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with this program. If not, see +http://www.gnu.org/licenses/. + + + +texinfo v2013-02-01.11 (GPL 3 w/tex exception) + +Copyright Statements + +Copyright 1985, 1986, 1988, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, +2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. 
+ +License Text + +This texinfo.tex file is free software: you can redistribute it and/or modify it under the terms of the GNU +General Public License as published by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This texinfo.tex file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See +the GNU General Public License for more details. You should have received a copy of the GNU General +Public License along with this program. If not, see http://www.gnu.org/licenses/. + +As a special exception, when this file is read by TeX when processing a Texinfo source document, you +may use the result without restriction. This Exception is an additional permission under section 7 of the +GNU General Public License, version 3 ("GPLv3"). + +Please try the latest version of texinfo.tex before submitting bug reports; you can get the latest version +from: http://ftp.gnu.org/gnu/texinfo/ (the Texinfo release area), or + +http://ftpmirror.gnu.org/texinfo/ (same, via a mirror), or http://www.gnu.org/software/texinfo/ (the +Texinfo home page). The texinfo.tex in any given distribution could well be out + +of date, so if that's what you're using, please check. + +Send bug reports to bug-texinfo@gnu.org. Please include a complete document in each bug +report with which we can reproduce the problem. Patches are, of course, greatly appreciated. + +To process a Texinfo manual with TeX, it's most reliable to use the texi2dvi shell script that comes with +the distribution. For a simple manual foo.texi, however, you can get away with this: + +tex foo.texi + +texindex foo.?? + +tex foo.texi + +tex foo.texi + +dvips foo.dvi -o # or whatever; this makes foo.ps. + +The extra TeX runs get the cross-reference information correct. 
Sometimes one run after texindex +suffices, and sometimes you need more than two; texi2dvi does it as many times as necessary. + +It is possible to adapt texinfo.tex for other languages, to some extent. You can get the existing +language-specific files from the full Texinfo distribution. + +The GNU Texinfo home page is http://www.gnu.org/software/texinfo. \ No newline at end of file