Skip to content

Commit

Permalink
Fix merge conflicts in Readme and configure files
Browse files Browse the repository at this point in the history
  • Loading branch information
prangana committed Jul 6, 2021
2 parents 94199c3 + 7147670 commit 764197a
Show file tree
Hide file tree
Showing 34 changed files with 3,137 additions and 179 deletions.
444 changes: 337 additions & 107 deletions CMakeLists.txt

Large diffs are not rendered by default.

11 changes: 9 additions & 2 deletions README_AMD.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ functions (cpy2d and cpy2d_pair used in rank-0 transform and buffering plan),
improved 256-bit kernels selection by Planner and an optional in-place
transpose for large problem sizes. AMD Optimized FFTW improves the performance
of in-place MPI FFT over FFTW 3.3.8 by employing a faster in-place MPI
transpose function. As of AMD FFTW 3.0, a new fast planner is added as an
transpose function. AMD Optimized FFTW provides a new fast planner as an
extension to the original planner that improves planning time of various
planning modes in general and PATIENT mode in particular.
planning modes in general and PATIENT mode in particular. AMD FFTW 3.0.1
introduces the Top N planner, a new feature that minimizes run-to-run
variation in single-threaded execution.

FFTW is a free collection of fast C routines for computing the
Discrete Fourier Transform and various special cases thereof in one or more
Expand Down Expand Up @@ -53,6 +55,11 @@ configure option "--enable-generic-simd128" or "--enable-generic-simd256".
The optional configure option "--enable-amd-mpifft" enables the MPI FFT
related optimizations.

The optional configure option "--enable-amd-mpi-vader-limit" controls whether
AMD's new MPI transpose algorithms are enabled. When this configure option is
used, the user must also pass a suitable --mca btl_vader_eager_limit value
(65536 is currently recommended) on the mpirun command line.

The new fast planner can be enabled using optional configure option
"--enable-amd-fast-planner". It is supported for single and double precisions.

Expand Down
53 changes: 52 additions & 1 deletion api/apiplan.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
* Copyright (C) 2019, Advanced Micro Devices, Inc. All Rights Reserved.
* Copyright (C) 2019-2021, Advanced Micro Devices, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -29,10 +29,60 @@ void X(set_planner_hooks)(planner_hook_t before, planner_hook_t after)
after_planner_hook = after;
}

#ifdef AMD_TOP_N_PLANNER
/* Candidate plans, one slot per planner index; filled in by mkplan0(). */
plan *plans[AMD_OPT_TOP_N];

/*
 * Return the index of the cheapest plan among the AMD_OPT_TOP_N candidates.
 *
 * candidates: array of AMD_OPT_TOP_N plan pointers; individual entries may
 *             be NULL (the planner failed to produce a plan for that slot).
 *
 * Returns the index of the non-NULL entry with the smallest pcost. Ties are
 * resolved in favour of the lowest index. If every entry is NULL, 0 is
 * returned so that the caller ends up handing back a NULL plan.
 */
static int find_lowcost_plan(plan **candidates)
{
int i;
int lowcost_id = 0;
int have_candidate = 0;
/* pcost is a floating-point cost: keeping the running minimum in an int
   would truncate it (and overflow for very expensive plans). */
double lowcost = 0.0;

for (i = 0; i < AMD_OPT_TOP_N; i++) {
if (!candidates[i])
continue; /* no plan in this slot */

if (!have_candidate || candidates[i]->pcost < lowcost) {
lowcost = candidates[i]->pcost;
lowcost_id = i;
have_candidate = 1;
}
}
return lowcost_id;
}
#endif

/*
 * Configure the planner from the API-level arguments and create a plan
 * for the problem prb.
 *
 * plnr:         planner to configure and invoke
 * flags:        API flags, translated into planner flags by X(mapflags)
 * prb:          problem to plan
 * hash_info:    stored into plnr->flags.hash_info
 * wisdom_state: stored into plnr->wisdom_state
 *
 * Returns whatever plnr->adt->mkplan() produced for the selected candidate.
 *
 * With AMD_TOP_N_PLANNER defined and AMD_OPT_TOP_N > 1, the planner is
 * invoked once per index 0 .. AMD_OPT_TOP_N-1 and every result is stored in
 * the file-scope plans[] array. The candidates that are not returned stay
 * in plans[]; nothing in this function destroys them.
 * NOTE(review): confirm who owns and frees the unreturned candidates.
 * NOTE(review): wisp_set is declared outside this view; presumably it
 * signals that wisdom has been loaded -- confirm.
 */
static plan *mkplan0(planner *plnr, unsigned flags,
const problem *prb, unsigned hash_info,
wisdom_state_t wisdom_state)
{
#ifdef AMD_TOP_N_PLANNER
static int lowcost_idx; /* to hold the index of the plan which has the least pcost among the top N plans*/
/* map API flags into FFTW flags */
X(mapflags)(plnr, flags);

plnr->flags.hash_info = hash_info;
plnr->wisdom_state = wisdom_state;

/* create plan */

if (AMD_OPT_TOP_N > 1) {
if (wisp_set == 1) {
/* build all candidates, then return the one with the lowest pcost */
for (int pln_idx = 0; pln_idx < AMD_OPT_TOP_N ; pln_idx ++) {
plnr->index = pln_idx; /* tells the planner which candidate slot is being requested */
plans[pln_idx] = plnr->adt->mkplan(plnr, prb);
}
lowcost_idx = find_lowcost_plan(plans);
return plans[lowcost_idx];
}
else {
/* build all candidates, but always return the first one */
for (int pln_idx = 0; pln_idx < AMD_OPT_TOP_N ; pln_idx ++) {
plnr->index = pln_idx;
plans[pln_idx] = plnr->adt->mkplan(plnr, prb);
}
return plans[0];
}
}
else {
/* AMD_OPT_TOP_N <= 1: a single planner call with index 0 */
plnr->index = 0;
return plnr->adt->mkplan(plnr, prb);
}
#else
/* map API flags into FFTW flags */
X(mapflags)(plnr, flags);

Expand All @@ -41,6 +91,7 @@ static plan *mkplan0(planner *plnr, unsigned flags,

/* create plan */
return plnr->adt->mkplan(plnr, prb);
#endif
}

static unsigned force_estimator(unsigned flags)
Expand Down
10 changes: 8 additions & 2 deletions api/fftw3.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
* Copyright (C) 2020, Advanced Micro Devices, Inc. All Rights Reserved.
* Copyright (C) 2020-2021, Advanced Micro Devices, Inc. All Rights Reserved.
*
* The following statement of license applies *only* to this header file,
* and *not* to the other files distributed with FFTW or derived therefrom:
Expand Down Expand Up @@ -94,6 +94,11 @@ extern "C"
# define FFTW_CDECL
#endif

/* to avoid symbol conflict with MSVS SDK for 'complex' (Windows only) */
#if defined(_WIN32) || defined(_WIN64)
#undef complex
#endif

enum fftw_r2r_kind_do_not_use_me {
FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2,
FFTW_REDFT00=3, FFTW_REDFT01=4, FFTW_REDFT10=5, FFTW_REDFT11=6,
Expand Down Expand Up @@ -464,7 +469,8 @@ FFTW_DEFINE_API(FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex)
for gcc >= 4.6 (compiled in FFTW with --enable-quad-precision) */
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) || (__clang__ && __clang_major__ >= 10)) \
&& !(defined(__ICC) || defined(__INTEL_COMPILER) || defined(__CUDACC__) || defined(__PGI)) \
&& (defined(__i386__) || defined(__x86_64__) || defined(__ia64__))
&& (defined(__i386__) || defined(__x86_64__) || defined(__ia64__)) \
&& !(defined(_WIN32) || defined(_WIN64))
# if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
/* note: __float128 is a typedef, which is not supported with the _Complex
keyword in gcc, so instead we use this ugly __attribute__ version.
Expand Down
4 changes: 2 additions & 2 deletions cmake.config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
/* #undef FC_DUMMY_MAIN_EQ_F77 */

/* C compiler name and flags */
#define FFTW_CC "@CMAKE_C_COMPILER@"
#define FFTW_CC "@CMAKE_C_COMPILER_FLAGS@"

/* Define to enable extra FFTW debugging code. */
/* #undef FFTW_DEBUG */
Expand Down Expand Up @@ -197,7 +197,7 @@
/* #undef HAVE_MIPS_ZBUS_TIMER */

/* Define if you have the MPI library. */
/* #undef HAVE_MPI */
#cmakedefine HAVE_MPI

/* Define to enable ARM NEON optimizations. */
/* #undef HAVE_NEON */
Expand Down
6 changes: 6 additions & 0 deletions config.h.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
/* config.h.in. Generated from configure.ac by autoheader. */

/* Set VADER LIMIT in order to enable new AMD MPI transpose algorithms. */
#undef AMD_MPI_VADER_LIMIT_SET

/* Define to enable AMD cpu specific optimizations. */
#undef AMD_OPT_ALL

Expand All @@ -9,6 +12,9 @@
/* Define to enable AMD cpu optimized MPI FFT. */
#undef AMD_OPT_MPIFFT

/* Define to enable AMD Top N Planner for AMD cpus. */
#undef AMD_OPT_TOP_N_PLANNER

/* Define to enable AMD cpu optimized Transpose. */
#undef AMD_OPT_TRANS

Expand Down
89 changes: 68 additions & 21 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -875,13 +875,15 @@ with_sysroot
enable_libtool_lock
enable_mpi
enable_amd_opt
enable_amd_mpi_vader_limit
enable_amd_trans
enable_amd_mpifft
enable_openmp
enable_threads
enable_amd_fast_planner
enable_amd_top_n_planner
enable_fortran
with_g77_wrappers
enable_openmp
enable_threads
with_combined_threads
'
ac_precious_vars='build_alias
Expand Down Expand Up @@ -1585,14 +1587,20 @@ Optional Features:
--disable-libtool-lock avoid locking (might break parallel builds)
--enable-mpi compile FFTW MPI library
--enable-amd-opt enable AMD cpu specific optimizations
--enable-amd-mpi-vader-limit
enable setting of VADER LIMIT that controls enabling
of new AMD MPI transpose algorithms
--enable-amd-trans enable AMD cpu optimized Transpose
--enable-amd-mpifft enable AMD cpu optimized MPI FFT
--enable-openmp use OpenMP directives for parallelism
--enable-threads compile FFTW SMP threads library
--enable-amd-fast-planner
enable AMD Fast Planner for a faster planning time
on AMD cpus
--enable-amd-top-n-planner
enable AMD Top N Planner feature to search, save and
reuse top N plans using wisdom file on AMD cpus
--disable-fortran don't include Fortran-callable wrappers
--enable-openmp use OpenMP directives for parallelism
--enable-threads compile FFTW SMP threads library

Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
Expand Down Expand Up @@ -17559,6 +17567,20 @@ if test "$have_amd_opt" = yes && test "${enable_debug+set}" != "set"; then

$as_echo "#define AMD_OPT_ALL 1" >>confdefs.h


# Check whether --enable-amd-mpi-vader-limit was given.
if test "${enable_amd_mpi_vader_limit+set}" = set; then :
enableval=$enable_amd_mpi_vader_limit; have_amd_mpi_vl=$enableval
else
have_amd_mpi_vl=no
fi

if test "$have_amd_mpi_vl" = yes ; then

$as_echo "#define AMD_MPI_VADER_LIMIT_SET 1" >>confdefs.h

fi

fi
# Check whether --enable-amd-trans was given.
if test "${enable_amd_trans+set}" = set; then :
Expand All @@ -17584,8 +17606,22 @@ if test "$have_amd_mpifft" = yes; then
$as_echo "#define AMD_OPT_MPIFFT 1" >>confdefs.h

fi
# Check whether --enable-openmp was given.
if test "${enable_openmp+set}" = set; then :
enableval=$enable_openmp; enable_openmp=$enableval
else
enable_openmp=no
fi

# Check whether --enable-threads was given.
if test "${enable_threads+set}" = set; then :
enableval=$enable_threads; enable_threads=$enableval
else
enable_threads=no
fi

$as_echo "#define AOCL_FFTW_VERSION \"AOCL-3.0\"" >>confdefs.h

$as_echo "#define AOCL_FFTW_VERSION \"AOCL FFTW 3.0.1\"" >>confdefs.h

# Check whether --enable-amd-fast-planner was given.
if test "${enable_amd_fast_planner+set}" = set; then :
Expand All @@ -17594,10 +17630,36 @@ else
have_amd_fast_planner=no
fi

if test "$have_amd_fast_planner" = yes && test "$quad_precision_supported" = no && test "$long_double_supported" = no; then
# Check whether --enable-amd-top-n-planner was given.
if test "${enable_amd_top_n_planner+set}" = set; then :
enableval=$enable_amd_top_n_planner; have_amd_top_n_planner=$enableval
else
have_amd_top_n_planner=no
fi

# Check if both amd-fast-planner and amd-top-n-planner are enabled together
if test "$have_amd_fast_planner" = yes && test "$have_amd_top_n_planner" = yes; then
as_fn_error $? "AMD_FAST_PLANNER and AMD_TOP_N_PLANNER can not be enabled together" "$LINENO" 5
else
if (test "$have_amd_fast_planner" = yes && (test "$quad_precision_supported" = yes || test "$long_double_supported" = yes)); then
as_fn_error $? "AMD_FAST_PLANNER can not be enabled for Quad or Long double" "$LINENO" 5
elif (test "$have_amd_fast_planner" = yes); then

$as_echo "#define AMD_OPT_FAST_PLANNER 1" >>confdefs.h

fi
# Check if amd-top-n-planner is enabled with mpi, openmp or threads
if (test "$have_amd_top_n_planner" = yes && (test "$enable_threads" = yes || test "$enable_openmp" = yes || test "$enable_mpi" = yes)); then
as_fn_error $? "AMD_TOP_N_PLANNER can not be enabled with mpi, openmp or threads as it is supported only for single threaded mode" "$LINENO" 5
else
if (test "$have_amd_top_n_planner" = yes && (test "$quad_precision_supported" = yes || test "$long_double_supported" = yes)); then
as_fn_error $? "AMD_TOP_N_PLANNER can not be enabled for Quad or Long double" "$LINENO" 5
elif (test "$have_amd_top_n_planner" = yes); then

$as_echo "#define AMD_OPT_TOP_N_PLANNER 1" >>confdefs.h

fi
fi
fi

if test "$USE_MAINTAINER_MODE" = yes; then
Expand Down Expand Up @@ -21604,13 +21666,6 @@ $as_echo "#define WITH_G77_WRAPPERS 1" >>confdefs.h
fi

have_smp="no"
# Check whether --enable-openmp was given.
if test "${enable_openmp+set}" = set; then :
enableval=$enable_openmp; enable_openmp=$enableval
else
enable_openmp=no
fi


if test "$enable_openmp" = "yes"; then

Expand Down Expand Up @@ -21687,14 +21742,6 @@ fi

fi

# Check whether --enable-threads was given.
if test "${enable_threads+set}" = set; then :
enableval=$enable_threads; enable_threads=$enableval
else
enable_threads=no
fi


if test "$enable_threads" = "yes"; then

$as_echo "#define HAVE_THREADS 1" >>confdefs.h
Expand Down
37 changes: 31 additions & 6 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,13 @@ if test "$have_amd_opt" = yes && test "${enable_debug+set}" != "set"; then
fi
fi
AC_DEFINE(AMD_OPT_ALL,1,[Define to enable AMD cpu specific optimizations.])

dnl amd switch for VADER LIMIT that controls enabling of AMD's new MPI transpose algorithms --enable-amd-mpi-vader-limit
AC_ARG_ENABLE(amd-mpi-vader-limit, [AC_HELP_STRING([--enable-amd-mpi-vader-limit],[enable setting of VADER LIMIT that controls enabling of new AMD MPI transpose algorithms])], have_amd_mpi_vl=$enableval, have_amd_mpi_vl=no)
if test "$have_amd_mpi_vl" = yes ; then
AC_DEFINE(AMD_MPI_VADER_LIMIT_SET,1,[Set VADER LIMIT in order to enable new AMD MPI transpose algorithms.])
fi

fi
dnl amd optimization switch to enable amd cpu optimized transpose --enable-amd-trans
AC_ARG_ENABLE(amd-trans, [AC_HELP_STRING([--enable-amd-trans],[enable AMD cpu optimized Transpose])], have_amd_trans=$enableval, have_amd_trans=no)
Expand All @@ -722,12 +729,33 @@ AC_ARG_ENABLE(amd-mpifft, [AC_HELP_STRING([--enable-amd-mpifft],[enable AMD cpu
if test "$have_amd_mpifft" = yes; then
AC_DEFINE(AMD_OPT_MPIFFT,1,[Define to enable AMD cpu optimized MPI FFT.])
fi
AC_ARG_ENABLE(openmp, [AC_HELP_STRING([--enable-openmp],[use OpenMP directives for parallelism])], enable_openmp=$enableval, enable_openmp=no)
AC_ARG_ENABLE(threads, [AC_HELP_STRING([--enable-threads],[compile FFTW SMP threads library])], enable_threads=$enableval, enable_threads=no)
dnl aocl version number of amd-fftw
AC_DEFINE(AOCL_FFTW_VERSION,"AOCL-3.0",[AOCL Version of AMD-FFTW])
AC_DEFINE(AOCL_FFTW_VERSION,"AOCL FFTW 3.0.1",[AOCL Version of AMD-FFTW])
dnl amd optimization switch to enable AMD Fast Planner for AMD cpus --enable-amd-fast-planner
AC_ARG_ENABLE(amd-fast-planner, [AC_HELP_STRING([--enable-amd-fast-planner],[enable AMD Fast Planner for a faster planning time on AMD cpus])], have_amd_fast_planner=$enableval, have_amd_fast_planner=no)
if test "$have_amd_fast_planner" = yes && test "$quad_precision_supported" = no && test "$long_double_supported" = no; then
AC_DEFINE(AMD_OPT_FAST_PLANNER,1,[Define to enable AMD Fast Planner for AMD cpus.])
dnl amd optimization switch to enable AMD Top N Planner for AMD cpus --enable-amd-top-n-planner
AC_ARG_ENABLE(amd-top-n-planner, [AC_HELP_STRING([--enable-amd-top-n-planner],[enable AMD Top N Planner feature to search, save and reuse top N plans using wisdom file on AMD cpus])], have_amd_top_n_planner=$enableval, have_amd_top_n_planner=no)
# Check if both amd-fast-planner and amd-top-n-planner are enabled together
if test "$have_amd_fast_planner" = yes && test "$have_amd_top_n_planner" = yes; then
AC_MSG_ERROR([AMD_FAST_PLANNER and AMD_TOP_N_PLANNER can not be enabled together])
else
if (test "$have_amd_fast_planner" = yes && (test "$quad_precision_supported" = yes || test "$long_double_supported" = yes)); then
AC_MSG_ERROR([AMD_FAST_PLANNER can not be enabled for Quad or Long double])
elif (test "$have_amd_fast_planner" = yes); then
AC_DEFINE(AMD_OPT_FAST_PLANNER,1,[Define to enable AMD Fast Planner for AMD cpus.])
fi
# Check if amd-top-n-planner is enabled with mpi, openmp or threads
if (test "$have_amd_top_n_planner" = yes && (test "$enable_threads" = yes || test "$enable_openmp" = yes || test "$enable_mpi" = yes)); then
AC_MSG_ERROR([AMD_TOP_N_PLANNER can not be enabled with mpi, openmp or threads as it is supported only for single threaded mode])
else
if (test "$have_amd_top_n_planner" = yes && (test "$quad_precision_supported" = yes || test "$long_double_supported" = yes)); then
AC_MSG_ERROR([AMD_TOP_N_PLANNER can not be enabled for Quad or Long double])
elif (test "$have_amd_top_n_planner" = yes); then
AC_DEFINE(AMD_OPT_TOP_N_PLANNER,1,[Define to enable AMD Top N Planner for AMD cpus.])
fi
fi
fi

dnl check for a proper indent in maintainer mode
Expand Down Expand Up @@ -789,15 +817,12 @@ fi

dnl -----------------------------------------------------------------------
have_smp="no"
AC_ARG_ENABLE(openmp, [AC_HELP_STRING([--enable-openmp],[use OpenMP directives for parallelism])], enable_openmp=$enableval, enable_openmp=no)

if test "$enable_openmp" = "yes"; then
AC_DEFINE(HAVE_OPENMP,1,[Define to enable OpenMP])
AX_OPENMP([], [AC_MSG_ERROR([don't know how to enable OpenMP])])
fi

AC_ARG_ENABLE(threads, [AC_HELP_STRING([--enable-threads],[compile FFTW SMP threads library])], enable_threads=$enableval, enable_threads=no)

if test "$enable_threads" = "yes"; then
AC_DEFINE(HAVE_THREADS,1,[Define to enable SMP threads])
fi
Expand Down
Loading

0 comments on commit 764197a

Please sign in to comment.