This change contains the following modifications to the README and configure scripts.

1) Custom CFLAGS support added to the configure and configure.ac scripts when --enable-amd-opt is used. This allows cross-platform compilation with -march=znver2 on Naples and -march=znver1 on Rome. It resolves the JIRA issue [CPUPL-544] reported by HLRS. 2) Default MIT README renamed to README_MIT. A new README_AMD.md is added along with a soft link README.md. This helps to display the AMD README in the AMD GitHub repository. Change-Id: I8dba8e13a4d74764983c63e6cf34cd37492b3693
amd · Dec 12, 2019 · f8a904c · f8a904c
1 parent 023a7b2
commit f8a904c
Show file tree

Hide file tree

Showing 5 changed files with 90 additions and 16 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1 @@
+README_AMD.md
diff --git a/README_AMD.md b/README_AMD.md
@@ -0,0 +1,69 @@
+AMD OPTIMIZED FFTW
+------------------
+
+AMD Optimized FFTW is an optimized FFTW implementation targeted at
+AMD EPYC CPUs. It is developed on top of FFTW (version fftw-3.3.8).
+All known features and functionality of FFTW are retained and supported
+as-is by the AMD Optimized FFTW library.
+
+AMD Optimized FFTW achieves higher performance than FFTW 3.3.8 due to
+various optimizations involving improved SIMD kernel functions, improved copy
+functions (cpy2d and cpy2d_pair used in rank-0 transform and buffering plan),
+improved selection of 256-bit kernels by the planner, and an optional in-place
+transpose for large problem sizes.
+
+FFTW is a free collection of fast C routines for computing the
+Discrete Fourier Transform and various special cases thereof in one or more
+dimensions. It includes complex, real, symmetric, and parallel transforms, 
+and can handle arbitrary array sizes efficiently.
+
+The doc/ directory contains the manual in texinfo, PDF, info, and HTML
+formats.  Frequently asked questions and answers can be found in the
+doc/FAQ/ directory in ASCII and HTML.
+
+For a quick introduction to calling FFTW, see the "Tutorial" section
+of the manual.
+
+INSTALLATION
+------------
+
+INSTALLATION FROM AMD Optimized FFTW GIT REPOSITORY:
+
+After downloading the latest stable release from the git repository,
+https://github.com/amd/amd-fftw, follow the steps below to configure and
+build it for AMD EPYC processors based on the Naples, Rome, and
+future-generation architectures.
+
+     ./configure --enable-sse2 --enable-avx --enable-avx2 \
+                 --enable-mpi --enable-openmp --enable-shared \
+                 --enable-amd-opt \
+                 --prefix=<your-install-dir>
+     make
+     make install
+
+The configure option "--enable-amd-opt" enables all the improvements and 
+optimizations targeted for AMD EPYC CPUs.
+When the configure option "--enable-amd-opt" is enabled, do not use the
+configure option "--enable-generic-simd128" or "--enable-generic-simd256".
+
+An optional configure option "--enable-amd-trans" is provided that may benefit
+the performance of transpose operations in case of very large FFT problem sizes.
+It is disabled by default and is provided as an experimental, optional switch.
+
+By default, the configure script enables double-precision mode. Pass the
+appropriate configure options to enable single-precision, quad-precision,
+or long-double mode.
+
+CONTACTS
+--------
+
+AMD Optimized FFTW is developed and maintained by AMD.
+You can contact us on the email-id [email protected].
+You can also raise issues or suggestions on the GitHub repository at
+https://github.com/amd/amd-fftw/issues
+
+ACKNOWLEDGEMENTS
+----------------
+
+FFTW was developed by Matteo Frigo and Steven G. Johnson. We thank Matteo Frigo
+for the support he has provided to us.
diff --git a/README → README_MIT b/README → README_MIT
diff --git a/configure b/configure
@@ -17447,15 +17447,17 @@ else
 fi
 
 if test "$have_amd_opt" = yes; then
-	if test "$CC" = "clang"; then
-		CFLAGS="$CFLAGS -mavx2 -mfma"
-	else
-		GCCVERSION=$(expr `gcc -dumpversion | cut -f1 -d.` \>= 9)
-		AMDZEN2MODEL=$(expr `cat /proc/cpuinfo | grep -m1 model|cut -f2 -d:` \>= 48)
-		if test "$GCCVERSION" = "1"  && test "$AMDZEN2MODEL" = "1"; then
-			CFLAGS="$CFLAGS -march=znver2 -mavx2 -mno-avx256-split-unaligned-store -mno-avx256-split-unaligned-load -mno-prefer-avx128 -mfma"
+	if test "$ac_test_CFLAGS" != "set"; then
+		if test "$CC" = "clang"; then
+			CFLAGS="$CFLAGS -mavx2 -mfma"
 		else
-			CFLAGS="$CFLAGS -march=znver1 -mavx2 -mno-avx256-split-unaligned-store -mno-avx256-split-unaligned-load -mno-prefer-avx128 -mfma"
+			GCCVERSION=$(expr `gcc -dumpversion | cut -f1 -d.` \>= 9)
+			AMDZEN2MODEL=$(expr `cat /proc/cpuinfo | grep -m1 model|cut -f2 -d:` \>= 48)
+			if test "$GCCVERSION" = "1"  && test "$AMDZEN2MODEL" = "1"; then
+				CFLAGS="$CFLAGS -march=znver2 -mavx2 -mno-avx256-split-unaligned-store -mno-avx256-split-unaligned-load -mno-prefer-avx128 -mfma"
+			else
+				CFLAGS="$CFLAGS -march=znver1 -mavx2 -mno-avx256-split-unaligned-store -mno-avx256-split-unaligned-load -mno-prefer-avx128 -mfma"
+			fi
 		fi
 	fi
 

diff --git a/configure.ac b/configure.ac
@@ -617,15 +617,17 @@ fi
 dnl amd optimization switch and CFLAGS setting based on config arg option --enable-amd-opt
 AC_ARG_ENABLE(amd-opt, [AC_HELP_STRING([--enable-amd-opt],[enable AMD cpu specific optimizations])], have_amd_opt=$enableval, have_amd_opt=no)
 if test "$have_amd_opt" = yes; then
-	if test "$CC" = "clang"; then
-		CFLAGS="$CFLAGS -mavx2 -mfma"
-	else
-		GCCVERSION=$(expr `gcc -dumpversion | cut -f1 -d.` \>= 9)
-		AMDZEN2MODEL=$(expr `cat /proc/cpuinfo | grep -m1 model|cut -f2 -d:` \>= 48)
-		if test "$GCCVERSION" = "1"  && test "$AMDZEN2MODEL" = "1"; then
-			CFLAGS="$CFLAGS -march=znver2 -mavx2 -mno-avx256-split-unaligned-store -mno-avx256-split-unaligned-load -mno-prefer-avx128 -mfma"
+	if test "$ac_test_CFLAGS" != "set"; then
+		if test "$CC" = "clang"; then
+			CFLAGS="$CFLAGS -mavx2 -mfma"
 		else
-			CFLAGS="$CFLAGS -march=znver1 -mavx2 -mno-avx256-split-unaligned-store -mno-avx256-split-unaligned-load -mno-prefer-avx128 -mfma"
+			GCCVERSION=$(expr `gcc -dumpversion | cut -f1 -d.` \>= 9)
+			AMDZEN2MODEL=$(expr `cat /proc/cpuinfo | grep -m1 model|cut -f2 -d:` \>= 48)
+			if test "$GCCVERSION" = "1"  && test "$AMDZEN2MODEL" = "1"; then
+				CFLAGS="$CFLAGS -march=znver2 -mavx2 -mno-avx256-split-unaligned-store -mno-avx256-split-unaligned-load -mno-prefer-avx128 -mfma"
+			else
+				CFLAGS="$CFLAGS -march=znver1 -mavx2 -mno-avx256-split-unaligned-store -mno-avx256-split-unaligned-load -mno-prefer-avx128 -mfma"
+			fi
 		fi
 	fi
 	AC_DEFINE(AMD_OPT_ALL,1,[Define to enable AMD cpu specific optimizations.])