From 7147670ce6be8c189f1d84a95491f0677c14be06 Mon Sep 17 00:00:00 2001 From: sraut Date: Fri, 18 Jun 2021 21:21:42 +0530 Subject: [PATCH] This code change provides a set of fixes for specific build errors observed with GCC and AOCC compilers. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1) GCC10 and AOCC3.0 treat global variables without extern as errors. Both compilers would throw "Multiple definitions" error for L1D_blk_size and L1Dsize variables when configured with "--enable-avx" SIMD option along with the option "--enable-amd-trans". This has now been fixed. 2) Code fix provided for the "Undefined reference" error to ‘cpuid_all’ when configured with "--enable-sse2 --enable-avx2" SIMD options along with the option "--enable-amd-trans". 3) Code fix provided for "Undeclared variables" errors thrown for ‘ALIGNMENT’ and ‘ALIGNMENTA’ when configured with "--enable-amd-fast-planner" without any SIMD configure options. Change-Id: I8b5408d72c1bb74a000ee6fdc95d1ea87d4baba3 --- dft/conf.c | 4 ++-- kernel/ifftw.h | 17 +++++++++++++++-- kernel/tile2d.c | 6 +++++- kernel/transpose.c | 4 ++++ simd-support/amd64-cpuid.h | 4 ++++ simd-support/avx.c | 9 +++++++-- simd-support/sse2.c | 5 ++++- 7 files changed, 41 insertions(+), 8 deletions(-) diff --git a/dft/conf.c b/dft/conf.c index 3ed498b7..97ec0e9c 100644 --- a/dft/conf.c +++ b/dft/conf.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2007-14 Matteo Frigo * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology - * Copyright (C) 2019, Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (C) 2019-2021, Advanced Micro Devices, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -90,7 +90,7 @@ void X(dft_conf_standard)(planner *p) #if HAVE_GENERIC_SIMD256 X(solvtab_exec)(X(solvtab_dft_generic_simd256), p); #endif -#ifdef AMD_OPT_AUTO_TUNED_TRANS_BLK_SIZE +#ifdef AMD_OPT_TRANS X(enquire_L1DcacheSize)(); #endif } diff --git a/kernel/ifftw.h b/kernel/ifftw.h index 89b38dd4..098bb4a4 100644 --- a/kernel/ifftw.h +++ b/kernel/ifftw.h @@ -107,10 +107,16 @@ extern "C" #if defined(HAVE_MPI) || defined(HAVE_OPENMP) #undef AMD_OPT_TRANS #endif +#if defined(HAVE_SSE) || defined(HAVE_SSE2) || \ + defined(HAVE_AVX) || defined(HAVE_AVX_128_FMA) || \ + defined(HAVE_AVX2) || defined(HAVE_AVX512) #ifdef AMD_OPT_TRANS #define AMD_OPT_AUTO_TUNED_TRANS_BLK_SIZE #define AMD_OPT_AUTO_TUNED_RASTER_TILED_TRANS_METHOD #endif +#else +#undef AMD_OPT_TRANS +#endif //Here they are again provided for manual override to enable them. //(i) enables auto-tuned block sized tiling as per CPU's L1D cache size (applicable for both original // FFTW's transpose and the new auto-tuned cache-efficient raster order tiled transpose @@ -148,10 +154,17 @@ extern "C" //UNBLESSED HASH table is kept alive till the process/thread life like the BLESSED HASH table. //Since UNBLESSED HASH table keeps growing, so it is cleared smartly beyond a MAX SIZE by swapping with BLESSED table. #ifdef AMD_OPT_FAST_PLANNER + +#if defined(HAVE_SSE) || defined(HAVE_SSE2) || \ + defined(HAVE_AVX) || defined(HAVE_AVX_128_FMA) || \ + defined(HAVE_AVX2) || defined(HAVE_AVX512) + #define AMD_FAST_PLANNER #define AMD_FAST_PLANNING_HASH_V1 //#define AMD_FAST_PLANNING_HASH_V2 #define AMD_HASH_UNBLESS_MAX_SIZE 10485760 + +#endif #endif //-------------------------------- //NEW TOP N PLANNER feature for AMD CPUs can be enabled with the below switch AMD_TOP_N_PLANNER. 
@@ -1068,8 +1081,8 @@ void X(rader_tl_delete)(R *W, rader_tl **tl); /* upper bound to the cache size based on latest CPU architectures, for AMD optimized tiled routines */ #define CACHESIZE 32768 #define BLK_SIZE 32 -unsigned int L1D_blk_size;// = CACHESIZE; -unsigned int L1Dsize;// = BLK_SIZE; +extern unsigned int L1D_blk_size;// = CACHESIZE; +extern unsigned int L1Dsize;// = BLK_SIZE; #else /* lower bound to the cache size, for tiled routines */ #define CACHESIZE 8192 diff --git a/kernel/tile2d.c b/kernel/tile2d.c index e99acc3d..2f962281 100644 --- a/kernel/tile2d.c +++ b/kernel/tile2d.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2007-14 Matteo Frigo * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology - * Copyright (C) 2019, Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (C) 2019-2021, Advanced Micro Devices, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -22,6 +22,10 @@ /* out of place 2D copy routines */ #include "kernel/ifftw.h" +#if defined(AMD_OPT_AUTO_TUNED_TRANS_BLK_SIZE) +unsigned int L1Dsize;// = BLK_SIZE; +#endif + void X(tile2d)(INT n0l, INT n0u, INT n1l, INT n1u, INT tilesz, void (*f)(INT n0l, INT n0u, INT n1l, INT n1u, void *args), void *args) diff --git a/kernel/transpose.c b/kernel/transpose.c index ca83fd73..39732546 100644 --- a/kernel/transpose.c +++ b/kernel/transpose.c @@ -25,6 +25,10 @@ #include "immintrin.h" #endif +#if defined(AMD_OPT_AUTO_TUNED_RASTER_TILED_TRANS_METHOD) +unsigned int L1D_blk_size;// = CACHESIZE; +#endif + /* in place square transposition, iterative */ void X(transpose)(R *I, INT n, INT s0, INT s1, INT vl) { diff --git a/simd-support/amd64-cpuid.h b/simd-support/amd64-cpuid.h index 9b91f497..acbc8288 100644 --- a/simd-support/amd64-cpuid.h +++ b/simd-support/amd64-cpuid.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2003, 2007-14 Matteo Frigo * Copyright (c) 2003, 2007-14 
Massachusetts Institute of Technology + * Copyright (C) 2021, Advanced Micro Devices, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,6 +19,8 @@ * */ +#ifndef _AMD64_CPUID_H +#define _AMD64_CPUID_H #ifdef _MSC_VER #ifndef inline @@ -146,3 +149,4 @@ static inline int xgetbv_eax(int op) return eax; #endif } +#endif diff --git a/simd-support/avx.c b/simd-support/avx.c index 6e57b71b..7e96ee07 100644 --- a/simd-support/avx.c +++ b/simd-support/avx.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2007-14 Matteo Frigo * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology - * Copyright (C) 2019, Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (C) 2019-2021, Advanced Micro Devices, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -53,7 +53,12 @@ int X(have_simd_avx)(void) #endif -#ifdef AMD_OPT_AUTO_TUNED_TRANS_BLK_SIZE +#ifdef AMD_OPT_TRANS +#if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) +# include "amd64-cpuid.h" +#else +# include "x86-cpuid.h" +#endif void X(enquire_L1DcacheSize) (void) { int eax, ebx, ecx, edx; diff --git a/simd-support/sse2.c b/simd-support/sse2.c index c52c852e..d6cdceed 100644 --- a/simd-support/sse2.c +++ b/simd-support/sse2.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2003, 2007-14 Matteo Frigo * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * Copyright (C) 2021, Advanced Micro Devices, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,7 +31,9 @@ #if HAVE_SSE2 # if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) - +#ifdef AMD_OPT_TRANS +# include "amd64-cpuid.h" +#endif int X(have_simd_sse2)(void) { return 1;