Skip to content

Commit

Permalink
This code change provides a set of fixes for specific build errors observed with GCC and AOCC compilers.
Browse files Browse the repository at this point in the history

1) GCC10 and AOCC3.0 treat global variables defined in a header without extern
   as errors. Both compilers would throw a "Multiple definitions" error for the
   L1D_blk_size and L1Dsize variables when configured with the "--enable-avx"
   SIMD option along with the option "--enable-amd-trans". This has now been fixed.
2) Code fix provided for the "Undefined reference" error to 'cpuid_all' when
   configured with the "--enable-sse2 --enable-avx2" SIMD options along with the
   option "--enable-amd-trans".
3) Code fix provided for the "Undeclared variables" errors thrown for 'ALIGNMENT' and
   'ALIGNMENTA' when configured with "--enable-amd-fast-planner" without any SIMD
   configure options.

Change-Id: I8b5408d72c1bb74a000ee6fdc95d1ea87d4baba3
  • Loading branch information
BiplabRaut committed Jun 18, 2021
1 parent a1fb312 commit 7147670
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 8 deletions.
4 changes: 2 additions & 2 deletions dft/conf.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
* Copyright (C) 2019, Advanced Micro Devices, Inc. All Rights Reserved.
* Copyright (C) 2019-2021, Advanced Micro Devices, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -90,7 +90,7 @@ void X(dft_conf_standard)(planner *p)
#if HAVE_GENERIC_SIMD256
X(solvtab_exec)(X(solvtab_dft_generic_simd256), p);
#endif
#ifdef AMD_OPT_AUTO_TUNED_TRANS_BLK_SIZE
#ifdef AMD_OPT_TRANS
X(enquire_L1DcacheSize)();
#endif
}
17 changes: 15 additions & 2 deletions kernel/ifftw.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,16 @@ extern "C"
#if defined(HAVE_MPI) || defined(HAVE_OPENMP)
#undef AMD_OPT_TRANS
#endif
#if defined(HAVE_SSE) || defined(HAVE_SSE2) || \
defined(HAVE_AVX) || defined(HAVE_AVX_128_FMA) || \
defined(HAVE_AVX2) || defined(HAVE_AVX512)
#ifdef AMD_OPT_TRANS
#define AMD_OPT_AUTO_TUNED_TRANS_BLK_SIZE
#define AMD_OPT_AUTO_TUNED_RASTER_TILED_TRANS_METHOD
#endif
#else
#undef AMD_OPT_TRANS
#endif
//Here they are again provided for manual override to enable them.
//(i) enables auto-tuned block sized tiling as per CPU's L1D cache size (applicable for both original
// FFTW's transpose and the new auto-tuned cache-efficient raster order tiled transpose
Expand Down Expand Up @@ -148,10 +154,17 @@ extern "C"
//The UNBLESSED HASH table is kept alive for the life of the process/thread, like the BLESSED HASH table.
//Because the UNBLESSED HASH table keeps growing, it is cleared smartly beyond a MAX SIZE by swapping with the BLESSED table.
#ifdef AMD_OPT_FAST_PLANNER

#if defined(HAVE_SSE) || defined(HAVE_SSE2) || \
defined(HAVE_AVX) || defined(HAVE_AVX_128_FMA) || \
defined(HAVE_AVX2) || defined(HAVE_AVX512)

#define AMD_FAST_PLANNER
#define AMD_FAST_PLANNING_HASH_V1
//#define AMD_FAST_PLANNING_HASH_V2
#define AMD_HASH_UNBLESS_MAX_SIZE 10485760

#endif
#endif
//--------------------------------
//NEW TOP N PLANNER feature for AMD CPUs can be enabled with the below switch AMD_TOP_N_PLANNER.
Expand Down Expand Up @@ -1068,8 +1081,8 @@ void X(rader_tl_delete)(R *W, rader_tl **tl);
/* upper bound to the cache size based on latest CPU architectures, for AMD optimized tiled routines */
#define CACHESIZE 32768
#define BLK_SIZE 32
unsigned int L1D_blk_size;// = CACHESIZE;
unsigned int L1Dsize;// = BLK_SIZE;
extern unsigned int L1D_blk_size;// = CACHESIZE;
extern unsigned int L1Dsize;// = BLK_SIZE;
#else
/* lower bound to the cache size, for tiled routines */
#define CACHESIZE 8192
Expand Down
6 changes: 5 additions & 1 deletion kernel/tile2d.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
* Copyright (C) 2019, Advanced Micro Devices, Inc. All Rights Reserved.
* Copyright (C) 2019-2021, Advanced Micro Devices, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand All @@ -22,6 +22,10 @@
/* out of place 2D copy routines */
#include "kernel/ifftw.h"

#if defined(AMD_OPT_AUTO_TUNED_TRANS_BLK_SIZE)
/* Definition of L1Dsize, compiled only when AMD_OPT_AUTO_TUNED_TRANS_BLK_SIZE
 * is defined. kernel/ifftw.h declares this variable with extern, so the
 * storage for it is provided here rather than in the header.
 * No initializer is given; the trailing remark below is commented out.
 * NOTE(review): the "= BLK_SIZE" remark looks swapped with the "= CACHESIZE"
 * remark attached to L1D_blk_size -- confirm which default was intended. */
unsigned int L1Dsize;// = BLK_SIZE;
#endif

void X(tile2d)(INT n0l, INT n0u, INT n1l, INT n1u, INT tilesz,
void (*f)(INT n0l, INT n0u, INT n1l, INT n1u, void *args),
void *args)
Expand Down
4 changes: 4 additions & 0 deletions kernel/transpose.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
#include "immintrin.h"
#endif

#if defined(AMD_OPT_AUTO_TUNED_RASTER_TILED_TRANS_METHOD)
/* Definition of L1D_blk_size, compiled only when
 * AMD_OPT_AUTO_TUNED_RASTER_TILED_TRANS_METHOD is defined. kernel/ifftw.h
 * declares this variable with extern, so the storage for it is provided here
 * rather than in the header.
 * No initializer is given; the trailing remark below is commented out.
 * NOTE(review): the "= CACHESIZE" remark looks swapped with the "= BLK_SIZE"
 * remark attached to L1Dsize -- confirm which default was intended. */
unsigned int L1D_blk_size;// = CACHESIZE;
#endif

/* in place square transposition, iterative */
void X(transpose)(R *I, INT n, INT s0, INT s1, INT vl)
{
Expand Down
4 changes: 4 additions & 0 deletions simd-support/amd64-cpuid.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
* Copyright (C) 2021, Advanced Micro Devices, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand All @@ -18,6 +19,8 @@
*
*/

#ifndef _AMD64_CPUID_H
#define _AMD64_CPUID_H

#ifdef _MSC_VER
#ifndef inline
Expand Down Expand Up @@ -146,3 +149,4 @@ static inline int xgetbv_eax(int op)
return eax;
#endif
}
#endif
9 changes: 7 additions & 2 deletions simd-support/avx.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
* Copyright (C) 2019, Advanced Micro Devices, Inc. All Rights Reserved.
* Copyright (C) 2019-2021, Advanced Micro Devices, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -53,7 +53,12 @@ int X(have_simd_avx)(void)

#endif

#ifdef AMD_OPT_AUTO_TUNED_TRANS_BLK_SIZE
#ifdef AMD_OPT_TRANS
#if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64)
# include "amd64-cpuid.h"
#else
# include "x86-cpuid.h"
#endif
void X(enquire_L1DcacheSize) (void)
{
int eax, ebx, ecx, edx;
Expand Down
5 changes: 4 additions & 1 deletion simd-support/sse2.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
* Copyright (C) 2021, Advanced Micro Devices, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -30,7 +31,9 @@
#if HAVE_SSE2

# if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64)

#ifdef AMD_OPT_TRANS
# include "amd64-cpuid.h"
#endif
int X(have_simd_sse2)(void)
{
return 1;
Expand Down

0 comments on commit 7147670

Please sign in to comment.