diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake index 3930afc3fb..bed72d755d 100644 --- a/CMake/ClangCompilers.cmake +++ b/CMake/ClangCompilers.cmake @@ -36,7 +36,7 @@ endif(QMC_OMP) add_definitions(-Drestrict=__restrict__) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing -D__forceinline=inline") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing") # treat VLA as error set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla") diff --git a/CMake/GNUCompilers.cmake b/CMake/GNUCompilers.cmake index 5df6cfafa9..1937c98f82 100644 --- a/CMake/GNUCompilers.cmake +++ b/CMake/GNUCompilers.cmake @@ -21,7 +21,7 @@ add_definitions(-Drestrict=__restrict__) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops") set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops -D__forceinline=inline") + "${CMAKE_CXX_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fno-omit-frame-pointer") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-omit-frame-pointer") diff --git a/CMake/IBMCompilers.cmake b/CMake/IBMCompilers.cmake index 0279a0855d..60e40ce3ea 100644 --- a/CMake/IBMCompilers.cmake +++ b/CMake/IBMCompilers.cmake @@ -12,8 +12,6 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le") set(CMAKE_CXX_FLAGS "") endif() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__forceinline=inline") - # Suppress compile warnings set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-deprecated -Wno-unused-value") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated -Wno-unused-value") diff --git a/CMake/IntelCompilers.cmake b/CMake/IntelCompilers.cmake index def9f60c53..77897ab510 100644 --- a/CMake/IntelCompilers.cmake +++ b/CMake/IntelCompilers.cmake @@ -40,7 +40,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM" OR INTEL_ONEAPI_COMPILER_FOUND) add_compile_definitions(restrict=__restrict__) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing -D__forceinline=inline") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing") # Force frame-pointer kept in DEBUG build. set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fno-omit-frame-pointer") diff --git a/CMake/NVHPCCompilers.cmake b/CMake/NVHPCCompilers.cmake index bb502169c2..6acb60d496 100644 --- a/CMake/NVHPCCompilers.cmake +++ b/CMake/NVHPCCompilers.cmake @@ -20,9 +20,6 @@ endif(QMC_OMP) add_definitions(-Drestrict=__restrict__) -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__forceinline=inline") - # Suppress compile warnings # 177 variable "XX" was declared but never referenced # 550 variable "XX" was set but never used diff --git a/src/Containers/OhmmsSoA/TensorSoaContainer.h b/src/Containers/OhmmsSoA/TensorSoaContainer.h index 38f0fd9e93..b18b4c68bc 100644 --- a/src/Containers/OhmmsSoA/TensorSoaContainer.h +++ b/src/Containers/OhmmsSoA/TensorSoaContainer.h @@ -59,7 +59,7 @@ struct TensorSoaContainer ~TensorSoaContainer() = default; - __forceinline void resize(int n) + inline void resize(int n) { nLocal = n; nGhosts = getAlignedSize(n); @@ -68,7 +68,7 @@ struct TensorSoaContainer /** return TinyVector */ - __forceinline Tensor operator[](int i) const + inline Tensor operator[](int i) const { const T* restrict b = m_data + i; T xx = *(b); @@ -87,10 +87,10 @@ struct TensorSoaContainer T* _base; Accessor() = delete; Accessor(const Accessor&) = delete; - __forceinline Accessor(T* a, int ng) : _base(a), M(ng) {} + inline Accessor(T* a, int ng) : _base(a), M(ng) {} template - __forceinline Accessor& operator=(const Tensor& rhs) + inline Accessor& operator=(const Tensor& rhs) { *_base = rhs(0); *(_base + M) = rhs(1); @@ -103,7 +103,7 @@ struct TensorSoaContainer /** assign value */ template - __forceinline Accessor& operator=(T1 rhs) + inline Accessor& operator=(T1 rhs) { *_base = rhs; *(_base + M) = rhs; @@ -119,20 +119,20 @@ struct TensorSoaContainer * * Use for (*this)[i]=Tensor; */ - __forceinline Accessor operator()(int i) { return Accessor(m_data.data() + i, nGhosts); } + inline Accessor operator()(int i) { return Accessor(m_data.data() + i, nGhosts); } ///return the base - __forceinline T* data() { return m_data.data(); } + inline T* data() { return m_data.data(); } ///return the base - __forceinline const T* data() const { return m_data.data(); } + inline const T* data() const { return m_data.data(); } ///return the base of XX components - __forceinline T* restrict data(int i, int j) + inline T* restrict data(int i, int j) { const int n = (i < j) ? i * 3 + j : j * 3 + i; return m_data().data() + n * nGhosts; } ///return the base of XX components - __forceinline const T* restrict data(int i, int j) const + inline const T* restrict data(int i, int j) const { const int n = (i < j) ? i * 3 + j : j * 3 + i; return m_data().data() + n * nGhosts; diff --git a/src/Containers/OhmmsSoA/VectorSoaContainer.h b/src/Containers/OhmmsSoA/VectorSoaContainer.h index 7745b2b6dd..40f0021139 100644 --- a/src/Containers/OhmmsSoA/VectorSoaContainer.h +++ b/src/Containers/OhmmsSoA/VectorSoaContainer.h @@ -111,7 +111,7 @@ struct VectorSoaContainer * * nAllocated is used to ensure no memory leak */ - __forceinline void resize(size_t n) + inline void resize(size_t n) { static_assert(std::is_same::value, "VectorSoaContainer and Alloc data types must agree!"); @@ -144,7 +144,7 @@ struct VectorSoaContainer } /// free allocated memory and clear status variables - __forceinline void free() + inline void free() { if (nAllocated) mAllocator.deallocate(myData, nAllocated); @@ -161,7 +161,7 @@ struct VectorSoaContainer * * To attach to existing memory, currently owned memory must be freed before calling attachReference */ - __forceinline void attachReference(size_t n, size_t n_padded, T* ptr) + inline void attachReference(size_t n, size_t n_padded, T* ptr) { if (nAllocated) { @@ -200,9 +200,9 @@ struct VectorSoaContainer } ///return the physical size - __forceinline size_t size() const { return nLocal; } + inline size_t size() const { return nLocal; } ///return the physical size - __forceinline size_t capacity() const { return nGhosts; } + inline size_t capacity() const { return nGhosts; } /** AoS to SoA : copy from Vector> * @@ -227,16 +227,16 @@ struct VectorSoaContainer /** return TinyVector */ - __forceinline const AoSElement_t operator[](size_t i) const { return AoSElement_t(myData + i, nGhosts); } + inline const AoSElement_t operator[](size_t i) const { return AoSElement_t(myData + i, nGhosts); } ///helper class for operator ()(size_t i) to assign a value struct Accessor { T* _base; size_t M; - __forceinline Accessor(T* a, size_t ng) : _base(a), M(ng) {} + inline Accessor(T* a, size_t ng) : _base(a), M(ng) {} template - __forceinline Accessor& operator=(const TinyVector& rhs) + inline Accessor& operator=(const TinyVector& rhs) { #pragma unroll for (size_t i = 0; i < D; ++i) @@ -246,7 +246,7 @@ struct VectorSoaContainer /** assign value */ template - __forceinline Accessor& operator=(T1 rhs) + inline Accessor& operator=(T1 rhs) { #pragma unroll for (size_t i = 0; i < D; ++i) @@ -259,44 +259,44 @@ struct VectorSoaContainer * * Use for (*this)[i]=TinyVector; */ - __forceinline Accessor operator()(size_t i) { return Accessor(myData + i, nGhosts); } + inline Accessor operator()(size_t i) { return Accessor(myData + i, nGhosts); } ///return the base - __forceinline T* data() { return myData; } + inline T* data() { return myData; } ///return the base - __forceinline const T* data() const { return myData; } + inline const T* data() const { return myData; } /// return non_const data T* getNonConstData() const { return myData; } ///return the pointer of the i-th components - __forceinline T* restrict data(size_t i) { return myData + i * nGhosts; } + inline T* restrict data(size_t i) { return myData + i * nGhosts; } ///return the const pointer of the i-th components - __forceinline const T* restrict data(size_t i) const { return myData + i * nGhosts; } + inline const T* restrict data(size_t i) const { return myData + i * nGhosts; } ///return the end - __forceinline T* end() { return myData + D * nGhosts; } + inline T* end() { return myData + D * nGhosts; } ///return the end - __forceinline const T* end() const { return myData + D * nGhosts; } + inline const T* end() const { return myData + D * nGhosts; } ///return the base, device template> - __forceinline T* device_data() + inline T* device_data() { return mAllocator.get_device_ptr(); } ///return the base, device template> - __forceinline const T* device_data() const + inline const T* device_data() const { return mAllocator.get_device_ptr(); } ///return the pointer of the i-th components, device template> - __forceinline T* restrict device_data(size_t i) + inline T* restrict device_data(size_t i) { return mAllocator.get_device_ptr() + i * nGhosts; } ///return the const pointer of the i-th components, device template> - __forceinline const T* restrict device_data(size_t i) const + inline const T* restrict device_data(size_t i) const { return mAllocator.get_device_ptr() + i * nGhosts; } diff --git a/src/spline2/MultiBsplineEval.hpp b/src/spline2/MultiBsplineEval.hpp index 8cea6e6feb..afb8327544 100644 --- a/src/spline2/MultiBsplineEval.hpp +++ b/src/spline2/MultiBsplineEval.hpp @@ -40,26 +40,26 @@ namespace spline2 { /// evaluate values optionally in the range [first,last) template -__forceinline void evaluate3d(const SPLINET& spline, const PT& r, VT& psi) +inline void evaluate3d(const SPLINET& spline, const PT& r, VT& psi) { evaluate_v_impl(spline, r[0], r[1], r[2], psi.data(), 0, psi.size()); } template -__forceinline void evaluate3d(const SPLINET& spline, const PT& r, VT& psi, int first, int last) +inline void evaluate3d(const SPLINET& spline, const PT& r, VT& psi, int first, int last) { evaluate_v_impl(spline, r[0], r[1], r[2], psi.data() + first, first, last); } /// evaluate values, gradients, laplacians optionally in the range [first,last) template -__forceinline void evaluate3d_vgl(const SPLINET& spline, const PT& r, VT& psi, GT& grad, LT& lap) +inline void evaluate3d_vgl(const SPLINET& spline, const PT& r, VT& psi, GT& grad, LT& lap) { evaluate_vgl_impl(spline, r[0], r[1], r[2], psi.data(), grad.data(), lap.data(), psi.size(), 0, psi.size()); } template -__forceinline void evaluate3d_vgl(const SPLINET& spline, const PT& r, VT& psi, GT& grad, LT& lap, int first, int last) +inline void evaluate3d_vgl(const SPLINET& spline, const PT& r, VT& psi, GT& grad, LT& lap, int first, int last) { evaluate_vgl_impl(spline, r[0], r[1], r[2], psi.data() + first, grad.data() + first, lap.data() + first, psi.size(), first, last); @@ -67,27 +67,27 @@ __forceinline void evaluate3d_vgl(const SPLINET& spline, const PT& r, VT& psi, G /// evaluate values, gradients, hessians optionally in the range [first,last) template -__forceinline void evaluate3d_vgh(const SPLINET& spline, const PT& r, VT& psi, GT& grad, HT& hess) +inline void evaluate3d_vgh(const SPLINET& spline, const PT& r, VT& psi, GT& grad, HT& hess) { evaluate_vgh_impl(spline, r[0], r[1], r[2], psi.data(), grad.data(), hess.data(), psi.size(), 0, psi.size()); } template -__forceinline void evaluate3d_vgh(const SPLINET& spline, const PT& r, VT& psi, GT& grad, HT& hess, int first, int last) +inline void evaluate3d_vgh(const SPLINET& spline, const PT& r, VT& psi, GT& grad, HT& hess, int first, int last) { evaluate_vgh_impl(spline, r[0], r[1], r[2], psi.data() + first, grad.data() + first, hess.data() + first, psi.size(), first, last); } template -__forceinline void evaluate3d_vghgh(const SPLINET& spline, const PT& r, VT& psi, GT& grad, HT& hess, GHT& ghess) +inline void evaluate3d_vghgh(const SPLINET& spline, const PT& r, VT& psi, GT& grad, HT& hess, GHT& ghess) { evaluate_vghgh_impl(spline, r[0], r[1], r[2], psi.data(), grad.data(), hess.data(), ghess.data(), psi.size(), 0, psi.size()); } template -__forceinline void evaluate3d_vghgh(const SPLINET& spline, +inline void evaluate3d_vghgh(const SPLINET& spline, const PT& r, VT& psi, GT& grad,