Skip to content

Commit

Permalink
Merge pull request #3353 from ye-luo/remove-forceinline
Browse files Browse the repository at this point in the history
Remove forceinline
  • Loading branch information
PDoakORNL authored Aug 11, 2021
2 parents 18efaa4 + a05b27b commit 0066161
Show file tree
Hide file tree
Showing 9 changed files with 42 additions and 47 deletions.
2 changes: 1 addition & 1 deletion CMake/ClangCompilers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ endif(QMC_OMP)
add_definitions(-Drestrict=__restrict__)

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing -D__forceinline=inline")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing")

# treat VLA as error
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
Expand Down
2 changes: 1 addition & 1 deletion CMake/GNUCompilers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ add_definitions(-Drestrict=__restrict__)

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops -D__forceinline=inline")
"${CMAKE_CXX_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops")

set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fno-omit-frame-pointer")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-omit-frame-pointer")
Expand Down
2 changes: 0 additions & 2 deletions CMake/IBMCompilers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le")
set(CMAKE_CXX_FLAGS "")
endif()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__forceinline=inline")

# Suppress compile warnings
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-deprecated -Wno-unused-value")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated -Wno-unused-value")
Expand Down
2 changes: 1 addition & 1 deletion CMake/IntelCompilers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM" OR INTEL_ONEAPI_COMPILER_FOUND)
add_compile_definitions(restrict=__restrict__)

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing -D__forceinline=inline")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing")

# Force frame-pointer kept in DEBUG build.
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fno-omit-frame-pointer")
Expand Down
3 changes: 0 additions & 3 deletions CMake/NVHPCCompilers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ endif(QMC_OMP)

add_definitions(-Drestrict=__restrict__)

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__forceinline=inline")

# Suppress compile warnings
# 177 variable "XX" was declared but never referenced
# 550 variable "XX" was set but never used
Expand Down
20 changes: 10 additions & 10 deletions src/Containers/OhmmsSoA/TensorSoaContainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ struct TensorSoaContainer<T, 3>

~TensorSoaContainer() = default;

__forceinline void resize(int n)
inline void resize(int n)
{
nLocal = n;
nGhosts = getAlignedSize<T>(n);
Expand All @@ -68,7 +68,7 @@ struct TensorSoaContainer<T, 3>

/** return Tensor<T,3>
*/
__forceinline Tensor<T, 3> operator[](int i) const
inline Tensor<T, 3> operator[](int i) const
{
const T* restrict b = m_data + i;
T xx = *(b);
Expand All @@ -87,10 +87,10 @@ struct TensorSoaContainer<T, 3>
T* _base;
Accessor() = delete;
Accessor(const Accessor&) = delete;
__forceinline Accessor(T* a, int ng) : _base(a), M(ng) {}
inline Accessor(T* a, int ng) : _base(a), M(ng) {}

template<unsigned D>
__forceinline Accessor& operator=(const Tensor<T, D>& rhs)
inline Accessor& operator=(const Tensor<T, D>& rhs)
{
*_base = rhs(0);
*(_base + M) = rhs(1);
Expand All @@ -103,7 +103,7 @@ struct TensorSoaContainer<T, 3>

/** assign value */
template<typename T1>
__forceinline Accessor& operator=(T1 rhs)
inline Accessor& operator=(T1 rhs)
{
*_base = rhs;
*(_base + M) = rhs;
Expand All @@ -119,20 +119,20 @@ struct TensorSoaContainer<T, 3>
*
* Use for (*this)(i)=Tensor<T,3>;
*/
__forceinline Accessor operator()(int i) { return Accessor(m_data.data() + i, nGhosts); }
inline Accessor operator()(int i) { return Accessor(m_data.data() + i, nGhosts); }

///return the base
__forceinline T* data() { return m_data.data(); }
inline T* data() { return m_data.data(); }
///return the base
__forceinline const T* data() const { return m_data.data(); }
inline const T* data() const { return m_data.data(); }
///return the base of XX components
__forceinline T* restrict data(int i, int j)
inline T* restrict data(int i, int j)
{
const int n = (i < j) ? i * 3 + j : j * 3 + i;
return m_data().data() + n * nGhosts;
}
///return the base of XX components
__forceinline const T* restrict data(int i, int j) const
inline const T* restrict data(int i, int j) const
{
const int n = (i < j) ? i * 3 + j : j * 3 + i;
return m_data().data() + n * nGhosts;
Expand Down
40 changes: 20 additions & 20 deletions src/Containers/OhmmsSoA/VectorSoaContainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ struct VectorSoaContainer
*
* nAllocated is used to ensure no memory leak
*/
__forceinline void resize(size_t n)
inline void resize(size_t n)
{
static_assert(std::is_same<Element_t, typename Alloc::value_type>::value,
"VectorSoaContainer and Alloc data types must agree!");
Expand Down Expand Up @@ -144,7 +144,7 @@ struct VectorSoaContainer
}

/// free allocated memory and clear status variables
__forceinline void free()
inline void free()
{
if (nAllocated)
mAllocator.deallocate(myData, nAllocated);
Expand All @@ -161,7 +161,7 @@ struct VectorSoaContainer
*
* To attach to existing memory, currently owned memory must be freed before calling attachReference
*/
__forceinline void attachReference(size_t n, size_t n_padded, T* ptr)
inline void attachReference(size_t n, size_t n_padded, T* ptr)
{
if (nAllocated)
{
Expand Down Expand Up @@ -200,9 +200,9 @@ struct VectorSoaContainer
}

///return the physical size
__forceinline size_t size() const { return nLocal; }
inline size_t size() const { return nLocal; }
///return the capacity (nGhosts), the stride between components
__forceinline size_t capacity() const { return nGhosts; }
inline size_t capacity() const { return nGhosts; }

/** AoS to SoA : copy from Vector<TinyVector<>>
*
Expand All @@ -227,16 +227,16 @@ struct VectorSoaContainer

/** return TinyVector<T,D>
*/
__forceinline const AoSElement_t operator[](size_t i) const { return AoSElement_t(myData + i, nGhosts); }
inline const AoSElement_t operator[](size_t i) const { return AoSElement_t(myData + i, nGhosts); }

///helper class for operator ()(size_t i) to assign a value
struct Accessor
{
T* _base;
size_t M;
__forceinline Accessor(T* a, size_t ng) : _base(a), M(ng) {}
inline Accessor(T* a, size_t ng) : _base(a), M(ng) {}
template<typename T1>
__forceinline Accessor& operator=(const TinyVector<T1, D>& rhs)
inline Accessor& operator=(const TinyVector<T1, D>& rhs)
{
#pragma unroll
for (size_t i = 0; i < D; ++i)
Expand All @@ -246,7 +246,7 @@ struct VectorSoaContainer

/** assign value */
template<typename T1>
__forceinline Accessor& operator=(T1 rhs)
inline Accessor& operator=(T1 rhs)
{
#pragma unroll
for (size_t i = 0; i < D; ++i)
Expand All @@ -259,44 +259,44 @@ struct VectorSoaContainer
*
* Use for (*this)(i)=TinyVector<T,D>;
*/
__forceinline Accessor operator()(size_t i) { return Accessor(myData + i, nGhosts); }
inline Accessor operator()(size_t i) { return Accessor(myData + i, nGhosts); }
///return the base
__forceinline T* data() { return myData; }
inline T* data() { return myData; }
///return the base
__forceinline const T* data() const { return myData; }
inline const T* data() const { return myData; }
/// return non_const data
T* getNonConstData() const { return myData; }
///return the pointer of the i-th components
__forceinline T* restrict data(size_t i) { return myData + i * nGhosts; }
inline T* restrict data(size_t i) { return myData + i * nGhosts; }
///return the const pointer of the i-th components
__forceinline const T* restrict data(size_t i) const { return myData + i * nGhosts; }
inline const T* restrict data(size_t i) const { return myData + i * nGhosts; }
///return the end
__forceinline T* end() { return myData + D * nGhosts; }
inline T* end() { return myData + D * nGhosts; }
///return the end
__forceinline const T* end() const { return myData + D * nGhosts; }
inline const T* end() const { return myData + D * nGhosts; }


///return the base, device
template<typename Allocator = Alloc, typename = IsDualSpace<Allocator>>
__forceinline T* device_data()
inline T* device_data()
{
return mAllocator.get_device_ptr();
}
///return the base, device
template<typename Allocator = Alloc, typename = IsDualSpace<Allocator>>
__forceinline const T* device_data() const
inline const T* device_data() const
{
return mAllocator.get_device_ptr();
}
///return the pointer of the i-th components, device
template<typename Allocator = Alloc, typename = IsDualSpace<Allocator>>
__forceinline T* restrict device_data(size_t i)
inline T* restrict device_data(size_t i)
{
return mAllocator.get_device_ptr() + i * nGhosts;
}
///return the const pointer of the i-th components, device
template<typename Allocator = Alloc, typename = IsDualSpace<Allocator>>
__forceinline const T* restrict device_data(size_t i) const
inline const T* restrict device_data(size_t i) const
{
return mAllocator.get_device_ptr() + i * nGhosts;
}
Expand Down
2 changes: 1 addition & 1 deletion src/QMCTools/DiracParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class DiracParser : public QMCGaussianParserBase, public OhmmsAsciiParser

public:
DiracParser(int argc, char** argv);
void parse(const std::string& fname);
void parse(const std::string& fname) override;

private:
void dumpHDF5(const std::string& fname);
Expand Down
16 changes: 8 additions & 8 deletions src/spline2/MultiBsplineEval.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,54 +40,54 @@ namespace spline2
{
/// evaluate values optionally in the range [first,last)
template<typename SPLINET, typename PT, typename VT>
__forceinline void evaluate3d(const SPLINET& spline, const PT& r, VT& psi)
inline void evaluate3d(const SPLINET& spline, const PT& r, VT& psi)
{
evaluate_v_impl(spline, r[0], r[1], r[2], psi.data(), 0, psi.size());
}

template<typename SPLINET, typename PT, typename VT>
__forceinline void evaluate3d(const SPLINET& spline, const PT& r, VT& psi, int first, int last)
inline void evaluate3d(const SPLINET& spline, const PT& r, VT& psi, int first, int last)
{
evaluate_v_impl(spline, r[0], r[1], r[2], psi.data() + first, first, last);
}

/// evaluate values, gradients, laplacians optionally in the range [first,last)
template<typename SPLINET, typename PT, typename VT, typename GT, typename LT>
__forceinline void evaluate3d_vgl(const SPLINET& spline, const PT& r, VT& psi, GT& grad, LT& lap)
inline void evaluate3d_vgl(const SPLINET& spline, const PT& r, VT& psi, GT& grad, LT& lap)
{
evaluate_vgl_impl(spline, r[0], r[1], r[2], psi.data(), grad.data(), lap.data(), psi.size(), 0, psi.size());
}

template<typename SPLINET, typename PT, typename VT, typename GT, typename LT>
__forceinline void evaluate3d_vgl(const SPLINET& spline, const PT& r, VT& psi, GT& grad, LT& lap, int first, int last)
inline void evaluate3d_vgl(const SPLINET& spline, const PT& r, VT& psi, GT& grad, LT& lap, int first, int last)
{
evaluate_vgl_impl(spline, r[0], r[1], r[2], psi.data() + first, grad.data() + first, lap.data() + first, psi.size(),
first, last);
}

/// evaluate values, gradients, hessians optionally in the range [first,last)
template<typename SPLINET, typename PT, typename VT, typename GT, typename HT>
__forceinline void evaluate3d_vgh(const SPLINET& spline, const PT& r, VT& psi, GT& grad, HT& hess)
inline void evaluate3d_vgh(const SPLINET& spline, const PT& r, VT& psi, GT& grad, HT& hess)
{
evaluate_vgh_impl(spline, r[0], r[1], r[2], psi.data(), grad.data(), hess.data(), psi.size(), 0, psi.size());
}

template<typename SPLINET, typename PT, typename VT, typename GT, typename HT>
__forceinline void evaluate3d_vgh(const SPLINET& spline, const PT& r, VT& psi, GT& grad, HT& hess, int first, int last)
inline void evaluate3d_vgh(const SPLINET& spline, const PT& r, VT& psi, GT& grad, HT& hess, int first, int last)
{
evaluate_vgh_impl(spline, r[0], r[1], r[2], psi.data() + first, grad.data() + first, hess.data() + first, psi.size(),
first, last);
}

template<typename SPLINET, typename PT, typename VT, typename GT, typename HT, typename GHT>
__forceinline void evaluate3d_vghgh(const SPLINET& spline, const PT& r, VT& psi, GT& grad, HT& hess, GHT& ghess)
inline void evaluate3d_vghgh(const SPLINET& spline, const PT& r, VT& psi, GT& grad, HT& hess, GHT& ghess)
{
evaluate_vghgh_impl(spline, r[0], r[1], r[2], psi.data(), grad.data(), hess.data(), ghess.data(), psi.size(), 0,
psi.size());
}

template<typename SPLINET, typename PT, typename VT, typename GT, typename HT, typename GHT>
__forceinline void evaluate3d_vghgh(const SPLINET& spline,
inline void evaluate3d_vghgh(const SPLINET& spline,
const PT& r,
VT& psi,
GT& grad,
Expand Down

0 comments on commit 0066161

Please sign in to comment.