diff --git a/src/Drivers/check_spo.cpp b/src/Drivers/check_spo.cpp index 57ee568d8..0afefc6ee 100644 --- a/src/Drivers/check_spo.cpp +++ b/src/Drivers/check_spo.cpp @@ -273,27 +273,27 @@ int main(int argc, char **argv) { // value evalVGH_v_err += - std::fabs((*spo.psi[ib])[n] - (*spo_ref.psi[ib])[n]); + std::fabs(spo.psi[ib][n] - spo_ref.psi[ib][n]); // grad - evalVGH_g_err += std::fabs(spo.grad[ib]->data(0)[n] - - spo_ref.grad[ib]->data(0)[n]); - evalVGH_g_err += std::fabs(spo.grad[ib]->data(1)[n] - - spo_ref.grad[ib]->data(1)[n]); - evalVGH_g_err += std::fabs(spo.grad[ib]->data(2)[n] - - spo_ref.grad[ib]->data(2)[n]); + evalVGH_g_err += std::fabs(spo.grad[ib].data(0)[n] - + spo_ref.grad[ib].data(0)[n]); + evalVGH_g_err += std::fabs(spo.grad[ib].data(1)[n] - + spo_ref.grad[ib].data(1)[n]); + evalVGH_g_err += std::fabs(spo.grad[ib].data(2)[n] - + spo_ref.grad[ib].data(2)[n]); // hess - evalVGH_h_err += std::fabs(spo.hess[ib]->data(0)[n] - - spo_ref.hess[ib]->data(0)[n]); - evalVGH_h_err += std::fabs(spo.hess[ib]->data(1)[n] - - spo_ref.hess[ib]->data(1)[n]); - evalVGH_h_err += std::fabs(spo.hess[ib]->data(2)[n] - - spo_ref.hess[ib]->data(2)[n]); - evalVGH_h_err += std::fabs(spo.hess[ib]->data(3)[n] - - spo_ref.hess[ib]->data(3)[n]); - evalVGH_h_err += std::fabs(spo.hess[ib]->data(4)[n] - - spo_ref.hess[ib]->data(4)[n]); - evalVGH_h_err += std::fabs(spo.hess[ib]->data(5)[n] - - spo_ref.hess[ib]->data(5)[n]); + evalVGH_h_err += std::fabs(spo.hess[ib].data(0)[n] - + spo_ref.hess[ib].data(0)[n]); + evalVGH_h_err += std::fabs(spo.hess[ib].data(1)[n] - + spo_ref.hess[ib].data(1)[n]); + evalVGH_h_err += std::fabs(spo.hess[ib].data(2)[n] - + spo_ref.hess[ib].data(2)[n]); + evalVGH_h_err += std::fabs(spo.hess[ib].data(3)[n] - + spo_ref.hess[ib].data(3)[n]); + evalVGH_h_err += std::fabs(spo.hess[ib].data(4)[n] - + spo_ref.hess[ib].data(4)[n]); + evalVGH_h_err += std::fabs(spo.hess[ib].data(5)[n] - + spo_ref.hess[ib].data(5)[n]); } if (ur[iel] > accept) { @@ -322,7 +322,7 @@ int main(int argc, char **argv) for (int ib = 0; ib < spo.nBlocks; ib++) for (int n = 0; n < spo.nSplinesPerBlock; n++) evalV_v_err += - std::fabs((*spo.psi[ib])[n] - (*spo_ref.psi[ib])[n]); + std::fabs(spo.psi[ib][n] - spo_ref.psi[ib][n]); } } // els } // ions diff --git a/src/Numerics/Containers.h b/src/Numerics/Containers.h index b32dd8d97..83f4cef35 100644 --- a/src/Numerics/Containers.h +++ b/src/Numerics/Containers.h @@ -159,9 +159,9 @@ template struct VectorSoAContainer * * Free existing memory and reset the internal variables */ - __forceinline void resetByRef(size_t n, size_t n_padded, T *ptr) + __forceinline void attachReference(size_t n, size_t n_padded, T *ptr) { - if (nAllocated) myAlloc.deallocate(myData, nAllocated); + if(nAllocated) throw std::runtime_error("Pointer attaching is not allowed on VectorSoAContainer with allocated memory."); nAllocated = 0; nLocal = n; nGhosts = n_padded; @@ -198,7 +198,7 @@ template struct VectorSoAContainer /** return TinyVector */ - __forceinline Type_t operator[](size_t i) const + __forceinline const Type_t operator[](size_t i) const { return Type_t(myData + i, nGhosts); } diff --git a/src/Numerics/OhmmsPETE/OhmmsMatrix.h b/src/Numerics/OhmmsPETE/OhmmsMatrix.h index 0990c02f3..4be5ae4d8 100644 --- a/src/Numerics/OhmmsPETE/OhmmsMatrix.h +++ b/src/Numerics/OhmmsPETE/OhmmsMatrix.h @@ -1,48 +1,45 @@ -//////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////// // This file is distributed under the University of Illinois/NCSA Open Source -// License. See LICENSE file in top directory for details. +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: -// Ken Esler, kpesler@gmail.com, -// University of Illinois at Urbana-Champaign -// Miguel Morales, moralessilva2@llnl.gov, -// Lawrence Livermore National Laboratory -// Jeremy McMinnis, jmcminis@gmail.com, -// University of Illinois at Urbana-Champaign +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at +// Urbana-Champaign +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore +//National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at +// Urbana-Champaign // -// File created by: -// Jeongnim Kim, jeongnim.kim@gmail.com, -// University of Illinois at Urbana-Champaign -//////////////////////////////////////////////////////////////////////////////// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// #ifndef OHMMS_PETE_MATRIX_H #define OHMMS_PETE_MATRIX_H #include "Numerics/PETE/PETE.h" #include -#include +#include "Numerics/OhmmsPETE/OhmmsVector.h" #include namespace qmcplusplus { -template > class Matrix +template > class Matrix { public: - // clang-format off - typedef T Type_t; - typedef T value_type; - typedef T* pointer; - typedef const T* const_pointer; - typedef C Container_t; - typedef typename C::size_type size_type; + typedef T Type_t; + typedef T value_type; + typedef T *pointer; + typedef const T *const_pointer; + typedef Vector Container_t; + typedef typename Container_t::size_type size_type; typedef typename Container_t::iterator iterator; - typedef Matrix This_t; - // clang-format on + typedef Matrix This_t; - Matrix() : D1(0), D2(0) {} // Default Constructor initializes to zero. + Matrix() + : D1(0), D2(0), TotSize(0) {} // Default Constructor initializes to zero. Matrix(size_type n) { @@ -56,13 +53,19 @@ template > class Matrix // assign(*this, T()); } + /** constructor with an initialized ref */ + inline Matrix(T *ref, size_type n, size_type m) + : D1(n), D2(m), TotSize(n * m), X(ref, n * m) + { + } + // Copy Constructor - Matrix(const Matrix &rhs) { copy(rhs); } + Matrix(const Matrix &rhs) { copy(rhs); } // Destructor ~Matrix() {} - inline size_type size() const { return X.size(); } + inline size_type size() const { return TotSize; } inline size_type rows() const { return D1; } inline size_type cols() const { return D2; } inline size_type size1() const { return D1; } @@ -100,26 +103,40 @@ template > class Matrix X.resize(n * m); } + // free the matrix storage + inline void free() { X.free(); } + + // Attach to pre-allocated memory + inline void attachReference(T *ref) { X.attachReference(ref, TotSize); } + + inline void attachReference(T *ref, size_type n, size_type m) + { + D1 = n; + D2 = m; + TotSize = n * m; + X.attachReference(ref, TotSize); + } + inline void add(size_type n) // you can add rows: adding columns are forbidden { X.insert(X.end(), n * D2, T()); D1 += n; } - inline void copy(const Matrix &rhs) + inline void copy(const Matrix &rhs) { resize(rhs.D1, rhs.D2); assign(*this, rhs); } // Assignment Operators - inline This_t &operator=(const Matrix &rhs) + inline This_t &operator=(const Matrix &rhs) { resize(rhs.D1, rhs.D2); return assign(*this, rhs); } - inline const This_t &operator=(const Matrix &rhs) const + inline const This_t &operator=(const Matrix &rhs) const { return assign(*this, rhs); } @@ -131,35 +148,35 @@ template > class Matrix // Get and Set Operations for assignment operators // returns a pointer of i-th row - inline pointer data() { return &(X[0]); } + inline pointer data() { return X.data(); } // returns a pointer of i-th row - inline const_pointer data() const { return &(X[0]); } + inline const_pointer data() const { return X.data(); } // returns a const pointer of i-th row - inline const Type_t *data(size_type i) const { return &(X[0]) + i * D2; } + inline const Type_t *data(size_type i) const { return X.data() + i * D2; } /// returns a pointer of i-th row, g++ iterator problem - inline Type_t *data(size_type i) { return &(X[0]) + i * D2; } + inline Type_t *data(size_type i) { return X.data() + i * D2; } - inline pointer first_address() { return &(X[0]); } + inline pointer first_address() { return X.data(); } // returns a pointer of i-th row - inline const_pointer first_address() const { return &(X[0]); } + inline const_pointer first_address() const { return X.data(); } - inline pointer last_address() { return &(X[0]) + TotSize; } + inline pointer last_address() { return X.data() + TotSize; } // returns a pointer of i-th row - inline const Type_t *last_address() const { return &(X[0]) + TotSize; } + inline const Type_t *last_address() const { return X.data() + TotSize; } // returns a const pointer of i-th row inline const Type_t *operator[](size_type i) const { - return &(X[0]) + i * D2; + return X.data() + i * D2; } /// returns a pointer of i-th row, g++ iterator problem - inline Type_t *operator[](size_type i) { return &(X[0]) + i * D2; } + inline Type_t *operator[](size_type i) { return X.data() + i * D2; } inline Type_t &operator()(size_type i) { return X[i]; } // returns the i-th value in D1*D2 vector @@ -178,7 +195,7 @@ template > class Matrix { for (int col = 0; col < D2; col++) { - Type_t tmp = (*this)(r1, col); + Type_t tmp = (*this)(r1, col); (*this)(r1, col) = (*this)(r2, col); (*this)(r2, col) = tmp; } @@ -188,7 +205,7 @@ template > class Matrix { for (int row = 0; row < D1; row++) { - Type_t tmp = (*this)(row, c1); + Type_t tmp = (*this)(row, c1); (*this)(row, c1) = (*this)(row, c2); (*this)(row, c2) = tmp; } @@ -202,13 +219,15 @@ template > class Matrix template inline void replaceColumn(IT first, size_type j) { typename Container_t::iterator ii(X.begin() + j); - for (int i = 0; i < D1; i++, ii += D2) *ii = *first++; + for (int i = 0; i < D1; i++, ii += D2) + *ii = *first++; } template inline void add2Column(IT first, size_type j) { typename Container_t::iterator ii(X.begin() + j); - for (int i = 0; i < D1; i++, ii += D2) *ii += *first++; + for (int i = 0; i < D1; i++, ii += D2) + *ii += *first++; } /** @@ -275,13 +294,13 @@ template > class Matrix template inline Msg &putMessage(Msg &m) { - m.Pack(&X[0], D1 * D2); + m.Pack(X.data(), D1 * D2); return m; } template inline Msg &getMessage(Msg &m) { - m.Unpack(&X[0], D1 * D2); + m.Unpack(X.data(), D1 * D2); return m; } @@ -292,23 +311,24 @@ template > class Matrix }; // I/O -template -std::ostream &operator<<(std::ostream &out, const Matrix &rhs) +template +std::ostream &operator<<(std::ostream &out, const Matrix &rhs) { - typedef typename Matrix::size_type size_type; + typedef typename Matrix::size_type size_type; size_type ii = 0; for (size_type i = 0; i < rhs.rows(); i++) { - for (size_type j = 0; j < rhs.cols(); j++) out << rhs(ii++) << " "; + for (size_type j = 0; j < rhs.cols(); j++) + out << rhs(ii++) << " "; out << std::endl; } return out; } -template -std::istream &operator>>(std::istream &is, Matrix &rhs) +template +std::istream &operator>>(std::istream &is, Matrix &rhs) { - typedef typename Matrix::size_type size_type; + typedef typename Matrix::size_type size_type; size_type ii = 0; for (size_type i = 0; i < rhs.size(); i++) { @@ -320,10 +340,10 @@ std::istream &operator>>(std::istream &is, Matrix &rhs) // We need to specialize CreateLeaf for our class, so that operators // know what to stick in the leaves of the expression tree. //----------------------------------------------------------------------------- -template struct CreateLeaf> +template struct CreateLeaf> { - typedef Reference> Leaf_t; - inline static Leaf_t make(const Matrix &a) { return Leaf_t(a); } + typedef Reference> Leaf_t; + inline static Leaf_t make(const Matrix &a) { return Leaf_t(a); } }; //----------------------------------------------------------------------------- @@ -360,10 +380,11 @@ template struct LeafFunctor, SizeLeaf2> } }; -template struct LeafFunctor, SizeLeaf2> +template +struct LeafFunctor, SizeLeaf2> { typedef bool Type_t; - inline static bool apply(const Matrix &v, const SizeLeaf2 &s) + inline static bool apply(const Matrix &v, const SizeLeaf2 &s) { return s(v.rows(), v.cols()); } @@ -373,12 +394,12 @@ template struct LeafFunctor, SizeLeaf2> // EvalLeaf1 is used to evaluate expression with matrices. // (It's already defined for Scalar values.) //----------------------------------------------------------------------------- -// template -// struct LeafFunctor,EvalLeaf1> +// template +// struct LeafFunctor,EvalLeaf1> // { // typedef T Type_t; // inline static -// Type_t apply(const Matrix& mat, const EvalLeaf1 &f) +// Type_t apply(const Matrix& mat, const EvalLeaf1 &f) // { // return vec[f.val1()]; // } @@ -387,10 +408,11 @@ template struct LeafFunctor, SizeLeaf2> // EvalLeaf2 is used to evaluate expression with matrices. // (It's already defined for Scalar values.) //----------------------------------------------------------------------------- -template struct LeafFunctor, EvalLeaf2> +template +struct LeafFunctor, EvalLeaf2> { typedef T Type_t; - inline static Type_t apply(const Matrix &mat, const EvalLeaf2 &f) + inline static Type_t apply(const Matrix &mat, const EvalLeaf2 &f) { return mat(f.val1(), f.val2()); } @@ -399,8 +421,8 @@ template struct LeafFunctor, EvalLeaf2> /////////////////////////////////////////////////////////////////////////////// // LOOP is done by evaluate function /////////////////////////////////////////////////////////////////////////////// -template -inline void evaluate(Matrix &lhs, const Op &op, +template +inline void evaluate(Matrix &lhs, const Op &op, const Expression &rhs) { if (forEach(rhs, SizeLeaf2(lhs.rows(), lhs.cols()), AndCombine())) @@ -423,7 +445,8 @@ inline void evaluate(Matrix &lhs, const Op &op, abort(); } } -} +} // namespace qmcplusplus + +#include "Numerics/OhmmsPETE/OhmmsMatrixOperators.h" -#include "Numerics/OhmmsPETE/OhmmsMatrixOperators.h" #endif // OHMMS_PETE_MATRIX_H diff --git a/src/Numerics/OhmmsPETE/OhmmsVector.h b/src/Numerics/OhmmsPETE/OhmmsVector.h index 344c3476b..f9f1cfc54 100644 --- a/src/Numerics/OhmmsPETE/OhmmsVector.h +++ b/src/Numerics/OhmmsPETE/OhmmsVector.h @@ -1,19 +1,14 @@ -//////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////// // This file is distributed under the University of Illinois/NCSA Open Source -// License. See LICENSE file in top directory for details. +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: -// Jeongnim Kim, jeongnim.kim@intel.com, -// Intel Corp. -// Ye Luo, yeluo@anl.gov, -// Argonne National Laboratory +// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory // -// File created by: -// Jeongnim Kim, jeongnim.kim@intel.com, -// Intel Corp. -//////////////////////////////////////////////////////////////////////////////// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// /** @file Vector.h * @@ -29,6 +24,7 @@ #include #include #include +#include namespace qmcplusplus { @@ -39,14 +35,20 @@ template > class Vector typedef T Type_t; typedef T *iterator; typedef const T *const_iterator; + typedef typename Alloc::size_type size_type; typedef typename Alloc::pointer pointer; typedef typename Alloc::const_pointer const_pointer; typedef Vector This_t; /** constructor with size n*/ - explicit inline Vector(size_t n = 0) : nLocal(n), nAllocated(0), X(nullptr) + explicit inline Vector(size_t n = 0, Type_t val = Type_t()) + : nLocal(n), nAllocated(0), X(nullptr) { - if (n) resize_impl(n); + if (n) + { + resize_impl(n); + std::fill_n(X, n, val); + } } /** constructor with an initialized ref */ @@ -96,30 +98,70 @@ template > class Vector } } + // Attach to pre-allocated memory + inline void attachReference(T *ref, size_t n) + { + if (nAllocated) + throw std::runtime_error( + "Pointer attaching is not allowed on Vector with allocated memory."); + nLocal = n; + nAllocated = 0; + X = ref; + } + //! return the current size inline size_t size() const { return nLocal; } /// resize - inline void resize(size_t n) + inline void resize(size_t n, Type_t val = Type_t()) { if (nLocal > nAllocated) throw std::runtime_error( "Resize not allowed on Vector constructed by initialized memory."); if (n > nAllocated) + { resize_impl(n); + std::fill_n(X, n, val); + } + else if (n > nLocal) + { + std::fill_n(X + nLocal, n - nLocal, val); + nLocal = n; + } else nLocal = n; return; } + /// clear + inline void clear() { nLocal = 0; } + + /// free + inline void free() + { + if (nAllocated) + { + mAllocator.deallocate(X, nAllocated); + } + nLocal = 0; + nAllocated = 0; + X = nullptr; + } + // Get and Set Operations inline Type_t &operator[](size_t i) { return X[i]; } - inline Type_t operator[](size_t i) const { return X[i]; } + inline const Type_t &operator[](size_t i) const { return X[i]; } - inline Type_t &operator()(size_t i) { return X[i]; } + // inline Type_t& operator()(size_t i) + //{ + // return X[i]; + //} - inline Type_t operator()(size_t i) const { return X[i]; } + // inline Type_t operator()( size_t i) const + //{ + // return X[i]; + //} inline iterator begin() { return X; } inline const_iterator begin() const { return X; } @@ -158,7 +200,7 @@ template > class Vector } }; -} // end-of qmcplusplus +} // namespace qmcplusplus #include "Numerics/OhmmsPETE/OhmmsVectorOperators.h" @@ -257,7 +299,8 @@ inline void evaluate(Vector &lhs, const Op &op, template std::ostream &operator<<(std::ostream &out, const Vector &rhs) { - for (int i = 0; i < rhs.size(); i++) out << rhs[i] << std::endl; + for (int i = 0; i < rhs.size(); i++) + out << rhs[i] << std::endl; return out; } @@ -265,9 +308,11 @@ template std::istream &operator>>(std::istream &is, Vector &rhs) { // printTinyVector >::print(out,rhs); - for (int i = 0; i < rhs.size(); i++) is >> rhs[i]; + for (int i = 0; i < rhs.size(); i++) + is >> rhs[i]; return is; } -} + +} // namespace qmcplusplus #endif // OHMMS_PARTICLEATTRIB_PEPE_H diff --git a/src/Particle/DistanceTableAA.h b/src/Particle/DistanceTableAA.h index cd0763e89..86306dd6d 100644 --- a/src/Particle/DistanceTableAA.h +++ b/src/Particle/DistanceTableAA.h @@ -26,8 +26,7 @@ namespace qmcplusplus * @brief A derived classe from DistacneTableData, specialized for dense case */ template -struct DistanceTableAA : public DTD_BConds, - public DistanceTableData +struct DistanceTableAA : public DTD_BConds, public DistanceTableData { int Ntargets; @@ -40,24 +39,30 @@ struct DistanceTableAA : public DTD_BConds, resize(target.getTotalNum()); } -#if (__cplusplus >= 201103L) - DistanceTableAA() = delete; + DistanceTableAA() = delete; DistanceTableAA(const DistanceTableAA &) = delete; -#endif ~DistanceTableAA() {} + size_t compute_size(int N) + { + const size_t N_padded = getAlignedSize(N); + const size_t Alignment = getAlignment(); + return (N_padded * (2 * N - N_padded + 1) + (Alignment - 1) * N_padded) / 2; + } + void resize(int n) { - N[SourceIndex] = N[VisitorIndex] = Ntargets = n; - Ntargets_padded = getAlignedSize(n); - BlockSize = Ntargets_padded * D; + N[SourceIndex] = n; + N[VisitorIndex] = n; + Ntargets = n; + Ntargets_padded = getAlignedSize(n); Distances.resize(Ntargets, Ntargets_padded); - - memoryPool.resize(Ntargets * BlockSize); + const size_t total_size = compute_size(Ntargets); + memoryPool.resize(total_size * D); Displacements.resize(Ntargets); for (int i = 0; i < Ntargets; ++i) - Displacements[i].resetByRef(Ntargets, Ntargets_padded, - memoryPool.data() + i * BlockSize); + Displacements[i].attachReference(i, total_size, + memoryPool.data() + compute_size(i)); Temp_r.resize(Ntargets); Temp_dr.resize(Ntargets); @@ -78,32 +83,28 @@ struct DistanceTableAA : public DTD_BConds, inline void evaluate(ParticleSet &P, IndexType jat) { - activePtcl = jat; DTD_BConds::computeDistances( P.R[jat], P.RSoA, Distances[jat], Displacements[jat], 0, Ntargets, jat); Distances[jat][jat] = std::numeric_limits::max(); // assign a big number } - inline void moveOnSphere(const ParticleSet &P, const PosType &rnew, - IndexType jat) + inline void moveOnSphere(const ParticleSet &P, const PosType &rnew) { DTD_BConds::computeDistances(rnew, P.RSoA, Temp_r.data(), Temp_dr, - 0, Ntargets, jat); - Temp_r[jat] = std::numeric_limits::max(); // assign a big number + 0, Ntargets, P.activePtcl); } /// evaluate the temporary pair relations - inline void move(const ParticleSet &P, const PosType &rnew, IndexType jat) + inline void move(const ParticleSet &P, const PosType &rnew) { //#pragma omp master - activePtcl = jat; - moveOnSphere(P, rnew, jat); + moveOnSphere(P, rnew); } /// update the iat-th row for iat=[0,iat-1) inline void update(IndexType iat) { - if (iat == 0 || iat != activePtcl) return; + if (iat == 0) return; // update by a cache line const int nupdate = getAlignedSize(iat); simd::copy_n(Temp_r.data(), nupdate, Distances[iat]); @@ -111,5 +112,5 @@ struct DistanceTableAA : public DTD_BConds, simd::copy_n(Temp_dr.data(idim), nupdate, Displacements[iat].data(idim)); } }; -} +} // namespace qmcplusplus #endif diff --git a/src/Particle/DistanceTableBA.h b/src/Particle/DistanceTableBA.h index 4cd38c5dd..bc5d1bd84 100644 --- a/src/Particle/DistanceTableBA.h +++ b/src/Particle/DistanceTableBA.h @@ -25,8 +25,7 @@ namespace qmcplusplus * transposed form */ template -struct DistanceTableBA : public DTD_BConds, - public DistanceTableData +struct DistanceTableBA : public DTD_BConds, public DistanceTableData { int Nsources; int Ntargets; @@ -53,8 +52,8 @@ struct DistanceTableBA : public DTD_BConds, memoryPool.resize(Ntargets * BlockSize); Displacements.resize(Ntargets); for (int i = 0; i < Ntargets; ++i) - Displacements[i].resetByRef(Nsources, Nsources_padded, - memoryPool.data() + i * BlockSize); + Displacements[i].attachReference(Nsources, Nsources_padded, + memoryPool.data() + i * BlockSize); Temp_r.resize(Nsources); Temp_dr.resize(Nsources); @@ -66,16 +65,13 @@ struct DistanceTableBA : public DTD_BConds, J2.resize(Nsources, Ntargets_padded); } -#if (__cplusplus >= 201103L) - DistanceTableBA() = delete; + DistanceTableBA() = delete; DistanceTableBA(const DistanceTableBA &) = delete; -#endif ~DistanceTableBA() {} /** evaluate the full table */ inline void evaluate(ParticleSet &P) { - activePtcl = -1; // be aware of the sign of Displacement for (int iat = 0; iat < Ntargets; ++iat) DTD_BConds::computeDistances(P.R[iat], Origin->RSoA, @@ -94,18 +90,15 @@ struct DistanceTableBA : public DTD_BConds, 0, Nsources); } - inline void moveOnSphere(const ParticleSet &P, const PosType &rnew, - IndexType jat) + inline void moveOnSphere(const ParticleSet &P, const PosType &rnew) { - activePtcl = jat; DTD_BConds::computeDistances(rnew, Origin->RSoA, Temp_r.data(), Temp_dr, 0, Nsources); } /// evaluate the temporary pair relations - inline void move(const ParticleSet &P, const PosType &rnew, IndexType jat) + inline void move(const ParticleSet &P, const PosType &rnew) { - activePtcl = jat; DTD_BConds::computeDistances(rnew, Origin->RSoA, Temp_r.data(), Temp_dr, 0, Nsources); } @@ -113,84 +106,13 @@ struct DistanceTableBA : public DTD_BConds, /// update the stripe for jat-th particle inline void update(IndexType iat) { - if (iat != activePtcl) return; simd::copy_n(Temp_r.data(), Nsources, Distances[iat]); for (int idim = 0; idim < D; ++idim) simd::copy_n(Temp_dr.data(idim), Nsources, Displacements[iat].data(idim)); } - size_t get_neighbors(int iat, RealType rcut, int *restrict jid, - RealType *restrict dist, PosType *restrict displ) const - { - CONSTEXPR T cminus(-1); - size_t nn = 0; - for (int jat = 0; jat < Ntargets; ++jat) - { - const RealType rij = Distances[jat][iat]; - if (rij < rcut) - { // make the compact list - jid[nn] = jat; - dist[nn] = rij; - displ[nn] = cminus * Displacements[jat][iat]; - nn++; - } - } - return nn; - } - - int get_first_neighbor(IndexType iat, RealType &r, PosType &dr) const - { - RealType min_dist = std::numeric_limits::max(); - int index = -1; - if (iat == activePtcl) - { - for (int jat = 0; jat < Nsources; ++jat) - if (Temp_r[jat] < min_dist) - { - min_dist = Temp_r[jat]; - index = jat; - } - if (index >= 0) - { - r = min_dist; - dr = Temp_dr[index]; - } - } - else - { - for (int jat = 0; jat < Nsources; ++jat) - if (Distances[iat][jat] < min_dist) - { - min_dist = Distances[iat][jat]; - index = jat; - } - if (index >= 0) - { - r = min_dist; - dr = Displacements[iat][index]; - } - } - return index; - } - - size_t get_neighbors(int iat, RealType rcut, RealType *restrict dist) const - { - size_t nn = 0; - for (int jat = 0; jat < Ntargets; ++jat) - { - const RealType rij = Distances[jat][iat]; - if (rij < rcut) - { // make the compact list - dist[nn] = rij; - nn++; - } - } - return nn; - } - inline void donePbyP() { - activePtcl = -1; // Rmax is zero: no need to transpose the table. if (Rmax < std::numeric_limits::epsilon()) return; @@ -216,5 +138,5 @@ struct DistanceTableBA : public DTD_BConds, } } }; -} +} // namespace qmcplusplus #endif diff --git a/src/Particle/DistanceTableData.h b/src/Particle/DistanceTableData.h index 95d1cea5e..69643734d 100644 --- a/src/Particle/DistanceTableData.h +++ b/src/Particle/DistanceTableData.h @@ -120,8 +120,6 @@ struct DistanceTableData int ID; /// Type of DT int DTType; - /// Index of the particle with a trial move - IndexType activePtcl; /// size of indicies TinyVector N; /// true, if ratio computations need displacement, e.g. LCAO type @@ -168,7 +166,7 @@ struct DistanceTableData /**defgroup SoA data */ /*@{*/ /** Distances[i][j] , [Nsources][Ntargets] */ - Matrix> Distances; + Matrix> Distances; /** Displacements[Nsources]x[3][Ntargets] */ std::vector Displacements; @@ -308,91 +306,14 @@ struct DistanceTableData virtual void evaluate(ParticleSet &P, int jat) = 0; /// evaluate the temporary pair relations - virtual void move(const ParticleSet &P, const PosType &rnew, - IndexType jat) = 0; + virtual void move(const ParticleSet &P, const PosType &rnew) = 0; /// evaluate the distance tables with a sphere move - virtual void moveOnSphere(const ParticleSet &P, const PosType &rnew, - IndexType jat) = 0; + virtual void moveOnSphere(const ParticleSet &P, const PosType &rnew) = 0; /// update the distance table by the pair relations virtual void update(IndexType jat) = 0; - /** build a compact list of a neighbor for the iat source - * @param iat source particle id - * @param rcut cutoff radius - * @param jid compressed index - * @param dist compressed distance - * @param displ compressed displacement - * @return number of target particles within rcut - */ - virtual size_t get_neighbors(int iat, RealType rcut, int *restrict jid, - RealType *restrict dist, - PosType *restrict displ) const - { - return 0; - } - - /** find the nearest neighbor - * @param iat source particle id - * @return the id of the nearest particle, -1 not found - */ - virtual int get_first_neighbor(IndexType iat, RealType &r, PosType &dr) const - { - return -1; - } - - /** build a compact list of a neighbor for the iat source - * @param iat source particle id - * @param rcut cutoff radius - * @param dist compressed distance - * @return number of target particles within rcut - */ - virtual size_t get_neighbors(int iat, RealType rcut, - RealType *restrict dist) const - { - return 0; - } - - /// find index and distance of each nearest neighbor particle - virtual void nearest_neighbor(std::vector &ri, - bool transposed = false) const - { - APP_ABORT("DistanceTableData::nearest_neighbor is not implemented in " - "calling base class"); - } - - /// find indices and distances of nearest neighbors particles to particle n - virtual void nearest_neighbors(int n, int neighbors, std::vector &ri, - bool transposed = false) - { - APP_ABORT("DistanceTableData::nearest_neighbors is not implemented in " - "calling base class"); - } - - /// find species resolved indices and distances of nearest particles to - /// particle n - virtual void nearest_neighbors_by_spec(int n, int neighbors, int spec_start, - std::vector &ri, - bool transposed = false) - { - APP_ABORT("DistanceTableData::nearest_neighbors is not implemented in " - "calling base class"); - } - - inline void check_neighbor_size(std::vector &ri, - bool transposed = false) const - { - int m; - if (transposed) - m = N[SourceIndex]; - else - m = N[VisitorIndex]; - if (ri.size() != m) - APP_ABORT("DistanceTableData::check_neighbor_size distance/index vector " - "length is not equal to the number of neighbor particles"); - } - inline void print(std::ostream &os) { os << "Table " << Origin->getName() << std::endl; @@ -416,11 +337,11 @@ struct DistanceTableData /** displacement vectors \f$dr(i,j) = R(j)-R(i)\f$ */ std::vector dr_m; /** full distance AB or AA return r2_m(iat,jat) */ - Matrix> r_m2; + Matrix> r_m2; /** full displacement AB or AA */ Matrix dr_m2; /** J2 for compact neighbors */ - Matrix> J2; + Matrix> J2; /*@}*/ /**resize the storage @@ -449,5 +370,5 @@ struct DistanceTableData } } }; -} +} // namespace qmcplusplus #endif diff --git a/src/Particle/ParticleSet.cpp b/src/Particle/ParticleSet.cpp index 1666924d0..fffe68a67 100644 --- a/src/Particle/ParticleSet.cpp +++ b/src/Particle/ParticleSet.cpp @@ -35,8 +35,8 @@ #include "Utilities/RandomGenerator.h" /** @file ParticleSet.cpp - * @brief Particle positions and related data - */ + * @brief Particle positions and related data + */ //#define PACK_DISTANCETABLES @@ -45,13 +45,13 @@ namespace qmcplusplus ParticleSet::ParticleSet() : UseBoundBox(true), IsGrouped(true), myName("none"), SameMass(true), - myTwist(0.0) + myTwist(0.0), activePtcl(-1) { } ParticleSet::ParticleSet(const ParticleSet &p) : UseBoundBox(p.UseBoundBox), IsGrouped(p.IsGrouped), - mySpecies(p.getSpeciesSet()), SameMass(true), myTwist(0.0) + mySpecies(p.getSpeciesSet()), SameMass(true), myTwist(0.0), activePtcl(-1) { // initBase(); assign(p); // only the base is copied, assumes that other properties are not @@ -96,15 +96,15 @@ void ParticleSet::create(const std::vector &agroup) { SubPtcl.resize(agroup.size() + 1); SubPtcl[0] = 0; - for (int is = 0; is < agroup.size(); is++) + for (int is = 0; is < agroup.size(); is++) SubPtcl[is + 1] = SubPtcl[is] + agroup[is]; - size_t nsum = SubPtcl[agroup.size()]; + size_t nsum = SubPtcl[agroup.size()]; resize(nsum); TotalNum = nsum; int loc = 0; for (int i = 0; i < agroup.size(); i++) { - for (int j = 0; j < agroup[i]; j++, loc++) + for (int j = 0; j < agroup[i]; j++, loc++) GroupID[loc] = i; } } @@ -127,9 +127,9 @@ void ParticleSet::resetGroups() mySpecies(qind, ig) = 0.0; } for (int iat = 0; iat < Z.size(); iat++) - Z[iat] = mySpecies(qind, GroupID[iat]); - natt = mySpecies.numAttributes(); - int massind = mySpecies.addAttribute("mass"); + Z[iat] = mySpecies(qind, GroupID[iat]); + natt = mySpecies.numAttributes(); + int massind = mySpecies.addAttribute("mass"); if (massind == natt) { for (int ig = 0; ig < nspecies; ig++) @@ -144,7 +144,7 @@ void ParticleSet::resetGroups() else app_log() << " Distinctive masses for each species " << std::endl; for (int iat = 0; iat < Mass.size(); iat++) - Mass[iat] = mySpecies(massind, GroupID[iat]); + Mass[iat] = mySpecies(massind, GroupID[iat]); std::vector ng(nspecies, 0); for (int iat = 0; iat < GroupID.size(); iat++) { @@ -155,9 +155,9 @@ void ParticleSet::resetGroups() } SubPtcl.resize(nspecies + 1); SubPtcl[0] = 0; - for (int i = 0; i < nspecies; ++i) + for (int i = 0; i < nspecies; ++i) SubPtcl[i + 1] = SubPtcl[i] + ng[i]; - int membersize = mySpecies.addAttribute("membersize"); + int membersize = mySpecies.addAttribute("membersize"); for (int ig = 0; ig < nspecies; ++ig) mySpecies(membersize, ig) = ng[ig]; // orgID=ID; @@ -166,7 +166,7 @@ void ParticleSet::resetGroups() for (int i = 0; i < nspecies; ++i) for (int iat = 0; iat < GroupID.size(); ++iat) if (GroupID[iat] == i) IndirectID[new_id++] = ID[iat]; - IsGrouped = true; + IsGrouped = true; for (int iat = 0; iat < ID.size(); ++iat) IsGrouped &= (IndirectID[iat] == ID[iat]); if (IsGrouped) @@ -280,7 +280,7 @@ void ParticleSet::update(bool skipSK) RSoA.copyIn(R); for (int i = 0; i < DistTables.size(); i++) DistTables[i]->evaluate(*this); - Ready4Measure = true; + activePtcl = -1; } void ParticleSet::setActive(int iat) @@ -301,33 +301,29 @@ bool ParticleSet::makeMoveAndCheck(Index_t iat, const SingleParticlePos_t &displ) { activePtcl = iat; - // SingleParticlePos_t red_displ(Lattice.toUnit(displ)); + activePos = R[iat] + displ; if (UseBoundBox) { if (Lattice.outOfBound(Lattice.toUnit(displ))) { + activePtcl = -1; return false; } - activePos = R[iat]; // save the current position - SingleParticlePos_t newpos(activePos + displ); - newRedPos = Lattice.toUnit(newpos); + newRedPos = Lattice.toUnit(activePos); if (Lattice.isValid(newRedPos)) { for (int i = 0; i < DistTables.size(); ++i) - DistTables[i]->move(*this, newpos, iat); - R[iat] = newpos; + DistTables[i]->move(*this, activePos); return true; } // out of bound + activePtcl = -1; return false; } else { - activePos = R[iat]; // save the current position - SingleParticlePos_t newpos(activePos + displ); for (int i = 0; i < DistTables.size(); ++i) - DistTables[i]->move(*this, newpos, iat); - R[iat] = newpos; + DistTables[i]->move(*this, activePos); return true; } } @@ -341,11 +337,9 @@ void ParticleSet::makeMoveOnSphere(Index_t iat, const SingleParticlePos_t &displ) { activePtcl = iat; - activePos = R[iat]; // save the current position - SingleParticlePos_t newpos(activePos + displ); + activePos = R[iat] + displ; for (int i = 0; i < DistTables.size(); ++i) - DistTables[i]->moveOnSphere(*this, newpos, iat); - R[iat] = newpos; + DistTables[i]->moveOnSphere(*this, activePos); } /** update the particle attribute by the proposed move @@ -362,7 +356,9 @@ void ParticleSet::acceptMove(Index_t iat) for (int i = 0, n = DistTables.size(); i < n; i++) DistTables[i]->update(iat); - RSoA(iat) = R[iat]; + R[iat] = activePos; + RSoA(iat) = activePos; + activePtcl = -1; } else { @@ -372,19 +368,13 @@ void ParticleSet::acceptMove(Index_t iat) } } -void ParticleSet::rejectMove(Index_t iat) -{ - // restore the position by the saved activePos - R[iat] = activePos; - for (int i = 0; i < DistTables.size(); ++i) - DistTables[i]->activePtcl = -1; -} +void ParticleSet::rejectMove(Index_t iat) { activePtcl = -1; } void ParticleSet::donePbyP(bool skipSK) { for (size_t i = 0, nt = DistTables.size(); i < nt; i++) DistTables[i]->donePbyP(); - Ready4Measure = true; + activePtcl = -1; } void ParticleSet::loadWalker(Walker_t &awalker, bool pbyp) @@ -400,15 +390,9 @@ void ParticleSet::loadWalker(Walker_t &awalker, bool pbyp) if (DistTables[i]->Need_full_table_loadWalker) DistTables[i]->evaluate(*this); } - - Ready4Measure = false; } -void ParticleSet::saveWalker(Walker_t &awalker) -{ - awalker.R = R; - // awalker.DataSet.rewind(); -} +void ParticleSet::saveWalker(Walker_t &awalker) { awalker.R = R; } void ParticleSet::clearDistanceTables() { @@ -417,4 +401,4 @@ void ParticleSet::clearDistanceTables() delete *iter; DistTables.clear(); } -} +} // namespace qmcplusplus diff --git a/src/Particle/ParticleSet.h b/src/Particle/ParticleSet.h index dfae98aa5..1ea9549df 100644 --- a/src/Particle/ParticleSet.h +++ b/src/Particle/ParticleSet.h @@ -121,8 +121,6 @@ class ParticleSet : public QMCTraits, public PtclOnLatticeTraits bool IsGrouped; /// true if the particles have the same mass bool SameMass; - /// true if all the internal state is ready for estimators - bool Ready4Measure; /// the index of the active particle for particle-by-particle moves Index_t activePtcl; @@ -207,6 +205,15 @@ class ParticleSet : public QMCTraits, public PtclOnLatticeTraits */ void setActive(int iat); + /** return the position of the active partice + * + * activePtcl=-1 is used to flag non-physical moves + */ + inline const PosType& activeR(int iat) const + { + return (activePtcl == iat)? activePos:R[iat]; + } + /** move a particle * @param iat the index of the particle to be moved * @param displ random displacement of the iat-th particle @@ -282,8 +289,7 @@ class ParticleSet : public QMCTraits, public PtclOnLatticeTraits inline void assign(const ParticleSet &ptclin) { - TotalNum = ptclin.getTotalNum(); - resize(TotalNum); + resize(ptclin.getTotalNum()); Lattice = ptclin.Lattice; PrimitiveLattice = ptclin.PrimitiveLattice; R.InUnit = ptclin.R.InUnit; diff --git a/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h b/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h index 799b64f61..eb3c3eb42 100644 --- a/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h +++ b/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h @@ -122,10 +122,11 @@ template struct BsplineFunctor : public OptimizableFunctorBase void setupParameters(int n, real_type rcut, real_type cusp, std::vector ¶ms) { - CuspValue = cusp; + CuspValue = cusp; cutoff_radius = rcut; resize(n); - for (int i = 0; i < n; i++) { + for (int i = 0; i < n; i++) + { Parameters[i] = params[i]; } reset(); @@ -142,7 +143,7 @@ template struct BsplineFunctor : public OptimizableFunctorBase * @param distIndices temp storage for the compressed index */ // clang-format off - void evaluateVGL(const int iStart, const int iEnd, + void evaluateVGL(const int iat, const int iStart, const int iEnd, const T* _distArray, T* restrict _valArray, T* restrict _gradArray, @@ -157,7 +158,8 @@ template struct BsplineFunctor : public OptimizableFunctorBase * @param distArrayCompressed temp storage to filter r_j < cutoff_radius * @return \f$\sum u(r_j)\f$ for r_j < cutoff_radius */ - T evaluateV(const int iStart, const int iEnd, const T *restrict _distArray, + T evaluateV(const int iat, const int iStart, const int iEnd, + const T *restrict _distArray, T *restrict distArrayCompressed) const; inline real_type evaluate(real_type r) @@ -251,7 +253,7 @@ template struct BsplineFunctor : public OptimizableFunctorBase int imin = std::max(i, 1); int imax = std::min(i + 4, NumParams + 1); - for (int n = imin; n < imax; ++n) + for (int n = imin; n < imax; ++n) derivs[n - 1] = SplineDerivs[n]; derivs[1] += SplineDerivs[0]; @@ -260,7 +262,8 @@ template struct BsplineFunctor : public OptimizableFunctorBase }; template -inline T BsplineFunctor::evaluateV(const int iStart, const int iEnd, +inline T BsplineFunctor::evaluateV(const int iat, const int iStart, + const int iEnd, const T *restrict _distArray, T *restrict distArrayCompressed) const { @@ -270,15 +273,17 @@ inline T BsplineFunctor::evaluateV(const int iStart, const int iEnd, int iCount = 0; const int iLimit = iEnd - iStart; - #pragma vector always +#pragma vector always for (int jat = 0; jat < iLimit; jat++) { - real_type r = distArray[jat]; - if (r < cutoff_radius) distArrayCompressed[iCount++] = distArray[jat]; + real_type r = distArray[jat]; + // pick the distances smaller than the cutoff and avoid the reference atom + if (r < cutoff_radius && iStart + jat != iat) + distArrayCompressed[iCount++] = distArray[jat]; } real_type d = 0.0; - #pragma omp simd reduction(+:d) +#pragma omp simd reduction(+ : d) for (int jat = 0; jat < iCount; jat++) { real_type r = distArrayCompressed[jat]; @@ -304,7 +309,7 @@ inline T BsplineFunctor::evaluateV(const int iStart, const int iEnd, template inline void BsplineFunctor::evaluateVGL( - const int iStart, const int iEnd, const T *_distArray, + const int iat, const int iStart, const int iEnd, const T *_distArray, T *restrict _valArray, T *restrict _gradArray, T *restrict _laplArray, T *restrict distArrayCompressed, int *restrict distIndices) const { @@ -323,11 +328,11 @@ inline void BsplineFunctor::evaluateVGL( real_type *gradArray = _gradArray + iStart; real_type *laplArray = _laplArray + iStart; - #pragma vector always +#pragma vector always for (int jat = 0; jat < iLimit; jat++) { real_type r = distArray[jat]; - if (r < cutoff_radius) + if (r < cutoff_radius && iStart + jat != iat) { distIndices[iCount] = jat; distArrayCompressed[iCount] = r; @@ -335,7 +340,7 @@ inline void BsplineFunctor::evaluateVGL( } } - #pragma omp simd +#pragma omp simd for (int j = 0; j < iCount; j++) { @@ -374,5 +379,5 @@ inline void BsplineFunctor::evaluateVGL( // clang-format on } } -} +} // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/Jastrow/OneBodyJastrow.h b/src/QMCWaveFunctions/Jastrow/OneBodyJastrow.h index 3fad77007..1adbf96dd 100644 --- a/src/QMCWaveFunctions/Jastrow/OneBodyJastrow.h +++ b/src/QMCWaveFunctions/Jastrow/OneBodyJastrow.h @@ -1,21 +1,20 @@ -//////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////// // This file is distributed under the University of Illinois/NCSA Open Source -// License. See LICENSE file in top directory for details. +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // // File developed by: // // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. -//////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////// // -*- C++ -*- -#ifndef QMCPLUSPLUS_ONEBODYJASTROW_OPTIMIZED_SOA_H -#define QMCPLUSPLUS_ONEBODYJASTROW_OPTIMIZED_SOA_H +#ifndef QMCPLUSPLUS_ONEBODYJASTROW_H +#define QMCPLUSPLUS_ONEBODYJASTROW_H #include "Utilities/Configuration.h" #include "QMCWaveFunctions/WaveFunctionComponentBase.h" #include #include -#include #include /*! @@ -42,18 +41,19 @@ template struct OneBodyJastrow : public WaveFunctionComponentBase int myTableID; /// number of ions int Nions; + /// number of electrons + int Nelec; /// number of groups int NumGroups; /// reference to the sources (ions) const ParticleSet &Ions; - valT LogValue; valT curAt; valT curLap; posT curGrad; ///\f$Vat[i] = sum_(j) u_{i,j}\f$ - aligned_vector Vat; + Vector Vat; aligned_vector U, dU, d2U; aligned_vector DistCompressed; aligned_vector DistIndice; @@ -87,10 +87,10 @@ template struct OneBodyJastrow : public WaveFunctionComponentBase { NumGroups = 0; } - const int N = els.getTotalNum(); - Vat.resize(N); - Grad.resize(N); - Lap.resize(N); + Nelec = els.getTotalNum(); + Vat.resize(Nelec); + Grad.resize(Nelec); + Lap.resize(Nelec); U.resize(Nions); dU.resize(Nions); @@ -105,36 +105,42 @@ template struct OneBodyJastrow : public WaveFunctionComponentBase F[source_type] = afunc; } - RealType evaluateLog(ParticleSet &P, ParticleSet::ParticleGradient_t &G, - ParticleSet::ParticleLaplacian_t &L) + void recompute(ParticleSet &P) { - const int n = P.getTotalNum(); const DistanceTableData &d_ie(*(P.DistTables[myTableID])); - LogValue = valT(); - for (int iat = 0; iat < n; ++iat) + for (int iat = 0; iat < Nelec; ++iat) { computeU3(P, iat, d_ie.Distances[iat]); - LogValue -= Vat[iat] = simd::accumulate_n(U.data(), Nions, valT()); + Vat[iat] = simd::accumulate_n(U.data(), Nions, valT()); Lap[iat] = accumulateGL(dU.data(), d2U.data(), d_ie.Displacements[iat], Grad[iat]); - G[iat] += Grad[iat]; - L[iat] -= Lap[iat]; } + } + + RealType evaluateLog(ParticleSet &P, ParticleSet::ParticleGradient_t &G, + ParticleSet::ParticleLaplacian_t &L) + { + evaluateGL(P, G, L, true); return LogValue; } ValueType ratio(ParticleSet &P, int iat) { - UpdateMode = ORB_PBYP_RATIO; - curAt = valT(0); - const valT *restrict dist = P.DistTables[myTableID]->Temp_r.data(); + UpdateMode = ORB_PBYP_RATIO; + curAt = computeU(P.DistTables[myTableID]->Temp_r.data()); + return std::exp(Vat[iat] - curAt); + } + + inline valT computeU(const valT *dist) + { + valT curVat(0); if (NumGroups > 0) { for (int jg = 0; jg < NumGroups; ++jg) { if (F[jg] != nullptr) - curAt += F[jg]->evaluateV(Ions.first(jg), Ions.last(jg), dist, - DistCompressed.data()); + curVat += F[jg]->evaluateV(-1, Ions.first(jg), Ions.last(jg), dist, + DistCompressed.data()); } } else @@ -142,31 +148,23 @@ template struct OneBodyJastrow : public WaveFunctionComponentBase for (int c = 0; c < Nions; ++c) { int gid = Ions.GroupID[c]; - if (F[gid] != nullptr) curAt += F[gid]->evaluate(dist[c]); + if (F[gid] != nullptr) curVat += F[gid]->evaluate(dist[c]); } } - - if (!P.Ready4Measure) - { // need to compute per atom - computeU3(P, iat, P.DistTables[myTableID]->Distances[iat]); - Lap[iat] = - accumulateGL(dU.data(), d2U.data(), - P.DistTables[myTableID]->Displacements[iat], Grad[iat]); - Vat[iat] = simd::accumulate_n(U.data(), Nions, valT()); - } - - return std::exp(Vat[iat] - curAt); + return curVat; } inline void evaluateGL(ParticleSet &P, ParticleSet::ParticleGradient_t &G, ParticleSet::ParticleLaplacian_t &L, bool fromscratch = false) { - const size_t n = P.getTotalNum(); - for (size_t iat = 0; iat < n; ++iat) + if (fromscratch) recompute(P); + + for (size_t iat = 0; iat < Nelec; ++iat) G[iat] += Grad[iat]; - for (size_t iat = 0; iat < n; ++iat) + for (size_t iat = 0; iat < Nelec; ++iat) L[iat] -= Lap[iat]; + LogValue = -simd::accumulate_n(Vat.data(), Nelec, valT()); } /** compute gradient and lap @@ -177,14 +175,12 @@ template struct OneBodyJastrow : public WaveFunctionComponentBase { valT lap(0); constexpr valT lapfac = OHMMS_DIM - RealType(1); - //#pragma omp simd reduction(+:lap) for (int jat = 0; jat < Nions; ++jat) lap += d2u[jat] + lapfac * du[jat]; for (int idim = 0; idim < OHMMS_DIM; ++idim) { const valT *restrict dX = displ.data(idim); valT s = valT(); - //#pragma omp simd reduction(+:s) for (int jat = 0; jat < Nions; ++jat) s += du[jat] * dX[jat]; grad[idim] = s; @@ -210,7 +206,7 @@ template struct OneBodyJastrow : public WaveFunctionComponentBase for (int jg = 0; jg < NumGroups; ++jg) { if (F[jg] == nullptr) continue; - F[jg]->evaluateVGL(Ions.first(jg), Ions.last(jg), dist, U.data(), + F[jg]->evaluateVGL(-1, Ions.first(jg), Ions.last(jg), dist, U.data(), dU.data(), d2U.data(), DistCompressed.data(), DistIndice.data()); } @@ -233,15 +229,7 @@ template struct OneBodyJastrow : public WaveFunctionComponentBase * @param P quantum particleset * @param iat particle index */ - GradType evalGrad(ParticleSet &P, int iat) - { - computeU3(P, iat, P.DistTables[myTableID]->Distances[iat]); - Lap[iat] = - accumulateGL(dU.data(), d2U.data(), - P.DistTables[myTableID]->Displacements[iat], Grad[iat]); - Vat[iat] = simd::accumulate_n(U.data(), Nions, valT()); - return GradType(Grad[iat]); - } + GradType evalGrad(ParticleSet &P, int iat) { return GradType(Grad[iat]); } /** compute the gradient during particle-by-particle update * @param P quantum particleset @@ -256,7 +244,7 @@ template struct OneBodyJastrow : public WaveFunctionComponentBase computeU3(P, iat, P.DistTables[myTableID]->Temp_r.data()); curLap = accumulateGL(dU.data(), d2U.data(), P.DistTables[myTableID]->Temp_dr, curGrad); - curAt = simd::accumulate_n(U.data(), Nions, valT()); + curAt = simd::accumulate_n(U.data(), Nions, valT()); grad_iat += curGrad; return std::exp(Vat[iat] - curAt); } @@ -278,5 +266,6 @@ template struct OneBodyJastrow : public WaveFunctionComponentBase Lap[iat] = curLap; } }; -} + +} // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/Jastrow/OneBodyJastrowRef.h b/src/QMCWaveFunctions/Jastrow/OneBodyJastrowRef.h index 09f21eb59..7d7db4932 100644 --- a/src/QMCWaveFunctions/Jastrow/OneBodyJastrowRef.h +++ b/src/QMCWaveFunctions/Jastrow/OneBodyJastrowRef.h @@ -1,13 +1,13 @@ -//////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////// // This file is distributed under the University of Illinois/NCSA Open Source -// License. See LICENSE file in top directory for details. +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // // File developed by: // // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. -//////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////// // -*- C++ -*- #ifndef QMCPLUSPLUS_ONEBODYJASTROW_REF_H #define QMCPLUSPLUS_ONEBODYJASTROW_REF_H @@ -23,6 +23,7 @@ namespace miniqmcreference { using namespace qmcplusplus; + /** @ingroup WaveFunctionComponent * @brief Specialization for one-body Jastrow function using multiple functors */ @@ -40,18 +41,19 @@ template struct OneBodyJastrowRef : public WaveFunctionComponentBase int myTableID; /// number of ions int Nions; + /// number of electrons + int Nelec; /// number of groups int NumGroups; /// reference to the sources (ions) const ParticleSet &Ions; - valT LogValue; valT curAt; valT curLap; posT curGrad; ///\f$Vat[i] = sum_(j) u_{i,j}\f$ - aligned_vector Vat; + Vector Vat; aligned_vector U, dU, d2U; aligned_vector DistCompressed; aligned_vector DistIndice; @@ -85,10 +87,10 @@ template struct OneBodyJastrowRef : public WaveFunctionComponentBase { NumGroups = 0; } - const int N = els.getTotalNum(); - Vat.resize(N); - Grad.resize(N); - Lap.resize(N); + Nelec = els.getTotalNum(); + Vat.resize(Nelec); + Grad.resize(Nelec); + Lap.resize(Nelec); U.resize(Nions); dU.resize(Nions); @@ -103,37 +105,42 @@ template struct OneBodyJastrowRef : public WaveFunctionComponentBase F[source_type] = afunc; } - RealType evaluateLog(ParticleSet &P, ParticleSet::ParticleGradient_t &G, - ParticleSet::ParticleLaplacian_t &L) + void recompute(ParticleSet &P) { - const int n = P.getTotalNum(); const DistanceTableData &d_ie(*(P.DistTables[myTableID])); - LogValue = valT(); - for (int iat = 0; iat < n; ++iat) + for (int iat = 0; iat < Nelec; ++iat) { computeU3(P, iat, d_ie.Distances[iat]); - LogValue -= Vat[iat] = - std::accumulate(U.begin(), U.begin() + Nions, valT()); + Vat[iat] = std::accumulate(U.begin(), U.begin() + Nions, valT()); Lap[iat] = accumulateGL(dU.data(), d2U.data(), d_ie.Displacements[iat], Grad[iat]); - G[iat] += Grad[iat]; - L[iat] -= Lap[iat]; } + } + + RealType evaluateLog(ParticleSet &P, ParticleSet::ParticleGradient_t &G, + ParticleSet::ParticleLaplacian_t &L) + { + evaluateGL(P, G, L, true); return LogValue; } ValueType ratio(ParticleSet &P, int iat) { - UpdateMode = ORB_PBYP_RATIO; - curAt = valT(0); - const valT *restrict dist = P.DistTables[myTableID]->Temp_r.data(); + UpdateMode = ORB_PBYP_RATIO; + curAt = computeU(P.DistTables[myTableID]->Temp_r.data()); + return std::exp(Vat[iat] - curAt); + } + + inline valT computeU(const valT *dist) + { + valT curVat(0); if (NumGroups > 0) { for (int jg = 0; jg < NumGroups; ++jg) { if (F[jg] != nullptr) - curAt += F[jg]->evaluateV(Ions.first(jg), Ions.last(jg), dist, - DistCompressed.data()); + curVat += F[jg]->evaluateV(-1, Ions.first(jg), Ions.last(jg), dist, + DistCompressed.data()); } } else @@ -141,31 +148,23 @@ template struct OneBodyJastrowRef : public WaveFunctionComponentBase for (int c = 0; c < Nions; ++c) { int gid = Ions.GroupID[c]; - if (F[gid] != nullptr) curAt += F[gid]->evaluate(dist[c]); + if (F[gid] != nullptr) curVat += F[gid]->evaluate(dist[c]); } } - - if (!P.Ready4Measure) - { // need to compute per atom - computeU3(P, iat, P.DistTables[myTableID]->Distances[iat]); - Lap[iat] = - accumulateGL(dU.data(), d2U.data(), - P.DistTables[myTableID]->Displacements[iat], Grad[iat]); - Vat[iat] = std::accumulate(U.begin(), U.begin() + Nions, valT()); - } - - return std::exp(Vat[iat] - curAt); + return curVat; } inline void evaluateGL(ParticleSet &P, ParticleSet::ParticleGradient_t &G, ParticleSet::ParticleLaplacian_t &L, bool fromscratch = false) { - const size_t n = P.getTotalNum(); - for (size_t iat = 0; iat < n; ++iat) + if (fromscratch) recompute(P); + + for (size_t iat = 0; iat < Nelec; ++iat) G[iat] += Grad[iat]; - for (size_t iat = 0; iat < n; ++iat) + for (size_t iat = 0; iat < Nelec; ++iat) L[iat] -= Lap[iat]; + LogValue = -std::accumulate(Vat.begin(), Vat.begin() + Nelec, valT()); } /** compute gradient and lap @@ -207,7 +206,7 @@ template struct OneBodyJastrowRef : public WaveFunctionComponentBase for (int jg = 0; jg < NumGroups; ++jg) { if (F[jg] == nullptr) continue; - F[jg]->evaluateVGL(Ions.first(jg), Ions.last(jg), dist, U.data(), + F[jg]->evaluateVGL(-1, Ions.first(jg), Ions.last(jg), dist, U.data(), dU.data(), d2U.data(), DistCompressed.data(), DistIndice.data()); } @@ -230,15 +229,7 @@ template struct OneBodyJastrowRef : public WaveFunctionComponentBase * @param P quantum particleset * @param iat particle index */ - GradType evalGrad(ParticleSet &P, int iat) - { - computeU3(P, iat, P.DistTables[myTableID]->Distances[iat]); - Lap[iat] = - accumulateGL(dU.data(), d2U.data(), - P.DistTables[myTableID]->Displacements[iat], Grad[iat]); - Vat[iat] = std::accumulate(U.begin(), U.begin() + Nions, valT()); - return GradType(Grad[iat]); - } + GradType evalGrad(ParticleSet &P, int iat) { return GradType(Grad[iat]); } /** compute the gradient during particle-by-particle update * @param P quantum particleset @@ -253,7 +244,7 @@ template struct OneBodyJastrowRef : public WaveFunctionComponentBase computeU3(P, iat, P.DistTables[myTableID]->Temp_r.data()); curLap = accumulateGL(dU.data(), d2U.data(), P.DistTables[myTableID]->Temp_dr, curGrad); - curAt = std::accumulate(U.begin(), U.begin() + Nions, valT()); + curAt = std::accumulate(U.begin(), U.begin() + Nions, valT()); grad_iat += curGrad; return std::exp(Vat[iat] - curAt); } @@ -274,6 +265,7 @@ template struct OneBodyJastrowRef : public WaveFunctionComponentBase Grad[iat] = curGrad; Lap[iat] = curLap; } -}; // class OneBodyJastrowRef -} // miniqmcreferencce +}; + +} // namespace miniqmcreference #endif diff --git a/src/QMCWaveFunctions/Jastrow/PolynomialFunctor3D.h b/src/QMCWaveFunctions/Jastrow/PolynomialFunctor3D.h index 2427182a2..17a3455d4 100644 --- a/src/QMCWaveFunctions/Jastrow/PolynomialFunctor3D.h +++ b/src/QMCWaveFunctions/Jastrow/PolynomialFunctor3D.h @@ -145,9 +145,9 @@ struct PolynomialFunctor3D : public OptimizableFunctorBase GammaPerm.resize(NumGamma); IndepVar.resize(NumGamma, false); // Set identity permutation - for (int i = 0; i < NumGamma; i++) + for (int i = 0; i < NumGamma; i++) GammaPerm[i] = i; - int col = -1; + int col = -1; for (int row = 0; row < NumConstraints; row++) { int max_loc; @@ -173,7 +173,7 @@ struct PolynomialFunctor3D : public OptimizableFunctorBase // manually swap the rows for (int ind_col = 0; ind_col < ConstraintMatrix.size2(); ind_col++) { - real_type temp = ConstraintMatrix(row, ind_col); + real_type temp = ConstraintMatrix(row, ind_col); ConstraintMatrix(row, ind_col) = ConstraintMatrix(max_loc, ind_col); ConstraintMatrix(max_loc, ind_col) = temp; } @@ -194,7 +194,7 @@ struct PolynomialFunctor3D : public OptimizableFunctorBase } } } - for (int c = col + 1; c < NumGamma; c++) + for (int c = col + 1; c < NumGamma; c++) IndepVar[c] = true; } @@ -210,7 +210,7 @@ struct PolynomialFunctor3D : public OptimizableFunctorBase std::fill(GammaVec.begin(), GammaVec.end(), 0.0); // First, set all independent variables int var = 0; - for (int i = 0; i < NumGamma; i++) + for (int i = 0; i < NumGamma; i++) if (IndepVar[i]) GammaVec[i] = scale * Parameters[var++]; assert(var == Parameters.size()); // Now, set dependent variables @@ -323,7 +323,7 @@ struct PolynomialFunctor3D : public OptimizableFunctorBase // assume r_1I < L && r_2I < L, compression and screening is handled outside inline real_type evaluateV(int Nptcl, const real_type *restrict r_12_array, - const real_type r_1I, + const real_type *restrict r_1I_array, const real_type *restrict r_2I_array) const { constexpr real_type czero(0); @@ -333,10 +333,11 @@ struct PolynomialFunctor3D : public OptimizableFunctorBase const real_type L = chalf * cutoff_radius; real_type val_tot = czero; -#pragma omp simd aligned(r_12_array, r_2I_array) reduction(+ : val_tot) +#pragma omp simd aligned(r_12_array,r_1I_array,r_2I_array) reduction(+:val_tot) for (int ptcl = 0; ptcl < Nptcl; ptcl++) { const real_type r_12 = r_12_array[ptcl]; + const real_type r_1I = r_1I_array[ptcl]; const real_type r_2I = r_2I_array[ptcl]; real_type val = czero; real_type r2l(cone); @@ -437,9 +438,9 @@ struct PolynomialFunctor3D : public OptimizableFunctorBase hess(1, 2) = both_minus_L * hess(1, 2) + r_1I_minus_L * grad[1] + r_2I_minus_L * grad[2] + val; hess(2, 2) = both_minus_L * hess(2, 2) + ctwo * r_1I_minus_L * grad[2]; - grad[0] = both_minus_L * grad[0]; - grad[1] = both_minus_L * grad[1] + r_2I_minus_L * val; - grad[2] = both_minus_L * grad[2] + r_1I_minus_L * val; + grad[0] = both_minus_L * grad[0]; + grad[1] = both_minus_L * grad[1] + r_2I_minus_L * val; + grad[2] = both_minus_L * grad[2] + r_1I_minus_L * val; val *= both_minus_L; } hess(1, 0) = hess(0, 1); @@ -450,7 +451,8 @@ struct PolynomialFunctor3D : public OptimizableFunctorBase // assume r_1I < L && r_2I < L, compression and screening is handled outside inline void evaluateVGL( - int Nptcl, const real_type *restrict r_12_array, const real_type r_1I, + int Nptcl, const real_type *restrict r_12_array, + const real_type *restrict r_1I_array, const real_type *restrict r_2I_array, real_type *restrict val_array, real_type *restrict grad0_array, real_type *restrict grad1_array, real_type *restrict grad2_array, real_type *restrict hess00_array, @@ -463,12 +465,14 @@ struct PolynomialFunctor3D : public OptimizableFunctorBase constexpr real_type ctwo(2); const real_type L = chalf * cutoff_radius; -#pragma omp simd aligned(r_12_array, r_2I_array, val_array, grad0_array, \ - grad1_array, grad2_array, hess00_array, hess11_array, \ - hess22_array, hess01_array, hess02_array) +#pragma omp simd aligned(r_12_array, r_1I_array, r_2I_array, val_array, \ + grad0_array, grad1_array, grad2_array, hess00_array, \ + hess11_array, hess22_array, hess01_array, \ + hess02_array) for (int ptcl = 0; ptcl < Nptcl; ptcl++) { const real_type r_12 = r_12_array[ptcl]; + const real_type r_1I = r_1I_array[ptcl]; const real_type r_2I = r_2I_array[ptcl]; real_type val(czero); @@ -549,5 +553,5 @@ struct PolynomialFunctor3D : public OptimizableFunctorBase } } }; -} +} // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/Jastrow/ThreeBodyJastrow.h b/src/QMCWaveFunctions/Jastrow/ThreeBodyJastrow.h index b0c93c7cb..4af4376aa 100644 --- a/src/QMCWaveFunctions/Jastrow/ThreeBodyJastrow.h +++ b/src/QMCWaveFunctions/Jastrow/ThreeBodyJastrow.h @@ -1,29 +1,22 @@ -//////////////////////////////////////////////////////////////////////////////// // This file is distributed under the University of Illinois/NCSA Open Source -// License. See LICENSE file in top directory for details. +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory // -// File created by: -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -//////////////////////////////////////////////////////////////////////////////// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// -#ifndef QMCPLUSPLUS_EEIJASTROW_OPTIMIZED_SOA_H -#define QMCPLUSPLUS_EEIJASTROW_OPTIMIZED_SOA_H +#ifndef QMCPLUSPLUS_EEIJASTROW_H +#define QMCPLUSPLUS_EEIJASTROW_H #include "Utilities/Configuration.h" #include "QMCWaveFunctions/WaveFunctionComponentBase.h" #include "Particle/DistanceTableData.h" -#include "Numerics/OhmmsPETE/OhmmsArray.h" -#include +#include +#include #include -/*! - * @file ThreeBodyJastrow.h - */ - namespace qmcplusplus { @@ -43,13 +36,12 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase using posT = TinyVector; /// use the same container using RowContainer = DistanceTableData::RowContainer; - /// define container types - using vContainer_type = aligned_vector; - using gContainer_type = VectorSoAContainer; /// table index for i-el, el-el is always zero int myTableID; // nuber of particles int Nelec, Nion; + /// number of particles + padded + size_t Nelec_padded; // number of groups of the target particleset int eGroups, iGroups; /// reference to the sources (ions) @@ -58,38 +50,36 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase RealType DiffVal; ///\f$Uat[i] = sum_(j) u_{i,j}\f$ - Vector Uat; - vContainer_type oldUk, newUk; + Vector Uat, oldUk, newUk; ///\f$dUat[i] = sum_(j) du_{i,j}\f$ - Vector dUat; - valT *FirstAddressOfdU, *LastAddressOfdU; - gContainer_type olddUk, newdUk; + using gContainer_type = VectorSoAContainer; + gContainer_type dUat, olddUk, newdUk; ///\f$d2Uat[i] = sum_(j) d2u_{i,j}\f$ - Vector d2Uat; - vContainer_type oldd2Uk, newd2Uk; + Vector d2Uat, oldd2Uk, newd2Uk; + /// current values during PbyP valT cur_Uat, cur_d2Uat; - posT cur_dUat; + posT cur_dUat, dUat_temp; /// container for the Jastrow functions Array F; - std::map J3Unique; - // YYYY - std::map J3UniqueIndex; - /// the cutoff for e-I pairs std::vector Ion_cutoff; /// the electrons around ions within the cutoff radius, grouped by species Array, 2> elecs_inside; + Array, 2> elecs_inside_dist; + Array, 2> elecs_inside_displ; + /// the ions around + std::vector ions_nearby; + /// work buffer size + size_t Nbuffer; /// compressed distances - aligned_vector Distjk_Compressed, DistkI_Compressed; - std::vector DistIndice; - - using VGL_type = VectorSoAContainer; - VGL_type mVGL; - - // Used for evaluating derivatives with respect to the parameters - int NumVars; + aligned_vector Distjk_Compressed, DistkI_Compressed, DistjI_Compressed; + std::vector DistIndice_k; + /// compressed displacements + gContainer_type Disp_jk_Compressed, Disp_jI_Compressed, Disp_kI_Compressed; + /// work result buffer + VectorSoAContainer mVGL; public: /// alias FuncType @@ -97,7 +87,7 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase ThreeBodyJastrow(const ParticleSet &ions, ParticleSet &elecs, bool is_master = false) - : Ions(ions), NumVars(0) + : Ions(ions) { WaveFunctionComponentName = "ThreeBodyJastrow"; myTableID = elecs.addTable(Ions, DT_SOA); @@ -109,15 +99,14 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase void init(ParticleSet &p) { - Nelec = p.getTotalNum(); - Nion = Ions.getTotalNum(); - iGroups = Ions.getSpeciesSet().getTotalNum(); - eGroups = p.groups(); + Nelec = p.getTotalNum(); + Nelec_padded = getAlignedSize(Nelec); + Nion = Ions.getTotalNum(); + iGroups = Ions.getSpeciesSet().getTotalNum(); + eGroups = p.groups(); Uat.resize(Nelec); dUat.resize(Nelec); - FirstAddressOfdU = &(dUat[0][0]); - LastAddressOfdU = FirstAddressOfdU + dUat.size() * OHMMS_DIM; d2Uat.resize(Nelec); oldUk.resize(Nelec); @@ -129,13 +118,22 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase F.resize(iGroups, eGroups, eGroups); F = nullptr; - elecs_inside.resize(Nion, eGroups); - Ion_cutoff.resize(Nion); - - mVGL.resize(Nelec); - DistkI_Compressed.resize(Nelec); - Distjk_Compressed.resize(Nelec); - DistIndice.resize(Nelec); + elecs_inside.resize(eGroups, Nion); + elecs_inside_dist.resize(eGroups, Nion); + elecs_inside_displ.resize(eGroups, Nion); + ions_nearby.resize(Nion); + Ion_cutoff.resize(Nion, 0.0); + + // initialize buffers + Nbuffer = Nelec; + mVGL.resize(Nbuffer); + Distjk_Compressed.resize(Nbuffer); + DistjI_Compressed.resize(Nbuffer); + DistkI_Compressed.resize(Nbuffer); + Disp_jk_Compressed.resize(Nbuffer); + Disp_jI_Compressed.resize(Nbuffer); + Disp_kI_Compressed.resize(Nbuffer); + DistIndice_k.resize(Nbuffer); } void addFunc(int iSpecies, int eSpecies1, int eSpecies2, FT *j) @@ -158,16 +156,13 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase if (j) { RealType rcut = 0.5 * j->cutoff_radius; - for (int i = 0; i < Nion; i++) + for (int i = 0; i < Nion; i++) if (Ions.GroupID[i] == iSpecies) Ion_cutoff[i] = rcut; } else { APP_ABORT("ThreeBodyJastrow::addFunc Jastrow function pointer is NULL"); } - std::stringstream aname; - aname << iSpecies << "_" << eSpecies1 << "_" << eSpecies2; - J3Unique[aname.str()] = j; } /** check that correlation information is complete @@ -190,14 +185,13 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase } if (!complete) { - APP_ABORT( - "ThreeBodyJastrow::check_complete J3 eeI is missing correlation " - "components\n see preceding messages for details"); + APP_ABORT("ThreeBodyJastrow::check_complete J3 eeI is missing " + "correlation components\n see preceding messages for details"); } // first set radii for (int i = 0; i < Nion; ++i) { - FT *f = F(Ions.GroupID[i], 0, 0); + FT *f = F(Ions.GroupID[i], 0, 0); if (f != 0) Ion_cutoff[i] = .5 * f->cutoff_radius; } // then check radii @@ -219,9 +213,8 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase } if (!all_radii_match) { - APP_ABORT( - "ThreeBodyJastrow::check_radii J3 eeI are inconsistent for some " - "ion species\n see preceding messages for details"); + APP_ABORT("ThreeBodyJastrow::check_radii J3 eeI are inconsistent for " + "some ion species\n see preceding messages for details"); } } @@ -231,13 +224,22 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase for (int iat = 0; iat < Nion; ++iat) for (int jg = 0; jg < eGroups; ++jg) - elecs_inside(iat, jg).clear(); + { + elecs_inside(jg, iat).clear(); + elecs_inside_dist(jg, iat).clear(); + elecs_inside_displ(jg, iat).clear(); + } for (int jg = 0; jg < eGroups; ++jg) for (int jel = P.first(jg); jel < P.last(jg); jel++) for (int iat = 0; iat < Nion; ++iat) if (eI_table.Distances[jel][iat] < Ion_cutoff[iat]) - elecs_inside(iat, jg).push_back(jel); + { + elecs_inside(jg, iat).push_back(jel); + elecs_inside_dist(jg, iat).push_back(eI_table.Distances[jel][iat]); + elecs_inside_displ(jg, iat).push_back( + eI_table.Displacements[jel][iat]); + } } RealType evaluateLog(ParticleSet &P, ParticleSet::ParticleGradient_t &G, @@ -253,7 +255,8 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase const DistanceTableData &eI_table = (*P.DistTables[myTableID]); const DistanceTableData &ee_table = (*P.DistTables[0]); - cur_Uat = computeU(P, iat, eI_table.Temp_r.data(), ee_table.Temp_r.data()); + cur_Uat = computeU(P, iat, P.GroupID[iat], eI_table.Temp_r.data(), + ee_table.Temp_r.data()); DiffVal = Uat[iat] - cur_Uat; return std::exp(DiffVal); } @@ -281,7 +284,7 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase // get the old value, grad, lapl computeU3(P, iat, eI_table.Distances[iat], eI_table.Displacements[iat], ee_table.Distances[iat], ee_table.Displacements[iat], Uat[iat], - dUat[iat], d2Uat[iat], oldUk, olddUk, oldd2Uk); + dUat_temp, d2Uat[iat], oldUk, olddUk, oldd2Uk); if (UpdateMode == ORB_PBYP_RATIO) { // ratio-only during the move; need to compute derivatives computeU3(P, iat, eI_table.Temp_r.data(), eI_table.Temp_dr, @@ -292,12 +295,20 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase for (int jel = 0; jel < Nelec; jel++) { Uat[jel] += newUk[jel] - oldUk[jel]; - dUat[jel] += newdUk[jel] - olddUk[jel]; d2Uat[jel] += newd2Uk[jel] - oldd2Uk[jel]; } + for (int idim = 0; idim < OHMMS_DIM; ++idim) + { + valT *restrict save_g = dUat.data(idim); + const valT *restrict new_g = newdUk.data(idim); + const valT *restrict old_g = olddUk.data(idim); + for (int jel = 0; jel < Nelec; jel++) + save_g[jel] += new_g[jel] - old_g[jel]; + } + LogValue += Uat[iat] - cur_Uat; Uat[iat] = cur_Uat; - dUat[iat] = cur_dUat; + dUat(iat) = cur_dUat; d2Uat[iat] = cur_d2Uat; const int ig = P.GroupID[iat]; @@ -305,18 +316,37 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase for (int jat = 0; jat < Nion; jat++) { bool inside = eI_table.Temp_r[jat] < Ion_cutoff[jat]; - std::vector::iterator iter; - iter = - find(elecs_inside(jat, ig).begin(), elecs_inside(jat, ig).end(), iat); + auto iter = + find(elecs_inside(ig, jat).begin(), elecs_inside(ig, jat).end(), iat); + auto iter_dist = elecs_inside_dist(ig, jat).begin() + + std::distance(elecs_inside(ig, jat).begin(), iter); + auto iter_displ = elecs_inside_displ(ig, jat).begin() + + std::distance(elecs_inside(ig, jat).begin(), iter); if (inside) { - if (iter == elecs_inside(jat, ig).end()) - elecs_inside(jat, ig).push_back(iat); + if (iter == elecs_inside(ig, jat).end()) + { + elecs_inside(ig, jat).push_back(iat); + elecs_inside_dist(ig, jat).push_back(eI_table.Temp_r[jat]); + elecs_inside_displ(ig, jat).push_back(eI_table.Temp_dr[jat]); + } + else + { + *iter_dist = eI_table.Temp_r[jat]; + *iter_displ = eI_table.Temp_dr[jat]; + } } else { - if (iter != elecs_inside(jat, ig).end()) - elecs_inside(jat, ig).erase(iter); + if (iter != elecs_inside(ig, jat).end()) + { + *iter = elecs_inside(ig, jat).back(); + elecs_inside(ig, jat).pop_back(); + *iter_dist = elecs_inside_dist(ig, jat).back(); + elecs_inside_dist(ig, jat).pop_back(); + *iter_displ = elecs_inside_displ(ig, jat).back(); + elecs_inside_displ(ig, jat).pop_back(); + } } } } @@ -332,75 +362,87 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase { computeU3(P, jel, eI_table.Distances[jel], eI_table.Displacements[jel], ee_table.Distances[jel], ee_table.Displacements[jel], Uat[jel], - dUat[jel], d2Uat[jel], newUk, newdUk, newd2Uk, true); + dUat_temp, d2Uat[jel], newUk, newdUk, newd2Uk, true); + dUat(jel) = dUat_temp; // add the contribution from the upper triangle for (int kel = 0; kel < jel; kel++) { Uat[kel] += newUk[kel]; - dUat[kel] += newdUk[kel]; d2Uat[kel] += newd2Uk[kel]; } + for (int idim = 0; idim < OHMMS_DIM; ++idim) + { + valT *restrict save_g = dUat.data(idim); + const valT *restrict new_g = newdUk.data(idim); + for (int kel = 0; kel < jel; kel++) + save_g[kel] += new_g[kel]; + } } } - inline valT computeU(ParticleSet &P, int jel, const RealType *distjI, - const RealType *distjk) + inline valT computeU(const ParticleSet &P, int jel, int jg, + const RealType *distjI, const RealType *distjk) { - valT Uj = valT(0); - const DistanceTableData &eI_table = (*P.DistTables[myTableID]); - const int jg = P.GroupID[jel]; + ions_nearby.clear(); for (int iat = 0; iat < Nion; ++iat) - if (distjI[iat] < Ion_cutoff[iat]) + if (distjI[iat] < Ion_cutoff[iat]) ions_nearby.push_back(iat); + + valT Uj = valT(0); + for (int kg = 0; kg < eGroups; ++kg) + { + int kel_counter = 0; + for (int iind = 0; iind < ions_nearby.size(); ++iind) { + const int iat = ions_nearby[iind]; const int ig = Ions.GroupID[iat]; - const valT r_Ij = distjI[iat]; - - for (int kg = 0; kg < eGroups; ++kg) + const valT r_jI = distjI[iat]; + for (int kind = 0; kind < elecs_inside(kg, iat).size(); kind++) { - const FT &feeI(*F(ig, jg, kg)); - int kel_counter = 0; - for (int kind = 0; kind < elecs_inside(iat, kg).size(); kind++) + const int kel = elecs_inside(kg, iat)[kind]; + if (kel != jel) { - const int kel = elecs_inside(iat, kg)[kind]; - if (kel != jel) + DistkI_Compressed[kel_counter] = elecs_inside_dist(kg, iat)[kind]; + Distjk_Compressed[kel_counter] = distjk[kel]; + DistjI_Compressed[kel_counter] = r_jI; + kel_counter++; + if (kel_counter == Nbuffer) { - DistkI_Compressed[kel_counter] = eI_table.Distances[kel][iat]; - Distjk_Compressed[kel_counter] = distjk[kel]; - kel_counter++; + const FT &feeI(*F(ig, jg, kg)); + Uj += feeI.evaluateV(kel_counter, Distjk_Compressed.data(), + DistjI_Compressed.data(), + DistkI_Compressed.data()); + kel_counter = 0; } } - Uj += feeI.evaluateV(kel_counter, Distjk_Compressed.data(), r_Ij, + } + if ((iind + 1 == ions_nearby.size() || + ig != Ions.GroupID[ions_nearby[iind + 1]]) && + kel_counter > 0) + { + const FT &feeI(*F(ig, jg, kg)); + Uj += feeI.evaluateV(kel_counter, Distjk_Compressed.data(), + DistjI_Compressed.data(), DistkI_Compressed.data()); + kel_counter = 0; } } + } return Uj; } - inline void computeU3(ParticleSet &P, int jel, const RealType *distjI, - const RowContainer &displjI, const RealType *distjk, - const RowContainer &displjk, valT &Uj, posT &dUj, - valT &d2Uj, vContainer_type &Uk, gContainer_type &dUk, - vContainer_type &d2Uk, bool triangle = false) + inline void computeU3_engine(const ParticleSet &P, const FT &feeI, + int kel_counter, valT &Uj, posT &dUj, valT &d2Uj, + Vector &Uk, gContainer_type &dUk, + Vector &d2Uk) { + const DistanceTableData &eI_table = (*P.DistTables[myTableID]); + constexpr valT czero(0); constexpr valT cone(1); - constexpr valT cminus(-1); constexpr valT ctwo(2); constexpr valT lapfac = OHMMS_DIM - cone; - Uj = czero; - dUj = posT(); - d2Uj = czero; - - const DistanceTableData &eI_table = (*P.DistTables[myTableID]); - const int jg = P.GroupID[jel]; - - const int kelmax = triangle ? jel : Nelec; - std::fill_n(Uk.data(), kelmax, czero); - std::fill_n(d2Uk.data(), kelmax, czero); - for (int idim = 0; idim < OHMMS_DIM; ++idim) - std::fill_n(dUk.data(idim), kelmax, czero); valT *restrict val = mVGL.data(0); valT *restrict gradF0 = mVGL.data(1); @@ -412,56 +454,133 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase valT *restrict hessF01 = mVGL.data(7); valT *restrict hessF02 = mVGL.data(8); + feeI.evaluateVGL(kel_counter, Distjk_Compressed.data(), + DistjI_Compressed.data(), DistkI_Compressed.data(), val, + gradF0, gradF1, gradF2, hessF00, hessF11, hessF22, hessF01, + hessF02); + + // compute the contribution to jel, kel + Uj = simd::accumulate_n(val, kel_counter, Uj); + valT gradF0_sum = simd::accumulate_n(gradF0, kel_counter, czero); + valT gradF1_sum = simd::accumulate_n(gradF1, kel_counter, czero); + valT hessF00_sum = simd::accumulate_n(hessF00, kel_counter, czero); + valT hessF11_sum = simd::accumulate_n(hessF11, kel_counter, czero); + d2Uj -= hessF00_sum + hessF11_sum + lapfac * (gradF0_sum + gradF1_sum); + std::fill_n(hessF11, kel_counter, czero); + for (int idim = 0; idim < OHMMS_DIM; ++idim) + { + valT *restrict jk = Disp_jk_Compressed.data(idim); + valT *restrict jI = Disp_jI_Compressed.data(idim); + valT *restrict kI = Disp_kI_Compressed.data(idim); + valT dUj_x(0); + for (int kel_index = 0; kel_index < kel_counter; kel_index++) + { + // recycle hessF11 + hessF11[kel_index] += kI[kel_index] * jk[kel_index]; + dUj_x += gradF1[kel_index] * jI[kel_index]; + // destroy jk, kI + const valT temp = jk[kel_index] * gradF0[kel_index]; + dUj_x += temp; + jk[kel_index] *= jI[kel_index]; + kI[kel_index] = kI[kel_index] * gradF2[kel_index] - temp; + } + dUj[idim] += dUj_x; + + valT *restrict jk0 = Disp_jk_Compressed.data(0); + if (idim > 0) + { + for (int kel_index = 0; kel_index < kel_counter; kel_index++) + jk0[kel_index] += jk[kel_index]; + } + + valT *restrict dUk_x = dUk.data(idim); + for (int kel_index = 0; kel_index < kel_counter; kel_index++) + dUk_x[DistIndice_k[kel_index]] += kI[kel_index]; + } + valT sum(0); + valT *restrict jk0 = Disp_jk_Compressed.data(0); + for (int kel_index = 0; kel_index < kel_counter; kel_index++) + sum += hessF01[kel_index] * jk0[kel_index]; + d2Uj -= ctwo * sum; + + for (int kel_index = 0; kel_index < kel_counter; kel_index++) + hessF00[kel_index] = hessF00[kel_index] + hessF22[kel_index] + + lapfac * (gradF0[kel_index] + gradF2[kel_index]) - + ctwo * hessF02[kel_index] * hessF11[kel_index]; + + for (int kel_index = 0; kel_index < kel_counter; kel_index++) + { + const int kel = DistIndice_k[kel_index]; + Uk[kel] += val[kel_index]; + d2Uk[kel] -= hessF00[kel_index]; + } + } + + inline void computeU3(const ParticleSet &P, int jel, const RealType *distjI, + const RowContainer &displjI, const RealType *distjk, + const RowContainer &displjk, valT &Uj, posT &dUj, + valT &d2Uj, Vector &Uk, gContainer_type &dUk, + Vector &d2Uk, bool triangle = false) + { + constexpr valT czero(0); + + Uj = czero; + dUj = posT(); + d2Uj = czero; + + const int jg = P.GroupID[jel]; + + const int kelmax = triangle ? jel : Nelec; + std::fill_n(Uk.data(), kelmax, czero); + std::fill_n(d2Uk.data(), kelmax, czero); + for (int idim = 0; idim < OHMMS_DIM; ++idim) + std::fill_n(dUk.data(idim), kelmax, czero); + + ions_nearby.clear(); for (int iat = 0; iat < Nion; ++iat) - if (distjI[iat] < Ion_cutoff[iat]) + if (distjI[iat] < Ion_cutoff[iat]) ions_nearby.push_back(iat); + + for (int kg = 0; kg < eGroups; ++kg) + { + int kel_counter = 0; + for (int iind = 0; iind < ions_nearby.size(); ++iind) { + const int iat = ions_nearby[iind]; const int ig = Ions.GroupID[iat]; - const valT r_Ij = distjI[iat]; - const posT disp_Ij = cminus * displjI[iat]; - - for (int kg = 0; kg < eGroups; ++kg) + const valT r_jI = distjI[iat]; + const posT disp_Ij = displjI[iat]; + for (int kind = 0; kind < elecs_inside(kg, iat).size(); kind++) { - const FT &feeI(*F(ig, jg, kg)); - int kel_counter = 0; - for (int kind = 0; kind < elecs_inside(iat, kg).size(); kind++) + const int kel = elecs_inside(kg, iat)[kind]; + if (kel < kelmax && kel != jel) { - const int kel = elecs_inside(iat, kg)[kind]; - if (kel < kelmax && kel != jel) + DistkI_Compressed[kel_counter] = elecs_inside_dist(kg, iat)[kind]; + DistjI_Compressed[kel_counter] = r_jI; + Distjk_Compressed[kel_counter] = distjk[kel]; + Disp_kI_Compressed(kel_counter) = elecs_inside_displ(kg, iat)[kind]; + Disp_jI_Compressed(kel_counter) = disp_Ij; + Disp_jk_Compressed(kel_counter) = displjk[kel]; + DistIndice_k[kel_counter] = kel; + kel_counter++; + if (kel_counter == Nbuffer) { - DistkI_Compressed[kel_counter] = eI_table.Distances[kel][iat]; - Distjk_Compressed[kel_counter] = distjk[kel]; - DistIndice[kel_counter] = kel; - kel_counter++; + const FT &feeI(*F(ig, jg, kg)); + computeU3_engine(P, feeI, kel_counter, Uj, dUj, d2Uj, Uk, dUk, + d2Uk); + kel_counter = 0; } } - - feeI.evaluateVGL(kel_counter, Distjk_Compressed.data(), r_Ij, - DistkI_Compressed.data(), val, gradF0, gradF1, - gradF2, hessF00, hessF11, hessF22, hessF01, hessF02); - - for (int kel_index = 0; kel_index < kel_counter; kel_index++) - { - int kel = DistIndice[kel_index]; - const posT disp_Ik = cminus * eI_table.Displacements[kel][iat]; - const posT disp_jk = displjk[kel]; - - // compute the contribution to jel - Uj += val[kel_index]; - dUj += gradF0[kel_index] * disp_jk - gradF1[kel_index] * disp_Ij; - d2Uj -= hessF00[kel_index] + hessF11[kel_index] + - lapfac * (gradF0[kel_index] + gradF1[kel_index]) - - ctwo * hessF01[kel_index] * dot(disp_jk, disp_Ij); - - // compute the contribution to kel - Uk[kel] += val[kel_index]; - dUk(kel) = dUk[kel] - gradF0[kel_index] * disp_jk - - gradF2[kel_index] * disp_Ik; - d2Uk[kel] -= hessF00[kel_index] + hessF22[kel_index] + - lapfac * (gradF0[kel_index] + gradF2[kel_index]) + - ctwo * hessF02[kel_index] * dot(disp_jk, disp_Ik); - } + } + if ((iind + 1 == ions_nearby.size() || + ig != Ions.GroupID[ions_nearby[iind + 1]]) && + kel_counter > 0) + { + const FT &feeI(*F(ig, jg, kg)); + computeU3_engine(P, feeI, kel_counter, Uj, dUj, d2Uj, Uk, dUk, d2Uk); + kel_counter = 0; } } + } } void evaluateGL(ParticleSet &P, ParticleSet::ParticleGradient_t &G, @@ -480,5 +599,6 @@ template class ThreeBodyJastrow : public WaveFunctionComponentBase LogValue = mhalf * LogValue; } }; -} + +} // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/Jastrow/ThreeBodyJastrowRef.h b/src/QMCWaveFunctions/Jastrow/ThreeBodyJastrowRef.h index 5252dc23d..fa98a46c2 100644 --- a/src/QMCWaveFunctions/Jastrow/ThreeBodyJastrowRef.h +++ b/src/QMCWaveFunctions/Jastrow/ThreeBodyJastrowRef.h @@ -1,29 +1,20 @@ -//////////////////////////////////////////////////////////////////////////////// // This file is distributed under the University of Illinois/NCSA Open Source -// License. See LICENSE file in top directory for details. +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory // -// File created by: -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -//////////////////////////////////////////////////////////////////////////////// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// #ifndef QMCPLUSPLUS_EEIJASTROW_REF_H #define QMCPLUSPLUS_EEIJASTROW_REF_H #include "Utilities/Configuration.h" #include "QMCWaveFunctions/WaveFunctionComponentBase.h" #include "Particle/DistanceTableData.h" -#include "Numerics/OhmmsPETE/OhmmsArray.h" -#include #include -/*! - * @file ThreeBodyJastrowRef.h - */ - namespace miniqmcreference { @@ -45,13 +36,12 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase using posT = TinyVector; /// use the same container using RowContainer = DistanceTableData::RowContainer; - /// define container types - using vContainer_type = aligned_vector; - using gContainer_type = VectorSoAContainer; /// table index for i-el, el-el is always zero int myTableID; // nuber of particles int Nelec, Nion; + /// number of particles + padded + size_t Nelec_padded; // number of groups of the target particleset int eGroups, iGroups; /// reference to the sources (ions) @@ -60,38 +50,36 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase RealType DiffVal; ///\f$Uat[i] = sum_(j) u_{i,j}\f$ - Vector Uat; - vContainer_type oldUk, newUk; + Vector Uat, oldUk, newUk; ///\f$dUat[i] = sum_(j) du_{i,j}\f$ - Vector dUat; - valT *FirstAddressOfdU, *LastAddressOfdU; - gContainer_type olddUk, newdUk; + using gContainer_type = VectorSoAContainer; + gContainer_type dUat, olddUk, newdUk; ///\f$d2Uat[i] = sum_(j) d2u_{i,j}\f$ - Vector d2Uat; - vContainer_type oldd2Uk, newd2Uk; + Vector d2Uat, oldd2Uk, newd2Uk; + /// current values during PbyP valT cur_Uat, cur_d2Uat; - posT cur_dUat; + posT cur_dUat, dUat_temp; /// container for the Jastrow functions Array F; - std::map J3Unique; - // YYYY - std::map J3UniqueIndex; - /// the cutoff for e-I pairs std::vector Ion_cutoff; /// the electrons around ions within the cutoff radius, grouped by species Array, 2> elecs_inside; + Array, 2> elecs_inside_dist; + Array, 2> elecs_inside_displ; + /// the ions around + std::vector ions_nearby; + /// work buffer size + size_t Nbuffer; /// compressed distances - aligned_vector Distjk_Compressed, DistkI_Compressed; - std::vector DistIndice; - - using VGL_type = VectorSoAContainer; - VGL_type mVGL; - - // Used for evaluating derivatives with respect to the parameters - int NumVars; + aligned_vector Distjk_Compressed, DistkI_Compressed, DistjI_Compressed; + std::vector DistIndice_k; + /// compressed displacements + gContainer_type Disp_jk_Compressed, Disp_jI_Compressed, Disp_kI_Compressed; + /// work result buffer + VectorSoAContainer mVGL; public: /// alias FuncType @@ -99,7 +87,7 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase ThreeBodyJastrowRef(const ParticleSet &ions, ParticleSet &elecs, bool is_master = false) - : Ions(ions), NumVars(0) + : Ions(ions) { WaveFunctionComponentName = "ThreeBodyJastrowRef"; myTableID = elecs.addTable(Ions, DT_SOA); @@ -111,15 +99,14 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase void init(ParticleSet &p) { - Nelec = p.getTotalNum(); - Nion = Ions.getTotalNum(); - iGroups = Ions.getSpeciesSet().getTotalNum(); - eGroups = p.groups(); + Nelec = p.getTotalNum(); + Nelec_padded = getAlignedSize(Nelec); + Nion = Ions.getTotalNum(); + iGroups = Ions.getSpeciesSet().getTotalNum(); + eGroups = p.groups(); Uat.resize(Nelec); dUat.resize(Nelec); - FirstAddressOfdU = &(dUat[0][0]); - LastAddressOfdU = FirstAddressOfdU + dUat.size() * OHMMS_DIM; d2Uat.resize(Nelec); oldUk.resize(Nelec); @@ -131,13 +118,22 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase F.resize(iGroups, eGroups, eGroups); F = nullptr; - elecs_inside.resize(Nion, eGroups); - Ion_cutoff.resize(Nion); - - mVGL.resize(Nelec); - DistkI_Compressed.resize(Nelec); - Distjk_Compressed.resize(Nelec); - DistIndice.resize(Nelec); + elecs_inside.resize(eGroups, Nion); + elecs_inside_dist.resize(eGroups, Nion); + elecs_inside_displ.resize(eGroups, Nion); + ions_nearby.resize(Nion); + Ion_cutoff.resize(Nion, 0.0); + + // initialize buffers + Nbuffer = Nelec; + mVGL.resize(Nbuffer); + Distjk_Compressed.resize(Nbuffer); + DistjI_Compressed.resize(Nbuffer); + DistkI_Compressed.resize(Nbuffer); + Disp_jk_Compressed.resize(Nbuffer); + Disp_jI_Compressed.resize(Nbuffer); + Disp_kI_Compressed.resize(Nbuffer); + DistIndice_k.resize(Nbuffer); } void addFunc(int iSpecies, int eSpecies1, int eSpecies2, FT *j) @@ -160,7 +156,7 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase if (j) { RealType rcut = 0.5 * j->cutoff_radius; - for (int i = 0; i < Nion; i++) + for (int i = 0; i < Nion; i++) if (Ions.GroupID[i] == iSpecies) Ion_cutoff[i] = rcut; } else @@ -168,9 +164,6 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase APP_ABORT( "ThreeBodyJastrowRef::addFunc Jastrow function pointer is NULL"); } - std::stringstream aname; - aname << iSpecies << "_" << eSpecies1 << "_" << eSpecies2; - J3Unique[aname.str()] = j; } /** check that correlation information is complete @@ -193,14 +186,13 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase } if (!complete) { - APP_ABORT( - "ThreeBodyJastrowRef::check_complete J3 eeI is missing correlation " - "components\n see preceding messages for details"); + APP_ABORT("ThreeBodyJastrowRef::check_complete J3 eeI is missing " + "correlation components\n see preceding messages for details"); } // first set radii for (int i = 0; i < Nion; ++i) { - FT *f = F(Ions.GroupID[i], 0, 0); + FT *f = F(Ions.GroupID[i], 0, 0); if (f != 0) Ion_cutoff[i] = .5 * f->cutoff_radius; } // then check radii @@ -222,9 +214,8 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase } if (!all_radii_match) { - APP_ABORT( - "ThreeBodyJastrowRef::check_radii J3 eeI are inconsistent for some " - "ion species\n see preceding messages for details"); + APP_ABORT("ThreeBodyJastrowRef::check_radii J3 eeI are inconsistent for " + "some ion species\n see preceding messages for details"); } } @@ -234,13 +225,22 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase for (int iat = 0; iat < Nion; ++iat) for (int jg = 0; jg < eGroups; ++jg) - elecs_inside(iat, jg).clear(); + { + elecs_inside(jg, iat).clear(); + elecs_inside_dist(jg, iat).clear(); + elecs_inside_displ(jg, iat).clear(); + } for (int jg = 0; jg < eGroups; ++jg) for (int jel = P.first(jg); jel < P.last(jg); jel++) for (int iat = 0; iat < Nion; ++iat) if (eI_table.Distances[jel][iat] < Ion_cutoff[iat]) - elecs_inside(iat, jg).push_back(jel); + { + elecs_inside(jg, iat).push_back(jel); + elecs_inside_dist(jg, iat).push_back(eI_table.Distances[jel][iat]); + elecs_inside_displ(jg, iat).push_back( + eI_table.Displacements[jel][iat]); + } } RealType evaluateLog(ParticleSet &P, ParticleSet::ParticleGradient_t &G, @@ -256,7 +256,8 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase const DistanceTableData &eI_table = (*P.DistTables[myTableID]); const DistanceTableData &ee_table = (*P.DistTables[0]); - cur_Uat = computeU(P, iat, eI_table.Temp_r.data(), ee_table.Temp_r.data()); + cur_Uat = computeU(P, iat, P.GroupID[iat], eI_table.Temp_r.data(), + ee_table.Temp_r.data()); DiffVal = Uat[iat] - cur_Uat; return std::exp(DiffVal); } @@ -284,7 +285,7 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase // get the old value, grad, lapl computeU3(P, iat, eI_table.Distances[iat], eI_table.Displacements[iat], ee_table.Distances[iat], ee_table.Displacements[iat], Uat[iat], - dUat[iat], d2Uat[iat], oldUk, olddUk, oldd2Uk); + dUat_temp, d2Uat[iat], oldUk, olddUk, oldd2Uk); if (UpdateMode == ORB_PBYP_RATIO) { // ratio-only during the move; need to compute derivatives computeU3(P, iat, eI_table.Temp_r.data(), eI_table.Temp_dr, @@ -295,12 +296,20 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase for (int jel = 0; jel < Nelec; jel++) { Uat[jel] += newUk[jel] - oldUk[jel]; - dUat[jel] += newdUk[jel] - olddUk[jel]; d2Uat[jel] += newd2Uk[jel] - oldd2Uk[jel]; } + for (int idim = 0; idim < OHMMS_DIM; ++idim) + { + valT *restrict save_g = dUat.data(idim); + const valT *restrict new_g = newdUk.data(idim); + const valT *restrict old_g = olddUk.data(idim); + for (int jel = 0; jel < Nelec; jel++) + save_g[jel] += new_g[jel] - old_g[jel]; + } + LogValue += Uat[iat] - cur_Uat; Uat[iat] = cur_Uat; - dUat[iat] = cur_dUat; + dUat(iat) = cur_dUat; d2Uat[iat] = cur_d2Uat; const int ig = P.GroupID[iat]; @@ -308,18 +317,37 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase for (int jat = 0; jat < Nion; jat++) { bool inside = eI_table.Temp_r[jat] < Ion_cutoff[jat]; - std::vector::iterator iter; - iter = - find(elecs_inside(jat, ig).begin(), elecs_inside(jat, ig).end(), iat); + auto iter = + find(elecs_inside(ig, jat).begin(), elecs_inside(ig, jat).end(), iat); + auto iter_dist = elecs_inside_dist(ig, jat).begin() + + std::distance(elecs_inside(ig, jat).begin(), iter); + auto iter_displ = elecs_inside_displ(ig, jat).begin() + + std::distance(elecs_inside(ig, jat).begin(), iter); if (inside) { - if (iter == elecs_inside(jat, ig).end()) - elecs_inside(jat, ig).push_back(iat); + if (iter == elecs_inside(ig, jat).end()) + { + elecs_inside(ig, jat).push_back(iat); + elecs_inside_dist(ig, jat).push_back(eI_table.Temp_r[jat]); + elecs_inside_displ(ig, jat).push_back(eI_table.Temp_dr[jat]); + } + else + { + *iter_dist = eI_table.Temp_r[jat]; + *iter_displ = eI_table.Temp_dr[jat]; + } } else { - if (iter != elecs_inside(jat, ig).end()) - elecs_inside(jat, ig).erase(iter); + if (iter != elecs_inside(ig, jat).end()) + { + *iter = elecs_inside(ig, jat).back(); + elecs_inside(ig, jat).pop_back(); + *iter_dist = elecs_inside_dist(ig, jat).back(); + elecs_inside_dist(ig, jat).pop_back(); + *iter_displ = elecs_inside_displ(ig, jat).back(); + elecs_inside_displ(ig, jat).pop_back(); + } } } } @@ -335,75 +363,87 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase { computeU3(P, jel, eI_table.Distances[jel], eI_table.Displacements[jel], ee_table.Distances[jel], ee_table.Displacements[jel], Uat[jel], - dUat[jel], d2Uat[jel], newUk, newdUk, newd2Uk, true); + dUat_temp, d2Uat[jel], newUk, newdUk, newd2Uk, true); + dUat(jel) = dUat_temp; // add the contribution from the upper triangle for (int kel = 0; kel < jel; kel++) { Uat[kel] += newUk[kel]; - dUat[kel] += newdUk[kel]; d2Uat[kel] += newd2Uk[kel]; } + for (int idim = 0; idim < OHMMS_DIM; ++idim) + { + valT *restrict save_g = dUat.data(idim); + const valT *restrict new_g = newdUk.data(idim); + for (int kel = 0; kel < jel; kel++) + save_g[kel] += new_g[kel]; + } } } - inline valT computeU(ParticleSet &P, int jel, const RealType *distjI, - const RealType *distjk) + inline valT computeU(const ParticleSet &P, int jel, int jg, + const RealType *distjI, const RealType *distjk) { - valT Uj = valT(0); - const DistanceTableData &eI_table = (*P.DistTables[myTableID]); - const int jg = P.GroupID[jel]; + ions_nearby.clear(); for (int iat = 0; iat < Nion; ++iat) - if (distjI[iat] < Ion_cutoff[iat]) + if (distjI[iat] < Ion_cutoff[iat]) ions_nearby.push_back(iat); + + valT Uj = valT(0); + for (int kg = 0; kg < eGroups; ++kg) + { + int kel_counter = 0; + for (int iind = 0; iind < ions_nearby.size(); ++iind) { + const int iat = ions_nearby[iind]; const int ig = Ions.GroupID[iat]; - const valT r_Ij = distjI[iat]; - - for (int kg = 0; kg < eGroups; ++kg) + const valT r_jI = distjI[iat]; + for (int kind = 0; kind < elecs_inside(kg, iat).size(); kind++) { - const FT &feeI(*F(ig, jg, kg)); - int kel_counter = 0; - for (int kind = 0; kind < elecs_inside(iat, kg).size(); kind++) + const int kel = elecs_inside(kg, iat)[kind]; + if (kel != jel) { - const int kel = elecs_inside(iat, kg)[kind]; - if (kel != jel) + DistkI_Compressed[kel_counter] = elecs_inside_dist(kg, iat)[kind]; + Distjk_Compressed[kel_counter] = distjk[kel]; + DistjI_Compressed[kel_counter] = r_jI; + kel_counter++; + if (kel_counter == Nbuffer) { - DistkI_Compressed[kel_counter] = eI_table.Distances[kel][iat]; - Distjk_Compressed[kel_counter] = distjk[kel]; - kel_counter++; + const FT &feeI(*F(ig, jg, kg)); + Uj += feeI.evaluateV(kel_counter, Distjk_Compressed.data(), + DistjI_Compressed.data(), + DistkI_Compressed.data()); + kel_counter = 0; } } - Uj += feeI.evaluateV(kel_counter, Distjk_Compressed.data(), r_Ij, + } + if ((iind + 1 == ions_nearby.size() || + ig != Ions.GroupID[ions_nearby[iind + 1]]) && + kel_counter > 0) + { + const FT &feeI(*F(ig, jg, kg)); + Uj += feeI.evaluateV(kel_counter, Distjk_Compressed.data(), + DistjI_Compressed.data(), DistkI_Compressed.data()); + kel_counter = 0; } } + } return Uj; } - inline void computeU3(ParticleSet &P, int jel, const RealType *distjI, - const RowContainer &displjI, const RealType *distjk, - const RowContainer &displjk, valT &Uj, posT &dUj, - valT &d2Uj, vContainer_type &Uk, gContainer_type &dUk, - vContainer_type &d2Uk, bool triangle = false) + inline void computeU3_engine(const ParticleSet &P, const FT &feeI, + int kel_counter, valT &Uj, posT &dUj, valT &d2Uj, + Vector &Uk, gContainer_type &dUk, + Vector &d2Uk) { + const DistanceTableData &eI_table = (*P.DistTables[myTableID]); + constexpr valT czero(0); constexpr valT cone(1); - constexpr valT cminus(-1); constexpr valT ctwo(2); constexpr valT lapfac = OHMMS_DIM - cone; - Uj = czero; - dUj = posT(); - d2Uj = czero; - - const DistanceTableData &eI_table = (*P.DistTables[myTableID]); - const int jg = P.GroupID[jel]; - - const int kelmax = triangle ? jel : Nelec; - std::fill_n(Uk.data(), kelmax, czero); - std::fill_n(d2Uk.data(), kelmax, czero); - for (int idim = 0; idim < OHMMS_DIM; ++idim) - std::fill_n(dUk.data(idim), kelmax, czero); valT *restrict val = mVGL.data(0); valT *restrict gradF0 = mVGL.data(1); @@ -415,56 +455,133 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase valT *restrict hessF01 = mVGL.data(7); valT *restrict hessF02 = mVGL.data(8); + feeI.evaluateVGL(kel_counter, Distjk_Compressed.data(), + DistjI_Compressed.data(), DistkI_Compressed.data(), val, + gradF0, gradF1, gradF2, hessF00, hessF11, hessF22, hessF01, + hessF02); + + // compute the contribution to jel, kel + Uj = std::accumulate(val, val + kel_counter, Uj); + valT gradF0_sum = std::accumulate(gradF0, gradF0 + kel_counter, czero); + valT gradF1_sum = std::accumulate(gradF1, gradF1 + kel_counter, czero); + valT hessF00_sum = std::accumulate(hessF00, hessF00 + kel_counter, czero); + valT hessF11_sum = std::accumulate(hessF11, hessF11 + kel_counter, czero); + d2Uj -= hessF00_sum + hessF11_sum + lapfac * (gradF0_sum + gradF1_sum); + std::fill_n(hessF11, kel_counter, czero); + for (int idim = 0; idim < OHMMS_DIM; ++idim) + { + valT *restrict jk = Disp_jk_Compressed.data(idim); + valT *restrict jI = Disp_jI_Compressed.data(idim); + valT *restrict kI = Disp_kI_Compressed.data(idim); + valT dUj_x(0); + for (int kel_index = 0; kel_index < kel_counter; kel_index++) + { + // recycle hessF11 + hessF11[kel_index] += kI[kel_index] * jk[kel_index]; + dUj_x += gradF1[kel_index] * jI[kel_index]; + // destroy jk, kI + const valT temp = jk[kel_index] * gradF0[kel_index]; + dUj_x += temp; + jk[kel_index] *= jI[kel_index]; + kI[kel_index] = kI[kel_index] * gradF2[kel_index] - temp; + } + dUj[idim] += dUj_x; + + valT *restrict jk0 = Disp_jk_Compressed.data(0); + if (idim > 0) + { + for (int kel_index = 0; kel_index < kel_counter; kel_index++) + jk0[kel_index] += jk[kel_index]; + } + + valT *restrict dUk_x = dUk.data(idim); + for (int kel_index = 0; kel_index < kel_counter; kel_index++) + dUk_x[DistIndice_k[kel_index]] += kI[kel_index]; + } + valT sum(0); + valT *restrict jk0 = Disp_jk_Compressed.data(0); + for (int kel_index = 0; kel_index < kel_counter; kel_index++) + sum += hessF01[kel_index] * jk0[kel_index]; + d2Uj -= ctwo * sum; + + for (int kel_index = 0; kel_index < kel_counter; kel_index++) + hessF00[kel_index] = hessF00[kel_index] + hessF22[kel_index] + + lapfac * (gradF0[kel_index] + gradF2[kel_index]) - + ctwo * hessF02[kel_index] * hessF11[kel_index]; + + for (int kel_index = 0; kel_index < kel_counter; kel_index++) + { + const int kel = DistIndice_k[kel_index]; + Uk[kel] += val[kel_index]; + d2Uk[kel] -= hessF00[kel_index]; + } + } + + inline void computeU3(const ParticleSet &P, int jel, const RealType *distjI, + const RowContainer &displjI, const RealType *distjk, + const RowContainer &displjk, valT &Uj, posT &dUj, + valT &d2Uj, Vector &Uk, gContainer_type &dUk, + Vector &d2Uk, bool triangle = false) + { + constexpr valT czero(0); + + Uj = czero; + dUj = posT(); + d2Uj = czero; + + const int jg = P.GroupID[jel]; + + const int kelmax = triangle ? jel : Nelec; + std::fill_n(Uk.data(), kelmax, czero); + std::fill_n(d2Uk.data(), kelmax, czero); + for (int idim = 0; idim < OHMMS_DIM; ++idim) + std::fill_n(dUk.data(idim), kelmax, czero); + + ions_nearby.clear(); for (int iat = 0; iat < Nion; ++iat) - if (distjI[iat] < Ion_cutoff[iat]) + if (distjI[iat] < Ion_cutoff[iat]) ions_nearby.push_back(iat); + + for (int kg = 0; kg < eGroups; ++kg) + { + int kel_counter = 0; + for (int iind = 0; iind < ions_nearby.size(); ++iind) { + const int iat = ions_nearby[iind]; const int ig = Ions.GroupID[iat]; - const valT r_Ij = distjI[iat]; - const posT disp_Ij = cminus * displjI[iat]; - - for (int kg = 0; kg < eGroups; ++kg) + const valT r_jI = distjI[iat]; + const posT disp_Ij = displjI[iat]; + for (int kind = 0; kind < elecs_inside(kg, iat).size(); kind++) { - const FT &feeI(*F(ig, jg, kg)); - int kel_counter = 0; - for (int kind = 0; kind < elecs_inside(iat, kg).size(); kind++) + const int kel = elecs_inside(kg, iat)[kind]; + if (kel < kelmax && kel != jel) { - const int kel = elecs_inside(iat, kg)[kind]; - if (kel < kelmax && kel != jel) + DistkI_Compressed[kel_counter] = elecs_inside_dist(kg, iat)[kind]; + DistjI_Compressed[kel_counter] = r_jI; + Distjk_Compressed[kel_counter] = distjk[kel]; + Disp_kI_Compressed(kel_counter) = elecs_inside_displ(kg, iat)[kind]; + Disp_jI_Compressed(kel_counter) = disp_Ij; + Disp_jk_Compressed(kel_counter) = displjk[kel]; + DistIndice_k[kel_counter] = kel; + kel_counter++; + if (kel_counter == Nbuffer) { - DistkI_Compressed[kel_counter] = eI_table.Distances[kel][iat]; - Distjk_Compressed[kel_counter] = distjk[kel]; - DistIndice[kel_counter] = kel; - kel_counter++; + const FT &feeI(*F(ig, jg, kg)); + computeU3_engine(P, feeI, kel_counter, Uj, dUj, d2Uj, Uk, dUk, + d2Uk); + kel_counter = 0; } } - - feeI.evaluateVGL(kel_counter, Distjk_Compressed.data(), r_Ij, - DistkI_Compressed.data(), val, gradF0, gradF1, - gradF2, hessF00, hessF11, hessF22, hessF01, hessF02); - - for (int kel_index = 0; kel_index < kel_counter; kel_index++) - { - int kel = DistIndice[kel_index]; - const posT disp_Ik = cminus * eI_table.Displacements[kel][iat]; - const posT disp_jk = displjk[kel]; - - // compute the contribution to jel - Uj += val[kel_index]; - dUj += gradF0[kel_index] * disp_jk - gradF1[kel_index] * disp_Ij; - d2Uj -= hessF00[kel_index] + hessF11[kel_index] + - lapfac * (gradF0[kel_index] + gradF1[kel_index]) - - ctwo * hessF01[kel_index] * dot(disp_jk, disp_Ij); - - // compute the contribution to kel - Uk[kel] += val[kel_index]; - dUk(kel) = dUk[kel] - gradF0[kel_index] * disp_jk - - gradF2[kel_index] * disp_Ik; - d2Uk[kel] -= hessF00[kel_index] + hessF22[kel_index] + - lapfac * (gradF0[kel_index] + gradF2[kel_index]) + - ctwo * hessF02[kel_index] * dot(disp_jk, disp_Ik); - } + } + if ((iind + 1 == ions_nearby.size() || + ig != Ions.GroupID[ions_nearby[iind + 1]]) && + kel_counter > 0) + { + const FT &feeI(*F(ig, jg, kg)); + computeU3_engine(P, feeI, kel_counter, Uj, dUj, d2Uj, Uk, dUk, d2Uk); + kel_counter = 0; } } + } } void evaluateGL(ParticleSet &P, ParticleSet::ParticleGradient_t &G, @@ -482,6 +599,7 @@ template class ThreeBodyJastrowRef : public WaveFunctionComponentBase constexpr valT mhalf(-0.5); LogValue = mhalf * LogValue; } -}; // class ThreeBodyJastrowRef -} // miniqmcreference +}; + +} // namespace miniqmcreference #endif diff --git a/src/QMCWaveFunctions/Jastrow/TwoBodyJastrow.h b/src/QMCWaveFunctions/Jastrow/TwoBodyJastrow.h index 907de41db..b4fdce464 100644 --- a/src/QMCWaveFunctions/Jastrow/TwoBodyJastrow.h +++ b/src/QMCWaveFunctions/Jastrow/TwoBodyJastrow.h @@ -1,24 +1,18 @@ -//////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////// // This file is distributed under the University of Illinois/NCSA Open Source -// License. See LICENSE file in top directory for details. +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: -// Jeongnim Kim, jeongnim.kim@intel.com, -// Intel Corp. -// Amrita Mathuriya, amrita.mathuriya@intel.com, -// Intel Corp. -// Ye Luo, yeluo@anl.gov, -// Argonne National Laboratory +// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp. +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory // -// File created by: -// Jeongnim Kim, jeongnim.kim@intel.com, -// Intel Corp. -//////////////////////////////////////////////////////////////////////////////// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// // -*- C++ -*- -#ifndef QMCPLUSPLUS_TWOBODYJASTROW_OPTIMIZED_SOA_H -#define QMCPLUSPLUS_TWOBODYJASTROW_OPTIMIZED_SOA_H +#ifndef QMCPLUSPLUS_TWOBODYJASTROW_H +#define QMCPLUSPLUS_TWOBODYJASTROW_H #include "Utilities/Configuration.h" #include "QMCWaveFunctions/WaveFunctionComponentBase.h" #include "Particle/DistanceTableData.h" @@ -40,7 +34,7 @@ namespace qmcplusplus * For electrons, distinct pair correlation functions are used * for spins up-up/down-down and up-down/down-up. * - * Based on J2OrbitalSoA.h with these considerations + * Based on TwoBodyJastrow.h with these considerations * - DistanceTableData using SoA containers * - support mixed precision: FT::real_type != OHMMS_PRECISION * - loops over the groups: elminated PairID @@ -61,6 +55,8 @@ template struct TwoBodyJastrow : public WaveFunctionComponentBase /// number of particles size_t N; + /// number of particles + padded + size_t N_padded; /// number of groups of the target particleset size_t NumGroups; /// Used to compute correction @@ -72,8 +68,8 @@ template struct TwoBodyJastrow : public WaveFunctionComponentBase ///\f$Uat[i] = sum_(j) u_{i,j}\f$ Vector Uat; ///\f$dUat[i] = sum_(j) du_{i,j}\f$ - Vector dUat; - valT *FirstAddressOfdU, *LastAddressOfdU; + using gContainer_type = VectorSoAContainer; + gContainer_type dUat; ///\f$d2Uat[i] = sum_(j) d2u_{i,j}\f$ Vector d2Uat; valT cur_Uat; @@ -83,6 +79,8 @@ template struct TwoBodyJastrow : public WaveFunctionComponentBase aligned_vector DistIndice; /// Container for \f$F[ig*NumGroups+jg]\f$ std::vector F; + /// Uniquue J2 set for cleanup + std::map J2Unique; TwoBodyJastrow(ParticleSet &p); TwoBodyJastrow(const TwoBodyJastrow &rhs) = delete; @@ -112,9 +110,25 @@ template struct TwoBodyJastrow : public WaveFunctionComponentBase bool fromscratch = false); /*@{ internal compute engines*/ - inline void computeU3(ParticleSet &P, int iat, const RealType *restrict dist, - RealType *restrict u, RealType *restrict du, - RealType *restrict d2u); + inline valT computeU(const ParticleSet &P, int iat, + const RealType *restrict dist) + { + valT curUat(0); + const int igt = P.GroupID[iat] * NumGroups; + for (int jg = 0; jg < NumGroups; ++jg) + { + const FuncType &f2(*F[igt + jg]); + int iStart = P.first(jg); + int iEnd = P.last(jg); + curUat += f2.evaluateV(iat, iStart, iEnd, dist, DistCompressed.data()); + } + return curUat; + } + + inline void computeU3(const ParticleSet &P, int iat, + const RealType *restrict dist, RealType *restrict u, + RealType *restrict du, RealType *restrict d2u, + bool triangle = false); /** compute gradient */ @@ -127,35 +141,13 @@ template struct TwoBodyJastrow : public WaveFunctionComponentBase const valT *restrict dX = displ.data(idim); valT s = valT(); -#pragma omp simd reduction(+ : s) aligned(du, dX) for (int jat = 0; jat < N; ++jat) s += du[jat] * dX[jat]; grad[idim] = s; } return grad; } - - /** compute gradient and lap - */ - inline void accumulateGL(const valT *restrict du, const valT *restrict d2u, - const RowContainer &displ, posT &grad, - valT &lap) const - { - constexpr valT lapfac = OHMMS_DIM - RealType(1); - lap = valT(0); - //#pragma omp simd reduction(+:lap) - for (int jat = 0; jat < N; ++jat) - lap += d2u[jat] + lapfac * du[jat]; - for (int idim = 0; idim < OHMMS_DIM; ++idim) - { - const valT *restrict dX = displ.data(idim); - valT s = valT(); - //#pragma omp simd reduction(+:s) - for (int jat = 0; jat < N; ++jat) - s += du[jat] * dX[jat]; - grad[idim] = s; - } - } + /**@} */ }; template TwoBodyJastrow::TwoBodyJastrow(ParticleSet &p) @@ -166,17 +158,24 @@ template TwoBodyJastrow::TwoBodyJastrow(ParticleSet &p) WaveFunctionComponentName = "TwoBodyJastrow"; } -template TwoBodyJastrow::~TwoBodyJastrow() {} +template TwoBodyJastrow::~TwoBodyJastrow() +{ + auto it = J2Unique.begin(); + while (it != J2Unique.end()) + { + delete ((*it).second); + ++it; + } +} // need to clean up J2Unique template void TwoBodyJastrow::init(ParticleSet &p) { N = p.getTotalNum(); + N_padded = getAlignedSize(N); NumGroups = p.groups(); Uat.resize(N); dUat.resize(N); - FirstAddressOfdU = &(dUat[0][0]); - LastAddressOfdU = FirstAddressOfdU + dUat.size() * OHMMS_DIM; d2Uat.resize(N); cur_u.resize(N); cur_du.resize(N); @@ -197,9 +196,11 @@ template void TwoBodyJastrow::addFunc(int ia, int ib, FT *j) { int ij = 0; for (int ig = 0; ig < NumGroups; ++ig) - for (int jg = 0; jg < NumGroups; ++jg, ++ij) + for (int jg = 0; jg < NumGroups; ++jg, ++ij) if (F[ij] == nullptr) F[ij] = j; } + else + F[ia * NumGroups + ib] = j; } else { @@ -208,20 +209,20 @@ template void TwoBodyJastrow::addFunc(int ia, int ib, FT *j) // a very special case, 1 up + 1 down // uu/dd was prevented by the builder for (int ig = 0; ig < NumGroups; ++ig) - for (int jg = 0; jg < NumGroups; ++jg) + for (int jg = 0; jg < NumGroups; ++jg) F[ig * NumGroups + jg] = j; } else { // generic case - F[ia * NumGroups + ib] = j; - if (ia < ib) F[ib * NumGroups + ia] = j; + F[ia * NumGroups + ib] = j; + F[ib * NumGroups + ia] = j; } } std::stringstream aname; aname << ia << ib; - // ChiesaKEcorrection(); - FirstTime = false; + J2Unique[aname.str()] = j; + FirstTime = false; } /** intenal function to compute \f$\sum_j u(r_j), du/dr, d2u/dr2\f$ @@ -233,24 +234,25 @@ template void TwoBodyJastrow::addFunc(int ia, int ib, FT *j) * @param d2u starting second deriv */ template -inline void TwoBodyJastrow::computeU3(ParticleSet &P, int iat, +inline void TwoBodyJastrow::computeU3(const ParticleSet &P, int iat, const RealType *restrict dist, RealType *restrict u, RealType *restrict du, - RealType *restrict d2u) + RealType *restrict d2u, bool triangle) { + const int jelmax = triangle ? iat : N; constexpr valT czero(0); - std::fill_n(u, N, czero); - std::fill_n(du, N, czero); - std::fill_n(d2u, N, czero); + std::fill_n(u, jelmax, czero); + std::fill_n(du, jelmax, czero); + std::fill_n(d2u, jelmax, czero); const int igt = P.GroupID[iat] * NumGroups; for (int jg = 0; jg < NumGroups; ++jg) { const FuncType &f2(*F[igt + jg]); int iStart = P.first(jg); - int iEnd = P.last(jg); - f2.evaluateVGL(iStart, iEnd, dist, u, du, d2u, DistCompressed.data(), + int iEnd = std::min(jelmax, P.last(jg)); + f2.evaluateVGL(iat, iStart, iEnd, dist, u, du, d2u, DistCompressed.data(), DistIndice.data()); } // u[iat]=czero; @@ -264,19 +266,7 @@ typename TwoBodyJastrow::ValueType TwoBodyJastrow::ratio(ParticleSet &P, { // only ratio, ready to compute it again UpdateMode = ORB_PBYP_RATIO; - - const DistanceTableData *d_table = P.DistTables[0]; - const auto dist = d_table->Temp_r.data(); - cur_Uat = valT(0); - const int igt = P.GroupID[iat] * NumGroups; - for (int jg = 0; jg < NumGroups; ++jg) - { - const FuncType &f2(*F[igt + jg]); - int iStart = P.first(jg); - int iEnd = P.last(jg); - cur_Uat += f2.evaluateV(iStart, iEnd, dist, DistCompressed.data()); - } - + cur_Uat = computeU(P, iat, P.DistTables[0]->Temp_r.data()); return std::exp(Uat[iat] - cur_Uat); } @@ -316,25 +306,39 @@ void TwoBodyJastrow::acceptMove(ParticleSet &P, int iat) } valT cur_d2Uat(0); - posT cur_dUat; - const auto &new_dr = d_table->Temp_dr; - const auto &old_dr = d_table->Displacements[iat]; + const auto &new_dr = d_table->Temp_dr; + const auto &old_dr = d_table->Displacements[iat]; + constexpr valT lapfac = OHMMS_DIM - RealType(1); for (int jat = 0; jat < N; jat++) { - constexpr valT lapfac = OHMMS_DIM - RealType(1); - valT du = cur_u[jat] - old_u[jat]; - posT newg = cur_du[jat] * new_dr[jat]; - posT dg = newg - old_du[jat] * old_dr[jat]; - valT newl = cur_d2u[jat] + lapfac * cur_du[jat]; - valT dl = old_d2u[jat] + lapfac * old_du[jat] - newl; + const valT du = cur_u[jat] - old_u[jat]; + const valT newl = cur_d2u[jat] + lapfac * cur_du[jat]; + const valT dl = old_d2u[jat] + lapfac * old_du[jat] - newl; Uat[jat] += du; - dUat[jat] -= dg; d2Uat[jat] += dl; - cur_dUat += newg; cur_d2Uat -= newl; } + posT cur_dUat; + for (int idim = 0; idim < OHMMS_DIM; ++idim) + { + const valT *restrict new_dX = new_dr.data(idim); + const valT *restrict old_dX = old_dr.data(idim); + const valT *restrict cur_du_pt = cur_du.data(); + const valT *restrict old_du_pt = old_du.data(); + valT *restrict save_g = dUat.data(idim); + valT cur_g = cur_dUat[idim]; + for (int jat = 0; jat < N; jat++) + { + const valT newg = cur_du_pt[jat] * new_dX[jat]; + const valT dg = newg - old_du_pt[jat] * old_dX[jat]; + save_g[jat] -= dg; + cur_g += newg; + } + cur_dUat[idim] = cur_g; + } + LogValue += Uat[iat] - cur_Uat; Uat[iat] = cur_Uat; - dUat[iat] = cur_dUat; + dUat(iat) = cur_dUat; d2Uat[iat] = cur_d2Uat; } @@ -343,17 +347,44 @@ template void TwoBodyJastrow::recompute(ParticleSet &P) const DistanceTableData *d_table = P.DistTables[0]; for (int ig = 0; ig < NumGroups; ++ig) { + const int igt = ig * NumGroups; for (int iat = P.first(ig), last = P.last(ig); iat < last; ++iat) { computeU3(P, iat, d_table->Distances[iat], cur_u.data(), cur_du.data(), - cur_d2u.data()); - Uat[iat] = simd::accumulate_n(cur_u.data(), N, valT()); + cur_d2u.data(), true); + Uat[iat] = simd::accumulate_n(cur_u.data(), iat, valT()); posT grad; - valT lap; - accumulateGL(cur_du.data(), cur_d2u.data(), d_table->Displacements[iat], - grad, lap); - dUat[iat] = grad; + valT lap(0); + const valT *restrict u = cur_u.data(); + const valT *restrict du = cur_du.data(); + const valT *restrict d2u = cur_d2u.data(); + const RowContainer &displ = d_table->Displacements[iat]; + constexpr valT lapfac = OHMMS_DIM - RealType(1); + for (int jat = 0; jat < iat; ++jat) + lap += d2u[jat] + lapfac * du[jat]; + for (int idim = 0; idim < OHMMS_DIM; ++idim) + { + const valT *restrict dX = displ.data(idim); + valT s = valT(); + for (int jat = 0; jat < iat; ++jat) + s += du[jat] * dX[jat]; + grad[idim] = s; + } + dUat(iat) = grad; d2Uat[iat] = -lap; + // add the contribution from the upper triangle + for (int jat = 0; jat < iat; jat++) + { + Uat[jat] += u[jat]; + d2Uat[jat] -= d2u[jat] + lapfac * du[jat]; + } + for (int idim = 0; idim < OHMMS_DIM; ++idim) + { + valT *restrict save_g = dUat.data(idim); + const valT *restrict dX = displ.data(idim); + for (int jat = 0; jat < iat; jat++) + save_g[jat] -= du[jat] * dX[jat]; + } } } } @@ -361,10 +392,10 @@ template void TwoBodyJastrow::recompute(ParticleSet &P) template typename TwoBodyJastrow::RealType TwoBodyJastrow::evaluateLog(ParticleSet &P, - ParticleSet::ParticleGradient_t &dG, - ParticleSet::ParticleLaplacian_t &dL) + ParticleSet::ParticleGradient_t &G, + ParticleSet::ParticleLaplacian_t &L) { - evaluateGL(P, dG, dL, true); + evaluateGL(P, G, L, true); return LogValue; } @@ -386,5 +417,6 @@ void TwoBodyJastrow::evaluateGL(ParticleSet &P, constexpr valT mhalf(-0.5); LogValue = mhalf * LogValue; } -} + +} // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h b/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h index cef2331c2..7e8d41c19 100644 --- a/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h +++ b/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h @@ -1,21 +1,15 @@ -//////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////// // This file is distributed under the University of Illinois/NCSA Open Source -// License. See LICENSE file in top directory for details. +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: -// Jeongnim Kim, jeongnim.kim@intel.com, -// Intel Corp. -// Amrita Mathuriya, amrita.mathuriya@intel.com, -// Intel Corp. -// Ye Luo, yeluo@anl.gov, -// Argonne National Laboratory +// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp. +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory // -// File created by: -// Jeongnim Kim, jeongnim.kim@intel.com, -// Intel Corp. -//////////////////////////////////////////////////////////////////////////////// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// // -*- C++ -*- #ifndef QMCPLUSPLUS_TWOBODYJASTROW_REF_H #define QMCPLUSPLUS_TWOBODYJASTROW_REF_H @@ -40,7 +34,7 @@ using namespace qmcplusplus; * For electrons, distinct pair correlation functions are used * for spins up-up/down-down and up-down/down-up. * - * Based on J2OrbitalRef.h with these considerations + * Based on TwoBodyJastrowRef.h with these considerations * - DistanceTableData using SoA containers * - support mixed precision: FT::real_type != OHMMS_PRECISION * - loops over the groups: elminated PairID @@ -61,6 +55,8 @@ template struct TwoBodyJastrowRef : public WaveFunctionComponentBase /// number of particles size_t N; + /// number of particles + padded + size_t N_padded; /// number of groups of the target particleset size_t NumGroups; /// Used to compute correction @@ -72,8 +68,8 @@ template struct TwoBodyJastrowRef : public WaveFunctionComponentBase ///\f$Uat[i] = sum_(j) u_{i,j}\f$ Vector Uat; ///\f$dUat[i] = sum_(j) du_{i,j}\f$ - Vector dUat; - valT *FirstAddressOfdU, *LastAddressOfdU; + using gContainer_type = VectorSoAContainer; + gContainer_type dUat; ///\f$d2Uat[i] = sum_(j) d2u_{i,j}\f$ Vector d2Uat; valT cur_Uat; @@ -83,6 +79,8 @@ template struct TwoBodyJastrowRef : public WaveFunctionComponentBase aligned_vector DistIndice; /// Container for \f$F[ig*NumGroups+jg]\f$ std::vector F; + /// Uniquue J2 set for cleanup + std::map J2Unique; TwoBodyJastrowRef(ParticleSet &p); TwoBodyJastrowRef(const TwoBodyJastrowRef &rhs) = delete; @@ -111,10 +109,26 @@ template struct TwoBodyJastrowRef : public WaveFunctionComponentBase ParticleSet::ParticleLaplacian_t &L, bool fromscratch = false); - /*@ internal compute engines*/ - inline void computeU3(ParticleSet &P, int iat, const RealType *restrict dist, - RealType *restrict u, RealType *restrict du, - RealType *restrict d2u); + /*@{ internal compute engines*/ + inline valT computeU(const ParticleSet &P, int iat, + const RealType *restrict dist) + { + valT curUat(0); + const int igt = P.GroupID[iat] * NumGroups; + for (int jg = 0; jg < NumGroups; ++jg) + { + const FuncType &f2(*F[igt + jg]); + int iStart = P.first(jg); + int iEnd = P.last(jg); + curUat += f2.evaluateV(iat, iStart, iEnd, dist, DistCompressed.data()); + } + return curUat; + } + + inline void computeU3(const ParticleSet &P, int iat, + const RealType *restrict dist, RealType *restrict u, + RealType *restrict du, RealType *restrict d2u, + bool triangle = false); /** compute gradient */ @@ -133,27 +147,8 @@ template struct TwoBodyJastrowRef : public WaveFunctionComponentBase } return grad; } - - /** compute gradient and lap - */ - inline void accumulateGL(const valT *restrict du, const valT *restrict d2u, - const RowContainer &displ, posT &grad, - valT &lap) const - { - constexpr valT lapfac = OHMMS_DIM - RealType(1); - lap = valT(0); - for (int jat = 0; jat < N; ++jat) - lap += d2u[jat] + lapfac * du[jat]; - for (int idim = 0; idim < OHMMS_DIM; ++idim) - { - const valT *restrict dX = displ.data(idim); - valT s = valT(); - for (int jat = 0; jat < N; ++jat) - s += du[jat] * dX[jat]; - grad[idim] = s; - } - } -}; // struct TwoBodyJastrowRef + /**@} */ +}; template TwoBodyJastrowRef::TwoBodyJastrowRef(ParticleSet &p) { @@ -163,17 +158,24 @@ template TwoBodyJastrowRef::TwoBodyJastrowRef(ParticleSet &p) WaveFunctionComponentName = "TwoBodyJastrowRef"; } -template TwoBodyJastrowRef::~TwoBodyJastrowRef() {} +template TwoBodyJastrowRef::~TwoBodyJastrowRef() +{ + auto it = J2Unique.begin(); + while (it != J2Unique.end()) + { + delete ((*it).second); + ++it; + } +} // need to clean up J2Unique template void TwoBodyJastrowRef::init(ParticleSet &p) { N = p.getTotalNum(); + N_padded = getAlignedSize(N); NumGroups = p.groups(); Uat.resize(N); dUat.resize(N); - FirstAddressOfdU = &(dUat[0][0]); - LastAddressOfdU = FirstAddressOfdU + dUat.size() * OHMMS_DIM; d2Uat.resize(N); cur_u.resize(N); cur_du.resize(N); @@ -198,6 +200,8 @@ void TwoBodyJastrowRef::addFunc(int ia, int ib, FT *j) for (int jg = 0; jg < NumGroups; ++jg, ++ij) if (F[ij] == nullptr) F[ij] = j; } + else + F[ia * NumGroups + ib] = j; } else { @@ -213,13 +217,13 @@ void TwoBodyJastrowRef::addFunc(int ia, int ib, FT *j) { // generic case F[ia * NumGroups + ib] = j; - if (ia < ib) F[ib * NumGroups + ia] = j; + F[ib * NumGroups + ia] = j; } } std::stringstream aname; aname << ia << ib; - // ChiesaKEcorrection(); - FirstTime = false; + J2Unique[aname.str()] = j; + FirstTime = false; } /** intenal function to compute \f$\sum_j u(r_j), du/dr, d2u/dr2\f$ @@ -231,24 +235,25 @@ void TwoBodyJastrowRef::addFunc(int ia, int ib, FT *j) * @param d2u starting second deriv */ template -inline void TwoBodyJastrowRef::computeU3(ParticleSet &P, int iat, - const RealType *restrict dist, - RealType *restrict u, - RealType *restrict du, - RealType *restrict d2u) +inline void +TwoBodyJastrowRef::computeU3(const ParticleSet &P, int iat, + const RealType *restrict dist, + RealType *restrict u, RealType *restrict du, + RealType *restrict d2u, bool triangle) { + const int jelmax = triangle ? iat : N; constexpr valT czero(0); - std::fill_n(u, N, czero); - std::fill_n(du, N, czero); - std::fill_n(d2u, N, czero); + std::fill_n(u, jelmax, czero); + std::fill_n(du, jelmax, czero); + std::fill_n(d2u, jelmax, czero); const int igt = P.GroupID[iat] * NumGroups; for (int jg = 0; jg < NumGroups; ++jg) { const FuncType &f2(*F[igt + jg]); int iStart = P.first(jg); - int iEnd = P.last(jg); - f2.evaluateVGL(iStart, iEnd, dist, u, du, d2u, DistCompressed.data(), + int iEnd = std::min(jelmax, P.last(jg)); + f2.evaluateVGL(iat, iStart, iEnd, dist, u, du, d2u, DistCompressed.data(), DistIndice.data()); } // u[iat]=czero; @@ -262,19 +267,7 @@ TwoBodyJastrowRef::ratio(ParticleSet &P, int iat) { // only ratio, ready to compute it again UpdateMode = ORB_PBYP_RATIO; - - const DistanceTableData *d_table = P.DistTables[0]; - const auto dist = d_table->Temp_r.data(); - cur_Uat = valT(0); - const int igt = P.GroupID[iat] * NumGroups; - for (int jg = 0; jg < NumGroups; ++jg) - { - const FuncType &f2(*F[igt + jg]); - int iStart = P.first(jg); - int iEnd = P.last(jg); - cur_Uat += f2.evaluateV(iStart, iEnd, dist, DistCompressed.data()); - } - + cur_Uat = computeU(P, iat, P.DistTables[0]->Temp_r.data()); return std::exp(Uat[iat] - cur_Uat); } @@ -314,25 +307,39 @@ void TwoBodyJastrowRef::acceptMove(ParticleSet &P, int iat) } valT cur_d2Uat(0); - posT cur_dUat; - const auto &new_dr = d_table->Temp_dr; - const auto &old_dr = d_table->Displacements[iat]; + const auto &new_dr = d_table->Temp_dr; + const auto &old_dr = d_table->Displacements[iat]; + constexpr valT lapfac = OHMMS_DIM - RealType(1); for (int jat = 0; jat < N; jat++) { - constexpr valT lapfac = OHMMS_DIM - RealType(1); - valT du = cur_u[jat] - old_u[jat]; - posT newg = cur_du[jat] * new_dr[jat]; - posT dg = newg - old_du[jat] * old_dr[jat]; - valT newl = cur_d2u[jat] + lapfac * cur_du[jat]; - valT dl = old_d2u[jat] + lapfac * old_du[jat] - newl; + const valT du = cur_u[jat] - old_u[jat]; + const valT newl = cur_d2u[jat] + lapfac * cur_du[jat]; + const valT dl = old_d2u[jat] + lapfac * old_du[jat] - newl; Uat[jat] += du; - dUat[jat] -= dg; d2Uat[jat] += dl; - cur_dUat += newg; cur_d2Uat -= newl; } + posT cur_dUat; + for (int idim = 0; idim < OHMMS_DIM; ++idim) + { + const valT *restrict new_dX = new_dr.data(idim); + const valT *restrict old_dX = old_dr.data(idim); + const valT *restrict cur_du_pt = cur_du.data(); + const valT *restrict old_du_pt = old_du.data(); + valT *restrict save_g = dUat.data(idim); + valT cur_g = cur_dUat[idim]; + for (int jat = 0; jat < N; jat++) + { + const valT newg = cur_du_pt[jat] * new_dX[jat]; + const valT dg = newg - old_du_pt[jat] * old_dX[jat]; + save_g[jat] -= dg; + cur_g += newg; + } + cur_dUat[idim] = cur_g; + } + LogValue += Uat[iat] - cur_Uat; Uat[iat] = cur_Uat; - dUat[iat] = cur_dUat; + dUat(iat) = cur_dUat; d2Uat[iat] = cur_d2Uat; } @@ -341,17 +348,44 @@ template void TwoBodyJastrowRef::recompute(ParticleSet &P) const DistanceTableData *d_table = P.DistTables[0]; for (int ig = 0; ig < NumGroups; ++ig) { + const int igt = ig * NumGroups; for (int iat = P.first(ig), last = P.last(ig); iat < last; ++iat) { computeU3(P, iat, d_table->Distances[iat], cur_u.data(), cur_du.data(), - cur_d2u.data()); - Uat[iat] = std::accumulate(cur_u.begin(), cur_u.begin() + N, valT()); + cur_d2u.data(), true); + Uat[iat] = std::accumulate(cur_u.begin(), cur_u.begin() + iat, valT()); posT grad; - valT lap; - accumulateGL(cur_du.data(), cur_d2u.data(), d_table->Displacements[iat], - grad, lap); - dUat[iat] = grad; + valT lap(0); + const valT *restrict u = cur_u.data(); + const valT *restrict du = cur_du.data(); + const valT *restrict d2u = cur_d2u.data(); + const RowContainer &displ = d_table->Displacements[iat]; + constexpr valT lapfac = OHMMS_DIM - RealType(1); + for (int jat = 0; jat < iat; ++jat) + lap += d2u[jat] + lapfac * du[jat]; + for (int idim = 0; idim < OHMMS_DIM; ++idim) + { + const valT *restrict dX = displ.data(idim); + valT s = valT(); + for (int jat = 0; jat < iat; ++jat) + s += du[jat] * dX[jat]; + grad[idim] = s; + } + dUat(iat) = grad; d2Uat[iat] = -lap; + // add the contribution from the upper triangle + for (int jat = 0; jat < iat; jat++) + { + Uat[jat] += u[jat]; + d2Uat[jat] -= d2u[jat] + lapfac * du[jat]; + } + for (int idim = 0; idim < OHMMS_DIM; ++idim) + { + valT *restrict save_g = dUat.data(idim); + const valT *restrict dX = displ.data(idim); + for (int jat = 0; jat < iat; jat++) + save_g[jat] -= du[jat] * dX[jat]; + } } } } @@ -359,10 +393,10 @@ template void TwoBodyJastrowRef::recompute(ParticleSet &P) template typename TwoBodyJastrowRef::RealType TwoBodyJastrowRef::evaluateLog(ParticleSet &P, - ParticleSet::ParticleGradient_t &dG, - ParticleSet::ParticleLaplacian_t &dL) + ParticleSet::ParticleGradient_t &G, + ParticleSet::ParticleLaplacian_t &L) { - evaluateGL(P, dG, dL, true); + evaluateGL(P, G, L, true); return LogValue; } @@ -384,5 +418,6 @@ void TwoBodyJastrowRef::evaluateGL(ParticleSet &P, constexpr valT mhalf(-0.5); LogValue = mhalf * LogValue; } -} // miniqmcreference + +} // namespace miniqmcreference #endif diff --git a/src/QMCWaveFunctions/WaveFunctionComponentBase.h b/src/QMCWaveFunctions/WaveFunctionComponentBase.h index e36610825..fc5c1fde2 100644 --- a/src/QMCWaveFunctions/WaveFunctionComponentBase.h +++ b/src/QMCWaveFunctions/WaveFunctionComponentBase.h @@ -86,11 +86,6 @@ struct WaveFunctionComponentBase : public QMCTraits * If true, this object is actively modified during optimization */ bool Optimizable; - /** true, if FermionWF */ - bool IsFermionWF; - /** true, if compute for the ratio instead of buffering */ - bool Need2Compute4PbyP; - /** current update mode */ int UpdateMode; /** current \f$\log\phi \f$ @@ -113,10 +108,7 @@ struct WaveFunctionComponentBase : public QMCTraits WaveFunctionComponentBase() : IsOptimizing(false), Optimizable(true), UpdateMode(ORB_WALKER), LogValue(1.0), PhaseValue(0.0), WaveFunctionComponentName("WaveFunctionComponentBase") - { - /// store instead of computing - Need2Compute4PbyP = false; - } + { } /// default destructor virtual ~WaveFunctionComponentBase() {} diff --git a/src/QMCWaveFunctions/einspline_spo.hpp b/src/QMCWaveFunctions/einspline_spo.hpp index 093598c3b..1f9ba42e9 100644 --- a/src/QMCWaveFunctions/einspline_spo.hpp +++ b/src/QMCWaveFunctions/einspline_spo.hpp @@ -59,9 +59,9 @@ struct einspline_spo compute_engine_type compute_engine; aligned_vector einsplines; - aligned_vector psi; - aligned_vector grad; - aligned_vector hess; + aligned_vector psi; + aligned_vector grad; + aligned_vector hess; /// default constructor einspline_spo() @@ -98,38 +98,22 @@ struct einspline_spo /// destructors ~einspline_spo() { - if (psi.size()) clean(); if (Owner) for (int i = 0; i < nBlocks; ++i) myAllocator.destroy(einsplines[i]); } - void clean() - { - const int n = psi.size(); - for (int i = 0; i < n; ++i) - { - delete psi[i]; - delete grad[i]; - delete hess[i]; - } - } - /// resize the containers void resize() { - if (nBlocks > psi.size()) + psi.resize(nBlocks); + grad.resize(nBlocks); + hess.resize(nBlocks); + for (int i = 0; i < nBlocks; ++i) { - clean(); - psi.resize(nBlocks); - grad.resize(nBlocks); - hess.resize(nBlocks); - for (int i = 0; i < nBlocks; ++i) - { - psi[i] = new vContainer_type(nSplinesPerBlock); - grad[i] = new gContainer_type(nSplinesPerBlock); - hess[i] = new hContainer_type(nSplinesPerBlock); - } + psi[i].resize(nSplinesPerBlock); + grad[i].resize(nSplinesPerBlock); + hess[i].resize(nSplinesPerBlock); } } @@ -171,7 +155,7 @@ struct einspline_spo { auto u = Lattice.toUnit_floor(p); for (int i = 0; i < nBlocks; ++i) - compute_engine.evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i]->data(), psi[i]->size()); + compute_engine.evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); } /** evaluate psi */ @@ -180,7 +164,7 @@ struct einspline_spo auto u = Lattice.toUnit_floor(p); #pragma omp for nowait for (int i = 0; i < nBlocks; ++i) - compute_engine.evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i]->data(), psi[i]->size()); + compute_engine.evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); } /** evaluate psi, grad and lap */ @@ -189,8 +173,8 @@ struct einspline_spo auto u = Lattice.toUnit_floor(p); for (int i = 0; i < nBlocks; ++i) compute_engine.evaluate_vgl(einsplines[i], u[0], u[1], u[2], - psi[i]->data(), grad[i]->data(), hess[i]->data(), - psi[i]->size()); + psi[i].data(), grad[i].data(), hess[i].data(), + nSplinesPerBlock); } /** evaluate psi, grad and lap */ @@ -200,8 +184,8 @@ struct einspline_spo #pragma omp for nowait for (int i = 0; i < nBlocks; ++i) compute_engine.evaluate_vgl(einsplines[i], u[0], u[1], u[2], - psi[i]->data(), grad[i]->data(), hess[i]->data(), - psi[i]->size()); + psi[i].data(), grad[i].data(), hess[i].data(), + nSplinesPerBlock); } /** evaluate psi, grad and hess */ @@ -210,8 +194,8 @@ struct einspline_spo auto u = Lattice.toUnit_floor(p); for (int i = 0; i < nBlocks; ++i) compute_engine.evaluate_vgh(einsplines[i], u[0], u[1], u[2], - psi[i]->data(), grad[i]->data(), hess[i]->data(), - psi[i]->size()); + psi[i].data(), grad[i].data(), hess[i].data(), + nSplinesPerBlock); } /** evaluate psi, grad and hess */ @@ -221,8 +205,8 @@ struct einspline_spo #pragma omp for nowait for (int i = 0; i < nBlocks; ++i) compute_engine.evaluate_vgh(einsplines[i], u[0], u[1], u[2], - psi[i]->data(), grad[i]->data(), hess[i]->data(), - psi[i]->size()); + psi[i].data(), grad[i].data(), hess[i].data(), + nSplinesPerBlock); } void print(std::ostream &os) diff --git a/src/Utilities/SIMD/Mallocator.hpp b/src/Utilities/SIMD/Mallocator.hpp index f039dfadb..fa3ce0a40 100644 --- a/src/Utilities/SIMD/Mallocator.hpp +++ b/src/Utilities/SIMD/Mallocator.hpp @@ -42,8 +42,11 @@ namespace qmcplusplus } void deallocate(T* p, std::size_t) { free(p); } }; + + template + bool operator==(const Mallocator&, const Mallocator&) { return Align1==Align2; } + template + bool operator!=(const Mallocator&, const Mallocator&) { return Align1!=Align2; } } -// template -// using aligned_allocator=Mallocator; #endif diff --git a/src/Utilities/SIMD/allocator.hpp b/src/Utilities/SIMD/allocator.hpp index 07c802187..9b2ce7d76 100644 --- a/src/Utilities/SIMD/allocator.hpp +++ b/src/Utilities/SIMD/allocator.hpp @@ -35,5 +35,9 @@ template inline size_t getAlignedSize(size_t n) return ((n+ND-1)/ND)*ND; } +template inline size_t getAlignment() +{ + return QMC_CLINE/sizeof(T); +} #endif