From 74c67061b0ea86496272c4c4091caaa3d03acf4c Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Thu, 17 Aug 2023 14:27:26 -0400 Subject: [PATCH] Add templated class LCAOrbitalSetT Signed-off-by: Steven Hahn --- src/QMCWaveFunctions/BasisSetBase.h | 5 +- src/QMCWaveFunctions/CMakeLists.txt | 2 +- src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp | 966 +++++++++++++++++++ src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h | 336 +++++++ 4 files changed, 1305 insertions(+), 4 deletions(-) create mode 100644 src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp create mode 100644 src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h diff --git a/src/QMCWaveFunctions/BasisSetBase.h b/src/QMCWaveFunctions/BasisSetBase.h index 7be77b13cb..8837e18832 100644 --- a/src/QMCWaveFunctions/BasisSetBase.h +++ b/src/QMCWaveFunctions/BasisSetBase.h @@ -134,9 +134,8 @@ struct SoaBasisSetBase using vgl_type = VectorSoaContainer; using vgh_type = VectorSoaContainer; using vghgh_type = VectorSoaContainer; - using ValueType = QMCTraits::ValueType; - using OffloadMWVGLArray = Array>; // [VGL, walker, Orbs] - using OffloadMWVArray = Array>; // [walker, Orbs] + using OffloadMWVGLArray = Array>; // [VGL, walker, Orbs] + using OffloadMWVArray = Array>; // [walker, Orbs] ///size of the basis set int BasisSetSize; diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt index 1bbfc0520f..2db2ed4f13 100644 --- a/src/QMCWaveFunctions/CMakeLists.txt +++ b/src/QMCWaveFunctions/CMakeLists.txt @@ -71,7 +71,7 @@ if(OHMMS_DIM MATCHES 3) set(JASTROW_SRCS ${JASTROW_SRCS} Jastrow/eeI_JastrowBuilder.cpp Jastrow/CountingJastrowBuilder.cpp) - set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSet.cpp LCAO/LCAOrbitalBuilder.cpp LCAO/MultiQuinticSpline1D.cpp + set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSet.cpp LCAO/LCAOrbitalSetT.cpp LCAO/LCAOrbitalBuilder.cpp LCAO/MultiQuinticSpline1D.cpp LCAO/AOBasisBuilder.cpp LCAO/SoaLocalizedBasisSet.cpp) if(QMC_COMPLEX) set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOSpinorBuilder.cpp) diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp new file mode 100644 index 0000000000..dba20478b7 --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp @@ -0,0 +1,966 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// + + +#include "LCAOrbitalSetT.h" +#include "Numerics/MatrixOperators.h" +#include "CPU/BLAS.hpp" +#include + +namespace qmcplusplus +{ + +template +struct LCAOrbitalSetT::LCAOMultiWalkerMem : public Resource +{ + LCAOMultiWalkerMem() : Resource("LCAOrbitalSetT") {} + LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem() {} + + std::unique_ptr makeClone() const override { return std::make_unique(*this); } + + OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO] + OffloadMWVGLArray basis_mw; // [5][NW][NumAO] + OffloadMWVArray phi_v; // [NW][NumMO] + OffloadMWVArray basis_v_mw; // [NW][NumMO] +}; + +template +LCAOrbitalSetT::LCAOrbitalSetT(const std::string& my_name, std::unique_ptr&& bs) + : SPOSetT(my_name), + BasisSetSize(bs ? bs->getBasisSetSize() : 0), + Identity(true), + basis_timer_(createGlobalTimer("LCAOrbitalSetT::Basis", timer_level_fine)), + mo_timer_(createGlobalTimer("LCAOrbitalSetT::MO", timer_level_fine)) +{ + if (!bs) + throw std::runtime_error("LCAOrbitalSetT cannot take nullptr as its basis set!"); + myBasisSet = std::move(bs); + Temp.resize(BasisSetSize); + Temph.resize(BasisSetSize); + Tempgh.resize(BasisSetSize); + this->OrbitalSetSize = BasisSetSize; + LCAOrbitalSetT::checkObject(); +} + +template +LCAOrbitalSetT::LCAOrbitalSetT(const LCAOrbitalSetT& in) + : SPOSetT(in), + myBasisSet(in.myBasisSet->makeClone()), + C(in.C), + BasisSetSize(in.BasisSetSize), + C_copy(in.C_copy), + Identity(in.Identity), + basis_timer_(in.basis_timer_), + mo_timer_(in.mo_timer_) +{ + Temp.resize(BasisSetSize); + Temph.resize(BasisSetSize); + Tempgh.resize(BasisSetSize); + if (!in.Identity) + { + Tempv.resize(this->OrbitalSetSize); + Temphv.resize(this->OrbitalSetSize); + Tempghv.resize(this->OrbitalSetSize); + } + LCAOrbitalSetT::checkObject(); +} + +template +void LCAOrbitalSetT::setOrbitalSetSize(int norbs) +{ + if (C) + throw std::runtime_error("LCAOrbitalSetT::setOrbitalSetSize cannot reset existing MO coefficients"); + + Identity = false; + this->OrbitalSetSize = norbs; + C = std::make_shared(this->OrbitalSetSize, BasisSetSize); + Tempv.resize(this->OrbitalSetSize); + Temphv.resize(this->OrbitalSetSize); + Tempghv.resize(this->OrbitalSetSize); + LCAOrbitalSetT::checkObject(); +} + +template +void LCAOrbitalSetT::checkObject() const +{ + if (Identity) + { + if (this->OrbitalSetSize != BasisSetSize) + throw std::runtime_error( + "LCAOrbitalSetT::checkObject OrbitalSetSize and BasisSetSize must be equal if Identity = true!"); + if (C) + throw std::runtime_error("LCAOrbitalSetT::checkObject C should be nullptr if Identity = true!"); + } + else + { + if (!C) + throw std::runtime_error("LCAOrbitalSetT::checkObject C should not be nullptr if Identity = false!"); + if (this->OrbitalSetSize != C->rows()) + throw std::runtime_error("LCAOrbitalSetT::checkObject C rows doesn't match OrbitalSetSize."); + if (BasisSetSize != C->cols()) + throw std::runtime_error("LCAOrbitalSetT::checkObject C columns doesn't match BasisSetSize."); + } +} + +template +void LCAOrbitalSetT::createResource(ResourceCollection& collection) const +{ + auto resource_index = collection.addResource(std::make_unique()); +} + +template +void LCAOrbitalSetT::acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + spo_leader.mw_mem_handle_ = collection.lendResource(); +} + +template +void LCAOrbitalSetT::releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + collection.takebackResource(spo_leader.mw_mem_handle_); +} + +template +std::unique_ptr> LCAOrbitalSetT::makeClone() const { return std::make_unique>(*this); } + +template +void LCAOrbitalSetT::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) +{ + if (Identity) + { //PAY ATTENTION TO COMPLEX + myBasisSet->evaluateV(P, iat, psi.data()); + } + else + { + Vector vTemp(Temp.data(0), BasisSetSize); + this->myBasisSet->evaluateV(P, iat, vTemp.data()); + assert(psi.size() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + MatrixOperators::product(C_partial_view, vTemp, psi); + } +} + +/** Find a better place for other user classes, Matrix should be padded as well */ +template +static void Product_ABt(const VectorSoaContainer& A, const Matrix& B, VectorSoaContainer& C) +{ + constexpr char transa = 't'; + constexpr char transb = 'n'; + constexpr T zone(1); + constexpr T zero(0); + BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(), A.data(), A.capacity(), zero, C.data(), + C.capacity()); +} + +template +void LCAOrbitalSetT::evaluate_vgl_impl(const vgl_type& temp, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) const +{ + const size_t output_size = psi.size(); + std::copy_n(temp.data(0), output_size, psi.data()); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + for (size_t j = 0; j < output_size; j++) + { + dpsi[j][0] = gx[j]; + dpsi[j][1] = gy[j]; + dpsi[j][2] = gz[j]; + } + std::copy_n(temp.data(4), output_size, d2psi.data()); +} + +template +void LCAOrbitalSetT::evaluate_vgh_impl(const vgh_type& temp, + ValueVector& psi, + GradVector& dpsi, + HessVector& d2psi) const +{ + const size_t output_size = psi.size(); + std::copy_n(temp.data(0), output_size, psi.data()); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + + for (size_t j = 0; j < output_size; j++) + { + dpsi[j][0] = gx[j]; + dpsi[j][1] = gy[j]; + dpsi[j][2] = gz[j]; + + d2psi[j](0, 0) = hxx[j]; + d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; + d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; + d2psi[j](1, 1) = hyy[j]; + d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; + d2psi[j](2, 2) = hzz[j]; + } +} + +template +void LCAOrbitalSetT::evaluate_vghgh_impl(const vghgh_type& temp, + int i, + ValueMatrix& psi, + GradMatrix& dpsi, + HessMatrix& d2psi, + GGGMatrix& dghpsi) const +{ + const size_t output_size = psi.cols(); + std::copy_n(temp.data(0), output_size, psi[i]); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + const T* restrict gh_xxx = temp.data(10); + const T* restrict gh_xxy = temp.data(11); + const T* restrict gh_xxz = temp.data(12); + const T* restrict gh_xyy = temp.data(13); + const T* restrict gh_xyz = temp.data(14); + const T* restrict gh_xzz = temp.data(15); + const T* restrict gh_yyy = temp.data(16); + const T* restrict gh_yyz = temp.data(17); + const T* restrict gh_yzz = temp.data(18); + const T* restrict gh_zzz = temp.data(19); + + for (size_t j = 0; j < output_size; j++) + { + dpsi[i][j][0] = gx[j]; + dpsi[i][j][1] = gy[j]; + dpsi[i][j][2] = gz[j]; + + d2psi[i][j](0, 0) = hxx[j]; + d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; + d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; + d2psi[i][j](1, 1) = hyy[j]; + d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; + d2psi[i][j](2, 2) = hzz[j]; + + dghpsi[i][j][0](0, 0) = gh_xxx[j]; //x|xx + dghpsi[i][j][0](0, 1) = gh_xxy[j]; //x|xy + dghpsi[i][j][0](0, 2) = gh_xxz[j]; //x|xz + dghpsi[i][j][0](1, 0) = gh_xxy[j]; //x|yx = xxy + dghpsi[i][j][0](1, 1) = gh_xyy[j]; //x|yy + dghpsi[i][j][0](1, 2) = gh_xyz[j]; //x|yz + dghpsi[i][j][0](2, 0) = gh_xxz[j]; //x|zx = xxz + dghpsi[i][j][0](2, 1) = gh_xyz[j]; //x|zy = xyz + dghpsi[i][j][0](2, 2) = gh_xzz[j]; //x|zz + + dghpsi[i][j][1](0, 0) = gh_xxy[j]; //y|xx = xxy + dghpsi[i][j][1](0, 1) = gh_xyy[j]; //y|xy = xyy + dghpsi[i][j][1](0, 2) = gh_xyz[j]; //y|xz = xyz + dghpsi[i][j][1](1, 0) = gh_xyy[j]; //y|yx = xyy + dghpsi[i][j][1](1, 1) = gh_yyy[j]; //y|yy + dghpsi[i][j][1](1, 2) = gh_yyz[j]; //y|yz + dghpsi[i][j][1](2, 0) = gh_xyz[j]; //y|zx = xyz + dghpsi[i][j][1](2, 1) = gh_yyz[j]; //y|zy = yyz + dghpsi[i][j][1](2, 2) = gh_yzz[j]; //y|zz + + dghpsi[i][j][2](0, 0) = gh_xxz[j]; //z|xx = xxz + dghpsi[i][j][2](0, 1) = gh_xyz[j]; //z|xy = xyz + dghpsi[i][j][2](0, 2) = gh_xzz[j]; //z|xz = xzz + dghpsi[i][j][2](1, 0) = gh_xyz[j]; //z|yx = xyz + dghpsi[i][j][2](1, 1) = gh_yyz[j]; //z|yy = yyz + dghpsi[i][j][2](1, 2) = gh_yzz[j]; //z|yz = yzz + dghpsi[i][j][2](2, 0) = gh_xzz[j]; //z|zx = xzz + dghpsi[i][j][2](2, 1) = gh_yzz[j]; //z|zy = yzz + dghpsi[i][j][2](2, 2) = gh_zzz[j]; //z|zz + } +} + +template +void LCAOrbitalSetT::evaluate_vghgh_impl(const vghgh_type& temp, + ValueVector& psi, + GradVector& dpsi, + HessVector& d2psi, + GGGVector& dghpsi) const +{ + const size_t output_size = psi.size(); + std::copy_n(temp.data(0), output_size, psi.data()); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + const T* restrict gh_xxx = temp.data(10); + const T* restrict gh_xxy = temp.data(11); + const T* restrict gh_xxz = temp.data(12); + const T* restrict gh_xyy = temp.data(13); + const T* restrict gh_xyz = temp.data(14); + const T* restrict gh_xzz = temp.data(15); + const T* restrict gh_yyy = temp.data(16); + const T* restrict gh_yyz = temp.data(17); + const T* restrict gh_yzz = temp.data(18); + const T* restrict gh_zzz = temp.data(19); + + for (size_t j = 0; j < output_size; j++) + { + dpsi[j][0] = gx[j]; + dpsi[j][1] = gy[j]; + dpsi[j][2] = gz[j]; + + d2psi[j](0, 0) = hxx[j]; + d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; + d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; + d2psi[j](1, 1) = hyy[j]; + d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; + d2psi[j](2, 2) = hzz[j]; + + dghpsi[j][0](0, 0) = gh_xxx[j]; //x|xx + dghpsi[j][0](0, 1) = gh_xxy[j]; //x|xy + dghpsi[j][0](0, 2) = gh_xxz[j]; //x|xz + dghpsi[j][0](1, 0) = gh_xxy[j]; //x|yx = xxy + dghpsi[j][0](1, 1) = gh_xyy[j]; //x|yy + dghpsi[j][0](1, 2) = gh_xyz[j]; //x|yz + dghpsi[j][0](2, 0) = gh_xxz[j]; //x|zx = xxz + dghpsi[j][0](2, 1) = gh_xyz[j]; //x|zy = xyz + dghpsi[j][0](2, 2) = gh_xzz[j]; //x|zz + + dghpsi[j][1](0, 0) = gh_xxy[j]; //y|xx = xxy + dghpsi[j][1](0, 1) = gh_xyy[j]; //y|xy = xyy + dghpsi[j][1](0, 2) = gh_xyz[j]; //y|xz = xyz + dghpsi[j][1](1, 0) = gh_xyy[j]; //y|yx = xyy + dghpsi[j][1](1, 1) = gh_yyy[j]; //y|yy + dghpsi[j][1](1, 2) = gh_yyz[j]; //y|yz + dghpsi[j][1](2, 0) = gh_xyz[j]; //y|zx = xyz + dghpsi[j][1](2, 1) = gh_xyy[j]; //y|xy = xyy + dghpsi[j][1](2, 2) = gh_yzz[j]; //y|zz + + dghpsi[j][2](0, 0) = gh_xzz[j]; //z|xx = xzz + dghpsi[j][2](0, 1) = gh_xyz[j]; //z|xy = xyz + dghpsi[j][2](0, 2) = gh_xzz[j]; //z|xz = xzz + dghpsi[j][2](1, 0) = gh_xyz[j]; //z|yx = xyz + dghpsi[j][2](1, 1) = gh_yyz[j]; //z|yy = yyz + dghpsi[j][2](1, 2) = gh_yzz[j]; //z|yz = yzz + dghpsi[j][2](2, 0) = gh_xzz[j]; //z|zx = xzz + dghpsi[j][2](2, 1) = gh_yzz[j]; //z|zy = yzz + dghpsi[j][2](2, 2) = gh_zzz[j]; //z|zz + } +} + +template +void LCAOrbitalSetT::evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dpsi) const +{ + const size_t output_size = dpsi.size(); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + + for (size_t j = 0; j < output_size; j++) + { + //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that + // for an atomic center, the ion gradient is the negative of the elecron gradient. + // Hence minus signs for each of these. + dpsi[j][0] = -gx[j]; + dpsi[j][1] = -gy[j]; + dpsi[j][2] = -gz[j]; + } +} + +template +void LCAOrbitalSetT::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +{ + //TAKE CARE OF IDENTITY + { + ScopedTimer local(basis_timer_); + myBasisSet->evaluateVGL(P, iat, Temp); + } + + if (Identity) + evaluate_vgl_impl(Temp, psi, dpsi, d2psi); + else + { + assert(psi.size() <= this->OrbitalSetSize); + { + ScopedTimer local(mo_timer_); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + Product_ABt(Temp, C_partial_view, Tempv); + } + evaluate_vgl_impl(Tempv, psi, dpsi, d2psi); + } +} + +template +void LCAOrbitalSetT::mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& phi_vgl_v = spo_leader.mw_mem_handle_.getResource().phi_vgl_v; + + phi_vgl_v.resize(QMCTraits::DIM_VGL, spo_list.size(), this->OrbitalSetSize); + mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); + + const size_t nw = phi_vgl_v.size(1); + + //TODO: make this cleaner? + for (int iw = 0; iw < nw; iw++) + { + const size_t output_size = psi_v_list[iw].get().size(); + std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, psi_v_list[iw].get().data()); + std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, d2psi_v_list[iw].get().data()); + // grads are [dim, walker, orb] in phi_vgl_v + // [walker][orb, dim] in dpsi_v_list + for (size_t idim = 0; idim < QMCTraits::DIM; idim++) + BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, &dpsi_v_list[iw].get().data()[0][idim], QMCTraits::DIM); + } +} + +template +void LCAOrbitalSetT::mw_evaluateVGLImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader& P_list, + int iat, + OffloadMWVGLArray& phi_vgl_v) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& basis_mw = spo_leader.mw_mem_handle_.getResource().basis_mw; + basis_mw.resize(QMCTraits::DIM_VGL, spo_list.size(), BasisSetSize); + + { + ScopedTimer local(basis_timer_); + myBasisSet->mw_evaluateVGL(P_list, iat, basis_mw); + } + + if (Identity) + { + // output_size can be smaller than BasisSetSize + const size_t output_size = phi_vgl_v.size(2); + const size_t nw = phi_vgl_v.size(1); + + for (size_t idim = 0; idim < QMCTraits::DIM_VGL; idim++) + for (int iw = 0; iw < nw; iw++) + std::copy_n(basis_mw.data_at(idim, iw, 0), output_size, phi_vgl_v.data_at(idim, iw, 0)); + } + else + { + const size_t requested_orb_size = phi_vgl_v.size(2); + assert(requested_orb_size <= this->OrbitalSetSize); + { + ScopedTimer local(mo_timer_); + ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); + // TODO: make class for general blas interface in Platforms + // have instance of that class as member of LCAOrbitalSetT, call gemm through that + BLAS::gemm('T', 'N', + requested_orb_size, // MOs + spo_list.size() * QMCTraits::DIM_VGL, // walkers * DIM_VGL + BasisSetSize, // AOs + 1, C_partial_view.data(), BasisSetSize, basis_mw.data(), BasisSetSize, 0, phi_vgl_v.data(), + requested_orb_size); + } + } +} + +template +void LCAOrbitalSetT::mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader& P_list, + int iat, + const RefVector& psi_v_list) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& phi_v = spo_leader.mw_mem_handle_.getResource().phi_v; + phi_v.resize(spo_list.size(), this->OrbitalSetSize); + mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v); + + const size_t output_size = phi_v.size(1); + const size_t nw = phi_v.size(0); + + for (int iw = 0; iw < nw; iw++) + std::copy_n(phi_v.data_at(iw, 0), output_size, psi_v_list[iw].get().data()); +} + +template +void LCAOrbitalSetT::mw_evaluateValueImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader& P_list, + int iat, + OffloadMWVArray& phi_v) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + const size_t nw = spo_list.size(); + auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw; + basis_v_mw.resize(nw, BasisSetSize); + + myBasisSet->mw_evaluateValue(P_list, iat, basis_v_mw); + + if (Identity) + { + std::copy_n(basis_v_mw.data_at(0, 0), this->OrbitalSetSize * nw, phi_v.data_at(0, 0)); + } + else + { + const size_t requested_orb_size = phi_v.size(1); + assert(requested_orb_size <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); + BLAS::gemm('T', 'N', + requested_orb_size, // MOs + spo_list.size(), // walkers + BasisSetSize, // AOs + 1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(), BasisSetSize, 0, phi_v.data(), + requested_orb_size); + } +} + +template +void LCAOrbitalSetT::mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const +{ + const size_t nw = spo_list.size(); + for (size_t iw = 0; iw < nw; iw++) + { + for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++) + { + spo_list[iw].evaluateValue(vp_list[iw], iat, psi_list[iw]); + ratios_list[iw][iat] = simd::dot(psi_list[iw].get().data(), invRow_ptr_list[iw], psi_list[iw].get().size()); + } + } +} + +template +void LCAOrbitalSetT::evaluateDetRatios(const VirtualParticleSet& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) +{ + Vector vTemp(Temp.data(0), BasisSetSize); + Vector invTemp(Temp.data(1), BasisSetSize); + + { + ScopedTimer local(mo_timer_); + // when only a subset of orbitals is used, extract limited rows of C. + Matrix C_occupied(C->data(), psiinv.size(), BasisSetSize); + MatrixOperators::product_Atx(C_occupied, psiinv, invTemp); + } + + for (size_t j = 0; j < VP.getTotalNum(); j++) + { + { + ScopedTimer local(basis_timer_); + myBasisSet->evaluateV(VP, j, vTemp.data()); + } + ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize); + } +} + +template +void LCAOrbitalSetT::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const +{ + assert(this == &spo_list.getLeader()); + assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); + assert(phi_vgl_v.size(1) == spo_list.size()); + + mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); + // Device data of phi_vgl_v must be up-to-date upon return + phi_vgl_v.updateTo(); + + const size_t nw = spo_list.size(); + const size_t norb_requested = phi_vgl_v.size(2); + for (int iw = 0; iw < nw; iw++) + { + ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested); + GradType dphi; + for (size_t idim = 0; idim < QMCTraits::DIM; idim++) + dphi[idim] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / ratios[iw]; + grads[iw] = dphi; + } +} + +template +void LCAOrbitalSetT::evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& dhpsi) +{ + //TAKE CARE OF IDENTITY + myBasisSet->evaluateVGH(P, iat, Temph); + if (Identity) + evaluate_vgh_impl(Temph, psi, dpsi, dhpsi); + else + { + assert(psi.size() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + Product_ABt(Temph, C_partial_view, Temphv); + evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi); + } +} + +template +void LCAOrbitalSetT::evaluateVGHGH(const ParticleSet& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& dhpsi, + GGGVector& dghpsi) +{ + // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not implemented\n"); + + //TAKE CARE OF IDENTITY + myBasisSet->evaluateVGHGH(P, iat, Tempgh); + if (Identity) + evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi); + else + { + assert(psi.size() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + Product_ABt(Tempgh, C_partial_view, Tempghv); + evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi); + } +} + +/* implement using gemm algorithm */ +template +inline void LCAOrbitalSetT::evaluate_vgl_impl(const vgl_type& temp, + int i, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) const +{ + const size_t output_size = logdet.cols(); + std::copy_n(temp.data(0), output_size, logdet[i]); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + for (size_t j = 0; j < output_size; j++) + { + dlogdet[i][j][0] = gx[j]; + dlogdet[i][j][1] = gy[j]; + dlogdet[i][j][2] = gz[j]; + } + std::copy_n(temp.data(4), output_size, d2logdet[i]); +} +template +void LCAOrbitalSetT::evaluate_vgh_impl(const vgh_type& temp, + int i, + ValueMatrix& psi, + GradMatrix& dpsi, + HessMatrix& d2psi) const +{ + const size_t output_size = psi.cols(); + std::copy_n(temp.data(0), output_size, psi[i]); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + + for (size_t j = 0; j < output_size; j++) + { + dpsi[i][j][0] = gx[j]; + dpsi[i][j][1] = gy[j]; + dpsi[i][j][2] = gz[j]; + + d2psi[i][j](0, 0) = hxx[j]; + d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; + d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; + d2psi[i][j](1, 1) = hyy[j]; + d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; + d2psi[i][j](2, 2) = hzz[j]; + } +} + +template +void LCAOrbitalSetT::evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dpsi) const +{ + const size_t output_size = dpsi.cols(); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + + for (size_t j = 0; j < output_size; j++) + { + //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that + // for an atomic center, the ion gradient is the negative of the elecron gradient. + // Hence minus signs for each of these. + dpsi[i][j][0] = -gx[j]; + dpsi[i][j][1] = -gy[j]; + dpsi[i][j][2] = -gz[j]; + } +} + +template +void LCAOrbitalSetT::evaluate_ionderiv_vgl_impl(const vghgh_type& temp, + int i, + GradMatrix& dpsi, + HessMatrix& dgpsi, + GradMatrix& dlpsi) const +{ + const size_t output_size = dpsi.cols(); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + const T* restrict gh_xxx = temp.data(10); + const T* restrict gh_xxy = temp.data(11); + const T* restrict gh_xxz = temp.data(12); + const T* restrict gh_xyy = temp.data(13); + const T* restrict gh_xzz = temp.data(15); + const T* restrict gh_yyy = temp.data(16); + const T* restrict gh_yyz = temp.data(17); + const T* restrict gh_yzz = temp.data(18); + const T* restrict gh_zzz = temp.data(19); + + for (size_t j = 0; j < output_size; j++) + { + //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that + // for an atomic center, the ion gradient is the negative of the elecron gradient. + // Hence minus signs for each of these. + dpsi[i][j][0] = -gx[j]; + dpsi[i][j][1] = -gy[j]; + dpsi[i][j][2] = -gz[j]; + + dgpsi[i][j](0, 0) = -hxx[j]; + dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j]; + dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j]; + dgpsi[i][j](1, 1) = -hyy[j]; + dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j]; + dgpsi[i][j](2, 2) = -hzz[j]; + + //Since this returns the ion gradient of the laplacian, we have to trace the grad hessian vector. + dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]); + dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]); + dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]); + } +} + +template +void LCAOrbitalSetT::evaluate_notranspose(const ParticleSet& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) +{ + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGL(P, iat, Temp); + evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet); + } + } + else + { + assert(logdet.cols() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGL(P, iat, Temp); + Product_ABt(Temp, C_partial_view, Tempv); + evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet); + } + } +} + +template +void LCAOrbitalSetT::evaluate_notranspose(const ParticleSet& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) +{ + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGH(P, iat, Temph); + evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet); + } + } + else + { + assert(logdet.cols() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGH(P, iat, Temph); + Product_ABt(Temph, C_partial_view, Temphv); + evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet); + } + } +} + +template +void LCAOrbitalSetT::evaluate_notranspose(const ParticleSet& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) +{ + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGHGH(P, iat, Tempgh); + evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); + } + } + else + { + assert(logdet.cols() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGHGH(P, iat, this->Tempgh); + Product_ABt(this->Tempgh, C_partial_view, this->Tempghv); + evaluate_vghgh_impl(this->Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); + } + } +} + +template +void LCAOrbitalSetT::evaluateGradSource(const ParticleSet& P, + int first, + int last, + const ParticleSet& source, + int iat_src, + GradMatrix& gradphi) +{ + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp); + evaluate_ionderiv_v_impl(Temp, i, gradphi); + } + } + else + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp); + Product_ABt(this->Temp, *C, this->Tempv); + evaluate_ionderiv_v_impl(this->Tempv, i, gradphi); + } + } +} + +template +void LCAOrbitalSetT::evaluateGradSource(const ParticleSet& P, + int first, + int last, + const ParticleSet& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi) +{ + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh); + evaluate_ionderiv_vgl_impl(this->Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi); + } + } + else + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh); + Product_ABt(this->Tempgh, *C, this->Tempghv); + evaluate_ionderiv_vgl_impl(this->Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi); + } + } +} + +template +void LCAOrbitalSetT::evaluateGradSourceRow(const ParticleSet& P, + int iel, + const ParticleSet& source, + int iat_src, + GradVector& gradphi) +{ + if (Identity) + { + myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp); + evaluate_ionderiv_v_row_impl(this->Temp, gradphi); + } + else + { + myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp); + Product_ABt(Temp, *C, this->Tempv); + evaluate_ionderiv_v_row_impl(this->Tempv, gradphi); + } +} + +template +void LCAOrbitalSetT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) +{ + if (!use_stored_copy) + *C_copy = *C; + //gemm is out-of-place + BLAS::gemm('N', 'T', BasisSetSize, this->OrbitalSetSize, this->OrbitalSetSize, RealType(1.0), C_copy->data(), BasisSetSize, + rot_mat.data(), this->OrbitalSetSize, RealType(0.0), C->data(), BasisSetSize); + + /* debugging code + app_log() << "PRINTING MO COEFFICIENTS AFTER ROTATION " << objectName << std::endl; + for (int j = 0; j < OrbitalSetSize; j++) + for (int i = 0; i < BasisSetSize; i++) + { + app_log() << " " << std::right << std::fixed << std::setprecision(16) << std::setw(23) << std::scientific + << *(C->data() + j * BasisSetSize + i); + + if ((j * BasisSetSize + i + 1) % 4 == 0) + app_log() << std::endl; + } + */ +} + +// Class concrete types from ValueType +template class LCAOrbitalSetT; +template class LCAOrbitalSetT; +template class LCAOrbitalSetT>; +template class LCAOrbitalSetT>; + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h new file mode 100644 index 0000000000..d8070b58e5 --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h @@ -0,0 +1,336 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// + + +#ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_TEMP_H +#define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_TEMP_H + +#include +#include "QMCWaveFunctions/SPOSetT.h" +#include "QMCWaveFunctions/BasisSetBase.h" + +#include "Numerics/MatrixOperators.h" +#include "Numerics/DeterminantOperators.h" + +namespace qmcplusplus +{ +/** class to handle linear combinations of basis orbitals used to evaluate the Dirac determinants. + * + * SoA verson of LCOrtbitalSet + * Localized basis set is always real + */ +template +class LCAOrbitalSetT : public SPOSetT +{ +public: + using basis_type = SoaBasisSetBase; + using vgl_type = typename basis_type::vgl_type; + using vgh_type = typename basis_type::vgh_type; + using vghgh_type = typename basis_type::vghgh_type; + + using IndexType = typename SPOSetT::IndexType; + using RealType = typename SPOSetT::RealType; + using ComplexType = typename SPOSetT::ComplexType; + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradVector = typename SPOSetT::GradVector; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessMatrix = typename SPOSetT::HessMatrix; + using PosType = typename SPOSetT::PosType; + using HessVector = typename SPOSetT::HessVector; + using GGGMatrix = typename SPOSetT::GGGMatrix; + using GGGVector = typename SPOSetT::GGGVector; + using GradType = typename SPOSetT::GradType; + using OffloadMWVGLArray = Array>; // [VGL, walker, Orbs] + using OffloadMWVArray = Array>; // [walker, Orbs] + + ///pointer to the basis set + std::unique_ptr myBasisSet; + /// pointer to matrix containing the coefficients + std::shared_ptr C; + + /** constructor + * @param bs pointer to the BasisSet + */ + LCAOrbitalSetT(const std::string& my_name, std::unique_ptr&& bs); + + LCAOrbitalSetT(const LCAOrbitalSetT& in); + + std::string getClassName() const final { return "LCAOrbitalSetT"; } + + bool isRotationSupported() const final { return true; } + + bool hasIonDerivs() const final { return true; } + + std::unique_ptr> makeClone() const final; + + void storeParamsBeforeRotation() final { C_copy = std::make_shared(*C); } + + void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) final; + + /** set the OrbitalSetSize and Identity=false and initialize internal storages + */ + void setOrbitalSetSize(int norbs) final; + + /** return the size of the basis set + */ + int getBasisSetSize() const { return (myBasisSet == nullptr) ? 0 : myBasisSet->getBasisSetSize(); } + + bool isIdentity() const { return Identity; }; + + /** check consistency between Identity and C + * + */ + void checkObject() const final; + + void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final; + + void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; + + void mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader& P_list, + int iat, + const RefVector& psi_v_list) const final; + + void mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const final; + + void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const final; + + void evaluateDetRatios(const VirtualParticleSet& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) final; + + void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const final; + + void evaluateVGH(const ParticleSet& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) final; + + void evaluateVGHGH(const ParticleSet& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) final; + + void evaluate_notranspose(const ParticleSet& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) final; + + void evaluate_notranspose(const ParticleSet& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) final; + + void evaluate_notranspose(const ParticleSet& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) final; + + //NOTE: The data types get complicated here, so here's an overview of the + // data types associated with ionic derivatives, and how to get their data. + // + //NOTE: These data structures hold the data for one particular ion, and so the ID is implicit. + // It's up to the user to keep track of which ion these derivatives refer to. + // + // 1.) GradMatrix grad_phi: Holds the ionic derivatives of each SPO for each electron. + // Example: grad_phi[iel][iorb][idim]. iel -- electron index. + // iorb -- orbital index. + // idim -- cartesian index of ionic derivative. + // X=0, Y=1, Z=2. + // + // 2.) HessMatrix grad_grad_phi: Holds the ionic derivatives of the electron gradient components + // for each SPO and each electron. + // Example: grad_grad_phi[iel][iorb](idim,edim) iel -- electron index. + // iorb -- orbital index. + // idim -- ionic derivative's cartesian index. + // X=0, Y=1, Z=2 + // edim -- electron derivative's cartesian index. + // x=0, y=1, z=2. + // + // 3.) GradMatrix grad_lapl_phi: Holds the ionic derivatives of the electron laplacian for each SPO and each electron. + // Example: grad_lapl_phi[iel][iorb][idim]. iel -- electron index. + // iorb -- orbital index. + // idim -- cartesian index of ionic derivative. + // X=0, Y=1, Z=2. + + /** + * \brief Calculate ion derivatives of SPO's. + * + * @param P Electron particle set. + * @param first index of first electron + * @@param last index of last electron + * @param source Ion particle set. + * @param iat_src Index of ion. + * @param gradphi Container storing ion gradients for all particles and all orbitals. + */ + void evaluateGradSource(const ParticleSet& P, + int first, + int last, + const ParticleSet& source, + int iat_src, + GradMatrix& grad_phi) final; + + /** + * \brief Calculate ion derivatives of SPO's, their gradients, and their laplacians. + * + * @param P Electron particle set. + * @param first index of first electron. + * @@param last index of last electron + * @param source Ion particle set. + * @param iat_src Index of ion. + * @param grad_phi Container storing ion gradients for all particles and all orbitals. + * @param grad_grad_phi Container storing ion gradients of electron gradients for all particles and all orbitals. + * @param grad_lapl_phi Container storing ion gradients of SPO laplacians for all particles and all orbitals. + */ + void evaluateGradSource(const ParticleSet& P, + int first, + int last, + const ParticleSet& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi) final; + + void evaluateGradSourceRow(const ParticleSet& P, + int iel, + const ParticleSet& source, + int iat_src, + GradVector& grad_phi) final; + + void createResource(ResourceCollection& collection) const final; + void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const final; + void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const final; + +protected: + ///number of Single-particle orbitals + const IndexType BasisSetSize; + /// a copy of the original C before orbital rotation is applied; + std::shared_ptr C_copy; + + ///true if C is an identity matrix + bool Identity; + ///Temp(BasisSetSize) : Row index=V,Gx,Gy,Gz,L + vgl_type Temp; + ///Tempv(OrbitalSetSize) Tempv=C*Temp + vgl_type Tempv; + + ///These are temporary VectorSoAContainers to hold value, gradient, and hessian for + ///all basis or SPO functions evaluated at a given point. + ///Nbasis x [1(value)+3(gradient)+6(hessian)] + vgh_type Temph; + ///Norbitals x [1(value)+3(gradient)+6(hessian)] + vgh_type Temphv; + + ///These are temporary VectorSoAContainers to hold value, gradient, hessian, and + /// gradient hessian for all basis or SPO functions evaluated at a given point. + ///Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)] + vghgh_type Tempgh; + ///Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)] + vghgh_type Tempghv; + +private: + ///helper functions to handle Identity + void evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) const; + + void evaluate_vgl_impl(const vgl_type& temp, + int i, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) const; + ///These two functions unpack the data in vgh_type temp object into wavefunction friendly data structures. + + + ///This unpacks temp into vectors psi, dpsi, and d2psi. + void evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi, GradVector& dpsi, HessVector& d2psi) const; + + ///Unpacks temp into the ith row (or electron index) of logdet, dlogdet, dhlogdet. + void evaluate_vgh_impl(const vgh_type& temp, + int i, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& dhlogdet) const; + ///Unpacks data in vghgh_type temp object into wavefunction friendly data structures for value, gradient, hessian + ///and gradient hessian. + void evaluate_vghgh_impl(const vghgh_type& temp, + ValueVector& psi, + GradVector& dpsi, + HessVector& d2psi, + GGGVector& dghpsi) const; + + void evaluate_vghgh_impl(const vghgh_type& temp, + int i, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& dhlogdet, + GGGMatrix& dghlogdet) const; + + + ///Unpacks data in vgl object and calculates/places ionic gradient result into dlogdet. + void evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dlogdet) const; + + ///Unpacks data in vgl object and calculates/places ionic gradient of value, + /// electron gradient, and electron laplacian result into dlogdet, dglogdet, and dllogdet respectively. + void evaluate_ionderiv_vgl_impl(const vghgh_type& temp, + int i, + GradMatrix& dlogdet, + HessMatrix& dglogdet, + GradMatrix& dllogdet) const; + + ///Unpacks data in vgl object and calculates/places ionic gradient of a single row (phi_j(r)) into dlogdet. + void evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dlogdet) const; + + void mw_evaluateVGLImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader& P_list, + int iat, + OffloadMWVGLArray& phi_vgl_v) const; + + /// packed walker GEMM implementation + void mw_evaluateValueImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader& P_list, + int iat, + OffloadMWVArray& phi_v) const; + + struct LCAOMultiWalkerMem; + ResourceHandle mw_mem_handle_; + /// timer for basis set + NewTimer& basis_timer_; + /// timer for MO + NewTimer& mo_timer_; +}; +} // namespace qmcplusplus +#endif