From 923ba27237bec4678cbb965ce87b8b37c30c3cd3 Mon Sep 17 00:00:00 2001 From: walshmm Date: Fri, 1 Sep 2023 18:41:52 +0000 Subject: [PATCH] add SplineR2RT Empty-Commit --- .../BsplineFactory/BsplineSetT.h | 1 + .../BsplineFactory/SplineR2RT.cpp | 560 ++++++++++++++++++ .../BsplineFactory/SplineR2RT.h | 223 +++++++ src/QMCWaveFunctions/CMakeLists.txt | 2 +- 4 files changed, 785 insertions(+), 1 deletion(-) create mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp create mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h index ba90502537..35f1580d16 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h @@ -245,5 +245,6 @@ class BsplineSetT : public SPOSetT std::vector offset; }; + } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp new file mode 100644 index 0000000000..e4695e6c11 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp @@ -0,0 +1,560 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +#include "Concurrency/OpenMP.h" +#include "SplineR2RT.h" +#include "spline2/MultiBsplineEval.hpp" +#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" + +namespace qmcplusplus +{ +template +SplineR2RT::SplineR2RT(const SplineR2RT& in) = default; + +template +inline void SplineR2RT::set_spline(SingleSplineType* spline_r, + SingleSplineType* spline_i, + int twist, + int ispline, + int level) +{ + SplineInst->copy_spline(spline_r, ispline); +} + +template +bool SplineR2RT::read_splines(hdf_archive& h5f) +{ + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); +} + +template +bool SplineR2RT::write_splines(hdf_archive& h5f) +{ + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); +} + +template +void SplineR2RT::storeParamsBeforeRotation() +{ + const auto spline_ptr = SplineInst->getSplinePtr(); + const auto coefs_tot_size = spline_ptr->coefs_size; + coef_copy_ = std::make_shared>(coefs_tot_size); + + std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); +} + +/* + ~~ Notes for rotation ~~ + spl_coefs = Raw pointer to spline coefficients + basis_set_size = Number of spline coefs per orbital + OrbitalSetSize = Number of orbitals (excluding padding) + + spl_coefs has a complicated layout depending on dimensionality of splines. + Luckily, for our purposes, we can think of spl_coefs as pointing to a + matrix of size BasisSetSize x (OrbitalSetSize + padding), with the spline + index adjacent in memory. The orbital index is SIMD aligned and therefore + may include padding. + + As a result, due to SIMD alignment, Nsplines may be larger than the + actual number of splined orbitals. This means that in practice rot_mat + may be smaller than the number of 'columns' in the coefs array! + + SplineR2RT spl_coef layout: + ^ | sp1 | ... | spN | pad | + | |=====|=====|=====|=====| + | | c11 | ... | c1N | 0 | + basis_set_size | c21 | ... | c2N | 0 | + | | ... | ... | ... | 0 | + | | cM1 | ... | cMN | 0 | + v |=====|=====|=====|=====| + <------ Nsplines ------> + + SplineC2C spl_coef layout: + ^ | sp1_r | sp1_i | ... | spN_r | spN_i | pad | + | |=======|=======|=======|=======|=======|=======| + | | c11_r | c11_i | ... | c1N_r | c1N_i | 0 | + basis_set_size | c21_r | c21_i | ... | c2N_r | c2N_i | 0 | + | | ... | ... | ... | ... | ... | ... | + | | cM1_r | cM1_i | ... | cMN_r | cMN_i | 0 | + v |=======|=======|=======|=======|=======|=======| + <------------------ Nsplines ------------------> + + NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs + "matrix" is very tall and skinny. +*/ +template +void SplineR2RT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) +{ + // SplineInst is a MultiBspline. See src/spline2/MultiBspline.hpp + const auto spline_ptr = SplineInst->getSplinePtr(); + assert(spline_ptr != nullptr); + const auto spl_coefs = spline_ptr->coefs; + const auto Nsplines = spline_ptr->num_splines; // May include padding + const auto coefs_tot_size = spline_ptr->coefs_size; + const auto BasisSetSize = coefs_tot_size / Nsplines; + const auto TrueNOrbs = rot_mat.size1(); // == Nsplines - padding + assert(this->OrbitalSetSize == rot_mat.rows()); + assert(this->OrbitalSetSize == rot_mat.cols()); + + if (!use_stored_copy) + { + assert(coef_copy_ != nullptr); + std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin()); + } + + // Apply rotation the dumb way b/c I can't get BLAS::gemm to work... + for (auto i = 0; i < BasisSetSize; i++) + { + for (auto j = 0; j < this->OrbitalSetSize; j++) + { + const auto cur_elem = Nsplines * i + j; + auto newval{0.}; + for (auto k = 0; k < this->OrbitalSetSize; k++) + { + const auto index = i * Nsplines + k; + newval += (*coef_copy_)[index] * rot_mat[k][j]; + } + spl_coefs[cur_elem] = newval; + } + } +} + + +template +inline void SplineR2RT::assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) + const +{ + // protect last + last = last > this->kPoints.size() ? this->kPoints.size() : last; + + const ST signed_one = (bc_sign & 1) ? -1 : 1; +#pragma omp simd + for (size_t j = first; j < last; ++j) + psi[this->first_spo + j] = signed_one * myV[j]; +} + +template +void SplineR2RT::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) +{ + const PointType& r = P.activeR(iat); + PointType ru; + int bc_sign = convertPos(r, ru); + +#pragma omp parallel + { + int first, last; + FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); + assign_v(bc_sign, myV, psi, first, last); + } +} + +template +void SplineR2RT::evaluateDetRatios(const VirtualParticleSet& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) +{ + const bool need_resize = ratios_private.rows() < VP.getTotalNum(); + +#pragma omp parallel + { + int tid = omp_get_thread_num(); + // initialize thread private ratios + if (need_resize) + { + if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime + ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); +#pragma omp barrier + } + int first, last; + FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), tid, first, last); + const int last_real = this->kPoints.size() < last ? this->kPoints.size() : last; + + for (int iat = 0; iat < VP.getTotalNum(); ++iat) + { + const PointType& r = VP.activeR(iat); + PointType ru; + int bc_sign = convertPos(r, ru); + + spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); + assign_v(bc_sign, myV, psi, first, last_real); + ratios_private[iat][tid] = simd::dot(psi.data() + first, psiinv.data() + first, last_real - first); + } + } + + // do the reduction manually + for (int iat = 0; iat < VP.getTotalNum(); ++iat) + { + ratios[iat] = TT(0); + for (int tid = 0; tid < ratios_private.cols(); tid++) + ratios[iat] += ratios_private[iat][tid]; + } +} + +template +inline void SplineR2RT::assign_vgl(int bc_sign, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi, + int first, + int last) const +{ + // protect last + last = last > this->kPoints.size() ? this->kPoints.size() : last; + + const ST signed_one = (bc_sign & 1) ? -1 : 1; + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]}; + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); + +#pragma omp simd + for (size_t j = first; j < last; ++j) + { + const size_t psiIndex = this->first_spo + j; + psi[psiIndex] = signed_one * myV[j]; + dpsi[psiIndex][0] = signed_one * (g00 * g0[j] + g01 * g1[j] + g02 * g2[j]); + dpsi[psiIndex][1] = signed_one * (g10 * g0[j] + g11 * g1[j] + g12 * g2[j]); + dpsi[psiIndex][2] = signed_one * (g20 * g0[j] + g21 * g1[j] + g22 * g2[j]); + d2psi[psiIndex] = signed_one * SymTrace(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], symGG); + } +} + +/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian + */ +template +inline void SplineR2RT::assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +{ + const ST signed_one = (bc_sign & 1) ? -1 : 1; + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + +#pragma omp simd + for (int psiIndex = this->first_spo; psiIndex < this->last_spo; ++psiIndex) + { + const size_t j = psiIndex - this->first_spo; + psi[psiIndex] = signed_one * myV[j]; + dpsi[psiIndex][0] = signed_one * g0[j]; + dpsi[psiIndex][1] = signed_one * g1[j]; + dpsi[psiIndex][2] = signed_one * g2[j]; + d2psi[psiIndex] = signed_one * myL[j]; + } +} + +template +void SplineR2RT::evaluateVGL(const ParticleSet& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) +{ + const PointType& r = P.activeR(iat); + PointType ru; + int bc_sign = convertPos(r, ru); + +#pragma omp parallel + { + int first, last; + FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); + assign_vgl(bc_sign, psi, dpsi, d2psi, first, last); + } +} + +template +void SplineR2RT::assign_vgh(int bc_sign, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const +{ + // protect last + last = last > this->kPoints.size() ? this->kPoints.size() : last; + + const ST signed_one = (bc_sign & 1) ? -1 : 1; + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); + +#pragma omp simd + for (size_t j = first; j < last; ++j) + { + //dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j]; + const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j]; + const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j]; + + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = signed_one * myV[j]; + dpsi[psiIndex][0] = signed_one * dX_r; + dpsi[psiIndex][1] = signed_one * dY_r; + dpsi[psiIndex][2] = signed_one * dZ_r; + + const ST h_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g00, g01, g02); + const ST h_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g10, g11, g12); + const ST h_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g20, g21, g22); + const ST h_yx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g00, g01, g02); + const ST h_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g10, g11, g12); + const ST h_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g20, g21, g22); + const ST h_zx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g00, g01, g02); + const ST h_zy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g10, g11, g12); + const ST h_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g20, g21, g22); + + grad_grad_psi[psiIndex][0] = signed_one * h_xx_r; + grad_grad_psi[psiIndex][1] = signed_one * h_xy_r; + grad_grad_psi[psiIndex][2] = signed_one * h_xz_r; + grad_grad_psi[psiIndex][3] = signed_one * h_yx_r; + grad_grad_psi[psiIndex][4] = signed_one * h_yy_r; + grad_grad_psi[psiIndex][5] = signed_one * h_yz_r; + grad_grad_psi[psiIndex][6] = signed_one * h_zx_r; + grad_grad_psi[psiIndex][7] = signed_one * h_zy_r; + grad_grad_psi[psiIndex][8] = signed_one * h_zz_r; + } +} + +template +void SplineR2RT::evaluateVGH(const ParticleSet& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) +{ + const PointType& r = P.activeR(iat); + PointType ru; + int bc_sign = convertPos(r, ru); + +#pragma omp parallel + { + int first, last; + FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); + assign_vgh(bc_sign, psi, dpsi, grad_grad_psi, first, last); + } +} + +template +void SplineR2RT::assign_vghgh(int bc_sign, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first, + int last) const +{ + // protect last + last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? this->kPoints.size() : last); + + const ST signed_one = (bc_sign & 1) ? -1 : 1; + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); + + const ST* restrict gh000 = mygH.data(0); + const ST* restrict gh001 = mygH.data(1); + const ST* restrict gh002 = mygH.data(2); + const ST* restrict gh011 = mygH.data(3); + const ST* restrict gh012 = mygH.data(4); + const ST* restrict gh022 = mygH.data(5); + const ST* restrict gh111 = mygH.data(6); + const ST* restrict gh112 = mygH.data(7); + const ST* restrict gh122 = mygH.data(8); + const ST* restrict gh222 = mygH.data(9); + + //SIMD doesn't work quite right yet. Comment out until further debugging. + //#pragma omp simd + for (size_t j = first; j < last; ++j) + { + const ST val_r = myV[j]; + + + //dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j]; + const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j]; + const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j]; + + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = signed_one * val_r; + dpsi[psiIndex][0] = signed_one * dX_r; + dpsi[psiIndex][1] = signed_one * dY_r; + dpsi[psiIndex][2] = signed_one * dZ_r; + + //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. + const ST f_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g00, g01, g02); + const ST f_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g10, g11, g12); + const ST f_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g20, g21, g22); + const ST f_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g10, g11, g12); + const ST f_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g20, g21, g22); + const ST f_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g20, g21, g22); + + /* const ST h_xx_r=f_xx_r; + const ST h_xy_r=f_xy_r+(kX*dY_i+kY*dX_i)-kX*kY*val_r; + const ST h_xz_r=f_xz_r+(kX*dZ_i+kZ*dX_i)-kX*kZ*val_r; + const ST h_yy_r=f_yy_r+2*kY*dY_i-kY*kY*val_r; + const ST h_yz_r=f_yz_r+(kY*dZ_i+kZ*dY_i)-kY*kZ*val_r; + const ST h_zz_r=f_zz_r+2*kZ*dZ_i-kZ*kZ*val_r; */ + + grad_grad_psi[psiIndex][0] = f_xx_r * signed_one; + grad_grad_psi[psiIndex][1] = f_xy_r * signed_one; + grad_grad_psi[psiIndex][2] = f_xz_r * signed_one; + grad_grad_psi[psiIndex][4] = f_yy_r * signed_one; + grad_grad_psi[psiIndex][5] = f_yz_r * signed_one; + grad_grad_psi[psiIndex][8] = f_zz_r * signed_one; + + //symmetry: + grad_grad_psi[psiIndex][3] = grad_grad_psi[psiIndex][1]; + grad_grad_psi[psiIndex][6] = grad_grad_psi[psiIndex][2]; + grad_grad_psi[psiIndex][7] = grad_grad_psi[psiIndex][5]; + //These are the real and imaginary components of the third SPO derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. + + const ST f3_xxx_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], + gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], + gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], + gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], + gh122[j], gh222[j], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], + gh122[j], gh222[j], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], + gh122[j], gh222[j], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], + gh122[j], gh222[j], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], + gh122[j], gh222[j], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], + gh122[j], gh222[j], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], + gh122[j], gh222[j], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) + /* const ST gh_xxx_r= f3_xxx_r + 3*kX*f_xx_i - 3*kX*kX*dX_r - kX*kX*kX*val_i; + const ST gh_xxy_r= f3_xxy_r +(kY*f_xx_i+2*kX*f_xy_i) - (kX*kX*dY_r+2*kX*kY*dX_r)-kX*kX*kY*val_i; + const ST gh_xxz_r= f3_xxz_r +(kZ*f_xx_i+2*kX*f_xz_i) - (kX*kX*dZ_r+2*kX*kZ*dX_r)-kX*kX*kZ*val_i; + const ST gh_xyy_r= f3_xyy_r +(2*kY*f_xy_i+kX*f_yy_i) - (2*kX*kY*dY_r+kY*kY*dX_r)-kX*kY*kY*val_i; + const ST gh_xyz_r= f3_xyz_r +(kX*f_yz_i+kY*f_xz_i+kZ*f_xy_i)-(kX*kY*dZ_r+kY*kZ*dX_r+kZ*kX*dY_r) - kX*kY*kZ*val_i; + const ST gh_xzz_r= f3_xzz_r +(2*kZ*f_xz_i+kX*f_zz_i) - (2*kX*kZ*dZ_r+kZ*kZ*dX_r)-kX*kZ*kZ*val_i; + const ST gh_yyy_r= f3_yyy_r + 3*kY*f_yy_i - 3*kY*kY*dY_r - kY*kY*kY*val_i; + const ST gh_yyz_r= f3_yyz_r +(kZ*f_yy_i+2*kY*f_yz_i) - (kY*kY*dZ_r+2*kY*kZ*dY_r)-kY*kY*kZ*val_i; + const ST gh_yzz_r= f3_yzz_r +(2*kZ*f_yz_i+kY*f_zz_i) - (2*kY*kZ*dZ_r+kZ*kZ*dY_r)-kY*kZ*kZ*val_i; + const ST gh_zzz_r= f3_zzz_r + 3*kZ*f_zz_i - 3*kZ*kZ*dZ_r - kZ*kZ*kZ*val_i;*/ + //[x][xx] //These are the unique entries + grad_grad_grad_psi[psiIndex][0][0] = signed_one * f3_xxx_r; + grad_grad_grad_psi[psiIndex][0][1] = signed_one * f3_xxy_r; + grad_grad_grad_psi[psiIndex][0][2] = signed_one * f3_xxz_r; + grad_grad_grad_psi[psiIndex][0][4] = signed_one * f3_xyy_r; + grad_grad_grad_psi[psiIndex][0][5] = signed_one * f3_xyz_r; + grad_grad_grad_psi[psiIndex][0][8] = signed_one * f3_xzz_r; + + //filling in the symmetric terms. Filling out the xij terms + grad_grad_grad_psi[psiIndex][0][3] = grad_grad_grad_psi[psiIndex][0][1]; + grad_grad_grad_psi[psiIndex][0][6] = grad_grad_grad_psi[psiIndex][0][2]; + grad_grad_grad_psi[psiIndex][0][7] = grad_grad_grad_psi[psiIndex][0][5]; + + //Now for everything that's a permutation of the above: + grad_grad_grad_psi[psiIndex][1][0] = grad_grad_grad_psi[psiIndex][0][1]; + grad_grad_grad_psi[psiIndex][1][1] = grad_grad_grad_psi[psiIndex][0][4]; + grad_grad_grad_psi[psiIndex][1][2] = grad_grad_grad_psi[psiIndex][0][5]; + grad_grad_grad_psi[psiIndex][1][3] = grad_grad_grad_psi[psiIndex][0][4]; + grad_grad_grad_psi[psiIndex][1][6] = grad_grad_grad_psi[psiIndex][0][5]; + + grad_grad_grad_psi[psiIndex][2][0] = grad_grad_grad_psi[psiIndex][0][2]; + grad_grad_grad_psi[psiIndex][2][1] = grad_grad_grad_psi[psiIndex][0][5]; + grad_grad_grad_psi[psiIndex][2][2] = grad_grad_grad_psi[psiIndex][0][8]; + grad_grad_grad_psi[psiIndex][2][3] = grad_grad_grad_psi[psiIndex][0][5]; + grad_grad_grad_psi[psiIndex][2][6] = grad_grad_grad_psi[psiIndex][0][8]; + + grad_grad_grad_psi[psiIndex][1][4] = signed_one * f3_yyy_r; + grad_grad_grad_psi[psiIndex][1][5] = signed_one * f3_yyz_r; + grad_grad_grad_psi[psiIndex][1][8] = signed_one * f3_yzz_r; + + grad_grad_grad_psi[psiIndex][1][7] = grad_grad_grad_psi[psiIndex][1][5]; + grad_grad_grad_psi[psiIndex][2][4] = grad_grad_grad_psi[psiIndex][1][5]; + grad_grad_grad_psi[psiIndex][2][5] = grad_grad_grad_psi[psiIndex][1][8]; + grad_grad_grad_psi[psiIndex][2][7] = grad_grad_grad_psi[psiIndex][1][8]; + + grad_grad_grad_psi[psiIndex][2][8] = signed_one * f3_zzz_r; + } +} + +template +void SplineR2RT::evaluateVGHGH(const ParticleSet& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) +{ + const PointType& r = P.activeR(iat); + PointType ru; + int bc_sign = convertPos(r, ru); + +#pragma omp parallel + { + int first, last; + FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); + assign_vghgh(bc_sign, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first, last); + } +} + +template class SplineR2RT; +template class SplineR2RT; + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h new file mode 100644 index 0000000000..302d745f05 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h @@ -0,0 +1,223 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +#ifndef QMCPLUSPLUS_SPLINE_R2R_H +#define QMCPLUSPLUS_SPLINE_R2R_H + +#include +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" +#include "OhmmsSoA/VectorSoaContainer.h" +#include "spline2/MultiBspline.hpp" +#include "Utilities/FairDivide.h" + +namespace qmcplusplus +{ +/** class to match ST real spline with BsplineSet::ValueType (real) SPOs + * @tparam ST precision of spline + * + * Requires temporage storage and multiplication of the sign of the real part of the phase + * Internal storage ST type arrays are aligned and padded. + */ +template +class SplineR2RT : public BsplineSetT +{ +public: + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + // types for evaluation results + using TT = typename BsplineSetT::ValueType; + using GGGVector = typename BsplineSetT::GGGVector; + using ValueMatrix = typename BsplineSetT::ValueMatrix; + using GradVector = typename BsplineSetT::GradVector; + using HessVector = typename BsplineSetT::HessVector; + using ValueVector = typename BsplineSetT::ValueVector; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; + + using RealType = typename SPOSetT::RealType; + +private: + bool IsGamma; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian + Tensor GGt; + ///multi bspline set + std::shared_ptr> SplineInst; + + ///Copy of original splines for orbital rotation + std::shared_ptr> coef_copy_; + + ///thread private ratios for reduction when using nested threading, numVP x numThread + Matrix ratios_private; + + +protected: + ///primitive cell + CrystalLattice PrimLattice; + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; + +public: + SplineR2RT(const std::string& my_name) : BsplineSetT(my_name) {} + + SplineR2RT(const SplineR2RT& in); + virtual std::string getClassName() const override { return "SplineR2RT"; } + virtual std::string getKeyword() const override { return "SplineR2RT"; } + bool isComplex() const override { return false; }; + bool isRotationSupported() const override { return true; } + + std::unique_ptr> makeClone() const override { return std::make_unique>(*this); } + + /// Store an original copy of the spline coefficients for orbital rotation + void storeParamsBeforeRotation() override; + + /* + Implements orbital rotations via [1,2]. + Should be called by RotatedSPOs::apply_rotation() + + This implementation requires that NSPOs > Nelec. In other words, + if you want to run a orbopt wfn, you must include some virtual orbitals! + + Some results (using older Berkeley branch) were published in [3]. + + [1] Filippi & Fahy, JCP 112, (2000) + [2] Toulouse & Umrigar, JCP 126, (2007) + [3] Townsend et al., PRB 102, (2020) + */ + void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; + + inline void resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + const size_t npad = getAlignedSize(n); + this->myV.resize(npad); + this->myG.resize(npad); + this->myL.resize(npad); + this->myH.resize(npad); + this->mygH.resize(npad); + + IsGamma = ((this->HalfG[0] == 0) && (this->HalfG[1] == 0) && (this->HalfG[2] == 0)); + } + + void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } + + void gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); + } + + template + void create_spline(GT& xyz_g, BCT& xyz_bc) + { + GGt = dot(transpose(PrimLattice.G), PrimLattice.G); + SplineInst = std::make_shared>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " + << "for the coefficients in 3D spline orbital representation" << std::endl; + } + + inline void flush_zero() { SplineInst->flush_zero(); } + + void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); + + bool read_splines(hdf_archive& h5f); + + bool write_splines(hdf_archive& h5f); + + /** convert position in PrimLattice unit and return sign */ + inline int convertPos(const PointType& r, PointType& ru) + { + ru = PrimLattice.toUnit(r); + int bc_sign = 0; + for (int i = 0; i < this->D; i++) + if (-std::numeric_limits::epsilon() < ru[i] && ru[i] < 0) + ru[i] = ST(0.0); + else + { + ST img = std::floor(ru[i]); + ru[i] -= img; + bc_sign += this->HalfG[i] * (int)img; + } + return bc_sign; + } + + void assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) const; + + void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; + + void evaluateDetRatios(const VirtualParticleSet& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override; + + void assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) const; + + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian + */ + void assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + void evaluateVGL(const ParticleSet& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) override; + + void assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) + const; + + void evaluateVGH(const ParticleSet& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override; + + void assign_vghgh(int bc_sign, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first = 0, + int last = -1) const; + + void evaluateVGHGH(const ParticleSet& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + template + friend struct SplineSetReader; + friend struct BsplineReaderBase; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt index 9d6eace715..61fa6756fc 100644 --- a/src/QMCWaveFunctions/CMakeLists.txt +++ b/src/QMCWaveFunctions/CMakeLists.txt @@ -118,7 +118,7 @@ if(OHMMS_DIM MATCHES 3) set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2COMPTarget.cpp) else(QMC_COMPLEX) set(FERMION_SRCS ${FERMION_SRCS} BsplineFactory/createRealSingle.cpp BsplineFactory/createRealDouble.cpp - BsplineFactory/SplineC2R.cpp BsplineFactory/SplineR2R.cpp) + BsplineFactory/SplineC2R.cpp BsplineFactory/SplineR2R.cpp BsplineFactory/SplineR2RT.cpp) set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2ROMPTarget.cpp) endif(QMC_COMPLEX)